From 8acfa2097a86296063bdbb31baac7cea68c70a0e Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 15 Feb 2021 19:27:01 -0800 Subject: [PATCH 001/901] kram/kramv - increase label area, and simplify decode with encode/decode routines that go to ram instead of disk. --- kramv/KramLoader.mm | 69 ++++------------- kramv/KramViewerMain.mm | 2 +- libkram/kram/KTXImage.cpp | 16 ++++ libkram/kram/KTXImage.h | 4 + libkram/kram/KramImage.cpp | 155 ++++++++++++++++++++++++------------- libkram/kram/KramImage.h | 9 +++ 6 files changed, 147 insertions(+), 108 deletions(-) diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 7e2c15fa..3ef9c0cb 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -48,69 +48,31 @@ @implementation KramLoader { // on macOS/arm, the M1 supports all 3 encode formats #define DO_DECODE TARGET_CPU_X86_64 -- (BOOL)decodeImageIfNeeded:(KTXImage&)image data:(vector&)data +- (BOOL)decodeImageIfNeeded:(KTXImage&)image imageDecoded:(KTXImage&)imageDecoded useImageDecoded:(bool&)useImageDecoded { #if DO_DECODE - MyMTLPixelFormat format = image.pixelFormat; - - // decode to disk, and then load that in place of original - // MacIntel can only open BC and explicit formats. - FileHelper decodedTmpFile; - - bool useDecode = false; - if (isETCFormat(format)) { - if (!decodedTmpFile.openTemporaryFile(".ktx", "w+")) { - return NO; - } - - Image imageDecode; - if (!imageDecode.decode(image, decodedTmpFile.pointer(), kTexEncoderEtcenc, false, "")) { + useImageDecoded = false; + + Image imageUnused; // TODO: move to only using KTXImage, decode needs to move there + + if (isETCFormat(image.pixelFormat)) { + if (!imageUnused.decode(image, imageDecoded, kTexEncoderEtcenc, false, "")) { return NO; } - useDecode = true; + useImageDecoded = true; } - else if (isASTCFormat(format)) { - if (!decodedTmpFile.openTemporaryFile(".ktx", "w+")) { - return NO; - } - - Image imageDecode; - if (!imageDecode.decode(image, decodedTmpFile.pointer(), kTexEncoderAstcenc, false, "")) { + else if (isASTCFormat(image.pixelFormat)) { + if (!imageUnused.decode(image, imageDecoded, kTexEncoderAstcenc, false, "")) { return NO; } - useDecode = true; + useImageDecoded = true; } // TODO: decode BC format on iOS when not supported, but viewer only on macOS for now - - if (useDecode) { - FILE* fp = decodedTmpFile.pointer(); - - size_t size = decodedTmpFile.size(); - if (size <= 0) { - return NO; - } - - data.resize(size); - - // have to pull into buffer, this only works with sync load path for now - rewind(fp); - - size_t readBytes = fread(data.data(), 1, size, fp); - if (readBytes != size) { - fprintf(stderr, "%s\n", strerror(errno)); - - return NO; - } - - image.skipImageLength = false; - if (!image.open(data.data(), (int32_t)size)) { // doesn't fail - return NO; - } - } #endif + return YES; } @@ -126,12 +88,13 @@ - (BOOL)decodeImageIfNeeded:(KTXImage&)image data:(vector&)data *originalFormat = (MTLPixelFormat)image.pixelFormat; } - vector data; - if (![self decodeImageIfNeeded:image data:data]) { + KTXImage imageDecoded; + bool useImageDecoded = false; + if (![self decodeImageIfNeeded:image imageDecoded:imageDecoded useImageDecoded:useImageDecoded]) { return nil; } - return [self loadTextureFromImage:image]; + return [self loadTextureFromImage:useImageDecoded ? 
imageDecoded : image]; } static int32_t numberOfMipmapLevels(const Image& image) { diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 1280bc08..e75e8893 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -371,7 +371,7 @@ - (nonnull ShowSettings*)showSettings { - (NSTextField*)_addHud:(BOOL)isShadow { // add a label for the hud - NSTextField *label = [[NSTextField alloc] initWithFrame:NSMakeRect(isShadow ? 11 : 10, isShadow ? 11 : 10, 400, 200)]; + NSTextField *label = [[NSTextField alloc] initWithFrame:NSMakeRect(isShadow ? 11 : 10, isShadow ? 11 : 10, 800, 300)]; label.drawsBackground = NO; label.textColor = !isShadow ? [NSColor colorWithSRGBRed:0 green:1 blue:0 alpha:1] : diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index b915a447..fba3e0ad 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1458,5 +1458,21 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) return true; } +vector& KTXImage::imageData() { + return imageDataFromKTX2; +} + +void KTXImage::reserveImageData() { + int32_t numChunks = totalChunks(); + const auto& lastMip = mipLevels[header.numberOfMipmapLevels-1]; + size_t totalKTXSize = + lastMip.offset + lastMip.length * numChunks; + imageDataFromKTX2.resize(totalKTXSize); + memset(imageDataFromKTX2.data(), 0, totalKTXSize); + + fileDataLength = totalKTXSize; + fileData = imageDataFromKTX2.data(); +} + } // namespace kram diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index 8da3b1c7..6ae660df 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -207,6 +207,10 @@ class KTXImage { //int totalMipLevels() const; uint32_t totalChunks() const; + // this is where KTXImage holds all mip data internally + void reserveImageData(); + vector& imageData(); + private: bool openKTX2(const uint8_t* imageData, size_t imageDataLength); diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index a1c17174..f32439e2 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -428,7 +428,32 @@ void Image::averageChannelsInBlock( } } +// this can return on failure to write +static bool writeDataAtOffset(const uint8_t* data, size_t dataSize, size_t dataOffset, FILE* dstFile, KTXImage& dstImage) +{ + if (dstFile) { + fseek(dstFile, dataOffset, SEEK_SET); + if (!FileHelper::writeBytes(dstFile, data, dataSize)) + return false; + } + else { + memcpy(dstImage.imageData().data() + dataOffset, data, dataSize); + } + return true; +} + bool Image::decode(const KTXImage& srcImage, FILE* dstFile, TexEncoder decoder, bool isVerbose, const string& swizzleText) const +{ + KTXImage dstImage; + return decodeImpl(srcImage, dstFile, dstImage, decoder, isVerbose, swizzleText); +} + +bool Image::decode(const KTXImage& srcImage, KTXImage& dstImage, TexEncoder decoder, bool isVerbose, const string& swizzleText) const +{ + return decodeImpl(srcImage, nullptr, dstImage, decoder, isVerbose, swizzleText); +} + +bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstImage, TexEncoder decoder, bool isVerbose, const string& swizzleText) const { // read existing KTX file into mip offset, then start decoding the blocks // and write these to 8u,16f,32f ktx with mips @@ -436,13 +461,14 @@ bool Image::decode(const KTXImage& srcImage, FILE* dstFile, TexEncoder decoder, // Image sorta represents uncompressed Image mips, not compressed. // But wriing things out to dstFile. 
- + int32_t numChunks = srcImage.totalChunks(); + MyMTLPixelFormat pixelFormat = srcImage.pixelFormat; bool isSrgb = isSrgbFormat(pixelFormat); bool isHDR = isHdrFormat(pixelFormat); // setup dstImage - KTXImage dstImage; + //KTXImage dstImage; dstImage = srcImage; // copy src (name-value pairs copied too) // important otherwise offsets are wrong if src is ktx2 @@ -473,6 +499,11 @@ bool Image::decode(const KTXImage& srcImage, FILE* dstFile, TexEncoder decoder, return false; } + // allocate to hold props and entire image to write out + if (!dstFile) { + dstImage.reserveImageData(); + } + bool success = false; // 1d textures need to write out 0 width @@ -483,13 +514,14 @@ bool Image::decode(const KTXImage& srcImage, FILE* dstFile, TexEncoder decoder, headerCopy.pixelDepth = 0; } + // write the header out - if (!FileHelper::writeBytes(dstFile, (const uint8_t*)&headerCopy, sizeof(headerCopy))) { + if (!writeDataAtOffset((const uint8_t*)&headerCopy, sizeof(headerCopy), 0, dstFile, dstImage)) { return false; } - + // write out the props - if (!FileHelper::writeBytes(dstFile, propsData.data(), propsData.size())) { + if (!writeDataAtOffset(propsData.data(), propsData.size(), sizeof(KTXHeader), dstFile, dstImage)) { return false; } @@ -517,7 +549,6 @@ bool Image::decode(const KTXImage& srcImage, FILE* dstFile, TexEncoder decoder, // DONE: walk chunks here and seek to src and dst offsets in conversion // make sure to walk chunks in the exact same order they are written, array then face, or slice - int32_t numChunks = srcImage.totalChunks(); int32_t w = srcImage.width; int32_t h = srcImage.height; @@ -803,19 +834,15 @@ bool Image::decode(const KTXImage& srcImage, FILE* dstFile, TexEncoder decoder, levelSize *= numChunks; } - fseek(dstFile, dstMipOffset - sizeof(levelSize), SEEK_SET); // from begin - - if (!FileHelper::writeBytes(dstFile, (const uint8_t*)&levelSize, sizeof(levelSize))) { + if (!writeDataAtOffset((const uint8_t*)&levelSize, sizeof(levelSize), dstMipOffset - sizeof(levelSize), dstFile, dstImage)) { return false; } } - fseek(dstFile, dstMipOffset, SEEK_SET); // from begin - - if (!FileHelper::writeBytes(dstFile, outputTexture.data(), dstMipLevel.length)) { + if (!writeDataAtOffset(outputTexture.data(), dstMipLevel.length, dstMipOffset, dstFile, dstImage)) { return false; } - + // next mip level mipDown(w, h); } @@ -945,10 +972,23 @@ void Image::heightToNormals(float scale) } } +bool Image::encode(ImageInfo& info, KTXImage& dstImage) const +{ + return encodeImpl(info, nullptr, dstImage); +} + bool Image::encode(ImageInfo& info, FILE* dstFile) const { - KTXImage image; - KTXHeader& header = image.header; + // this will be throw out + KTXImage dstImage; + return encodeImpl(info, dstFile, dstImage); +} + + +bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const +{ + //KTXImage image; + KTXHeader& header = dstImage.header; vector chunkOffsets; @@ -969,10 +1009,10 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const // work out how much memory we need to load header.initFormatGL(info.pixelFormat); - image.pixelFormat = info.pixelFormat; - image.textureType = info.textureType; + dstImage.pixelFormat = info.pixelFormat; + dstImage.textureType = info.textureType; - image.addFormatProps(); + dstImage.addFormatProps(); // TODO: caller should really set post swizzle string postSwizzleText; @@ -983,20 +1023,20 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const else if (info.swizzleText == "rrr1") postSwizzleText = "r001"; // to match up with BC4/EAC_R11 - 
image.addSwizzleProps(info.swizzleText.c_str(), postSwizzleText.c_str()); + dstImage.addSwizzleProps(info.swizzleText.c_str(), postSwizzleText.c_str()); // TODO: caller should really set this, channels and address/filter // three letter codes for the channel names so viewer/game can interpret them if (info.isNormal) { - image.addChannelProps("Nrm.x,Nrm.y,X,X"); + dstImage.addChannelProps("Nrm.x,Nrm.y,X,X"); } else if (info.isSRGB) { // !hasAlpha doesn't change the channel designation if (info.isPremultiplied) { - image.addChannelProps("Alb.ra,Alb.ga,Alb.ba,Alb.a"); + dstImage.addChannelProps("Alb.ra,Alb.ga,Alb.ba,Alb.a"); } else { - image.addChannelProps("Alb.r,Alb.g,Alb.b,Alb.a"); + dstImage.addChannelProps("Alb.r,Alb.g,Alb.b,Alb.a"); } } @@ -1005,21 +1045,21 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const // address: Wrap, Clamp, MirrorWrap, MirrorClamp, BorderClamp, BorderClamp0 // filter: Point, Linear, None (Mip only), TODO: what about Aniso (Mip only + level?) // min/maxLOD too for which range of mips to use, atlas should stop before entries merge - if (image.textureType == MyMTLTextureType1DArray) { - image.addAddressProps("Rep,X,X"); + if (dstImage.textureType == MyMTLTextureType1DArray) { + dstImage.addAddressProps("Rep,X,X"); } - else if (image.textureType == MyMTLTextureType3D) { - image.addAddressProps("Rep,Rep,Rep"); + else if (dstImage.textureType == MyMTLTextureType3D) { + dstImage.addAddressProps("Rep,Rep,Rep"); } else { - image.addAddressProps("Rep,Rep,X"); + dstImage.addAddressProps("Rep,Rep,X"); } if (info.doMipmaps) { - image.addFilterProps("Lin,Lin,Lin"); // min,mag,mip + dstImage.addFilterProps("Lin,Lin,Lin"); // min,mag,mip } else { - image.addFilterProps("Lin,Lin,X"); // min,mag,mip + dstImage.addFilterProps("Lin,Lin,X"); // min,mag,mip } // This is hash of source png/ktx file (use xxhash32 or crc32) @@ -1029,13 +1069,13 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const // convert props into a data blob that can be written out vector propsData; - image.toPropsData(propsData); + dstImage.toPropsData(propsData); header.bytesOfKeyValueData = (uint32_t)propsData.size(); //ktxImage.bytesPerBlock = header.blockSize(); //ktxImage.blockDims = header.blockDims(); - int32_t storageSize = image.mipLevelSize(w, h); + int32_t storageSize = dstImage.mipLevelSize(w, h); // how much to store to store biggest level of ktx (will in-place mip to // this) @@ -1047,7 +1087,7 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const int32_t numMipLevels = 0; // header only holds pixelFormat, but can generate block info from that - computeMipStorage(image, w, h, // pixelFormat, + computeMipStorage(dstImage, w, h, // pixelFormat, info.doMipmaps, info.mipMinSize, info.mipMaxSize, storageSize, storageSizeTotal, mipStorageSizes, numDstMipLevels, numMipLevels); @@ -1102,9 +1142,9 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const } // update image to match - image.width = header.pixelWidth; - image.height = header.pixelHeight; - image.depth = header.pixelDepth; + dstImage.width = header.pixelWidth; + dstImage.height = header.pixelHeight; + dstImage.depth = header.pixelDepth; // ---------------------------------------------------- @@ -1196,7 +1236,14 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const srcImage.pixelsHalf = halfImage.data(); } } + + int32_t numChunks = (int32_t)chunkOffsets.size(); + // allocate to hold props and entire image to write out + if (!dstFile) { + dstImage.reserveImageData(); + } + // 
---------------------------------------------------- Mipper mipper; @@ -1204,20 +1251,20 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const // write the header out KTXHeader headerCopy = header; - if (image.textureType == MyMTLTextureType1DArray) { + if (dstImage.textureType == MyMTLTextureType1DArray) { headerCopy.pixelHeight = 0; headerCopy.pixelDepth = 0; } - if (!FileHelper::writeBytes(dstFile, (const uint8_t*)&headerCopy, sizeof(headerCopy))) { + if (!writeDataAtOffset((const uint8_t*)&headerCopy, sizeof(headerCopy), 0, dstFile, dstImage)) { return false; } // write out the props - if (!FileHelper::writeBytes(dstFile, propsData.data(), propsData.size())) { + if (!writeDataAtOffset(propsData.data(), propsData.size(), sizeof(KTXHeader), dstFile, dstImage)) { return false; } - for (int32_t chunk = 0; chunk < (int32_t)chunkOffsets.size(); ++chunk) { + for (int32_t chunk = 0; chunk < numChunks; ++chunk) { // this needs to append before chunkOffset copy below w = modifiedWidth; h = modifiedHeight; @@ -1276,7 +1323,7 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const } // doing in-place mips - ImageData dstImage = srcImage; + ImageData dstImageData = srcImage; //---------------------------------------------- @@ -1303,23 +1350,23 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const if (!skipMip) { // sdf mipper has to build from origin sourceImage // but it can in-place write to the same dstImage - sdfMipper.mipmap(dstImage, mipLevel); + sdfMipper.mipmap(dstImageData, mipLevel); - w = dstImage.width; - h = dstImage.height; + w = dstImageData.width; + h = dstImageData.height; } } else { // can export existing image for mip 0 if (mipLevel > 0) { // have to build the submips even with skipMip - mipper.mipmap(srcImage, dstImage); + mipper.mipmap(srcImage, dstImageData); // dst becomes src for next in-place mipmap - srcImage = dstImage; + srcImage = dstImageData; - w = dstImage.width; - h = dstImage.height; + w = dstImageData.width; + h = dstImageData.height; } } @@ -1336,11 +1383,11 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const //KLOGI("Image", "chunk:%d %d\n", chunk, mipOffset); // average channels per block if requested (mods 8-bit data on a per block basis) - ImageData mipImage = dstImage; + ImageData mipImage = dstImageData; if (!info.averageChannels.empty()) { // this isn't applied to srgb data (what about premul?) 
- averageChannelsInBlock(info.averageChannels.c_str(), image, + averageChannelsInBlock(info.averageChannels.c_str(), dstImage, mipImage, tmpImageData8); mipImage.pixels = tmpImageData8.data(); @@ -1349,7 +1396,7 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const Timer timer; bool success = - compressMipLevel(info, image, + compressMipLevel(info, dstImage, mipImage, outputTexture, mipStorageSize); assert(success); @@ -1377,19 +1424,19 @@ bool Image::encode(ImageInfo& info, FILE* dstFile) const int32_t levelSizeOf = sizeof(levelSize); assert(levelSizeOf == 4); - fseek(dstFile, mipOffset - levelSizeOf, SEEK_SET); // from begin + //fseek(dstFile, mipOffset - levelSizeOf, SEEK_SET); // from begin - if (!FileHelper::writeBytes(dstFile, (const uint8_t*)&levelSize, levelSizeOf)) { + if (!writeDataAtOffset((const uint8_t*)&levelSize, levelSizeOf, mipOffset - levelSizeOf, dstFile, dstImage)) { return false; } } - fseek(dstFile, mipOffset, SEEK_SET); // from begin + //fseek(dstFile, mipOffset, SEEK_SET); // from begin // Note that default ktx alignment is 4, so r8u, r16f mips need to be padded out to 4 bytes // may need to write these out row by row, and let fseek pad the rows to 4. - if (!FileHelper::writeBytes(dstFile, outputTexture.data.data(), mipStorageSize)) { + if (!writeDataAtOffset(outputTexture.data.data(), mipStorageSize, mipOffset, dstFile, dstImage)) { return false; } } diff --git a/libkram/kram/KramImage.h b/libkram/kram/KramImage.h index cf83409c..2c82f8e0 100644 --- a/libkram/kram/KramImage.h +++ b/libkram/kram/KramImage.h @@ -41,9 +41,15 @@ class Image { bool loadImageFromKTX(const KTXImage& image); + // encode/ecode to a file bool encode(ImageInfo& info, FILE* dstFile) const; bool decode(const KTXImage& image, FILE* dstFile, TexEncoder decoder, bool isVerbose, const string& swizzleText) const; + + // encode/decode to a memory block (TODO: change over to returning dstImage holding all data inside) + bool encode(ImageInfo& info, KTXImage& dstImage) const; + + bool decode(const KTXImage& image, KTXImage& dstImage, TexEncoder decoder, bool isVerbose, const string& swizzleText) const; // this is only for 2d images bool resizeImage(int32_t wResize, int32_t hResize, bool resizePow2, ImageResizeFilter filter = kImageResizeFilterPoint); @@ -59,6 +65,9 @@ class Image { bool hasAlpha() const { return _hasAlpha; } private: + bool encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const; + bool decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstImage, TexEncoder decoder, bool isVerbose, const string& swizzleText) const; + // compute how big mips will be void computeMipStorage(const KTXImage& image, int32_t w, int32_t h, bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize, From 449f551ede8e65d97385e449d22484289ff5ea74 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 20 Feb 2021 00:31:33 -0800 Subject: [PATCH 002/901] Kram - add non-pow2 downsample for odd -> even in mipmapLevelOdd This is needed to prevent a shift in the image. Also simplifies the fast path which can ignore the odd case. This is done single pass with 3x3 pixel area. 9 -> 4, 11 -> 5, etc. Can see the difference when stepping through mips on Toof-a image. 
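A rough sketch of the odd-axis tap pattern described above (illustration only, not the code in the KTXMipper.cpp diff below, which also applies per-tap weights that shift toward the outer pixels):

```cpp
// Shows how an odd source axis folds into the round-down mip without
// skipping the last pixel: output i reads source taps {2i, 2i+1, 2i+2},
// while an even axis keeps the plain 2-tap (2x2 in 2D) box filter.
#include <algorithm>
#include <cstdio>

static void printAxisTaps(int srcDim)
{
    int mipDim = std::max(1, srcDim / 2); // round-down: 9 -> 4, 11 -> 5
    bool isOdd = (srcDim & 1) != 0;
    printf("src %d -> mip %d\n", srcDim, mipDim);

    for (int i = 0; i < mipDim; ++i) {
        if (isOdd) {
            int c = 2 * i + 1; // centers advance by 2, starting at 1
            printf("  dst %d <- src {%d, %d, %d}\n", i, c - 1, c, c + 1);
        }
        else {
            printf("  dst %d <- src {%d, %d}\n", i, 2 * i, 2 * i + 1);
        }
    }
}

int main()
{
    printAxisTaps(9);  // last output reads 6,7,8 - no rightward shift
    printAxisTaps(11); // 11 -> 5
    printAxisTaps(8);  // even fast path
    return 0;
}
```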
--- libkram/kram/KTXMipper.cpp | 237 ++++++++++++++++++++++++++++++++----- libkram/kram/KTXMipper.h | 2 + 2 files changed, 207 insertions(+), 32 deletions(-) diff --git a/libkram/kram/KTXMipper.cpp b/libkram/kram/KTXMipper.cpp index dc732a4d..0718d19e 100644 --- a/libkram/kram/KTXMipper.cpp +++ b/libkram/kram/KTXMipper.cpp @@ -304,7 +304,7 @@ void Mipper::mipmap(const ImageData& srcImage, ImageData& dstImage) const mipmapLevel(srcImage, dstImage); } -void Mipper::mipmapLevel(const ImageData& srcImage, ImageData& dstImage) const +void Mipper::mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) const { int32_t width = srcImage.width; int32_t height = srcImage.height; @@ -324,50 +324,223 @@ void Mipper::mipmapLevel(const ImageData& srcImage, ImageData& dstImage) const int32_t dstIndex = 0; - // To see the downsampled mip dimensions enable this - // int32_t wDst = width; - // int32_t hDst = height; - // mipDown(wDst, hDst); - - // 535 produces 267.5 -> 267, last pixel in an odd width or height is skipped - // this code was incrementing too often at the end bool isOddX = width & 1; bool isOddY = height & 1; - for (int32_t y = 0; y < height; y += 2) { - // last y row is skipped if odd, this causes a shift - if (isOddY) { - if (y == (height - 1)) { - break; - } - } - + // advance always by 2, but sample from neighbors + int32_t mipWidth = std::max(1, width / 2); + int32_t mipHeight = std::max(1, height / 2); + + float invWidth = 1.0f/width; + float invHeight = 1.0f/height; + + for (int32_t y = isOddY ? 1 : 0; y < height; y += 2) { + int32_t ym = y - 1; int32_t y0 = y; int32_t y1 = y + 1; - if (y1 == height) { - y1 = y; + + // weights + int32_t mipY = y/2; + float ymw = (mipHeight - mipY - 1) * invHeight; + float y0w = mipHeight * invHeight; + float y1w = mipY * invHeight; + + if (!isOddY) { + ym = y; // weight is 0 + + ymw = 0.0f; + y0w = 0.5f; + y1w = 0.5f; } + + ym *= width; y0 *= width; y1 *= width; - for (int32_t x = 0; x < width; x += 2) { - // last x column is skipped if odd, this causes a shift - if (isOddX) { - if (x == (width - 1)) { - break; + for (int32_t x = isOddX ? 
1 : 0; x < width; x += 2) { + + int32_t xm = x - 1; + int32_t x0 = x; + int32_t x1 = x + 1; + + // weights + int32_t mipX = x/2; + float xmw = (mipWidth - mipX - 1) * invWidth; + float x0w = mipWidth * invWidth; + float x1w = mipX * invWidth; + + if (!isOddX) { + xm = x; // weight is 0 + + xmw = 0.0f; + x0w = 0.5f; + x1w = 0.5f; + } + + // we have 3x2, 2x3 or 3x3 pattern to weight + // now lookup the 9 values from the buffer + + float4 c[9]; + + if (srcHalf) { + c[0] = toFloat4(srcHalf[ym + xm]); + c[1] = toFloat4(srcHalf[ym + x0]); + c[2] = toFloat4(srcHalf[ym + x1]); + + c[3] = toFloat4(srcHalf[y0 + xm]); + c[4] = toFloat4(srcHalf[y0 + x0]); + c[5] = toFloat4(srcHalf[y0 + x1]); + + c[6] = toFloat4(srcHalf[y1 + xm]); + c[7] = toFloat4(srcHalf[y1 + x0]); + c[8] = toFloat4(srcHalf[y1 + x1]); + } + else if (srcFloat) { + c[0] = srcFloat[ym + xm]; + c[1] = srcFloat[ym + x0]; + c[2] = srcFloat[ym + x1]; + + c[3] = srcFloat[y0 + xm]; + c[4] = srcFloat[y0 + x0]; + c[5] = srcFloat[y0 + x1]; + + c[6] = srcFloat[y1 + xm]; + c[7] = srcFloat[y1 + x0]; + c[8] = srcFloat[y1 + x1]; + } + else { + c[0] = ColorToUnormFloat4(srcColor[ym + xm]); + c[1] = ColorToUnormFloat4(srcColor[ym + x0]); + c[2] = ColorToUnormFloat4(srcColor[ym + x1]); + + c[3] = ColorToUnormFloat4(srcColor[y0 + xm]); + c[4] = ColorToUnormFloat4(srcColor[y0 + x0]); + c[5] = ColorToUnormFloat4(srcColor[y0 + x1]); + + c[6] = ColorToUnormFloat4(srcColor[y1 + xm]); + c[7] = ColorToUnormFloat4(srcColor[y1 + x0]); + c[8] = ColorToUnormFloat4(srcColor[y1 + x1]); + } + + // apply weights to columns/rows + for (int32_t i = 0; i < 3; i++) { + c[3*i+0] *= xmw; + c[3*i+1] *= x0w; + c[3*i+2] *= x1w; + } + + for (int32_t i = 0; i < 3; i++) { + c[0+i] *= ymw; + c[3+i] *= y0w; + c[6+i] *= y1w; + } + + // add them all up + float4 cFloat = c[0]; + for (int32_t i = 1; i < 9; ++i) { + cFloat += c[i]; + } + + if (srcHalf) { + + // overwrite float4 image + cDstHalf[dstIndex] = toHalf4(cFloat); + + // assume hdr pulls from half/float data + if (!srcImage.isHDR) { + // convert back to srgb for encode + if (srcImage.isSRGB) { + cFloat.x = linearToSRGBFunc(cFloat.x); + cFloat.y = linearToSRGBFunc(cFloat.y); + cFloat.z = linearToSRGBFunc(cFloat.z); + } + + // override rgba8u version, since this is what is encoded + Color c = Unormfloat4ToColor(cFloat); + + // can only skip this if cSrc = cDst + cDstColor[dstIndex] = c; } } + else if (srcFloat) { - int32_t x1 = x + 1; - if (x1 == width) { - x1 = x; + // overwrite float4 image + cDstFloat[dstIndex] = cFloat; + + // assume hdr pulls from half/float data + if (!srcImage.isHDR) { + // convert back to srgb for encode + if (srcImage.isSRGB) { + cFloat.x = linearToSRGBFunc(cFloat.x); + cFloat.y = linearToSRGBFunc(cFloat.y); + cFloat.z = linearToSRGBFunc(cFloat.z); + } + + // Overwrite the RGBA8u image too (this will go out to + // encoder) that means BC/ASTC are linearly fit to + // non-linear srgb colors - ick + Color c = Unormfloat4ToColor(cFloat); + cDstColor[dstIndex] = c; + } } + else { + + // can overwrite memory on linear image, some precision loss, but fast + Color c = Unormfloat4ToColor(cFloat); + cDstColor[dstIndex] = c; + } + + dstIndex++; + } + } +} + + +void Mipper::mipmapLevel(const ImageData& srcImage, ImageData& dstImage) const +{ + int32_t width = srcImage.width; + int32_t height = srcImage.height; + + bool isOddX = width & 1; + bool isOddY = height & 1; + + if (isOddX || isOddY) { + mipmapLevelOdd(srcImage, dstImage); + return; + } + + // fast path for 2x2 downsample below, can do in 4 taps + + // this 
can receive premul, srgb data + // the mip chain is linear data only + Color* cDstColor = dstImage.pixels; + const Color* srcColor = srcImage.pixels; + + float4* cDstFloat = dstImage.pixelsFloat; + const float4* srcFloat = srcImage.pixelsFloat; + + half4* cDstHalf = dstImage.pixelsHalf; + const half4* srcHalf = srcImage.pixelsHalf; + + // Note the ptrs above may point to same memory + + int32_t dstIndex = 0; + + for (int32_t y = 0; y < height; y += 2) { + int32_t y0 = y; + int32_t y1 = y + 1; + y0 *= width; + y1 *= width; + + for (int32_t x = 0; x < width; x += 2) { + int32_t x0 = x; + int32_t x1 = x + 1; if (srcHalf) { float4 c0, c1, c2, c3; - c0 = toFloat4(srcHalf[y0 + x]); + c0 = toFloat4(srcHalf[y0 + x0]); c1 = toFloat4(srcHalf[y0 + x1]); - c2 = toFloat4(srcHalf[y1 + x]); + c2 = toFloat4(srcHalf[y1 + x0]); c3 = toFloat4(srcHalf[y1 + x1]); // mip filter is simple box filter @@ -394,10 +567,10 @@ void Mipper::mipmapLevel(const ImageData& srcImage, ImageData& dstImage) const } } else if (srcFloat) { - const float4& c0 = srcFloat[y0 + x]; + const float4& c0 = srcFloat[y0 + x0]; const float4& c1 = srcFloat[y0 + x1]; - const float4& c2 = srcFloat[y1 + x]; + const float4& c2 = srcFloat[y1 + x0]; const float4& c3 = srcFloat[y1 + x1]; // mip filter is simple box filter @@ -425,10 +598,10 @@ void Mipper::mipmapLevel(const ImageData& srcImage, ImageData& dstImage) const } else { // faster 8-bit only path for LDR and unmultiplied - const Color& c0 = srcColor[y0 + x]; + const Color& c0 = srcColor[y0 + x0]; const Color& c1 = srcColor[y0 + x1]; - const Color& c2 = srcColor[y1 + x]; + const Color& c2 = srcColor[y1 + x0]; const Color& c3 = srcColor[y1 + x1]; // 8-bit box filter, with +2/4 for rounding diff --git a/libkram/kram/KTXMipper.h b/libkram/kram/KTXMipper.h index 2a5a97f2..ca991a60 100644 --- a/libkram/kram/KTXMipper.h +++ b/libkram/kram/KTXMipper.h @@ -73,6 +73,8 @@ class Mipper { private: void mipmapLevel(const ImageData &srcImage, ImageData &dstImage) const; + + void mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) const; }; } // namespace kram From 727160bd6af2306f4693582efc82d72209a4ebda Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 20 Feb 2021 00:38:27 -0800 Subject: [PATCH 003/901] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 290a4bef..4acb0d48 100644 --- a/README.md +++ b/README.md @@ -302,7 +302,7 @@ kram includes additional open-source: * Tile command for SVT tiling * Block twiddling support for consoles * Merge command to combine images (similar to ImageMagick) -* Atlas command to atlas to 2D and 2D array textures. Display names, show bounds of atlases. +* Atlas command to atlas to 2D and 2D array textures. Display names, show bounds of atlases. Have -chunks arg now. * 3D chart flattening. * Motion vector direction analysis. * Split view comparison rendering. Move horizontal slider like ShaderToy. @@ -541,7 +541,7 @@ ASTC doesn't compress and RDO as tightly. ### On mip calculations and non-power-of-two textures -With the exception of PVRTC, the block encoded formats support non-power-of-two mipmaps. But very little literature talks about how mips are calculated. D3D first used round-down mips, GL followed suit, and Metal/Vulkan followed suit. Round down cuts out a mip level, and does a floor of the mip levels. Round-up mips generally have a better mapping to the upper with a simple box filter. 
kram hasn't adjusted it's box filter to adjust for this yet, but there are links into the code to articles about how to better weight pixels. The kram box filter is correct for power-of-two mipgen, but should be improved for these cases. +With the exception of PVRTC, the block encoded formats support non-power-of-two mipmaps. But very little literature talks about how mips are calculated. OpenGL/D3D first used round-down mips, and Metal/Vulkan had to follow suit. Round down cuts out a mip level, and does a floor of the mip levels. Round-up mips generally have a better mapping to the upper with a simple box filter. kram now has reasonable cases for pow2 and non-pow2 mip generation. Odd source pixel counts have to shift weights as leftmost/rightmost pixels contribute more on the left/right sides, and avoid a shift in image pixels. ``` Round Down From 6c4fb304f32d3b8ddb9f1a1a2910a683222a5e50 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 21 Feb 2021 01:29:29 -0800 Subject: [PATCH 004/901] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 4acb0d48..81024c36 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,9 @@ C++11 library from 200 to 800KB in size depending on encoder options. Compiles # kramv.app ObjC++ Viewer for PNG/KTX supported files from kram. 530KB in size. Uses Metal compute and shaders, eyedropper, grids, debugging, preview. Supports HDR and all texture types. Mip, face, and array access. No dmg yet, just drop onto /Applications folder, and then run scripts/fixfinder.sh to flush LaunchServices (see below). Runs on macOS (ARM/Intel). +Diagrams and screenshots can be located here: +https://www.figma.com/file/bPmPSpBGTi2xTVnBDqVEq0/kram + #### Releases includes builds for macOS (Xcode 12.3 - arm64/x64) and Windows x64 (VS 2019 - x64). libkram can be built for iOS/Android. 
### About kram From 209b0c0878be73e5a698365efc1fc6e3fa784d95 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 21 Feb 2021 21:13:24 -0800 Subject: [PATCH 005/901] CMake - add SDK comparison, and use CACHE variable on deployment/arch --- CMakeLists.txt | 76 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 69 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 692c2150..d643a537 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,11 +10,17 @@ endif() # https://cmake.org/cmake/help/latest/policy/CMP0077.html#policy:CMP0077 #cmake_policy(SET CMP0077 NEW) -set(UNIXBUILD FALSE) +set(BUILD_UNIX FALSE) +set(BUILD_IOS FALSE) if (APPLE) - message("build for macOS") + if (CMAKE_SYSTEM_NAME STREQUAL "iOS") + message("build for iOS") + set(BUILD_IOS TRUE) + else() + message("build for macOS") + endif() elseif (WIN32) - message("build for win") + message("build for win x64") elseif (UNIX AND NOT APPLE) message("build for unix") set(UNIXBUILD TRUE) @@ -45,8 +51,16 @@ set(CMAKE_CXX_EXTENSIONS NO) # set(CMAKE_OSX_SYSROOT macosx11.0) # set(CMAKE_OSX_SYSROOT macos) # this doesn't work -set(CMAKE_OSX_DEPLOYMENT_TARGET 10.14) -set(CMAKE_OSX_ARCHITECTURES "$(ARCHS_STANDARD)") +# CMAKE_OSX_DEPLOYMENT_TARGET must be set as a CACHE variable, or it will be stripped +if (APPLE) + if (BUILD_IOS) + set(CMAKE_OSX_DEPLOYMENT_TARGET "11.0" CACHE STRING "Minimum iOS") + set(CMAKE_OSX_ARCHITECTURES "$(ARCHS_STANDARD)" CACHE STRING "Architecture iOS") + else() + set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14" CACHE STRING "Minimum macOS") + set(CMAKE_OSX_ARCHITECTURES "$(ARCHS_STANDARD)" CACHE STRING "Architecture macOS") + endif() +endif() set(CMAKE_CONFIGURATION_TYPES "Debug;Release") set(CMAKE_BUILD_TYPE Release) @@ -60,9 +74,9 @@ set(CMAKE_DEFAULT_STARTUP_PROJECT "kram") set(myTargetWorkspace kramWorkspace) if (APPLE) -project(${myTargetWorkspace} LANGUAGES C CXX OBJCXX) + project(${myTargetWorkspace} LANGUAGES C CXX OBJCXX) else() -project(${myTargetWorkspace} LANGUAGES C CXX) + project(${myTargetWorkspace} LANGUAGES C CXX) endif() # the kram static library libkram which should build on iOS/Android/Mac/Win @@ -79,6 +93,54 @@ endif() #----------------------------------------------------- +# https://discourse.cmake.org/t/specifying-cmake-osx-sysroot-breaks-xcode-projects-but-no-other-choice/2532/8 +# use snipet from Alian Martin to validate SDK + +if (APPLE) + if(NOT DEFINED CMAKE_OSX_SYSROOT) + message(FATAL_ERROR "Cannot check SDK version if CMAKE_OSX_SYSROOT is not defined." 
+ ) + endif() + + # check the Xcode app itself for it's version + set(XCODE_MIN_APP 12.2) + if(XCODE AND XCODE_VERSION VERSION_LESS XCODE_MIN_APP) + message(FATAL_ERROR "This project requires at least Xcode ${XCODE_MIN_APP}") + endif() + + # check the SDK + set(XCODE_MIN_SDK_IOS, 14.0) + set(XCODE_MIN_SDK_MACOS, 11.0) + + execute_process( + COMMAND xcrun --sdk "${CMAKE_OSX_SYSROOT}" --show-sdk-version + OUTPUT_VARIABLE SDK_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + + if (BUILD_IOS) + message("iOS SDK ${SDK_VERSION}") + message("iOS deploy ${CMAKE_OSX_DEPLOYMENT_TARGET}") + message("iOS arch ${CMAKE_OSX_ARCHITECTURES}") + + if (SDK_VERSION VERSION_LESS XCODE_MIN_SDK_IOS) + message(FATAL_ERROR "This project requires at least iPhoneOS ${XCODE_MIN_SDK_IOS}" + ) + endif() + else() + message("macOS SDK ${SDK_VERSION}") + message("macOS deploy ${CMAKE_OSX_DEPLOYMENT_TARGET}") + message("macOS arch ${CMAKE_OSX_ARCHITECTURES}") + + if (SDK_VERSION VERSION_LESS XCODE_MIN_SDK_MACOS) + message(FATAL_ERROR "This project requires at least macOS SDK ${XCODE_MIN_SDK_MACOS}" + ) + endif() + endif() +endif() + +#----------------------------------------------------- + set(BIN_DIR ${PROJECT_SOURCE_DIR}/bin) # install doesn't seem to do anything on WIN32, the build elements are not copied From dc45672b408b2539ac6e3931079462c3c07cb066 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 22 Feb 2021 09:44:22 -0800 Subject: [PATCH 006/901] kram - rename mipper --- kramv/KramViewerMain.mm | 14 +++++++++----- libkram/kram/KramImage.cpp | 7 ++++--- libkram/kram/KramImage.h | 2 +- libkram/kram/KramImageInfo.h | 2 +- libkram/kram/{KTXMipper.cpp => KramMipper.cpp} | 2 +- libkram/kram/{KTXMipper.h => KramMipper.h} | 0 libkram/kram/KramSDFMipper.cpp | 2 +- 7 files changed, 17 insertions(+), 12 deletions(-) rename libkram/kram/{KTXMipper.cpp => KramMipper.cpp} (99%) rename libkram/kram/{KTXMipper.h => KramMipper.h} (100%) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index e75e8893..08b78d75 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -19,7 +19,7 @@ #import "KramRenderer.h" #import "KramShaders.h" #include "KramLog.h" -#include "KTXMipper.h" +#include "KramMipper.h" #include "KramMmapHelper.h" #include "KramImage.h" #include "KramViewerBase.h" @@ -261,7 +261,10 @@ MyMTLPixelFormat encodeSrcTextureAsFormat(MyMTLPixelFormat currentFormat, bool i void encodeSrcForEncodeComparisons(bool increment) { auto newFormat = encodeSrcTextureAsFormat(displayedFormat, increment); - // TODO: have to encode and then decode astc on macOS-Intel + // This is really only useful for variable block size formats like ASTC + // maybe some value in BC7 to BC1 comparison (original vs. BC7 vs. BC1) + + // TODO: have to encode and then decode astc/etc on macOS-Intel // load png and keep it around, and then call encode and then diff the image against the original pixels // 565 will always differ from the original. 
@@ -275,10 +278,11 @@ void encodeSrcForEncodeComparisons(bool increment) { // encode incremented format and cache, that way don't wait too long // and once all encode formats generated, can cycle through them until next image loaded - //KTXImage image; + // Could reuse the same buffer for all ASTC formats, larger blocks always need less mem + //KramImage image; // TODO: move encode to KTXImage, convert png to one layer KTXImage //image.open(...); - //image.encode(); - //decodeIfNeeded(...); + //image.encode(dstImage); + //decodeIfNeeded(dstImage, dstImageDecoded); //comparisonTexture = [createImage:image]; //set that onto the shader to diff against after recontruct diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index f32439e2..028f5ded 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -39,7 +39,7 @@ #include #include "KTXImage.h" -#include "KTXMipper.h" +#include "KramMipper.h" #include "KramFileHelper.h" #include "KramSDFMipper.h" #include "KramTimer.h" @@ -85,6 +85,7 @@ Image::Image() : _width(0), _height(0), _hasColor(false), _hasAlpha(false) { } +// TODO: eliminate this and Image class, use KTXImage everywhere so can have explicit mip chains bool Image::loadImageFromKTX(const KTXImage& image) { // copy the data into a contiguous array @@ -444,7 +445,7 @@ static bool writeDataAtOffset(const uint8_t* data, size_t dataSize, size_t dataO bool Image::decode(const KTXImage& srcImage, FILE* dstFile, TexEncoder decoder, bool isVerbose, const string& swizzleText) const { - KTXImage dstImage; + KTXImage dstImage; // thrown out, data written to file return decodeImpl(srcImage, dstFile, dstImage, decoder, isVerbose, swizzleText); } @@ -2087,7 +2088,7 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, #if 0 // This hackimproves L1 and LA block generating // even enabled dual-plane mode for LA. Otherwise rgb and rgba blocks - // are generated on data that only contains L or LA blocks. + // are generated on data that only contain L or LA blocks. bool useUniqueChannels = true; if (useUniqueChannels) { diff --git a/libkram/kram/KramImage.h b/libkram/kram/KramImage.h index 2c82f8e0..6fcdfdb9 100644 --- a/libkram/kram/KramImage.h +++ b/libkram/kram/KramImage.h @@ -8,7 +8,7 @@ #include #include "KTXImage.h" // for MyMTLTextureType -#include "KTXMipper.h" +#include "KramMipper.h" #include "KramConfig.h" #include "KramImageInfo.h" diff --git a/libkram/kram/KramImageInfo.h b/libkram/kram/KramImageInfo.h index bbb23088..2cef4c2b 100644 --- a/libkram/kram/KramImageInfo.h +++ b/libkram/kram/KramImageInfo.h @@ -8,7 +8,7 @@ #include #include "KTXImage.h" -#include "KTXMipper.h" // for Color +#include "KramMipper.h" // for Color #include "KramConfig.h" namespace kram { diff --git a/libkram/kram/KTXMipper.cpp b/libkram/kram/KramMipper.cpp similarity index 99% rename from libkram/kram/KTXMipper.cpp rename to libkram/kram/KramMipper.cpp index 0718d19e..f845a6d6 100644 --- a/libkram/kram/KTXMipper.cpp +++ b/libkram/kram/KramMipper.cpp @@ -2,7 +2,7 @@ // The license and copyright notice shall be included // in all copies or substantial portions of the Software. 
-#include "KTXMipper.h" +#include "KramMipper.h" #include #include diff --git a/libkram/kram/KTXMipper.h b/libkram/kram/KramMipper.h similarity index 100% rename from libkram/kram/KTXMipper.h rename to libkram/kram/KramMipper.h diff --git a/libkram/kram/KramSDFMipper.cpp b/libkram/kram/KramSDFMipper.cpp index 92d8a9bc..1d51e4d1 100644 --- a/libkram/kram/KramSDFMipper.cpp +++ b/libkram/kram/KramSDFMipper.cpp @@ -6,7 +6,7 @@ #include -#include "KTXMipper.h" +#include "KramMipper.h" namespace kram { using namespace heman; From d427f7c67be31fd14285f23510e5721b94564bae Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 22 Feb 2021 09:45:39 -0800 Subject: [PATCH 007/901] kramv - support DebugPosX on SDF and don't offset the comparison. Highlight color now purple. --- kramv/KramShaders.metal | 13 ++++++++++--- kramv/KramViewerBase.cpp | 4 ++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 9e43e9cd..97e76179 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -337,8 +337,15 @@ float4 DrawPixels( // adding some slop here so that flat areas don't flood the visual with red else if (uniforms.debugMode == ShDebugModePosX) { // two channels here, would need to color each channel - if (c.r >= 0.5 + 0.05) { - isHighlighted = true; + if (uniforms.isSDF) { + if (c.r >= 0.5) { + isHighlighted = true; + } + } + else { + if (c.r >= 0.5 + 0.05) { + isHighlighted = true; + } } } else if (uniforms.debugMode == ShDebugModePosY) { @@ -350,7 +357,7 @@ float4 DrawPixels( // TODO: is it best to highlight the interest pixels in red // or the negation of that to see which ones aren't. if (isHighlighted) { - float3 highlightColor = float3(1.0f, 0.0f, 0.0f); + float3 highlightColor = float3(1.0f, 0.0f, 1.0f); c.rgb = highlightColor; } diff --git a/kramv/KramViewerBase.cpp b/kramv/KramViewerBase.cpp index e468f569..e1edac33 100644 --- a/kramv/KramViewerBase.cpp +++ b/kramv/KramViewerBase.cpp @@ -47,10 +47,10 @@ void ShowSettings::advanceDebugMode(bool isShiftKeyDown) { // if (_showSettings->debugMode == DebugModeGray && !hasColor) advanceDebugMode(isShiftKeyDown); // for normals show directions - if (debugMode == DebugModePosX && !isNormal) { + if (debugMode == DebugModePosX && !(isNormal || isSDF)) { advanceDebugMode(isShiftKeyDown); } - if (debugMode == DebugModePosY && !isNormal) { + if (debugMode == DebugModePosY && !(isNormal)) { advanceDebugMode(isShiftKeyDown); } From b5c52237b55bfec0d45d170addc9875de51be744 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 23 Feb 2021 10:07:35 -0800 Subject: [PATCH 008/901] Update README.md --- README.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 81024c36..425f342a 100644 --- a/README.md +++ b/README.md @@ -609,17 +609,19 @@ The encoders all have to encode non-linear srgb point clouds, which isn't correc ``` -### On texture alases (TODO:) +### On texture atlases and charts (TODO:) -2D atlas packing works for source textures, but suffers from many issues. Often packed by hand or algorithm, the results look great as PNG where there are no mips and no block encoding. But the images break down once textures are block encoded. These are some of the complex problems: +2D atlas packing works for source textures like particle flipbooks, but suffers from many issues. Often packed by hand or algorithm, the results look great as PNG, but break down once mipped and block encoded. 
These are some of the complex problems: -* Mip bleed - Solved with mip lod clamping. -* Alignment bleed - Solved with padding. +* Mip bleed - Solved with mip lod clamping or disabling mips. +* Alignment bleed - Solved with padding to smallest visible mip blocks. * Block bleed - Solved with pow2 blocks - 4x4 scales down to 2x2 and 1x1. 6x6 scales to non-integral 3x3 and 1.5x1.5. * Clamp only - Solved by disabling wrap/mirror modes and uv scaling. * Complex pack - stb_rect_pack tightly pack images to a 2d area without accounting for bleed issues kram will soon offer an atlas mode that uses ES3-level 2d array textures. These waste some space, but are much simpler to pack, provide a full encoded mip chain with any block type, and also avoid the 5 problems mentioned above. Named atlas entries reference a given array element, but could be repacked and remapped as used to a smaller atlas. Dropping mip levels can be done across all entries, but is a little harder for a single array element. Sparse textures work for 2d array textures, but often the min sparse size is 256x256 (64K) or 128x128 (16K) and the rest is the packed mip tail. Can draw many types of objects and particles with only a single texture array. -The idea is to copy all atlased images to a 2d vertical strip. This makes row-byte handling simpler. Then kram can already convert a vertical strip to a 2D array, and the output rectangle, array index, mip range, and altas names are tracked as well. But there is some subtlety to copy smaller textures to the smaller mips and use sampler mip clamping. +The idea is to copy all atlased images to a 2d vertical strip. This makes row-byte handling simpler. Then kram can already convert a vertical strip to a 2D array, and the output rectangle, array index, mip range, and altas names are tracked as well. But there is some subtlety to copy smaller textures to the smaller mips and use sampler mip clamping. Non-pow2 textures will have transparent fill around the sides. + +Apps like Substance Painter use charts of unwrapped UV. These need to be gapped and aligned to block sizes to avoid the problems above. Often times the gap is too small (1px) for the mipchain, and instead the algorithms cover up the issue by dilating colors into the gutter regions, so that black outlines are not visible. thelka_atlas, xatlas, and other utilities can build these charts. From f8e38d09e393082dd903fcb0916f8d9d46f476f0 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Wed, 3 Mar 2021 10:25:29 -0800 Subject: [PATCH 009/901] kram - fixes to memory encode for plugin, log format checks, expose KTX2 header These were fixes to support plugin. --- libkram/kram/KTXImage.cpp | 38 -------------------------- libkram/kram/KTXImage.h | 44 +++++++++++++++++++++++++++++- libkram/kram/Kram.cpp | 7 ++--- libkram/kram/KramImage.cpp | 20 ++++++++++++-- libkram/kram/KramImage.h | 2 +- libkram/kram/KramLog.cpp | 55 +++++++++++++++++++++++++++----------- libkram/kram/KramLog.h | 17 ++++++++++-- libkram/kram/sse2neon.h | 2 ++ 8 files changed, 123 insertions(+), 62 deletions(-) diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index fba3e0ad..34698a01 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1205,45 +1205,7 @@ class KTX2ImageLevel { uint64_t length; // size of a single mip }; -// Mips are reversed from KTX1 (mips are smallest first for streaming), -// and this stores an array of supercompressed levels, and has dfds. 
-class KTX2Header { -public: - - uint8_t identifier[12] = { // same is kKTX2Identifier - 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x32, 0x30, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A - // '«', 'K', 'T', 'X', ' ', '2', '0', '»', '\r', '\n', '\x1A', '\n' - }; - - uint32_t vkFormat = 0; // invalid - uint32_t typeSize = 1; - - uint32_t pixelWidth = 1; - uint32_t pixelHeight = 0; - uint32_t pixelDepth = 0; - - uint32_t layerCount = 0; - uint32_t faceCount = 1; - uint32_t levelCount = 1; - uint32_t supercompressionScheme = 0; - // Index - - // dfd block - uint32_t dfdByteOffset = 0; - uint32_t dfdByteLength = 0; - - // key-value - uint32_t kvdByteOffset = 0; - uint32_t kvdByteLength = 0; - - // supercompress global data - uint64_t sgdByteOffset = 0; - uint64_t sgdByteLength = 0; - - // chunks hold levelCount of all mips of the same size - // KTX2ImageChunk* chunks; // [levelCount] -}; //// Data Format Descriptor //uint32_t dfdTotalSize = 0; diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index 6ae660df..ac11b27c 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -167,6 +167,48 @@ class KTXHeader { //--------------------------------------------- +// Mips are reversed from KTX1 (mips are smallest first for streaming), +// and this stores an array of supercompressed levels, and has dfds. +class KTX2Header { +public: + + uint8_t identifier[12] = { // same is kKTX2Identifier + 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x32, 0x30, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A + // '«', 'K', 'T', 'X', ' ', '2', '0', '»', '\r', '\n', '\x1A', '\n' + }; + + uint32_t vkFormat = 0; // invalid + uint32_t typeSize = 1; + + uint32_t pixelWidth = 1; + uint32_t pixelHeight = 0; + uint32_t pixelDepth = 0; + + uint32_t layerCount = 0; + uint32_t faceCount = 1; + uint32_t levelCount = 1; + uint32_t supercompressionScheme = 0; + + // Index + + // dfd block + uint32_t dfdByteOffset = 0; + uint32_t dfdByteLength = 0; + + // key-value + uint32_t kvdByteOffset = 0; + uint32_t kvdByteLength = 0; + + // supercompress global data + uint64_t sgdByteOffset = 0; + uint64_t sgdByteLength = 0; + + // chunks hold levelCount of all mips of the same size + // KTX2ImageChunk* chunks; // [levelCount] +}; + +//--------------------------------------------- + // This is one entire level of mipLevels. 
class KTXImageLevel { public: @@ -233,7 +275,7 @@ class KTXImage { KTXHeader header; // copy of KTXHeader, so can be modified and then written back // write out only string/string props, for easy of viewing - vector> props; + vector > props; vector mipLevels; // offsets into fileData diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 3f97703e..ee375592 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -1269,7 +1270,7 @@ string kramInfoPNGToString(const string& srcFilename, const uint8_t* data, uint6 bool isMB = (dataSize > (512 * 1024)); sprintf(tmp, "file: %s\n" - "size: %d\n" + "size: %" PRIu64 "\n" "sizm: %0.3f %s\n", srcFilename.c_str(), dataSize, @@ -1417,7 +1418,7 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, "mipd: %dx%d\n" "mips: %zu\n" "mipc: %dx\n" - "mipo: %zu\n", + "mipo: %" PRIu64 "\n", w, h, mipLevel++, mip.length, srcImage.totalChunks(), mip.offset); info += tmp; @@ -1991,7 +1992,7 @@ static int32_t kramAppEncode(vector& args) if (success) { success = srcImage.encode(info, tmpFileHelper.pointer()); - + if (!success) { KLOGE("Kram", "encode failed"); } diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 028f5ded..338455a3 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -85,7 +85,21 @@ Image::Image() : _width(0), _height(0), _hasColor(false), _hasAlpha(false) { } -// TODO: eliminate this and Image class, use KTXImage everywhere so can have explicit mip chains +// TODO: use KTXImage everywhere so can have explicit mip chains +// this routine converts KTX to float4, but don't need if already matching 4 channels +// could do other formata conversions here on more supported formats (101010A2, etc). + +// TODO: handle loading KTXImage with custom mips +// TODO: handle loading KTXImage with other texture types (cube, array, etc) + +// TODO: image here is very specifically a single level of chunks of float4 or Color (RGBA8Unorm) +// the encoder is only written to deal with those types. + +// TODO: for png need to turn grid/horizontal strip into a vertical strip if not already +// that way can move through the chunks and overwrite them in-place. +// That would avoid copying each chunk out in the encode, but have to do in reodering. +// That way data is stored as KTX would instead of how PNG does. 
+ bool Image::loadImageFromKTX(const KTXImage& image) { // copy the data into a contiguous array @@ -186,7 +200,7 @@ bool Image::loadImageFromKTX(const KTXImage& image) int32_t srcX = (y0 + x) * numSrcChannels; int32_t dstX = (y0 + x) * numDstChannels; - // copy in available alues + // copy in available values for (int32_t i = 0; i < numSrcChannels; ++i) { srcPixel.v[i] = srcPixels[srcX + i]; } @@ -1242,6 +1256,8 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const // allocate to hold props and entire image to write out if (!dstFile) { + dstImage.initMipLevels(false, mipOffset); + dstImage.reserveImageData(); } diff --git a/libkram/kram/KramImage.h b/libkram/kram/KramImage.h index 6fcdfdb9..535a74b2 100644 --- a/libkram/kram/KramImage.h +++ b/libkram/kram/KramImage.h @@ -46,7 +46,7 @@ class Image { bool decode(const KTXImage& image, FILE* dstFile, TexEncoder decoder, bool isVerbose, const string& swizzleText) const; - // encode/decode to a memory block (TODO: change over to returning dstImage holding all data inside) + // encode/decode to a memory block bool encode(ImageInfo& info, KTXImage& dstImage) const; bool decode(const KTXImage& image, KTXImage& dstImage, TexEncoder decoder, bool isVerbose, const string& swizzleText) const; diff --git a/libkram/kram/KramLog.cpp b/libkram/kram/KramLog.cpp index 881e3816..81d78051 100644 --- a/libkram/kram/KramLog.cpp +++ b/libkram/kram/KramLog.cpp @@ -19,29 +19,45 @@ using namespace std; // //} -static int32_t vsprintf(string& str, const char* format, va_list args) + + +static int32_t append_vsprintf(string& str, const char* format, va_list args) { - if (strchr(format, '%') == nullptr) { - str = format; - return (int32_t)str.length(); + if (strcmp(format, "%s") == 0) { + const char* firstArg = va_arg(args, const char*); + str += firstArg; + return strlen(firstArg); + } + if (strrchr(format, '%') == nullptr) { + str += format; + return strlen(format); } - // can't reuse args after vsnprintf + // format once to get length (without NULL at end) va_list argsCopy; va_copy(argsCopy, args); - - // format once to get length (without NULL at end) int32_t len = vsnprintf(NULL, 0, format, argsCopy); - + va_end(argsCopy); + if (len > 0) { + size_t existingLen = str.length(); + // resize and format again into string - str.resize(len); + str.resize(existingLen + len, 0); - vsnprintf(&str[0], len + 1, format, args); + vsnprintf((char*)str.data() + existingLen, len + 1, format, args); } + return len; } + +static int32_t vsprintf(string& str, const char* format, va_list args) +{ + str.clear(); + return append_vsprintf(str, format, args); +} + int32_t sprintf(string& str, const char* format, ...) { va_list args; @@ -52,6 +68,17 @@ int32_t sprintf(string& str, const char* format, ...) return len; } +int32_t append_sprintf(string& str, const char* format, ...) 
+{ + va_list args; + va_start(args, format); + int32_t len = append_vsprintf(str, format, args); + va_end(args); + + return len; +} + + bool startsWith(const char* str, const string& substring) { return strncmp(str, substring.c_str(), substring.size()) == 0; @@ -84,22 +111,20 @@ extern int32_t logMessage(const char* group, int32_t logLevel, // convert var ags to a msg const char* msg; + string str; - - va_list args; - va_start(args, fmt); if (strstr(fmt, "%") == nullptr) { msg = fmt; } else { + va_list args; va_start(args, fmt); vsprintf(str, fmt, args); va_end(args); msg = str.c_str(); } - va_end(args); - + // pipe to correct place, could even be file output FILE* fp = stdout; if (logLevel >= LogLevelWarning) diff --git a/libkram/kram/KramLog.h b/libkram/kram/KramLog.h index f6907e4c..d8493afe 100644 --- a/libkram/kram/KramLog.h +++ b/libkram/kram/KramLog.h @@ -18,9 +18,17 @@ enum LogLevel { LogLevelError = 3, }; +// these validate the inputs to any sprintf like format + args +#ifndef __printflike + #define __printflike(fmtIndex, varargIndex) +#endif +#ifndef __scanflike + #define __scanflike(fmtIndex, varargIndex) +#endif + extern int32_t logMessage(const char* group, int32_t logLevel, const char* file, int32_t line, const char* func, - const char* fmt, ...); + const char* fmt, ...) __printflike(6, 7); // verify leaves conditional code in the build #if KRAM_DEBUG @@ -38,7 +46,12 @@ extern int32_t logMessage(const char* group, int32_t logLevel, // TODO: move to Strings.h using namespace std; -int32_t sprintf(string& str, const char* format, ...); + +// returns length of string, -1 if failure +int32_t sprintf(string& str, const char* format, ...) __printflike(2, 3); + +// returns length of chars appended, -1 if failure +int32_t append_sprintf(string& str, const char* format, ...) __printflike(2, 3); bool startsWith(const char* str, const string& substring); bool endsWithExtension(const char* str, const string& substring); diff --git a/libkram/kram/sse2neon.h b/libkram/kram/sse2neon.h index 49a3c9e1..9ce4712a 100644 --- a/libkram/kram/sse2neon.h +++ b/libkram/kram/sse2neon.h @@ -5855,6 +5855,7 @@ FORCE_INLINE void _mm_clflush(void const *p) // no corollary for Neon? } +/* conflicts with mm_malloc.h // Allocate aligned blocks of memory. // https://software.intel.com/en-us/ // cpp-compiler-developer-guide-and-reference-allocating-and-freeing-aligned-memory-blocks @@ -5874,6 +5875,7 @@ FORCE_INLINE void _mm_free(void *addr) { free(addr); } +*/ // Starting with the initial value in crc, accumulates a CRC32 value for // unsigned 8-bit integer v. From bcb10717337ee87beade183405e70e67576346f8 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Wed, 3 Mar 2021 12:37:15 -0800 Subject: [PATCH 010/901] kram - add prezero to see if that helps with texture creation. This is meant to use with shaders that do premul post sampling. The issue is that if you feed premul to these shaders, then they apply alpha twice. But on zero pixel areas, these throw off the encoders if the rgb isn't also zero since they weight towards dilation or unseen rgb. 
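A minimal sketch of how -prezero differs from -premul (stand-in pixel type for illustration, not kram's Color/float4):

```cpp
// Full premultiply scales rgb by alpha everywhere; prezero only zeroes rgb
// where alpha == 0, so fully transparent texels can't pull the encoder's
// block fit toward colors that never display, while visible texels keep
// their unmultiplied rgb for shaders that premultiply after sampling.
#include <vector>

struct PixelF { float r, g, b, a; }; // stand-in type, not kram's float4

void premultiply(std::vector<PixelF>& pixels)
{
    for (auto& p : pixels) {
        p.r *= p.a;
        p.g *= p.a;
        p.b *= p.a;
    }
}

void prezero(std::vector<PixelF>& pixels)
{
    for (auto& p : pixels) {
        if (p.a == 0.0f) {          // only fully transparent texels
            p.r = p.g = p.b = 0.0f; // kill rgb so it can't bleed into blocks
        }
    }
}
```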
--- libkram/kram/Kram.cpp | 11 ++++++ libkram/kram/KramImage.cpp | 28 +++++++++++---- libkram/kram/KramImageInfo.cpp | 4 +++ libkram/kram/KramImageInfo.h | 3 ++ libkram/kram/KramMipper.cpp | 62 +++++++++++++++++++++++++++------- libkram/kram/KramMipper.h | 2 +- 6 files changed, 89 insertions(+), 21 deletions(-) diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index ee375592..e481badd 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -935,6 +935,7 @@ void kramEncodeUsage(bool showVersion = true) "\t [-avg rxbx]\n" "\t [-sdf]\n" "\t [-premul]\n" + "\t [-prezero]\n" "\t [-quality 0-100]\n" "\t [-optopaque]\n" "\t [-v]\n" @@ -1008,6 +1009,12 @@ void kramEncodeUsage(bool showVersion = true) "\tPremultiplied alpha to src pixels before output\n" "\n" + // This is meant to work with shaders that (incorrectly) premul after sampling. + // limits the rgb bleed in regions that should not display colors. Can stil have black color halos. + "\t-prezero" + "\tPremultiplied alpha to src pixels before output but only where a=0\n" + "\n" + "\t-optopaque" "\tChange format from bc7/3 to bc1, or etc2rgba to rgba if opaque\n" "\n" @@ -1825,6 +1832,10 @@ static int32_t kramAppEncode(vector& args) infoArgs.isPremultiplied = true; continue; } + else if (isStringEqual(word, "-prezero")) { + infoArgs.isPrezero = true; + continue; + } else if (isStringEqual(word, "-v") || isStringEqual(word, "-verbose")) { infoArgs.isVerbose = true; diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 338455a3..516c9d56 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -1198,7 +1198,7 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const vector halfImage; vector floatImage; - bool doPremultiply = info.hasAlpha && info.isPremultiplied; + bool doPremultiply = info.hasAlpha && (info.isPremultiplied || info.isPrezero); bool isMultichunk = chunkOffsets.size() > 1; if (info.isHDR) { @@ -1216,11 +1216,25 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const // run this across all the source data // do this in-place before mips are generated if (doPremultiply) { - for (const auto& pixel : _pixelsFloat) { - float alpha = pixel.w; - float4& pixelChange = const_cast(pixel); - pixelChange *= alpha; - pixelChange.w = alpha; + if (info.isPrezero) { + for (const auto& pixel : _pixelsFloat) { + float alpha = pixel.w; + float4& pixelChange = const_cast(pixel); + + // only premul at 0 alpha regions + if (alpha == 0.0f) { + pixelChange *= alpha; + pixelChange.w = alpha; + } + } + } + else { + for (const auto& pixel : _pixelsFloat) { + float alpha = pixel.w; + float4& pixelChange = const_cast(pixel); + pixelChange *= alpha; + pixelChange.w = alpha; + } } } } @@ -1334,7 +1348,7 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const // copy and convert to half4 or float4 image // srcImage already points to float data, so could modify that // only need doPremultiply at the top mip - mipper.initPixelsHalfIfNeeded(srcImage, doPremultiply, + mipper.initPixelsHalfIfNeeded(srcImage, doPremultiply && !info.isPrezero, info.isPrezero, halfImage); } } diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 3304d032..04d339bf 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -991,7 +991,11 @@ void ImageInfo::initWithArgs(const ImageInfoArgs& args) textureEncoder = args.textureEncoder; textureType = args.textureType; + isPrezero = args.isPrezero; 
isPremultiplied = args.isPremultiplied; + if (!isPremultiplied) + isPrezero = false; + isNormal = args.isNormal; doSDF = args.doSDF; diff --git a/libkram/kram/KramImageInfo.h b/libkram/kram/KramImageInfo.h index 2cef4c2b..0d9b2ae5 100644 --- a/libkram/kram/KramImageInfo.h +++ b/libkram/kram/KramImageInfo.h @@ -56,12 +56,14 @@ class ImageInfoArgs { bool isVerbose = false; bool doSDF = false; bool isPremultiplied = false; + bool isPrezero = false; bool isNormal = false; // signed, but may be stored unorm and swizzled (f.e. astc/bc3nm gggr or rrrg) // can pick a smaller format if alpha = 1 (only for bc and etc) bool optimizeFormatForOpaque = false; // these and formatString set the pixelFormat + // if pixelFOrmat set directly, then these are updated off that format bool isSigned = false; bool isSRGB = false; bool isHDR = false; @@ -116,6 +118,7 @@ class ImageInfo { bool isSRGB = false; bool isColorWeighted = false; bool isPremultiplied = false; // don't premul + bool isPrezero = false; bool isHDR = false; //bool skipImageLength = false; // gen ktxa diff --git a/libkram/kram/KramMipper.cpp b/libkram/kram/KramMipper.cpp index f845a6d6..f2f4d300 100644 --- a/libkram/kram/KramMipper.cpp +++ b/libkram/kram/KramMipper.cpp @@ -164,9 +164,13 @@ void Mipper::initTables() #endif } -void Mipper::initPixelsHalfIfNeeded(ImageData& srcImage, bool doPremultiply, +void Mipper::initPixelsHalfIfNeeded(ImageData& srcImage, bool doPremultiply, bool doPrezero, vector& halfImage) const { + Color zeroColor = { 0, 0, 0, 0 }; + float4 zeroColorf = simd_make_float4(0.0, 0.0f, 0.0f, 0.f); // need a constant for this + half4 zeroColorh = toHalf4(zeroColorf); + int32_t w = srcImage.width; int32_t h = srcImage.height; @@ -177,6 +181,7 @@ void Mipper::initPixelsHalfIfNeeded(ImageData& srcImage, bool doPremultiply, assert(false); } else if (srcImage.isSRGB) { + // this does srgb and premul conversion for (int32_t y = 0; y < h; y++) { int32_t y0 = y * w; @@ -189,19 +194,29 @@ void Mipper::initPixelsHalfIfNeeded(ImageData& srcImage, bool doPremultiply, if (c0.a != 255) { float alpha = alphaToFloat[c0.a]; - if (!doPremultiply) { - cFloat.w = alpha; - } - else { + if (doPremultiply) { // premul and sets alpha cFloat *= alpha; } + else if (doPrezero) { + if (c0.a == 0) { + cFloat = zeroColorf; + c0 = zeroColor; + } + else { + cFloat.w = alpha; + } + } + else { + cFloat.w = alpha; + } } - // if (!floatImage.empty()) { - // floatImage[y0 + x] = cFloat; - // } - // else + // TODO: 32F path + // if (!floatImage.empty()) { + // floatImage[y0 + x] = cFloat; + // } + // else { halfImage[y0 + x] = toHalf4(cFloat); } @@ -219,6 +234,26 @@ void Mipper::initPixelsHalfIfNeeded(ImageData& srcImage, bool doPremultiply, } } } + else if (doPrezero) { + // do premul conversion + for (int32_t y = 0; y < h; y++) { + int32_t y0 = y * w; + for (int32_t x = 0; x < w; x++) { + Color& c0 = srcImage.pixels[y0 + x]; + + // TODO: assumes 16, need 32f path too + if (c0.a == 0) { + c0 = zeroColor; + halfImage[y0 + x] = zeroColorh; + } + else { + float4 cFloat = {alphaToFloat[c0.r], alphaToFloat[c0.g], + alphaToFloat[c0.b], alphaToFloat[c0.a]}; + halfImage[y0 + x] = toHalf4(cFloat); + } + } + } + } else if (doPremultiply) { // do premul conversion for (int32_t y = 0; y < h; y++) { @@ -234,10 +269,11 @@ void Mipper::initPixelsHalfIfNeeded(ImageData& srcImage, bool doPremultiply, cFloat *= alpha; } - // if (!floatImage.empty()) { - // floatImage[y0 + x] = cFloat; - // } - // else + // TODO: 32F path + // if (!floatImage.empty()) { + // floatImage[y0 + x] = 
cFloat; + // } + // else { halfImage[y0 + x] = toHalf4(cFloat); } diff --git a/libkram/kram/KramMipper.h b/libkram/kram/KramMipper.h index ca991a60..0751d440 100644 --- a/libkram/kram/KramMipper.h +++ b/libkram/kram/KramMipper.h @@ -68,7 +68,7 @@ class Mipper { // drop by 1 mip level by box filter void mipmap(const ImageData &srcImage, ImageData &dstImage) const; - void initPixelsHalfIfNeeded(ImageData &srcImage, bool doPremultiply, + void initPixelsHalfIfNeeded(ImageData &srcImage, bool doPremultiply, bool doPrezero, vector &halfImage) const; private: From 683de92199a26bff156bb211c6a281587ab97ad5 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Wed, 3 Mar 2021 16:03:28 -0800 Subject: [PATCH 011/901] kram - fix png info, sprintf format mismatch, and prezero setup --- libkram/kram/Kram.cpp | 6 +++--- libkram/kram/KramImageInfo.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index e481badd..8a82e448 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -1204,9 +1204,9 @@ string kramInfoToString(const string& srcFilename, bool isVerbose) data = srcFileBuffer.data(); dataSize = (int32_t)srcFileBuffer.size(); - - info = kramInfoPNGToString(srcFilename, data, dataSize, isVerbose); } + info = kramInfoPNGToString(srcFilename, data, dataSize, isVerbose); + } else if (isKTX) { KTXImage srcImage; @@ -1423,7 +1423,7 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, sprintf(tmp, "mipn: %d\n" "mipd: %dx%d\n" - "mips: %zu\n" + "mips: %" PRIu64 "\n" "mipc: %dx\n" "mipo: %" PRIu64 "\n", w, h, mipLevel++, mip.length, srcImage.totalChunks(), mip.offset); diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 04d339bf..85e5ba4e 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -993,7 +993,7 @@ void ImageInfo::initWithArgs(const ImageInfoArgs& args) isPrezero = args.isPrezero; isPremultiplied = args.isPremultiplied; - if (!isPremultiplied) + if (isPremultiplied) isPrezero = false; isNormal = args.isNormal; From 17fc34a391b9e2cb9954fef3953d07f39d3df4b6 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Wed, 3 Mar 2021 16:09:30 -0800 Subject: [PATCH 012/901] bc7enc - fix code to not use anonymous structs. On VS, these cannot contain functions. 
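A minimal illustration of the pattern being changed (names here are placeholders; the actual structs are color_quad_u8 and vec4F in the diff below):

    #include <cstdint>

    // before: an unnamed struct that only gets its name through the typedef;
    // per the note above, VS won't accept the member operators in this form
    typedef struct {
        uint8_t r, g, b, a;
        uint8_t& operator[](int i) { return (&r)[i]; }
    } quad_via_typedef;

    // after: the same layout as a named struct, which compilers accept everywhere
    struct quad_named {
        uint8_t r, g, b, a;
        uint8_t& operator[](int i) { return (&r)[i]; }
    };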
--- libkram/bc7enc/bc7enc.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libkram/bc7enc/bc7enc.cpp b/libkram/bc7enc/bc7enc.cpp index 1a511d51..a8e32f15 100644 --- a/libkram/bc7enc/bc7enc.cpp +++ b/libkram/bc7enc/bc7enc.cpp @@ -33,11 +33,11 @@ static inline int32_t iabs32(int32_t v) { uint32_t msk = v >> 31; return (v ^ ms static inline void swapu(uint32_t* a, uint32_t* b) { uint32_t t = *a; *a = *b; *b = t; } //static inline void swapf(float* a, float* b) { float t = *a; *a = *b; *b = t; } -typedef struct { +struct color_quad_u8 { uint8_t r, g, b, a; inline const uint8_t& operator[](int index) const { return *(&r + index); } inline uint8_t& operator[](int index) { return *(&r + index); } -} color_quad_u8; +}; static inline color_quad_u8 *color_quad_u8_set_clamped(color_quad_u8 *pRes, int32_t r, int32_t g, int32_t b, int32_t a) { pRes->r = (uint8_t)clampi(r, 0, 255); pRes->g = (uint8_t)clampi(g, 0, 255); pRes->b = (uint8_t)clampi(b, 0, 255); pRes->a = (uint8_t)clampi(a, 0, 255); return pRes; } static inline color_quad_u8 *color_quad_u8_set(color_quad_u8 *pRes, int32_t r, int32_t g, int32_t b, int32_t a) { assert((uint32_t)(r | g | b | a) <= 255); pRes->r = (uint8_t)r; pRes->g = (uint8_t)g; pRes->b = (uint8_t)b; pRes->a = (uint8_t)a; return pRes; } @@ -60,11 +60,11 @@ static inline vec4F vec4F_mul(const vec4F *pLHS, float s) { vec4F res = *pLHS * static inline vec4F *vec4F_normalize_in_place(vec4F *pV) { *pV = normalize(*pV); return pV; } #else -typedef struct { +struct vec4F { float r, g, b, a; inline const float& operator[](int index) const { return *(&r + index); } inline float& operator[](int index) { return *(&r + index); } -} vec4F; +}; static inline vec4F *vec4F_set_scalar(vec4F *pV, float x) { pV->r = x; pV->g = x; pV->b = x; pV->a = x; return pV; } static inline vec4F *vec4F_set(vec4F *pV, float x, float y, float z, float w) { pV->r = x; pV->g = y; pV->b = z; pV->a = w; return pV; } From c12d931c54d0b66bf2b5a8eeaafdc180e9cfd145 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Fri, 5 Mar 2021 08:36:56 -0800 Subject: [PATCH 013/901] Update README.md --- README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 425f342a..29a3d94d 100644 --- a/README.md +++ b/README.md @@ -382,6 +382,7 @@ Usage: kram encode [-avg rxbx] [-sdf] [-premul] + [-prezero] [-quality 0-100] [-optopaque] [-v] @@ -413,8 +414,8 @@ OPTIONS -signed Signed r or rg for etc/bc formats, astc doesn't have signed format. -normal Normal map rg storage signed for etc/bc (rg01), only unsigned astc L+A (gggr). -sdf Generate single-channel SDF from a bitmap, can mip and drop large mips. Encode to r8, bc4, etc2r, astc4x4 (Unorm LLL1) to encode - -premul Premultiplied alpha to src pixels before output - + -premul Premultiplied alpha to src pixels before output. Disable multiply of alpha post-sampling. In kramv, view with "Premul off". + -prezero Premultiplied alpha only where 0, where shaders multiply alpha post-sampling. Not true premul and black halos if alpha ramp is fast. In kramv, view with "Premul on". -optopaque Change format from bc7/3 to bc1, or etc2rgba to rgba if opaque -chunks 4x4 Specifies how many chunks to split up texture into 2darray From 5a502b373f3b5cb2242331ebdaac31f41bd59345 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 7 Mar 2021 09:51:18 -0800 Subject: [PATCH 014/901] Kram - plugin fixes mipoffset needed to be recalculated on encode. This was causing files to be 2x bigger when using the memory path. 
Didn't affect kram encode, since it goes to dstFile. Fix up imageInfo init to honor a pixelFormat set onto args. Expose some more format functions. Expose the identifiers for file filtering. Can read the first 6 bytes to identify ktx vs. ktx2. Or 4 bytes to identify ktx and ktx2. --- libkram/kram/KTXImage.cpp | 16 ++++++++++++++-- libkram/kram/KTXImage.h | 10 ++++++++-- libkram/kram/KramImage.cpp | 3 +++ libkram/kram/KramImageInfo.cpp | 6 +++++- libkram/kram/KramLog.cpp | 9 ++++++++- libkram/kram/KramLog.h | 1 + 6 files changed, 39 insertions(+), 6 deletions(-) diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 34698a01..754a0172 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -41,11 +41,11 @@ const char* kPropFilter = "KramFilter"; using namespace std; // These start each KTX file to indicate the type -const uint8_t kKTXIdentifier[12] = { +const uint8_t kKTXIdentifier[kKTXIdentifierSize] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A //'«', 'K', 'T', 'X', ' ', '1', '1', '»', '\r', '\n', '\x1A', '\n' }; -const uint8_t kKTX2Identifier[12] = { +const uint8_t kKTX2Identifier[kKTXIdentifierSize] = { 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x32, 0x30, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A // '«', 'K', 'T', 'X', ' ', '2', '0', '»', '\r', '\n', '\x1A', '\n' }; @@ -513,6 +513,12 @@ bool isFloatFormat(MyMTLPixelFormat format) return it.is16F() || it.is32F(); } +bool isHalfFormat(MyMTLPixelFormat format) +{ + const auto& it = formatInfo(format); + return it.is16F(); +} + bool isBCFormat(MyMTLPixelFormat format) { const auto& it = formatInfo(format); @@ -531,6 +537,12 @@ bool isASTCFormat(MyMTLPixelFormat format) return it.isASTC(); } +bool isExplicitFormat(MyMTLPixelFormat format) +{ + const auto& it = formatInfo(format); + return !(it.isASTC() || it.isETC() || it.isBC()); +} + bool isHdrFormat(MyMTLPixelFormat format) { const auto& it = formatInfo(format); diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index ac11b27c..02387293 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -123,13 +123,17 @@ struct Int2 { //--------------------------------------------- +constexpr int32_t kKTXIdentifierSize = 12; +extern const uint8_t kKTXIdentifier[kKTXIdentifierSize]; +extern const uint8_t kKTX2Identifier[kKTXIdentifierSize]; + class KTXHeader { public: // Don't add any date to this class. It's typically the top of a file cast to this. // As such, this doesn't have much functionality, other than to hold the header. // 64-byte header - uint8_t identifier[12] = { // same is kKTXIdentifier + uint8_t identifier[kKTXIdentifierSize] = { // same is kKTXIdentifier 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A //'«', 'K', 'T', 'X', ' ', '1', '1', '»', '\r', '\n', '\x1A', '\n' }; @@ -172,7 +176,7 @@ class KTXHeader { class KTX2Header { public: - uint8_t identifier[12] = { // same is kKTX2Identifier + uint8_t identifier[kKTXIdentifierSize] = { // same is kKTX2Identifier 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x32, 0x30, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A // '«', 'K', 'T', 'X', ' ', '2', '0', '»', '\r', '\n', '\x1A', '\n' }; @@ -286,6 +290,7 @@ class KTXImage { // Generic format helpers. All based on the ubiquitous type. 
bool isFloatFormat(MyMTLPixelFormat format); +bool isHalfFormat(MyMTLPixelFormat format); bool isHdrFormat(MyMTLPixelFormat format); bool isSrgbFormat(MyMTLPixelFormat format); bool isColorFormat(MyMTLPixelFormat format); @@ -295,6 +300,7 @@ bool isSignedFormat(MyMTLPixelFormat format); bool isBCFormat(MyMTLPixelFormat format); bool isETCFormat(MyMTLPixelFormat format); bool isASTCFormat(MyMTLPixelFormat format); +bool isExplicitFormat(MyMTLPixelFormat format); Int2 blockDimsOfFormat(MyMTLPixelFormat format); uint32_t blockSizeOfFormat(MyMTLPixelFormat format); diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 516c9d56..6e71f5ed 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -1270,6 +1270,9 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const // allocate to hold props and entire image to write out if (!dstFile) { + // recompute, it's had mips added into it above + mipOffset = sizeof(KTXHeader) + header.bytesOfKeyValueData; + dstImage.initMipLevels(false, mipOffset); dstImage.reserveImageData(); diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 85e5ba4e..2652c0c3 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -542,7 +542,11 @@ bool validateFormatAndEncoder(ImageInfoArgs& infoArgs) { bool error = false; - MyMTLPixelFormat format = parseFormat(infoArgs); + // caller an set or this can parse format from the format text + MyMTLPixelFormat format = infoArgs.pixelFormat; + if (format == MyMTLPixelFormatInvalid) { + format = parseFormat(infoArgs); + } if (format == MyMTLPixelFormatInvalid) { return false; } diff --git a/libkram/kram/KramLog.cpp b/libkram/kram/KramLog.cpp index 81d78051..89090557 100644 --- a/libkram/kram/KramLog.cpp +++ b/libkram/kram/KramLog.cpp @@ -19,15 +19,22 @@ using namespace std; // //} +// Note: careful with stdio sscanf. In clang, this does and initial strlen which for long buffers +// being parsed (f.e. mmapped Json) this can significantly slow a parser down. static int32_t append_vsprintf(string& str, const char* format, va_list args) { + // for KLOGE("group", "%s", "text") if (strcmp(format, "%s") == 0) { const char* firstArg = va_arg(args, const char*); str += firstArg; return strlen(firstArg); } + + // This is important for the case where ##VAR_ARGS only leaves the format. + // In this case "text" must be a compile time constant string to avoid security warning needed for above. 
+ // for KLOGE("group", "text") if (strrchr(format, '%') == nullptr) { str += format; return strlen(format); @@ -113,7 +120,7 @@ extern int32_t logMessage(const char* group, int32_t logLevel, const char* msg; string str; - if (strstr(fmt, "%") == nullptr) { + if (strrchr(fmt, '%') == nullptr) { msg = fmt; } else { diff --git a/libkram/kram/KramLog.h b/libkram/kram/KramLog.h index d8493afe..0b24d871 100644 --- a/libkram/kram/KramLog.h +++ b/libkram/kram/KramLog.h @@ -19,6 +19,7 @@ enum LogLevel { }; // these validate the inputs to any sprintf like format + args +// these come from sys/cdefs.h on Apple, but need to be define for __clang__ on other platforms #ifndef __printflike #define __printflike(fmtIndex, varargIndex) #endif From 9d2be269a4c974aabd4bbb1eca728f46d3876b44 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 8 Mar 2021 00:07:32 -0800 Subject: [PATCH 015/901] plugin - start of a PS plugin for loading/saving ktx/2 files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adapted DDS plugin from Brendan Bolles from here: https://github.com/fnordware/AdobeDDS Thanks Brendan for making so many open-source plugins! Replaced crn with libkam to support more input/output formats and also ktx/2 files. There's still a lot of work to support files that are different channels, bit depths, and texture types. Likely layer support is needed to track cube and array faces, mips, etc. It's unclear if this should support lossless formats, since I mostly just want 8u/16f/32f compressed as KTX2 source formats. This currently can load and decode ktx files, and also save them back out to disk as ktx only (not ktx2). Will likely need to build two plugins - one for ktx, the other for ktx2, First attempt to get CMake to build a plugin. But PS plugin api uses Carbon.r/CoreServices.r which are deprecated since macOS 10.8. Can get this to build and run with original project since it has Rez support, but new XCode removes that completely from newer projects. Can't get a command line Rez to generate anything but a 0 size file even though the command succeeds. I can't checkin the SDK due to licensing, but this is where to obtain the sdk. Just drop it into an plugin/ext/photoshopsdk folder. 
Download the "Adobe Photoshop Plug-In and Connection SDK" at https://console.adobe.io/downloads/ps --- .gitignore | 3 +- CMakeLists.txt | 19 +- kramv/CMakeLists.txt | 10 +- libkram/CMakeLists.txt | 1 + plugin/CMakeLists.txt | 260 ++++++ plugin/kps/KPS.cpp | 1140 +++++++++++++++++++++++++ plugin/kps/KPS.h | 214 +++++ plugin/kps/KPS.r | 456 ++++++++++ plugin/kps/KPSScripting.cpp | 296 +++++++ plugin/kps/KPSTerminology.h | 109 +++ plugin/kps/KPSUI.h | 167 ++++ plugin/kps/KPSVersion.h | 58 ++ plugin/kps/kram-ps.rsrc | Bin 0 -> 1399 bytes plugin/kps/mac/Info.plist | 22 + plugin/kps/mac/KPSAbout.xib | 72 ++ plugin/kps/mac/KPSAboutController.h | 58 ++ plugin/kps/mac/KPSAboutController.mm | 71 ++ plugin/kps/mac/KPSInput.xib | 112 +++ plugin/kps/mac/KPSInputController.h | 66 ++ plugin/kps/mac/KPSInputController.mm | 104 +++ plugin/kps/mac/KPSOutput.xib | 174 ++++ plugin/kps/mac/KPSOutputController.h | 95 +++ plugin/kps/mac/KPSOutputController.mm | 220 +++++ plugin/kps/mac/KPSUICocoa.mm | 252 ++++++ plugin/kps/win/KPSDialogs.rc | 168 ++++ plugin/kps/win/KPSInputDialog.cpp | 217 +++++ plugin/kps/win/KPSOutputDialog.cpp | 371 ++++++++ plugin/kps/win/resource.h | 20 + 28 files changed, 4749 insertions(+), 6 deletions(-) create mode 100644 plugin/CMakeLists.txt create mode 100755 plugin/kps/KPS.cpp create mode 100755 plugin/kps/KPS.h create mode 100755 plugin/kps/KPS.r create mode 100755 plugin/kps/KPSScripting.cpp create mode 100755 plugin/kps/KPSTerminology.h create mode 100644 plugin/kps/KPSUI.h create mode 100755 plugin/kps/KPSVersion.h create mode 100644 plugin/kps/kram-ps.rsrc create mode 100644 plugin/kps/mac/Info.plist create mode 100644 plugin/kps/mac/KPSAbout.xib create mode 100644 plugin/kps/mac/KPSAboutController.h create mode 100644 plugin/kps/mac/KPSAboutController.mm create mode 100644 plugin/kps/mac/KPSInput.xib create mode 100644 plugin/kps/mac/KPSInputController.h create mode 100644 plugin/kps/mac/KPSInputController.mm create mode 100644 plugin/kps/mac/KPSOutput.xib create mode 100644 plugin/kps/mac/KPSOutputController.h create mode 100644 plugin/kps/mac/KPSOutputController.mm create mode 100644 plugin/kps/mac/KPSUICocoa.mm create mode 100644 plugin/kps/win/KPSDialogs.rc create mode 100644 plugin/kps/win/KPSInputDialog.cpp create mode 100644 plugin/kps/win/KPSOutputDialog.cpp create mode 100644 plugin/kps/win/resource.h diff --git a/.gitignore b/.gitignore index 71ee634a..1b6d6ff2 100644 --- a/.gitignore +++ b/.gitignore @@ -41,4 +41,5 @@ out/ # Executables *.exe *.out -*.app + +plugin/ext/ diff --git a/CMakeLists.txt b/CMakeLists.txt index d643a537..5018cb5b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -91,6 +91,11 @@ if (APPLE) add_subdirectory(kramv) endif() +# ps plugin that uses libkram +if (APPLE) + add_subdirectory(plugin) +endif() + #----------------------------------------------------- # https://discourse.cmake.org/t/specifying-cmake-osx-sysroot-breaks-xcode-projects-but-no-other-choice/2532/8 @@ -109,8 +114,8 @@ if (APPLE) endif() # check the SDK - set(XCODE_MIN_SDK_IOS, 14.0) - set(XCODE_MIN_SDK_MACOS, 11.0) + set(XCODE_MIN_SDK_IOS 14.0) + set(XCODE_MIN_SDK_MACOS 11.0) execute_process( COMMAND xcrun --sdk "${CMAKE_OSX_SYSROOT}" --show-sdk-version @@ -143,13 +148,21 @@ endif() set(BIN_DIR ${PROJECT_SOURCE_DIR}/bin) +# So by default install depends on ALL_BUILD target, but that will fail if plugin +# does not have everything setup to build (or like now is not building). +# The plugin is currently setting EXCLUDE_FROM_ALL on the target so it's not built. 
+# https://stackoverflow.com/questions/17164731/installing-only-one-target-and-its-dependencies-out-of-a-complex-project-with + # install doesn't seem to do anything on WIN32, the build elements are not copied install(TARGETS libkram ARCHIVE DESTINATION ${BIN_DIR}) install(TARGETS kram RUNTIME DESTINATION ${BIN_DIR}) if (APPLE) install(TARGETS kramv BUNDLE DESTINATION ${BIN_DIR}) endif() - +# don't install this +#if (APPLE) +# install(TARGETS kram-ps BUNDLE DESTINATION ${BIN_DIR}) +#endif() diff --git a/kramv/CMakeLists.txt b/kramv/CMakeLists.txt index 918a1667..68599317 100644 --- a/kramv/CMakeLists.txt +++ b/kramv/CMakeLists.txt @@ -30,7 +30,9 @@ target_link_libraries(${myTargetApp} libkram "-framework Cocoa" "-framework Metal" - "-framework MetalKit" + "-framework MetalKit" + + # could eliminate this by replacing cube in kramv, but may want full 3d models for charts w/xatlas "-framework ModelIO" ) @@ -136,7 +138,11 @@ target_sources(${myTargetApp} PRIVATE # only these 2 resources are copied into the Resource, the other two are signed # Can't lowercase Resources or files don't go to correct place -set_source_files_properties(Assets.xcassets Base.lproj/Main.storyboard PROPERTIES +set_source_files_properties( + Assets.xcassets + Base.lproj/Main.storyboard + + PROPERTIES MACOSX_PACKAGE_LOCATION Resources ) diff --git a/libkram/CMakeLists.txt b/libkram/CMakeLists.txt index 7564b7c7..7f3173e7 100644 --- a/libkram/CMakeLists.txt +++ b/libkram/CMakeLists.txt @@ -43,6 +43,7 @@ endif() set(myTargetLib libkram) # **** This will create libkram.a or .so or kram.lib depending on platform. +# can also use OBJECT or SHARED, object cuts compile time add_library(${myTargetLib} STATIC) # turn off pch diff --git a/plugin/CMakeLists.txt b/plugin/CMakeLists.txt new file mode 100644 index 00000000..de2c32bc --- /dev/null +++ b/plugin/CMakeLists.txt @@ -0,0 +1,260 @@ +cmake_minimum_required(VERSION 3.19.1 FATAL_ERROR) + +# This is only configured for a Mac build, but see kram cli app +# for the Windows configuration. Eventually port to Win. 
+ +# have to add this to each file, or run with this +# -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON +# set(CMAKE_VERBOSE_MAKEFILE ON) + +#----------------------------------------------------- +# kramv + +# now setup the app project, and link to libkram +set(myTargetApp kram-ps) + +# the mac build has ObjC++ +project( + ${myTargetApp} + #VERSION 0.9.0 + LANGUAGES C CXX OBJCXX +) + +add_executable(${myTargetApp} EXCLUDE_FROM_ALL) + +#----------------------------------------------------- + +target_link_libraries(${myTargetApp} + ate + libkram + "-framework Cocoa" + "-framework AppKit" +) + +set_target_properties(${myTargetApp} PROPERTIES + # Note: match this up with CXX version + # c++11 min + XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD "c++14" + XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++" + + # avx1 + XCODE_ATTRIBUTE_CLANG_X86_VECTOR_INSTRUCTIONS "avx" + + # turn off exceptions/rtti + XCODE_ATTRIBUTE_GCC_ENABLE_CPP_EXCEPTIONS NO + XCODE_ATTRIBUTE_GCC_ENABLE_CPP_RTTI NO + + # can't believe this isn't on by default in CMAKE + XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC YES + + #------------------------- + + # libs can use dwarf, but apps need dSym generated + XCODE_ATTRIBUTE_DEBUG_INFORMATION_FORMAT "dwarf-with-dsym" + XCODE_ATTRIBUTE_ONLY_ACTIVE_ARCH "NO" + + # this drops app from 762KB to 174KB with only ATE enabled + # note about needing -gfull instead of -gused here or debug info messed up: + # https://gist.github.com/tkersey/39b4fe69e14b859889ffadccb009e397 + XCODE_ATTRIBUTE_DEAD_CODE_STRIPPING YES + XCODE_ATTRIBUTE_LLVM_LTO[variant=Release] "Incremental" + + #------------------------- + XCODE_ATTRIBUTE_PRODUCT_BUNDLE_IDENTIFIER "com.ba.kram-ps" + MACOSX_BUNDLE_GUI_IDENTIFIER "com.ba.kram-ps" + + # for now "sign to run locally", or entitlements can't be bundled + XCODE_ATTRIBUTE_CODE_SIGNING_REQUIRED "YES" + XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY "-" + + # use the AssetCatalog for icons + XCODE_ATTRIBUTE_ASSETCATALOG_COMPILER_APPICON_NAME "AppIcon" + + # TODO: not sure how to set this, nothing online either ? + # MACOSX_BUNDLE_APP_CATEGORY "Developer Tools" +) + +target_compile_options(${myTargetApp} PRIVATE -W -Wall) + +#-------------- +# sdk + +# Don't ever use a , in a set call, it causes the glob to process entire hard drive +# and it never seems to return. Maybe it's building a glob of all files on HD. + +set(SDK_DIR "${PROJECT_SOURCE_DIR}/ext/photoshopsdk/pluginsdk") +set(SDK_SOURCE_DIR "${SDK_DIR}/photoshopapi") +set(SDK_COMMON_DIR "${SDK_DIR}/samplecode/common") + +file(GLOB_RECURSE sdkSources CONFIGURE_DEPENDS + "${SDK_SOURCE_DIR}/*.cpp" + "${SDK_SOURCE_DIR}/*.h" + "${SDK_SOURCE_DIR}/*.m" + "${SDK_SOURCE_DIR}/*.mm" + "${SDK_SOURCE_DIR}/*.r" +) + +file(GLOB_RECURSE sdkCommonSources CONFIGURE_DEPENDS + "${SDK_COMMON_DIR}/*.cpp" + "${SDK_COMMON_DIR}/*.h" + "${SDK_COMMON_DIR}/*.m" + "${SDK_COMMON_DIR}/*.mm" + "${SDK_COMMON_DIR}/*.r" +) + +# TODO: had to modify some files to not use exceptions +# pass those onto Adobe + +# TODO: can these be combined into one list? 
+# this is a win file +list(FILTER sdkCommonSources EXCLUDE REGEX ".pstypelibrary.cpp$") +list(FILTER sdkCommonSources EXCLUDE REGEX ".pstypelibrary.h$") + +list(FILTER sdkCommonSources EXCLUDE REGEX ".PIDLLInstance.cpp$") +list(FILTER sdkCommonSources EXCLUDE REGEX ".PIDLLInstance.h$") + +list(FILTER sdkCommonSources EXCLUDE REGEX ".PIUFile.cpp$") +list(FILTER sdkCommonSources EXCLUDE REGEX ".PIUFile.h$") + +list(FILTER sdkCommonSources EXCLUDE REGEX ".PSConstantArray.cpp$") +list(FILTER sdkCommonSources EXCLUDE REGEX ".*Win*.cpp$") +list(FILTER sdkCommonSources EXCLUDE REGEX ".PIWinUI.cpp$") + +# intermingled Win files in with Mac +list(FILTER sdkSources EXCLUDE REGEX ".*Win*.cpp$") + +source_group(TREE "${SDK_SOURCE_DIR}" PREFIX "sdk" FILES ${sdkSources}) +source_group(TREE "${SDK_COMMON_DIR}" PREFIX "sdkcommon" FILES ${sdkCommonSources}) + +set_target_properties(${myTargetApp} PROPERTIES + + XCODE_ATTRIBUTE_WRAPPER_EXTENSION "plugin" + + # these aren't supported anymore, only on archival projects with Rez support + #XCODE_ATTRIBUTE_REZ_PREFIX_FILE + # $(SDK_COMMON_DIR)/includes/MachOMacrezXcode.h + #XCODE_ATTRIBUTE_REZ_SEARCH_PATHS + # $(SDK_SOURCE_DIR)/resources/ + # $(SDK_SOURCE_DIR)/photoshop/ + # $(SDK_COMMON_DIR)/includes/ +) + + +#-------------- +# sources + +set(KPS_SOURCE_DIR "${PROJECT_SOURCE_DIR}/kps") + +file(GLOB_RECURSE appSources CONFIGURE_DEPENDS + "${KPS_SOURCE_DIR}/*.cpp" + "${KPS_SOURCE_DIR}/*.mm" + "${KPS_SOURCE_DIR}/*.h" + + # TODO: also include .r files from + "${KPS_SOURCE_DIR}/*.r" + "${KPS_SOURCE_DIR}/${myTargetApp}.rsrc" +) + +file(GLOB_RECURSE appNibSources CONFIGURE_DEPENDS + "${KPS_SOURCE_DIR}/*.xib" # TODO: move this to resource area below +) + +# win files +list(FILTER appSources EXCLUDE REGEX ".KPSInputDialog.cpp$") +list(FILTER appSources EXCLUDE REGEX ".KPSOutputDialog.cpp$") +list(FILTER appSources EXCLUDE REGEX ".resource.h$") + +source_group(TREE "${KPS_SOURCE_DIR}" PREFIX "source" FILES ${appSources}) + +target_sources(${myTargetApp} PRIVATE + ${appSources} + + ${sdkSources} + ${sdkCommonSources} +) + +target_include_directories(${myTargetApp} PRIVATE + "${KPS_SOURCE_DIR}" + + # the sdk includes and resources + "${SDK_SOURCE_DIR}/photoshop" + "${SDK_SOURCE_DIR}/pica_sp" + "${SDK_SOURCE_DIR}/resources" + + "${SDK_COMMON_DIR}/includes" + "${SDK_COMMON_DIR}/resources" +) + + +#-------------- +# resources + +# for some reason the Cmake template gens/add an Info.plist even though we override it +set_target_properties(${myTargetApp} PROPERTIES + MACOSX_BUNDLE TRUE + + MACOSX_BUNDLE_INFO_PLIST ${KPS_SOURCE_DIR}/mac/Info.plist + XCODE_ATTRIBUTE_INFOPLIST_FILE ${KPS_SOURCE_DIR}/mac/Info.plist + #XCODE_ATTRIBUTE_CODE_SIGN_ENTITLEMENTS ${KPS_SOURCE_DIR}/mac/kramv.entitlements +) + +target_sources(${myTargetApp} PRIVATE +# Assets.xcassets +# Base.lproj/Main.storyboard + ${appNibSources} + + ${KPS_SOURCE_DIR}/mac/Info.plist + +# ${KPS_SOURCE_DIR}/mac/Info.plist +# kramv.entitlements +) + +# only these 2 resources are copied into the Resource, the other two are signed +# Can't lowercase Resources or files don't go to correct place +set_source_files_properties( + ${appNibSources} + + # this is created in the PRE_BUILD step below + ${KPS_SOURCE_DIR}/${myTargetApp}.rsrc + + PROPERTIES + MACOSX_PACKAGE_LOCATION Resources +) + +#-------------- +# rez + +# note that despite the usage printed, -i and -s don't actually work +# for some reason only -I actually includes search paths. Ugh. +# But even though this succeeds, it gens a 0 size rsrc file. Ugh! 
+ +# turned off for now, and checking in pre-built resource +# but app still can't find _main entrpoint. + +if (FALSE) + +execute_process( + COMMAND xcrun -f Rez + OUTPUT_VARIABLE rezCompiler + OUTPUT_STRIP_TRAILING_WHITESPACE +) + +add_custom_command(TARGET ${myTargetApp} PRE_BUILD + DEPENDS ${KPS_SOURCE_DIR}/KPS.r + COMMAND ${rezCompiler} + -I ${SDK_SOURCE_DIR}/resources/ + -I ${SDK_SOURCE_DIR}/photoshop/ + -I ${SDK_COMMON_DIR}/includes/ + + -arch x86_64 + + # needs this for Carbon.r and CoreServices.r in the Adobe .r headers + #-F Carbon + #-F CoreServices + -F /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/ + + -o "${KPS_SOURCE_DIR}/${myTargetApp}.rsrc" + ${KPS_SOURCE_DIR}/KPS.r +) + +endif() diff --git a/plugin/kps/KPS.cpp b/plugin/kps/KPS.cpp new file mode 100755 index 00000000..c45e07a0 --- /dev/null +++ b/plugin/kps/KPS.cpp @@ -0,0 +1,1140 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + +#include "KPS.h" + +#include "KPSVersion.h" +#include "KPSUI.h" + +//#include "crn_core.h" +//#include "crn_mipmapped_texture.h" + +#include +#include + +#include + +#ifdef __PIMac__ +#include +#endif + +#ifndef MIN +#define MIN(A,B) ( (A) < (B) ? 
(A) : (B)) +#endif + +#include "Kram.h" +#include "KTXImage.h" +#include "KramImage.h" +#include "KramImageInfo.h" + +// this is only on macOS +#include +// including FileUtilities pulls in ObjC crud to .cpp file +//#include "FileUtilities.h" +// these sporadically take intptr_t and int32 on Win, so fix signatures on port +extern OSErr PSSDKWrite(int32 refNum, int32 refFD, int16 usePOSIXIO, int32 * count, void * buffPtr); +extern OSErr PSSDKRead(int32 refNum, int32 refFD, int16 usePOSIXIO, int32 * count, void * buffPtr); +extern OSErr PSSDKSetFPos(int32 refNum, int32 refFD, int16 usePOSIXIO, short posMode, long posOff); + +using namespace kram; + +// take from KPSScripting.cpp +extern DialogFormat FormatToDialog(DDS_Format fmt); +extern DDS_Format DialogToFormat(DialogFormat fmt); +extern MyMTLPixelFormat FormatToPixelFormat(DDS_Format fmt); + +// this just strips args +#define macroUnusedArg(x) + +// global needed by a bunch of Photoshop SDK routines +SPBasicSuite *sSPBasic = NULL; + + +const char* kBundleIdentifier = "com.ba.kram-ps"; + +static void DoAbout(AboutRecordPtr aboutP) +{ +#ifdef __PIMac__ + const char * const plugHndl = kBundleIdentifier; + const void *hwnd = aboutP; +#else + const HINSTANCE const plugHndl = GetDLLInstance((SPPluginRef)aboutP->plugInRef); + HWND hwnd = (HWND)((PlatformData *)aboutP->platformData)->hwnd; +#endif + + DDS_About(DDS_Build_Complete_Manual, plugHndl, hwnd); +} + + +#pragma mark- + + +static void HandleError(GlobalsPtr globals, const char *errStr) +{ + const int size = MIN(255, strlen(errStr)); + + Str255 p_str; + p_str[0] = size; + strncpy((char *)&p_str[1], errStr, size); + + PIReportError(p_str); + gResult = errReportString; // macro uses globals +} + +#pragma mark- + +static Rect ConvertRect(VRect rect) { + Rect r; + r.left = rect.left; + r.right = rect.right; + r.top = rect.top; + r.bottom = rect.bottom; + return r; +} + + +#pragma mark- + +static void InitGlobals(GlobalsPtr globals) +{ + // create "globals" as a our struct global pointer so that any + // macros work: + //GlobalsPtr globals = (GlobalsPtr)globalPtr; + + // load options + memset(&gInOptions, 0, sizeof(gInOptions)); + strncpy(gInOptions.sig, "Krmi", 4); + gInOptions.version = 1; + gInOptions.alpha = DDS_ALPHA_CHANNEL; // ignored + + // save options + memset(&gOptions, 0, sizeof(gOptions)); + strncpy(gOptions.sig, "Krmo", 4); + gOptions.version = 1; + gOptions.format = DDS_FMT_RGBA8; + gOptions.alpha = DDS_ALPHA_CHANNEL; // ignored + gOptions.premultiply = FALSE; // ignored + gOptions.mipmap = FALSE; // ignored + gOptions.filter = DDS_FILTER_MITCHELL; // ignored + gOptions.cubemap = FALSE; // ignored +} + +// TODO: replace handles with buffers, but revertInfo is a HANDLE, so how to update that? 
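+// Note: these thin wrappers route Handle allocation, locking, and resizing
+// through the host's handleProcs suite, so Photoshop owns the memory behind
+// gStuff->revertInfo that ReadParams/WriteParams below copy the options into.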
+ +static Handle myNewHandle(GlobalsPtr globals, const int32 inSize) +{ + return gStuff->handleProcs->newProc(inSize); +} + +static Ptr myLockHandle(GlobalsPtr globals, Handle h) +{ + return gStuff->handleProcs->lockProc(h, TRUE); +} + +static void myUnlockHandle(GlobalsPtr globals, Handle h) +{ + gStuff->handleProcs->unlockProc(h); +} + +static int32 myGetHandleSize(GlobalsPtr globals, Handle h) +{ + return gStuff->handleProcs->getSizeProc(h); +} + +static void mySetHandleSize(GlobalsPtr globals, Handle h, const int32 inSize) +{ + gStuff->handleProcs->setSizeProc(h, inSize); +} + +// newHandle doesn't have matching call to this +//static void myDisposeHandle(GlobalsPtr globals, Handle h) +//{ +// gStuff->handleProcs->disposeProc(h); +//} + + +class PSStream +{ +public: + PSStream(int32_t fd); + virtual ~PSStream() {}; + + bool read(void* pBuf, int32_t len); + bool write(const void* pBuf, int32_t len); + //bool flush() { return true; }; + uint64_t size(); + //uint64_t tell(); + bool seek(uint64_t ofs); + +private: + int32_t _fd; +}; + +// posix not supported on Windows, why? + + +PSStream::PSStream(int32_t fd) + : _fd(fd) +{ + //seek(0); +} + +bool PSStream::read(void* pBuf, int32_t len) +{ + OSErr err = PSSDKRead(0, _fd, (int16_t)true, &len, pBuf); + return err == 0; +} + + +bool PSStream::write(const void* pBuf, int32_t len) +{ + OSErr err = PSSDKWrite(0, _fd, (int16_t)true, &len, (void*)pBuf); + return err == 0; + +} + +// not sure why this isn't a part of the api, and neither is tell? +uint64_t PSStream::size() +{ + struct stat st; + fstat(_fd, &st); + return st.st_size; + +} + +bool PSStream::seek(uint64_t offset) +{ + // seek from begnning + OSErr err = PSSDKSetFPos(0, _fd, (int16_t)true, 1, (long)offset); + return err == 0; +} + + +#pragma mark- + + + + +// Additional parameter functions +// These transfer settings to and from gStuff->revertInfo + +template +static bool ReadParams(GlobalsPtr globals, T *options) +{ + bool found_revert = FALSE; + + if ( gStuff->revertInfo != NULL ) + { + if( myGetHandleSize(globals, gStuff->revertInfo) == sizeof(T) ) + { + T *flat_options = (T *)myLockHandle(globals, gStuff->revertInfo); + + memcpy((char*)options, (char*)flat_options, sizeof(T) ); + + myUnlockHandle(globals, gStuff->revertInfo); + + found_revert = TRUE; + } + } + + return found_revert; +} + +template +static void WriteParams(GlobalsPtr globals, T *options) +{ + T *flat_options = NULL; + + if (gStuff->hostNewHdl != NULL) // we have the handle function + { + if (gStuff->revertInfo == NULL) + { + gStuff->revertInfo = myNewHandle(globals, sizeof(T) ); + } + else + { + if(myGetHandleSize(globals, gStuff->revertInfo) != sizeof(T) ) + mySetHandleSize(globals, gStuff->revertInfo, sizeof(T) ); + } + + flat_options = (T *)myLockHandle(globals, gStuff->revertInfo); + + memcpy((char*)flat_options, (char*)options, sizeof(T) ); + + myUnlockHandle(globals, gStuff->revertInfo); + } +} + + +// this is called first on read +static void DoReadPrepare(GlobalsPtr globals) +{ + // posix only on Mac + if (!gStuff->hostSupportsPOSIXIO) + { + //data->gResult = errPlugInHostInsufficient; + HandleError(globals, "Read - only support posix io"); + return; + } + + // set to indicate posixIO usage + gStuff->pluginUsingPOSIXIO = TRUE; + + + if (!gStuff->HostSupports32BitCoordinates) + { + HandleError(globals, "Read - only support imageSize32"); + return; + } + + // have to ack that plug supports 32-bit + gStuff->PluginUsing32BitCoordinates = TRUE; + + gStuff->maxData = 0; +} + +// read first 4 bytes and determine 
the file system +static void DoFilterFile(GlobalsPtr globals) +{ + // Note: for now only suppor KTX + //#define DDS_SIG "DDS " + + // note 6 instead of 4 chars + PSStream stream(gStuff->posixFileDescriptor); + + if (!stream.seek(0)) { + HandleError(globals, "Read - cannot rewind in filter"); + return; + } + + bool isKTX = false; + bool isKTX2 = false; + + uint8_t hdr[6]; + if (stream.read(hdr, kKTXIdentifierSize)) { + if (memcmp(hdr, kKTXIdentifier, kKTXIdentifierSize) == 0) + isKTX = true; + else if (memcmp(hdr, kKTX2Identifier, kKTXIdentifierSize) == 0) + isKTX2 = true; + } + + // TODO: should this also filter out ktx/ktx2 that are unsupported + // could mostly look at header except in case of ASTC HDR where ktx + // must also look for format prop. + + if (!(isKTX || isKTX2)) { + gResult = formatCannotRead; + } +} + + +static void DoReadStart(GlobalsPtr globals) +{ + gResult = noErr; + + // read it a second time, but only the header + bool isKTX = false; + bool isKTX2 = false; + + PSStream stream(gStuff->posixFileDescriptor); + if (!stream.seek(0)) { + HandleError(globals, "Read - cannot rewind"); + return; + } + + uint8_t hdr[6]; + if (stream.read(hdr, kKTXIdentifierSize)) { + + if (memcmp(hdr, kKTXIdentifier, kKTXIdentifierSize) == 0) + isKTX = true; + else if (memcmp(hdr, kKTX2Identifier, kKTXIdentifierSize) == 0) + isKTX2 = true; + } + + + if (!(isKTX || isKTX2)) { + HandleError(globals, "Read - no valid ktx/ktx2 signature"); + return; + } + + int32_t w, h; + MyMTLPixelFormat format; + KTXHeader header; + KTX2Header header2; + + if (!stream.seek(0)) { + HandleError(globals, "Read - cannot rewind after sig"); + return; + } + + if (isKTX) { + if (!stream.read(&header, sizeof(KTXHeader))) + { + HandleError(globals, "Read - couldn't read ktx header"); + return; + } + + w = header.pixelWidth; + h = header.pixelHeight; + format = header.metalFormat(); + } + else { + if (!stream.read(&header2, sizeof(KTX2Header))) + { + HandleError(globals, "Read - couldn't read ktx2 header"); + return; + } + + w = header2.pixelWidth; + h = header2.pixelHeight; + format = vulkanToMetalFormat(header2.vkFormat); + } + + gStuff->imageMode = plugInModeRGBColor; + + bool hasAlpha = isAlphaFormat(format); + int32_t numChannels = numChannelsOfFormat(format); + + gStuff->imageSize32.h = w; + gStuff->imageSize32.v = h; + + // plugin sets the numChannels here + // 3 for rgb, 4 for rgba, ... + gStuff->planes = numChannels; // (hasAlpha ? 4 : 3); + + if (numChannels == 4) { + bool isPremul = false; // TODO: hookup to premul state in props field (Alb.ra,Alb.ga,...) + gStuff->transparencyPlane = 3; + gStuff->transparencyMatting = isPremul ? 1 : 0; + } + + // 16f and 32f go to 32f + gStuff->depth = isFloatFormat(header.metalFormat()) ? 32 : 8; + + + bool reverting = ReadParams(globals, &gInOptions); + + if (!reverting && gStuff->hostSig != 'FXTC') + { + DDS_InUI_Data params; + + #ifdef __PIMac__ + const char * const plugHndl = kBundleIdentifier; + const void *hwnd = globals; + #else + const HINSTANCE const plugHndl = GetDLLInstance((SPPluginRef)gStuff->plugInRef); + HWND hwnd = (HWND)((PlatformData *)gStuff->platformData)->hwnd; + #endif + + // DDS_InUI is responsible for not popping a dialog if the user + // didn't request it. It still has to set the read settings from preferences though. 
+ bool result = DDS_InUI(¶ms, hasAlpha, plugHndl, hwnd); + + if(result) + { + gInOptions.alpha = params.alpha; + + WriteParams(globals, &gInOptions); + } + else + { + gResult = userCanceledErr; + } + } + +// the following was suppose to set alpha if it was set in the options +// but not using the option anymore. Honoring the format of the src file. +// if(gInOptions.alpha == DDS_ALPHA_TRANSPARENCY && gStuff->planes == 4) +} + + +void CopyImageRectToPS(GlobalsPtr globals, const KTXImage& image, int32_t mipLevel) +{ + // TODO: may need to decocde compressed KTX if want to support those + int32_t numPlanes = MAX(4, gStuff->planes); + + int32_t w = image.width; + //int32_t h = image.height; + //int32_t rowBytes = numPlanes * w; + const uint8_t* pixels = image.fileData + image.mipLevels[mipLevel].offset; + + gStuff->data = (void*)pixels; + + gStuff->planeBytes = 1; + gStuff->colBytes = gStuff->planeBytes * numPlanes; + gStuff->rowBytes = gStuff->colBytes * w; + + gStuff->loPlane = 0; + gStuff->hiPlane = numPlanes - 1; + + gStuff->theRect32.left = 0; + gStuff->theRect32.right = gStuff->imageSize32.h; + gStuff->theRect32.top = 0; + gStuff->theRect32.bottom = gStuff->imageSize32.v; + + gStuff->theRect = ConvertRect(gStuff->theRect32); + + // THis actuall writes the rectangle above from data + gResult = AdvanceState(); + + // very important! + gStuff->data = NULL; +} + +static void DoReadContinue(GlobalsPtr globals) +{ + gResult = noErr; + + PSStream stream(gStuff->posixFileDescriptor); + + if (!stream.seek(0)) { + HandleError(globals, "Read - cannot rewind after sig"); + return; + } + + // read it yet a third time, this time reading the first mip + uint64_t size = stream.size(); + + // read entire ktx/2 into memory (ideally mmap it) + std::vector data; + data.resize(size); + + if (!stream.read(data.data(), data.size())) { + HandleError(globals, "Read - Couldn't read file"); + return; + } + + KTXImage srcImage; + if (!srcImage.open(data.data(), data.size())) { + HandleError(globals, "Read - Couldn't parse file"); + return; + } + + auto pixelFormat = srcImage.pixelFormat; + + KTXImage* outputImage = &srcImage; + Image imageDecoder; + KTXImage decodedImage; + + if (isExplicitFormat(pixelFormat)) { + if (isFloatFormat(pixelFormat)) { + HandleError(globals, "Read - can't decode explicit texture format or type"); + return; + + // TODO: not sure that decode does this, code for this exists when a KTX file is imported to kram + // and then that's converted to an Image to feed to mip gen on the encode side. 
+// if (isHalfFormat(pixelFormat)) { +// TexEncoder decoderType = kTexEncoderUnknown; +// if (!validateFormatAndDecoder(srcImage.textureType, pixelFormat, decoderType)) { +// HandleError(globals, "Read - can't decode this texture format or type"); +// return; +// } +// +// // only need to decode 16f -> 32f, since PS only has 8u, 16u, and 32f +// TexEncoder decoder = kTexEncoderUnknown; +// if (!imageDecoder.decode(srcImage, decodedImage, decoder, false, "")) { +// HandleError(globals, "Read - Couldn't decode file"); +// return; +// } +// outputImage = &decodedImage; +// } + } + } + else if (isHdrFormat(pixelFormat)){ + // TODO: hdr block encoded formats must be decoded + // only ASTC and BC6 formats, but no BC6 support right now + HandleError(globals, "Read - can't decode hdr texture format or type"); + return; + } + else { + TexEncoder decoder = kTexEncoderUnknown; + if (!validateFormatAndDecoder(srcImage.textureType, pixelFormat, decoder)) { + HandleError(globals, "Read - can't decode this texture format or type"); + return; + } + + // ldr block encoded formats must be decoded + if (!imageDecoder.decode(srcImage, decodedImage, decoder, false, "")) { + HandleError(globals, "Read - Couldn't decode file"); + return; + } + outputImage = &decodedImage; + } + + CopyImageRectToPS(globals, *outputImage, 0); +} + + + + +static void DoReadFinish(GlobalsPtr macroUnusedArg(globals)) +{ + +} + +#pragma mark- + +static void DoOptionsPrepare(GlobalsPtr globals) +{ + gStuff->maxData = 0; +} + + +static void DoOptionsStart(GlobalsPtr globals) +{ + ReadParams(globals, &gOptions); + + if( ReadScriptParamsOnWrite(globals) ) + { + bool have_transparency = false; + const char *alpha_name = NULL; + + if (gStuff->hostSig == '8BIM') + // this is a PSD file? + have_transparency = (gStuff->documentInfo && gStuff->documentInfo->mergedTransparency); + else + // either rgba or la + have_transparency = (gStuff->planes == 2 || gStuff->planes == 4); + + + if (gStuff->documentInfo && gStuff->documentInfo->alphaChannels) + alpha_name = gStuff->documentInfo->alphaChannels->name; + + + DDS_OutUI_Data params; + + params.format = FormatToDialog(gOptions.format); + + params.alpha = (DialogAlpha)gOptions.alpha; + params.premultiply = gOptions.premultiply; + + params.mipmap = gOptions.mipmap; + + params.filter = (gOptions.filter == DDS_FILTER_BOX ? DIALOG_FILTER_BOX : + gOptions.filter == DDS_FILTER_TENT ? DIALOG_FILTER_TENT : + gOptions.filter == DDS_FILTER_LANCZOS4 ? DIALOG_FILTER_LANCZOS4 : + gOptions.filter == DDS_FILTER_MITCHELL ? DIALOG_FILTER_MITCHELL : + gOptions.filter == DDS_FILTER_KAISER ? DIALOG_FILTER_KAISER : + DIALOG_FILTER_MITCHELL); + + params.cubemap = gOptions.cubemap; + + #ifdef __PIMac__ + const char * const plugHndl = kBundleIdentifier; + const void *hwnd = globals; + #else + const HINSTANCE const plugHndl = GetDLLInstance((SPPluginRef)gStuff->plugInRef); + HWND hwnd = (HWND)((PlatformData *)gStuff->platformData)->hwnd; + #endif + + const bool ae_ui = (gStuff->hostSig == 'FXTC'); + + + bool result = DDS_OutUI(¶ms, have_transparency, alpha_name, ae_ui, plugHndl, hwnd); + + + if (result) + { + gOptions.format = DialogToFormat(params.format); + + gOptions.alpha = params.alpha; + gOptions.premultiply = params.premultiply; + + gOptions.mipmap = params.mipmap; + + gOptions.filter = (params.filter == DIALOG_FILTER_BOX ? DDS_FILTER_BOX : + params.filter == DIALOG_FILTER_TENT ? DDS_FILTER_TENT : + params.filter == DIALOG_FILTER_LANCZOS4 ? DDS_FILTER_LANCZOS4 : + params.filter == DIALOG_FILTER_MITCHELL ? 
DDS_FILTER_MITCHELL : + params.filter == DIALOG_FILTER_KAISER ? DDS_FILTER_KAISER : + DDS_FILTER_MITCHELL); + + gOptions.cubemap = params.cubemap; + + + WriteParams(globals, &gOptions); + WriteScriptParamsOnWrite(globals); + } + else + gResult = userCanceledErr; + } +} + + +static void DoOptionsContinue(GlobalsPtr macroUnusedArg(globals)) +{ + +} + + +static void DoOptionsFinish(GlobalsPtr macroUnusedArg(globals)) +{ + +} + +#pragma mark- + +// Tis is an esimate of memory use? + +static void DoEstimatePrepare(GlobalsPtr globals) +{ + if (!gStuff->HostSupports32BitCoordinates) + { + HandleError(globals, "only support imageSize32"); + return; + } + + // poxis only on Mac + if (!gStuff->hostSupportsPOSIXIO) + { + HandleError(globals, "only support posix io"); + return; + } + + // set to indicate posixIO usage + gStuff->pluginUsingPOSIXIO = TRUE; + + // have to ack that plug supports 32-bit + gStuff->PluginUsing32BitCoordinates = TRUE; + + gStuff->maxData = 0; +} + + +static void DoEstimateStart(GlobalsPtr globals) +{ + int64_t width = gStuff->imageSize32.h; + int64_t height = gStuff->imageSize32.v; + + // TODO: this assumes single 2d image in dds, and an 8-bit depth multiple + int64_t numPlanes = MAX(4, gStuff->planes); + int64_t depth = gStuff->depth; // this is in bits + + int64_t dataBytes = (width * height * numPlanes * depth + 7) >> 3; + + // this is how much space we need to write out data as KTX/KTX2 file + // KTX can precompute max size from width/height and mip count, but ktx2 is compressed + // I think PS will make sure there are enough + + // Can we get this number quickly out of kram based on mip setting, format, etc. + // May not always write out full depth. May have encoded format. + + size_t bytesToRead = dataBytes; + + gStuff->minDataBytes = MIN(bytesToRead / 2, INT_MAX); + gStuff->maxDataBytes = MIN(bytesToRead, INT_MAX); + + gStuff->data = NULL; +} + + +static void DoEstimateContinue(GlobalsPtr macroUnusedArg(globals)) +{ + +} + + +static void DoEstimateFinish(GlobalsPtr macroUnusedArg(globals)) +{ + +} + +#pragma mark- + +static void DoWritePrepare(GlobalsPtr globals) +{ + if (!gStuff->HostSupports32BitCoordinates) + { + HandleError(globals, "only support imageSize32"); + return; + } + + // poxis only on Mac + if (!gStuff->hostSupportsPOSIXIO) + { + HandleError(globals, "only support posix io"); + return; + } + + // set to indicate posixIO usage + gStuff->pluginUsingPOSIXIO = TRUE; + + // have to ack that plug supports 32-bit + gStuff->PluginUsing32BitCoordinates = TRUE; + + gStuff->maxData = 0; +} + +// TODO: extent to take a rect, and return he data +static bool CopyImageRectFromPS(GlobalsPtr globals, vector&pixels, int32_t numPlanes, int32_t width, int32_t height) +{ + int32_t rowBytes = width * numPlanes; + + // this is where data will go + pixels.resize(rowBytes * height); + + gStuff->loPlane = 0; + gStuff->hiPlane = numPlanes-1; // either b for rgb, or a for rgba are the last byte + gStuff->planeBytes = sizeof(unsigned char); // 1 for interleaved data + gStuff->colBytes = gStuff->planeBytes * numPlanes; + gStuff->rowBytes = rowBytes; // * gStuff->colBytes; // interleaved or non-interleaved data is why colBytes is here + + gStuff->theRect32.left = 0; + gStuff->theRect32.right = gStuff->theRect32.left + width; + gStuff->theRect32.top = 0; + gStuff->theRect32.bottom = gStuff->theRect32.top + height; + + // set deprecated rect + gStuff->theRect = ConvertRect(gStuff->theRect32); + + // This fills out the pixel data + gStuff->data = pixels.data(); + + gResult = 
AdvanceState(); + if (gResult != noErr) { + HandleError(globals, "Write - AdvanceState failed to read pixels"); + } + + // this pack alpha into 3rd channel? + bool have_alpha_channel = (gStuff->channelPortProcs && gStuff->documentInfo && gStuff->documentInfo->alphaChannels); + if (gResult == noErr && have_alpha_channel) // && gOptions.alpha == DDS_ALPHA_CHANNEL) + { + ReadPixelsProc ReadProc = gStuff->channelPortProcs->readPixelsProc; + + ReadChannelDesc *alpha_channel = gStuff->documentInfo->alphaChannels; + + VRect wroteRect; + VRect writeRect = { 0, 0, height, width }; // tlbr + + PSScaling scaling; + scaling.sourceRect = writeRect; + scaling.destinationRect = writeRect; + + // this is converting to bits + PixelMemoryDesc memDesc = { (char *)gStuff->data, gStuff->rowBytes * 8, gStuff->colBytes * 8, 3 * 8, gStuff->depth }; + + gResult = ReadProc(alpha_channel->port, &scaling, &writeRect, &memDesc, &wroteRect); + + if (gResult != noErr) { + HandleError(globals, "Write - convert layer to 4 channels failed"); + } + } + + // very important!, so it's not filled in with data again and again on remaining AdvanceState calls + gStuff->data = NULL; + + if (gResult != noErr) { + return false; + } + + return true; +} + +static void DoWriteStart(GlobalsPtr globals) +{ + ReadParams(globals, &gOptions); + ReadScriptParamsOnWrite(globals); + + // Xcode can't debug p gStuff->..., so must type p global->formatParamBlock->... + + int32_t numPlanes = MAX(4, gStuff->planes); + + if (gStuff->imageMode != plugInModeRGBColor) { + HandleError(globals, "Not rgb color"); + return; + } + + if (gStuff->depth != 8) { + HandleError(globals, "Not 8-bit color"); + return; + } + + // Note: loadImageFromPixels only works with 4 byte image right now + bool haveAlpha = (numPlanes == 4) || ((gStuff->channelPortProcs && gStuff->documentInfo && gStuff->documentInfo->alphaChannels)); + if ((numPlanes != 4) || (gStuff->planes == 3 && !haveAlpha)) + { + HandleError(globals, "Not 4 planes, or 3 with alpha"); + return; + } + + int width = gStuff->imageSize32.h; + int height = gStuff->imageSize32.v; + + // this is a potentiall large memory allocation for one level of the image + std::vector pixels; + if (!CopyImageRectFromPS(globals, pixels, numPlanes, width, height)) { + //return; + } + + Image srcImage; + ImageInfo dstImageInfo; + KTXImage dstImage; + + if (gResult == noErr) + { + // convert pixels into ktx with mips if needed in memory + // note: cannot roundtrip mips, so may want to not do mips or block encodes here + // try to support + // TODO: this is limiting since loadImage must be single 2D image + + if (!srcImage.loadImageFromPixels(pixels, width, height, true, true)) { + HandleError(globals, "Write - loadImageFromPixels failed"); + } + } + + if (gResult == noErr) + { + MyMTLPixelFormat pixelFormat = FormatToPixelFormat(gOptions.format); + + // setup all the data to generate dstImage + // now apply user picked format + ImageInfoArgs dstImageInfoArgs; + dstImageInfoArgs.pixelFormat = pixelFormat; + dstImageInfoArgs.doMipmaps = false; + + // ? 
+ // photoshop provides raw image as unmultiplied, but need to premul it +// if (haveAlpha) { +// dstImageInfoArgs.isPremultiplied = true; +// } + + if (!validateFormatAndEncoder(dstImageInfoArgs)) { + HandleError(globals, "Write - validate format failed"); + } + else { + dstImageInfo.initWithArgs(dstImageInfoArgs); + + if (!srcImage.encode(dstImageInfo, dstImage)) { + HandleError(globals, "Write - encode failed"); + } + } + } + + // testing only + //HandleError(globals, "Write - made it past encode"); + + if (gResult == noErr) { + // this needs to write ktx with mips and all to the memory, then copy it to dataFork + // is this dataFork even valid anymore + PSStream stream(gStuff->posixFileDescriptor); // write + + // TOOD: this is writing 1k x 1k image out as 8MB instead of 4MB + // see if validate above fixes that. + + if (!stream.write(dstImage.fileData, (int32_t)dstImage.fileDataLength)) { + HandleError(globals, "Write - stream write failed"); + } + } +} + + +static void DoWriteContinue(GlobalsPtr macroUnusedArg(globals)) +{ + +} + + +static void DoWriteFinish(GlobalsPtr globals) +{ + if (gStuff->hostSig != 'FXTC') + WriteScriptParamsOnWrite(globals); +} + + +#pragma mark- + + +DLLExport MACPASCAL void PluginMain(const short selector, + FormatRecord *formatParamBlock, + intptr_t *dataPointer, + short *result) +{ + if (selector == formatSelectorAbout) + { + sSPBasic = ((AboutRecordPtr)formatParamBlock)->sSPBasic; + + DoAbout((AboutRecordPtr)formatParamBlock); + } + else + { + sSPBasic = formatParamBlock->sSPBasic; //thanks Tom + + GlobalsPtr globals = (GlobalsPtr)*dataPointer; + if (globals == NULL) + { + globals = (GlobalsPtr)malloc(sizeof(Globals)); + + if(globals == NULL) { + *result = memFullErr; + return; + } + + InitGlobals(globals); + + *dataPointer = (intptr_t)globals; + } + + globals->result = result; + globals->formatParamBlock = formatParamBlock; + +#if 1 + static const FProc routineForSelector [] = + { + /* formatSelectorAbout DoAbout, */ + + /* formatSelectorReadPrepare */ DoReadPrepare, + /* formatSelectorReadStart */ DoReadStart, + /* formatSelectorReadContinue */ DoReadContinue, + /* formatSelectorReadFinish */ DoReadFinish, + + /* formatSelectorOptionsPrepare */ DoOptionsPrepare, + /* formatSelectorOptionsStart */ DoOptionsStart, + /* formatSelectorOptionsContinue */ DoOptionsContinue, + /* formatSelectorOptionsFinish */ DoOptionsFinish, + + /* formatSelectorEstimatePrepare */ DoEstimatePrepare, + /* formatSelectorEstimateStart */ DoEstimateStart, + /* formatSelectorEstimateContinue */ DoEstimateContinue, + /* formatSelectorEstimateFinish */ DoEstimateFinish, + + /* formatSelectorWritePrepare */ DoWritePrepare, + /* formatSelectorWriteStart */ DoWriteStart, + /* formatSelectorWriteContinue */ DoWriteContinue, + /* formatSelectorWriteFinish */ DoWriteFinish, + + /* formatSelectorFilterFile */ DoFilterFile + }; + + // Dispatch selector + if (selector > formatSelectorAbout && selector <= formatSelectorFilterFile) + (routineForSelector[selector-1])(globals); // dispatch using jump table + else + gResult = formatBadParameters; + +#else + // This explicit dispatch is much easier to follow + // can can set breakpoints, and step from a central dispatch point here. + + // Dispatch selector. 
+ switch (selector) { + case formatSelectorReadPrepare: + DoReadPrepare(format_record, data, result); + break; + case formatSelectorReadStart: + DoReadStart(format_record, data, result); + break; + case formatSelectorReadContinue: + DoReadContinue(format_record, data, result); + break; + case formatSelectorReadFinish: + DoReadFinish(format_record, data, result); + break; + + case formatSelectorOptionsPrepare: + DoOptionsPrepare(format_record, data, result); + break; + case formatSelectorOptionsStart: + DoOptionsStart(format_record, data, result, plugin_ref); + break; + case formatSelectorOptionsContinue: + DoOptionsContinue(format_record, data, result); + break; + case formatSelectorOptionsFinish: + DoOptionsFinish(format_record, data, result); + break; + + case formatSelectorEstimatePrepare: + DoEstimatePrepare(format_record, data, result); + break; + case formatSelectorEstimateStart: + DoEstimateStart(format_record, data, result); + break; + case formatSelectorEstimateContinue: + DoEstimateContinue(format_record, data, result); + break; + case formatSelectorEstimateFinish: + DoEstimateFinish(format_record, data, result); + break; + + case formatSelectorWritePrepare: + DoWritePrepare(format_record, data, result); + break; + case formatSelectorWriteStart: + DoWriteStart(format_record, data, result); + break; + case formatSelectorWriteContinue: + DoWriteContinue(format_record, data, result); + break; + case formatSelectorWriteFinish: + DoWriteFinish(format_record, data, result); + break; + + case formatSelectorReadLayerStart: + DoReadLayerStart(format_record, data, result); + break; + case formatSelectorReadLayerContinue: + DoReadLayerContinue(format_record, data, result); + break; + case formatSelectorReadLayerFinish: + DoReadLayerFinish(format_record, data, result); + break; + + case formatSelectorWriteLayerStart: + DoWriteLayerStart(format_record, data, result); + break; + case formatSelectorWriteLayerContinue: + DoWriteLayerContinue(format_record, data, result); + break; + case formatSelectorWriteLayerFinish: + DoWriteLayerFinish(format_record, data, result); + break; + + case formatSelectorFilterFile: + DoFilterFile(format_record, data, result); + break; + } + } +#endif + + } +} diff --git a/plugin/kps/KPS.h b/plugin/kps/KPS.h new file mode 100755 index 00000000..1399fe97 --- /dev/null +++ b/plugin/kps/KPS.h @@ -0,0 +1,214 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + +#pragma once + +#include "PIDefines.h" +#include "PIFormat.h" +#include "PIExport.h" +#include "PIUtilities.h" +#include "PIProperties.h" + + +// these are format settings for output +enum { + // lossy BC + DDS_FMT_BC1 = 0, + DDS_FMT_BC1S = 1, + + DDS_FMT_BC3 = 4, + DDS_FMT_BC3S = 5, + + DDS_FMT_BC4 = 6, + DDS_FMT_BC4S = 7, + + DDS_FMT_BC5 = 8, + DDS_FMT_BC5S = 9, + + DDS_FMT_BC7 = 12, + DDS_FMT_BC7S = 13, + + // lossless formats + DDS_FMT_R8 = 128, + DDS_FMT_RG8 = 138, + DDS_FMT_RGBA8 = 148, + DDS_FMT_RGBA8S = 158, + + DDS_FMT_R16F = 168, + DDS_FMT_RG16F = 178, + DDS_FMT_RGBA16F = 188, + + DDS_FMT_R32F = 198, + DDS_FMT_RG32F = 208, + DDS_FMT_RGBA32F = 218, + + // TODO: not sure this should allow lossy export + // TODO: add ASTC4x4, 5x5, 6x6, 8x8 + // TODO: add ETC2 + + // TODO: R16S format for depth/heighmaps?, PS can store this for edits + // PS stores data as 16S acco +}; +typedef uint8 DDS_Format; + + +//----------------------------- + +#if 1 // not used + +// not used +enum { + DDS_ALPHA_NONE = 0, + DDS_ALPHA_TRANSPARENCY, + DDS_ALPHA_CHANNEL +}; +typedef uint8 DDS_Alpha; + +// not used +enum{ + DDS_FILTER_BOX, + DDS_FILTER_TENT, + DDS_FILTER_LANCZOS4, + DDS_FILTER_MITCHELL, + DDS_FILTER_KAISER +}; +typedef uint8 DDS_Filter; + + +// TODO: revisit these options, these are mostly no longer used + +// Load options +typedef struct { + char sig[4]; + uint8 version; + DDS_Alpha alpha; + uint8 reserved[26]; + +} DDS_inData; + +// Save options +typedef struct { + char sig[4]; + uint8 version; + DDS_Format format; + DDS_Alpha alpha; + Boolean premultiply; + Boolean mipmap; + DDS_Filter filter; + Boolean cubemap; + uint8 reserved[245]; + +} DDS_outData; + +#else + +// no input settings + +struct DDS_outData +{ + DDS_Format format; + + // these need UI, should we only do lossless non-mipped import/export + // kram can do all this, but premul here is lossy + // much better to script presets for export, and pick from those + // the plugin could read the preset file (or embed it). + // + //Boolean premultiply; + //Boolean mipmap; +}; + +#endif + + +typedef struct Globals +{ // This is our structure that we use to pass globals between routines: + + short *result; // Must always be first in Globals. + FormatRecord *formatParamBlock; // Must always be second in Globals. 
+ + //Handle fileH; // stores the entire binary file + + DDS_inData in_options; + DDS_outData options; + +} Globals, *GlobalsPtr; + + + +// The routines that are dispatched to from the jump list should all be +// defined as +// void RoutineName (GPtr globals); +// And this typedef will be used as the type to create a jump list: +typedef void (* FProc)(GlobalsPtr globals); + + +//------------------------------------------------------------------------------- +// Globals -- definitions and macros +//------------------------------------------------------------------------------- + +#define gResult (*(globals->result)) +#define gStuff (globals->formatParamBlock) + +#define gInOptions (globals->in_options) +#define gOptions (globals->options) + +#define gAliasHandle (globals->aliasHandle) + +//------------------------------------------------------------------------------- +// Prototypes +//------------------------------------------------------------------------------- + + +// Everything comes in and out of PluginMain. It must be first routine in source: +DLLExport MACPASCAL void PluginMain (const short selector, + FormatRecord *formatParamBlock, + intptr_t *data, + short *result); + +// Scripting functions +Boolean ReadScriptParamsOnWrite (GlobalsPtr globals); // Read any scripting params. + +OSErr WriteScriptParamsOnWrite (GlobalsPtr globals); // Write any scripting params. + +//------------------------------------------------------------------------------- + diff --git a/plugin/kps/KPS.r b/plugin/kps/KPS.r new file mode 100755 index 00000000..be8da61b --- /dev/null +++ b/plugin/kps/KPS.r @@ -0,0 +1,456 @@ + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + +//------------------------------------------------------------------------------- +// Definitions -- Required by include files. 
+//------------------------------------------------------------------------------- + +#include "KPSVersion.h" + +// TODO: see if can simplify to kram +#define plugInName "kram" +#define plugInCopyrightYear DDS_Copyright_Year +#define plugInDescription DDS_Description +#define VersionString DDS_Version_String +#define ReleaseString DDS_Build_Date_Manual +#define CurrentYear DDS_Build_Year + +//------------------------------------------------------------------------------- +// Definitions -- Required by other resources in this rez file. +//------------------------------------------------------------------------------- + +// Dictionary (aete) resources: + +#define vendorName "kram" +#define plugInAETEComment DDS_Description + +// TODO: bump this sdk higher? +#define plugInSuiteID 'sdK4' +#define plugInClassID 'kram' +#define plugInEventID typeNull // must be this + +//------------------------------------------------------------------------------- +// Set up included files for Macintosh and Windows. +//------------------------------------------------------------------------------- + + +#if 0 +#include "PIDefines.h" +#else + + +/// Create a definition if we're on a Macintosh +#ifdef _WIN32 + #define __PIWin__ 1 + #define DLLExport extern "C" __declspec(dllexport) + +#else + // this pulls in Carbon.r, but trying not to use Carbon + // also CoreServices/CoreServices.r is pulled in from another Adobe header, which is also Carbon + //#include "MacOMacrezXcode.h" + +#define Macintosh 1 + +#ifndef TARGET_API_MAC_CARBON +#define TARGET_API_MAC_CARBON 1 +#endif + +#ifndef TARGET_MAC_OS +#define TARGET_MAC_OS 1 +#endif + +#include + + + #define __PIMac__ 1 + #define DLLExport extern "C" + + // instead of PIPlatform + #define PRAGMA_ONCE 1 + #define Macintosh 1 +#endif + +#ifdef __PIMac__ + #include "PIGeneral.r" +#elif defined(__PIWin__) + #include "PIGeneral.h" +#endif + + +//#include "PIUtilities.r" +#ifndef ResourceID + #define ResourceID 16000 +#endif + +#include "PITerminology.h" +#include "PIActions.h" + +#include "KPSTerminology.h" + +//------------------------------------------------------------------------------- +// PiPL resource +//------------------------------------------------------------------------------- + +#define USE_KTX 1 +#define USE_KTX2 0 + +resource 'PiPL' (ResourceID, plugInName " PiPL", purgeable) +{ + { + Kind { ImageFormat }, + Name { plugInName }, + + //Category { "KTX" }, + //Priority { 1 }, // Can use this to override a built-in Photoshop plug-in + + Version { (latestFormatVersion << 16) | latestFormatSubVersion }, + + #ifdef __PIMac__ + #if defined(__arm64__) + CodeMacARM64 { "PluginMain" }, + #endif + #if (defined(__x86_64__)) + CodeMacIntel64 { "PluginMain" }, + #endif + #else + #if defined(_WIN64) + CodeWin64X86 { "PluginMain" }, + #endif + + // kram-ps not supporting 32-bit PS + // CodeWin32X86 { "PluginMain" }, + #endif + + // ClassID, eventID, aete ID, uniqueString: + HasTerminology + { + plugInClassID, + plugInEventID, + ResourceID, + vendorName " " plugInName + }, + + SupportedModes + { + noBitmap, + noGrayScale, // TODO: add support + noIndexedColor, + doesSupportRGBColor, // this is the only supported + noCMYKColor, + noHSLColor, + noHSBColor, + noMultichannel, + noDuotone, + noLABColor + }, + + // Using this on macOS to avoid Carbon use. Uses file descriptor, + // but doesn't work on Win. Should really provide FILE* on all platforms. 
+ SupportsPOSIXIO {}, + + + EnableInfo { "in (PSHOP_ImageMode, RGBMode, RGBColorMode)" }, + + // TODO: can't get 'ktx2' extension files to show up + // tried add thta into list below + + #if USE_KTX + // ktx1 and 2 have the same 4 character start, then ' 1' or ' 2' + FmtFileType { 'KTX ', '8BIM' }, + ReadTypes { { 'KTX ', ' ' } }, + ReadExtensions { { 'ktx ' } }, + WriteExtensions { { 'ktx ' } }, + FilteredExtensions { { 'ktx ' } }, + #elif USE_KTX2 + FmtFileType { 'KTX2', '8BIM' }, + ReadTypes { { 'KTX2', ' ' } }, + ReadExtensions { { 'ktx2' } }, + WriteExtensions { { 'ktx2' } } // kram can't write KTX2, only read it + FilteredExtensions { { 'ktx2' } }, + #endif + + FormatFlags + { + fmtSavesImageResources, //(by saying we do, PS won't store them, thereby avoiding problems) + fmtCanRead, + fmtCanWrite, + fmtCanWriteIfRead, + fmtCanWriteTransparency, + fmtCannotCreateThumbnail + }, + + // commented these out, so can have larger array textures + //PlugInMaxSize { 8192, 8192 }, + //FormatMaxSize { { 8192, 8192 } }, + + // ? shouldn't this be 4, not 5? + FormatMaxChannels { { 0, 0, 0, 4, 0, 0, + 0, 0, 0, 0, 0, 0 } }, + + FormatICCFlags { iccCannotEmbedGray, + iccCannotEmbedIndexed, + iccCannotEmbedRGB, + iccCannotEmbedCMYK }, + + // consider this for reading chunks into layers, + // don't need writing can use CopyAllLayers to avoid wait for DoWriteLayer + // FormatLayerSupport{doesSupportFormatLayers}, + // FormatLayerSupportReadOnly{} + + }, +}; + +//------------------------------------------------------------------------------- +// Dictionary (scripting) resource +//------------------------------------------------------------------------------- + +resource 'aete' (ResourceID, plugInName " dictionary", purgeable) +{ + 1, 0, english, roman, /* aete version and language specifiers */ + { + vendorName, /* vendor suite name */ + "kram format", /* optional description */ + plugInSuiteID, /* suite ID */ + 1, /* suite code, must be 1 */ + 1, /* suite level, must be 1 */ + {}, /* structure for filters */ + { /* non-filter plug-in class here */ + "kram", /* unique class name */ + plugInClassID, /* class ID, must be unique or Suite ID */ + plugInAETEComment, /* optional description */ + { /* define inheritance */ + "", /* must be exactly this */ + keyInherits, /* must be keyInherits */ + classFormat, /* parent: Format, Import, Export */ + "parent class format", /* optional description */ + flagsSingleProperty, /* if properties, list below */ + + "Format", + keyDDSformat, + typeEnumerated, + "Output encode format", + flagsSingleProperty, + + // "Alpha Channel", + // keyDDSalpha, + // typeEnumerated, + // "Source of the alpha channel", + // flagsSingleProperty, + // + // "Premultiply", + // keyDDSpremult, + // typeBoolean, + // "Premultiply RGB by Alpha", + // flagsSingleProperty, +// + // "Mipmap", + // keyDDSmipmap, + // typeBoolean, + // "Create Mipmaps", + // flagsSingleProperty, + // + // "Filter", + // keyDDSfilter, + // typeEnumerated, + // "Mipmap filter", + // flagsSingleProperty, +// + // "Cube Map", + // keyDDScubemap, + // typeBoolean, + // "Convert vertical cross to cube map", + // flagsSingleProperty, + }, + {}, /* elements (not supported) */ + /* class descriptions */ + }, + {}, /* comparison ops (not supported) */ + { /* any enumerations */ + typeDDSformat, + { + // explicit + "E8r", + formatR8, + "RGBA8", + + "E8rg", + formatRG8, + "RGBA8", + + "E84", + formatRGBA8, + "RGBA8", + + "E84S", + formatRGBA8S, + "RGBA8 srgb", + + "EHr", + formatR16F, + "RGBA16F", + + "EHrg", + formatRG16F, + 
"RGBA16F", + + "EH4", + formatRGBA16F, + "RGBA16F", + + "EFr", + formatR32F, + "RGBA32F", + + "EFrg", + formatRG32F, + "RGBA32F", + + "EF4", + formatRGBA32F, + "RGBA32F", + + // BC with and without srgb + "BC1", + formatBC1, + "BC1", + + "BC3", + formatBC3, + "BC3", + + "BC4", + formatBC4, + "BC4", + + "BC5", + formatBC5, + "BC5", + + "BC7", + formatBC7, + "BC7", + + + "BC1S", + formatBC1S, + "BC1 srgb", + + "BC3S", + formatBC3S, + "BC3 srgb", + + "BC4S", + formatBC4S, + "BC4 srgb", + + "BC5S", + formatBC5S, + "BC5 srgb", + + "BC7S", + formatBC7S, + "BC7 srgb" + + // TODO: add other formats + } + //typeAlphaChannel, + //{ + // "None", + // alphaChannelNone, + // "No alpha channel", + + // "Transparency", + // alphaChannelTransparency, + // "Get alpha from Transparency", + + // "Channel", + // alphaChannelChannel, + // "Get alpha from channels palette" + //}, + //typeFilter, + //{ + // "Box", + // filterBox, + // "Box filter", + + // "Tent", + // filterTent, + // "Tent filter", + + // "Lanczos4", + // filterLanczos4, + // "Lanczos4 filter", + + // "Mitchell", + // filterMitchell, + // "Mitchell filter", + + // "Kaiser", + // filterKaiser, + // "Kaiser filter" + //} + } + } +}; + + +#ifdef __PIMac__ + +//------------------------------------------------------------------------------- +// Version 'vers' resources. +//------------------------------------------------------------------------------- + +resource 'vers' (1, plugInName " Version", purgeable) +{ + 5, 0x50, final, 0, verUs, + VersionString, + VersionString " ©" plugInCopyrightYear " kram" +}; + +resource 'vers' (2, plugInName " Version", purgeable) +{ + 5, 0x50, final, 0, verUs, + VersionString, + "by Alec Miller (based on DDS plugin by Brendan Bolles)" +}; + + +#endif // __PIMac__ + + diff --git a/plugin/kps/KPSScripting.cpp b/plugin/kps/KPSScripting.cpp new file mode 100755 index 00000000..bb8d2324 --- /dev/null +++ b/plugin/kps/KPSScripting.cpp @@ -0,0 +1,296 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + +#include "PIDefines.h" +#include "KPS.h" + +#include "KPSTerminology.h" +#include "KPSUI.h" +#include "KramImageInfo.h" + +using namespace kram; + +struct Format { + DDS_Format fileFormat; // map to MyMTLPixelFormat + DialogFormat uiFormat; // for UI + uint32_t signature; // 4 char code + MyMTLPixelFormat pixelFormat; // this is what kram uses +}; + +// Note export needs to offer a menu of what format to convert to +// Incoming data is 8u, 16u, or 32f +// Update .r, UI.h file if adding more formats +const Format kFormatTable[] = { + { DDS_FMT_BC1, DIALOG_FMT_BC1, formatBC1, MyMTLPixelFormatBC1_RGBA }, + { DDS_FMT_BC3, DIALOG_FMT_BC3, formatBC3, MyMTLPixelFormatBC3_RGBA }, + { DDS_FMT_BC4, DIALOG_FMT_BC4, formatBC4, MyMTLPixelFormatBC4_RUnorm }, + { DDS_FMT_BC5, DIALOG_FMT_BC5, formatBC5, MyMTLPixelFormatBC5_RGUnorm }, + { DDS_FMT_BC7, DIALOG_FMT_BC7, formatBC7, MyMTLPixelFormatBC7_RGBAUnorm }, + + { DDS_FMT_BC1S, DIALOG_FMT_BC1S, formatBC1S, MyMTLPixelFormatBC1_RGBA_sRGB }, + { DDS_FMT_BC3S, DIALOG_FMT_BC3S, formatBC3S, MyMTLPixelFormatBC3_RGBA_sRGB }, + { DDS_FMT_BC4S, DIALOG_FMT_BC4S, formatBC4S, MyMTLPixelFormatBC4_RSnorm }, + { DDS_FMT_BC5S, DIALOG_FMT_BC5S, formatBC5S, MyMTLPixelFormatBC5_RGSnorm }, + { DDS_FMT_BC7S, DIALOG_FMT_BC7S, formatBC7S, MyMTLPixelFormatBC7_RGBAUnorm_sRGB }, + + // TODO: add ASTC + // TODO: add ETC2 + + { DDS_FMT_R8, DIALOG_FMT_R8, formatR8, MyMTLPixelFormatR8Unorm }, + { DDS_FMT_RG8, DIALOG_FMT_RG8, formatRG8, MyMTLPixelFormatRG8Unorm }, + { DDS_FMT_RGBA8, DIALOG_FMT_RGBA8, formatRGBA8, MyMTLPixelFormatRGBA8Unorm }, + { DDS_FMT_RGBA8S, DIALOG_FMT_RGBA8S, formatRGBA8S, MyMTLPixelFormatRGBA8Unorm_sRGB }, + + { DDS_FMT_R16F, DIALOG_FMT_R16F, formatR16F, MyMTLPixelFormatR16Float }, + { DDS_FMT_RG16F, DIALOG_FMT_RG16F, formatRG16F, MyMTLPixelFormatRG16Float }, + { DDS_FMT_RGBA16F, DIALOG_FMT_RGBA16F, formatRGBA16F, MyMTLPixelFormatRGBA16Float }, + + { DDS_FMT_R32F, DIALOG_FMT_R32F, formatR32F, MyMTLPixelFormatR32Float }, + { DDS_FMT_RG32F, DIALOG_FMT_RG32F, formatRG32F, MyMTLPixelFormatRG32Float }, + { DDS_FMT_RGBA32F, DIALOG_FMT_RGBA32F, formatRGBA32F, MyMTLPixelFormatRGBA32Float }, +}; +const int32_t kFormatTableSize = sizeof(kFormatTable) / sizeof(kFormatTable[0]); + +static DDS_Format SignatureToFormat(OSType fmt) +{ + for (int32_t i = 0; i < kFormatTableSize; ++i) { + if (fmt == kFormatTable[i].signature) { + return kFormatTable[i].fileFormat; + } + } + + return DDS_FMT_RGBA8; +} + +static OSType FormatToSignature(DDS_Format fmt) +{ + for (int32_t i = 0; i < kFormatTableSize; ++i) { + if (fmt == kFormatTable[i].fileFormat) { + return kFormatTable[i].signature; + } + } + + return formatRGBA8; +} + +DialogFormat 
FormatToDialog(DDS_Format fmt)
+{
+    for (int32_t i = 0; i < kFormatTableSize; ++i) {
+        if (fmt == kFormatTable[i].fileFormat) {
+            return kFormatTable[i].uiFormat;
+        }
+    }
+
+    return DIALOG_FMT_RGBA8;
+}
+
+DDS_Format DialogToFormat(DialogFormat fmt)
+{
+    for (int32_t i = 0; i < kFormatTableSize; ++i) {
+        if (fmt == kFormatTable[i].uiFormat) {
+            return kFormatTable[i].fileFormat;
+        }
+    }
+
+    return DDS_FMT_RGBA8;
+}
+
+MyMTLPixelFormat FormatToPixelFormat(DDS_Format fmt)
+{
+    for (int32_t i = 0; i < kFormatTableSize; ++i) {
+        if (fmt == kFormatTable[i].fileFormat) {
+            return kFormatTable[i].pixelFormat;
+        }
+    }
+
+    return MyMTLPixelFormatRGBA8Unorm;
+}
+
+
+//static DDS_Alpha KeyToAlpha(OSType key)
+//{
+//    return (key == alphaChannelNone)          ? DDS_ALPHA_NONE :
+//            (key == alphaChannelTransparency) ? DDS_ALPHA_TRANSPARENCY :
+//            (key == alphaChannelChannel)      ? DDS_ALPHA_CHANNEL :
+//            DDS_ALPHA_TRANSPARENCY;
+//}
+//
+//static DDS_Filter KeyToFilter(OSType key)
+//{
+//    return (key == filterBox ? DDS_FILTER_BOX :
+//            key == filterTent ? DDS_FILTER_TENT :
+//            key == filterLanczos4 ? DDS_FILTER_LANCZOS4 :
+//            key == filterMitchell ? DDS_FILTER_MITCHELL :
+//            key == filterKaiser ? DDS_FILTER_KAISER :
+//            DDS_FILTER_MITCHELL);
+//}
+
+Boolean ReadScriptParamsOnWrite(GlobalsPtr globals)
+{
+    PIReadDescriptor token = NULL;
+    DescriptorKeyID key = 0;
+    DescriptorTypeID type = 0;
+    //OSType shape = 0, create = 0;
+    DescriptorKeyIDArray array = { NULLID };
+    int32 flags = 0;
+    OSErr //gotErr = noErr,
+          stickyError = noErr;
+    Boolean returnValue = true;
+    //int32 storeValue;
+    DescriptorEnumID ostypeStoreValue;
+    //Boolean boolStoreValue;
+
+    if (DescriptorAvailable(NULL))
+    {
+        token = OpenReader(array);
+        if (token)
+        {
+            while (PIGetKey(token, &key, &type, &flags))
+            {
+                switch (key)
+                {
+                    case keyDDSformat:
+                        PIGetEnum(token, &ostypeStoreValue);
+                        gOptions.format = SignatureToFormat(ostypeStoreValue);
+                        break;
+
+//                    case keyDDSalpha:
+//                        PIGetEnum(token, &ostypeStoreValue);
+//                        gOptions.alpha = KeyToAlpha(ostypeStoreValue);
+//                        break;
+//
+//                    case keyDDSpremult:
+//                        PIGetBool(token, &boolStoreValue);
+//                        gOptions.premultiply = boolStoreValue;
+//                        break;
+//
+//                    case keyDDSmipmap:
+//                        PIGetBool(token, &boolStoreValue);
+//                        gOptions.mipmap = boolStoreValue;
+//                        break;
+//
+//                    case keyDDSfilter:
+//                        PIGetEnum(token, &ostypeStoreValue);
+//                        gOptions.filter = KeyToFilter(ostypeStoreValue);
+//                        break;
+//
+//                    case keyDDScubemap:
+//                        PIGetBool(token, &boolStoreValue);
+//                        gOptions.cubemap = boolStoreValue;
+//                        break;
+                }
+            }
+
+            stickyError = CloseReader(&token); // closes & disposes.
+
+            if (stickyError)
+            {
+                if (stickyError == errMissingParameter) // missedParamErr == -1715
+                    ;
+                    /* (descriptorKeyIDArray != NULL)
+                       missing parameter somewhere. Walk IDarray to find which one. */
+                else
+                    gResult = stickyError;
+            }
+        }
+
+        returnValue = PlayDialog();
+        // return TRUE if we want to show our dialog
+    }
+
+    return returnValue;
+}
+
+
+
+//static OSType AlphaToKey(DDS_Alpha alpha)
+//{
+//    return (alpha == DDS_ALPHA_NONE)          ? alphaChannelNone :
+//            (alpha == DDS_ALPHA_TRANSPARENCY) ? alphaChannelTransparency :
+//            (alpha == DDS_ALPHA_CHANNEL)      ? alphaChannelChannel :
+//            alphaChannelTransparency;
+//}
+//
+//static OSType FilterToKey(DDS_Filter filter)
+//{
+//    return (filter == DDS_FILTER_BOX ? filterBox :
+//            filter == DDS_FILTER_TENT ? filterTent :
+//            filter == DDS_FILTER_LANCZOS4 ? filterLanczos4 :
+//            filter == DDS_FILTER_MITCHELL ? filterMitchell :
+//            filter == DDS_FILTER_KAISER ?
filterKaiser : +// filterMitchell); +//} + +OSErr WriteScriptParamsOnWrite(GlobalsPtr globals) +{ + PIWriteDescriptor token = nil; + OSErr gotErr = noErr; + + if (DescriptorAvailable(NULL)) + { + token = OpenWriter(); + if (token) + { + // write keys here + PIPutEnum(token, keyDDSformat, typeDDSformat, FormatToSignature(gOptions.format)); + + //PIPutEnum(token, keyDDSalpha, typeAlphaChannel, AlphaToKey(gOptions.alpha)); + +// if(gOptions.alpha != DDS_ALPHA_NONE) +// PIPutBool(token, keyDDSpremult, gOptions.premultiply); + +// PIPutBool(token, keyDDSmipmap, gOptions.mipmap); +// +// if(gOptions.mipmap) +// PIPutEnum(token, keyDDSfilter, typeFilter, FilterToKey(gOptions.filter)); +// +// PIPutBool(token, keyDDScubemap, gOptions.cubemap); + + gotErr = CloseWriter(&token); /* closes and sets dialog optional */ + /* done. Now pass handle on to Photoshop */ + } + } + return gotErr; +} + + diff --git a/plugin/kps/KPSTerminology.h b/plugin/kps/KPSTerminology.h new file mode 100755 index 00000000..8e249db3 --- /dev/null +++ b/plugin/kps/KPSTerminology.h @@ -0,0 +1,109 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + +#ifndef DDSTerminology_H +#define DDSTerminology_H + +//------------------------------------------------------------------------------- +// Options +//------------------------------------------------------------------------------- + +//------------------------------------------------------------------------------- +// Definitions -- Scripting keys +//------------------------------------------------------------------------------- + +#define keyDDSformat 'kkfm' +//#define keyDDSalpha 'DDSa' +//#define keyDDSpremult 'DDSp' +//#define keyDDSmipmap 'DDSm' +//#define keyDDSfilter 'DDSq' +//#define keyDDScubemap 'DDSc' + +#define typeDDSformat 'ktfm' + +#define formatBC1 'BC1 ' +#define formatBC3 'BC3 ' +#define formatBC4 'BC4 ' +#define formatBC5 'BC5 ' +#define formatBC7 'BC7 ' + +// signed and srgb variants +#define formatBC1S 'BC1S' +#define formatBC3S 'BC3S' +#define formatBC4S 'BC4S' +#define formatBC5S 'BC5S' +#define formatBC7S 'BC7S' + +// explicit +#define formatR8 'U8r ' +#define formatRG8 'U8rg' +#define formatRGBA8 'U84 ' +#define formatRGBA8S 'U84S' + +#define formatR16F 'H4r ' +#define formatRG16F 'H4rg' +#define formatRGBA16F 'H4 ' + +#define formatR32F 'F4r ' +#define formatRG32F 'F4rg' +#define formatRGBA32F 'F4 ' + +// TODO: signed RGBA8? +// TODO: ASTC +// TODO: ETC + +//#define typeAlphaChannel 'alfT' +// +//#define alphaChannelNone 'Nalf' +//#define alphaChannelTransparency 'Talf' +//#define alphaChannelChannel 'Calf' +// +//#define typeFilter 'filT' +// +//#define filterBox 'Bfil' +//#define filterTent 'Tfil' +//#define filterLanczos4 'Lfil' +//#define filterMitchell 'Mfil' +//#define filterKaiser 'Kfil' + +#endif diff --git a/plugin/kps/KPSUI.h b/plugin/kps/KPSUI.h new file mode 100644 index 00000000..60238b81 --- /dev/null +++ b/plugin/kps/KPSUI.h @@ -0,0 +1,167 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + +#ifndef DDSUI_H +#define DDSUI_H + +typedef enum DialogFormat { + // bc + DIALOG_FMT_BC1, + DIALOG_FMT_BC3, + DIALOG_FMT_BC4, + DIALOG_FMT_BC5, + DIALOG_FMT_BC7, + + DIALOG_FMT_BC1S, + DIALOG_FMT_BC3S, + DIALOG_FMT_BC4S, + DIALOG_FMT_BC5S, + DIALOG_FMT_BC7S, + + // TODO: firt decide if lossy formats should be a part of plugin + // this encourages opening/saving and each time loss occurs + // TODO: ETC2 + // TODO: ASTC + + // TODO: consider 4.12 Valve type HDR, or 101010A2 or 111110 + + // lossless formats good for source + // explicit + DIALOG_FMT_R8, + DIALOG_FMT_RG8, + DIALOG_FMT_RGBA8, + DIALOG_FMT_RGBA8S, + + DIALOG_FMT_R16F, + DIALOG_FMT_RG16F, + DIALOG_FMT_RGBA16F, + + DIALOG_FMT_R32F, + DIALOG_FMT_RG32F, + DIALOG_FMT_RGBA32F, +} DialogFormat; + + +#if 1 // Not using any of these + +typedef enum { + DIALOG_ALPHA_NONE, + DIALOG_ALPHA_TRANSPARENCY, + DIALOG_ALPHA_CHANNEL +} DialogAlpha; + +typedef enum { + DIALOG_FILTER_BOX, + DIALOG_FILTER_TENT, + DIALOG_FILTER_LANCZOS4, + DIALOG_FILTER_MITCHELL, + DIALOG_FILTER_KAISER +} Dialog_Filter; + +typedef struct { + DialogAlpha alpha; +} DDS_InUI_Data; + +typedef struct { + DialogFormat format; + DialogAlpha alpha; + bool premultiply; + bool mipmap; + Dialog_Filter filter; + bool cubemap; +} DDS_OutUI_Data; + +#else + +// no real input, just want to drop ktx/2 onto PS and go + +struct DDS_OutUI_Data { + DialogFormat format; +}; +#endif + + +// DDS UI +// +// return true if user hit OK +// if user hit OK, params block will have been modified +// +// plugHndl is bundle identifier string on Mac, hInstance on win +// mwnd is the main window for Windows +// +bool +DDS_InUI( + DDS_InUI_Data *params, + bool has_alpha, + const void *plugHndl, + const void *mwnd); + +bool +DDS_OutUI( + DDS_OutUI_Data *params, + bool have_transparency, + const char *alpha_name, + bool ae_ui, + const void *plugHndl, + const void *mwnd); + +void +DDS_About( + const char *plugin_version_string, + const void *plugHndl, + const void *mwnd); + + +// Mac prefs keys +#define DDS_PREFS_ID "com.ba.kram-ps" +#define DDS_PREFS_ALPHA "Alpha Mode" +#define DDS_PREFS_AUTO "Auto" + + +// Windows registry keys +#define DDS_PREFIX "Software\\ba\\kram-ps" +#define DDS_ALPHA_KEY "Alpha" +#define DDS_AUTO_KEY "Auto" + + +#endif diff --git a/plugin/kps/KPSVersion.h b/plugin/kps/KPSVersion.h new file mode 100755 index 00000000..1c20922e --- /dev/null +++ b/plugin/kps/KPSVersion.h @@ -0,0 +1,58 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. 
+ +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + +#ifndef DDSVerion_H +#define DDSVerion_H + +#define DDS_Major_Version 0 +#define DDS_Minor_Version 6 +#define DDS_Version_String "0.9" +#define DDS_Build_Date __DATE__ +#define DDS_Build_Date_Manual "February 28 2021" +#define DDS_Build_Complete_Manual "v0.9 - " DDS_Build_Date +#define DDS_Copyright_Year "2021" +#define DDS_Build_Year "2021" + +#define DDS_Description "A import/output plugin for using ktx files from kram in Adobe Photoshop." + +#endif diff --git a/plugin/kps/kram-ps.rsrc b/plugin/kps/kram-ps.rsrc new file mode 100644 index 0000000000000000000000000000000000000000..d0022fea60dcc4a2da14dcdc552c6b91e4d513c5 GIT binary patch literal 1399 zcmd^<-EPw`6vt1R7Dfkj5;th#!V(i~Xj->6U5f-0?Ub--R77j}xB{h#(^^fODsiU{ zak0lhh!@~Bcnl=og>#&&Z14zJKJoegY#%$vP5=x5DE|U5(cmlEe^!hy#S7Ouz2p8= z$e_?=)DAJ}#k^pZ@-+2g6vWSXnq7{iG!l!&zE!Z*svFJ6LiRl&b>AdYG?mJIdQd1i zkD`MM@|cdSVqKY_rps zy`^)QL`h0&D~}fs4~w}gJ14{U6p!pQ7@l9{3`@@KMZwEcwb1Nc`Y#JZW&b@cj8#}L z<1;!GFJAbQg6UkKUn%sJ#`sCM6N{nqzN+&SCTZ*`$bu6KvrYr@UY*Z=mx*|mq^h3G z)oiZVOrHV6No<}8IcB)~I$DE;BH|ef(}NmP7Ee|AM2fdAs*2PanS-pot6RLqaO3>VPbU4t7=zVGQYjJcf0 zcWmk7aV-1u_nfLQhP$TSvEFnnR>2eOcFL#*fO=5Xnq}#D1g3pt9a)UsB(}Hjn6_g% zm%7<-%--Q1Rz@va6D#>mGz%;F7Fr7{`7N|IR`T0roHGD=tWXHO9A?M#9G8!OV%dVCf3dwzVCX)y}-uY-FaF=V?Vw(b{l*7hXBz1)4M-9XWfYVtS_R7 zr))F8BRJzhetN|bCf4K z#=!*uLqG-nDg)!276z;{Fh6O(1dq|zzm8k_R#LQ@_5$H6kw`B+zpeFK{7s;~CVm6I C90||> literal 0 HcmV?d00001 diff --git a/plugin/kps/mac/Info.plist b/plugin/kps/mac/Info.plist new file mode 100644 index 00000000..311d1a8c --- /dev/null +++ b/plugin/kps/mac/Info.plist @@ -0,0 +1,22 @@ + + + + + CFBundleDevelopmentRegion + English + CFBundleExecutable + $(PRODUCT_NAME) + CFBundleGetInfoString + 22.0 © 2020 Adobe. All rights reserved. + NSHumanReadableCopyright + © 2020 Adobe. 
All rights reserved. + CFBundleShortVersionString + 22.0.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + $(PLUGIN_TYPE) + CFBundleSignature + 8BIM + + diff --git a/plugin/kps/mac/KPSAbout.xib b/plugin/kps/mac/KPSAbout.xib new file mode 100644 index 00000000..b381b10f --- /dev/null +++ b/plugin/kps/mac/KPSAbout.xib @@ -0,0 +1,72 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/plugin/kps/mac/KPSAboutController.h b/plugin/kps/mac/KPSAboutController.h new file mode 100644 index 00000000..dfdc4374 --- /dev/null +++ b/plugin/kps/mac/KPSAboutController.h @@ -0,0 +1,58 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + + +#import + +@interface KPSAboutController : NSObject { + IBOutlet NSWindow *theWindow; + IBOutlet NSTextField *versionString; +} + +- (id)init:(const char *)version_string; + +- (IBAction)clickedOK:(id)sender; + +- (NSWindow *)getWindow; + +@end diff --git a/plugin/kps/mac/KPSAboutController.mm b/plugin/kps/mac/KPSAboutController.mm new file mode 100644 index 00000000..878dad6d --- /dev/null +++ b/plugin/kps/mac/KPSAboutController.mm @@ -0,0 +1,71 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + + +#import "KPSAboutController.h" + +@implementation KPSAboutController + +- (id)init:(const char *)version_string +{ + self = [super init]; + + if(!([[NSBundle mainBundle] loadNibNamed:@"DDSAbout" owner:self topLevelObjects:nil])) + return nil; + + [versionString setStringValue:[NSString stringWithUTF8String:version_string]]; + + [theWindow center]; + + return self; +} + +- (IBAction)clickedOK:(id)sender { + [NSApp stopModal]; +} + +- (NSWindow *)getWindow { + return theWindow; +} + +@end diff --git a/plugin/kps/mac/KPSInput.xib b/plugin/kps/mac/KPSInput.xib new file mode 100644 index 00000000..825a5183 --- /dev/null +++ b/plugin/kps/mac/KPSInput.xib @@ -0,0 +1,112 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/plugin/kps/mac/KPSInputController.h b/plugin/kps/mac/KPSInputController.h new file mode 100644 index 00000000..681f20ae --- /dev/null +++ b/plugin/kps/mac/KPSInputController.h @@ -0,0 +1,66 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + + +#import + +#include "KPSUI.h" + + +@interface KPSInputController : NSObject { + IBOutlet NSWindow *theWindow; + IBOutlet NSMatrix *alphaMatrix; + IBOutlet NSButton *autoCheckbox; +} +- (id)init:(DialogAlpha)the_alpha + autoDialog:(BOOL)autoDialog; + +- (IBAction)clickedOK:(id)sender; +- (IBAction)clickedCancel:(id)sender; +- (IBAction)clickedSetDefaults:(id)sender; + +- (NSWindow *)getWindow; + +- (DialogAlpha)getAlpha; +- (BOOL)getAuto; +@end diff --git a/plugin/kps/mac/KPSInputController.mm b/plugin/kps/mac/KPSInputController.mm new file mode 100644 index 00000000..872c1355 --- /dev/null +++ b/plugin/kps/mac/KPSInputController.mm @@ -0,0 +1,104 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + + +#import "KPSInputController.h" + +@implementation KPSInputController + +- (id)init:(DialogAlpha)the_alpha autoDialog:(BOOL)autoDialog +{ + self = [super init]; + + if(!([[NSBundle mainBundle] loadNibNamed:@"KPSInput" owner:self topLevelObjects:nil])) + return nil; + + [alphaMatrix selectCellAtRow:(NSInteger)(the_alpha - 1) column:0]; + + [autoCheckbox setState:(autoDialog ? NSControlStateValueOn : NSControlStateValueOff)]; + + [theWindow center]; + + return self; +} + +- (IBAction)clickedOK:(id)sender { + [NSApp stopModal]; +} + +- (IBAction)clickedCancel:(id)sender { + [NSApp abortModal]; +} + +- (IBAction)clickedSetDefaults:(id)sender { + char alphaMode_char = [self getAlpha]; + CFNumberRef alphaMode = CFNumberCreate(kCFAllocatorDefault, kCFNumberCharType, &alphaMode_char); + CFBooleanRef autoRef = (([autoCheckbox state] == NSControlStateValueOn) ? kCFBooleanTrue : kCFBooleanFalse); + + CFPreferencesSetAppValue(CFSTR(DDS_PREFS_ALPHA), alphaMode, CFSTR(DDS_PREFS_ID)); + CFPreferencesSetAppValue(CFSTR(DDS_PREFS_AUTO), autoRef, CFSTR(DDS_PREFS_ID)); + + CFPreferencesAppSynchronize(CFSTR(DDS_PREFS_ID)); + + CFRelease(alphaMode); + CFRelease(autoRef); +} + +- (NSWindow *)getWindow { + return theWindow; +} + +- (DialogAlpha)getAlpha { + switch([alphaMatrix selectedRow]) + { + case 0: return DIALOG_ALPHA_TRANSPARENCY; + case 1: return DIALOG_ALPHA_CHANNEL; + default: return DIALOG_ALPHA_CHANNEL; + } +} + +- (BOOL)getAuto { + return [autoCheckbox state] == NSControlStateValueOn; +} + +@end diff --git a/plugin/kps/mac/KPSOutput.xib b/plugin/kps/mac/KPSOutput.xib new file mode 100644 index 00000000..af59ed13 --- /dev/null +++ b/plugin/kps/mac/KPSOutput.xib @@ -0,0 +1,174 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/plugin/kps/mac/KPSOutputController.h b/plugin/kps/mac/KPSOutputController.h new file mode 100644 index 00000000..0c1f11e8 --- /dev/null +++ b/plugin/kps/mac/KPSOutputController.h @@ -0,0 +1,95 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + + +#import + +#include "KPSUI.h" + +typedef enum { + DIALOG_RESULT_CONTINUE = 0, + DIALOG_RESULT_OK, + DIALOG_RESULT_CANCEL +} DialogResult; + +@interface KPSOutputController : NSObject { + IBOutlet NSWindow *theWindow; + IBOutlet NSPopUpButton *formatPulldown; + IBOutlet NSButton *mipmapCheck; + IBOutlet NSPopUpButton *filterPulldown; + IBOutlet NSTextField *filterLabel; + IBOutlet NSMatrix *alphaMatrix; + IBOutlet NSButton *premultiplyCheck; + IBOutlet NSBox *alphaBox; + IBOutlet NSButton *cubemapCheck; + IBOutlet NSButton *ok_button; + IBOutlet NSButton *cancel_button; + DialogResult theResult; +} +- (id)init:(DialogFormat)format + mipmap:(BOOL)mipmap + filter:(Dialog_Filter)filter + alpha:(DialogAlpha)alpha + premultiply:(BOOL)premultiply + cube_map:(BOOL)cube_map + have_transparency:(BOOL)has_transparency + alpha_name:(const char *)alphaName + ae_ui:(BOOL)ae_ui; + +- (IBAction)clickedOK:(id)sender; +- (IBAction)clickedCancel:(id)sender; + +- (IBAction)trackMipmap:(id)sender; +- (IBAction)trackAlpha:(id)sender; + +- (NSWindow *)getWindow; +- (DialogResult)getResult; + +- (DialogFormat)getFormat; +- (BOOL)getMipmap; +- (Dialog_Filter)getFilter; +- (DialogAlpha)getAlpha; +- (BOOL)getPremultiply; +- (BOOL)getCubeMap; + +@end diff --git a/plugin/kps/mac/KPSOutputController.mm b/plugin/kps/mac/KPSOutputController.mm new file mode 100644 index 00000000..76b1047b --- /dev/null +++ b/plugin/kps/mac/KPSOutputController.mm @@ -0,0 +1,220 @@ + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + + +#import "KPSOutputController.h" + +@implementation KPSOutputController + +- (id)init:(DialogFormat)format + mipmap:(BOOL)mipmap + filter:(Dialog_Filter)filter + alpha:(DialogAlpha)alpha + premultiply:(BOOL)premultiply + cube_map:(BOOL)cube_map + have_transparency:(BOOL)has_transparency + alpha_name:(const char *)alphaName + ae_ui:(BOOL)ae_ui +{ + self = [super init]; + + if(!([[NSBundle mainBundle] loadNibNamed:@"KPSOutput" owner:self topLevelObjects:nil])) + return nil; + + // TODO: strings are hardcode in this arrray, these need to line up with + // actual types now + [formatPulldown addItemsWithTitles: + [NSArray arrayWithObjects: + @"DXT1", + @"DXT1A", + @"DXT2", + @"DXT3", + @"DXT4", + @"DXT5", + @"DXT5A", + @"3Dc", + @"DXN", + @"Uncompressed", + nil] + ]; + [formatPulldown selectItem:[formatPulldown itemAtIndex:format]]; + + + [mipmapCheck setState:(mipmap ? NSControlStateValueOn : NSControlStateValueOff)]; + + + [filterPulldown addItemsWithTitles: + [NSArray arrayWithObjects:@"Box", @"Tent", @"Lanczos4", @"Mitchell", @"Kaiser", nil]]; + [filterPulldown selectItem:[filterPulldown itemAtIndex:filter]]; + + + if(!has_transparency) + { + [[alphaMatrix cellAtRow:1 column:0] setEnabled:FALSE]; + + if(alpha == DIALOG_ALPHA_TRANSPARENCY) + { + alpha = (alphaName ? DIALOG_ALPHA_CHANNEL : DIALOG_ALPHA_NONE); + } + } + + if(alphaName) + { + [[alphaMatrix cellAtRow:2 column:0] setTitle:[NSString stringWithUTF8String:alphaName]]; + } + else + { + [[alphaMatrix cellAtRow:2 column:0] setEnabled:FALSE]; + + if(alpha == DIALOG_ALPHA_CHANNEL) + { + alpha = (has_transparency ? DIALOG_ALPHA_TRANSPARENCY : DIALOG_ALPHA_NONE); + } + } + + [alphaMatrix selectCellAtRow:(NSInteger)alpha column:0]; + + + [premultiplyCheck setState:(premultiply ? NSControlStateValueOn : NSControlStateValueOff)]; + + + [cubemapCheck setState:(cube_map ? 
NSControlStateValueOn : NSControlStateValueOff)]; + + + [self trackMipmap:self]; + [self trackAlpha:self]; + + if(ae_ui) + { + [alphaMatrix setHidden:TRUE]; + [premultiplyCheck setHidden:TRUE]; + [alphaBox setHidden:TRUE]; + + const int shrink = 170; + + NSRect window_frame = [theWindow frame]; + NSRect cube_map_frame = [cubemapCheck frame]; + NSRect ok_frame = [ok_button frame]; + NSRect cancel_frame = [cancel_button frame]; + + window_frame.size.height -= shrink; + cube_map_frame.origin.y += shrink; + ok_frame.origin.y += shrink; + cancel_frame.origin.y += shrink; + + [cubemapCheck setFrame:cube_map_frame]; + [ok_button setFrame:ok_frame]; + [cancel_button setFrame:cancel_frame]; + [theWindow setFrame:window_frame display:TRUE]; + } + + [theWindow center]; + + theResult = DIALOG_RESULT_CONTINUE; + + return self; +} + +- (IBAction)clickedOK:(id)sender { + theResult = DIALOG_RESULT_OK; +} + +- (IBAction)clickedCancel:(id)sender { + theResult = DIALOG_RESULT_CANCEL; +} + +- (IBAction)trackMipmap:(id)sender { + const BOOL enabled = [self getMipmap]; + NSColor *label_color = (enabled ? [NSColor textColor] : [NSColor disabledControlTextColor]); + + [filterPulldown setEnabled:enabled]; + [filterLabel setTextColor:label_color]; + + //[label_color release]; +} + +- (IBAction)trackAlpha:(id)sender { + const BOOL enabled = ([self getAlpha] != DIALOG_ALPHA_NONE); + + [premultiplyCheck setEnabled:enabled]; +} + +- (NSWindow *)getWindow { + return theWindow; +} + +- (DialogResult)getResult { + return theResult; +} + +- (DialogFormat)getFormat { + return (DialogFormat)[formatPulldown indexOfSelectedItem]; +} + +- (BOOL)getMipmap { + return ([mipmapCheck state] == NSControlStateValueOn); +} + +- (Dialog_Filter)getFilter { + return (Dialog_Filter)[filterPulldown indexOfSelectedItem]; +} + +- (DialogAlpha)getAlpha { + switch([alphaMatrix selectedRow]) + { + case 0: return DIALOG_ALPHA_NONE; + case 1: return DIALOG_ALPHA_TRANSPARENCY; + case 2: return DIALOG_ALPHA_CHANNEL; + default: return DIALOG_ALPHA_CHANNEL; + } +} + +- (BOOL)getPremultiply { + return ([premultiplyCheck state] == NSControlStateValueOn); +} + +- (BOOL)getCubeMap { + return ([cubemapCheck state] == NSControlStateValueOn); +} + +@end diff --git a/plugin/kps/mac/KPSUICocoa.mm b/plugin/kps/mac/KPSUICocoa.mm new file mode 100644 index 00000000..a3627457 --- /dev/null +++ b/plugin/kps/mac/KPSUICocoa.mm @@ -0,0 +1,252 @@ + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////// + +// kram - Copyright 2020 by Alec Miller. - MIT License +// The license and copyright notice shall be included +// in all copies or substantial portions of the Software. + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + +#include "KPSUI.h" + +#import "KPSInputController.h" +#import "KPSOutputController.h" +#import "KPSAboutController.h" + +#include "KPSVersion.h" + +#include "PIUtilities.h" + + +bool +DDS_InUI( + DDS_InUI_Data *params, + bool has_alpha, + const void *plugHndl, + const void *mwnd) +{ + bool result = true; + + params->alpha = DIALOG_ALPHA_CHANNEL; + + // get the prefs + BOOL auto_dialog = FALSE; + + CFPropertyListRef alphaMode_val = CFPreferencesCopyAppValue(CFSTR(DDS_PREFS_ALPHA), CFSTR(DDS_PREFS_ID)); + CFPropertyListRef auto_val = CFPreferencesCopyAppValue(CFSTR(DDS_PREFS_AUTO), CFSTR(DDS_PREFS_ID)); + + if(alphaMode_val) + { + char alphaMode_char; + + if( CFNumberGetValue((CFNumberRef)alphaMode_val, kCFNumberCharType, &alphaMode_char) ) + { + params->alpha = (DialogAlpha)alphaMode_char; + } + + CFRelease(alphaMode_val); + } + + if(auto_val) + { + auto_dialog = CFBooleanGetValue((CFBooleanRef)auto_val); + + CFRelease(auto_val); + } + + + // user can force dialog open buy holding shift or option + const NSUInteger flags = [[NSApp currentEvent] modifierFlags]; + const bool shift_key = ( (flags & NSEventModifierFlagShift) || (flags & NSEventModifierFlagOption) ); + + if((has_alpha && auto_dialog) || shift_key) + { + // do the dialog (or maybe not (but we still load the object to get the prefs) + NSString *bundle_id = [NSString stringWithUTF8String:(const char *)plugHndl]; + + Class ui_controller_class = [[NSBundle bundleWithIdentifier:bundle_id] + classNamed:@"KPSInputController"]; + + if(ui_controller_class) + { + KPSInputController *ui_controller = [[ui_controller_class alloc] init:params->alpha + autoDialog:auto_dialog]; + + if(ui_controller) + { + NSWindow *my_window = [ui_controller getWindow]; + + if(my_window) + { + NSInteger modal_result = [NSApp runModalForWindow:my_window]; + + if(modal_result == NSModalResponseStop) + { + params->alpha = [ui_controller getAlpha]; + + result = true; + } + else + result = false; + + + // record the auto pref every time + CFBooleanRef autoRef = ([ui_controller getAuto] ? 
kCFBooleanTrue : kCFBooleanFalse); + CFPreferencesSetAppValue(CFSTR(DDS_PREFS_AUTO), autoRef, CFSTR(DDS_PREFS_ID)); + + CFPreferencesAppSynchronize(CFSTR(DDS_PREFS_ID)); + + + [my_window close]; + } + + //[ui_controller release]; + } + } + } + + + return result; +} + + +bool +DDS_OutUI( + DDS_OutUI_Data *params, + bool have_transparency, + const char *alpha_name, + bool ae_ui, + const void *plugHndl, + const void *mwnd) +{ + bool result = true; + + NSString *bundle_id = [NSString stringWithUTF8String:(const char *)plugHndl]; + + Class ui_controller_class = [[NSBundle bundleWithIdentifier:bundle_id] + classNamed:@"KPSOutputController"]; + + if(ui_controller_class) + { + KPSOutputController *ui_controller = [[ui_controller_class alloc] init: params->format + mipmap: params->mipmap + filter: params->filter + alpha: params->alpha + premultiply: params->premultiply + cube_map: params->cubemap + have_transparency: have_transparency + alpha_name: alpha_name + ae_ui: ae_ui ]; + + if(ui_controller) + { + NSWindow *my_window = [ui_controller getWindow]; + + if(my_window) + { + NSInteger modal_result; + DialogResult dialog_result; + + NSModalSession modal_session = [NSApp beginModalSessionForWindow:my_window]; + + do{ + modal_result = [NSApp runModalSession:modal_session]; + + dialog_result = [ui_controller getResult]; + } + while(dialog_result == DIALOG_RESULT_CONTINUE && modal_result == NSModalResponseContinue); + + [NSApp endModalSession:modal_session]; + + + if(dialog_result == DIALOG_RESULT_OK || modal_result == NSModalResponseStop) + { + params->format = [ui_controller getFormat]; + params->mipmap = [ui_controller getMipmap]; + params->filter = [ui_controller getFilter]; + params->alpha = [ui_controller getAlpha]; + params->premultiply = [ui_controller getPremultiply]; + params->cubemap = [ui_controller getCubeMap]; + + result = true; + } + else + result = false; + + [my_window close]; + } + + //[ui_controller release]; + } + } + + + return result; +} + + +void +DDS_About( + const char *plugin_version_string, + const void *plugHndl, + const void *mwnd) +{ + NSString *bundle_id = [NSString stringWithUTF8String:(const char *)plugHndl]; + + Class about_controller_class = [[NSBundle bundleWithIdentifier:bundle_id] + classNamed:@"KPSAboutController"]; + + if(about_controller_class) + { + KPSAboutController *about_controller = [[about_controller_class alloc] init:plugin_version_string]; + + if(about_controller) + { + NSWindow *the_window = [about_controller getWindow]; + + if(the_window) + { + [NSApp runModalForWindow:the_window]; + + [the_window close]; + } + + //[about_controller release]; + } + } +} + diff --git a/plugin/kps/win/KPSDialogs.rc b/plugin/kps/win/KPSDialogs.rc new file mode 100644 index 00000000..2ed37107 --- /dev/null +++ b/plugin/kps/win/KPSDialogs.rc @@ -0,0 +1,168 @@ +// Microsoft Visual C++ generated resource script. +// +#include "resource.h" + +#define APSTUDIO_READONLY_SYMBOLS +///////////////////////////////////////////////////////////////////////////// +// +// Generated from the TEXTINCLUDE 2 resource. +// +#include "afxres.h" + +///////////////////////////////////////////////////////////////////////////// +#undef APSTUDIO_READONLY_SYMBOLS + +///////////////////////////////////////////////////////////////////////////// +// English (U.S.) 
resources + +#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU) +#ifdef _WIN32 +LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US +#pragma code_page(1252) +#endif //_WIN32 + +#ifdef APSTUDIO_INVOKED +///////////////////////////////////////////////////////////////////////////// +// +// TEXTINCLUDE +// + +1 TEXTINCLUDE +BEGIN + "resource.h\0" +END + +2 TEXTINCLUDE +BEGIN + "#include ""afxres.h""\r\n" + "\0" +END + +3 TEXTINCLUDE +BEGIN + "\r\n" + "\0" +END + +#endif // APSTUDIO_INVOKED + + +///////////////////////////////////////////////////////////////////////////// +// +// Dialog +// + +IN_DIALOG DIALOGEX 0, 0, 242, 127 +STYLE DS_SYSMODAL | DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | DS_CENTER | WS_POPUP | WS_CAPTION | WS_SYSMENU +CAPTION "DDS Input Options" +FONT 8, "MS Shell Dlg", 400, 0, 0x1 +BEGIN + DEFPUSHBUTTON "OK",IDOK,131,105,50,14 + PUSHBUTTON "Cancel",IDCANCEL,185,105,50,14 + CONTROL "Transparency",4,"Button",BS_AUTORADIOBUTTON | WS_GROUP,83,21,113,10 + CONTROL "Channels Palette",5,"Button",BS_AUTORADIOBUTTON,83,36,85,10 + CONTROL "Automatically bring up this dialog",6,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,60,76,121,10 + PUSHBUTTON "Set Defaults",3,9,105,61,14 + GROUPBOX "Alpha Channel Handling",IDC_STATIC,68,7,107,51 +END + +OUT_DIALOG DIALOGEX 0, 0, 151, 234 +STYLE DS_SYSMODAL | DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | DS_CENTER | WS_POPUP | WS_CAPTION | WS_SYSMENU +CAPTION "DDS Options" +FONT 8, "MS Shell Dlg", 400, 0, 0x1 +BEGIN + DEFPUSHBUTTON "OK",IDOK,38,214,50,14 + PUSHBUTTON "Cancel",IDCANCEL,95,214,50,14 + GROUPBOX "Alpha Channel",11,25,88,96,90 + CONTROL "None",7,"Button",BS_AUTORADIOBUTTON | WS_GROUP,37,102,33,10 + CONTROL "Transparency",8,"Button",BS_AUTORADIOBUTTON,37,122,60,10 + CONTROL "Channels Palette",9,"Button",BS_AUTORADIOBUTTON,37,142,82,10 + COMBOBOX 3,56,12,68,30,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP + RTEXT "Format:",IDC_STATIC,14,14,37,8 + CONTROL "Mipmap",4,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,56,40,40,10 + COMBOBOX 5,56,56,68,30,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP + RTEXT "Filter:",6,14,58,37,8 + CONTROL "Premultiply",10,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,56,160,53,10 + CONTROL "Cube Map",12,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,56,188,48,10 +END + +ABOUT_DIALOG DIALOGEX 0, 0, 242, 125 +STYLE DS_SYSMODAL | DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | DS_CENTER | WS_POPUP | WS_CAPTION | WS_SYSMENU +CAPTION "About WebP" +FONT 8, "MS Shell Dlg", 400, 0, 0x1 +BEGIN + DEFPUSHBUTTON "OK",IDOK,95,100,50,14 + CTEXT "DDS Photoshop Plug-In",IDC_STATIC,74,40,98,8 + CTEXT "plug-in version",4,61,60,118,8 +END + + +///////////////////////////////////////////////////////////////////////////// +// +// Version +// + +VS_VERSION_INFO VERSIONINFO + FILEVERSION 0,6,0,0 + PRODUCTVERSION 0,6,0,0 + FILEFLAGSMASK 0x17L +#ifdef _DEBUG + FILEFLAGS 0x1L +#else + FILEFLAGS 0x0L +#endif + FILEOS 0x4L + FILETYPE 0x2L + FILESUBTYPE 0x0L +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904b0" + BEGIN + VALUE "FileDescription", "DDS" + VALUE "FileVersion", "0.6" + VALUE "InternalName", "DDS" + VALUE "LegalCopyright", "Copyright (C) 2014-2018" + VALUE "OriginalFilename", "DDS.dll" + VALUE "ProductName", "DDS Photoshop Plug-in" + VALUE "ProductVersion", "2.0" + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x409, 1200 + END +END + + +///////////////////////////////////////////////////////////////////////////// +// +// DESIGNINFO +// + +#ifdef APSTUDIO_INVOKED +GUIDELINES DESIGNINFO +BEGIN + "OUT_DIALOG", DIALOG + BEGIN + RIGHTMARGIN, 145 + BOTTOMMARGIN, 
228 + END +END +#endif // APSTUDIO_INVOKED + +#endif // English (U.S.) resources +///////////////////////////////////////////////////////////////////////////// + + + +#ifndef APSTUDIO_INVOKED +///////////////////////////////////////////////////////////////////////////// +// +// Generated from the TEXTINCLUDE 3 resource. +// + + +///////////////////////////////////////////////////////////////////////////// +#endif // not APSTUDIO_INVOKED + diff --git a/plugin/kps/win/KPSInputDialog.cpp b/plugin/kps/win/KPSInputDialog.cpp new file mode 100644 index 00000000..0200e495 --- /dev/null +++ b/plugin/kps/win/KPSInputDialog.cpp @@ -0,0 +1,217 @@ + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + +#ifdef _WIN32 + +#include "DDS.h" + +#include "DDS_UI.h" + +#include + +enum { + IN_noUI = -1, + IN_OK = IDOK, + IN_Cancel = IDCANCEL, + IN_Set_Defaults_Button, + IN_Alpha_Radio_Transparent, + IN_Alpha_Radio_Channel, + IN_Auto_Checkbox +}; + +// sensible Win macros +#define GET_ITEM(ITEM) GetDlgItem(hwndDlg, (ITEM)) + +#define SET_CHECK(ITEM, VAL) SendMessage(GET_ITEM(ITEM), BM_SETCHECK, (WPARAM)(VAL), (LPARAM)0) +#define GET_CHECK(ITEM) SendMessage(GET_ITEM(ITEM), BM_GETCHECK, (WPARAM)0, (LPARAM)0) + +#define ENABLE_ITEM(ITEM, ENABLE) EnableWindow(GetDlgItem(hwndDlg, (ITEM)), (ENABLE)); + + +static DialogAlpha g_alpha = DIALOG_ALPHA_CHANNEL; +static bool g_autoD = false; + + +static void ReadPrefs() +{ + // read prefs from registry + HKEY dds_hkey; + LONG reg_error = RegOpenKeyEx(HKEY_CURRENT_USER, DDS_PREFIX, 0, KEY_READ, &dds_hkey); + + if(reg_error == ERROR_SUCCESS) + { + DWORD type; + DWORD size = sizeof(DWORD); + + DWORD alpha = g_alpha, + autoD = g_autoD; + + reg_error = RegQueryValueEx(dds_hkey, DDS_ALPHA_KEY, NULL, &type, (LPBYTE)&alpha, &size); + + reg_error = RegQueryValueEx(dds_hkey, DDS_AUTO_KEY, NULL, &type, (LPBYTE)&autoD, &size); + + if(reg_error == ERROR_SUCCESS && type == REG_DWORD) + g_autoD = autoD; + + reg_error = RegCloseKey(dds_hkey); + } +} + +static void WriteAlphaPrefs() +{ + HKEY dds_hkey; + + LONG reg_error = RegCreateKeyEx(HKEY_CURRENT_USER, DDS_PREFIX, NULL, NULL, REG_OPTION_NON_VOLATILE, KEY_WRITE, NULL, &dds_hkey, NULL); + + if(reg_error == ERROR_SUCCESS) + { + DWORD alpha = g_alpha; + + reg_error = RegSetValueEx(dds_hkey, DDS_ALPHA_KEY, NULL, REG_DWORD, (BYTE *)&alpha, sizeof(DWORD)); + + reg_error = RegCloseKey(dds_hkey); + } +} + +static void WriteAutoPrefs() +{ + HKEY dds_hkey; + + LONG reg_error = RegCreateKeyEx(HKEY_CURRENT_USER, DDS_PREFIX, NULL, NULL, REG_OPTION_NON_VOLATILE, KEY_WRITE, NULL, &dds_hkey, NULL); + + if(reg_error == ERROR_SUCCESS) + { + DWORD autoD = g_autoD; + + reg_error = RegSetValueEx(dds_hkey, DDS_AUTO_KEY, NULL, REG_DWORD, (BYTE *)&autoD, sizeof(DWORD)); + + reg_error = RegCloseKey(dds_hkey); + } +} + + +static WORD g_item_clicked = 0; + +static BOOL CALLBACK DialogProc(HWND hwndDlg, UINT message, WPARAM wParam, LPARAM lParam) +{ + BOOL fError; + + switch(message) + { + case WM_INITDIALOG: + SET_CHECK( (g_alpha == DIALOG_ALPHA_TRANSPARENCY ? IN_Alpha_Radio_Transparent : + g_alpha == DIALOG_ALPHA_CHANNEL ? IN_Alpha_Radio_Channel : + IN_Alpha_Radio_Transparent), TRUE); + + SET_CHECK(IN_Auto_Checkbox, g_autoD); + + return TRUE; + + case WM_NOTIFY: + return FALSE; + + case WM_COMMAND: + g_alpha = GET_CHECK(IN_Alpha_Radio_Transparent) ? DIALOG_ALPHA_TRANSPARENCY : + GET_CHECK(IN_Alpha_Radio_Channel) ? 
DIALOG_ALPHA_CHANNEL : + DIALOG_ALPHA_TRANSPARENCY; + + g_autoD = GET_CHECK(IN_Auto_Checkbox); + + g_item_clicked = LOWORD(wParam); + + switch(g_item_clicked) + { + case IN_OK: + case IN_Cancel: + EndDialog(hwndDlg, 0); + return TRUE; + + case IN_Set_Defaults_Button: + WriteAlphaPrefs(); + WriteAutoPrefs(); + return TRUE; + } + } + return FALSE; +} + + +static inline bool KeyIsDown(int vKey) +{ + return (GetAsyncKeyState(vKey) & 0x8000); +} + + +bool +DDS_InUI( + DDS_InUI_Data *params, + bool has_alpha, + const void *plugHndl, + const void *mwnd) +{ + bool continue_reading = true; + + g_alpha = DIALOG_ALPHA_CHANNEL; + g_autoD = false; + + ReadPrefs(); + + // check for that shift key + bool shift_key = ( KeyIsDown(VK_LSHIFT) || KeyIsDown(VK_RSHIFT) || KeyIsDown(VK_LMENU) || KeyIsDown(VK_RMENU) ); + + if((g_autoD && has_alpha) || shift_key) + { + int status = DialogBox((HINSTANCE)plugHndl, (LPSTR)"IN_DIALOG", (HWND)mwnd, (DLGPROC)DialogProc); + + if(g_item_clicked == IN_OK) + { + WriteAutoPrefs(); + + continue_reading = true; + } + else + continue_reading = false; + } + + params->alpha = g_alpha; + + return continue_reading; +} + +#endif diff --git a/plugin/kps/win/KPSOutputDialog.cpp b/plugin/kps/win/KPSOutputDialog.cpp new file mode 100644 index 00000000..13b04d76 --- /dev/null +++ b/plugin/kps/win/KPSOutputDialog.cpp @@ -0,0 +1,371 @@ + +/////////////////////////////////////////////////////////////////////////// +// +// Copyright (c) 2014, Brendan Bolles +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +/////////////////////////////////////////////////////////////////////////// + +// ------------------------------------------------------------------------ +// +// DDS Photoshop plug-in +// +// by Brendan Bolles +// +// ------------------------------------------------------------------------ + +#if _WIN32 + +#include "DDS.h" + +#include "DDS_UI.h" +#include "DDS_version.h" + +#include + +#include + +enum { + OUT_noUI = -1, + OUT_OK = IDOK, + OUT_Cancel = IDCANCEL, + OUT_Format_Menu, + OUT_Mipmap_Check, + OUT_Filter_Menu, + OUT_Filter_Menu_Label, + OUT_Alpha_Radio_None, + OUT_Alpha_Radio_Transparency, + OUT_Alpha_Radio_Channel, + OUT_Premultiply_Check, + OUT_Alpha_Frame, + OUT_CubeMap_Check +}; + +// sensible Win macros +#define GET_ITEM(ITEM) GetDlgItem(hwndDlg, (ITEM)) + +#define SET_CHECK(ITEM, VAL) SendMessage(GET_ITEM(ITEM), BM_SETCHECK, (WPARAM)(VAL), (LPARAM)0) +#define GET_CHECK(ITEM) SendMessage(GET_ITEM(ITEM), BM_GETCHECK, (WPARAM)0, (LPARAM)0) + +#define ENABLE_ITEM(ITEM, ENABLE) EnableWindow(GetDlgItem(hwndDlg, (ITEM)), (ENABLE)); + +#define SHOW_ITEM(ITEM, SHOW) ShowWindow(GetDlgItem(hwndDlg, (ITEM)), (SHOW) ? SW_SHOW : SW_HIDE) + + + +static DialogFormat g_format = DIALOG_FMT_DXT5; +static DialogAlpha g_alpha = DIALOG_ALPHA_NONE; +static bool g_premultiply = false; +static bool g_mipmap = false; +static Dialog_Filter g_filter = DIALOG_FILTER_MITCHELL; +static bool g_cubemap = false; + +static bool g_have_transparency = false; +static const char *g_alpha_name = NULL; +static bool g_ae_ui = false; + +static WORD g_item_clicked = 0; + + +static void TrackMipmap(HWND hwndDlg) +{ + BOOL enable_state = GET_CHECK(OUT_Mipmap_Check); + ENABLE_ITEM(OUT_Filter_Menu, enable_state); + ENABLE_ITEM(OUT_Filter_Menu_Label, enable_state); +} + + +static void TrackAlpha(HWND hwndDlg) +{ + BOOL enable_state = !GET_CHECK(OUT_Alpha_Radio_None); + ENABLE_ITEM(OUT_Premultiply_Check, enable_state); +} + + +static BOOL CALLBACK DialogProc(HWND hwndDlg, UINT message, WPARAM wParam, LPARAM lParam) +{ + BOOL fError; + + switch(message) + { + case WM_INITDIALOG: + do{ + // set up the menu + // I prefer to do it programatically to insure that the compression types match the index + const char *opts[] = { "DXT1", + "DXT1A", + "DXT2", + "DXT3", + "DXT4", + "DXT5", + "DXT5A", + "3Dc", + "DXN", + "Uncompressed" }; + + HWND menu = GetDlgItem(hwndDlg, OUT_Format_Menu); + + for(int i=DIALOG_FMT_DXT1; i <= DIALOG_FMT_UNCOMPRESSED; i++) + { + SendMessage(menu, (UINT)CB_ADDSTRING, (WPARAM)wParam, (LPARAM)(LPCTSTR)opts[i] ); + SendMessage(menu, (UINT)CB_SETITEMDATA, (WPARAM)i, (LPARAM)(DWORD)i); // this is the compresion number + + if(i == g_format) + SendMessage(menu, CB_SETCURSEL, (WPARAM)i, (LPARAM)0); + } + + + const char *f_opts[] = {"Box", + "Tent", + "Lanczos4", + "Mitchell", + "Kaiser" }; + + HWND f_menu = GetDlgItem(hwndDlg, OUT_Filter_Menu); + + for(int i=DIALOG_FILTER_BOX; i <= DIALOG_FILTER_KAISER; i++) + { + SendMessage(f_menu, (UINT)CB_ADDSTRING, (WPARAM)wParam, (LPARAM)(LPCTSTR)f_opts[i] ); + SendMessage(f_menu, (UINT)CB_SETITEMDATA, (WPARAM)i, (LPARAM)(DWORD)i); // this is the compresion number + + if(i == g_filter) + SendMessage(f_menu, CB_SETCURSEL, (WPARAM)i, (LPARAM)0); + } + }while(0); + + SET_CHECK(OUT_Mipmap_Check, g_mipmap); + + if(!g_have_transparency) + { + ENABLE_ITEM(OUT_Alpha_Radio_Transparency, FALSE); + + if(g_alpha == DIALOG_ALPHA_TRANSPARENCY) + { + g_alpha = (g_alpha_name != NULL ? 
DIALOG_ALPHA_CHANNEL : DIALOG_ALPHA_NONE); + } + } + + if(g_alpha_name == NULL) + { + ENABLE_ITEM(OUT_Alpha_Radio_Channel, FALSE); + + if(g_alpha == DIALOG_ALPHA_CHANNEL) + { + g_alpha = (g_have_transparency ? DIALOG_ALPHA_TRANSPARENCY : DIALOG_ALPHA_NONE); + } + } + else + { + SetDlgItemText(hwndDlg, OUT_Alpha_Radio_Channel, g_alpha_name); + } + + SET_CHECK(OUT_Premultiply_Check, g_premultiply); + + SET_CHECK( (g_alpha == DIALOG_ALPHA_NONE ? OUT_Alpha_Radio_None : + g_alpha == DIALOG_ALPHA_TRANSPARENCY ? OUT_Alpha_Radio_Transparency : + g_alpha == DIALOG_ALPHA_CHANNEL ? OUT_Alpha_Radio_Channel : + OUT_Alpha_Radio_None), TRUE); + + SET_CHECK(OUT_CubeMap_Check, g_cubemap); + + TrackAlpha(hwndDlg); + TrackMipmap(hwndDlg); + + if(g_ae_ui) + { + for(int i = OUT_Alpha_Radio_None; i <= OUT_Alpha_Frame; i++) + SHOW_ITEM(i, false); + + WINDOWPLACEMENT winPlace, cubemapPlace, okPlace, cancelPlace; + winPlace.length = cubemapPlace.length = okPlace.length = cancelPlace.length = sizeof(WINDOWPLACEMENT); + + GetWindowPlacement(hwndDlg, &winPlace); + GetWindowPlacement(GET_ITEM(OUT_CubeMap_Check), &cubemapPlace); + GetWindowPlacement(GET_ITEM(OUT_OK), &okPlace); + GetWindowPlacement(GET_ITEM(OUT_Cancel), &cancelPlace); + + const int resize = 170; + + winPlace.rcNormalPosition.bottom -= resize; + cubemapPlace.rcNormalPosition.top -= resize; + cubemapPlace.rcNormalPosition.bottom -= resize; + okPlace.rcNormalPosition.top -= resize; + okPlace.rcNormalPosition.bottom -= resize; + cancelPlace.rcNormalPosition.top -= resize; + cancelPlace.rcNormalPosition.bottom -= resize; + + SetWindowPlacement(GET_ITEM(OUT_CubeMap_Check), &cubemapPlace); + SetWindowPlacement(GET_ITEM(OUT_Cancel), &cancelPlace); + SetWindowPlacement(GET_ITEM(OUT_OK), &okPlace); + SetWindowPlacement(hwndDlg, &winPlace); + } + + return TRUE; + + case WM_NOTIFY: + return FALSE; + + case WM_COMMAND: + g_item_clicked = LOWORD(wParam); + + switch(g_item_clicked) + { + case OUT_OK: + case OUT_Cancel: // do the same thing, but g_item_clicked will be different + do{ + HWND menu = GetDlgItem(hwndDlg, OUT_Format_Menu); + + // get the channel index associated with the selected menu item + LRESULT cur_sel = SendMessage(menu,(UINT)CB_GETCURSEL, (WPARAM)0, (LPARAM)0); + + g_format = (DialogFormat)SendMessage(menu, (UINT)CB_GETITEMDATA, (WPARAM)cur_sel, (LPARAM)0); + + g_alpha = GET_CHECK(OUT_Alpha_Radio_None) ? DIALOG_ALPHA_NONE : + GET_CHECK(OUT_Alpha_Radio_Transparency) ? DIALOG_ALPHA_TRANSPARENCY : + GET_CHECK(OUT_Alpha_Radio_Channel) ? 
DIALOG_ALPHA_CHANNEL : + DIALOG_ALPHA_TRANSPARENCY; + + g_premultiply = GET_CHECK(OUT_Premultiply_Check); + + g_mipmap = GET_CHECK(OUT_Mipmap_Check); + + HWND f_menu = GetDlgItem(hwndDlg, OUT_Filter_Menu); + cur_sel = SendMessage(f_menu,(UINT)CB_GETCURSEL, (WPARAM)0, (LPARAM)0); + g_filter = (Dialog_Filter)SendMessage(f_menu, (UINT)CB_GETITEMDATA, (WPARAM)cur_sel, (LPARAM)0); + + g_cubemap = GET_CHECK(OUT_CubeMap_Check); + + EndDialog(hwndDlg, 0); + return TRUE; + }while(0); + + case OUT_Alpha_Radio_None: + case OUT_Alpha_Radio_Transparency: + case OUT_Alpha_Radio_Channel: + TrackAlpha(hwndDlg); + return TRUE; + + + case OUT_Mipmap_Check: + TrackMipmap(hwndDlg); + return TRUE; + } + } + return FALSE; +} + +bool +DDS_OutUI( + DDS_OutUI_Data *params, + bool have_transparency, + const char *alpha_name, + bool ae_ui, + const void *plugHndl, + const void *mwnd) +{ + g_format = params->format; + g_alpha = params->alpha; + g_premultiply = params->premultiply; + g_mipmap = params->mipmap; + g_filter = params->filter; + g_mipmap = params->mipmap; + + g_have_transparency = have_transparency; + g_alpha_name = alpha_name; + g_ae_ui = ae_ui; + + if(ae_ui) + { + g_alpha = DIALOG_ALPHA_TRANSPARENCY; + g_premultiply = false; + assert(g_alpha_name == NULL); + } + + int status = DialogBox((HINSTANCE)plugHndl, (LPSTR)"OUT_DIALOG", (HWND)mwnd, (DLGPROC)DialogProc); + + + if(g_item_clicked == OUT_OK) + { + params->format = g_format; + params->alpha = g_alpha; + params->premultiply = g_premultiply; + params->mipmap = g_mipmap; + params->filter = g_filter; + params->cubemap = g_cubemap; + + return true; + } + else + return false; +} + + +enum { + ABOUT_noUI = -1, + ABOUT_OK = IDOK, + ABOUT_Plugin_Version_String = 4, +}; + +static const char *g_plugin_version_string = NULL; + +static BOOL CALLBACK AboutProc(HWND hwndDlg, UINT message, WPARAM wParam, LPARAM lParam) +{ + BOOL fError; + + switch(message) + { + case WM_INITDIALOG: + SetDlgItemText(hwndDlg, ABOUT_Plugin_Version_String, g_plugin_version_string); + + return TRUE; + + case WM_NOTIFY: + return FALSE; + + case WM_COMMAND: + switch(LOWORD(wParam)) + { + case OUT_OK: + case OUT_Cancel: + EndDialog(hwndDlg, 0); + return TRUE; + } + } + return FALSE; +} + +void +DDS_About( + const char *plugin_version_string, + const void *plugHndl, + const void *mwnd) +{ + g_plugin_version_string = plugin_version_string; + + int status = DialogBox((HINSTANCE)plugHndl, (LPSTR)"ABOUT_DIALOG", (HWND)mwnd, (DLGPROC)AboutProc); +} + +#endif diff --git a/plugin/kps/win/resource.h b/plugin/kps/win/resource.h new file mode 100644 index 00000000..a70236c9 --- /dev/null +++ b/plugin/kps/win/resource.h @@ -0,0 +1,20 @@ +//{{NO_DEPENDENCIES}} +// Microsoft Visual C++ generated include file. +// Used by DDS_Dialogs.rc +// +#define IDC_RADIO1 1001 +#define IDC_RADIO2 1002 +#define IDC_RADIO3 1003 +#define IDC_RADIO4 1004 +#define IDC_CHECK1 1016 + +// Next default values for new objects +// +#ifdef APSTUDIO_INVOKED +#ifndef APSTUDIO_READONLY_SYMBOLS +#define _APS_NEXT_RESOURCE_VALUE 103 +#define _APS_NEXT_COMMAND_VALUE 40001 +#define _APS_NEXT_CONTROL_VALUE 1017 +#define _APS_NEXT_SYMED_VALUE 101 +#endif +#endif From b399d163c7262fac59ae70613bdfb042b746ba43 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 9 Mar 2021 22:35:10 -0800 Subject: [PATCH 016/901] kram-ps - fix resource gen from cmake and correct Info.plist file Build is still not finding _main(). Will try adding one, even though other working plugin doesn't have one. 
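For anyone debugging the resource step by hand: the custom Rez rule touched in the diff below expands to roughly the command line sketched here. This is only a sketch, not the exact build command — SDK_SOURCE_DIR, the output name, and the sysroot are the values configured in plugin/CMakeLists.txt and a default Xcode install.

    # locate and run Apple's Rez resource compiler manually,
    # mirroring the include paths and sysroot used by the CMake rule
    xcrun Rez \
        -I ${SDK_SOURCE_DIR}/resources/ \
        -I ${SDK_SOURCE_DIR}/photoshop/ \
        -arch x86_64 \
        -isysroot /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/ \
        -o KPS.rsrc \
        KPS.r
    # if the output looks suspect, DeRez (xcrun DeRez KPS.rsrc) should dump the compiled resources

Running this from a terminal is a quick way to confirm KPS.rsrc is actually produced before wiring it into the plugin bundle.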
--- plugin/CMakeLists.txt | 6 +++--- plugin/kps/KPS.r | 34 +++++++++++++++++++++---------- plugin/kps/mac/Info.plist | 42 +++++++++++++++++++-------------------- 3 files changed, 46 insertions(+), 36 deletions(-) mode change 100644 => 100755 plugin/kps/mac/Info.plist diff --git a/plugin/CMakeLists.txt b/plugin/CMakeLists.txt index de2c32bc..98eab9a5 100644 --- a/plugin/CMakeLists.txt +++ b/plugin/CMakeLists.txt @@ -56,7 +56,7 @@ set_target_properties(${myTargetApp} PROPERTIES # this drops app from 762KB to 174KB with only ATE enabled # note about needing -gfull instead of -gused here or debug info messed up: # https://gist.github.com/tkersey/39b4fe69e14b859889ffadccb009e397 - XCODE_ATTRIBUTE_DEAD_CODE_STRIPPING YES + #XCODE_ATTRIBUTE_DEAD_CODE_STRIPPING YES XCODE_ATTRIBUTE_LLVM_LTO[variant=Release] "Incremental" #------------------------- @@ -244,14 +244,14 @@ add_custom_command(TARGET ${myTargetApp} PRE_BUILD COMMAND ${rezCompiler} -I ${SDK_SOURCE_DIR}/resources/ -I ${SDK_SOURCE_DIR}/photoshop/ - -I ${SDK_COMMON_DIR}/includes/ + # -I ${SDK_COMMON_DIR}/includes/ -arch x86_64 # needs this for Carbon.r and CoreServices.r in the Adobe .r headers #-F Carbon #-F CoreServices - -F /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks/ + -isysroot /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/ -o "${KPS_SOURCE_DIR}/${myTargetApp}.rsrc" ${KPS_SOURCE_DIR}/KPS.r diff --git a/plugin/kps/KPS.r b/plugin/kps/KPS.r index be8da61b..50dc6ca7 100755 --- a/plugin/kps/KPS.r +++ b/plugin/kps/KPS.r @@ -85,18 +85,30 @@ // also CoreServices/CoreServices.r is pulled in from another Adobe header, which is also Carbon //#include "MacOMacrezXcode.h" -#define Macintosh 1 - -#ifndef TARGET_API_MAC_CARBON -#define TARGET_API_MAC_CARBON 1 -#endif + #define Macintosh 1 + #define MSWindows 0 + #define Rez 1 + + #ifndef TARGET_MAC_OS + #define TARGET_MAC_OS 1 + #endif + + #ifndef DEBUG + #ifndef NDEBUG + #define DEBUG 1 + #else + #define DEBUG 0 + #endif + #endif -#ifndef TARGET_MAC_OS -#define TARGET_MAC_OS 1 -#endif + #define BUILDING_FOR_MACH 1 -#include + // can this carbon dependency be eliminated? + #ifndef TARGET_API_MAC_CARBON + #define TARGET_API_MAC_CARBON 1 + #endif + #include #define __PIMac__ 1 #define DLLExport extern "C" @@ -106,14 +118,14 @@ #define Macintosh 1 #endif -#ifdef __PIMac__ +#if defined(__PIMac__) #include "PIGeneral.r" #elif defined(__PIWin__) #include "PIGeneral.h" #endif - //#include "PIUtilities.r" + #ifndef ResourceID #define ResourceID 16000 #endif diff --git a/plugin/kps/mac/Info.plist b/plugin/kps/mac/Info.plist old mode 100644 new mode 100755 index 311d1a8c..0537f0ac --- a/plugin/kps/mac/Info.plist +++ b/plugin/kps/mac/Info.plist @@ -1,22 +1,20 @@ - - - - - CFBundleDevelopmentRegion - English - CFBundleExecutable - $(PRODUCT_NAME) - CFBundleGetInfoString - 22.0 © 2020 Adobe. All rights reserved. - NSHumanReadableCopyright - © 2020 Adobe. All rights reserved. 
- CFBundleShortVersionString - 22.0.0 - CFBundleName - $(PRODUCT_NAME) - CFBundlePackageType - $(PLUGIN_TYPE) - CFBundleSignature - 8BIM - - + + + + + CFBundleDevelopmentRegion + English + CFBundleExecutable + $(PRODUCT_NAME) + CFBundleGetInfoString + ©2021 kram-ps + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + $(PLUGIN_TYPE) + CFBundleSignature + 8BIM + + From e02ec584344a3ba824d0c54d020f0450d41c62c1 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 9 Mar 2021 22:45:08 -0800 Subject: [PATCH 017/901] kram-ps - add dummy main() to KPS.cpp to get the plugin to link PS doesn't recognize the plugin. Will have to make sure all entry points are correct. Something might ref DDS. --- plugin/kps/KPS.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/plugin/kps/KPS.cpp b/plugin/kps/KPS.cpp index c45e07a0..80562844 100755 --- a/plugin/kps/KPS.cpp +++ b/plugin/kps/KPS.cpp @@ -1138,3 +1138,10 @@ DLLExport MACPASCAL void PluginMain(const short selector, } } + +// Tthis is just to silence broken build. +// Even though this is a plugin, Xcode wants _main or won't link. +int main(int macroUnusedArg(argc), char** macroUnusedArg(argv)) +{ + return 0; +} From f8f22cac7267f5828a0f648f389b1609dd7d3848 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Wed, 10 Mar 2021 08:43:06 -0800 Subject: [PATCH 018/901] kram-ps - call PluginMain to prevent dead-strip of code, fix rsrc gen, turn on rsrc creation PS still doesn't see this plugin at all. I tried with and without the working rsrc file from the legacy project. Only the legacy project works, but that's not checked in. --- plugin/CMakeLists.txt | 16 +++++++++++++--- plugin/kps/KPS.cpp | 11 ++++++++++- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/plugin/CMakeLists.txt b/plugin/CMakeLists.txt index 98eab9a5..c1ea92db 100644 --- a/plugin/CMakeLists.txt +++ b/plugin/CMakeLists.txt @@ -56,7 +56,7 @@ set_target_properties(${myTargetApp} PROPERTIES # this drops app from 762KB to 174KB with only ATE enabled # note about needing -gfull instead of -gused here or debug info messed up: # https://gist.github.com/tkersey/39b4fe69e14b859889ffadccb009e397 - #XCODE_ATTRIBUTE_DEAD_CODE_STRIPPING YES + XCODE_ATTRIBUTE_DEAD_CODE_STRIPPING YES XCODE_ATTRIBUTE_LLVM_LTO[variant=Release] "Incremental" #------------------------- @@ -203,6 +203,9 @@ target_sources(${myTargetApp} PRIVATE # Base.lproj/Main.storyboard ${appNibSources} + # this is created in the PRE_BUILD step below + ${KPS_SOURCE_DIR}/${myTargetApp}.rsrc + ${KPS_SOURCE_DIR}/mac/Info.plist # ${KPS_SOURCE_DIR}/mac/Info.plist @@ -231,7 +234,7 @@ set_source_files_properties( # turned off for now, and checking in pre-built resource # but app still can't find _main entrpoint. 
-if (FALSE) +if (TRUE) execute_process( COMMAND xcrun -f Rez @@ -242,17 +245,24 @@ execute_process( add_custom_command(TARGET ${myTargetApp} PRE_BUILD DEPENDS ${KPS_SOURCE_DIR}/KPS.r COMMAND ${rezCompiler} + + # several .r are located across the build -I ${SDK_SOURCE_DIR}/resources/ -I ${SDK_SOURCE_DIR}/photoshop/ # -I ${SDK_COMMON_DIR}/includes/ -arch x86_64 + # use the datafork + -useDF + # needs this for Carbon.r and CoreServices.r in the Adobe .r headers #-F Carbon #-F CoreServices + + # where to find framework files -isysroot /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/ - + -o "${KPS_SOURCE_DIR}/${myTargetApp}.rsrc" ${KPS_SOURCE_DIR}/KPS.r ) diff --git a/plugin/kps/KPS.cpp b/plugin/kps/KPS.cpp index 80562844..5239a7b8 100755 --- a/plugin/kps/KPS.cpp +++ b/plugin/kps/KPS.cpp @@ -989,6 +989,12 @@ DLLExport MACPASCAL void PluginMain(const short selector, intptr_t *dataPointer, short *result) { + // using this to keep dead-strip from removing all code + if (selector == formatSelectorAbout && formatParamBlock == nullptr) + { + return; + } + if (selector == formatSelectorAbout) { sSPBasic = ((AboutRecordPtr)formatParamBlock)->sSPBasic; @@ -1139,9 +1145,12 @@ DLLExport MACPASCAL void PluginMain(const short selector, } } -// Tthis is just to silence broken build. +// This is just to silence broken build. // Even though this is a plugin, Xcode wants _main or won't link. int main(int macroUnusedArg(argc), char** macroUnusedArg(argv)) { + // call this to prevent dead-stripping + PluginMain(formatSelectorAbout, nullptr, nullptr, nullptr); + return 0; } From 4bdcf7b65a80704867e6fd5291cbd754aabf009f Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Thu, 18 Mar 2021 10:24:19 -0700 Subject: [PATCH 019/901] kram - update Win builds settings for perf and faster builds, update lodepng --- kram/CMakeLists.txt | 28 +- kramv/CMakeLists.txt | 2 +- libkram/CMakeLists.txt | 11 +- libkram/lodepng/LICENSE | 46 +- libkram/lodepng/lodepng.cpp | 12641 +++++++++++++++++----------------- libkram/lodepng/lodepng.h | 3738 +++++----- 6 files changed, 8502 insertions(+), 7964 deletions(-) diff --git a/kram/CMakeLists.txt b/kram/CMakeLists.txt index 4d852cad..b2afa050 100644 --- a/kram/CMakeLists.txt +++ b/kram/CMakeLists.txt @@ -83,12 +83,28 @@ elseif (WIN32) string(REGEX REPLACE "/EHsc" "/EHs-c-" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") # don't need force with apps, since they only access kram folder files which include KramConfig - # force include and fix STL - #target_compile_options(${myTargetApp} PRIVATE /FIKramConfig.h) - target_compile_definitions(${myTargetApp} PRIVATE "-D_D_HAS_EXCEPTIONS=0") - - target_compile_options(${myTargetApp} PRIVATE /W3 /arch:AVX) -elseif (UNIXBUILD) + + # all warnings, AVX1, and multiprocess compiles + target_compile_options(${myTargetApp} PRIVATE /W3 /arch:AVX /MP /GF /FC) + + # fix STL + target_compile_definitions(${myTargetApp} PRIVATE "-D_D_HAS_EXCEPTIONS=0 -D_ITERATOR_DEBUG_LEVEL=0") + + if (CMAKE_BUILD_TYPE EQUAL "Debug") + target_compile_definitions(${myTargetLib} PRIVATE "/INCREMENTAL") + + elseif (CMAKE_BUILD_TYPE EQUAL "Release") + # only dead strip on Release builds since this disables Incremental linking, may want Profile build that doesn't use this + target_compile_definitions(${myTargetLib} PRIVATE "/OPT:REF") + + # other possibliities + # /GL - whole program optimization + # /Gy - edit and continue with function level linking + # /Oi - enable intrinsic functions + + endif() + +elseif (UNIXBUILD) 
target_link_libraries(${myTargetApp} libkram) # TODO: finish this diff --git a/kramv/CMakeLists.txt b/kramv/CMakeLists.txt index 68599317..cd290094 100644 --- a/kramv/CMakeLists.txt +++ b/kramv/CMakeLists.txt @@ -61,7 +61,7 @@ set_target_properties(${myTargetApp} PROPERTIES # this drops app from 762KB to 174KB with only ATE enabled # note about needing -gfull instead of -gused here or debug info messed up: # https://gist.github.com/tkersey/39b4fe69e14b859889ffadccb009e397 - XCODE_ATTRIBUTE_DEAD_CODE_STRIPPING YES + XCODE_ATTRIBUTE_DEAD_CODE_STRIPPING[variant=Release] YES XCODE_ATTRIBUTE_LLVM_LTO[variant=Release] "Incremental" #------------------------- diff --git a/libkram/CMakeLists.txt b/libkram/CMakeLists.txt index 7f3173e7..3c3b3be5 100644 --- a/libkram/CMakeLists.txt +++ b/libkram/CMakeLists.txt @@ -158,9 +158,14 @@ elseif (WIN32) string(REGEX REPLACE "/GR" "/GR-" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") string(REGEX REPLACE "/EHsc" "/EHs-c-" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - # force include and fix STL - target_compile_options(${myTargetLib} PRIVATE /FIKramConfig.h /W3 /arch:AVX) - target_compile_definitions(${myTargetLib} PRIVATE "-D_D_HAS_EXCEPTIONS=0") + # force include + target_compile_options(${myTargetLib} PRIVATE /FIKramConfig.h) + + # all warnings, AVX1, and multiprocess compiles + target_compile_options(${myTargetLib} PRIVATE /W3 /arch:AVX /MP) + + # fix STL + target_compile_definitions(${myTargetLib} PRIVATE "-D_D_HAS_EXCEPTIONS=0 -D_ITERATOR_DEBUG_LEVEL=0") elseif (UNIXBUILD) # TODO: finish this diff --git a/libkram/lodepng/LICENSE b/libkram/lodepng/LICENSE index 9382c4d0..a5fb0603 100644 --- a/libkram/lodepng/LICENSE +++ b/libkram/lodepng/LICENSE @@ -1,25 +1,21 @@ -LodePNG version 20160124 - -Copyright (c) 2005-2016 Lode Vandevenne - -This software is provided 'as-is', without any express or implied -warranty. In no event will the authors be held liable for any damages -arising from the use of this software. - -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it -freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - - 3. This notice may not be removed or altered from any source - distribution. - -The manual and changelog are in the header file "lodepng.h" -Rename this file to lodepng.cpp to use it for C++, or to lodepng.c to use it for C. +Copyright (c) 2005-2018 Lode Vandevenne + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. 
Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source + distribution. + diff --git a/libkram/lodepng/lodepng.cpp b/libkram/lodepng/lodepng.cpp index a9f0e0c8..b08b0858 100644 --- a/libkram/lodepng/lodepng.cpp +++ b/libkram/lodepng/lodepng.cpp @@ -1,6168 +1,6473 @@ -/* -LodePNG version 20160124 - -Copyright (c) 2005-2016 Lode Vandevenne - -This software is provided 'as-is', without any express or implied -warranty. In no event will the authors be held liable for any damages -arising from the use of this software. - -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it -freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - - 3. This notice may not be removed or altered from any source - distribution. -*/ - -/* -The manual and changelog are in the header file "lodepng.h" -Rename this file to lodepng.cpp to use it for C++, or to lodepng.c to use it for C. -*/ - -#include "lodepng.h" - -#include -#include - -#ifdef LODEPNG_COMPILE_CPP -#include -#endif /*LODEPNG_COMPILE_CPP*/ - -#if defined(_MSC_VER) && (_MSC_VER >= 1310) /*Visual Studio: A few warning types are not desired here.*/ -#pragma warning( disable : 4244 ) /*implicit conversions: not warned by gcc -Wall -Wextra and requires too much casts*/ -#pragma warning( disable : 4996 ) /*VS does not like fopen, but fopen_s is not standard C so unusable here*/ -#endif /*_MSC_VER */ - -const char* LODEPNG_VERSION_STRING = "20160124"; - -/* -This source file is built up in the following large parts. The code sections -with the "LODEPNG_COMPILE_" #defines divide this up further in an intermixed way. --Tools for C and common code for PNG and Zlib --C Code for Zlib (huffman, deflate, ...) --C Code for PNG (file format chunks, adam7, PNG filters, color conversions, ...) --The C++ wrapper around all of the above -*/ - -/*The malloc, realloc and free functions defined here with "lodepng_" in front -of the name, so that you can easily change them to others related to your -platform if needed. Everything else in the code calls these. Pass --DLODEPNG_NO_COMPILE_ALLOCATORS to the compiler, or comment out -#define LODEPNG_COMPILE_ALLOCATORS in the header, to disable the ones here and -define them in your own project's source files without needing to change -lodepng source code. 
Don't forget to remove "static" if you copypaste them -from here.*/ - -#ifdef LODEPNG_COMPILE_ALLOCATORS -static void* lodepng_malloc(size_t size) -{ - return malloc(size); -} - -static void* lodepng_realloc(void* ptr, size_t new_size) -{ - return realloc(ptr, new_size); -} - -static void lodepng_free(void* ptr) -{ - free(ptr); -} -#else /*LODEPNG_COMPILE_ALLOCATORS*/ -void* lodepng_malloc(size_t size); -void* lodepng_realloc(void* ptr, size_t new_size); -void lodepng_free(void* ptr); -#endif /*LODEPNG_COMPILE_ALLOCATORS*/ - -/* ////////////////////////////////////////////////////////////////////////// */ -/* ////////////////////////////////////////////////////////////////////////// */ -/* // Tools for C, and common code for PNG and Zlib. // */ -/* ////////////////////////////////////////////////////////////////////////// */ -/* ////////////////////////////////////////////////////////////////////////// */ - -/* -Often in case of an error a value is assigned to a variable and then it breaks -out of a loop (to go to the cleanup phase of a function). This macro does that. -It makes the error handling code shorter and more readable. - -Example: if(!uivector_resizev(&frequencies_ll, 286, 0)) ERROR_BREAK(83); -*/ -#define CERROR_BREAK(errorvar, code)\ -{\ - errorvar = code;\ - break;\ -} - -/*version of CERROR_BREAK that assumes the common case where the error variable is named "error"*/ -#define ERROR_BREAK(code) CERROR_BREAK(error, code) - -/*Set error var to the error code, and return it.*/ -#define CERROR_RETURN_ERROR(errorvar, code)\ -{\ - errorvar = code;\ - return code;\ -} - -/*Try the code, if it returns error, also return the error.*/ -#define CERROR_TRY_RETURN(call)\ -{\ - unsigned error = call;\ - if(error) return error;\ -} - -/*Set error var to the error code, and return from the void function.*/ -#define CERROR_RETURN(errorvar, code)\ -{\ - errorvar = code;\ - return;\ -} - -/* -About uivector, ucvector and string: --All of them wrap dynamic arrays or text strings in a similar way. --LodePNG was originally written in C++. The vectors replace the std::vectors that were used in the C++ version. --The string tools are made to avoid problems with compilers that declare things like strncat as deprecated. --They're not used in the interface, only internally in this file as static functions. --As with many other structs in this file, the init and cleanup functions serve as ctor and dtor. -*/ - -#ifdef LODEPNG_COMPILE_ZLIB -/*dynamic vector of unsigned ints*/ -typedef struct uivector -{ - unsigned* data; - size_t size; /*size in number of unsigned longs*/ - size_t allocsize; /*allocated size in bytes*/ -} uivector; - -static void uivector_cleanup(void* p) -{ - ((uivector*)p)->size = ((uivector*)p)->allocsize = 0; - lodepng_free(((uivector*)p)->data); - ((uivector*)p)->data = NULL; -} - -/*returns 1 if success, 0 if failure ==> nothing done*/ -static unsigned uivector_reserve(uivector* p, size_t allocsize) -{ - if(allocsize > p->allocsize) - { - size_t newsize = (allocsize > p->allocsize * 2) ? 
allocsize : (allocsize * 3 / 2); - void* data = lodepng_realloc(p->data, newsize); - if(data) - { - p->allocsize = newsize; - p->data = (unsigned*)data; - } - else return 0; /*error: not enough memory*/ - } - return 1; -} - -/*returns 1 if success, 0 if failure ==> nothing done*/ -static unsigned uivector_resize(uivector* p, size_t size) -{ - if(!uivector_reserve(p, size * sizeof(unsigned))) return 0; - p->size = size; - return 1; /*success*/ -} - -/*resize and give all new elements the value*/ -static unsigned uivector_resizev(uivector* p, size_t size, unsigned value) -{ - size_t oldsize = p->size, i; - if(!uivector_resize(p, size)) return 0; - for(i = oldsize; i < size; ++i) p->data[i] = value; - return 1; -} - -static void uivector_init(uivector* p) -{ - p->data = NULL; - p->size = p->allocsize = 0; -} - -#ifdef LODEPNG_COMPILE_ENCODER -/*returns 1 if success, 0 if failure ==> nothing done*/ -static unsigned uivector_push_back(uivector* p, unsigned c) -{ - if(!uivector_resize(p, p->size + 1)) return 0; - p->data[p->size - 1] = c; - return 1; -} -#endif /*LODEPNG_COMPILE_ENCODER*/ -#endif /*LODEPNG_COMPILE_ZLIB*/ - -/* /////////////////////////////////////////////////////////////////////////// */ - -/*dynamic vector of unsigned chars*/ -typedef struct ucvector -{ - unsigned char* data; - size_t size; /*used size*/ - size_t allocsize; /*allocated size*/ -} ucvector; - -/*returns 1 if success, 0 if failure ==> nothing done*/ -static unsigned ucvector_reserve(ucvector* p, size_t allocsize) -{ - if(allocsize > p->allocsize) - { - size_t newsize = (allocsize > p->allocsize * 2) ? allocsize : (allocsize * 3 / 2); - void* data = lodepng_realloc(p->data, newsize); - if(data) - { - p->allocsize = newsize; - p->data = (unsigned char*)data; - } - else return 0; /*error: not enough memory*/ - } - return 1; -} - -/*returns 1 if success, 0 if failure ==> nothing done*/ -static unsigned ucvector_resize(ucvector* p, size_t size) -{ - if(!ucvector_reserve(p, size * sizeof(unsigned char))) return 0; - p->size = size; - return 1; /*success*/ -} - -#ifdef LODEPNG_COMPILE_PNG - -static void ucvector_cleanup(void* p) -{ - ((ucvector*)p)->size = ((ucvector*)p)->allocsize = 0; - lodepng_free(((ucvector*)p)->data); - ((ucvector*)p)->data = NULL; -} - -static void ucvector_init(ucvector* p) -{ - p->data = NULL; - p->size = p->allocsize = 0; -} -#endif /*LODEPNG_COMPILE_PNG*/ - -#ifdef LODEPNG_COMPILE_ZLIB -/*you can both convert from vector to buffer&size and vica versa. 
If you use -init_buffer to take over a buffer and size, it is not needed to use cleanup*/ -static void ucvector_init_buffer(ucvector* p, unsigned char* buffer, size_t size) -{ - p->data = buffer; - p->allocsize = p->size = size; -} -#endif /*LODEPNG_COMPILE_ZLIB*/ - -#if (defined(LODEPNG_COMPILE_PNG) && defined(LODEPNG_COMPILE_ANCILLARY_CHUNKS)) || defined(LODEPNG_COMPILE_ENCODER) -/*returns 1 if success, 0 if failure ==> nothing done*/ -static unsigned ucvector_push_back(ucvector* p, unsigned char c) -{ - if(!ucvector_resize(p, p->size + 1)) return 0; - p->data[p->size - 1] = c; - return 1; -} -#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)*/ - - -/* ////////////////////////////////////////////////////////////////////////// */ - -#ifdef LODEPNG_COMPILE_PNG -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS -/*returns 1 if success, 0 if failure ==> nothing done*/ -static unsigned string_resize(char** out, size_t size) -{ - char* data = (char*)lodepng_realloc(*out, size + 1); - if(data) - { - data[size] = 0; /*null termination char*/ - *out = data; - } - return data != 0; -} - -/*init a {char*, size_t} pair for use as string*/ -static void string_init(char** out) -{ - *out = NULL; - string_resize(out, 0); -} - -/*free the above pair again*/ -static void string_cleanup(char** out) -{ - lodepng_free(*out); - *out = NULL; -} - -static void string_set(char** out, const char* in) -{ - size_t insize = strlen(in), i; - if(string_resize(out, insize)) - { - for(i = 0; i != insize; ++i) - { - (*out)[i] = in[i]; - } - } -} -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ -#endif /*LODEPNG_COMPILE_PNG*/ - -/* ////////////////////////////////////////////////////////////////////////// */ - -unsigned lodepng_read32bitInt(const unsigned char* buffer) -{ - return (unsigned)((buffer[0] << 24) | (buffer[1] << 16) | (buffer[2] << 8) | buffer[3]); -} - -#if defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER) -/*buffer must have at least 4 allocated bytes available*/ -static void lodepng_set32bitInt(unsigned char* buffer, unsigned value) -{ - buffer[0] = (unsigned char)((value >> 24) & 0xff); - buffer[1] = (unsigned char)((value >> 16) & 0xff); - buffer[2] = (unsigned char)((value >> 8) & 0xff); - buffer[3] = (unsigned char)((value ) & 0xff); -} -#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)*/ - -#ifdef LODEPNG_COMPILE_ENCODER -static void lodepng_add32bitInt(ucvector* buffer, unsigned value) -{ - ucvector_resize(buffer, buffer->size + 4); /*todo: give error if resize failed*/ - lodepng_set32bitInt(&buffer->data[buffer->size - 4], value); -} -#endif /*LODEPNG_COMPILE_ENCODER*/ - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / File IO / */ -/* ////////////////////////////////////////////////////////////////////////// */ - -#ifdef LODEPNG_COMPILE_DISK - -unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename) -{ - FILE* file; - long size; - - /*provide some proper output values if error will happen*/ - *out = 0; - *outsize = 0; - - file = fopen(filename, "rb"); - if(!file) return 78; - - /*get filesize:*/ - fseek(file , 0 , SEEK_END); - size = ftell(file); - rewind(file); - - /*read contents of the file into the vector*/ - *outsize = 0; - *out = (unsigned char*)lodepng_malloc((size_t)size); - if(size && (*out)) (*outsize) = fread(*out, 1, (size_t)size, file); - - fclose(file); - if(!(*out) && size) return 83; /*the above malloc failed*/ - return 0; -} - -/*write given buffer to the file, 
overwriting the file, it doesn't append to it.*/ -unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename) -{ - FILE* file; - file = fopen(filename, "wb" ); - if(!file) return 79; - fwrite((char*)buffer , 1 , buffersize, file); - fclose(file); - return 0; -} - -#endif /*LODEPNG_COMPILE_DISK*/ - -/* ////////////////////////////////////////////////////////////////////////// */ -/* ////////////////////////////////////////////////////////////////////////// */ -/* // End of common code and tools. Begin of Zlib related code. // */ -/* ////////////////////////////////////////////////////////////////////////// */ -/* ////////////////////////////////////////////////////////////////////////// */ - -#ifdef LODEPNG_COMPILE_ZLIB -#ifdef LODEPNG_COMPILE_ENCODER -/*TODO: this ignores potential out of memory errors*/ -#define addBitToStream(/*size_t**/ bitpointer, /*ucvector**/ bitstream, /*unsigned char*/ bit)\ -{\ - /*add a new byte at the end*/\ - if(((*bitpointer) & 7) == 0) ucvector_push_back(bitstream, (unsigned char)0);\ - /*earlier bit of huffman code is in a lesser significant bit of an earlier byte*/\ - (bitstream->data[bitstream->size - 1]) |= (bit << ((*bitpointer) & 0x7));\ - ++(*bitpointer);\ -} - -static void addBitsToStream(size_t* bitpointer, ucvector* bitstream, unsigned value, size_t nbits) -{ - size_t i; - for(i = 0; i != nbits; ++i) addBitToStream(bitpointer, bitstream, (unsigned char)((value >> i) & 1)); -} - -static void addBitsToStreamReversed(size_t* bitpointer, ucvector* bitstream, unsigned value, size_t nbits) -{ - size_t i; - for(i = 0; i != nbits; ++i) addBitToStream(bitpointer, bitstream, (unsigned char)((value >> (nbits - 1 - i)) & 1)); -} -#endif /*LODEPNG_COMPILE_ENCODER*/ - -#ifdef LODEPNG_COMPILE_DECODER - -#define READBIT(bitpointer, bitstream) ((bitstream[bitpointer >> 3] >> (bitpointer & 0x7)) & (unsigned char)1) - -static unsigned char readBitFromStream(size_t* bitpointer, const unsigned char* bitstream) -{ - unsigned char result = (unsigned char)(READBIT(*bitpointer, bitstream)); - ++(*bitpointer); - return result; -} - -static unsigned readBitsFromStream(size_t* bitpointer, const unsigned char* bitstream, size_t nbits) -{ - unsigned result = 0, i; - for(i = 0; i != nbits; ++i) - { - result += ((unsigned)READBIT(*bitpointer, bitstream)) << i; - ++(*bitpointer); - } - return result; -} -#endif /*LODEPNG_COMPILE_DECODER*/ - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / Deflate - Huffman / */ -/* ////////////////////////////////////////////////////////////////////////// */ - -#define FIRST_LENGTH_CODE_INDEX 257 -#define LAST_LENGTH_CODE_INDEX 285 -/*256 literals, the end code, some length codes, and 2 unused codes*/ -#define NUM_DEFLATE_CODE_SYMBOLS 288 -/*the distance codes have their own symbols, 30 used, 2 unused*/ -#define NUM_DISTANCE_SYMBOLS 32 -/*the code length codes. 
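
The bit-stream helpers above pack bits LSB-first: bit (bp & 7) of byte (bp >> 3), so the low-order bits of a value land earliest in the stream. A small standalone sketch with a worked example (put_bits is an illustrative name, not a lodepng function):

    #include <stdio.h>

    /* illustrative only: same LSB-first packing as addBitsToStream above,
       but on a plain byte buffer instead of a ucvector */
    static void put_bits(unsigned char* buf, size_t* bp, unsigned value, size_t nbits)
    {
        size_t i;
        for(i = 0; i != nbits; ++i)
        {
            unsigned bit = (value >> i) & 1u;
            buf[*bp >> 3] |= (unsigned char)(bit << (*bp & 7u)); /*earlier bits go to lower positions*/
            ++(*bp);
        }
    }

    int main(void)
    {
        unsigned char buf[4] = {0};
        size_t bp = 0;
        put_bits(buf, &bp, 5, 3); /*writes bits 1,0,1*/
        put_bits(buf, &bp, 3, 2); /*writes bits 1,1*/
        printf("%02x\n", buf[0]); /*prints 1d, i.e. 0b00011101*/
        return 0;
    }
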
0-15: code lengths, 16: copy previous 3-6 times, 17: 3-10 zeros, 18: 11-138 zeros*/ -#define NUM_CODE_LENGTH_CODES 19 - -/*the base lengths represented by codes 257-285*/ -static const unsigned LENGTHBASE[29] - = {3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, - 67, 83, 99, 115, 131, 163, 195, 227, 258}; - -/*the extra bits used by codes 257-285 (added to base length)*/ -static const unsigned LENGTHEXTRA[29] - = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, - 4, 4, 4, 4, 5, 5, 5, 5, 0}; - -/*the base backwards distances (the bits of distance codes appear after length codes and use their own huffman tree)*/ -static const unsigned DISTANCEBASE[30] - = {1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, - 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577}; - -/*the extra bits of backwards distances (added to base)*/ -static const unsigned DISTANCEEXTRA[30] - = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, - 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; - -/*the order in which "code length alphabet code lengths" are stored, out of this -the huffman tree of the dynamic huffman tree lengths is generated*/ -static const unsigned CLCL_ORDER[NUM_CODE_LENGTH_CODES] - = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; - -/* ////////////////////////////////////////////////////////////////////////// */ - -/* -Huffman tree struct, containing multiple representations of the tree -*/ -typedef struct HuffmanTree -{ - unsigned* tree2d; - unsigned* tree1d; - unsigned* lengths; /*the lengths of the codes of the 1d-tree*/ - unsigned maxbitlen; /*maximum number of bits a single code can get*/ - unsigned numcodes; /*number of symbols in the alphabet = number of codes*/ -} HuffmanTree; - -/*function used for debug purposes to draw the tree in ascii art with C++*/ -/* -static void HuffmanTree_draw(HuffmanTree* tree) -{ - std::cout << "tree. length: " << tree->numcodes << " maxbitlen: " << tree->maxbitlen << std::endl; - for(size_t i = 0; i != tree->tree1d.size; ++i) - { - if(tree->lengths.data[i]) - std::cout << i << " " << tree->tree1d.data[i] << " " << tree->lengths.data[i] << std::endl; - } - std::cout << std::endl; -}*/ - -static void HuffmanTree_init(HuffmanTree* tree) -{ - tree->tree2d = 0; - tree->tree1d = 0; - tree->lengths = 0; -} - -static void HuffmanTree_cleanup(HuffmanTree* tree) -{ - lodepng_free(tree->tree2d); - lodepng_free(tree->tree1d); - lodepng_free(tree->lengths); -} - -/*the tree representation used by the decoder. return value is error*/ -static unsigned HuffmanTree_make2DTree(HuffmanTree* tree) -{ - unsigned nodefilled = 0; /*up to which node it is filled*/ - unsigned treepos = 0; /*position in the tree (1 of the numcodes columns)*/ - unsigned n, i; - - tree->tree2d = (unsigned*)lodepng_malloc(tree->numcodes * 2 * sizeof(unsigned)); - if(!tree->tree2d) return 83; /*alloc fail*/ - - /* - convert tree1d[] to tree2d[][]. In the 2D array, a value of 32767 means - uninited, a value >= numcodes is an address to another bit, a value < numcodes - is a code. The 2 rows are the 2 possible bit values (0 or 1), there are as - many columns as codes - 1. - A good huffman tree has N * 2 - 1 nodes, of which N - 1 are internal nodes. - Here, the internal nodes are stored (what their 0 and 1 option point to). 
- There is only memory for such good tree currently, if there are more nodes - (due to too long length codes), error 55 will happen - */ - for(n = 0; n < tree->numcodes * 2; ++n) - { - tree->tree2d[n] = 32767; /*32767 here means the tree2d isn't filled there yet*/ - } - - for(n = 0; n < tree->numcodes; ++n) /*the codes*/ - { - for(i = 0; i != tree->lengths[n]; ++i) /*the bits for this code*/ - { - unsigned char bit = (unsigned char)((tree->tree1d[n] >> (tree->lengths[n] - i - 1)) & 1); - /*oversubscribed, see comment in lodepng_error_text*/ - if(treepos > 2147483647 || treepos + 2 > tree->numcodes) return 55; - if(tree->tree2d[2 * treepos + bit] == 32767) /*not yet filled in*/ - { - if(i + 1 == tree->lengths[n]) /*last bit*/ - { - tree->tree2d[2 * treepos + bit] = n; /*put the current code in it*/ - treepos = 0; - } - else - { - /*put address of the next step in here, first that address has to be found of course - (it's just nodefilled + 1)...*/ - ++nodefilled; - /*addresses encoded with numcodes added to it*/ - tree->tree2d[2 * treepos + bit] = nodefilled + tree->numcodes; - treepos = nodefilled; - } - } - else treepos = tree->tree2d[2 * treepos + bit] - tree->numcodes; - } - } - - for(n = 0; n < tree->numcodes * 2; ++n) - { - if(tree->tree2d[n] == 32767) tree->tree2d[n] = 0; /*remove possible remaining 32767's*/ - } - - return 0; -} - -/* -Second step for the ...makeFromLengths and ...makeFromFrequencies functions. -numcodes, lengths and maxbitlen must already be filled in correctly. return -value is error. -*/ -static unsigned HuffmanTree_makeFromLengths2(HuffmanTree* tree) -{ - uivector blcount; - uivector nextcode; - unsigned error = 0; - unsigned bits, n; - - uivector_init(&blcount); - uivector_init(&nextcode); - - tree->tree1d = (unsigned*)lodepng_malloc(tree->numcodes * sizeof(unsigned)); - if(!tree->tree1d) error = 83; /*alloc fail*/ - - if(!uivector_resizev(&blcount, tree->maxbitlen + 1, 0) - || !uivector_resizev(&nextcode, tree->maxbitlen + 1, 0)) - error = 83; /*alloc fail*/ - - if(!error) - { - /*step 1: count number of instances of each code length*/ - for(bits = 0; bits != tree->numcodes; ++bits) ++blcount.data[tree->lengths[bits]]; - /*step 2: generate the nextcode values*/ - for(bits = 1; bits <= tree->maxbitlen; ++bits) - { - nextcode.data[bits] = (nextcode.data[bits - 1] + blcount.data[bits - 1]) << 1; - } - /*step 3: generate all the codes*/ - for(n = 0; n != tree->numcodes; ++n) - { - if(tree->lengths[n] != 0) tree->tree1d[n] = nextcode.data[tree->lengths[n]]++; - } - } - - uivector_cleanup(&blcount); - uivector_cleanup(&nextcode); - - if(!error) return HuffmanTree_make2DTree(tree); - else return error; -} - -/* -given the code lengths (as stored in the PNG file), generate the tree as defined -by Deflate. maxbitlen is the maximum bits that a code in the tree can have. -return value is error. 
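
HuffmanTree_makeFromLengths2 above is the canonical code assignment from RFC 1951 section 3.2.2: count codes per length, derive the smallest code for each length, then hand out consecutive values in symbol order. A compact standalone sketch with a worked example (canonical_codes is an illustrative name, not a lodepng function):

    #include <stdio.h>

    /* illustrative only: the same three steps as HuffmanTree_makeFromLengths2 above,
       for maxbitlen <= 15 and a small symbol count */
    static void canonical_codes(const unsigned* lengths, unsigned numcodes,
                                unsigned maxbitlen, unsigned* codes)
    {
        unsigned blcount[16] = {0}, nextcode[16] = {0};
        unsigned bits, n;
        /*step 1: count how many codes have each length (length 0 = unused symbol)*/
        for(n = 0; n != numcodes; ++n) if(lengths[n] != 0) ++blcount[lengths[n]];
        /*step 2: smallest code value for each length*/
        for(bits = 1; bits <= maxbitlen; ++bits)
            nextcode[bits] = (nextcode[bits - 1] + blcount[bits - 1]) << 1;
        /*step 3: hand out consecutive codes per length, in symbol order*/
        for(n = 0; n != numcodes; ++n)
            if(lengths[n] != 0) codes[n] = nextcode[lengths[n]]++;
    }

    int main(void)
    {
        /*lengths {2,1,3,3} give codes 10, 0, 110, 111 (binary)*/
        unsigned lengths[4] = {2, 1, 3, 3}, codes[4] = {0}, i;
        canonical_codes(lengths, 4, 3, codes);
        for(i = 0; i != 4; ++i) printf("symbol %u: code %u, %u bits\n", i, codes[i], lengths[i]);
        return 0;
    }
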
-*/ -static unsigned HuffmanTree_makeFromLengths(HuffmanTree* tree, const unsigned* bitlen, - size_t numcodes, unsigned maxbitlen) -{ - unsigned i; - tree->lengths = (unsigned*)lodepng_malloc(numcodes * sizeof(unsigned)); - if(!tree->lengths) return 83; /*alloc fail*/ - for(i = 0; i != numcodes; ++i) tree->lengths[i] = bitlen[i]; - tree->numcodes = (unsigned)numcodes; /*number of symbols*/ - tree->maxbitlen = maxbitlen; - return HuffmanTree_makeFromLengths2(tree); -} - -#ifdef LODEPNG_COMPILE_ENCODER - -/*BPM: Boundary Package Merge, see "A Fast and Space-Economical Algorithm for Length-Limited Coding", -Jyrki Katajainen, Alistair Moffat, Andrew Turpin, 1995.*/ - -/*chain node for boundary package merge*/ -typedef struct BPMNode -{ - int weight; /*the sum of all weights in this chain*/ - unsigned index; /*index of this leaf node (called "count" in the paper)*/ - struct BPMNode* tail; /*the next nodes in this chain (null if last)*/ - int in_use; -} BPMNode; - -/*lists of chains*/ -typedef struct BPMLists -{ - /*memory pool*/ - unsigned memsize; - BPMNode* memory; - unsigned numfree; - unsigned nextfree; - BPMNode** freelist; - /*two heads of lookahead chains per list*/ - unsigned listsize; - BPMNode** chains0; - BPMNode** chains1; -} BPMLists; - -/*creates a new chain node with the given parameters, from the memory in the lists */ -static BPMNode* bpmnode_create(BPMLists* lists, int weight, unsigned index, BPMNode* tail) -{ - unsigned i; - BPMNode* result; - - /*memory full, so garbage collect*/ - if(lists->nextfree >= lists->numfree) - { - /*mark only those that are in use*/ - for(i = 0; i != lists->memsize; ++i) lists->memory[i].in_use = 0; - for(i = 0; i != lists->listsize; ++i) - { - BPMNode* node; - for(node = lists->chains0[i]; node != 0; node = node->tail) node->in_use = 1; - for(node = lists->chains1[i]; node != 0; node = node->tail) node->in_use = 1; - } - /*collect those that are free*/ - lists->numfree = 0; - for(i = 0; i != lists->memsize; ++i) - { - if(!lists->memory[i].in_use) lists->freelist[lists->numfree++] = &lists->memory[i]; - } - lists->nextfree = 0; - } - - result = lists->freelist[lists->nextfree++]; - result->weight = weight; - result->index = index; - result->tail = tail; - return result; -} - -static int bpmnode_compare(const void* a, const void* b) -{ - int wa = ((const BPMNode*)a)->weight; - int wb = ((const BPMNode*)b)->weight; - if(wa < wb) return -1; - if(wa > wb) return 1; - /*make the qsort a stable sort*/ - return ((const BPMNode*)a)->index < ((const BPMNode*)b)->index ? 
1 : -1; -} - -/*Boundary Package Merge step, numpresent is the amount of leaves, and c is the current chain.*/ -static void boundaryPM(BPMLists* lists, BPMNode* leaves, size_t numpresent, int c, int num) -{ - unsigned lastindex = lists->chains1[c]->index; - - if(c == 0) - { - if(lastindex >= numpresent) return; - lists->chains0[c] = lists->chains1[c]; - lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, 0); - } - else - { - /*sum of the weights of the head nodes of the previous lookahead chains.*/ - int sum = lists->chains0[c - 1]->weight + lists->chains1[c - 1]->weight; - lists->chains0[c] = lists->chains1[c]; - if(lastindex < numpresent && sum > leaves[lastindex].weight) - { - lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, lists->chains1[c]->tail); - return; - } - lists->chains1[c] = bpmnode_create(lists, sum, lastindex, lists->chains1[c - 1]); - /*in the end we are only interested in the chain of the last list, so no - need to recurse if we're at the last one (this gives measurable speedup)*/ - if(num + 1 < (int)(2 * numpresent - 2)) - { - boundaryPM(lists, leaves, numpresent, c - 1, num); - boundaryPM(lists, leaves, numpresent, c - 1, num); - } - } -} - -unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies, - size_t numcodes, unsigned maxbitlen) -{ - unsigned error = 0; - unsigned i; - size_t numpresent = 0; /*number of symbols with non-zero frequency*/ - BPMNode* leaves; /*the symbols, only those with > 0 frequency*/ - - if(numcodes == 0) return 80; /*error: a tree of 0 symbols is not supposed to be made*/ - if((1ull << maxbitlen) < numcodes) return 80; /*error: represent all symbols*/ - - leaves = (BPMNode*)lodepng_malloc(numcodes * sizeof(*leaves)); - if(!leaves) return 83; /*alloc fail*/ - - for(i = 0; i != numcodes; ++i) - { - if(frequencies[i] > 0) - { - leaves[numpresent].weight = (int)frequencies[i]; - leaves[numpresent].index = i; - ++numpresent; - } - } - - for(i = 0; i != numcodes; ++i) lengths[i] = 0; - - /*ensure at least two present symbols. There should be at least one symbol - according to RFC 1951 section 3.2.7. Some decoders incorrectly require two. To - make these work as well ensure there are at least two symbols. The - Package-Merge code below also doesn't work correctly if there's only one - symbol, it'd give it the theoritical 0 bits but in practice zlib wants 1 bit*/ - if(numpresent == 0) - { - lengths[0] = lengths[1] = 1; /*note that for RFC 1951 section 3.2.7, only lengths[0] = 1 is needed*/ - } - else if(numpresent == 1) - { - lengths[leaves[0].index] = 1; - lengths[leaves[0].index == 0 ? 
1 : 0] = 1; - } - else - { - BPMLists lists; - BPMNode* node; - - qsort(leaves, numpresent, sizeof(BPMNode), bpmnode_compare); - - lists.listsize = maxbitlen; - lists.memsize = 2 * maxbitlen * (maxbitlen + 1); - lists.nextfree = 0; - lists.numfree = lists.memsize; - lists.memory = (BPMNode*)lodepng_malloc(lists.memsize * sizeof(*lists.memory)); - lists.freelist = (BPMNode**)lodepng_malloc(lists.memsize * sizeof(BPMNode*)); - lists.chains0 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*)); - lists.chains1 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*)); - if(!lists.memory || !lists.freelist || !lists.chains0 || !lists.chains1) error = 83; /*alloc fail*/ - - if(!error) - { - for(i = 0; i != lists.memsize; ++i) lists.freelist[i] = &lists.memory[i]; - - bpmnode_create(&lists, leaves[0].weight, 1, 0); - bpmnode_create(&lists, leaves[1].weight, 2, 0); - - for(i = 0; i != lists.listsize; ++i) - { - lists.chains0[i] = &lists.memory[0]; - lists.chains1[i] = &lists.memory[1]; - } - - /*each boundaryPM call adds one chain to the last list, and we need 2 * numpresent - 2 chains.*/ - for(i = 2; i != 2 * numpresent - 2; ++i) boundaryPM(&lists, leaves, numpresent, (int)maxbitlen - 1, (int)i); - - for(node = lists.chains1[maxbitlen - 1]; node; node = node->tail) - { - for(i = 0; i != node->index; ++i) ++lengths[leaves[i].index]; - } - } - - lodepng_free(lists.memory); - lodepng_free(lists.freelist); - lodepng_free(lists.chains0); - lodepng_free(lists.chains1); - } - - lodepng_free(leaves); - return error; -} - -/*Create the Huffman tree given the symbol frequencies*/ -static unsigned HuffmanTree_makeFromFrequencies(HuffmanTree* tree, const unsigned* frequencies, - size_t mincodes, size_t numcodes, unsigned maxbitlen) -{ - unsigned error = 0; - while(!frequencies[numcodes - 1] && numcodes > mincodes) --numcodes; /*trim zeroes*/ - tree->maxbitlen = maxbitlen; - tree->numcodes = (unsigned)numcodes; /*number of symbols*/ - tree->lengths = (unsigned*)lodepng_realloc(tree->lengths, numcodes * sizeof(unsigned)); - if(!tree->lengths) return 83; /*alloc fail*/ - /*initialize all lengths to 0*/ - memset(tree->lengths, 0, numcodes * sizeof(unsigned)); - - error = lodepng_huffman_code_lengths(tree->lengths, frequencies, numcodes, maxbitlen); - if(!error) error = HuffmanTree_makeFromLengths2(tree); - return error; -} - -static unsigned HuffmanTree_getCode(const HuffmanTree* tree, unsigned index) -{ - return tree->tree1d[index]; -} - -static unsigned HuffmanTree_getLength(const HuffmanTree* tree, unsigned index) -{ - return tree->lengths[index]; -} -#endif /*LODEPNG_COMPILE_ENCODER*/ - -/*get the literal and length code tree of a deflated block with fixed tree, as per the deflate specification*/ -static unsigned generateFixedLitLenTree(HuffmanTree* tree) -{ - unsigned i, error = 0; - unsigned* bitlen = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned)); - if(!bitlen) return 83; /*alloc fail*/ - - /*288 possible codes: 0-255=literals, 256=endcode, 257-285=lengthcodes, 286-287=unused*/ - for(i = 0; i <= 143; ++i) bitlen[i] = 8; - for(i = 144; i <= 255; ++i) bitlen[i] = 9; - for(i = 256; i <= 279; ++i) bitlen[i] = 7; - for(i = 280; i <= 287; ++i) bitlen[i] = 8; - - error = HuffmanTree_makeFromLengths(tree, bitlen, NUM_DEFLATE_CODE_SYMBOLS, 15); - - lodepng_free(bitlen); - return error; -} - -/*get the distance code tree of a deflated block with fixed tree, as specified in the deflate specification*/ -static unsigned generateFixedDistanceTree(HuffmanTree* tree) -{ - unsigned 
i, error = 0; - unsigned* bitlen = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned)); - if(!bitlen) return 83; /*alloc fail*/ - - /*there are 32 distance codes, but 30-31 are unused*/ - for(i = 0; i != NUM_DISTANCE_SYMBOLS; ++i) bitlen[i] = 5; - error = HuffmanTree_makeFromLengths(tree, bitlen, NUM_DISTANCE_SYMBOLS, 15); - - lodepng_free(bitlen); - return error; -} - -#ifdef LODEPNG_COMPILE_DECODER - -/* -returns the code, or (unsigned)(-1) if error happened -inbitlength is the length of the complete buffer, in bits (so its byte length times 8) -*/ -static unsigned huffmanDecodeSymbol(const unsigned char* in, size_t* bp, - const HuffmanTree* codetree, size_t inbitlength) -{ - unsigned treepos = 0, ct; - for(;;) - { - if(*bp >= inbitlength) return (unsigned)(-1); /*error: end of input memory reached without endcode*/ - /* - decode the symbol from the tree. The "readBitFromStream" code is inlined in - the expression below because this is the biggest bottleneck while decoding - */ - ct = codetree->tree2d[(treepos << 1) + READBIT(*bp, in)]; - ++(*bp); - if(ct < codetree->numcodes) return ct; /*the symbol is decoded, return it*/ - else treepos = ct - codetree->numcodes; /*symbol not yet decoded, instead move tree position*/ - - if(treepos >= codetree->numcodes) return (unsigned)(-1); /*error: it appeared outside the codetree*/ - } -} -#endif /*LODEPNG_COMPILE_DECODER*/ - -#ifdef LODEPNG_COMPILE_DECODER - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / Inflator (Decompressor) / */ -/* ////////////////////////////////////////////////////////////////////////// */ - -/*get the tree of a deflated block with fixed tree, as specified in the deflate specification*/ -static void getTreeInflateFixed(HuffmanTree* tree_ll, HuffmanTree* tree_d) -{ - /*TODO: check for out of memory errors*/ - generateFixedLitLenTree(tree_ll); - generateFixedDistanceTree(tree_d); -} - -/*get the tree of a deflated block with dynamic tree, the tree itself is also Huffman compressed with a known tree*/ -static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d, - const unsigned char* in, size_t* bp, size_t inlength) -{ - /*make sure that length values that aren't filled in will be 0, or a wrong tree will be generated*/ - unsigned error = 0; - unsigned n, HLIT, HDIST, HCLEN, i; - size_t inbitlength = inlength * 8; - - /*see comments in deflateDynamic for explanation of the context and these variables, it is analogous*/ - unsigned* bitlen_ll = 0; /*lit,len code lengths*/ - unsigned* bitlen_d = 0; /*dist code lengths*/ - /*code length code lengths ("clcl"), the bit lengths of the huffman tree used to compress bitlen_ll and bitlen_d*/ - unsigned* bitlen_cl = 0; - HuffmanTree tree_cl; /*the code tree for code length codes (the huffman tree for compressed huffman trees)*/ - - if((*bp) + 14 > (inlength << 3)) return 49; /*error: the bit pointer is or will go past the memory*/ - - /*number of literal/length codes + 257. Unlike the spec, the value 257 is added to it here already*/ - HLIT = readBitsFromStream(bp, in, 5) + 257; - /*number of distance codes. Unlike the spec, the value 1 is added to it here already*/ - HDIST = readBitsFromStream(bp, in, 5) + 1; - /*number of code length codes. 
Unlike the spec, the value 4 is added to it here already*/ - HCLEN = readBitsFromStream(bp, in, 4) + 4; - - if((*bp) + HCLEN * 3 > (inlength << 3)) return 50; /*error: the bit pointer is or will go past the memory*/ - - HuffmanTree_init(&tree_cl); - - while(!error) - { - /*read the code length codes out of 3 * (amount of code length codes) bits*/ - - bitlen_cl = (unsigned*)lodepng_malloc(NUM_CODE_LENGTH_CODES * sizeof(unsigned)); - if(!bitlen_cl) ERROR_BREAK(83 /*alloc fail*/); - - for(i = 0; i != NUM_CODE_LENGTH_CODES; ++i) - { - if(i < HCLEN) bitlen_cl[CLCL_ORDER[i]] = readBitsFromStream(bp, in, 3); - else bitlen_cl[CLCL_ORDER[i]] = 0; /*if not, it must stay 0*/ - } - - error = HuffmanTree_makeFromLengths(&tree_cl, bitlen_cl, NUM_CODE_LENGTH_CODES, 7); - if(error) break; - - /*now we can use this tree to read the lengths for the tree that this function will return*/ - bitlen_ll = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned)); - bitlen_d = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned)); - if(!bitlen_ll || !bitlen_d) ERROR_BREAK(83 /*alloc fail*/); - for(i = 0; i != NUM_DEFLATE_CODE_SYMBOLS; ++i) bitlen_ll[i] = 0; - for(i = 0; i != NUM_DISTANCE_SYMBOLS; ++i) bitlen_d[i] = 0; - - /*i is the current symbol we're reading in the part that contains the code lengths of lit/len and dist codes*/ - i = 0; - while(i < HLIT + HDIST) - { - unsigned code = huffmanDecodeSymbol(in, bp, &tree_cl, inbitlength); - if(code <= 15) /*a length code*/ - { - if(i < HLIT) bitlen_ll[i] = code; - else bitlen_d[i - HLIT] = code; - ++i; - } - else if(code == 16) /*repeat previous*/ - { - unsigned replength = 3; /*read in the 2 bits that indicate repeat length (3-6)*/ - unsigned value; /*set value to the previous code*/ - - if(i == 0) ERROR_BREAK(54); /*can't repeat previous if i is 0*/ - - if((*bp + 2) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/ - replength += readBitsFromStream(bp, in, 2); - - if(i < HLIT + 1) value = bitlen_ll[i - 1]; - else value = bitlen_d[i - HLIT - 1]; - /*repeat this value in the next lengths*/ - for(n = 0; n < replength; ++n) - { - if(i >= HLIT + HDIST) ERROR_BREAK(13); /*error: i is larger than the amount of codes*/ - if(i < HLIT) bitlen_ll[i] = value; - else bitlen_d[i - HLIT] = value; - ++i; - } - } - else if(code == 17) /*repeat "0" 3-10 times*/ - { - unsigned replength = 3; /*read in the bits that indicate repeat length*/ - if((*bp + 3) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/ - replength += readBitsFromStream(bp, in, 3); - - /*repeat this value in the next lengths*/ - for(n = 0; n < replength; ++n) - { - if(i >= HLIT + HDIST) ERROR_BREAK(14); /*error: i is larger than the amount of codes*/ - - if(i < HLIT) bitlen_ll[i] = 0; - else bitlen_d[i - HLIT] = 0; - ++i; - } - } - else if(code == 18) /*repeat "0" 11-138 times*/ - { - unsigned replength = 11; /*read in the bits that indicate repeat length*/ - if((*bp + 7) > inbitlength) ERROR_BREAK(50); /*error, bit pointer jumps past memory*/ - replength += readBitsFromStream(bp, in, 7); - - /*repeat this value in the next lengths*/ - for(n = 0; n < replength; ++n) - { - if(i >= HLIT + HDIST) ERROR_BREAK(15); /*error: i is larger than the amount of codes*/ - - if(i < HLIT) bitlen_ll[i] = 0; - else bitlen_d[i - HLIT] = 0; - ++i; - } - } - else /*if(code == (unsigned)(-1))*/ /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/ - { - if(code == (unsigned)(-1)) - { - /*return error code 10 or 11 depending on the situation that happened 
in huffmanDecodeSymbol - (10=no endcode, 11=wrong jump outside of tree)*/ - error = (*bp) > inbitlength ? 10 : 11; - } - else error = 16; /*unexisting code, this can never happen*/ - break; - } - } - if(error) break; - - if(bitlen_ll[256] == 0) ERROR_BREAK(64); /*the length of the end code 256 must be larger than 0*/ - - /*now we've finally got HLIT and HDIST, so generate the code trees, and the function is done*/ - error = HuffmanTree_makeFromLengths(tree_ll, bitlen_ll, NUM_DEFLATE_CODE_SYMBOLS, 15); - if(error) break; - error = HuffmanTree_makeFromLengths(tree_d, bitlen_d, NUM_DISTANCE_SYMBOLS, 15); - - break; /*end of error-while*/ - } - - lodepng_free(bitlen_cl); - lodepng_free(bitlen_ll); - lodepng_free(bitlen_d); - HuffmanTree_cleanup(&tree_cl); - - return error; -} - -/*inflate a block with dynamic of fixed Huffman tree*/ -static unsigned inflateHuffmanBlock(ucvector* out, const unsigned char* in, size_t* bp, - size_t* pos, size_t inlength, unsigned btype) -{ - unsigned error = 0; - HuffmanTree tree_ll; /*the huffman tree for literal and length codes*/ - HuffmanTree tree_d; /*the huffman tree for distance codes*/ - size_t inbitlength = inlength * 8; - - HuffmanTree_init(&tree_ll); - HuffmanTree_init(&tree_d); - - if(btype == 1) getTreeInflateFixed(&tree_ll, &tree_d); - else if(btype == 2) error = getTreeInflateDynamic(&tree_ll, &tree_d, in, bp, inlength); - - while(!error) /*decode all symbols until end reached, breaks at end code*/ - { - /*code_ll is literal, length or end code*/ - unsigned code_ll = huffmanDecodeSymbol(in, bp, &tree_ll, inbitlength); - if(code_ll <= 255) /*literal symbol*/ - { - /*ucvector_push_back would do the same, but for some reason the two lines below run 10% faster*/ - if(!ucvector_resize(out, (*pos) + 1)) ERROR_BREAK(83 /*alloc fail*/); - out->data[*pos] = (unsigned char)code_ll; - ++(*pos); - } - else if(code_ll >= FIRST_LENGTH_CODE_INDEX && code_ll <= LAST_LENGTH_CODE_INDEX) /*length code*/ - { - unsigned code_d, distance; - unsigned numextrabits_l, numextrabits_d; /*extra bits for length and distance*/ - size_t start, forward, backward, length; - - /*part 1: get length base*/ - length = LENGTHBASE[code_ll - FIRST_LENGTH_CODE_INDEX]; - - /*part 2: get extra bits and add the value of that to length*/ - numextrabits_l = LENGTHEXTRA[code_ll - FIRST_LENGTH_CODE_INDEX]; - if((*bp + numextrabits_l) > inbitlength) ERROR_BREAK(51); /*error, bit pointer will jump past memory*/ - length += readBitsFromStream(bp, in, numextrabits_l); - - /*part 3: get distance code*/ - code_d = huffmanDecodeSymbol(in, bp, &tree_d, inbitlength); - if(code_d > 29) - { - if(code_ll == (unsigned)(-1)) /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/ - { - /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol - (10=no endcode, 11=wrong jump outside of tree)*/ - error = (*bp) > inlength * 8 ? 
10 : 11; - } - else error = 18; /*error: invalid distance code (30-31 are never used)*/ - break; - } - distance = DISTANCEBASE[code_d]; - - /*part 4: get extra bits from distance*/ - numextrabits_d = DISTANCEEXTRA[code_d]; - if((*bp + numextrabits_d) > inbitlength) ERROR_BREAK(51); /*error, bit pointer will jump past memory*/ - distance += readBitsFromStream(bp, in, numextrabits_d); - - /*part 5: fill in all the out[n] values based on the length and dist*/ - start = (*pos); - if(distance > start) ERROR_BREAK(52); /*too long backward distance*/ - backward = start - distance; - - if(!ucvector_resize(out, (*pos) + length)) ERROR_BREAK(83 /*alloc fail*/); - if (distance < length) { - for(forward = 0; forward < length; ++forward) - { - out->data[(*pos)++] = out->data[backward++]; - } - } else { - memcpy(out->data + *pos, out->data + backward, length); - *pos += length; - } - } - else if(code_ll == 256) - { - break; /*end code, break the loop*/ - } - else /*if(code == (unsigned)(-1))*/ /*huffmanDecodeSymbol returns (unsigned)(-1) in case of error*/ - { - /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol - (10=no endcode, 11=wrong jump outside of tree)*/ - error = ((*bp) > inlength * 8) ? 10 : 11; - break; - } - } - - HuffmanTree_cleanup(&tree_ll); - HuffmanTree_cleanup(&tree_d); - - return error; -} - -static unsigned inflateNoCompression(ucvector* out, const unsigned char* in, size_t* bp, size_t* pos, size_t inlength) -{ - size_t p; - unsigned LEN, NLEN, n, error = 0; - - /*go to first boundary of byte*/ - while(((*bp) & 0x7) != 0) ++(*bp); - p = (*bp) / 8; /*byte position*/ - - /*read LEN (2 bytes) and NLEN (2 bytes)*/ - if(p + 4 >= inlength) return 52; /*error, bit pointer will jump past memory*/ - LEN = in[p] + 256u * in[p + 1]; p += 2; - NLEN = in[p] + 256u * in[p + 1]; p += 2; - - /*check if 16-bit NLEN is really the one's complement of LEN*/ - if(LEN + NLEN != 65535) return 21; /*error: NLEN is not one's complement of LEN*/ - - if(!ucvector_resize(out, (*pos) + LEN)) return 83; /*alloc fail*/ - - /*read the literal data: LEN bytes are now stored in the out buffer*/ - if(p + LEN > inlength) return 23; /*error: reading outside of in buffer*/ - for(n = 0; n < LEN; ++n) out->data[(*pos)++] = in[p++]; - - (*bp) = p * 8; - - return error; -} - -static unsigned lodepng_inflatev(ucvector* out, - const unsigned char* in, size_t insize, - const LodePNGDecompressSettings* settings) -{ - /*bit pointer in the "in" data, current byte is bp >> 3, current bit is bp & 0x7 (from lsb to msb of the byte)*/ - size_t bp = 0; - unsigned BFINAL = 0; - size_t pos = 0; /*byte position in the out buffer*/ - unsigned error = 0; - - (void)settings; - - while(!BFINAL) - { - unsigned BTYPE; - if(bp + 2 >= insize * 8) return 52; /*error, bit pointer will jump past memory*/ - BFINAL = readBitFromStream(&bp, in); - BTYPE = 1u * readBitFromStream(&bp, in); - BTYPE += 2u * readBitFromStream(&bp, in); - - if(BTYPE == 3) return 20; /*error: invalid BTYPE*/ - else if(BTYPE == 0) error = inflateNoCompression(out, in, &bp, &pos, insize); /*no compression*/ - else error = inflateHuffmanBlock(out, in, &bp, &pos, insize, BTYPE); /*compression, BTYPE 01 or 10*/ - - if(error) return error; - } - - return error; -} - -unsigned lodepng_inflate(unsigned char** out, size_t* outsize, - const unsigned char* in, size_t insize, - const LodePNGDecompressSettings* settings) -{ - unsigned error; - ucvector v; - ucvector_init_buffer(&v, *out, *outsize); - error = lodepng_inflatev(&v, in, insize, 
settings); - *out = v.data; - *outsize = v.size; - return error; -} - -static unsigned inflate(unsigned char** out, size_t* outsize, - const unsigned char* in, size_t insize, - const LodePNGDecompressSettings* settings) -{ - if(settings->custom_inflate) - { - return settings->custom_inflate(out, outsize, in, insize, settings); - } - else - { - return lodepng_inflate(out, outsize, in, insize, settings); - } -} - -#endif /*LODEPNG_COMPILE_DECODER*/ - -#ifdef LODEPNG_COMPILE_ENCODER - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / Deflator (Compressor) / */ -/* ////////////////////////////////////////////////////////////////////////// */ - -static const size_t MAX_SUPPORTED_DEFLATE_LENGTH = 258; - -/*bitlen is the size in bits of the code*/ -static void addHuffmanSymbol(size_t* bp, ucvector* compressed, unsigned code, unsigned bitlen) -{ - addBitsToStreamReversed(bp, compressed, code, bitlen); -} - -/*search the index in the array, that has the largest value smaller than or equal to the given value, -given array must be sorted (if no value is smaller, it returns the size of the given array)*/ -static size_t searchCodeIndex(const unsigned* array, size_t array_size, size_t value) -{ - /*binary search (only small gain over linear). TODO: use CPU log2 instruction for getting symbols instead*/ - size_t left = 1; - size_t right = array_size - 1; - - while(left <= right) { - size_t mid = (left + right) >> 1; - if (array[mid] >= value) right = mid - 1; - else left = mid + 1; - } - if(left >= array_size || array[left] > value) left--; - return left; -} - -static void addLengthDistance(uivector* values, size_t length, size_t distance) -{ - /*values in encoded vector are those used by deflate: - 0-255: literal bytes - 256: end - 257-285: length/distance pair (length code, followed by extra length bits, distance code, extra distance bits) - 286-287: invalid*/ - - unsigned length_code = (unsigned)searchCodeIndex(LENGTHBASE, 29, length); - unsigned extra_length = (unsigned)(length - LENGTHBASE[length_code]); - unsigned dist_code = (unsigned)searchCodeIndex(DISTANCEBASE, 30, distance); - unsigned extra_distance = (unsigned)(distance - DISTANCEBASE[dist_code]); - - uivector_push_back(values, length_code + FIRST_LENGTH_CODE_INDEX); - uivector_push_back(values, extra_length); - uivector_push_back(values, dist_code); - uivector_push_back(values, extra_distance); -} - -/*3 bytes of data get encoded into two bytes. The hash cannot use more than 3 -bytes as input because 3 is the minimum match length for deflate*/ -static const unsigned HASH_NUM_VALUES = 65536; -static const unsigned HASH_BIT_MASK = 65535; /*HASH_NUM_VALUES - 1, but C90 does not like that as initializer*/ - -typedef struct Hash -{ - int* head; /*hash value to head circular pos - can be outdated if went around window*/ - /*circular pos to prev circular pos*/ - unsigned short* chain; - int* val; /*circular pos to hash value*/ - - /*TODO: do this not only for zeros but for any repeated byte. 
However for PNG - it's always going to be the zeros that dominate, so not important for PNG*/ - int* headz; /*similar to head, but for chainz*/ - unsigned short* chainz; /*those with same amount of zeros*/ - unsigned short* zeros; /*length of zeros streak, used as a second hash chain*/ -} Hash; - -static unsigned hash_init(Hash* hash, unsigned windowsize) -{ - unsigned i; - hash->head = (int*)lodepng_malloc(sizeof(int) * HASH_NUM_VALUES); - hash->val = (int*)lodepng_malloc(sizeof(int) * windowsize); - hash->chain = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize); - - hash->zeros = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize); - hash->headz = (int*)lodepng_malloc(sizeof(int) * (MAX_SUPPORTED_DEFLATE_LENGTH + 1)); - hash->chainz = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize); - - if(!hash->head || !hash->chain || !hash->val || !hash->headz|| !hash->chainz || !hash->zeros) - { - return 83; /*alloc fail*/ - } - - /*initialize hash table*/ - for(i = 0; i != HASH_NUM_VALUES; ++i) hash->head[i] = -1; - for(i = 0; i != windowsize; ++i) hash->val[i] = -1; - for(i = 0; i != windowsize; ++i) hash->chain[i] = i; /*same value as index indicates uninitialized*/ - - for(i = 0; i <= MAX_SUPPORTED_DEFLATE_LENGTH; ++i) hash->headz[i] = -1; - for(i = 0; i != windowsize; ++i) hash->chainz[i] = i; /*same value as index indicates uninitialized*/ - - return 0; -} - -static void hash_cleanup(Hash* hash) -{ - lodepng_free(hash->head); - lodepng_free(hash->val); - lodepng_free(hash->chain); - - lodepng_free(hash->zeros); - lodepng_free(hash->headz); - lodepng_free(hash->chainz); -} - - - -static unsigned getHash(const unsigned char* data, size_t size, size_t pos) -{ - unsigned result = 0; - if(pos + 2 < size) - { - /*A simple shift and xor hash is used. Since the data of PNGs is dominated - by zeroes due to the filters, a better hash does not have a significant - effect on speed in traversing the chain, and causes more time spend on - calculating the hash.*/ - result ^= (unsigned)(data[pos + 0] << 0u); - result ^= (unsigned)(data[pos + 1] << 4u); - result ^= (unsigned)(data[pos + 2] << 8u); - } else { - size_t amount, i; - if(pos >= size) return 0; - amount = size - pos; - for(i = 0; i != amount; ++i) result ^= (unsigned)(data[pos + i] << (i * 8u)); - } - return result & HASH_BIT_MASK; -} - -static unsigned countZeros(const unsigned char* data, size_t size, size_t pos) -{ - const unsigned char* start = data + pos; - const unsigned char* end = start + MAX_SUPPORTED_DEFLATE_LENGTH; - if(end > data + size) end = data + size; - data = start; - while(data != end && *data == 0) ++data; - /*subtracting two addresses returned as 32-bit number (max value is MAX_SUPPORTED_DEFLATE_LENGTH)*/ - return (unsigned)(data - start); -} - -/*wpos = pos & (windowsize - 1)*/ -static void updateHashChain(Hash* hash, size_t wpos, unsigned hashval, unsigned short numzeros) -{ - hash->val[wpos] = (int)hashval; - if(hash->head[hashval] != -1) hash->chain[wpos] = hash->head[hashval]; - hash->head[hashval] = (int)wpos; - - hash->zeros[wpos] = numzeros; - if(hash->headz[numzeros] != -1) hash->chainz[wpos] = hash->headz[numzeros]; - hash->headz[numzeros] = (int)wpos; -} - -/* -LZ77-encode the data. Return value is error code. The input are raw bytes, the output -is in the form of unsigned integers with codes representing for example literal bytes, or -length/distance pairs. -It uses a hash table technique to let it encode faster. 
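
For orientation, the sketch below finds the same kind of matches encodeLZ77 produces, but by brute force over the window instead of by walking the hash->chain / hash->chainz lists, and without lazy matching (lz77_naive is an illustrative name, not a lodepng function):

    #include <stdio.h>
    #include <string.h>

    /* illustrative only: emit a literal byte, or a (length, distance) pair with
       length >= 3 and length <= 258, as deflate requires */
    static void lz77_naive(const unsigned char* in, size_t insize, size_t windowsize)
    {
        size_t pos = 0;
        while(pos < insize)
        {
            size_t bestlen = 0, bestdist = 0, dist;
            size_t maxlen = insize - pos;
            if(maxlen > 258) maxlen = 258; /*MAX_SUPPORTED_DEFLATE_LENGTH*/
            for(dist = 1; dist <= pos && dist <= windowsize; ++dist)
            {
                size_t len = 0;
                while(len < maxlen && in[pos + len] == in[pos - dist + len]) ++len;
                if(len > bestlen) { bestlen = len; bestdist = dist; }
            }
            if(bestlen >= 3)
            {
                printf("match: length %zu, distance %zu\n", bestlen, bestdist);
                pos += bestlen;
            }
            else
            {
                printf("literal: %c\n", in[pos]);
                ++pos;
            }
        }
    }

    int main(void)
    {
        const char* s = "abcabcabcabd";
        /*prints 3 literals, then a self-overlapping match of length 8, distance 3, then a literal*/
        lz77_naive((const unsigned char*)s, strlen(s), 32768);
        return 0;
    }
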
When doing LZ77 encoding, a -sliding window (of windowsize) is used, and all past bytes in that window can be used as -the "dictionary". A brute force search through all possible distances would be slow, and -this hash technique is one out of several ways to speed this up. -*/ -static unsigned encodeLZ77(uivector* out, Hash* hash, - const unsigned char* in, size_t inpos, size_t insize, unsigned windowsize, - unsigned minmatch, unsigned nicematch, unsigned lazymatching) -{ - size_t pos; - unsigned i, error = 0; - /*for large window lengths, assume the user wants no compression loss. Otherwise, max hash chain length speedup.*/ - unsigned maxchainlength = windowsize >= 8192 ? windowsize : windowsize / 8; - unsigned maxlazymatch = windowsize >= 8192 ? MAX_SUPPORTED_DEFLATE_LENGTH : 64; - - unsigned usezeros = 1; /*not sure if setting it to false for windowsize < 8192 is better or worse*/ - unsigned numzeros = 0; - - unsigned offset; /*the offset represents the distance in LZ77 terminology*/ - unsigned length; - unsigned lazy = 0; - unsigned lazylength = 0, lazyoffset = 0; - unsigned hashval; - unsigned current_offset, current_length; - unsigned prev_offset; - const unsigned char *lastptr, *foreptr, *backptr; - unsigned hashpos; - - if(windowsize == 0 || windowsize > 32768) return 60; /*error: windowsize smaller/larger than allowed*/ - if((windowsize & (windowsize - 1)) != 0) return 90; /*error: must be power of two*/ - - if(nicematch > MAX_SUPPORTED_DEFLATE_LENGTH) nicematch = MAX_SUPPORTED_DEFLATE_LENGTH; - - for(pos = inpos; pos < insize; ++pos) - { - size_t wpos = pos & (windowsize - 1); /*position for in 'circular' hash buffers*/ - unsigned chainlength = 0; - - hashval = getHash(in, insize, pos); - - if(usezeros && hashval == 0) - { - if(numzeros == 0) numzeros = countZeros(in, insize, pos); - else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros; - } - else - { - numzeros = 0; - } - - updateHashChain(hash, wpos, hashval, numzeros); - - /*the length and offset found for the current position*/ - length = 0; - offset = 0; - - hashpos = hash->chain[wpos]; - - lastptr = &in[insize < pos + MAX_SUPPORTED_DEFLATE_LENGTH ? insize : pos + MAX_SUPPORTED_DEFLATE_LENGTH]; - - /*search for the longest string*/ - prev_offset = 0; - for(;;) - { - if(chainlength++ >= maxchainlength) break; - current_offset = hashpos <= wpos ? (unsigned int)(wpos - hashpos) : (unsigned int)(wpos - hashpos + windowsize); - - if(current_offset < prev_offset) break; /*stop when went completely around the circular buffer*/ - prev_offset = current_offset; - if(current_offset > 0) - { - /*test the next characters*/ - foreptr = &in[pos]; - backptr = &in[pos - current_offset]; - - /*common case in PNGs is lots of zeros. Quickly skip over them as a speedup*/ - if(numzeros >= 3) - { - unsigned skip = hash->zeros[hashpos]; - if(skip > numzeros) skip = numzeros; - backptr += skip; - foreptr += skip; - } - - while(foreptr != lastptr && *backptr == *foreptr) /*maximum supported length by deflate is max length*/ - { - ++backptr; - ++foreptr; - } - current_length = (unsigned)(foreptr - &in[pos]); - - if(current_length > length) - { - length = current_length; /*the longest length*/ - offset = current_offset; /*the offset that is related to this longest length*/ - /*jump out once a length of max length is found (speed gain). 
This also jumps - out if length is MAX_SUPPORTED_DEFLATE_LENGTH*/ - if(current_length >= nicematch) break; - } - } - - if(hashpos == hash->chain[hashpos]) break; - - if(numzeros >= 3 && length > numzeros) - { - hashpos = hash->chainz[hashpos]; - if(hash->zeros[hashpos] != numzeros) break; - } - else - { - hashpos = hash->chain[hashpos]; - /*outdated hash value, happens if particular value was not encountered in whole last window*/ - if(hash->val[hashpos] != (int)hashval) break; - } - } - - if(lazymatching) - { - if(!lazy && length >= 3 && length <= maxlazymatch && length < MAX_SUPPORTED_DEFLATE_LENGTH) - { - lazy = 1; - lazylength = length; - lazyoffset = offset; - continue; /*try the next byte*/ - } - if(lazy) - { - lazy = 0; - if(pos == 0) ERROR_BREAK(81); - if(length > lazylength + 1) - { - /*push the previous character as literal*/ - if(!uivector_push_back(out, in[pos - 1])) ERROR_BREAK(83 /*alloc fail*/); - } - else - { - length = lazylength; - offset = lazyoffset; - hash->head[hashval] = -1; /*the same hashchain update will be done, this ensures no wrong alteration*/ - hash->headz[numzeros] = -1; /*idem*/ - --pos; - } - } - } - if(length >= 3 && offset > windowsize) ERROR_BREAK(86 /*too big (or overflown negative) offset*/); - - /*encode it as length/distance pair or literal value*/ - if(length < 3) /*only lengths of 3 or higher are supported as length/distance pair*/ - { - if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/); - } - else if(length < minmatch || (length == 3 && offset > 4096)) - { - /*compensate for the fact that longer offsets have more extra bits, a - length of only 3 may be not worth it then*/ - if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/); - } - else - { - addLengthDistance(out, length, offset); - for(i = 1; i < length; ++i) - { - ++pos; - wpos = pos & (windowsize - 1); - hashval = getHash(in, insize, pos); - if(usezeros && hashval == 0) - { - if(numzeros == 0) numzeros = countZeros(in, insize, pos); - else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros; - } - else - { - numzeros = 0; - } - updateHashChain(hash, wpos, hashval, numzeros); - } - } - } /*end of the loop through each character of input*/ - - return error; -} - -/* /////////////////////////////////////////////////////////////////////////// */ - -static unsigned deflateNoCompression(ucvector* out, const unsigned char* data, size_t datasize) -{ - /*non compressed deflate block data: 1 bit BFINAL,2 bits BTYPE,(5 bits): it jumps to start of next byte, - 2 bytes LEN, 2 bytes NLEN, LEN bytes literal DATA*/ - - size_t i, j, numdeflateblocks = (datasize + 65534) / 65535; - unsigned datapos = 0; - for(i = 0; i != numdeflateblocks; ++i) - { - unsigned BFINAL, BTYPE, LEN, NLEN; - unsigned char firstbyte; - - BFINAL = (i == numdeflateblocks - 1); - BTYPE = 0; - - firstbyte = (unsigned char)(BFINAL + ((BTYPE & 1) << 1) + ((BTYPE & 2) << 1)); - ucvector_push_back(out, firstbyte); - - LEN = 65535; - if(datasize - datapos < 65535) LEN = (unsigned)datasize - datapos; - NLEN = 65535 - LEN; - - ucvector_push_back(out, (unsigned char)(LEN & 255)); - ucvector_push_back(out, (unsigned char)(LEN >> 8)); - ucvector_push_back(out, (unsigned char)(NLEN & 255)); - ucvector_push_back(out, (unsigned char)(NLEN >> 8)); - - /*Decompressed data*/ - for(j = 0; j < 65535 && datapos < datasize; ++j) - { - ucvector_push_back(out, data[datapos++]); - } - } - - return 0; -} - -/* -write the lz77-encoded data, which has lit, len and dist codes, to compressed stream using huffman 
trees. -tree_ll: the tree for lit and len codes. -tree_d: the tree for distance codes. -*/ -static void writeLZ77data(size_t* bp, ucvector* out, const uivector* lz77_encoded, - const HuffmanTree* tree_ll, const HuffmanTree* tree_d) -{ - size_t i = 0; - for(i = 0; i != lz77_encoded->size; ++i) - { - unsigned val = lz77_encoded->data[i]; - addHuffmanSymbol(bp, out, HuffmanTree_getCode(tree_ll, val), HuffmanTree_getLength(tree_ll, val)); - if(val > 256) /*for a length code, 3 more things have to be added*/ - { - unsigned length_index = val - FIRST_LENGTH_CODE_INDEX; - unsigned n_length_extra_bits = LENGTHEXTRA[length_index]; - unsigned length_extra_bits = lz77_encoded->data[++i]; - - unsigned distance_code = lz77_encoded->data[++i]; - - unsigned distance_index = distance_code; - unsigned n_distance_extra_bits = DISTANCEEXTRA[distance_index]; - unsigned distance_extra_bits = lz77_encoded->data[++i]; - - addBitsToStream(bp, out, length_extra_bits, n_length_extra_bits); - addHuffmanSymbol(bp, out, HuffmanTree_getCode(tree_d, distance_code), - HuffmanTree_getLength(tree_d, distance_code)); - addBitsToStream(bp, out, distance_extra_bits, n_distance_extra_bits); - } - } -} - -/*Deflate for a block of type "dynamic", that is, with freely, optimally, created huffman trees*/ -static unsigned deflateDynamic(ucvector* out, size_t* bp, Hash* hash, - const unsigned char* data, size_t datapos, size_t dataend, - const LodePNGCompressSettings* settings, unsigned final) -{ - unsigned error = 0; - - /* - A block is compressed as follows: The PNG data is lz77 encoded, resulting in - literal bytes and length/distance pairs. This is then huffman compressed with - two huffman trees. One huffman tree is used for the lit and len values ("ll"), - another huffman tree is used for the dist values ("d"). These two trees are - stored using their code lengths, and to compress even more these code lengths - are also run-length encoded and huffman compressed. This gives a huffman tree - of code lengths "cl". The code lenghts used to describe this third tree are - the code length code lengths ("clcl"). - */ - - /*The lz77 encoded data, represented with integers since there will also be length and distance codes in it*/ - uivector lz77_encoded; - HuffmanTree tree_ll; /*tree for lit,len values*/ - HuffmanTree tree_d; /*tree for distance codes*/ - HuffmanTree tree_cl; /*tree for encoding the code lengths representing tree_ll and tree_d*/ - uivector frequencies_ll; /*frequency of lit,len codes*/ - uivector frequencies_d; /*frequency of dist codes*/ - uivector frequencies_cl; /*frequency of code length codes*/ - uivector bitlen_lld; /*lit,len,dist code lenghts (int bits), literally (without repeat codes).*/ - uivector bitlen_lld_e; /*bitlen_lld encoded with repeat codes (this is a rudemtary run length compression)*/ - /*bitlen_cl is the code length code lengths ("clcl"). The bit lengths of codes to represent tree_cl - (these are written as is in the file, it would be crazy to compress these using yet another huffman - tree that needs to be represented by yet another set of code lengths)*/ - uivector bitlen_cl; - size_t datasize = dataend - datapos; - - /* - Due to the huffman compression of huffman tree representations ("two levels"), there are some anologies: - bitlen_lld is to tree_cl what data is to tree_ll and tree_d. - bitlen_lld_e is to bitlen_lld what lz77_encoded is to data. - bitlen_cl is to bitlen_lld_e what bitlen_lld is to lz77_encoded. 
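
The repeat codes used for that run-length step are 16 (copy the previous length 3-6 times), 17 (3-10 zeros) and 18 (11-138 zeros), mirroring what getTreeInflateDynamic reads back on the decoder side. A small standalone sketch of how such (symbol, extra-bits value) pairs expand into plain code lengths (ClSym and expand_code_lengths are illustrative names, not lodepng types):

    #include <stdio.h>

    typedef struct { unsigned symbol; unsigned extra; } ClSym;

    /* illustrative only: out must be large enough for the expanded lengths */
    static unsigned expand_code_lengths(const ClSym* in, unsigned count, unsigned* out)
    {
        unsigned i, r, n = 0;
        for(i = 0; i != count; ++i)
        {
            if(in[i].symbol <= 15) out[n++] = in[i].symbol;               /*a literal code length*/
            else if(in[i].symbol == 16)                                    /*repeat previous 3-6 times*/
            {
                unsigned prev = out[n - 1];
                for(r = 0; r != in[i].extra + 3; ++r) out[n++] = prev;
            }
            else if(in[i].symbol == 17)                                    /*3-10 zeros*/
                for(r = 0; r != in[i].extra + 3; ++r) out[n++] = 0;
            else                                                           /*18: 11-138 zeros*/
                for(r = 0; r != in[i].extra + 11; ++r) out[n++] = 0;
        }
        return n; /*number of code lengths produced*/
    }

    int main(void)
    {
        /*{8}, {16, extra 1}, {18, extra 2}, {7} -> 8 repeated 5 times, 13 zeros, 7*/
        ClSym syms[4] = { {8, 0}, {16, 1}, {18, 2}, {7, 0} };
        unsigned lengths[32], i;
        unsigned n = expand_code_lengths(syms, 4, lengths);
        for(i = 0; i != n; ++i) printf("%u ", lengths[i]);
        printf("\n");
        return 0;
    }
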
- */ - - unsigned BFINAL = final; - size_t numcodes_ll, numcodes_d, i; - unsigned HLIT, HDIST, HCLEN; - - uivector_init(&lz77_encoded); - HuffmanTree_init(&tree_ll); - HuffmanTree_init(&tree_d); - HuffmanTree_init(&tree_cl); - uivector_init(&frequencies_ll); - uivector_init(&frequencies_d); - uivector_init(&frequencies_cl); - uivector_init(&bitlen_lld); - uivector_init(&bitlen_lld_e); - uivector_init(&bitlen_cl); - - /*This while loop never loops due to a break at the end, it is here to - allow breaking out of it to the cleanup phase on error conditions.*/ - while(!error) - { - if(settings->use_lz77) - { - error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize, - settings->minmatch, settings->nicematch, settings->lazymatching); - if(error) break; - } - else - { - if(!uivector_resize(&lz77_encoded, datasize)) ERROR_BREAK(83 /*alloc fail*/); - for(i = datapos; i < dataend; ++i) lz77_encoded.data[i - datapos] = data[i]; /*no LZ77, but still will be Huffman compressed*/ - } - - if(!uivector_resizev(&frequencies_ll, 286, 0)) ERROR_BREAK(83 /*alloc fail*/); - if(!uivector_resizev(&frequencies_d, 30, 0)) ERROR_BREAK(83 /*alloc fail*/); - - /*Count the frequencies of lit, len and dist codes*/ - for(i = 0; i != lz77_encoded.size; ++i) - { - unsigned symbol = lz77_encoded.data[i]; - ++frequencies_ll.data[symbol]; - if(symbol > 256) - { - unsigned dist = lz77_encoded.data[i + 2]; - ++frequencies_d.data[dist]; - i += 3; - } - } - frequencies_ll.data[256] = 1; /*there will be exactly 1 end code, at the end of the block*/ - - /*Make both huffman trees, one for the lit and len codes, one for the dist codes*/ - error = HuffmanTree_makeFromFrequencies(&tree_ll, frequencies_ll.data, 257, frequencies_ll.size, 15); - if(error) break; - /*2, not 1, is chosen for mincodes: some buggy PNG decoders require at least 2 symbols in the dist tree*/ - error = HuffmanTree_makeFromFrequencies(&tree_d, frequencies_d.data, 2, frequencies_d.size, 15); - if(error) break; - - numcodes_ll = tree_ll.numcodes; if(numcodes_ll > 286) numcodes_ll = 286; - numcodes_d = tree_d.numcodes; if(numcodes_d > 30) numcodes_d = 30; - /*store the code lengths of both generated trees in bitlen_lld*/ - for(i = 0; i != numcodes_ll; ++i) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_ll, (unsigned)i)); - for(i = 0; i != numcodes_d; ++i) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_d, (unsigned)i)); - - /*run-length compress bitlen_ldd into bitlen_lld_e by using repeat codes 16 (copy length 3-6 times), - 17 (3-10 zeroes), 18 (11-138 zeroes)*/ - for(i = 0; i != (unsigned)bitlen_lld.size; ++i) - { - unsigned j = 0; /*amount of repititions*/ - while(i + j + 1 < (unsigned)bitlen_lld.size && bitlen_lld.data[i + j + 1] == bitlen_lld.data[i]) ++j; - - if(bitlen_lld.data[i] == 0 && j >= 2) /*repeat code for zeroes*/ - { - ++j; /*include the first zero*/ - if(j <= 10) /*repeat code 17 supports max 10 zeroes*/ - { - uivector_push_back(&bitlen_lld_e, 17); - uivector_push_back(&bitlen_lld_e, j - 3); - } - else /*repeat code 18 supports max 138 zeroes*/ - { - if(j > 138) j = 138; - uivector_push_back(&bitlen_lld_e, 18); - uivector_push_back(&bitlen_lld_e, j - 11); - } - i += (j - 1); - } - else if(j >= 3) /*repeat code for value other than zero*/ - { - size_t k; - unsigned num = j / 6, rest = j % 6; - uivector_push_back(&bitlen_lld_e, bitlen_lld.data[i]); - for(k = 0; k < num; ++k) - { - uivector_push_back(&bitlen_lld_e, 16); - uivector_push_back(&bitlen_lld_e, 6 - 3); - } - if(rest >= 3) - { - 
uivector_push_back(&bitlen_lld_e, 16); - uivector_push_back(&bitlen_lld_e, rest - 3); - } - else j -= rest; - i += j; - } - else /*too short to benefit from repeat code*/ - { - uivector_push_back(&bitlen_lld_e, bitlen_lld.data[i]); - } - } - - /*generate tree_cl, the huffmantree of huffmantrees*/ - - if(!uivector_resizev(&frequencies_cl, NUM_CODE_LENGTH_CODES, 0)) ERROR_BREAK(83 /*alloc fail*/); - for(i = 0; i != bitlen_lld_e.size; ++i) - { - ++frequencies_cl.data[bitlen_lld_e.data[i]]; - /*after a repeat code come the bits that specify the number of repetitions, - those don't need to be in the frequencies_cl calculation*/ - if(bitlen_lld_e.data[i] >= 16) ++i; - } - - error = HuffmanTree_makeFromFrequencies(&tree_cl, frequencies_cl.data, - frequencies_cl.size, frequencies_cl.size, 7); - if(error) break; - - if(!uivector_resize(&bitlen_cl, tree_cl.numcodes)) ERROR_BREAK(83 /*alloc fail*/); - for(i = 0; i != tree_cl.numcodes; ++i) - { - /*lenghts of code length tree is in the order as specified by deflate*/ - bitlen_cl.data[i] = HuffmanTree_getLength(&tree_cl, CLCL_ORDER[i]); - } - while(bitlen_cl.data[bitlen_cl.size - 1] == 0 && bitlen_cl.size > 4) - { - /*remove zeros at the end, but minimum size must be 4*/ - if(!uivector_resize(&bitlen_cl, bitlen_cl.size - 1)) ERROR_BREAK(83 /*alloc fail*/); - } - if(error) break; - - /* - Write everything into the output - - After the BFINAL and BTYPE, the dynamic block consists out of the following: - - 5 bits HLIT, 5 bits HDIST, 4 bits HCLEN - - (HCLEN+4)*3 bits code lengths of code length alphabet - - HLIT + 257 code lenghts of lit/length alphabet (encoded using the code length - alphabet, + possible repetition codes 16, 17, 18) - - HDIST + 1 code lengths of distance alphabet (encoded using the code length - alphabet, + possible repetition codes 16, 17, 18) - - compressed data - - 256 (end code) - */ - - /*Write block type*/ - addBitToStream(bp, out, BFINAL); - addBitToStream(bp, out, 0); /*first bit of BTYPE "dynamic"*/ - addBitToStream(bp, out, 1); /*second bit of BTYPE "dynamic"*/ - - /*write the HLIT, HDIST and HCLEN values*/ - HLIT = (unsigned)(numcodes_ll - 257); - HDIST = (unsigned)(numcodes_d - 1); - HCLEN = (unsigned)bitlen_cl.size - 4; - /*trim zeroes for HCLEN. 
HLIT and HDIST were already trimmed at tree creation*/ - while(!bitlen_cl.data[HCLEN + 4 - 1] && HCLEN > 0) --HCLEN; - addBitsToStream(bp, out, HLIT, 5); - addBitsToStream(bp, out, HDIST, 5); - addBitsToStream(bp, out, HCLEN, 4); - - /*write the code lenghts of the code length alphabet*/ - for(i = 0; i != HCLEN + 4; ++i) addBitsToStream(bp, out, bitlen_cl.data[i], 3); - - /*write the lenghts of the lit/len AND the dist alphabet*/ - for(i = 0; i != bitlen_lld_e.size; ++i) - { - addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_cl, bitlen_lld_e.data[i]), - HuffmanTree_getLength(&tree_cl, bitlen_lld_e.data[i])); - /*extra bits of repeat codes*/ - if(bitlen_lld_e.data[i] == 16) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 2); - else if(bitlen_lld_e.data[i] == 17) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 3); - else if(bitlen_lld_e.data[i] == 18) addBitsToStream(bp, out, bitlen_lld_e.data[++i], 7); - } - - /*write the compressed data symbols*/ - writeLZ77data(bp, out, &lz77_encoded, &tree_ll, &tree_d); - /*error: the length of the end code 256 must be larger than 0*/ - if(HuffmanTree_getLength(&tree_ll, 256) == 0) ERROR_BREAK(64); - - /*write the end code*/ - addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_ll, 256), HuffmanTree_getLength(&tree_ll, 256)); - - break; /*end of error-while*/ - } - - /*cleanup*/ - uivector_cleanup(&lz77_encoded); - HuffmanTree_cleanup(&tree_ll); - HuffmanTree_cleanup(&tree_d); - HuffmanTree_cleanup(&tree_cl); - uivector_cleanup(&frequencies_ll); - uivector_cleanup(&frequencies_d); - uivector_cleanup(&frequencies_cl); - uivector_cleanup(&bitlen_lld_e); - uivector_cleanup(&bitlen_lld); - uivector_cleanup(&bitlen_cl); - - return error; -} - -static unsigned deflateFixed(ucvector* out, size_t* bp, Hash* hash, - const unsigned char* data, - size_t datapos, size_t dataend, - const LodePNGCompressSettings* settings, unsigned final) -{ - HuffmanTree tree_ll; /*tree for literal values and length codes*/ - HuffmanTree tree_d; /*tree for distance codes*/ - - unsigned BFINAL = final; - unsigned error = 0; - size_t i; - - HuffmanTree_init(&tree_ll); - HuffmanTree_init(&tree_d); - - generateFixedLitLenTree(&tree_ll); - generateFixedDistanceTree(&tree_d); - - addBitToStream(bp, out, BFINAL); - addBitToStream(bp, out, 1); /*first bit of BTYPE*/ - addBitToStream(bp, out, 0); /*second bit of BTYPE*/ - - if(settings->use_lz77) /*LZ77 encoded*/ - { - uivector lz77_encoded; - uivector_init(&lz77_encoded); - error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize, - settings->minmatch, settings->nicematch, settings->lazymatching); - if(!error) writeLZ77data(bp, out, &lz77_encoded, &tree_ll, &tree_d); - uivector_cleanup(&lz77_encoded); - } - else /*no LZ77, but still will be Huffman compressed*/ - { - for(i = datapos; i < dataend; ++i) - { - addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_ll, data[i]), HuffmanTree_getLength(&tree_ll, data[i])); - } - } - /*add END code*/ - if(!error) addHuffmanSymbol(bp, out, HuffmanTree_getCode(&tree_ll, 256), HuffmanTree_getLength(&tree_ll, 256)); - - /*cleanup*/ - HuffmanTree_cleanup(&tree_ll); - HuffmanTree_cleanup(&tree_d); - - return error; -} - -static unsigned lodepng_deflatev(ucvector* out, const unsigned char* in, size_t insize, - const LodePNGCompressSettings* settings) -{ - unsigned error = 0; - size_t i, blocksize, numdeflateblocks; - size_t bp = 0; /*the bit pointer*/ - Hash hash; - - if(settings->btype > 2) return 61; - else if(settings->btype == 0) return deflateNoCompression(out, in, 
insize); - else if(settings->btype == 1) blocksize = insize; - else /*if(settings->btype == 2)*/ - { - /*on PNGs, deflate blocks of 65-262k seem to give most dense encoding*/ - blocksize = insize / 8 + 8; - if(blocksize < 65536) blocksize = 65536; - if(blocksize > 262144) blocksize = 262144; - } - - numdeflateblocks = (insize + blocksize - 1) / blocksize; - if(numdeflateblocks == 0) numdeflateblocks = 1; - - error = hash_init(&hash, settings->windowsize); - if(error) return error; - - for(i = 0; i != numdeflateblocks && !error; ++i) - { - unsigned final = (i == numdeflateblocks - 1); - size_t start = i * blocksize; - size_t end = start + blocksize; - if(end > insize) end = insize; - - if(settings->btype == 1) error = deflateFixed(out, &bp, &hash, in, start, end, settings, final); - else if(settings->btype == 2) error = deflateDynamic(out, &bp, &hash, in, start, end, settings, final); - } - - hash_cleanup(&hash); - - return error; -} - -unsigned lodepng_deflate(unsigned char** out, size_t* outsize, - const unsigned char* in, size_t insize, - const LodePNGCompressSettings* settings) -{ - unsigned error; - ucvector v; - ucvector_init_buffer(&v, *out, *outsize); - error = lodepng_deflatev(&v, in, insize, settings); - *out = v.data; - *outsize = v.size; - return error; -} - -static unsigned deflate(unsigned char** out, size_t* outsize, - const unsigned char* in, size_t insize, - const LodePNGCompressSettings* settings) -{ - if(settings->custom_deflate) - { - return settings->custom_deflate(out, outsize, in, insize, settings); - } - else - { - return lodepng_deflate(out, outsize, in, insize, settings); - } -} - -#endif /*LODEPNG_COMPILE_DECODER*/ - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / Adler32 */ -/* ////////////////////////////////////////////////////////////////////////// */ - -static unsigned update_adler32(unsigned adler, const unsigned char* data, unsigned len) -{ - unsigned s1 = adler & 0xffff; - unsigned s2 = (adler >> 16) & 0xffff; - - while(len > 0) - { - /*at least 5550 sums can be done before the sums overflow, saving a lot of module divisions*/ - unsigned amount = len > 5550 ? 
5550 : len; - len -= amount; - while(amount > 0) - { - s1 += (*data++); - s2 += s1; - --amount; - } - s1 %= 65521; - s2 %= 65521; - } - - return (s2 << 16) | s1; -} - -/*Return the adler32 of the bytes data[0..len-1]*/ -static unsigned adler32(const unsigned char* data, unsigned len) -{ - return update_adler32(1L, data, len); -} - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / Zlib / */ -/* ////////////////////////////////////////////////////////////////////////// */ - -#ifdef LODEPNG_COMPILE_DECODER - -unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in, - size_t insize, const LodePNGDecompressSettings* settings) -{ - unsigned error = 0; - unsigned CM, CINFO, FDICT; - - if(insize < 2) return 53; /*error, size of zlib data too small*/ - /*read information from zlib header*/ - if((in[0] * 256 + in[1]) % 31 != 0) - { - /*error: 256 * in[0] + in[1] must be a multiple of 31, the FCHECK value is supposed to be made that way*/ - return 24; - } - - CM = in[0] & 15; - CINFO = (in[0] >> 4) & 15; - /*FCHECK = in[1] & 31;*/ /*FCHECK is already tested above*/ - FDICT = (in[1] >> 5) & 1; - /*FLEVEL = (in[1] >> 6) & 3;*/ /*FLEVEL is not used here*/ - - if(CM != 8 || CINFO > 7) - { - /*error: only compression method 8: inflate with sliding window of 32k is supported by the PNG spec*/ - return 25; - } - if(FDICT != 0) - { - /*error: the specification of PNG says about the zlib stream: - "The additional flags shall not specify a preset dictionary."*/ - return 26; - } - - error = inflate(out, outsize, in + 2, insize - 2, settings); - if(error) return error; - - if(!settings->ignore_adler32) - { - unsigned ADLER32 = lodepng_read32bitInt(&in[insize - 4]); - unsigned checksum = adler32(*out, (unsigned)(*outsize)); - if(checksum != ADLER32) return 58; /*error, adler checksum not correct, data must be corrupted*/ - } - - return 0; /*no error*/ -} - -static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in, - size_t insize, const LodePNGDecompressSettings* settings) -{ - if(settings->custom_zlib) - { - return settings->custom_zlib(out, outsize, in, insize, settings); - } - else - { - return lodepng_zlib_decompress(out, outsize, in, insize, settings); - } -} - -#endif /*LODEPNG_COMPILE_DECODER*/ - -#ifdef LODEPNG_COMPILE_ENCODER - -unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in, - size_t insize, const LodePNGCompressSettings* settings) -{ - /*initially, *out must be NULL and outsize 0, if you just give some random *out - that's pointing to a non allocated buffer, this'll crash*/ - ucvector outv; - size_t i; - unsigned error; - unsigned char* deflatedata = 0; - size_t deflatesize = 0; - - /*zlib data: 1 byte CMF (CM+CINFO), 1 byte FLG, deflate data, 4 byte ADLER32 checksum of the Decompressed data*/ - unsigned CMF = 120; /*0b01111000: CM 8, CINFO 7. 
With CINFO 7, any window size up to 32768 can be used.*/ - unsigned FLEVEL = 0; - unsigned FDICT = 0; - unsigned CMFFLG = 256 * CMF + FDICT * 32 + FLEVEL * 64; - unsigned FCHECK = 31 - CMFFLG % 31; - CMFFLG += FCHECK; - - /*ucvector-controlled version of the output buffer, for dynamic array*/ - ucvector_init_buffer(&outv, *out, *outsize); - - ucvector_push_back(&outv, (unsigned char)(CMFFLG >> 8)); - ucvector_push_back(&outv, (unsigned char)(CMFFLG & 255)); - - error = deflate(&deflatedata, &deflatesize, in, insize, settings); - - if(!error) - { - unsigned ADLER32 = adler32(in, (unsigned)insize); - for(i = 0; i != deflatesize; ++i) ucvector_push_back(&outv, deflatedata[i]); - lodepng_free(deflatedata); - lodepng_add32bitInt(&outv, ADLER32); - } - - *out = outv.data; - *outsize = outv.size; - - return error; -} - -/* compress using the default or custom zlib function */ -static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in, - size_t insize, const LodePNGCompressSettings* settings) -{ - if(settings->custom_zlib) - { - return settings->custom_zlib(out, outsize, in, insize, settings); - } - else - { - return lodepng_zlib_compress(out, outsize, in, insize, settings); - } -} - -#endif /*LODEPNG_COMPILE_ENCODER*/ - -#else /*no LODEPNG_COMPILE_ZLIB*/ - -#ifdef LODEPNG_COMPILE_DECODER -static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in, - size_t insize, const LodePNGDecompressSettings* settings) -{ - if(!settings->custom_zlib) return 87; /*no custom zlib function provided */ - return settings->custom_zlib(out, outsize, in, insize, settings); -} -#endif /*LODEPNG_COMPILE_DECODER*/ -#ifdef LODEPNG_COMPILE_ENCODER -static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in, - size_t insize, const LodePNGCompressSettings* settings) -{ - if(!settings->custom_zlib) return 87; /*no custom zlib function provided */ - return settings->custom_zlib(out, outsize, in, insize, settings); -} -#endif /*LODEPNG_COMPILE_ENCODER*/ - -#endif /*LODEPNG_COMPILE_ZLIB*/ - -/* ////////////////////////////////////////////////////////////////////////// */ - -#ifdef LODEPNG_COMPILE_ENCODER - -/*this is a good tradeoff between speed and compression ratio*/ -#define DEFAULT_WINDOWSIZE 2048 - -void lodepng_compress_settings_init(LodePNGCompressSettings* settings) -{ - /*compress with dynamic huffman tree (not in the mathematical sense, just not the predefined one)*/ - settings->btype = 2; - settings->use_lz77 = 1; - settings->windowsize = DEFAULT_WINDOWSIZE; - settings->minmatch = 3; - settings->nicematch = 128; - settings->lazymatching = 1; - - settings->custom_zlib = 0; - settings->custom_deflate = 0; - settings->custom_context = 0; -} - -const LodePNGCompressSettings lodepng_default_compress_settings = {2, 1, DEFAULT_WINDOWSIZE, 3, 128, 1, 0, 0, 0}; - - -#endif /*LODEPNG_COMPILE_ENCODER*/ - -#ifdef LODEPNG_COMPILE_DECODER - -void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings) -{ - settings->ignore_adler32 = 0; - - settings->custom_zlib = 0; - settings->custom_inflate = 0; - settings->custom_context = 0; -} - -const LodePNGDecompressSettings lodepng_default_decompress_settings = {0, 0, 0, 0}; - -#endif /*LODEPNG_COMPILE_DECODER*/ - -/* ////////////////////////////////////////////////////////////////////////// */ -/* ////////////////////////////////////////////////////////////////////////// */ -/* // End of Zlib related code. Begin of PNG related code. 
// */ -/* ////////////////////////////////////////////////////////////////////////// */ -/* ////////////////////////////////////////////////////////////////////////// */ - -#ifdef LODEPNG_COMPILE_PNG - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / CRC32 / */ -/* ////////////////////////////////////////////////////////////////////////// */ - - -#ifndef LODEPNG_NO_COMPILE_CRC -/* CRC polynomial: 0xedb88320 */ -static unsigned lodepng_crc32_table[256] = { - 0u, 1996959894u, 3993919788u, 2567524794u, 124634137u, 1886057615u, 3915621685u, 2657392035u, - 249268274u, 2044508324u, 3772115230u, 2547177864u, 162941995u, 2125561021u, 3887607047u, 2428444049u, - 498536548u, 1789927666u, 4089016648u, 2227061214u, 450548861u, 1843258603u, 4107580753u, 2211677639u, - 325883990u, 1684777152u, 4251122042u, 2321926636u, 335633487u, 1661365465u, 4195302755u, 2366115317u, - 997073096u, 1281953886u, 3579855332u, 2724688242u, 1006888145u, 1258607687u, 3524101629u, 2768942443u, - 901097722u, 1119000684u, 3686517206u, 2898065728u, 853044451u, 1172266101u, 3705015759u, 2882616665u, - 651767980u, 1373503546u, 3369554304u, 3218104598u, 565507253u, 1454621731u, 3485111705u, 3099436303u, - 671266974u, 1594198024u, 3322730930u, 2970347812u, 795835527u, 1483230225u, 3244367275u, 3060149565u, - 1994146192u, 31158534u, 2563907772u, 4023717930u, 1907459465u, 112637215u, 2680153253u, 3904427059u, - 2013776290u, 251722036u, 2517215374u, 3775830040u, 2137656763u, 141376813u, 2439277719u, 3865271297u, - 1802195444u, 476864866u, 2238001368u, 4066508878u, 1812370925u, 453092731u, 2181625025u, 4111451223u, - 1706088902u, 314042704u, 2344532202u, 4240017532u, 1658658271u, 366619977u, 2362670323u, 4224994405u, - 1303535960u, 984961486u, 2747007092u, 3569037538u, 1256170817u, 1037604311u, 2765210733u, 3554079995u, - 1131014506u, 879679996u, 2909243462u, 3663771856u, 1141124467u, 855842277u, 2852801631u, 3708648649u, - 1342533948u, 654459306u, 3188396048u, 3373015174u, 1466479909u, 544179635u, 3110523913u, 3462522015u, - 1591671054u, 702138776u, 2966460450u, 3352799412u, 1504918807u, 783551873u, 3082640443u, 3233442989u, - 3988292384u, 2596254646u, 62317068u, 1957810842u, 3939845945u, 2647816111u, 81470997u, 1943803523u, - 3814918930u, 2489596804u, 225274430u, 2053790376u, 3826175755u, 2466906013u, 167816743u, 2097651377u, - 4027552580u, 2265490386u, 503444072u, 1762050814u, 4150417245u, 2154129355u, 426522225u, 1852507879u, - 4275313526u, 2312317920u, 282753626u, 1742555852u, 4189708143u, 2394877945u, 397917763u, 1622183637u, - 3604390888u, 2714866558u, 953729732u, 1340076626u, 3518719985u, 2797360999u, 1068828381u, 1219638859u, - 3624741850u, 2936675148u, 906185462u, 1090812512u, 3747672003u, 2825379669u, 829329135u, 1181335161u, - 3412177804u, 3160834842u, 628085408u, 1382605366u, 3423369109u, 3138078467u, 570562233u, 1426400815u, - 3317316542u, 2998733608u, 733239954u, 1555261956u, 3268935591u, 3050360625u, 752459403u, 1541320221u, - 2607071920u, 3965973030u, 1969922972u, 40735498u, 2617837225u, 3943577151u, 1913087877u, 83908371u, - 2512341634u, 3803740692u, 2075208622u, 213261112u, 2463272603u, 3855990285u, 2094854071u, 198958881u, - 2262029012u, 4057260610u, 1759359992u, 534414190u, 2176718541u, 4139329115u, 1873836001u, 414664567u, - 2282248934u, 4279200368u, 1711684554u, 285281116u, 2405801727u, 4167216745u, 1634467795u, 376229701u, - 2685067896u, 3608007406u, 1308918612u, 956543938u, 2808555105u, 3495958263u, 1231636301u, 1047427035u, - 2932959818u, 3654703836u, 1088359270u, 
936918000u, 2847714899u, 3736837829u, 1202900863u, 817233897u, - 3183342108u, 3401237130u, 1404277552u, 615818150u, 3134207493u, 3453421203u, 1423857449u, 601450431u, - 3009837614u, 3294710456u, 1567103746u, 711928724u, 3020668471u, 3272380065u, 1510334235u, 755167117u -}; - -/*Return the CRC of the bytes buf[0..len-1].*/ -unsigned lodepng_crc32(const unsigned char* data, size_t length) -{ - unsigned r = 0xffffffffu; - size_t i; - for(i = 0; i < length; ++i) - { - r = lodepng_crc32_table[(r ^ data[i]) & 0xff] ^ (r >> 8); - } - return r ^ 0xffffffffu; -} -#else /* !LODEPNG_NO_COMPILE_CRC */ -unsigned lodepng_crc32(const unsigned char* data, size_t length); -#endif /* !LODEPNG_NO_COMPILE_CRC */ - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / Reading and writing single bits and bytes from/to stream for LodePNG / */ -/* ////////////////////////////////////////////////////////////////////////// */ - -static unsigned char readBitFromReversedStream(size_t* bitpointer, const unsigned char* bitstream) -{ - unsigned char result = (unsigned char)((bitstream[(*bitpointer) >> 3] >> (7 - ((*bitpointer) & 0x7))) & 1); - ++(*bitpointer); - return result; -} - -static unsigned readBitsFromReversedStream(size_t* bitpointer, const unsigned char* bitstream, size_t nbits) -{ - unsigned result = 0; - size_t i; - for(i = nbits - 1; i < nbits; --i) - { - result += (unsigned)readBitFromReversedStream(bitpointer, bitstream) << i; - } - return result; -} - -#ifdef LODEPNG_COMPILE_DECODER -static void setBitOfReversedStream0(size_t* bitpointer, unsigned char* bitstream, unsigned char bit) -{ - /*the current bit in bitstream must be 0 for this to work*/ - if(bit) - { - /*earlier bit of huffman code is in a lesser significant bit of an earlier byte*/ - bitstream[(*bitpointer) >> 3] |= (bit << (7 - ((*bitpointer) & 0x7))); - } - ++(*bitpointer); -} -#endif /*LODEPNG_COMPILE_DECODER*/ - -static void setBitOfReversedStream(size_t* bitpointer, unsigned char* bitstream, unsigned char bit) -{ - /*the current bit in bitstream may be 0 or 1 for this to work*/ - if(bit == 0) bitstream[(*bitpointer) >> 3] &= (unsigned char)(~(1 << (7 - ((*bitpointer) & 0x7)))); - else bitstream[(*bitpointer) >> 3] |= (1 << (7 - ((*bitpointer) & 0x7))); - ++(*bitpointer); -} - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / PNG chunks / */ -/* ////////////////////////////////////////////////////////////////////////// */ - -unsigned lodepng_chunk_length(const unsigned char* chunk) -{ - return lodepng_read32bitInt(&chunk[0]); -} - -void lodepng_chunk_type(char type[5], const unsigned char* chunk) -{ - unsigned i; - for(i = 0; i != 4; ++i) type[i] = (char)chunk[4 + i]; - type[4] = 0; /*null termination char*/ -} - -unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type) -{ - if(strlen(type) != 4) return 0; - return (chunk[4] == type[0] && chunk[5] == type[1] && chunk[6] == type[2] && chunk[7] == type[3]); -} - -unsigned char lodepng_chunk_ancillary(const unsigned char* chunk) -{ - return((chunk[4] & 32) != 0); -} - -unsigned char lodepng_chunk_private(const unsigned char* chunk) -{ - return((chunk[6] & 32) != 0); -} - -unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk) -{ - return((chunk[7] & 32) != 0); -} - -unsigned char* lodepng_chunk_data(unsigned char* chunk) -{ - return &chunk[8]; -} - -const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk) -{ - return &chunk[8]; -} - -unsigned 
lodepng_chunk_check_crc(const unsigned char* chunk) -{ - unsigned length = lodepng_chunk_length(chunk); - unsigned CRC = lodepng_read32bitInt(&chunk[length + 8]); - /*the CRC is taken of the data and the 4 chunk type letters, not the length*/ - unsigned checksum = lodepng_crc32(&chunk[4], length + 4); - if(CRC != checksum) return 1; - else return 0; -} - -void lodepng_chunk_generate_crc(unsigned char* chunk) -{ - unsigned length = lodepng_chunk_length(chunk); - unsigned CRC = lodepng_crc32(&chunk[4], length + 4); - lodepng_set32bitInt(chunk + 8 + length, CRC); -} - -unsigned char* lodepng_chunk_next(unsigned char* chunk) -{ - unsigned total_chunk_length = lodepng_chunk_length(chunk) + 12; - return &chunk[total_chunk_length]; -} - -const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk) -{ - unsigned total_chunk_length = lodepng_chunk_length(chunk) + 12; - return &chunk[total_chunk_length]; -} - -unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk) -{ - unsigned i; - unsigned total_chunk_length = lodepng_chunk_length(chunk) + 12; - unsigned char *chunk_start, *new_buffer; - size_t new_length = (*outlength) + total_chunk_length; - if(new_length < total_chunk_length || new_length < (*outlength)) return 77; /*integer overflow happened*/ - - new_buffer = (unsigned char*)lodepng_realloc(*out, new_length); - if(!new_buffer) return 83; /*alloc fail*/ - (*out) = new_buffer; - (*outlength) = new_length; - chunk_start = &(*out)[new_length - total_chunk_length]; - - for(i = 0; i != total_chunk_length; ++i) chunk_start[i] = chunk[i]; - - return 0; -} - -unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length, - const char* type, const unsigned char* data) -{ - unsigned i; - unsigned char *chunk, *new_buffer; - size_t new_length = (*outlength) + length + 12; - if(new_length < length + 12 || new_length < (*outlength)) return 77; /*integer overflow happened*/ - new_buffer = (unsigned char*)lodepng_realloc(*out, new_length); - if(!new_buffer) return 83; /*alloc fail*/ - (*out) = new_buffer; - (*outlength) = new_length; - chunk = &(*out)[(*outlength) - length - 12]; - - /*1: length*/ - lodepng_set32bitInt(chunk, (unsigned)length); - - /*2: chunk name (4 letters)*/ - chunk[4] = (unsigned char)type[0]; - chunk[5] = (unsigned char)type[1]; - chunk[6] = (unsigned char)type[2]; - chunk[7] = (unsigned char)type[3]; - - /*3: the data*/ - for(i = 0; i != length; ++i) chunk[8 + i] = data[i]; - - /*4: CRC (of the chunkname characters and the data)*/ - lodepng_chunk_generate_crc(chunk); - - return 0; -} - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / Color types and such / */ -/* ////////////////////////////////////////////////////////////////////////// */ - -/*return type is a LodePNG error code*/ -static unsigned checkColorValidity(LodePNGColorType colortype, unsigned bd) /*bd = bitdepth*/ -{ - switch(colortype) - { - case 0: if(!(bd == 1 || bd == 2 || bd == 4 || bd == 8 || bd == 16)) return 37; break; /*grey*/ - case 2: if(!( bd == 8 || bd == 16)) return 37; break; /*RGB*/ - case 3: if(!(bd == 1 || bd == 2 || bd == 4 || bd == 8 )) return 37; break; /*palette*/ - case 4: if(!( bd == 8 || bd == 16)) return 37; break; /*grey + alpha*/ - case 6: if(!( bd == 8 || bd == 16)) return 37; break; /*RGBA*/ - default: return 31; - } - return 0; /*allowed color type / bits combination*/ -} - -static unsigned getNumColorChannels(LodePNGColorType colortype) -{ - switch(colortype) - { - case 0: 
return 1; /*grey*/ - case 2: return 3; /*RGB*/ - case 3: return 1; /*palette*/ - case 4: return 2; /*grey + alpha*/ - case 6: return 4; /*RGBA*/ - } - return 0; /*unexisting color type*/ -} - -static unsigned lodepng_get_bpp_lct(LodePNGColorType colortype, unsigned bitdepth) -{ - /*bits per pixel is amount of channels * bits per channel*/ - return getNumColorChannels(colortype) * bitdepth; -} - -/* ////////////////////////////////////////////////////////////////////////// */ - -void lodepng_color_mode_init(LodePNGColorMode* info) -{ - info->key_defined = 0; - info->key_r = info->key_g = info->key_b = 0; - info->colortype = LCT_RGBA; - info->bitdepth = 8; - info->palette = 0; - info->palettesize = 0; -} - -void lodepng_color_mode_cleanup(LodePNGColorMode* info) -{ - lodepng_palette_clear(info); -} - -unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source) -{ - size_t i; - lodepng_color_mode_cleanup(dest); - *dest = *source; - if(source->palette) - { - dest->palette = (unsigned char*)lodepng_malloc(1024); - if(!dest->palette && source->palettesize) return 83; /*alloc fail*/ - for(i = 0; i != source->palettesize * 4; ++i) dest->palette[i] = source->palette[i]; - } - return 0; -} - -static int lodepng_color_mode_equal(const LodePNGColorMode* a, const LodePNGColorMode* b) -{ - size_t i; - if(a->colortype != b->colortype) return 0; - if(a->bitdepth != b->bitdepth) return 0; - if(a->key_defined != b->key_defined) return 0; - if(a->key_defined) - { - if(a->key_r != b->key_r) return 0; - if(a->key_g != b->key_g) return 0; - if(a->key_b != b->key_b) return 0; - } - /*if one of the palette sizes is 0, then we consider it to be the same as the - other: it means that e.g. the palette was not given by the user and should be - considered the same as the palette inside the PNG.*/ - if(1/*a->palettesize != 0 && b->palettesize != 0*/) { - if(a->palettesize != b->palettesize) return 0; - for(i = 0; i != a->palettesize * 4; ++i) - { - if(a->palette[i] != b->palette[i]) return 0; - } - } - return 1; -} - -void lodepng_palette_clear(LodePNGColorMode* info) -{ - if(info->palette) lodepng_free(info->palette); - info->palette = 0; - info->palettesize = 0; -} - -unsigned lodepng_palette_add(LodePNGColorMode* info, - unsigned char r, unsigned char g, unsigned char b, unsigned char a) -{ - unsigned char* data; - /*the same resize technique as C++ std::vectors is used, and here it's made so that for a palette with - the max of 256 colors, it'll have the exact alloc size*/ - if(!info->palette) /*allocate palette if empty*/ - { - /*room for 256 colors with 4 bytes each*/ - data = (unsigned char*)lodepng_realloc(info->palette, 1024); - if(!data) return 83; /*alloc fail*/ - else info->palette = data; - } - info->palette[4 * info->palettesize + 0] = r; - info->palette[4 * info->palettesize + 1] = g; - info->palette[4 * info->palettesize + 2] = b; - info->palette[4 * info->palettesize + 3] = a; - ++info->palettesize; - return 0; -} - -unsigned lodepng_get_bpp(const LodePNGColorMode* info) -{ - /*calculate bits per pixel out of colortype and bitdepth*/ - return lodepng_get_bpp_lct(info->colortype, info->bitdepth); -} - -unsigned lodepng_get_channels(const LodePNGColorMode* info) -{ - return getNumColorChannels(info->colortype); -} - -unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info) -{ - return info->colortype == LCT_GREY || info->colortype == LCT_GREY_ALPHA; -} - -unsigned lodepng_is_alpha_type(const LodePNGColorMode* info) -{ - return (info->colortype & 4) != 0; /*4 or 6*/ -} 
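The color-mode helpers above reduce every LodePNG color type to a channel count multiplied by a bit depth. A minimal usage sketch of that public API, assuming the usual "lodepng.h" header (the include path inside this tree may differ):

/* Illustrative sketch only: exercises lodepng_get_channels()/lodepng_get_bpp()
   from the helpers above; assumes the public "lodepng.h" header is available. */
#include <stdio.h>
#include "lodepng.h"

static void print_mode_info(LodePNGColorType type, unsigned bitdepth)
{
    LodePNGColorMode mode;
    lodepng_color_mode_init(&mode);    /* defaults: LCT_RGBA, bitdepth 8, no palette */
    mode.colortype = type;
    mode.bitdepth = bitdepth;

    /* bits per pixel is simply channels * bitdepth, as lodepng_get_bpp_lct() computes */
    printf("colortype %d, bitdepth %u: %u channel(s), %u bpp\n",
           (int)type, bitdepth,
           lodepng_get_channels(&mode), lodepng_get_bpp(&mode));

    lodepng_color_mode_cleanup(&mode); /* frees the palette if one was added */
}

int main(void)
{
    print_mode_info(LCT_GREY, 1);  /* 1 channel,  1 bpp  */
    print_mode_info(LCT_RGB, 8);   /* 3 channels, 24 bpp */
    print_mode_info(LCT_RGBA, 16); /* 4 channels, 64 bpp */
    return 0;
}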
- -unsigned lodepng_is_palette_type(const LodePNGColorMode* info) -{ - return info->colortype == LCT_PALETTE; -} - -unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info) -{ - size_t i; - for(i = 0; i != info->palettesize; ++i) - { - if(info->palette[i * 4 + 3] < 255) return 1; - } - return 0; -} - -unsigned lodepng_can_have_alpha(const LodePNGColorMode* info) -{ - return info->key_defined - || lodepng_is_alpha_type(info) - || lodepng_has_palette_alpha(info); -} - -size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color) -{ - /*will not overflow for any color type if roughly w * h < 268435455*/ - int bpp = lodepng_get_bpp(color); - size_t n = w * h; - return ((n / 8) * bpp) + ((n & 7) * bpp + 7) / 8; -} - -size_t lodepng_get_raw_size_lct(unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth) -{ - /*will not overflow for any color type if roughly w * h < 268435455*/ - int bpp = lodepng_get_bpp_lct(colortype, bitdepth); - size_t n = w * h; - return ((n / 8) * bpp) + ((n & 7) * bpp + 7) / 8; -} - - -#ifdef LODEPNG_COMPILE_PNG -#ifdef LODEPNG_COMPILE_DECODER -/*in an idat chunk, each scanline is a multiple of 8 bits, unlike the lodepng output buffer*/ -static size_t lodepng_get_raw_size_idat(unsigned w, unsigned h, const LodePNGColorMode* color) -{ - /*will not overflow for any color type if roughly w * h < 268435455*/ - int bpp = lodepng_get_bpp(color); - size_t line = ((w / 8) * bpp) + ((w & 7) * bpp + 7) / 8; - return h * line; -} -#endif /*LODEPNG_COMPILE_DECODER*/ -#endif /*LODEPNG_COMPILE_PNG*/ - -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - -static void LodePNGUnknownChunks_init(LodePNGInfo* info) -{ - unsigned i; - for(i = 0; i != 3; ++i) info->unknown_chunks_data[i] = 0; - for(i = 0; i != 3; ++i) info->unknown_chunks_size[i] = 0; -} - -static void LodePNGUnknownChunks_cleanup(LodePNGInfo* info) -{ - unsigned i; - for(i = 0; i != 3; ++i) lodepng_free(info->unknown_chunks_data[i]); -} - -static unsigned LodePNGUnknownChunks_copy(LodePNGInfo* dest, const LodePNGInfo* src) -{ - unsigned i; - - LodePNGUnknownChunks_cleanup(dest); - - for(i = 0; i != 3; ++i) - { - size_t j; - dest->unknown_chunks_size[i] = src->unknown_chunks_size[i]; - dest->unknown_chunks_data[i] = (unsigned char*)lodepng_malloc(src->unknown_chunks_size[i]); - if(!dest->unknown_chunks_data[i] && dest->unknown_chunks_size[i]) return 83; /*alloc fail*/ - for(j = 0; j < src->unknown_chunks_size[i]; ++j) - { - dest->unknown_chunks_data[i][j] = src->unknown_chunks_data[i][j]; - } - } - - return 0; -} - -/******************************************************************************/ - -static void LodePNGText_init(LodePNGInfo* info) -{ - info->text_num = 0; - info->text_keys = NULL; - info->text_strings = NULL; -} - -static void LodePNGText_cleanup(LodePNGInfo* info) -{ - size_t i; - for(i = 0; i != info->text_num; ++i) - { - string_cleanup(&info->text_keys[i]); - string_cleanup(&info->text_strings[i]); - } - lodepng_free(info->text_keys); - lodepng_free(info->text_strings); -} - -static unsigned LodePNGText_copy(LodePNGInfo* dest, const LodePNGInfo* source) -{ - size_t i = 0; - dest->text_keys = 0; - dest->text_strings = 0; - dest->text_num = 0; - for(i = 0; i != source->text_num; ++i) - { - CERROR_TRY_RETURN(lodepng_add_text(dest, source->text_keys[i], source->text_strings[i])); - } - return 0; -} - -void lodepng_clear_text(LodePNGInfo* info) -{ - LodePNGText_cleanup(info); -} - -unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str) -{ - char** 
new_keys = (char**)(lodepng_realloc(info->text_keys, sizeof(char*) * (info->text_num + 1))); - char** new_strings = (char**)(lodepng_realloc(info->text_strings, sizeof(char*) * (info->text_num + 1))); - if(!new_keys || !new_strings) - { - lodepng_free(new_keys); - lodepng_free(new_strings); - return 83; /*alloc fail*/ - } - - ++info->text_num; - info->text_keys = new_keys; - info->text_strings = new_strings; - - string_init(&info->text_keys[info->text_num - 1]); - string_set(&info->text_keys[info->text_num - 1], key); - - string_init(&info->text_strings[info->text_num - 1]); - string_set(&info->text_strings[info->text_num - 1], str); - - return 0; -} - -/******************************************************************************/ - -static void LodePNGIText_init(LodePNGInfo* info) -{ - info->itext_num = 0; - info->itext_keys = NULL; - info->itext_langtags = NULL; - info->itext_transkeys = NULL; - info->itext_strings = NULL; -} - -static void LodePNGIText_cleanup(LodePNGInfo* info) -{ - size_t i; - for(i = 0; i != info->itext_num; ++i) - { - string_cleanup(&info->itext_keys[i]); - string_cleanup(&info->itext_langtags[i]); - string_cleanup(&info->itext_transkeys[i]); - string_cleanup(&info->itext_strings[i]); - } - lodepng_free(info->itext_keys); - lodepng_free(info->itext_langtags); - lodepng_free(info->itext_transkeys); - lodepng_free(info->itext_strings); -} - -static unsigned LodePNGIText_copy(LodePNGInfo* dest, const LodePNGInfo* source) -{ - size_t i = 0; - dest->itext_keys = 0; - dest->itext_langtags = 0; - dest->itext_transkeys = 0; - dest->itext_strings = 0; - dest->itext_num = 0; - for(i = 0; i != source->itext_num; ++i) - { - CERROR_TRY_RETURN(lodepng_add_itext(dest, source->itext_keys[i], source->itext_langtags[i], - source->itext_transkeys[i], source->itext_strings[i])); - } - return 0; -} - -void lodepng_clear_itext(LodePNGInfo* info) -{ - LodePNGIText_cleanup(info); -} - -unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag, - const char* transkey, const char* str) -{ - char** new_keys = (char**)(lodepng_realloc(info->itext_keys, sizeof(char*) * (info->itext_num + 1))); - char** new_langtags = (char**)(lodepng_realloc(info->itext_langtags, sizeof(char*) * (info->itext_num + 1))); - char** new_transkeys = (char**)(lodepng_realloc(info->itext_transkeys, sizeof(char*) * (info->itext_num + 1))); - char** new_strings = (char**)(lodepng_realloc(info->itext_strings, sizeof(char*) * (info->itext_num + 1))); - if(!new_keys || !new_langtags || !new_transkeys || !new_strings) - { - lodepng_free(new_keys); - lodepng_free(new_langtags); - lodepng_free(new_transkeys); - lodepng_free(new_strings); - return 83; /*alloc fail*/ - } - - ++info->itext_num; - info->itext_keys = new_keys; - info->itext_langtags = new_langtags; - info->itext_transkeys = new_transkeys; - info->itext_strings = new_strings; - - string_init(&info->itext_keys[info->itext_num - 1]); - string_set(&info->itext_keys[info->itext_num - 1], key); - - string_init(&info->itext_langtags[info->itext_num - 1]); - string_set(&info->itext_langtags[info->itext_num - 1], langtag); - - string_init(&info->itext_transkeys[info->itext_num - 1]); - string_set(&info->itext_transkeys[info->itext_num - 1], transkey); - - string_init(&info->itext_strings[info->itext_num - 1]); - string_set(&info->itext_strings[info->itext_num - 1], str); - - return 0; -} -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - -void lodepng_info_init(LodePNGInfo* info) -{ - lodepng_color_mode_init(&info->color); - info->interlace_method = 
0; - info->compression_method = 0; - info->filter_method = 0; -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - info->background_defined = 0; - info->background_r = info->background_g = info->background_b = 0; - - LodePNGText_init(info); - LodePNGIText_init(info); - - info->time_defined = 0; - info->phys_defined = 0; - - LodePNGUnknownChunks_init(info); -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ -} - -void lodepng_info_cleanup(LodePNGInfo* info) -{ - lodepng_color_mode_cleanup(&info->color); -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - LodePNGText_cleanup(info); - LodePNGIText_cleanup(info); - - LodePNGUnknownChunks_cleanup(info); -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ -} - -unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source) -{ - lodepng_info_cleanup(dest); - *dest = *source; - lodepng_color_mode_init(&dest->color); - CERROR_TRY_RETURN(lodepng_color_mode_copy(&dest->color, &source->color)); - -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - CERROR_TRY_RETURN(LodePNGText_copy(dest, source)); - CERROR_TRY_RETURN(LodePNGIText_copy(dest, source)); - - LodePNGUnknownChunks_init(dest); - CERROR_TRY_RETURN(LodePNGUnknownChunks_copy(dest, source)); -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - return 0; -} - -void lodepng_info_swap(LodePNGInfo* a, LodePNGInfo* b) -{ - LodePNGInfo temp = *a; - *a = *b; - *b = temp; -} - -/* ////////////////////////////////////////////////////////////////////////// */ - -/*index: bitgroup index, bits: bitgroup size(1, 2 or 4), in: bitgroup value, out: octet array to add bits to*/ -static void addColorBits(unsigned char* out, size_t index, unsigned bits, unsigned in) -{ - unsigned m = bits == 1 ? 7 : bits == 2 ? 3 : 1; /*8 / bits - 1*/ - /*p = the partial index in the byte, e.g. with 4 palettebits it is 0 for first half or 1 for second half*/ - unsigned p = index & m; - in &= (1u << bits) - 1u; /*filter out any other bits of the input value*/ - in = in << (bits * (m - p)); - if(p == 0) out[index * bits / 8] = in; - else out[index * bits / 8] |= in; -} - -typedef struct ColorTree ColorTree; - -/* -One node of a color tree -This is the data structure used to count the number of unique colors and to get a palette -index for a color. It's like an octree, but because the alpha channel is used too, each -node has 16 instead of 8 children. -*/ -struct ColorTree -{ - ColorTree* children[16]; /*up to 16 pointers to ColorTree of next level*/ - int index; /*the payload. Only has a meaningful value if this is in the last level*/ -}; - -static void color_tree_init(ColorTree* tree) -{ - int i; - for(i = 0; i != 16; ++i) tree->children[i] = 0; - tree->index = -1; -} - -static void color_tree_cleanup(ColorTree* tree) -{ - int i; - for(i = 0; i != 16; ++i) - { - if(tree->children[i]) - { - color_tree_cleanup(tree->children[i]); - lodepng_free(tree->children[i]); - } - } -} - -/*returns -1 if color not present, its index otherwise*/ -static int color_tree_get(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a) -{ - int bit = 0; - for(bit = 0; bit < 8; ++bit) - { - int i = 8 * ((r >> bit) & 1) + 4 * ((g >> bit) & 1) + 2 * ((b >> bit) & 1) + 1 * ((a >> bit) & 1); - if(!tree->children[i]) return -1; - else tree = tree->children[i]; - } - return tree ? 
tree->index : -1; -} - -#ifdef LODEPNG_COMPILE_ENCODER -static int color_tree_has(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a) -{ - return color_tree_get(tree, r, g, b, a) >= 0; -} -#endif /*LODEPNG_COMPILE_ENCODER*/ - -/*color is not allowed to already exist. -Index should be >= 0 (it's signed to be compatible with using -1 for "doesn't exist")*/ -static void color_tree_add(ColorTree* tree, - unsigned char r, unsigned char g, unsigned char b, unsigned char a, unsigned index) -{ - int bit; - for(bit = 0; bit < 8; ++bit) - { - int i = 8 * ((r >> bit) & 1) + 4 * ((g >> bit) & 1) + 2 * ((b >> bit) & 1) + 1 * ((a >> bit) & 1); - if(!tree->children[i]) - { - tree->children[i] = (ColorTree*)lodepng_malloc(sizeof(ColorTree)); - color_tree_init(tree->children[i]); - } - tree = tree->children[i]; - } - tree->index = (int)index; -} - -/*put a pixel, given its RGBA color, into image of any color type*/ -static unsigned rgba8ToPixel(unsigned char* out, size_t i, - const LodePNGColorMode* mode, ColorTree* tree /*for palette*/, - unsigned char r, unsigned char g, unsigned char b, unsigned char a) -{ - if(mode->colortype == LCT_GREY) - { - unsigned char grey = r; /*((unsigned short)r + g + b) / 3*/; - if(mode->bitdepth == 8) out[i] = grey; - else if(mode->bitdepth == 16) out[i * 2 + 0] = out[i * 2 + 1] = grey; - else - { - /*take the most significant bits of grey*/ - grey = (grey >> (8 - mode->bitdepth)) & ((1 << mode->bitdepth) - 1); - addColorBits(out, i, mode->bitdepth, grey); - } - } - else if(mode->colortype == LCT_RGB) - { - if(mode->bitdepth == 8) - { - out[i * 3 + 0] = r; - out[i * 3 + 1] = g; - out[i * 3 + 2] = b; - } - else - { - out[i * 6 + 0] = out[i * 6 + 1] = r; - out[i * 6 + 2] = out[i * 6 + 3] = g; - out[i * 6 + 4] = out[i * 6 + 5] = b; - } - } - else if(mode->colortype == LCT_PALETTE) - { - int index = color_tree_get(tree, r, g, b, a); - if(index < 0) return 82; /*color not in palette*/ - if(mode->bitdepth == 8) out[i] = index; - else addColorBits(out, i, mode->bitdepth, (unsigned)index); - } - else if(mode->colortype == LCT_GREY_ALPHA) - { - unsigned char grey = r; /*((unsigned short)r + g + b) / 3*/; - if(mode->bitdepth == 8) - { - out[i * 2 + 0] = grey; - out[i * 2 + 1] = a; - } - else if(mode->bitdepth == 16) - { - out[i * 4 + 0] = out[i * 4 + 1] = grey; - out[i * 4 + 2] = out[i * 4 + 3] = a; - } - } - else if(mode->colortype == LCT_RGBA) - { - if(mode->bitdepth == 8) - { - out[i * 4 + 0] = r; - out[i * 4 + 1] = g; - out[i * 4 + 2] = b; - out[i * 4 + 3] = a; - } - else - { - out[i * 8 + 0] = out[i * 8 + 1] = r; - out[i * 8 + 2] = out[i * 8 + 3] = g; - out[i * 8 + 4] = out[i * 8 + 5] = b; - out[i * 8 + 6] = out[i * 8 + 7] = a; - } - } - - return 0; /*no error*/ -} - -/*put a pixel, given its RGBA16 color, into image of any color 16-bitdepth type*/ -static void rgba16ToPixel(unsigned char* out, size_t i, - const LodePNGColorMode* mode, - unsigned short r, unsigned short g, unsigned short b, unsigned short a) -{ - if(mode->colortype == LCT_GREY) - { - unsigned short grey = r; /*((unsigned)r + g + b) / 3*/; - out[i * 2 + 0] = (grey >> 8) & 255; - out[i * 2 + 1] = grey & 255; - } - else if(mode->colortype == LCT_RGB) - { - out[i * 6 + 0] = (r >> 8) & 255; - out[i * 6 + 1] = r & 255; - out[i * 6 + 2] = (g >> 8) & 255; - out[i * 6 + 3] = g & 255; - out[i * 6 + 4] = (b >> 8) & 255; - out[i * 6 + 5] = b & 255; - } - else if(mode->colortype == LCT_GREY_ALPHA) - { - unsigned short grey = r; /*((unsigned)r + g + b) / 3*/; - out[i * 4 + 0] = (grey >> 8) & 
255; - out[i * 4 + 1] = grey & 255; - out[i * 4 + 2] = (a >> 8) & 255; - out[i * 4 + 3] = a & 255; - } - else if(mode->colortype == LCT_RGBA) - { - out[i * 8 + 0] = (r >> 8) & 255; - out[i * 8 + 1] = r & 255; - out[i * 8 + 2] = (g >> 8) & 255; - out[i * 8 + 3] = g & 255; - out[i * 8 + 4] = (b >> 8) & 255; - out[i * 8 + 5] = b & 255; - out[i * 8 + 6] = (a >> 8) & 255; - out[i * 8 + 7] = a & 255; - } -} - -/*Get RGBA8 color of pixel with index i (y * width + x) from the raw image with given color type.*/ -static void getPixelColorRGBA8(unsigned char* r, unsigned char* g, - unsigned char* b, unsigned char* a, - const unsigned char* in, size_t i, - const LodePNGColorMode* mode) -{ - if(mode->colortype == LCT_GREY) - { - if(mode->bitdepth == 8) - { - *r = *g = *b = in[i]; - if(mode->key_defined && *r == mode->key_r) *a = 0; - else *a = 255; - } - else if(mode->bitdepth == 16) - { - *r = *g = *b = in[i * 2 + 0]; - if(mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r) *a = 0; - else *a = 255; - } - else - { - unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/ - size_t j = i * mode->bitdepth; - unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth); - *r = *g = *b = (value * 255) / highest; - if(mode->key_defined && value == mode->key_r) *a = 0; - else *a = 255; - } - } - else if(mode->colortype == LCT_RGB) - { - if(mode->bitdepth == 8) - { - *r = in[i * 3 + 0]; *g = in[i * 3 + 1]; *b = in[i * 3 + 2]; - if(mode->key_defined && *r == mode->key_r && *g == mode->key_g && *b == mode->key_b) *a = 0; - else *a = 255; - } - else - { - *r = in[i * 6 + 0]; - *g = in[i * 6 + 2]; - *b = in[i * 6 + 4]; - if(mode->key_defined && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r - && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g - && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b) *a = 0; - else *a = 255; - } - } - else if(mode->colortype == LCT_PALETTE) - { - unsigned index; - if(mode->bitdepth == 8) index = in[i]; - else - { - size_t j = i * mode->bitdepth; - index = readBitsFromReversedStream(&j, in, mode->bitdepth); - } - - if(index >= mode->palettesize) - { - /*This is an error according to the PNG spec, but common PNG decoders make it black instead. - Done here too, slightly faster due to no error handling needed.*/ - *r = *g = *b = 0; - *a = 255; - } - else - { - *r = mode->palette[index * 4 + 0]; - *g = mode->palette[index * 4 + 1]; - *b = mode->palette[index * 4 + 2]; - *a = mode->palette[index * 4 + 3]; - } - } - else if(mode->colortype == LCT_GREY_ALPHA) - { - if(mode->bitdepth == 8) - { - *r = *g = *b = in[i * 2 + 0]; - *a = in[i * 2 + 1]; - } - else - { - *r = *g = *b = in[i * 4 + 0]; - *a = in[i * 4 + 2]; - } - } - else if(mode->colortype == LCT_RGBA) - { - if(mode->bitdepth == 8) - { - *r = in[i * 4 + 0]; - *g = in[i * 4 + 1]; - *b = in[i * 4 + 2]; - *a = in[i * 4 + 3]; - } - else - { - *r = in[i * 8 + 0]; - *g = in[i * 8 + 2]; - *b = in[i * 8 + 4]; - *a = in[i * 8 + 6]; - } - } -} - -/*Similar to getPixelColorRGBA8, but with all the for loops inside of the color -mode test cases, optimized to convert the colors much faster, when converting -to RGBA or RGB with 8 bit per cannel. buffer must be RGBA or RGB output with -enough memory, if has_alpha is true the output is RGBA. 
mode has the color mode -of the input buffer.*/ -static void getPixelColorsRGBA8(unsigned char* buffer, size_t numpixels, - unsigned has_alpha, const unsigned char* in, - const LodePNGColorMode* mode) -{ - unsigned num_channels = has_alpha ? 4 : 3; - size_t i; - if(mode->colortype == LCT_GREY) - { - if(mode->bitdepth == 8) - { - for(i = 0; i != numpixels; ++i, buffer += num_channels) - { - buffer[0] = buffer[1] = buffer[2] = in[i]; - if(has_alpha) buffer[3] = mode->key_defined && in[i] == mode->key_r ? 0 : 255; - } - } - else if(mode->bitdepth == 16) - { - for(i = 0; i != numpixels; ++i, buffer += num_channels) - { - buffer[0] = buffer[1] = buffer[2] = in[i * 2]; - if(has_alpha) buffer[3] = mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r ? 0 : 255; - } - } - else - { - unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/ - size_t j = 0; - for(i = 0; i != numpixels; ++i, buffer += num_channels) - { - unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth); - buffer[0] = buffer[1] = buffer[2] = (value * 255) / highest; - if(has_alpha) buffer[3] = mode->key_defined && value == mode->key_r ? 0 : 255; - } - } - } - else if(mode->colortype == LCT_RGB) - { - if(mode->bitdepth == 8) - { - for(i = 0; i != numpixels; ++i, buffer += num_channels) - { - buffer[0] = in[i * 3 + 0]; - buffer[1] = in[i * 3 + 1]; - buffer[2] = in[i * 3 + 2]; - if(has_alpha) buffer[3] = mode->key_defined && buffer[0] == mode->key_r - && buffer[1]== mode->key_g && buffer[2] == mode->key_b ? 0 : 255; - } - } - else - { - for(i = 0; i != numpixels; ++i, buffer += num_channels) - { - buffer[0] = in[i * 6 + 0]; - buffer[1] = in[i * 6 + 2]; - buffer[2] = in[i * 6 + 4]; - if(has_alpha) buffer[3] = mode->key_defined - && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r - && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g - && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b ? 0 : 255; - } - } - } - else if(mode->colortype == LCT_PALETTE) - { - unsigned index; - size_t j = 0; - for(i = 0; i != numpixels; ++i, buffer += num_channels) - { - if(mode->bitdepth == 8) index = in[i]; - else index = readBitsFromReversedStream(&j, in, mode->bitdepth); - - if(index >= mode->palettesize) - { - /*This is an error according to the PNG spec, but most PNG decoders make it black instead. 
- Done here too, slightly faster due to no error handling needed.*/ - buffer[0] = buffer[1] = buffer[2] = 0; - if(has_alpha) buffer[3] = 255; - } - else - { - buffer[0] = mode->palette[index * 4 + 0]; - buffer[1] = mode->palette[index * 4 + 1]; - buffer[2] = mode->palette[index * 4 + 2]; - if(has_alpha) buffer[3] = mode->palette[index * 4 + 3]; - } - } - } - else if(mode->colortype == LCT_GREY_ALPHA) - { - if(mode->bitdepth == 8) - { - for(i = 0; i != numpixels; ++i, buffer += num_channels) - { - buffer[0] = buffer[1] = buffer[2] = in[i * 2 + 0]; - if(has_alpha) buffer[3] = in[i * 2 + 1]; - } - } - else - { - for(i = 0; i != numpixels; ++i, buffer += num_channels) - { - buffer[0] = buffer[1] = buffer[2] = in[i * 4 + 0]; - if(has_alpha) buffer[3] = in[i * 4 + 2]; - } - } - } - else if(mode->colortype == LCT_RGBA) - { - if(mode->bitdepth == 8) - { - for(i = 0; i != numpixels; ++i, buffer += num_channels) - { - buffer[0] = in[i * 4 + 0]; - buffer[1] = in[i * 4 + 1]; - buffer[2] = in[i * 4 + 2]; - if(has_alpha) buffer[3] = in[i * 4 + 3]; - } - } - else - { - for(i = 0; i != numpixels; ++i, buffer += num_channels) - { - buffer[0] = in[i * 8 + 0]; - buffer[1] = in[i * 8 + 2]; - buffer[2] = in[i * 8 + 4]; - if(has_alpha) buffer[3] = in[i * 8 + 6]; - } - } - } -} - -/*Get RGBA16 color of pixel with index i (y * width + x) from the raw image with -given color type, but the given color type must be 16-bit itself.*/ -static void getPixelColorRGBA16(unsigned short* r, unsigned short* g, unsigned short* b, unsigned short* a, - const unsigned char* in, size_t i, const LodePNGColorMode* mode) -{ - if(mode->colortype == LCT_GREY) - { - *r = *g = *b = 256 * in[i * 2 + 0] + in[i * 2 + 1]; - if(mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r) *a = 0; - else *a = 65535; - } - else if(mode->colortype == LCT_RGB) - { - *r = 256u * in[i * 6 + 0] + in[i * 6 + 1]; - *g = 256u * in[i * 6 + 2] + in[i * 6 + 3]; - *b = 256u * in[i * 6 + 4] + in[i * 6 + 5]; - if(mode->key_defined - && 256u * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r - && 256u * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g - && 256u * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b) *a = 0; - else *a = 65535; - } - else if(mode->colortype == LCT_GREY_ALPHA) - { - *r = *g = *b = 256u * in[i * 4 + 0] + in[i * 4 + 1]; - *a = 256u * in[i * 4 + 2] + in[i * 4 + 3]; - } - else if(mode->colortype == LCT_RGBA) - { - *r = 256u * in[i * 8 + 0] + in[i * 8 + 1]; - *g = 256u * in[i * 8 + 2] + in[i * 8 + 3]; - *b = 256u * in[i * 8 + 4] + in[i * 8 + 5]; - *a = 256u * in[i * 8 + 6] + in[i * 8 + 7]; - } -} - -unsigned lodepng_convert(unsigned char* out, const unsigned char* in, - const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in, - unsigned w, unsigned h) -{ - int i; - ColorTree tree; - size_t numpixels = w * h; - - if(lodepng_color_mode_equal(mode_out, mode_in)) - { - size_t numbytes = lodepng_get_raw_size(w, h, mode_in); - for(i = 0; i != (int)numbytes; ++i) out[i] = in[i]; - return 0; - } - - if(mode_out->colortype == LCT_PALETTE) - { - size_t palettesize = mode_out->palettesize; - const unsigned char* palette = mode_out->palette; - size_t palsize = 1ull << (size_t)mode_out->bitdepth; - /*if the user specified output palette but did not give the values, assume - they want the values of the input color type (assuming that one is palette). 
- Note that we never create a new palette ourselves.*/ - if(palettesize == 0) - { - palettesize = mode_in->palettesize; - palette = mode_in->palette; - } - if(palettesize < palsize) palsize = palettesize; - color_tree_init(&tree); - for(i = 0; i != (int)palsize; ++i) - { - const unsigned char* p = &palette[i * 4]; - color_tree_add(&tree, p[0], p[1], p[2], p[3], i); - } - } - - if(mode_in->bitdepth == 16 && mode_out->bitdepth == 16) - { - for(i = 0; i != (int)numpixels; ++i) - { - unsigned short r = 0, g = 0, b = 0, a = 0; - getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in); - rgba16ToPixel(out, i, mode_out, r, g, b, a); - } - } - else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGBA) - { - getPixelColorsRGBA8(out, numpixels, 1, in, mode_in); - } - else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGB) - { - getPixelColorsRGBA8(out, numpixels, 0, in, mode_in); - } - else - { - unsigned char r = 0, g = 0, b = 0, a = 0; - for(i = 0; i != (int)numpixels; ++i) - { - getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in); - CERROR_TRY_RETURN(rgba8ToPixel(out, i, mode_out, &tree, r, g, b, a)); - } - } - - if(mode_out->colortype == LCT_PALETTE) - { - color_tree_cleanup(&tree); - } - - return 0; /*no error*/ -} - -#ifdef LODEPNG_COMPILE_ENCODER - -void lodepng_color_profile_init(LodePNGColorProfile* profile) -{ - profile->colored = 0; - profile->key = 0; - profile->alpha = 0; - profile->key_r = profile->key_g = profile->key_b = 0; - profile->numcolors = 0; - profile->bits = 1; -} - -/*function used for debug purposes with C++*/ -/*void printColorProfile(LodePNGColorProfile* p) -{ - std::cout << "colored: " << (int)p->colored << ", "; - std::cout << "key: " << (int)p->key << ", "; - std::cout << "key_r: " << (int)p->key_r << ", "; - std::cout << "key_g: " << (int)p->key_g << ", "; - std::cout << "key_b: " << (int)p->key_b << ", "; - std::cout << "alpha: " << (int)p->alpha << ", "; - std::cout << "numcolors: " << (int)p->numcolors << ", "; - std::cout << "bits: " << (int)p->bits << std::endl; -}*/ - -/*Returns how many bits needed to represent given value (max 8 bit)*/ -static unsigned getValueRequiredBits(unsigned char value) -{ - if(value == 0 || value == 255) return 1; - /*The scaling of 2-bit and 4-bit values uses multiples of 85 and 17*/ - if(value % 17 == 0) return value % 85 == 0 ? 2 : 4; - return 8; -} - -/*profile must already have been inited with mode. -It's ok to set some parameters of profile to done already.*/ -unsigned lodepng_get_color_profile(LodePNGColorProfile* profile, - const unsigned char* in, unsigned w, unsigned h, - const LodePNGColorMode* mode) -{ - unsigned error = 0; - size_t i; - ColorTree tree; - size_t numpixels = w * h; - - unsigned colored_done = lodepng_is_greyscale_type(mode) ? 1 : 0; - unsigned alpha_done = lodepng_can_have_alpha(mode) ? 0 : 1; - unsigned numcolors_done = 0; - unsigned bpp = lodepng_get_bpp(mode); - unsigned bits_done = bpp == 1 ? 1 : 0; - unsigned maxnumcolors = 257; - unsigned sixteen = 0; - if(bpp <= 8) maxnumcolors = bpp == 1 ? 2 : (bpp == 2 ? 4 : (bpp == 4 ? 
16 : 256)); - - color_tree_init(&tree); - - /*Check if the 16-bit input is truly 16-bit*/ - if(mode->bitdepth == 16) - { - unsigned short r, g, b, a; - for(i = 0; i != numpixels; ++i) - { - getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode); - if((r & 255) != ((r >> 8) & 255) || (g & 255) != ((g >> 8) & 255) || - (b & 255) != ((b >> 8) & 255) || (a & 255) != ((a >> 8) & 255)) /*first and second byte differ*/ - { - sixteen = 1; - break; - } - } - } - - if(sixteen) - { - unsigned short r = 0, g = 0, b = 0, a = 0; - profile->bits = 16; - bits_done = numcolors_done = 1; /*counting colors no longer useful, palette doesn't support 16-bit*/ - - for(i = 0; i != numpixels; ++i) - { - getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode); - - if(!colored_done && (r != g || r != b)) - { - profile->colored = 1; - colored_done = 1; - } - - if(!alpha_done) - { - unsigned matchkey = (r == profile->key_r && g == profile->key_g && b == profile->key_b); - if(a != 65535 && (a != 0 || (profile->key && !matchkey))) - { - profile->alpha = 1; - alpha_done = 1; - if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/ - } - else if(a == 0 && !profile->alpha && !profile->key) - { - profile->key = 1; - profile->key_r = r; - profile->key_g = g; - profile->key_b = b; - } - else if(a == 65535 && profile->key && matchkey) - { - /* Color key cannot be used if an opaque pixel also has that RGB color. */ - profile->alpha = 1; - alpha_done = 1; - } - } - - if(alpha_done && numcolors_done && colored_done && bits_done) break; - } - } - else /* < 16-bit */ - { - for(i = 0; i != numpixels; ++i) - { - unsigned char r = 0, g = 0, b = 0, a = 0; - getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode); - - if(!bits_done && profile->bits < 8) - { - /*only r is checked, < 8 bits is only relevant for greyscale*/ - unsigned bits = getValueRequiredBits(r); - if(bits > profile->bits) profile->bits = bits; - } - bits_done = (profile->bits >= bpp); - - if(!colored_done && (r != g || r != b)) - { - profile->colored = 1; - colored_done = 1; - if(profile->bits < 8) profile->bits = 8; /*PNG has no colored modes with less than 8-bit per channel*/ - } - - if(!alpha_done) - { - unsigned matchkey = (r == profile->key_r && g == profile->key_g && b == profile->key_b); - if(a != 255 && (a != 0 || (profile->key && !matchkey))) - { - profile->alpha = 1; - alpha_done = 1; - if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/ - } - else if(a == 0 && !profile->alpha && !profile->key) - { - profile->key = 1; - profile->key_r = r; - profile->key_g = g; - profile->key_b = b; - } - else if(a == 255 && profile->key && matchkey) - { - /* Color key cannot be used if an opaque pixel also has that RGB color. 
*/ - profile->alpha = 1; - alpha_done = 1; - if(profile->bits < 8) profile->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/ - } - } - - if(!numcolors_done) - { - if(!color_tree_has(&tree, r, g, b, a)) - { - color_tree_add(&tree, r, g, b, a, profile->numcolors); - if(profile->numcolors < 256) - { - unsigned char* p = profile->palette; - unsigned n = profile->numcolors; - p[n * 4 + 0] = r; - p[n * 4 + 1] = g; - p[n * 4 + 2] = b; - p[n * 4 + 3] = a; - } - ++profile->numcolors; - numcolors_done = profile->numcolors >= maxnumcolors; - } - } - - if(alpha_done && numcolors_done && colored_done && bits_done) break; - } - - /*make the profile's key always 16-bit for consistency - repeat each byte twice*/ - profile->key_r += (profile->key_r << 8); - profile->key_g += (profile->key_g << 8); - profile->key_b += (profile->key_b << 8); - } - - color_tree_cleanup(&tree); - return error; -} - -/*Automatically chooses color type that gives smallest amount of bits in the -output image, e.g. grey if there are only greyscale pixels, palette if there -are less than 256 colors, ... -Updates values of mode with a potentially smaller color model. mode_out should -contain the user chosen color model, but will be overwritten with the new chosen one.*/ -unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out, - const unsigned char* image, unsigned w, unsigned h, - const LodePNGColorMode* mode_in) -{ - LodePNGColorProfile prof; - unsigned error = 0; - unsigned i, n, palettebits, grey_ok, palette_ok; - - lodepng_color_profile_init(&prof); - error = lodepng_get_color_profile(&prof, image, w, h, mode_in); - if(error) return error; - mode_out->key_defined = 0; - - if(prof.key && w * h <= 16) - { - prof.alpha = 1; /*too few pixels to justify tRNS chunk overhead*/ - if(prof.bits < 8) prof.bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/ - } - grey_ok = !prof.colored && !prof.alpha; /*grey without alpha, with potentially low bits*/ - n = prof.numcolors; - palettebits = n <= 2 ? 1 : (n <= 4 ? 2 : (n <= 16 ? 4 : 8)); - palette_ok = n <= 256 && (n * 2 < w * h) && prof.bits <= 8; - if(w * h < n * 2) palette_ok = 0; /*don't add palette overhead if image has only a few pixels*/ - if(grey_ok && prof.bits <= palettebits) palette_ok = 0; /*grey is less overhead*/ - - if(palette_ok) - { - unsigned char* p = prof.palette; - lodepng_palette_clear(mode_out); /*remove potential earlier palette*/ - for(i = 0; i != prof.numcolors; ++i) - { - error = lodepng_palette_add(mode_out, p[i * 4 + 0], p[i * 4 + 1], p[i * 4 + 2], p[i * 4 + 3]); - if(error) break; - } - - mode_out->colortype = LCT_PALETTE; - mode_out->bitdepth = palettebits; - - if(mode_in->colortype == LCT_PALETTE && mode_in->palettesize >= mode_out->palettesize - && mode_in->bitdepth == mode_out->bitdepth) - { - /*If input should have same palette colors, keep original to preserve its order and prevent conversion*/ - lodepng_color_mode_cleanup(mode_out); - lodepng_color_mode_copy(mode_out, mode_in); - } - } - else /*8-bit or 16-bit per channel*/ - { - mode_out->bitdepth = prof.bits; - mode_out->colortype = prof.alpha ? (prof.colored ? LCT_RGBA : LCT_GREY_ALPHA) - : (prof.colored ? 
LCT_RGB : LCT_GREY); - - if(prof.key && !prof.alpha) - { - unsigned mask = (1u << mode_out->bitdepth) - 1u; /*profile always uses 16-bit, mask converts it*/ - mode_out->key_r = prof.key_r & mask; - mode_out->key_g = prof.key_g & mask; - mode_out->key_b = prof.key_b & mask; - mode_out->key_defined = 1; - } - } - - return error; -} - -#endif /* #ifdef LODEPNG_COMPILE_ENCODER */ - -/* -Paeth predicter, used by PNG filter type 4 -The parameters are of type short, but should come from unsigned chars, the shorts -are only needed to make the paeth calculation correct. -*/ -static unsigned char paethPredictor(short a, short b, short c) -{ - short pa = abs(b - c); - short pb = abs(a - c); - short pc = abs(a + b - c - c); - - if(pc < pa && pc < pb) return (unsigned char)c; - else if(pb < pa) return (unsigned char)b; - else return (unsigned char)a; -} - -/*shared values used by multiple Adam7 related functions*/ - -static const unsigned ADAM7_IX[7] = { 0, 4, 0, 2, 0, 1, 0 }; /*x start values*/ -static const unsigned ADAM7_IY[7] = { 0, 0, 4, 0, 2, 0, 1 }; /*y start values*/ -static const unsigned ADAM7_DX[7] = { 8, 8, 4, 4, 2, 2, 1 }; /*x delta values*/ -static const unsigned ADAM7_DY[7] = { 8, 8, 8, 4, 4, 2, 2 }; /*y delta values*/ - -/* -Outputs various dimensions and positions in the image related to the Adam7 reduced images. -passw: output containing the width of the 7 passes -passh: output containing the height of the 7 passes -filter_passstart: output containing the index of the start and end of each - reduced image with filter bytes -padded_passstart output containing the index of the start and end of each - reduced image when without filter bytes but with padded scanlines -passstart: output containing the index of the start and end of each reduced - image without padding between scanlines, but still padding between the images -w, h: width and height of non-interlaced image -bpp: bits per pixel -"padded" is only relevant if bpp is less than 8 and a scanline or image does not - end at a full byte -*/ -static void Adam7_getpassvalues(unsigned passw[7], unsigned passh[7], size_t filter_passstart[8], - size_t padded_passstart[8], size_t passstart[8], unsigned w, unsigned h, unsigned bpp) -{ - /*the passstart values have 8 values: the 8th one indicates the byte after the end of the 7th (= last) pass*/ - unsigned i; - - /*calculate width and height in pixels of each pass*/ - for(i = 0; i != 7; ++i) - { - passw[i] = (w + ADAM7_DX[i] - ADAM7_IX[i] - 1) / ADAM7_DX[i]; - passh[i] = (h + ADAM7_DY[i] - ADAM7_IY[i] - 1) / ADAM7_DY[i]; - if(passw[i] == 0) passh[i] = 0; - if(passh[i] == 0) passw[i] = 0; - } - - filter_passstart[0] = padded_passstart[0] = passstart[0] = 0; - for(i = 0; i != 7; ++i) - { - /*if passw[i] is 0, it's 0 bytes, not 1 (no filtertype-byte)*/ - filter_passstart[i + 1] = filter_passstart[i] - + ((passw[i] && passh[i]) ? passh[i] * (1 + (passw[i] * bpp + 7) / 8) : 0); - /*bits padded if needed to fill full byte at end of each scanline*/ - padded_passstart[i + 1] = padded_passstart[i] + passh[i] * ((passw[i] * bpp + 7) / 8); - /*only padded at end of reduced image*/ - passstart[i + 1] = passstart[i] + (passh[i] * passw[i] * bpp + 7) / 8; - } -} - -#ifdef LODEPNG_COMPILE_DECODER - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / PNG Decoder / */ -/* ////////////////////////////////////////////////////////////////////////// */ - -/*read the information from the header and store it in the LodePNGInfo. 
return value is error*/ -unsigned lodepng_inspect(unsigned* w, unsigned* h, LodePNGState* state, - const unsigned char* in, size_t insize) -{ - LodePNGInfo* info = &state->info_png; - if(insize == 0 || in == 0) - { - CERROR_RETURN_ERROR(state->error, 48); /*error: the given data is empty*/ - } - if(insize < 33) - { - CERROR_RETURN_ERROR(state->error, 27); /*error: the data length is smaller than the length of a PNG header*/ - } - - /*when decoding a new PNG image, make sure all parameters created after previous decoding are reset*/ - lodepng_info_cleanup(info); - lodepng_info_init(info); - - if(in[0] != 137 || in[1] != 80 || in[2] != 78 || in[3] != 71 - || in[4] != 13 || in[5] != 10 || in[6] != 26 || in[7] != 10) - { - CERROR_RETURN_ERROR(state->error, 28); /*error: the first 8 bytes are not the correct PNG signature*/ - } - if(lodepng_chunk_length(in + 8) != 13) - { - CERROR_RETURN_ERROR(state->error, 94); /*error: header size must be 13 bytes*/ - } - if(!lodepng_chunk_type_equals(in + 8, "IHDR")) - { - CERROR_RETURN_ERROR(state->error, 29); /*error: it doesn't start with a IHDR chunk!*/ - } - - /*read the values given in the header*/ - *w = lodepng_read32bitInt(&in[16]); - *h = lodepng_read32bitInt(&in[20]); - info->color.bitdepth = in[24]; - info->color.colortype = (LodePNGColorType)in[25]; - info->compression_method = in[26]; - info->filter_method = in[27]; - info->interlace_method = in[28]; - - if(*w == 0 || *h == 0) - { - CERROR_RETURN_ERROR(state->error, 93); - } - - if(!state->decoder.ignore_crc) - { - unsigned CRC = lodepng_read32bitInt(&in[29]); - unsigned checksum = lodepng_crc32(&in[12], 17); - if(CRC != checksum) - { - CERROR_RETURN_ERROR(state->error, 57); /*invalid CRC*/ - } - } - - /*error: only compression method 0 is allowed in the specification*/ - if(info->compression_method != 0) CERROR_RETURN_ERROR(state->error, 32); - /*error: only filter method 0 is allowed in the specification*/ - if(info->filter_method != 0) CERROR_RETURN_ERROR(state->error, 33); - /*error: only interlace methods 0 and 1 exist in the specification*/ - if(info->interlace_method > 1) CERROR_RETURN_ERROR(state->error, 34); - - state->error = checkColorValidity(info->color.colortype, info->color.bitdepth); - return state->error; -} - -static unsigned unfilterScanline(unsigned char* recon, const unsigned char* scanline, const unsigned char* precon, - size_t bytewidth, unsigned char filterType, size_t length) -{ - /* - For PNG filter method 0 - unfilter a PNG image scanline by scanline. when the pixels are smaller than 1 byte, - the filter works byte per byte (bytewidth = 1) - precon is the previous unfiltered scanline, recon the result, scanline the current one - the incoming scanlines do NOT include the filtertype byte, that one is given in the parameter filterType instead - recon and scanline MAY be the same memory address! precon must be disjoint. 
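Because lodepng_inspect stops after the signature and the 13-byte IHDR payload, it is a cheap way to query dimensions and color format without decoding any pixel data. A sketch of using it together with lodepng_load_file (the file name is a placeholder):

    #include <stdio.h>
    #include <stdlib.h>
    #include "lodepng.h"

    int main(void)
    {
        unsigned char* buffer = 0;
        size_t buffersize = 0;
        unsigned w = 0, h = 0;

        unsigned error = lodepng_load_file(&buffer, &buffersize, "in.png");

        LodePNGState state;
        lodepng_state_init(&state);

        /* parses only the 8-byte signature plus the IHDR chunk */
        if(!error) error = lodepng_inspect(&w, &h, &state, buffer, buffersize);
        if(!error)
            printf("%ux%u, colortype %d, bitdepth %u, interlace %u\n",
                   w, h, state.info_png.color.colortype,
                   state.info_png.color.bitdepth, state.info_png.interlace_method);
        else
            printf("error %u\n", error);

        lodepng_state_cleanup(&state);
        free(buffer); /* lodepng_load_file allocates with lodepng_malloc, plain malloc unless overridden */
        return 0;
    }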
- */ - - size_t i; - switch(filterType) - { - case 0: - for(i = 0; i != length; ++i) recon[i] = scanline[i]; - break; - case 1: - for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i]; - for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + recon[i - bytewidth]; - break; - case 2: - if(precon) - { - for(i = 0; i != length; ++i) recon[i] = scanline[i] + precon[i]; - } - else - { - for(i = 0; i != length; ++i) recon[i] = scanline[i]; - } - break; - case 3: - if(precon) - { - for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i] + (precon[i] >> 1); - for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + ((recon[i - bytewidth] + precon[i]) >> 1); - } - else - { - for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i]; - for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + (recon[i - bytewidth] >> 1); - } - break; - case 4: - if(precon) - { - for(i = 0; i != bytewidth; ++i) - { - recon[i] = (scanline[i] + precon[i]); /*paethPredictor(0, precon[i], 0) is always precon[i]*/ - } - for(i = bytewidth; i < length; ++i) - { - recon[i] = (scanline[i] + paethPredictor(recon[i - bytewidth], precon[i], precon[i - bytewidth])); - } - } - else - { - for(i = 0; i != bytewidth; ++i) - { - recon[i] = scanline[i]; - } - for(i = bytewidth; i < length; ++i) - { - /*paethPredictor(recon[i - bytewidth], 0, 0) is always recon[i - bytewidth]*/ - recon[i] = (scanline[i] + recon[i - bytewidth]); - } - } - break; - default: return 36; /*error: unexisting filter type given*/ - } - return 0; -} - -static unsigned unfilter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) -{ - /* - For PNG filter method 0 - this function unfilters a single image (e.g. without interlacing this is called once, with Adam7 seven times) - out must have enough bytes allocated already, in must have the scanlines + 1 filtertype byte per scanline - w and h are image dimensions or dimensions of reduced image, bpp is bits per pixel - in and out are allowed to be the same memory address (but aren't the same size since in has the extra filter bytes) - */ - - unsigned y; - unsigned char* prevline = 0; - - /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/ - size_t bytewidth = (bpp + 7) / 8; - size_t linebytes = (w * bpp + 7) / 8; - - for(y = 0; y < h; ++y) - { - size_t outindex = linebytes * y; - size_t inindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/ - unsigned char filterType = in[inindex]; - - CERROR_TRY_RETURN(unfilterScanline(&out[outindex], &in[inindex + 1], prevline, bytewidth, filterType, linebytes)); - - prevline = &out[outindex]; - } - - return 0; -} - -/* -in: Adam7 interlaced image, with no padding bits between scanlines, but between - reduced images so that each reduced image starts at a byte. -out: the same pixels, but re-ordered so that they're now a non-interlaced image with size w*h -bpp: bits per pixel -out has the following size in bits: w * h * bpp. -in is possibly bigger due to padding bits between reduced images. 
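Each case above reconstructs a byte as the raw (filtered) value plus a prediction taken from bytes that were already reconstructed. For the Sub filter (type 1) the prediction is simply the byte one pixel to the left, which is why the first bytewidth bytes are copied as-is. A tiny standalone sketch of just that case, with an invented 3-pixel RGB scanline:

    #include <stdio.h>

    int main(void)
    {
        const unsigned bytewidth = 3, length = 9; /* 3 pixels of 8-bit RGB */
        /* the scanline as it comes out of the zlib stream (filter-type byte already stripped) */
        unsigned char filtered[9] = { 10, 20, 30,  5, 5, 5,  1, 1, 1 };
        unsigned char recon[9];
        unsigned i;

        for(i = 0; i < bytewidth; ++i) recon[i] = filtered[i];
        for(i = bytewidth; i < length; ++i)
            recon[i] = (unsigned char)(filtered[i] + recon[i - bytewidth]);

        for(i = 0; i < length; ++i) printf("%u ", recon[i]);
        printf("\n"); /* prints: 10 20 30 15 25 35 16 26 36 */
        return 0;
    }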
-out must be big enough AND must be 0 everywhere if bpp < 8 in the current implementation -(because that's likely a little bit faster) -NOTE: comments about padding bits are only relevant if bpp < 8 -*/ -static void Adam7_deinterlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) -{ - unsigned passw[7], passh[7]; - size_t filter_passstart[8], padded_passstart[8], passstart[8]; - unsigned i; - - Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); - - if(bpp >= 8) - { - for(i = 0; i != 7; ++i) - { - unsigned x, y, b; - size_t bytewidth = bpp / 8; - for(y = 0; y < passh[i]; ++y) - for(x = 0; x < passw[i]; ++x) - { - size_t pixelinstart = passstart[i] + (y * passw[i] + x) * bytewidth; - size_t pixeloutstart = ((ADAM7_IY[i] + y * ADAM7_DY[i]) * w + ADAM7_IX[i] + x * ADAM7_DX[i]) * bytewidth; - for(b = 0; b < bytewidth; ++b) - { - out[pixeloutstart + b] = in[pixelinstart + b]; - } - } - } - } - else /*bpp < 8: Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/ - { - for(i = 0; i != 7; ++i) - { - unsigned x, y, b; - unsigned ilinebits = bpp * passw[i]; - unsigned olinebits = bpp * w; - size_t obp, ibp; /*bit pointers (for out and in buffer)*/ - for(y = 0; y < passh[i]; ++y) - for(x = 0; x < passw[i]; ++x) - { - ibp = (8 * passstart[i]) + (y * ilinebits + x * bpp); - obp = (ADAM7_IY[i] + y * ADAM7_DY[i]) * olinebits + (ADAM7_IX[i] + x * ADAM7_DX[i]) * bpp; - for(b = 0; b < bpp; ++b) - { - unsigned char bit = readBitFromReversedStream(&ibp, in); - /*note that this function assumes the out buffer is completely 0, use setBitOfReversedStream otherwise*/ - setBitOfReversedStream0(&obp, out, bit); - } - } - } - } -} - -static void removePaddingBits(unsigned char* out, const unsigned char* in, - size_t olinebits, size_t ilinebits, unsigned h) -{ - /* - After filtering there are still padding bits if scanlines have non multiple of 8 bit amounts. They need - to be removed (except at last scanline of (Adam7-reduced) image) before working with pure image buffers - for the Adam7 code, the color convert code and the output to the user. - in and out are allowed to be the same buffer, in may also be higher but still overlapping; in must - have >= ilinebits*h bits, out must have >= olinebits*h bits, olinebits must be <= ilinebits - also used to move bits after earlier such operations happened, e.g. in a sequence of reduced images from Adam7 - only useful if (ilinebits - olinebits) is a value in the range 1..7 - */ - unsigned y; - size_t diff = ilinebits - olinebits; - size_t ibp = 0, obp = 0; /*input and output bit pointers*/ - for(y = 0; y < h; ++y) - { - size_t x; - for(x = 0; x < olinebits; ++x) - { - unsigned char bit = readBitFromReversedStream(&ibp, in); - setBitOfReversedStream(&obp, out, bit); - } - ibp += diff; - } -} - -/*out must be buffer big enough to contain full image, and in must contain the full decompressed data from -the IDAT chunks (with filter index bytes and possible padding bits) -return value is error*/ -static unsigned postProcessScanlines(unsigned char* out, unsigned char* in, - unsigned w, unsigned h, const LodePNGInfo* info_png) -{ - /* - This function converts the filtered-padded-interlaced data into pure 2D image buffer with the PNG's colortype. - Steps: - *) if no Adam7: 1) unfilter 2) remove padding bits (= posible extra bits per scanline if bpp < 8) - *) if adam7: 1) 7x unfilter 2) 7x remove padding bits 3) Adam7_deinterlace - NOTE: the in buffer will be overwritten with intermediate data! 
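The four ADAM7_* tables fully determine the interlace pattern: pass i owns the pixels at x = ADAM7_IX[i] + k*ADAM7_DX[i], y = ADAM7_IY[i] + m*ADAM7_DY[i], and that is exactly the per-pixel mapping Adam7_deinterlace applies. A standalone sketch that labels each pixel of an 8x8 tile with its pass number, using the same constants:

    #include <stdio.h>

    static const unsigned IX[7] = { 0, 4, 0, 2, 0, 1, 0 };
    static const unsigned IY[7] = { 0, 0, 4, 0, 2, 0, 1 };
    static const unsigned DX[7] = { 8, 8, 4, 4, 2, 2, 1 };
    static const unsigned DY[7] = { 8, 8, 8, 4, 4, 2, 2 };

    int main(void)
    {
        unsigned pass[8][8];
        unsigned i, x, y;

        /* the 7 grids are disjoint and together cover the whole 8x8 tile */
        for(i = 0; i < 7; ++i)
            for(y = IY[i]; y < 8; y += DY[i])
                for(x = IX[i]; x < 8; x += DX[i])
                    pass[y][x] = i + 1;

        for(y = 0; y < 8; ++y)
        {
            for(x = 0; x < 8; ++x) printf("%u ", pass[y][x]);
            printf("\n");
        }
        /* the first row comes out as: 1 6 4 6 2 6 4 6 */
        return 0;
    }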
- */ - unsigned bpp = lodepng_get_bpp(&info_png->color); - if(bpp == 0) return 31; /*error: invalid colortype*/ - - if(info_png->interlace_method == 0) - { - if(bpp < 8 && w * bpp != ((w * bpp + 7) / 8) * 8) - { - CERROR_TRY_RETURN(unfilter(in, in, w, h, bpp)); - removePaddingBits(out, in, w * bpp, ((w * bpp + 7) / 8) * 8, h); - } - /*we can immediately filter into the out buffer, no other steps needed*/ - else CERROR_TRY_RETURN(unfilter(out, in, w, h, bpp)); - } - else /*interlace_method is 1 (Adam7)*/ - { - unsigned passw[7], passh[7]; size_t filter_passstart[8], padded_passstart[8], passstart[8]; - unsigned i; - - Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); - - for(i = 0; i != 7; ++i) - { - CERROR_TRY_RETURN(unfilter(&in[padded_passstart[i]], &in[filter_passstart[i]], passw[i], passh[i], bpp)); - /*TODO: possible efficiency improvement: if in this reduced image the bits fit nicely in 1 scanline, - move bytes instead of bits or move not at all*/ - if(bpp < 8) - { - /*remove padding bits in scanlines; after this there still may be padding - bits between the different reduced images: each reduced image still starts nicely at a byte*/ - removePaddingBits(&in[passstart[i]], &in[padded_passstart[i]], passw[i] * bpp, - ((passw[i] * bpp + 7) / 8) * 8, passh[i]); - } - } - - Adam7_deinterlace(out, in, w, h, bpp); - } - - return 0; -} - -static unsigned readChunk_PLTE(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength) -{ - unsigned pos = 0, i; - if(color->palette) lodepng_free(color->palette); - color->palettesize = chunkLength / 3; - color->palette = (unsigned char*)lodepng_malloc(4 * color->palettesize); - if(!color->palette && color->palettesize) - { - color->palettesize = 0; - return 83; /*alloc fail*/ - } - if(color->palettesize > 256) return 38; /*error: palette too big*/ - - for(i = 0; i != color->palettesize; ++i) - { - color->palette[4 * i + 0] = data[pos++]; /*R*/ - color->palette[4 * i + 1] = data[pos++]; /*G*/ - color->palette[4 * i + 2] = data[pos++]; /*B*/ - color->palette[4 * i + 3] = 255; /*alpha*/ - } - - return 0; /* OK */ -} - -static unsigned readChunk_tRNS(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength) -{ - unsigned i; - if(color->colortype == LCT_PALETTE) - { - /*error: more alpha values given than there are palette entries*/ - if(chunkLength > color->palettesize) return 38; - - for(i = 0; i != chunkLength; ++i) color->palette[4 * i + 3] = data[i]; - } - else if(color->colortype == LCT_GREY) - { - /*error: this chunk must be 2 bytes for greyscale image*/ - if(chunkLength != 2) return 30; - - color->key_defined = 1; - color->key_r = color->key_g = color->key_b = 256u * data[0] + data[1]; - } - else if(color->colortype == LCT_RGB) - { - /*error: this chunk must be 6 bytes for RGB image*/ - if(chunkLength != 6) return 41; - - color->key_defined = 1; - color->key_r = 256u * data[0] + data[1]; - color->key_g = 256u * data[2] + data[3]; - color->key_b = 256u * data[4] + data[5]; - } - else return 42; /*error: tRNS chunk not allowed for other color models*/ - - return 0; /* OK */ -} - - -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS -/*background color chunk (bKGD)*/ -static unsigned readChunk_bKGD(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) -{ - if(info->color.colortype == LCT_PALETTE) - { - /*error: this chunk must be 1 byte for indexed color image*/ - if(chunkLength != 1) return 43; - - info->background_defined = 1; - info->background_r = info->background_g = 
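readChunk_PLTE stores every palette entry as 4 bytes RGBA with alpha preset to 255, and readChunk_tRNS then overwrites only the alpha bytes for the entries the chunk covers. That layout is visible to callers through the palette and palettesize fields of LodePNGColorMode, so a decoded palette can be inspected directly. A sketch, assuming a palette-based test file (the name is a placeholder) and decoding with color conversion turned off so the PNG's own mode is kept:

    #include <stdio.h>
    #include <stdlib.h>
    #include "lodepng.h"

    int main(void)
    {
        unsigned char* buffer = 0; size_t buffersize = 0;
        unsigned char* image = 0; unsigned w = 0, h = 0;

        LodePNGState state;
        lodepng_state_init(&state);
        state.decoder.color_convert = 0; /* keep LCT_PALETTE instead of expanding to RGBA */

        unsigned error = lodepng_load_file(&buffer, &buffersize, "paletted.png");
        if(!error) error = lodepng_decode(&image, &w, &h, &state, buffer, buffersize);

        if(!error && state.info_png.color.colortype == LCT_PALETTE)
        {
            size_t i;
            for(i = 0; i < state.info_png.color.palettesize; ++i)
            {
                const unsigned char* p = &state.info_png.color.palette[4 * i];
                printf("%u: R=%u G=%u B=%u A=%u\n", (unsigned)i, p[0], p[1], p[2], p[3]);
            }
        }

        lodepng_state_cleanup(&state);
        free(image);
        free(buffer);
        return 0;
    }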
info->background_b = data[0]; - } - else if(info->color.colortype == LCT_GREY || info->color.colortype == LCT_GREY_ALPHA) - { - /*error: this chunk must be 2 bytes for greyscale image*/ - if(chunkLength != 2) return 44; - - info->background_defined = 1; - info->background_r = info->background_g = info->background_b = 256u * data[0] + data[1]; - } - else if(info->color.colortype == LCT_RGB || info->color.colortype == LCT_RGBA) - { - /*error: this chunk must be 6 bytes for greyscale image*/ - if(chunkLength != 6) return 45; - - info->background_defined = 1; - info->background_r = 256u * data[0] + data[1]; - info->background_g = 256u * data[2] + data[3]; - info->background_b = 256u * data[4] + data[5]; - } - - return 0; /* OK */ -} - -/*text chunk (tEXt)*/ -static unsigned readChunk_tEXt(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) -{ - unsigned error = 0; - char *key = 0, *str = 0; - unsigned i; - - while(!error) /*not really a while loop, only used to break on error*/ - { - unsigned length, string2_begin; - - length = 0; - while(length < chunkLength && data[length] != 0) ++length; - /*even though it's not allowed by the standard, no error is thrown if - there's no null termination char, if the text is empty*/ - if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/ - - key = (char*)lodepng_malloc(length + 1); - if(!key) CERROR_BREAK(error, 83); /*alloc fail*/ - - key[length] = 0; - for(i = 0; i != length; ++i) key[i] = (char)data[i]; - - string2_begin = length + 1; /*skip keyword null terminator*/ - - length = chunkLength < string2_begin ? 0 : (unsigned int)(chunkLength - string2_begin); - str = (char*)lodepng_malloc(length + 1); - if(!str) CERROR_BREAK(error, 83); /*alloc fail*/ - - str[length] = 0; - for(i = 0; i != length; ++i) str[i] = (char)data[string2_begin + i]; - - error = lodepng_add_text(info, key, str); - - break; - } - - lodepng_free(key); - lodepng_free(str); - - return error; -} - -/*compressed text chunk (zTXt)*/ -static unsigned readChunk_zTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings, - const unsigned char* data, size_t chunkLength) -{ - unsigned error = 0; - unsigned i; - - unsigned length, string2_begin; - char *key = 0; - ucvector decoded; - - ucvector_init(&decoded); - - while(!error) /*not really a while loop, only used to break on error*/ - { - for(length = 0; length < chunkLength && data[length] != 0; ++length) ; - if(length + 2 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/ - if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/ - - key = (char*)lodepng_malloc(length + 1); - if(!key) CERROR_BREAK(error, 83); /*alloc fail*/ - - key[length] = 0; - for(i = 0; i != length; ++i) key[i] = (char)data[i]; - - if(data[length + 1] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/ - - string2_begin = length + 2; - if(string2_begin > chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/ - - length = (unsigned int)(chunkLength - string2_begin); - /*will fail if zlib error, e.g. 
if length is too small*/ - error = zlib_decompress(&decoded.data, &decoded.size, - (unsigned char*)(&data[string2_begin]), - length, zlibsettings); - if(error) break; - ucvector_push_back(&decoded, 0); - - error = lodepng_add_text(info, key, (char*)decoded.data); - - break; - } - - lodepng_free(key); - ucvector_cleanup(&decoded); - - return error; -} - -/*international text chunk (iTXt)*/ -static unsigned readChunk_iTXt(LodePNGInfo* info, const LodePNGDecompressSettings* zlibsettings, - const unsigned char* data, size_t chunkLength) -{ - unsigned error = 0; - unsigned i; - - unsigned length, begin, compressed; - char *key = 0, *langtag = 0, *transkey = 0; - ucvector decoded; - ucvector_init(&decoded); - - while(!error) /*not really a while loop, only used to break on error*/ - { - /*Quick check if the chunk length isn't too small. Even without check - it'd still fail with other error checks below if it's too short. This just gives a different error code.*/ - if(chunkLength < 5) CERROR_BREAK(error, 30); /*iTXt chunk too short*/ - - /*read the key*/ - for(length = 0; length < chunkLength && data[length] != 0; ++length) ; - if(length + 3 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination char, corrupt?*/ - if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/ - - key = (char*)lodepng_malloc(length + 1); - if(!key) CERROR_BREAK(error, 83); /*alloc fail*/ - - key[length] = 0; - for(i = 0; i != length; ++i) key[i] = (char)data[i]; - - /*read the compression method*/ - compressed = data[length + 1]; - if(data[length + 2] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/ - - /*even though it's not allowed by the standard, no error is thrown if - there's no null termination char, if the text is empty for the next 3 texts*/ - - /*read the langtag*/ - begin = length + 3; - length = 0; - for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length; - - langtag = (char*)lodepng_malloc(length + 1); - if(!langtag) CERROR_BREAK(error, 83); /*alloc fail*/ - - langtag[length] = 0; - for(i = 0; i != length; ++i) langtag[i] = (char)data[begin + i]; - - /*read the transkey*/ - begin += length + 1; - length = 0; - for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length; - - transkey = (char*)lodepng_malloc(length + 1); - if(!transkey) CERROR_BREAK(error, 83); /*alloc fail*/ - - transkey[length] = 0; - for(i = 0; i != length; ++i) transkey[i] = (char)data[begin + i]; - - /*read the actual text*/ - begin += length + 1; - - length = chunkLength < begin ? 0 : (unsigned int)(chunkLength - begin); - - if(compressed) - { - /*will fail if zlib error, e.g. 
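All three text readers end up in lodepng_add_text / lodepng_add_itext, which store the strings on the LodePNGInfo; the encoder-side addChunk_tEXt/zTXt/iTXt further below serialize that same storage. So attaching text on encode is just a call before lodepng_encode. A sketch; the 2x2 test image, keyword, text and output name are all invented, and lodepng_save_file (a disk helper not part of this excerpt) is assumed to be compiled in:

    #include <stdio.h>
    #include <stdlib.h>
    #include "lodepng.h"

    int main(void)
    {
        /* 2x2 opaque red RGBA image */
        unsigned char image[16] = { 255,0,0,255, 255,0,0,255, 255,0,0,255, 255,0,0,255 };
        unsigned char* png = 0; size_t pngsize = 0;

        LodePNGState state;
        lodepng_state_init(&state);

        /* keyword must be 1..79 bytes, the same limit the readers above enforce */
        unsigned error = lodepng_add_text(&state.info_png, "Comment", "written by a sketch");
        if(!error) error = lodepng_encode(&png, &pngsize, image, 2, 2, &state);
        if(!error) error = lodepng_save_file(png, pngsize, "out.png");
        printf("error %u\n", error);

        lodepng_state_cleanup(&state);
        free(png);
        return 0;
    }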
if length is too small*/ - error = zlib_decompress(&decoded.data, &decoded.size, - (unsigned char*)(&data[begin]), - length, zlibsettings); - if(error) break; - if(decoded.allocsize < decoded.size) decoded.allocsize = decoded.size; - ucvector_push_back(&decoded, 0); - } - else - { - if(!ucvector_resize(&decoded, length + 1)) CERROR_BREAK(error, 83 /*alloc fail*/); - - decoded.data[length] = 0; - for(i = 0; i != length; ++i) decoded.data[i] = data[begin + i]; - } - - error = lodepng_add_itext(info, key, langtag, transkey, (char*)decoded.data); - - break; - } - - lodepng_free(key); - lodepng_free(langtag); - lodepng_free(transkey); - ucvector_cleanup(&decoded); - - return error; -} - -static unsigned readChunk_tIME(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) -{ - if(chunkLength != 7) return 73; /*invalid tIME chunk size*/ - - info->time_defined = 1; - info->time.year = 256u * data[0] + data[1]; - info->time.month = data[2]; - info->time.day = data[3]; - info->time.hour = data[4]; - info->time.minute = data[5]; - info->time.second = data[6]; - - return 0; /* OK */ -} - -static unsigned readChunk_pHYs(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) -{ - if(chunkLength != 9) return 74; /*invalid pHYs chunk size*/ - - info->phys_defined = 1; - info->phys_x = 16777216u * data[0] + 65536u * data[1] + 256u * data[2] + data[3]; - info->phys_y = 16777216u * data[4] + 65536u * data[5] + 256u * data[6] + data[7]; - info->phys_unit = data[8]; - - return 0; /* OK */ -} -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - -/*read a PNG, the result will be in the same color type as the PNG (hence "generic")*/ -static void decodeGeneric(unsigned char** out, unsigned* w, unsigned* h, - LodePNGState* state, - const unsigned char* in, size_t insize) -{ - unsigned char IEND = 0; - const unsigned char* chunk; - size_t i; - ucvector idat; /*the data from idat chunks*/ - ucvector scanlines; - size_t predict; - size_t numpixels; - - /*for unknown chunk order*/ - unsigned unknown = 0; -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - unsigned critical_pos = 1; /*1 = after IHDR, 2 = after PLTE, 3 = after IDAT*/ -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - - /*provide some proper output values if error will happen*/ - *out = 0; - - state->error = lodepng_inspect(w, h, state, in, insize); /*reads header and resets other parameters in state->info_png*/ - if(state->error) return; - - numpixels = *w * *h; - - /*multiplication overflow*/ - if(*h != 0 && numpixels / *h != *w) CERROR_RETURN(state->error, 92); - /*multiplication overflow possible further below. Allows up to 2^31-1 pixel - bytes with 16-bit RGBA, the rest is room for filter bytes.*/ - if(numpixels > 268435455) CERROR_RETURN(state->error, 92); - - ucvector_init(&idat); - chunk = &in[33]; /*first byte of the first chunk after the header*/ - - /*loop through the chunks, ignoring unknown chunks and stopping at IEND chunk. 
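readChunk_pHYs is nothing more than two big-endian 32-bit reads plus a unit byte; the 16777216u/65536u/256u multiplications are the byte-weight form of that. The same read, written with shifts, for a concrete payload:

    #include <stdio.h>

    /* big-endian 32-bit read, equivalent to 16777216u*p[0] + 65536u*p[1] + 256u*p[2] + p[3] */
    static unsigned read_be32(const unsigned char* p)
    {
        return ((unsigned)p[0] << 24) | ((unsigned)p[1] << 16) |
               ((unsigned)p[2] << 8)  |  (unsigned)p[3];
    }

    int main(void)
    {
        /* 2835 pixels per metre (roughly 72 DPI), as it would appear in a pHYs payload */
        const unsigned char payload[4] = { 0x00, 0x00, 0x0B, 0x13 };
        printf("%u\n", read_be32(payload)); /* prints 2835 */
        return 0;
    }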
- IDAT data is put at the start of the in buffer*/ - while(!IEND && !state->error) - { - unsigned chunkLength; - const unsigned char* data; /*the data in the chunk*/ - - /*error: size of the in buffer too small to contain next chunk*/ - if((size_t)((chunk - in) + 12) > insize || chunk < in) CERROR_BREAK(state->error, 30); - - /*length of the data of the chunk, excluding the length bytes, chunk type and CRC bytes*/ - chunkLength = lodepng_chunk_length(chunk); - /*error: chunk length larger than the max PNG chunk size*/ - if(chunkLength > 2147483647) CERROR_BREAK(state->error, 63); - - if((size_t)((chunk - in) + chunkLength + 12) > insize || (chunk + chunkLength + 12) < in) - { - CERROR_BREAK(state->error, 64); /*error: size of the in buffer too small to contain next chunk*/ - } - - data = lodepng_chunk_data_const(chunk); - - /*IDAT chunk, containing compressed image data*/ - if(lodepng_chunk_type_equals(chunk, "IDAT")) - { - size_t oldsize = idat.size; - if(!ucvector_resize(&idat, oldsize + chunkLength)) CERROR_BREAK(state->error, 83 /*alloc fail*/); - for(i = 0; i != chunkLength; ++i) idat.data[oldsize + i] = data[i]; -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - critical_pos = 3; -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - } - /*IEND chunk*/ - else if(lodepng_chunk_type_equals(chunk, "IEND")) - { - IEND = 1; - } - /*palette chunk (PLTE)*/ - else if(lodepng_chunk_type_equals(chunk, "PLTE")) - { - state->error = readChunk_PLTE(&state->info_png.color, data, chunkLength); - if(state->error) break; -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - critical_pos = 2; -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - } - /*palette transparency chunk (tRNS)*/ - else if(lodepng_chunk_type_equals(chunk, "tRNS")) - { - state->error = readChunk_tRNS(&state->info_png.color, data, chunkLength); - if(state->error) break; - } -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - /*background color chunk (bKGD)*/ - else if(lodepng_chunk_type_equals(chunk, "bKGD")) - { - state->error = readChunk_bKGD(&state->info_png, data, chunkLength); - if(state->error) break; - } - /*text chunk (tEXt)*/ - else if(lodepng_chunk_type_equals(chunk, "tEXt")) - { - if(state->decoder.read_text_chunks) - { - state->error = readChunk_tEXt(&state->info_png, data, chunkLength); - if(state->error) break; - } - } - /*compressed text chunk (zTXt)*/ - else if(lodepng_chunk_type_equals(chunk, "zTXt")) - { - if(state->decoder.read_text_chunks) - { - state->error = readChunk_zTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength); - if(state->error) break; - } - } - /*international text chunk (iTXt)*/ - else if(lodepng_chunk_type_equals(chunk, "iTXt")) - { - if(state->decoder.read_text_chunks) - { - state->error = readChunk_iTXt(&state->info_png, &state->decoder.zlibsettings, data, chunkLength); - if(state->error) break; - } - } - else if(lodepng_chunk_type_equals(chunk, "tIME")) - { - state->error = readChunk_tIME(&state->info_png, data, chunkLength); - if(state->error) break; - } - else if(lodepng_chunk_type_equals(chunk, "pHYs")) - { - state->error = readChunk_pHYs(&state->info_png, data, chunkLength); - if(state->error) break; - } -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - else /*it's not an implemented chunk type, so ignore it: skip over the data*/ - { - /*error: unknown critical chunk (5th bit of first byte of chunk type is 0)*/ - if(!lodepng_chunk_ancillary(chunk)) CERROR_BREAK(state->error, 69); - - unknown = 1; -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - if(state->decoder.remember_unknown_chunks) - { - state->error = 
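The loop above only ever advances through the file with lodepng_chunk_length and lodepng_chunk_next_const, re-checking on every step that the declared length still fits in the buffer. Those helpers are public, so the same walk works outside the decoder, for example to list a file's chunks. A sketch that assumes a well-formed file (the name is a placeholder) and reads the 4 type bytes straight from the standard length-then-type layout:

    #include <stdio.h>
    #include <stdlib.h>
    #include "lodepng.h"

    int main(void)
    {
        unsigned char* png = 0; size_t pngsize = 0;
        unsigned error = lodepng_load_file(&png, &pngsize, "in.png");

        if(!error && pngsize >= 8)
        {
            const unsigned char* chunk = png + 8; /* first chunk follows the 8-byte signature */
            while((size_t)(chunk - png) + 12 <= pngsize)
            {
                unsigned length = lodepng_chunk_length(chunk);
                printf("%c%c%c%c  %u bytes  ancillary=%u\n",
                       chunk[4], chunk[5], chunk[6], chunk[7],
                       length, (unsigned)lodepng_chunk_ancillary(chunk));
                if(lodepng_chunk_type_equals(chunk, "IEND")) break;
                chunk = lodepng_chunk_next_const(chunk);
            }
        }

        free(png);
        return 0;
    }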
lodepng_chunk_append(&state->info_png.unknown_chunks_data[critical_pos - 1], - &state->info_png.unknown_chunks_size[critical_pos - 1], chunk); - if(state->error) break; - } -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - } - - if(!state->decoder.ignore_crc && !unknown) /*check CRC if wanted, only on known chunk types*/ - { - if(lodepng_chunk_check_crc(chunk)) CERROR_BREAK(state->error, 57); /*invalid CRC*/ - } - - if(!IEND) chunk = lodepng_chunk_next_const(chunk); - } - - ucvector_init(&scanlines); - /*predict output size, to allocate exact size for output buffer to avoid more dynamic allocation. - If the decompressed size does not match the prediction, the image must be corrupt.*/ - if(state->info_png.interlace_method == 0) - { - /*The extra *h is added because this are the filter bytes every scanline starts with*/ - predict = lodepng_get_raw_size_idat(*w, *h, &state->info_png.color) + *h; - } - else - { - /*Adam-7 interlaced: predicted size is the sum of the 7 sub-images sizes*/ - const LodePNGColorMode* color = &state->info_png.color; - predict = 0; - predict += lodepng_get_raw_size_idat((*w + 7) >> 3, (*h + 7) >> 3, color) + ((*h + 7) >> 3); - if(*w > 4) predict += lodepng_get_raw_size_idat((*w + 3) >> 3, (*h + 7) >> 3, color) + ((*h + 7) >> 3); - predict += lodepng_get_raw_size_idat((*w + 3) >> 2, (*h + 3) >> 3, color) + ((*h + 3) >> 3); - if(*w > 2) predict += lodepng_get_raw_size_idat((*w + 1) >> 2, (*h + 3) >> 2, color) + ((*h + 3) >> 2); - predict += lodepng_get_raw_size_idat((*w + 1) >> 1, (*h + 1) >> 2, color) + ((*h + 1) >> 2); - if(*w > 1) predict += lodepng_get_raw_size_idat((*w + 0) >> 1, (*h + 1) >> 1, color) + ((*h + 1) >> 1); - predict += lodepng_get_raw_size_idat((*w + 0), (*h + 0) >> 1, color) + ((*h + 0) >> 1); - } - if(!state->error && !ucvector_reserve(&scanlines, predict)) state->error = 83; /*alloc fail*/ - if(!state->error) - { - state->error = zlib_decompress(&scanlines.data, &scanlines.size, idat.data, - idat.size, &state->decoder.zlibsettings); - if(!state->error && scanlines.size != predict) state->error = 91; /*decompressed size doesn't match prediction*/ - } - ucvector_cleanup(&idat); - - if(!state->error) - { - size_t outsize = lodepng_get_raw_size(*w, *h, &state->info_png.color); - *out = (unsigned char*)lodepng_malloc(outsize); - if(!*out) state->error = 83; /*alloc fail*/ - for(i = 0; i < outsize; i++) (*out)[i] = 0; - if(!state->error) state->error = postProcessScanlines(*out, scanlines.data, *w, *h, &state->info_png); - } - ucvector_cleanup(&scanlines); -} - -unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h, - LodePNGState* state, - const unsigned char* in, size_t insize) -{ - *out = 0; - decodeGeneric(out, w, h, state, in, insize); - if(state->error) return state->error; - if(!state->decoder.color_convert || lodepng_color_mode_equal(&state->info_raw, &state->info_png.color)) - { - /*same color type, no copying or converting of data needed*/ - /*store the info_png color settings on the info_raw so that the info_raw still reflects what colortype - the raw image has to the end user*/ - if(!state->decoder.color_convert) - { - state->error = lodepng_color_mode_copy(&state->info_raw, &state->info_png.color); - if(state->error) return state->error; - } - } - else - { - /*color conversion needed; sort of copy of the data*/ - unsigned char* data = *out; - size_t outsize; - - /*TODO: check if this works according to the statement in the documentation: "The converter can convert - from greyscale input color type, to 8-bit greyscale or 
greyscale with alpha"*/ - if(!(state->info_raw.colortype == LCT_RGB || state->info_raw.colortype == LCT_RGBA) - && !(state->info_raw.bitdepth == 8)) - { - return 56; /*unsupported color mode conversion*/ - } - - outsize = lodepng_get_raw_size(*w, *h, &state->info_raw); - *out = (unsigned char*)lodepng_malloc(outsize); - if(!(*out)) - { - state->error = 83; /*alloc fail*/ - } - else state->error = lodepng_convert(*out, data, &state->info_raw, - &state->info_png.color, *w, *h); - lodepng_free(data); - } - return state->error; -} - -unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, - size_t insize, LodePNGColorType colortype, unsigned bitdepth) -{ - unsigned error; - LodePNGState state; - lodepng_state_init(&state); - state.info_raw.colortype = colortype; - state.info_raw.bitdepth = bitdepth; - error = lodepng_decode(out, w, h, &state, in, insize); - lodepng_state_cleanup(&state); - return error; -} - -unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize) -{ - return lodepng_decode_memory(out, w, h, in, insize, LCT_RGBA, 8); -} - -unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize) -{ - return lodepng_decode_memory(out, w, h, in, insize, LCT_RGB, 8); -} - -#ifdef LODEPNG_COMPILE_DISK -unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename, - LodePNGColorType colortype, unsigned bitdepth) -{ - unsigned char* buffer; - size_t buffersize; - unsigned error; - error = lodepng_load_file(&buffer, &buffersize, filename); - if(!error) error = lodepng_decode_memory(out, w, h, buffer, buffersize, colortype, bitdepth); - lodepng_free(buffer); - return error; -} - -unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename) -{ - return lodepng_decode_file(out, w, h, filename, LCT_RGBA, 8); -} - -unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename) -{ - return lodepng_decode_file(out, w, h, filename, LCT_RGB, 8); -} -#endif /*LODEPNG_COMPILE_DISK*/ - -void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings) -{ - settings->color_convert = 1; -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - settings->read_text_chunks = 1; - settings->remember_unknown_chunks = 0; -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - settings->ignore_crc = 0; - lodepng_decompress_settings_init(&settings->zlibsettings); -} - -#endif /*LODEPNG_COMPILE_DECODER*/ - -#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) - -void lodepng_state_init(LodePNGState* state) -{ -#ifdef LODEPNG_COMPILE_DECODER - lodepng_decoder_settings_init(&state->decoder); -#endif /*LODEPNG_COMPILE_DECODER*/ -#ifdef LODEPNG_COMPILE_ENCODER - lodepng_encoder_settings_init(&state->encoder); -#endif /*LODEPNG_COMPILE_ENCODER*/ - lodepng_color_mode_init(&state->info_raw); - lodepng_info_init(&state->info_png); - state->error = 1; -} - -void lodepng_state_cleanup(LodePNGState* state) -{ - lodepng_color_mode_cleanup(&state->info_raw); - lodepng_info_cleanup(&state->info_png); -} - -void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source) -{ - lodepng_state_cleanup(dest); - *dest = *source; - lodepng_color_mode_init(&dest->info_raw); - lodepng_info_init(&dest->info_png); - dest->error = lodepng_color_mode_copy(&dest->info_raw, &source->info_raw); if(dest->error) return; - dest->error = lodepng_info_copy(&dest->info_png, 
&source->info_png); if(dest->error) return; -} - -#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */ - -#ifdef LODEPNG_COMPILE_ENCODER - -/* ////////////////////////////////////////////////////////////////////////// */ -/* / PNG Encoder / */ -/* ////////////////////////////////////////////////////////////////////////// */ - -/*chunkName must be string of 4 characters*/ -static unsigned addChunk(ucvector* out, const char* chunkName, const unsigned char* data, size_t length) -{ - CERROR_TRY_RETURN(lodepng_chunk_create(&out->data, &out->size, (unsigned)length, chunkName, data)); - out->allocsize = out->size; /*fix the allocsize again*/ - return 0; -} - -static void writeSignature(ucvector* out) -{ - /*8 bytes PNG signature, aka the magic bytes*/ - ucvector_push_back(out, 137); - ucvector_push_back(out, 80); - ucvector_push_back(out, 78); - ucvector_push_back(out, 71); - ucvector_push_back(out, 13); - ucvector_push_back(out, 10); - ucvector_push_back(out, 26); - ucvector_push_back(out, 10); -} - -static unsigned addChunk_IHDR(ucvector* out, unsigned w, unsigned h, - LodePNGColorType colortype, unsigned bitdepth, unsigned interlace_method) -{ - unsigned error = 0; - ucvector header; - ucvector_init(&header); - - lodepng_add32bitInt(&header, w); /*width*/ - lodepng_add32bitInt(&header, h); /*height*/ - ucvector_push_back(&header, (unsigned char)bitdepth); /*bit depth*/ - ucvector_push_back(&header, (unsigned char)colortype); /*color type*/ - ucvector_push_back(&header, 0); /*compression method*/ - ucvector_push_back(&header, 0); /*filter method*/ - ucvector_push_back(&header, interlace_method); /*interlace method*/ - - error = addChunk(out, "IHDR", header.data, header.size); - ucvector_cleanup(&header); - - return error; -} - -static unsigned addChunk_PLTE(ucvector* out, const LodePNGColorMode* info) -{ - unsigned error = 0; - size_t i; - ucvector PLTE; - ucvector_init(&PLTE); - for(i = 0; i != info->palettesize * 4; ++i) - { - /*add all channels except alpha channel*/ - if(i % 4 != 3) ucvector_push_back(&PLTE, info->palette[i]); - } - error = addChunk(out, "PLTE", PLTE.data, PLTE.size); - ucvector_cleanup(&PLTE); - - return error; -} - -static unsigned addChunk_tRNS(ucvector* out, const LodePNGColorMode* info) -{ - unsigned error = 0; - size_t i; - ucvector tRNS; - ucvector_init(&tRNS); - if(info->colortype == LCT_PALETTE) - { - size_t amount = info->palettesize; - /*the tail of palette values that all have 255 as alpha, does not have to be encoded*/ - for(i = info->palettesize; i != 0; --i) - { - if(info->palette[4 * (i - 1) + 3] == 255) --amount; - else break; - } - /*add only alpha channel*/ - for(i = 0; i != amount; ++i) ucvector_push_back(&tRNS, info->palette[4 * i + 3]); - } - else if(info->colortype == LCT_GREY) - { - if(info->key_defined) - { - ucvector_push_back(&tRNS, (unsigned char)(info->key_r >> 8)); - ucvector_push_back(&tRNS, (unsigned char)(info->key_r & 255)); - } - } - else if(info->colortype == LCT_RGB) - { - if(info->key_defined) - { - ucvector_push_back(&tRNS, (unsigned char)(info->key_r >> 8)); - ucvector_push_back(&tRNS, (unsigned char)(info->key_r & 255)); - ucvector_push_back(&tRNS, (unsigned char)(info->key_g >> 8)); - ucvector_push_back(&tRNS, (unsigned char)(info->key_g & 255)); - ucvector_push_back(&tRNS, (unsigned char)(info->key_b >> 8)); - ucvector_push_back(&tRNS, (unsigned char)(info->key_b & 255)); - } - } - - error = addChunk(out, "tRNS", tRNS.data, tRNS.size); - ucvector_cleanup(&tRNS); - - return error; -} - -static unsigned 
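addChunk is a thin wrapper over the public lodepng_chunk_create, which writes the 4-byte length, the 4-byte type, the payload and the CRC in one go. A sketch of building a single chunk into a fresh buffer just to look at that layout (the "prVt" type name and the payload are invented):

    #include <stdio.h>
    #include <stdlib.h>
    #include "lodepng.h"

    int main(void)
    {
        unsigned char* buffer = 0;
        size_t size = 0;
        const unsigned char payload[4] = { 1, 2, 3, 4 };

        /* "prVt": lowercase 1st letter = ancillary, lowercase 2nd letter = private */
        unsigned error = lodepng_chunk_create(&buffer, &size, 4, "prVt", payload);
        if(!error)
            printf("chunk occupies %u bytes (4 length + 4 type + 4 data + 4 CRC), declared length %u\n",
                   (unsigned)size, lodepng_chunk_length(buffer));

        free(buffer);
        return 0;
    }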
addChunk_IDAT(ucvector* out, const unsigned char* data, size_t datasize, - LodePNGCompressSettings* zlibsettings) -{ - ucvector zlibdata; - unsigned error = 0; - - /*compress with the Zlib compressor*/ - ucvector_init(&zlibdata); - error = zlib_compress(&zlibdata.data, &zlibdata.size, data, datasize, zlibsettings); - if(!error) error = addChunk(out, "IDAT", zlibdata.data, zlibdata.size); - ucvector_cleanup(&zlibdata); - - return error; -} - -static unsigned addChunk_IEND(ucvector* out) -{ - unsigned error = 0; - error = addChunk(out, "IEND", 0, 0); - return error; -} - -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - -static unsigned addChunk_tEXt(ucvector* out, const char* keyword, const char* textstring) -{ - unsigned error = 0; - size_t i; - ucvector text; - ucvector_init(&text); - for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&text, (unsigned char)keyword[i]); - if(i < 1 || i > 79) return 89; /*error: invalid keyword size*/ - ucvector_push_back(&text, 0); /*0 termination char*/ - for(i = 0; textstring[i] != 0; ++i) ucvector_push_back(&text, (unsigned char)textstring[i]); - error = addChunk(out, "tEXt", text.data, text.size); - ucvector_cleanup(&text); - - return error; -} - -static unsigned addChunk_zTXt(ucvector* out, const char* keyword, const char* textstring, - LodePNGCompressSettings* zlibsettings) -{ - unsigned error = 0; - ucvector data, compressed; - size_t i, textsize = strlen(textstring); - - ucvector_init(&data); - ucvector_init(&compressed); - for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)keyword[i]); - if(i < 1 || i > 79) return 89; /*error: invalid keyword size*/ - ucvector_push_back(&data, 0); /*0 termination char*/ - ucvector_push_back(&data, 0); /*compression method: 0*/ - - error = zlib_compress(&compressed.data, &compressed.size, - (unsigned char*)textstring, textsize, zlibsettings); - if(!error) - { - for(i = 0; i != compressed.size; ++i) ucvector_push_back(&data, compressed.data[i]); - error = addChunk(out, "zTXt", data.data, data.size); - } - - ucvector_cleanup(&compressed); - ucvector_cleanup(&data); - return error; -} - -static unsigned addChunk_iTXt(ucvector* out, unsigned compressed, const char* keyword, const char* langtag, - const char* transkey, const char* textstring, LodePNGCompressSettings* zlibsettings) -{ - unsigned error = 0; - ucvector data; - size_t i, textsize = strlen(textstring); - - ucvector_init(&data); - - for(i = 0; keyword[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)keyword[i]); - if(i < 1 || i > 79) return 89; /*error: invalid keyword size*/ - ucvector_push_back(&data, 0); /*null termination char*/ - ucvector_push_back(&data, compressed ? 
1 : 0); /*compression flag*/ - ucvector_push_back(&data, 0); /*compression method*/ - for(i = 0; langtag[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)langtag[i]); - ucvector_push_back(&data, 0); /*null termination char*/ - for(i = 0; transkey[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)transkey[i]); - ucvector_push_back(&data, 0); /*null termination char*/ - - if(compressed) - { - ucvector compressed_data; - ucvector_init(&compressed_data); - error = zlib_compress(&compressed_data.data, &compressed_data.size, - (unsigned char*)textstring, textsize, zlibsettings); - if(!error) - { - for(i = 0; i != compressed_data.size; ++i) ucvector_push_back(&data, compressed_data.data[i]); - } - ucvector_cleanup(&compressed_data); - } - else /*not compressed*/ - { - for(i = 0; textstring[i] != 0; ++i) ucvector_push_back(&data, (unsigned char)textstring[i]); - } - - if(!error) error = addChunk(out, "iTXt", data.data, data.size); - ucvector_cleanup(&data); - return error; -} - -static unsigned addChunk_bKGD(ucvector* out, const LodePNGInfo* info) -{ - unsigned error = 0; - ucvector bKGD; - ucvector_init(&bKGD); - if(info->color.colortype == LCT_GREY || info->color.colortype == LCT_GREY_ALPHA) - { - ucvector_push_back(&bKGD, (unsigned char)(info->background_r >> 8)); - ucvector_push_back(&bKGD, (unsigned char)(info->background_r & 255)); - } - else if(info->color.colortype == LCT_RGB || info->color.colortype == LCT_RGBA) - { - ucvector_push_back(&bKGD, (unsigned char)(info->background_r >> 8)); - ucvector_push_back(&bKGD, (unsigned char)(info->background_r & 255)); - ucvector_push_back(&bKGD, (unsigned char)(info->background_g >> 8)); - ucvector_push_back(&bKGD, (unsigned char)(info->background_g & 255)); - ucvector_push_back(&bKGD, (unsigned char)(info->background_b >> 8)); - ucvector_push_back(&bKGD, (unsigned char)(info->background_b & 255)); - } - else if(info->color.colortype == LCT_PALETTE) - { - ucvector_push_back(&bKGD, (unsigned char)(info->background_r & 255)); /*palette index*/ - } - - error = addChunk(out, "bKGD", bKGD.data, bKGD.size); - ucvector_cleanup(&bKGD); - - return error; -} - -static unsigned addChunk_tIME(ucvector* out, const LodePNGTime* time) -{ - unsigned error = 0; - unsigned char* data = (unsigned char*)lodepng_malloc(7); - if(!data) return 83; /*alloc fail*/ - data[0] = (unsigned char)(time->year >> 8); - data[1] = (unsigned char)(time->year & 255); - data[2] = (unsigned char)time->month; - data[3] = (unsigned char)time->day; - data[4] = (unsigned char)time->hour; - data[5] = (unsigned char)time->minute; - data[6] = (unsigned char)time->second; - error = addChunk(out, "tIME", data, 7); - lodepng_free(data); - return error; -} - -static unsigned addChunk_pHYs(ucvector* out, const LodePNGInfo* info) -{ - unsigned error = 0; - ucvector data; - ucvector_init(&data); - - lodepng_add32bitInt(&data, info->phys_x); - lodepng_add32bitInt(&data, info->phys_y); - ucvector_push_back(&data, info->phys_unit); - - error = addChunk(out, "pHYs", data.data, data.size); - ucvector_cleanup(&data); - - return error; -} - -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - -static void filterScanline(unsigned char* out, const unsigned char* scanline, const unsigned char* prevline, - size_t length, size_t bytewidth, unsigned char filterType) -{ - size_t i; - switch(filterType) - { - case 0: /*None*/ - for(i = 0; i != length; ++i) out[i] = scanline[i]; - break; - case 1: /*Sub*/ - for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; - for(i = bytewidth; i < length; ++i) out[i] = 
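addChunk_bKGD, addChunk_tIME and addChunk_pHYs serialize the same LodePNGInfo fields that the corresponding readers filled in on the decode side, so emitting this metadata is a matter of setting those fields before encoding. A sketch; the 1x1 image, the timestamp and the resolution value are invented, and it is assumed (not shown in this excerpt) that lodepng_encode writes these chunks whenever the *_defined flags are set:

    #include <stdio.h>
    #include <stdlib.h>
    #include "lodepng.h"

    int main(void)
    {
        unsigned char image[4] = { 0, 0, 0, 255 }; /* 1x1 opaque black */
        unsigned char* png = 0; size_t pngsize = 0;

        LodePNGState state;
        lodepng_state_init(&state);

        /* pHYs: 2835 pixels per metre (~72 DPI) in both directions, unit 1 = metre */
        state.info_png.phys_defined = 1;
        state.info_png.phys_x = 2835;
        state.info_png.phys_y = 2835;
        state.info_png.phys_unit = 1;

        /* tIME: an arbitrary timestamp */
        state.info_png.time_defined = 1;
        state.info_png.time.year = 2021;
        state.info_png.time.month = 2;
        state.info_png.time.day = 15;
        state.info_png.time.hour = 19;
        state.info_png.time.minute = 27;
        state.info_png.time.second = 0;

        unsigned error = lodepng_encode(&png, &pngsize, image, 1, 1, &state);
        printf("error %u, %u bytes\n", error, (unsigned)pngsize);

        lodepng_state_cleanup(&state);
        free(png);
        return 0;
    }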
scanline[i] - scanline[i - bytewidth]; - break; - case 2: /*Up*/ - if(prevline) - { - for(i = 0; i != length; ++i) out[i] = scanline[i] - prevline[i]; - } - else - { - for(i = 0; i != length; ++i) out[i] = scanline[i]; - } - break; - case 3: /*Average*/ - if(prevline) - { - for(i = 0; i != bytewidth; ++i) out[i] = scanline[i] - (prevline[i] >> 1); - for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - ((scanline[i - bytewidth] + prevline[i]) >> 1); - } - else - { - for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; - for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - (scanline[i - bytewidth] >> 1); - } - break; - case 4: /*Paeth*/ - if(prevline) - { - /*paethPredictor(0, prevline[i], 0) is always prevline[i]*/ - for(i = 0; i != bytewidth; ++i) out[i] = (scanline[i] - prevline[i]); - for(i = bytewidth; i < length; ++i) - { - out[i] = (scanline[i] - paethPredictor(scanline[i - bytewidth], prevline[i], prevline[i - bytewidth])); - } - } - else - { - for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; - /*paethPredictor(scanline[i - bytewidth], 0, 0) is always scanline[i - bytewidth]*/ - for(i = bytewidth; i < length; ++i) out[i] = (scanline[i] - scanline[i - bytewidth]); - } - break; - default: return; /*unexisting filter type given*/ - } -} - -/* log2 approximation. A slight bit faster than std::log. */ -static float flog2(float f) -{ - float result = 0; - while(f > 32) { result += 4; f /= 16; } - while(f > 2) { ++result; f /= 2; } - return result + 1.442695f * (f * f * f / 3 - 3 * f * f / 2 + 3 * f - 1.83333f); -} - -static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, - const LodePNGColorMode* info, const LodePNGEncoderSettings* settings) -{ - /* - For PNG filter method 0 - out must be a buffer with as size: h + (w * h * bpp + 7) / 8, because there are - the scanlines with 1 extra byte per scanline - */ - - unsigned bpp = lodepng_get_bpp(info); - /*the width of a scanline in bytes, not including the filter type*/ - size_t linebytes = (w * bpp + 7) / 8; - /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/ - size_t bytewidth = (bpp + 7) / 8; - const unsigned char* prevline = 0; - unsigned x, y; - unsigned error = 0; - LodePNGFilterStrategy strategy = settings->filter_strategy; - - /* - There is a heuristic called the minimum sum of absolute differences heuristic, suggested by the PNG standard: - * If the image type is Palette, or the bit depth is smaller than 8, then do not filter the image (i.e. - use fixed filtering, with the filter None). - * (The other case) If the image type is Grayscale or RGB (with or without Alpha), and the bit depth is - not smaller than 8, then use adaptive filtering heuristic as follows: independently for each row, apply - all five filters and select the filter that produces the smallest sum of absolute values per row. - This heuristic is used if filter strategy is LFS_MINSUM and filter_palette_zero is true. - - If filter_palette_zero is true and filter_strategy is not LFS_MINSUM, the above heuristic is followed, - but for "the other case", whatever strategy filter_strategy is set to instead of the minimum sum - heuristic is used. 
- */ - if(settings->filter_palette_zero && - (info->colortype == LCT_PALETTE || info->bitdepth < 8)) strategy = LFS_ZERO; - - if(bpp == 0) return 31; /*error: invalid color type*/ - - if(strategy == LFS_ZERO) - { - for(y = 0; y != h; ++y) - { - size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/ - size_t inindex = linebytes * y; - out[outindex] = 0; /*filter type byte*/ - filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, 0); - prevline = &in[inindex]; - } - } - else if(strategy == LFS_MINSUM) - { - /*adaptive filtering*/ - size_t sum[5]; - unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ - size_t smallest = 0; - unsigned char type, bestType = 0; - - for(type = 0; type != 5; ++type) - { - attempt[type] = (unsigned char*)lodepng_malloc(linebytes); - if(!attempt[type]) return 83; /*alloc fail*/ - } - - if(!error) - { - for(y = 0; y != h; ++y) - { - /*try the 5 filter types*/ - for(type = 0; type != 5; ++type) - { - filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); - - /*calculate the sum of the result*/ - sum[type] = 0; - if(type == 0) - { - for(x = 0; x != linebytes; ++x) sum[type] += (unsigned char)(attempt[type][x]); - } - else - { - for(x = 0; x != linebytes; ++x) - { - /*For differences, each byte should be treated as signed, values above 127 are negative - (converted to signed char). Filtertype 0 isn't a difference though, so use unsigned there. - This means filtertype 0 is almost never chosen, but that is justified.*/ - unsigned char s = attempt[type][x]; - sum[type] += s < 128 ? s : (255U - s); - } - } - - /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/ - if(type == 0 || sum[type] < smallest) - { - bestType = type; - smallest = sum[type]; - } - } - - prevline = &in[y * linebytes]; - - /*now fill the out values*/ - out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ - for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; - } - } - - for(type = 0; type != 5; ++type) lodepng_free(attempt[type]); - } - else if(strategy == LFS_ENTROPY) - { - float sum[5]; - unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ - float smallest = 0; - unsigned type, bestType = 0; - unsigned count[256]; - - for(type = 0; type != 5; ++type) - { - attempt[type] = (unsigned char*)lodepng_malloc(linebytes); - if(!attempt[type]) return 83; /*alloc fail*/ - } - - for(y = 0; y != h; ++y) - { - /*try the 5 filter types*/ - for(type = 0; type != 5; ++type) - { - filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); - for(x = 0; x != 256; ++x) count[x] = 0; - for(x = 0; x != linebytes; ++x) ++count[attempt[type][x]]; - ++count[type]; /*the filter type itself is part of the scanline*/ - sum[type] = 0; - for(x = 0; x != 256; ++x) - { - float p = count[x] / (float)(linebytes + 1); - sum[type] += count[x] == 0 ? 
0 : flog2(1 / p) * p; - } - /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/ - if(type == 0 || sum[type] < smallest) - { - bestType = type; - smallest = sum[type]; - } - } - - prevline = &in[y * linebytes]; - - /*now fill the out values*/ - out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ - for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; - } - - for(type = 0; type != 5; ++type) lodepng_free(attempt[type]); - } - else if(strategy == LFS_PREDEFINED) - { - for(y = 0; y != h; ++y) - { - size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/ - size_t inindex = linebytes * y; - unsigned char type = settings->predefined_filters[y]; - out[outindex] = type; /*filter type byte*/ - filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, type); - prevline = &in[inindex]; - } - } - else if(strategy == LFS_BRUTE_FORCE) - { - /*brute force filter chooser. - deflate the scanline after every filter attempt to see which one deflates best. - This is very slow and gives only slightly smaller, sometimes even larger, result*/ - size_t size[5]; - unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ - size_t smallest = 0; - unsigned type = 0, bestType = 0; - unsigned char* dummy; - LodePNGCompressSettings zlibsettings = settings->zlibsettings; - /*use fixed tree on the attempts so that the tree is not adapted to the filtertype on purpose, - to simulate the true case where the tree is the same for the whole image. Sometimes it gives - better result with dynamic tree anyway. Using the fixed tree sometimes gives worse, but in rare - cases better compression. It does make this a bit less slow, so it's worth doing this.*/ - zlibsettings.btype = 1; - /*a custom encoder likely doesn't read the btype setting and is optimized for complete PNG - images only, so disable it*/ - zlibsettings.custom_zlib = 0; - zlibsettings.custom_deflate = 0; - for(type = 0; type != 5; ++type) - { - attempt[type] = (unsigned char*)lodepng_malloc(linebytes); - if(!attempt[type]) return 83; /*alloc fail*/ - } - for(y = 0; y != h; ++y) /*try the 5 filter types*/ - { - for(type = 0; type != 5; ++type) - { - size_t testsize = linebytes; - /*if(testsize > 8) testsize /= 8;*/ /*it already works good enough by testing a part of the row*/ - - filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); - size[type] = 0; - dummy = 0; - zlib_compress(&dummy, &size[type], attempt[type], testsize, &zlibsettings); - lodepng_free(dummy); - /*check if this is smallest size (or if type == 0 it's the first case so always store the values)*/ - if(type == 0 || size[type] < smallest) - { - bestType = type; - smallest = size[type]; - } - } - prevline = &in[y * linebytes]; - out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ - for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; - } - for(type = 0; type != 5; ++type) free(attempt[type]); - } - else return 88; /* unknown filter strategy */ - - return error; -} - -static void addPaddingBits(unsigned char* out, const unsigned char* in, - size_t olinebits, size_t ilinebits, unsigned h) -{ - /*The opposite of the removePaddingBits function - olinebits must be >= ilinebits*/ - unsigned y; - size_t diff = olinebits - ilinebits; - size_t obp = 0, ibp = 0; /*bit pointers*/ - for(y = 0; y != h; ++y) - { - 
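Every strategy above is selected through the encoder settings: filter_palette_zero forces filter 0 for palette and low-bit-depth images (the rule the PNG standard suggests), filter_strategy chooses between zero, minimum-sum, entropy, predefined and brute force, and predefined_filters supplies one filter byte per scanline for LFS_PREDEFINED. A sketch that encodes the same invented gradient with three strategies to compare the resulting sizes (the numbers themselves will vary with the image content):

    #include <stdio.h>
    #include <stdlib.h>
    #include "lodepng.h"

    static size_t encode_with(LodePNGFilterStrategy strategy,
                              const unsigned char* image, unsigned w, unsigned h)
    {
        unsigned char* png = 0;
        size_t pngsize = 0;

        LodePNGState state;
        lodepng_state_init(&state);
        state.encoder.filter_strategy = strategy;

        unsigned error = lodepng_encode(&png, &pngsize, image, w, h, &state);
        lodepng_state_cleanup(&state);
        free(png);
        return error ? 0 : pngsize;
    }

    int main(void)
    {
        enum { W = 64, H = 64 };
        unsigned char* image = (unsigned char*)malloc(W * H * 4);
        unsigned x, y;
        if(!image) return 1;

        /* RGBA gradient, just to give the filters something to chew on */
        for(y = 0; y < H; ++y)
            for(x = 0; x < W; ++x)
            {
                unsigned char* p = image + 4 * (y * W + x);
                p[0] = (unsigned char)(x * 4);
                p[1] = (unsigned char)(y * 4);
                p[2] = 128;
                p[3] = 255;
            }

        printf("LFS_ZERO:    %u bytes\n", (unsigned)encode_with(LFS_ZERO, image, W, H));
        printf("LFS_MINSUM:  %u bytes\n", (unsigned)encode_with(LFS_MINSUM, image, W, H));
        printf("LFS_ENTROPY: %u bytes\n", (unsigned)encode_with(LFS_ENTROPY, image, W, H));

        free(image);
        return 0;
    }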
size_t x; - for(x = 0; x < ilinebits; ++x) - { - unsigned char bit = readBitFromReversedStream(&ibp, in); - setBitOfReversedStream(&obp, out, bit); - } - /*obp += diff; --> no, fill in some value in the padding bits too, to avoid - "Use of uninitialised value of size ###" warning from valgrind*/ - for(x = 0; x != diff; ++x) setBitOfReversedStream(&obp, out, 0); - } -} - -/* -in: non-interlaced image with size w*h -out: the same pixels, but re-ordered according to PNG's Adam7 interlacing, with - no padding bits between scanlines, but between reduced images so that each - reduced image starts at a byte. -bpp: bits per pixel -there are no padding bits, not between scanlines, not between reduced images -in has the following size in bits: w * h * bpp. -out is possibly bigger due to padding bits between reduced images -NOTE: comments about padding bits are only relevant if bpp < 8 -*/ -static void Adam7_interlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) -{ - unsigned passw[7], passh[7]; - size_t filter_passstart[8], padded_passstart[8], passstart[8]; - unsigned i; - - Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); - - if(bpp >= 8) - { - for(i = 0; i != 7; ++i) - { - unsigned x, y, b; - size_t bytewidth = bpp / 8; - for(y = 0; y < passh[i]; ++y) - for(x = 0; x < passw[i]; ++x) - { - size_t pixelinstart = ((ADAM7_IY[i] + y * ADAM7_DY[i]) * w + ADAM7_IX[i] + x * ADAM7_DX[i]) * bytewidth; - size_t pixeloutstart = passstart[i] + (y * passw[i] + x) * bytewidth; - for(b = 0; b < bytewidth; ++b) - { - out[pixeloutstart + b] = in[pixelinstart + b]; - } - } - } - } - else /*bpp < 8: Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/ - { - for(i = 0; i != 7; ++i) - { - unsigned x, y, b; - unsigned ilinebits = bpp * passw[i]; - unsigned olinebits = bpp * w; - size_t obp, ibp; /*bit pointers (for out and in buffer)*/ - for(y = 0; y < passh[i]; ++y) - for(x = 0; x < passw[i]; ++x) - { - ibp = (ADAM7_IY[i] + y * ADAM7_DY[i]) * olinebits + (ADAM7_IX[i] + x * ADAM7_DX[i]) * bpp; - obp = (8 * passstart[i]) + (y * ilinebits + x * bpp); - for(b = 0; b < bpp; ++b) - { - unsigned char bit = readBitFromReversedStream(&ibp, in); - setBitOfReversedStream(&obp, out, bit); - } - } - } - } -} - -/*out must be buffer big enough to contain uncompressed IDAT chunk data, and in must contain the full image. -return value is error**/ -static unsigned preProcessScanlines(unsigned char** out, size_t* outsize, const unsigned char* in, - unsigned w, unsigned h, - const LodePNGInfo* info_png, const LodePNGEncoderSettings* settings) -{ - /* - This function converts the pure 2D image with the PNG's colortype, into filtered-padded-interlaced data. 
Steps: - *) if no Adam7: 1) add padding bits (= posible extra bits per scanline if bpp < 8) 2) filter - *) if adam7: 1) Adam7_interlace 2) 7x add padding bits 3) 7x filter - */ - unsigned bpp = lodepng_get_bpp(&info_png->color); - unsigned error = 0; - - if(info_png->interlace_method == 0) - { - *outsize = h + (h * ((w * bpp + 7) / 8)); /*image size plus an extra byte per scanline + possible padding bits*/ - *out = (unsigned char*)lodepng_malloc(*outsize); - if(!(*out) && (*outsize)) error = 83; /*alloc fail*/ - - if(!error) - { - /*non multiple of 8 bits per scanline, padding bits needed per scanline*/ - if(bpp < 8 && w * bpp != ((w * bpp + 7) / 8) * 8) - { - unsigned char* padded = (unsigned char*)lodepng_malloc(h * ((w * bpp + 7) / 8)); - if(!padded) error = 83; /*alloc fail*/ - if(!error) - { - addPaddingBits(padded, in, ((w * bpp + 7) / 8) * 8, w * bpp, h); - error = filter(*out, padded, w, h, &info_png->color, settings); - } - lodepng_free(padded); - } - else - { - /*we can immediately filter into the out buffer, no other steps needed*/ - error = filter(*out, in, w, h, &info_png->color, settings); - } - } - } - else /*interlace_method is 1 (Adam7)*/ - { - unsigned passw[7], passh[7]; - size_t filter_passstart[8], padded_passstart[8], passstart[8]; - unsigned char* adam7; - - Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); - - *outsize = filter_passstart[7]; /*image size plus an extra byte per scanline + possible padding bits*/ - *out = (unsigned char*)lodepng_malloc(*outsize); - if(!(*out)) error = 83; /*alloc fail*/ - - adam7 = (unsigned char*)lodepng_malloc(passstart[7]); - if(!adam7 && passstart[7]) error = 83; /*alloc fail*/ - - if(!error) - { - unsigned i; - - Adam7_interlace(adam7, in, w, h, bpp); - for(i = 0; i != 7; ++i) - { - if(bpp < 8) - { - unsigned char* padded = (unsigned char*)lodepng_malloc(padded_passstart[i + 1] - padded_passstart[i]); - if(!padded) ERROR_BREAK(83); /*alloc fail*/ - addPaddingBits(padded, &adam7[passstart[i]], - ((passw[i] * bpp + 7) / 8) * 8, passw[i] * bpp, passh[i]); - error = filter(&(*out)[filter_passstart[i]], padded, - passw[i], passh[i], &info_png->color, settings); - lodepng_free(padded); - } - else - { - error = filter(&(*out)[filter_passstart[i]], &adam7[padded_passstart[i]], - passw[i], passh[i], &info_png->color, settings); - } - - if(error) break; - } - } - - lodepng_free(adam7); - } - - return error; -} - -/* -palette must have 4 * palettesize bytes allocated, and given in format RGBARGBARGBARGBA... -returns 0 if the palette is opaque, -returns 1 if the palette has a single color with alpha 0 ==> color key -returns 2 if the palette is semi-translucent. 
-*/ -static unsigned getPaletteTranslucency(const unsigned char* palette, size_t palettesize) -{ - size_t i; - unsigned key = 0; - unsigned r = 0, g = 0, b = 0; /*the value of the color with alpha 0, so long as color keying is possible*/ - for(i = 0; i != palettesize; ++i) - { - if(!key && palette[4 * i + 3] == 0) - { - r = palette[4 * i + 0]; g = palette[4 * i + 1]; b = palette[4 * i + 2]; - key = 1; - i = (size_t)(-1); /*restart from beginning, to detect earlier opaque colors with key's value*/ - } - else if(palette[4 * i + 3] != 255) return 2; - /*when key, no opaque RGB may have key's RGB*/ - else if(key && r == palette[i * 4 + 0] && g == palette[i * 4 + 1] && b == palette[i * 4 + 2]) return 2; - } - return key; -} - -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS -static unsigned addUnknownChunks(ucvector* out, unsigned char* data, size_t datasize) -{ - unsigned char* inchunk = data; - while((size_t)(inchunk - data) < datasize) - { - CERROR_TRY_RETURN(lodepng_chunk_append(&out->data, &out->size, inchunk)); - out->allocsize = out->size; /*fix the allocsize again*/ - inchunk = lodepng_chunk_next(inchunk); - } - return 0; -} -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - -unsigned lodepng_encode(unsigned char** out, size_t* outsize, - const unsigned char* image, unsigned w, unsigned h, - LodePNGState* state) -{ - LodePNGInfo info; - ucvector outv; - unsigned char* data = 0; /*uncompressed version of the IDAT chunk data*/ - size_t datasize = 0; - - /*provide some proper output values if error will happen*/ - *out = 0; - *outsize = 0; - state->error = 0; - - lodepng_info_init(&info); - lodepng_info_copy(&info, &state->info_png); - - if((info.color.colortype == LCT_PALETTE || state->encoder.force_palette) - && (info.color.palettesize == 0 || info.color.palettesize > 256)) - { - state->error = 68; /*invalid palette size, it is only allowed to be 1-256*/ - return state->error; - } - - if(state->encoder.auto_convert) - { - state->error = lodepng_auto_choose_color(&info.color, image, w, h, &state->info_raw); - } - if(state->error) return state->error; - - if(state->encoder.zlibsettings.btype > 2) - { - CERROR_RETURN_ERROR(state->error, 61); /*error: unexisting btype*/ - } - if(state->info_png.interlace_method > 1) - { - CERROR_RETURN_ERROR(state->error, 71); /*error: unexisting interlace mode*/ - } - - state->error = checkColorValidity(info.color.colortype, info.color.bitdepth); - if(state->error) return state->error; /*error: unexisting color type given*/ - state->error = checkColorValidity(state->info_raw.colortype, state->info_raw.bitdepth); - if(state->error) return state->error; /*error: unexisting color type given*/ - - if(!lodepng_color_mode_equal(&state->info_raw, &info.color)) - { - unsigned char* converted; - size_t size = (w * h * lodepng_get_bpp(&info.color) + 7) / 8; - - converted = (unsigned char*)lodepng_malloc(size); - if(!converted && size) state->error = 83; /*alloc fail*/ - if(!state->error) - { - state->error = lodepng_convert(converted, image, &info.color, &state->info_raw, w, h); - } - if(!state->error) preProcessScanlines(&data, &datasize, converted, w, h, &info, &state->encoder); - lodepng_free(converted); - } - else preProcessScanlines(&data, &datasize, image, w, h, &info, &state->encoder); - - ucvector_init(&outv); - while(!state->error) /*while only executed once, to break on error*/ - { -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - size_t i; -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - /*write signature and chunks*/ - writeSignature(&outv); - /*IHDR*/ - addChunk_IHDR(&outv, 
w, h, info.color.colortype, info.color.bitdepth, info.interlace_method); -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - /*unknown chunks between IHDR and PLTE*/ - if(info.unknown_chunks_data[0]) - { - state->error = addUnknownChunks(&outv, info.unknown_chunks_data[0], info.unknown_chunks_size[0]); - if(state->error) break; - } -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - /*PLTE*/ - if(info.color.colortype == LCT_PALETTE) - { - addChunk_PLTE(&outv, &info.color); - } - if(state->encoder.force_palette && (info.color.colortype == LCT_RGB || info.color.colortype == LCT_RGBA)) - { - addChunk_PLTE(&outv, &info.color); - } - /*tRNS*/ - if(info.color.colortype == LCT_PALETTE && getPaletteTranslucency(info.color.palette, info.color.palettesize) != 0) - { - addChunk_tRNS(&outv, &info.color); - } - if((info.color.colortype == LCT_GREY || info.color.colortype == LCT_RGB) && info.color.key_defined) - { - addChunk_tRNS(&outv, &info.color); - } -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - /*bKGD (must come between PLTE and the IDAt chunks*/ - if(info.background_defined) addChunk_bKGD(&outv, &info); - /*pHYs (must come before the IDAT chunks)*/ - if(info.phys_defined) addChunk_pHYs(&outv, &info); - - /*unknown chunks between PLTE and IDAT*/ - if(info.unknown_chunks_data[1]) - { - state->error = addUnknownChunks(&outv, info.unknown_chunks_data[1], info.unknown_chunks_size[1]); - if(state->error) break; - } -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - /*IDAT (multiple IDAT chunks must be consecutive)*/ - state->error = addChunk_IDAT(&outv, data, datasize, &state->encoder.zlibsettings); - if(state->error) break; -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - /*tIME*/ - if(info.time_defined) addChunk_tIME(&outv, &info.time); - /*tEXt and/or zTXt*/ - for(i = 0; i != info.text_num; ++i) - { - if(strlen(info.text_keys[i]) > 79) - { - state->error = 66; /*text chunk too large*/ - break; - } - if(strlen(info.text_keys[i]) < 1) - { - state->error = 67; /*text chunk too small*/ - break; - } - if(state->encoder.text_compression) - { - addChunk_zTXt(&outv, info.text_keys[i], info.text_strings[i], &state->encoder.zlibsettings); - } - else - { - addChunk_tEXt(&outv, info.text_keys[i], info.text_strings[i]); - } - } - /*LodePNG version id in text chunk*/ - if(state->encoder.add_id) - { - unsigned alread_added_id_text = 0; - for(i = 0; i != info.text_num; ++i) - { - if(!strcmp(info.text_keys[i], "LodePNG")) - { - alread_added_id_text = 1; - break; - } - } - if(alread_added_id_text == 0) - { - addChunk_tEXt(&outv, "LodePNG", LODEPNG_VERSION_STRING); /*it's shorter as tEXt than as zTXt chunk*/ - } - } - /*iTXt*/ - for(i = 0; i != info.itext_num; ++i) - { - if(strlen(info.itext_keys[i]) > 79) - { - state->error = 66; /*text chunk too large*/ - break; - } - if(strlen(info.itext_keys[i]) < 1) - { - state->error = 67; /*text chunk too small*/ - break; - } - addChunk_iTXt(&outv, state->encoder.text_compression, - info.itext_keys[i], info.itext_langtags[i], info.itext_transkeys[i], info.itext_strings[i], - &state->encoder.zlibsettings); - } - - /*unknown chunks between IDAT and IEND*/ - if(info.unknown_chunks_data[2]) - { - state->error = addUnknownChunks(&outv, info.unknown_chunks_data[2], info.unknown_chunks_size[2]); - if(state->error) break; - } -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - addChunk_IEND(&outv); - - break; /*this isn't really a while loop; no error happened so break out now!*/ - } - - lodepng_info_cleanup(&info); - lodepng_free(data); - /*instead of cleaning the vector up, give it to the output*/ - *out = 
outv.data; - *outsize = outv.size; - - return state->error; -} - -unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize, const unsigned char* image, - unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth) -{ - unsigned error; - LodePNGState state; - lodepng_state_init(&state); - state.info_raw.colortype = colortype; - state.info_raw.bitdepth = bitdepth; - state.info_png.color.colortype = colortype; - state.info_png.color.bitdepth = bitdepth; - lodepng_encode(out, outsize, image, w, h, &state); - error = state.error; - lodepng_state_cleanup(&state); - return error; -} - -unsigned lodepng_encode32(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h) -{ - return lodepng_encode_memory(out, outsize, image, w, h, LCT_RGBA, 8); -} - -unsigned lodepng_encode24(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h) -{ - return lodepng_encode_memory(out, outsize, image, w, h, LCT_RGB, 8); -} - -#ifdef LODEPNG_COMPILE_DISK -unsigned lodepng_encode_file(const char* filename, const unsigned char* image, unsigned w, unsigned h, - LodePNGColorType colortype, unsigned bitdepth) -{ - unsigned char* buffer; - size_t buffersize; - unsigned error = lodepng_encode_memory(&buffer, &buffersize, image, w, h, colortype, bitdepth); - if(!error) error = lodepng_save_file(buffer, buffersize, filename); - lodepng_free(buffer); - return error; -} - -unsigned lodepng_encode32_file(const char* filename, const unsigned char* image, unsigned w, unsigned h) -{ - return lodepng_encode_file(filename, image, w, h, LCT_RGBA, 8); -} - -unsigned lodepng_encode24_file(const char* filename, const unsigned char* image, unsigned w, unsigned h) -{ - return lodepng_encode_file(filename, image, w, h, LCT_RGB, 8); -} -#endif /*LODEPNG_COMPILE_DISK*/ - -void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings) -{ - lodepng_compress_settings_init(&settings->zlibsettings); - settings->filter_palette_zero = 1; - settings->filter_strategy = LFS_MINSUM; - settings->auto_convert = 1; - settings->force_palette = 0; - settings->predefined_filters = 0; -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - settings->add_id = 0; - settings->text_compression = 1; -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ -} - -#endif /*LODEPNG_COMPILE_ENCODER*/ -#endif /*LODEPNG_COMPILE_PNG*/ - -#ifdef LODEPNG_COMPILE_ERROR_TEXT -/* -This returns the description of a numerical error code in English. This is also -the documentation of all the error codes. 
-*/ -const char* lodepng_error_text(unsigned code) -{ - switch(code) - { - case 0: return "no error, everything went ok"; - case 1: return "nothing done yet"; /*the Encoder/Decoder has done nothing yet, error checking makes no sense yet*/ - case 10: return "end of input memory reached without huffman end code"; /*while huffman decoding*/ - case 11: return "error in code tree made it jump outside of huffman tree"; /*while huffman decoding*/ - case 13: return "problem while processing dynamic deflate block"; - case 14: return "problem while processing dynamic deflate block"; - case 15: return "problem while processing dynamic deflate block"; - case 16: return "unexisting code while processing dynamic deflate block"; - case 17: return "end of out buffer memory reached while inflating"; - case 18: return "invalid distance code while inflating"; - case 19: return "end of out buffer memory reached while inflating"; - case 20: return "invalid deflate block BTYPE encountered while decoding"; - case 21: return "NLEN is not ones complement of LEN in a deflate block"; - /*end of out buffer memory reached while inflating: - This can happen if the inflated deflate data is longer than the amount of bytes required to fill up - all the pixels of the image, given the color depth and image dimensions. Something that doesn't - happen in a normal, well encoded, PNG image.*/ - case 22: return "end of out buffer memory reached while inflating"; - case 23: return "end of in buffer memory reached while inflating"; - case 24: return "invalid FCHECK in zlib header"; - case 25: return "invalid compression method in zlib header"; - case 26: return "FDICT encountered in zlib header while it's not used for PNG"; - case 27: return "PNG file is smaller than a PNG header"; - /*Checks the magic file header, the first 8 bytes of the PNG file*/ - case 28: return "incorrect PNG signature, it's no PNG or corrupted"; - case 29: return "first chunk is not the header chunk"; - case 30: return "chunk length too large, chunk broken off at end of file"; - case 31: return "illegal PNG color type or bpp"; - case 32: return "illegal PNG compression method"; - case 33: return "illegal PNG filter method"; - case 34: return "illegal PNG interlace method"; - case 35: return "chunk length of a chunk is too large or the chunk too small"; - case 36: return "illegal PNG filter type encountered"; - case 37: return "illegal bit depth for this color type given"; - case 38: return "the palette is too big"; /*more than 256 colors*/ - case 39: return "more palette alpha values given in tRNS chunk than there are colors in the palette"; - case 40: return "tRNS chunk has wrong size for greyscale image"; - case 41: return "tRNS chunk has wrong size for RGB image"; - case 42: return "tRNS chunk appeared while it was not allowed for this color type"; - case 43: return "bKGD chunk has wrong size for palette image"; - case 44: return "bKGD chunk has wrong size for greyscale image"; - case 45: return "bKGD chunk has wrong size for RGB image"; - case 48: return "empty input buffer given to decoder. 
Maybe caused by non-existing file?"; - case 49: return "jumped past memory while generating dynamic huffman tree"; - case 50: return "jumped past memory while generating dynamic huffman tree"; - case 51: return "jumped past memory while inflating huffman block"; - case 52: return "jumped past memory while inflating"; - case 53: return "size of zlib data too small"; - case 54: return "repeat symbol in tree while there was no value symbol yet"; - /*jumped past tree while generating huffman tree, this could be when the - tree will have more leaves than symbols after generating it out of the - given lenghts. They call this an oversubscribed dynamic bit lengths tree in zlib.*/ - case 55: return "jumped past tree while generating huffman tree"; - case 56: return "given output image colortype or bitdepth not supported for color conversion"; - case 57: return "invalid CRC encountered (checking CRC can be disabled)"; - case 58: return "invalid ADLER32 encountered (checking ADLER32 can be disabled)"; - case 59: return "requested color conversion not supported"; - case 60: return "invalid window size given in the settings of the encoder (must be 0-32768)"; - case 61: return "invalid BTYPE given in the settings of the encoder (only 0, 1 and 2 are allowed)"; - /*LodePNG leaves the choice of RGB to greyscale conversion formula to the user.*/ - case 62: return "conversion from color to greyscale not supported"; - case 63: return "length of a chunk too long, max allowed for PNG is 2147483647 bytes per chunk"; /*(2^31-1)*/ - /*this would result in the inability of a deflated block to ever contain an end code. It must be at least 1.*/ - case 64: return "the length of the END symbol 256 in the Huffman tree is 0"; - case 66: return "the length of a text chunk keyword given to the encoder is longer than the maximum of 79 bytes"; - case 67: return "the length of a text chunk keyword given to the encoder is smaller than the minimum of 1 byte"; - case 68: return "tried to encode a PLTE chunk with a palette that has less than 1 or more than 256 colors"; - case 69: return "unknown chunk type with 'critical' flag encountered by the decoder"; - case 71: return "unexisting interlace mode given to encoder (must be 0 or 1)"; - case 72: return "while decoding, unexisting compression method encountering in zTXt or iTXt chunk (it must be 0)"; - case 73: return "invalid tIME chunk size"; - case 74: return "invalid pHYs chunk size"; - /*length could be wrong, or data chopped off*/ - case 75: return "no null termination char found while decoding text chunk"; - case 76: return "iTXt chunk too short to contain required bytes"; - case 77: return "integer overflow in buffer size"; - case 78: return "failed to open file for reading"; /*file doesn't exist or couldn't be opened for reading*/ - case 79: return "failed to open file for writing"; - case 80: return "tried creating a tree of 0 symbols"; - case 81: return "lazy matching at pos 0 is impossible"; - case 82: return "color conversion to palette requested while a color isn't in palette"; - case 83: return "memory allocation failed"; - case 84: return "given image too small to contain all pixels to be encoded"; - case 86: return "impossible offset in lz77 encoding (internal bug)"; - case 87: return "must provide custom zlib function pointer if LODEPNG_COMPILE_ZLIB is not defined"; - case 88: return "invalid filter strategy given for LodePNGEncoderSettings.filter_strategy"; - case 89: return "text chunk keyword too short or long: must have size 1-79"; - /*the windowsize in the 
LodePNGCompressSettings. Requiring POT(==> & instead of %) makes encoding 12% faster.*/ - case 90: return "windowsize must be a power of two"; - case 91: return "invalid decompressed idat size"; - case 92: return "too many pixels, not supported"; - case 93: return "zero width or height is invalid"; - case 94: return "header chunk must have a size of 13 bytes"; - } - return "unknown error code"; -} -#endif /*LODEPNG_COMPILE_ERROR_TEXT*/ - -/* ////////////////////////////////////////////////////////////////////////// */ -/* ////////////////////////////////////////////////////////////////////////// */ -/* // C++ Wrapper // */ -/* ////////////////////////////////////////////////////////////////////////// */ -/* ////////////////////////////////////////////////////////////////////////// */ - -#ifdef LODEPNG_COMPILE_CPP -namespace lodepng -{ - -#ifdef LODEPNG_COMPILE_DISK -unsigned load_file(std::vector& buffer, const std::string& filename) -{ - std::ifstream file(filename.c_str(), std::ios::in|std::ios::binary|std::ios::ate); - if(!file) return 78; - - /*get filesize*/ - std::streamsize size = 0; - if(file.seekg(0, std::ios::end).good()) size = file.tellg(); - if(file.seekg(0, std::ios::beg).good()) size -= file.tellg(); - - /*read contents of the file into the vector*/ - buffer.resize(size_t(size)); - if(size > 0) file.read((char*)(&buffer[0]), size); - - return 0; /* OK */ -} - -/*write given buffer to the file, overwriting the file, it doesn't append to it.*/ -unsigned save_file(const std::vector& buffer, const std::string& filename) -{ - std::ofstream file(filename.c_str(), std::ios::out|std::ios::binary); - if(!file) return 79; - file.write(buffer.empty() ? 0 : (char*)&buffer[0], std::streamsize(buffer.size())); - return 0; -} -#endif /* LODEPNG_COMPILE_DISK */ - -#ifdef LODEPNG_COMPILE_ZLIB -#ifdef LODEPNG_COMPILE_DECODER -unsigned decompress(std::vector& out, const unsigned char* in, size_t insize, - const LodePNGDecompressSettings& settings) -{ - unsigned char* buffer = 0; - size_t buffersize = 0; - unsigned error = zlib_decompress(&buffer, &buffersize, in, insize, &settings); - if(buffer) - { - out.insert(out.end(), &buffer[0], &buffer[buffersize]); - lodepng_free(buffer); - } - return error; -} - -unsigned decompress(std::vector& out, const std::vector& in, - const LodePNGDecompressSettings& settings) -{ - return decompress(out, in.empty() ? 0 : &in[0], in.size(), settings); -} -#endif /* LODEPNG_COMPILE_DECODER */ - -#ifdef LODEPNG_COMPILE_ENCODER -unsigned compress(std::vector& out, const unsigned char* in, size_t insize, - const LodePNGCompressSettings& settings) -{ - unsigned char* buffer = 0; - size_t buffersize = 0; - unsigned error = zlib_compress(&buffer, &buffersize, in, insize, &settings); - if(buffer) - { - out.insert(out.end(), &buffer[0], &buffer[buffersize]); - lodepng_free(buffer); - } - return error; -} - -unsigned compress(std::vector& out, const std::vector& in, - const LodePNGCompressSettings& settings) -{ - return compress(out, in.empty() ? 
0 : &in[0], in.size(), settings); -} -#endif /* LODEPNG_COMPILE_ENCODER */ -#endif /* LODEPNG_COMPILE_ZLIB */ - - -#ifdef LODEPNG_COMPILE_PNG - -State::State() -{ - lodepng_state_init(this); -} - -State::State(const State& other) -{ - lodepng_state_init(this); - lodepng_state_copy(this, &other); -} - -State::~State() -{ - lodepng_state_cleanup(this); -} - -State& State::operator=(const State& other) -{ - lodepng_state_copy(this, &other); - return *this; -} - -#ifdef LODEPNG_COMPILE_DECODER - -unsigned decode(std::vector& out, unsigned& w, unsigned& h, const unsigned char* in, - size_t insize, LodePNGColorType colortype, unsigned bitdepth) -{ - unsigned char* buffer; - unsigned error = lodepng_decode_memory(&buffer, &w, &h, in, insize, colortype, bitdepth); - if(buffer && !error) - { - State state; - state.info_raw.colortype = colortype; - state.info_raw.bitdepth = bitdepth; - size_t buffersize = lodepng_get_raw_size(w, h, &state.info_raw); - out.insert(out.end(), &buffer[0], &buffer[buffersize]); - lodepng_free(buffer); - } - return error; -} - -unsigned decode(std::vector& out, unsigned& w, unsigned& h, - const std::vector& in, LodePNGColorType colortype, unsigned bitdepth) -{ - return decode(out, w, h, in.empty() ? 0 : &in[0], (unsigned)in.size(), colortype, bitdepth); -} - -unsigned decode(std::vector& out, unsigned& w, unsigned& h, - State& state, - const unsigned char* in, size_t insize) -{ - unsigned char* buffer = NULL; - unsigned error = lodepng_decode(&buffer, &w, &h, &state, in, insize); - if(buffer && !error) - { - size_t buffersize = lodepng_get_raw_size(w, h, &state.info_raw); - out.insert(out.end(), &buffer[0], &buffer[buffersize]); - } - lodepng_free(buffer); - return error; -} - -unsigned decode(std::vector& out, unsigned& w, unsigned& h, - State& state, - const std::vector& in) -{ - return decode(out, w, h, state, in.empty() ? 0 : &in[0], in.size()); -} - -#ifdef LODEPNG_COMPILE_DISK -unsigned decode(std::vector& out, unsigned& w, unsigned& h, const std::string& filename, - LodePNGColorType colortype, unsigned bitdepth) -{ - std::vector buffer; - unsigned error = load_file(buffer, filename); - if(error) return error; - return decode(out, w, h, buffer, colortype, bitdepth); -} -#endif /* LODEPNG_COMPILE_DECODER */ -#endif /* LODEPNG_COMPILE_DISK */ - -#ifdef LODEPNG_COMPILE_ENCODER -unsigned encode(std::vector& out, const unsigned char* in, unsigned w, unsigned h, - LodePNGColorType colortype, unsigned bitdepth) -{ - unsigned char* buffer; - size_t buffersize; - unsigned error = lodepng_encode_memory(&buffer, &buffersize, in, w, h, colortype, bitdepth); - if(buffer) - { - out.insert(out.end(), &buffer[0], &buffer[buffersize]); - lodepng_free(buffer); - } - return error; -} - -unsigned encode(std::vector& out, - const std::vector& in, unsigned w, unsigned h, - LodePNGColorType colortype, unsigned bitdepth) -{ - if(lodepng_get_raw_size_lct(w, h, colortype, bitdepth) > in.size()) return 84; - return encode(out, in.empty() ? 
0 : &in[0], w, h, colortype, bitdepth);
-}
-
-unsigned encode(std::vector<unsigned char>& out,
-                const unsigned char* in, unsigned w, unsigned h,
-                State& state)
-{
-  unsigned char* buffer;
-  size_t buffersize;
-  unsigned error = lodepng_encode(&buffer, &buffersize, in, w, h, &state);
-  if(buffer)
-  {
-    out.insert(out.end(), &buffer[0], &buffer[buffersize]);
-    lodepng_free(buffer);
-  }
-  return error;
-}
-
-unsigned encode(std::vector<unsigned char>& out,
-                const std::vector<unsigned char>& in, unsigned w, unsigned h,
-                State& state)
-{
-  if(lodepng_get_raw_size(w, h, &state.info_raw) > in.size()) return 84;
-  return encode(out, in.empty() ? 0 : &in[0], w, h, state);
-}
-
-#ifdef LODEPNG_COMPILE_DISK
-unsigned encode(const std::string& filename,
-                const unsigned char* in, unsigned w, unsigned h,
-                LodePNGColorType colortype, unsigned bitdepth)
-{
-  std::vector<unsigned char> buffer;
-  unsigned error = encode(buffer, in, w, h, colortype, bitdepth);
-  if(!error) error = save_file(buffer, filename);
-  return error;
-}
-
-unsigned encode(const std::string& filename,
-                const std::vector<unsigned char>& in, unsigned w, unsigned h,
-                LodePNGColorType colortype, unsigned bitdepth)
-{
-  if(lodepng_get_raw_size_lct(w, h, colortype, bitdepth) > in.size()) return 84;
-  return encode(filename, in.empty() ? 0 : &in[0], w, h, colortype, bitdepth);
-}
-#endif /* LODEPNG_COMPILE_DISK */
-#endif /* LODEPNG_COMPILE_ENCODER */
-#endif /* LODEPNG_COMPILE_PNG */
-} /* namespace lodepng */
-#endif /*LODEPNG_COMPILE_CPP*/
+/*
+LodePNG version 20201017
+
+Copyright (c) 2005-2020 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+/*
+The manual and changelog are in the header file "lodepng.h"
+Rename this file to lodepng.cpp to use it for C++, or to lodepng.c to use it for C.
+*/
+
+#include "lodepng.h"
+
+#ifdef LODEPNG_COMPILE_DISK
+#include <limits.h> /* LONG_MAX */
+#include <stdio.h> /* file handling */
+#endif /* LODEPNG_COMPILE_DISK */
+
+#ifdef LODEPNG_COMPILE_ALLOCATORS
+#include <stdlib.h> /* allocations */
+#endif /* LODEPNG_COMPILE_ALLOCATORS */
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1310) /*Visual Studio: A few warning types are not desired here.*/
+#pragma warning( disable : 4244 ) /*implicit conversions: not warned by gcc -Wall -Wextra and requires too much casts*/
+#pragma warning( disable : 4996 ) /*VS does not like fopen, but fopen_s is not standard C so unusable here*/
+#endif /*_MSC_VER */
+
+const char* LODEPNG_VERSION_STRING = "20201017";
+
+/*
+This source file is built up in the following large parts. The code sections
+with the "LODEPNG_COMPILE_" #defines divide this up further in an intermixed way.
+-Tools for C and common code for PNG and Zlib
+-C Code for Zlib (huffman, deflate, ...)
+-C Code for PNG (file format chunks, adam7, PNG filters, color conversions, ...)
+-The C++ wrapper around all of the above +*/ + +/* ////////////////////////////////////////////////////////////////////////// */ +/* ////////////////////////////////////////////////////////////////////////// */ +/* // Tools for C, and common code for PNG and Zlib. // */ +/* ////////////////////////////////////////////////////////////////////////// */ +/* ////////////////////////////////////////////////////////////////////////// */ + +/*The malloc, realloc and free functions defined here with "lodepng_" in front +of the name, so that you can easily change them to others related to your +platform if needed. Everything else in the code calls these. Pass +-DLODEPNG_NO_COMPILE_ALLOCATORS to the compiler, or comment out +#define LODEPNG_COMPILE_ALLOCATORS in the header, to disable the ones here and +define them in your own project's source files without needing to change +lodepng source code. Don't forget to remove "static" if you copypaste them +from here.*/ + +#ifdef LODEPNG_COMPILE_ALLOCATORS +static void* lodepng_malloc(size_t size) { +#ifdef LODEPNG_MAX_ALLOC + if(size > LODEPNG_MAX_ALLOC) return 0; +#endif + return malloc(size); +} + +/* NOTE: when realloc returns NULL, it leaves the original memory untouched */ +static void* lodepng_realloc(void* ptr, size_t new_size) { +#ifdef LODEPNG_MAX_ALLOC + if(new_size > LODEPNG_MAX_ALLOC) return 0; +#endif + return realloc(ptr, new_size); +} + +static void lodepng_free(void* ptr) { + free(ptr); +} +#else /*LODEPNG_COMPILE_ALLOCATORS*/ +/* TODO: support giving additional void* payload to the custom allocators */ +void* lodepng_malloc(size_t size); +void* lodepng_realloc(void* ptr, size_t new_size); +void lodepng_free(void* ptr); +#endif /*LODEPNG_COMPILE_ALLOCATORS*/ + +/* convince the compiler to inline a function, for use when this measurably improves performance */ +/* inline is not available in C90, but use it when supported by the compiler */ +#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || (defined(__cplusplus) && (__cplusplus >= 199711L)) +#define LODEPNG_INLINE inline +#else +#define LODEPNG_INLINE /* not available */ +#endif + +/* restrict is not available in C90, but use it when supported by the compiler */ +#if (defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) ||\ + (defined(_MSC_VER) && (_MSC_VER >= 1400)) || \ + (defined(__WATCOMC__) && (__WATCOMC__ >= 1250) && !defined(__cplusplus)) +#define LODEPNG_RESTRICT __restrict +#else +#define LODEPNG_RESTRICT /* not available */ +#endif + + +/* Replacements for C library functions such as memcpy and strlen, to support platforms +where a full C library is not available. The compiler can recognize them and compile +to something as fast. */ + +//static void lodepng_memcpy(void* LODEPNG_RESTRICT dst, +// const void* LODEPNG_RESTRICT src, size_t size) { +// size_t i; +// for(i = 0; i < size; i++) ((char*)dst)[i] = ((const char*)src)[i]; +//} +// +//static void lodepng_memset(void* LODEPNG_RESTRICT dst, +// int value, size_t num) { +// size_t i; +// for(i = 0; i < num; i++) ((char*)dst)[i] = (char)value; +//} +// +///* does not check memory out of bounds, do not use on untrusted data */ +//static size_t lodepng_strlen(const char* a) { +// const char* orig = a; +// /* avoid warning about unused function in case of disabled COMPILE... 
macros */ +// (void)(&lodepng_strlen); +// while(*a) a++; +// return (size_t)(a - orig); +//} + +#define lodepng_memcpy(dst, src, size) memcpy(dst, src, size) +#define lodepng_memset(dst, value, size) memset(dst, value, size) +#define lodepng_strlen(str) strlen(str) + + +#define LODEPNG_MAX(a, b) (((a) > (b)) ? (a) : (b)) +#define LODEPNG_MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define LODEPNG_ABS(x) ((x) < 0 ? -(x) : (x)) + +#if defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_DECODER) +/* Safely check if adding two integers will overflow (no undefined +behavior, compiler removing the code, etc...) and output result. */ +static int lodepng_addofl(size_t a, size_t b, size_t* result) { + *result = a + b; /* Unsigned addition is well defined and safe in C90 */ + return *result < a; +} +#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_DECODER)*/ + +#ifdef LODEPNG_COMPILE_DECODER +/* Safely check if multiplying two integers will overflow (no undefined +behavior, compiler removing the code, etc...) and output result. */ +static int lodepng_mulofl(size_t a, size_t b, size_t* result) { + *result = a * b; /* Unsigned multiplication is well defined and safe in C90 */ + return (a != 0 && *result / a != b); +} + +#ifdef LODEPNG_COMPILE_ZLIB +/* Safely check if a + b > c, even if overflow could happen. */ +static int lodepng_gtofl(size_t a, size_t b, size_t c) { + size_t d; + if(lodepng_addofl(a, b, &d)) return 1; + return d > c; +} +#endif /*LODEPNG_COMPILE_ZLIB*/ +#endif /*LODEPNG_COMPILE_DECODER*/ + + +/* +Often in case of an error a value is assigned to a variable and then it breaks +out of a loop (to go to the cleanup phase of a function). This macro does that. +It makes the error handling code shorter and more readable. + +Example: if(!uivector_resize(&lz77_encoded, datasize)) ERROR_BREAK(83); +*/ +#define CERROR_BREAK(errorvar, code){\ + errorvar = code;\ + break;\ +} + +/*version of CERROR_BREAK that assumes the common case where the error variable is named "error"*/ +#define ERROR_BREAK(code) CERROR_BREAK(error, code) + +/*Set error var to the error code, and return it.*/ +#define CERROR_RETURN_ERROR(errorvar, code){\ + errorvar = code;\ + return code;\ +} + +/*Try the code, if it returns error, also return the error.*/ +#define CERROR_TRY_RETURN(call){\ + unsigned error = call;\ + if(error) return error;\ +} + +/*Set error var to the error code, and return from the void function.*/ +#define CERROR_RETURN(errorvar, code){\ + errorvar = code;\ + return;\ +} + +/* +About uivector, ucvector and string: +-All of them wrap dynamic arrays or text strings in a similar way. +-LodePNG was originally written in C++. The vectors replace the std::vectors that were used in the C++ version. +-The string tools are made to avoid problems with compilers that declare things like strncat as deprecated. +-They're not used in the interface, only internally in this file as static functions. +-As with many other structs in this file, the init and cleanup functions serve as ctor and dtor. 
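As an illustration of the pattern described here, a minimal sketch using only the ucvector helpers defined a few lines below (buffer and size stand for caller-provided storage, and 83 is the alloc-fail error code used throughout this file):

    ucvector v = ucvector_init(buffer, size);        wraps existing storage, no copy is made
    if(!ucvector_resize(&v, v.size + 1)) return 83;  on growth, over-allocates by half the old capacity
    v.data[v.size - 1] = 0;                          write the newly appended byte

This is the same append-one-byte idiom the bit writer uses further down in this hunk.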
+*/ + +#ifdef LODEPNG_COMPILE_ZLIB +#ifdef LODEPNG_COMPILE_ENCODER +/*dynamic vector of unsigned ints*/ +typedef struct uivector { + unsigned* data; + size_t size; /*size in number of unsigned longs*/ + size_t allocsize; /*allocated size in bytes*/ +} uivector; + +static void uivector_cleanup(void* p) { + ((uivector*)p)->size = ((uivector*)p)->allocsize = 0; + lodepng_free(((uivector*)p)->data); + ((uivector*)p)->data = NULL; +} + +/*returns 1 if success, 0 if failure ==> nothing done*/ +static unsigned uivector_resize(uivector* p, size_t size) { + size_t allocsize = size * sizeof(unsigned); + if(allocsize > p->allocsize) { + size_t newsize = allocsize + (p->allocsize >> 1u); + void* data = lodepng_realloc(p->data, newsize); + if(data) { + p->allocsize = newsize; + p->data = (unsigned*)data; + } + else return 0; /*error: not enough memory*/ + } + p->size = size; + return 1; /*success*/ +} + +static void uivector_init(uivector* p) { + p->data = NULL; + p->size = p->allocsize = 0; +} + +/*returns 1 if success, 0 if failure ==> nothing done*/ +static unsigned uivector_push_back(uivector* p, unsigned c) { + if(!uivector_resize(p, p->size + 1)) return 0; + p->data[p->size - 1] = c; + return 1; +} +#endif /*LODEPNG_COMPILE_ENCODER*/ +#endif /*LODEPNG_COMPILE_ZLIB*/ + +/* /////////////////////////////////////////////////////////////////////////// */ + +/*dynamic vector of unsigned chars*/ +typedef struct ucvector { + unsigned char* data; + size_t size; /*used size*/ + size_t allocsize; /*allocated size*/ +} ucvector; + +/*returns 1 if success, 0 if failure ==> nothing done*/ +static unsigned ucvector_resize(ucvector* p, size_t size) { + if(size > p->allocsize) { + size_t newsize = size + (p->allocsize >> 1u); + void* data = lodepng_realloc(p->data, newsize); + if(data) { + p->allocsize = newsize; + p->data = (unsigned char*)data; + } + else return 0; /*error: not enough memory*/ + } + p->size = size; + return 1; /*success*/ +} + +static ucvector ucvector_init(unsigned char* buffer, size_t size) { + ucvector v; + v.data = buffer; + v.allocsize = v.size = size; + return v; +} + +/* ////////////////////////////////////////////////////////////////////////// */ + +#ifdef LODEPNG_COMPILE_PNG +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + +/*free string pointer and set it to NULL*/ +static void string_cleanup(char** out) { + lodepng_free(*out); + *out = NULL; +} + +/*also appends null termination character*/ +static char* alloc_string_sized(const char* in, size_t insize) { + char* out = (char*)lodepng_malloc(insize + 1); + if(out) { + lodepng_memcpy(out, in, insize); + out[insize] = 0; + } + return out; +} + +/* dynamically allocates a new string with a copy of the null terminated input text */ +static char* alloc_string(const char* in) { + return alloc_string_sized(in, lodepng_strlen(in)); +} +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ +#endif /*LODEPNG_COMPILE_PNG*/ + +/* ////////////////////////////////////////////////////////////////////////// */ + +#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_PNG) +static unsigned lodepng_read32bitInt(const unsigned char* buffer) { + return (((unsigned)buffer[0] << 24u) | ((unsigned)buffer[1] << 16u) | + ((unsigned)buffer[2] << 8u) | (unsigned)buffer[3]); +} +#endif /*defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_PNG)*/ + +#if defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER) +/*buffer must have at least 4 allocated bytes available*/ +static void lodepng_set32bitInt(unsigned char* buffer, unsigned value) { + buffer[0] = 
(unsigned char)((value >> 24) & 0xff); + buffer[1] = (unsigned char)((value >> 16) & 0xff); + buffer[2] = (unsigned char)((value >> 8) & 0xff); + buffer[3] = (unsigned char)((value ) & 0xff); +} +#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)*/ + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / File IO / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +#ifdef LODEPNG_COMPILE_DISK + +/* returns negative value on error. This should be pure C compatible, so no fstat. */ +static long lodepng_filesize(const char* filename) { + FILE* file; + long size; + file = fopen(filename, "rb"); + if(!file) return -1; + + if(fseek(file, 0, SEEK_END) != 0) { + fclose(file); + return -1; + } + + size = ftell(file); + /* It may give LONG_MAX as directory size, this is invalid for us. */ + if(size == LONG_MAX) size = -1; + + fclose(file); + return size; +} + +/* load file into buffer that already has the correct allocated size. Returns error code.*/ +static unsigned lodepng_buffer_file(unsigned char* out, size_t size, const char* filename) { + FILE* file; + size_t readsize; + file = fopen(filename, "rb"); + if(!file) return 78; + + readsize = fread(out, 1, size, file); + fclose(file); + + if(readsize != size) return 78; + return 0; +} + +unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename) { + long size = lodepng_filesize(filename); + if(size < 0) return 78; + *outsize = (size_t)size; + + *out = (unsigned char*)lodepng_malloc((size_t)size); + if(!(*out) && size > 0) return 83; /*the above malloc failed*/ + + return lodepng_buffer_file(*out, (size_t)size, filename); +} + +/*write given buffer to the file, overwriting the file, it doesn't append to it.*/ +unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename) { + FILE* file; + file = fopen(filename, "wb" ); + if(!file) return 79; + fwrite(buffer, 1, buffersize, file); + fclose(file); + return 0; +} + +#endif /*LODEPNG_COMPILE_DISK*/ + +/* ////////////////////////////////////////////////////////////////////////// */ +/* ////////////////////////////////////////////////////////////////////////// */ +/* // End of common code and tools. Begin of Zlib related code. 
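As a usage illustration for the file helpers above (a sketch only: "in.png" and "out.png" are placeholder names, and the loaded buffer is released with plain free() since the default lodepng_malloc is malloc):

    unsigned char* buf = NULL;
    size_t n = 0;
    unsigned err = lodepng_load_file(&buf, &n, "in.png");    78 = file error, 83 = alloc fail
    if(!err) err = lodepng_save_file(buf, n, "out.png");     79 = could not open for writing
    free(buf);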
// */ +/* ////////////////////////////////////////////////////////////////////////// */ +/* ////////////////////////////////////////////////////////////////////////// */ + +#ifdef LODEPNG_COMPILE_ZLIB +#ifdef LODEPNG_COMPILE_ENCODER + +typedef struct { + ucvector* data; + unsigned char bp; /*ok to overflow, indicates bit pos inside byte*/ +} LodePNGBitWriter; + +static void LodePNGBitWriter_init(LodePNGBitWriter* writer, ucvector* data) { + writer->data = data; + writer->bp = 0; +} + +/*TODO: this ignores potential out of memory errors*/ +#define WRITEBIT(writer, bit){\ + /* append new byte */\ + if(((writer->bp) & 7u) == 0) {\ + if(!ucvector_resize(writer->data, writer->data->size + 1)) return;\ + writer->data->data[writer->data->size - 1] = 0;\ + }\ + (writer->data->data[writer->data->size - 1]) |= (bit << ((writer->bp) & 7u));\ + ++writer->bp;\ +} + +/* LSB of value is written first, and LSB of bytes is used first */ +static void writeBits(LodePNGBitWriter* writer, unsigned value, size_t nbits) { + if(nbits == 1) { /* compiler should statically compile this case if nbits == 1 */ + WRITEBIT(writer, value); + } else { + /* TODO: increase output size only once here rather than in each WRITEBIT */ + size_t i; + for(i = 0; i != nbits; ++i) { + WRITEBIT(writer, (unsigned char)((value >> i) & 1)); + } + } +} + +/* This one is to use for adding huffman symbol, the value bits are written MSB first */ +static void writeBitsReversed(LodePNGBitWriter* writer, unsigned value, size_t nbits) { + size_t i; + for(i = 0; i != nbits; ++i) { + /* TODO: increase output size only once here rather than in each WRITEBIT */ + WRITEBIT(writer, (unsigned char)((value >> (nbits - 1u - i)) & 1u)); + } +} +#endif /*LODEPNG_COMPILE_ENCODER*/ + +#ifdef LODEPNG_COMPILE_DECODER + +typedef struct { + const unsigned char* data; + size_t size; /*size of data in bytes*/ + size_t bitsize; /*size of data in bits, end of valid bp values, should be 8*size*/ + size_t bp; + unsigned buffer; /*buffer for reading bits. NOTE: 'unsigned' must support at least 32 bits*/ +} LodePNGBitReader; + +/* data size argument is in bytes. Returns error if size too large causing overflow */ +static unsigned LodePNGBitReader_init(LodePNGBitReader* reader, const unsigned char* data, size_t size) { + size_t temp; + reader->data = data; + reader->size = size; + /* size in bits, return error if overflow (if size_t is 32 bit this supports up to 500MB) */ + if(lodepng_mulofl(size, 8u, &reader->bitsize)) return 105; + /*ensure incremented bp can be compared to bitsize without overflow even when it would be incremented 32 too much and + trying to ensure 32 more bits*/ + if(lodepng_addofl(reader->bitsize, 64u, &temp)) return 105; + reader->bp = 0; + reader->buffer = 0; + return 0; /*ok*/ +} + +/* +ensureBits functions: +Ensures the reader can at least read nbits bits in one or more readBits calls, +safely even if not enough bits are available. +Returns 1 if there are enough bits available, 0 if not. +*/ + +/*See ensureBits documentation above. This one ensures exactly 1 bit */ +/*static unsigned ensureBits1(LodePNGBitReader* reader) { + if(reader->bp >= reader->bitsize) return 0; + reader->buffer = (unsigned)reader->data[reader->bp >> 3u] >> (reader->bp & 7u); + return 1; +}*/ + +/*See ensureBits documentation above. 
This one ensures up to 9 bits */ +static unsigned ensureBits9(LodePNGBitReader* reader, size_t nbits) { + size_t start = reader->bp >> 3u; + size_t size = reader->size; + if(start + 1u < size) { + reader->buffer = (unsigned)reader->data[start + 0] | ((unsigned)reader->data[start + 1] << 8u); + reader->buffer >>= (reader->bp & 7u); + return 1; + } else { + reader->buffer = 0; + if(start + 0u < size) reader->buffer |= reader->data[start + 0]; + reader->buffer >>= (reader->bp & 7u); + return reader->bp + nbits <= reader->bitsize; + } +} + +/*See ensureBits documentation above. This one ensures up to 17 bits */ +static unsigned ensureBits17(LodePNGBitReader* reader, size_t nbits) { + size_t start = reader->bp >> 3u; + size_t size = reader->size; + if(start + 2u < size) { + reader->buffer = (unsigned)reader->data[start + 0] | ((unsigned)reader->data[start + 1] << 8u) | + ((unsigned)reader->data[start + 2] << 16u); + reader->buffer >>= (reader->bp & 7u); + return 1; + } else { + reader->buffer = 0; + if(start + 0u < size) reader->buffer |= reader->data[start + 0]; + if(start + 1u < size) reader->buffer |= ((unsigned)reader->data[start + 1] << 8u); + reader->buffer >>= (reader->bp & 7u); + return reader->bp + nbits <= reader->bitsize; + } +} + +/*See ensureBits documentation above. This one ensures up to 25 bits */ +static LODEPNG_INLINE unsigned ensureBits25(LodePNGBitReader* reader, size_t nbits) { + size_t start = reader->bp >> 3u; + size_t size = reader->size; + if(start + 3u < size) { + reader->buffer = (unsigned)reader->data[start + 0] | ((unsigned)reader->data[start + 1] << 8u) | + ((unsigned)reader->data[start + 2] << 16u) | ((unsigned)reader->data[start + 3] << 24u); + reader->buffer >>= (reader->bp & 7u); + return 1; + } else { + reader->buffer = 0; + if(start + 0u < size) reader->buffer |= reader->data[start + 0]; + if(start + 1u < size) reader->buffer |= ((unsigned)reader->data[start + 1] << 8u); + if(start + 2u < size) reader->buffer |= ((unsigned)reader->data[start + 2] << 16u); + reader->buffer >>= (reader->bp & 7u); + return reader->bp + nbits <= reader->bitsize; + } +} + +/*See ensureBits documentation above. This one ensures up to 32 bits */ +static LODEPNG_INLINE unsigned ensureBits32(LodePNGBitReader* reader, size_t nbits) { + size_t start = reader->bp >> 3u; + size_t size = reader->size; + if(start + 4u < size) { + reader->buffer = (unsigned)reader->data[start + 0] | ((unsigned)reader->data[start + 1] << 8u) | + ((unsigned)reader->data[start + 2] << 16u) | ((unsigned)reader->data[start + 3] << 24u); + reader->buffer >>= (reader->bp & 7u); + reader->buffer |= (((unsigned)reader->data[start + 4] << 24u) << (8u - (reader->bp & 7u))); + return 1; + } else { + reader->buffer = 0; + if(start + 0u < size) reader->buffer |= reader->data[start + 0]; + if(start + 1u < size) reader->buffer |= ((unsigned)reader->data[start + 1] << 8u); + if(start + 2u < size) reader->buffer |= ((unsigned)reader->data[start + 2] << 16u); + if(start + 3u < size) reader->buffer |= ((unsigned)reader->data[start + 3] << 24u); + reader->buffer >>= (reader->bp & 7u); + return reader->bp + nbits <= reader->bitsize; + } +} + +/* Get bits without advancing the bit pointer. Must have enough bits available with ensureBits. Max nbits is 31. */ +static unsigned peekBits(LodePNGBitReader* reader, size_t nbits) { + /* The shift allows nbits to be only up to 31. 
*/ + return reader->buffer & ((1u << nbits) - 1u); +} + +/* Must have enough bits available with ensureBits */ +static void advanceBits(LodePNGBitReader* reader, size_t nbits) { + reader->buffer >>= nbits; + reader->bp += nbits; +} + +/* Must have enough bits available with ensureBits */ +static unsigned readBits(LodePNGBitReader* reader, size_t nbits) { + unsigned result = peekBits(reader, nbits); + advanceBits(reader, nbits); + return result; +} + +/* Public for testing only. steps and result must have numsteps values. */ +unsigned lode_png_test_bitreader(const unsigned char* data, size_t size, + size_t numsteps, const size_t* steps, unsigned* result) { + size_t i; + LodePNGBitReader reader; + unsigned error = LodePNGBitReader_init(&reader, data, size); + if(error) return 0; + for(i = 0; i < numsteps; i++) { + size_t step = steps[i]; + unsigned ok; + if(step > 25) ok = ensureBits32(&reader, step); + else if(step > 17) ok = ensureBits25(&reader, step); + else if(step > 9) ok = ensureBits17(&reader, step); + else ok = ensureBits9(&reader, step); + if(!ok) return 0; + result[i] = readBits(&reader, step); + } + return 1; +} +#endif /*LODEPNG_COMPILE_DECODER*/ + +static unsigned reverseBits(unsigned bits, unsigned num) { + /*TODO: implement faster lookup table based version when needed*/ + unsigned i, result = 0; + for(i = 0; i < num; i++) result |= ((bits >> (num - i - 1u)) & 1u) << i; + return result; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / Deflate - Huffman / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +#define FIRST_LENGTH_CODE_INDEX 257 +#define LAST_LENGTH_CODE_INDEX 285 +/*256 literals, the end code, some length codes, and 2 unused codes*/ +#define NUM_DEFLATE_CODE_SYMBOLS 288 +/*the distance codes have their own symbols, 30 used, 2 unused*/ +#define NUM_DISTANCE_SYMBOLS 32 +/*the code length codes. 
0-15: code lengths, 16: copy previous 3-6 times, 17: 3-10 zeros, 18: 11-138 zeros*/ +#define NUM_CODE_LENGTH_CODES 19 + +/*the base lengths represented by codes 257-285*/ +static const unsigned LENGTHBASE[29] + = {3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, + 67, 83, 99, 115, 131, 163, 195, 227, 258}; + +/*the extra bits used by codes 257-285 (added to base length)*/ +static const unsigned LENGTHEXTRA[29] + = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, + 4, 4, 4, 4, 5, 5, 5, 5, 0}; + +/*the base backwards distances (the bits of distance codes appear after length codes and use their own huffman tree)*/ +static const unsigned DISTANCEBASE[30] + = {1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, + 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577}; + +/*the extra bits of backwards distances (added to base)*/ +static const unsigned DISTANCEEXTRA[30] + = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, + 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13}; + +/*the order in which "code length alphabet code lengths" are stored as specified by deflate, out of this the huffman +tree of the dynamic huffman tree lengths is generated*/ +static const unsigned CLCL_ORDER[NUM_CODE_LENGTH_CODES] + = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; + +/* ////////////////////////////////////////////////////////////////////////// */ + +/* +Huffman tree struct, containing multiple representations of the tree +*/ +typedef struct HuffmanTree { + unsigned* codes; /*the huffman codes (bit patterns representing the symbols)*/ + unsigned* lengths; /*the lengths of the huffman codes*/ + unsigned maxbitlen; /*maximum number of bits a single code can get*/ + unsigned numcodes; /*number of symbols in the alphabet = number of codes*/ + /* for reading only */ + unsigned char* table_len; /*length of symbol from lookup table, or max length if secondary lookup needed*/ + unsigned short* table_value; /*value of symbol from lookup table, or pointer to secondary table if needed*/ +} HuffmanTree; + +static void HuffmanTree_init(HuffmanTree* tree) { + tree->codes = 0; + tree->lengths = 0; + tree->table_len = 0; + tree->table_value = 0; +} + +static void HuffmanTree_cleanup(HuffmanTree* tree) { + lodepng_free(tree->codes); + lodepng_free(tree->lengths); + lodepng_free(tree->table_len); + lodepng_free(tree->table_value); +} + +/* amount of bits for first huffman table lookup (aka root bits), see HuffmanTree_makeTable and huffmanDecodeSymbol.*/ +/* values 8u and 9u work the fastest */ +#define FIRSTBITS 9u + +/* a symbol value too big to represent any valid symbol, to indicate reading disallowed huffman bits combination, +which is possible in case of only 0 or 1 present symbols. 
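For orientation, a sketch of how a decoder can consume the two-level table built by HuffmanTree_makeTable below, using the peekBits/advanceBits helpers defined earlier in this hunk. This is illustrative only, not the actual lookup routine; reader and tree are assumed to be set up, and enough bits are assumed available via one of the ensureBits functions:

    unsigned code = peekBits(reader, FIRSTBITS);
    unsigned l = tree->table_len[code];
    unsigned symbol;
    if(l <= FIRSTBITS) {
      advanceBits(reader, l);            short code: resolved directly by the head table
      symbol = tree->table_value[code];
    } else {
      unsigned index2;
      advanceBits(reader, FIRSTBITS);    long code: head entry points at a secondary table
      index2 = tree->table_value[code] + peekBits(reader, l - FIRSTBITS);
      advanceBits(reader, tree->table_len[index2] - FIRSTBITS);
      symbol = tree->table_value[index2];
    }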
*/ +#define INVALIDSYMBOL 65535u + +/* make table for huffman decoding */ +static unsigned HuffmanTree_makeTable(HuffmanTree* tree) { + static const unsigned headsize = 1u << FIRSTBITS; /*size of the first table*/ + static const unsigned mask = (1u << FIRSTBITS) /*headsize*/ - 1u; + size_t i, numpresent, pointer, size; /*total table size*/ + unsigned* maxlens = (unsigned*)lodepng_malloc(headsize * sizeof(unsigned)); + if(!maxlens) return 83; /*alloc fail*/ + + /* compute maxlens: max total bit length of symbols sharing prefix in the first table*/ + lodepng_memset(maxlens, 0, headsize * sizeof(*maxlens)); + for(i = 0; i < tree->numcodes; i++) { + unsigned symbol = tree->codes[i]; + unsigned l = tree->lengths[i]; + unsigned index; + if(l <= FIRSTBITS) continue; /*symbols that fit in first table don't increase secondary table size*/ + /*get the FIRSTBITS MSBs, the MSBs of the symbol are encoded first. See later comment about the reversing*/ + index = reverseBits(symbol >> (l - FIRSTBITS), FIRSTBITS); + maxlens[index] = LODEPNG_MAX(maxlens[index], l); + } + /* compute total table size: size of first table plus all secondary tables for symbols longer than FIRSTBITS */ + size = headsize; + for(i = 0; i < headsize; ++i) { + unsigned l = maxlens[i]; + if(l > FIRSTBITS) size += (1u << (l - FIRSTBITS)); + } + tree->table_len = (unsigned char*)lodepng_malloc(size * sizeof(*tree->table_len)); + tree->table_value = (unsigned short*)lodepng_malloc(size * sizeof(*tree->table_value)); + if(!tree->table_len || !tree->table_value) { + lodepng_free(maxlens); + /* freeing tree->table values is done at a higher scope */ + return 83; /*alloc fail*/ + } + /*initialize with an invalid length to indicate unused entries*/ + for(i = 0; i < size; ++i) tree->table_len[i] = 16; + + /*fill in the first table for long symbols: max prefix size and pointer to secondary tables*/ + pointer = headsize; + for(i = 0; i < headsize; ++i) { + unsigned l = maxlens[i]; + if(l <= FIRSTBITS) continue; + tree->table_len[i] = l; + tree->table_value[i] = pointer; + pointer += (1u << (l - FIRSTBITS)); + } + lodepng_free(maxlens); + + /*fill in the first table for short symbols, or secondary table for long symbols*/ + numpresent = 0; + for(i = 0; i < tree->numcodes; ++i) { + unsigned l = tree->lengths[i]; + unsigned symbol = tree->codes[i]; /*the huffman bit pattern. 
i itself is the value.*/ + /*reverse bits, because the huffman bits are given in MSB first order but the bit reader reads LSB first*/ + unsigned reverse = reverseBits(symbol, l); + if(l == 0) continue; + numpresent++; + + if(l <= FIRSTBITS) { + /*short symbol, fully in first table, replicated num times if l < FIRSTBITS*/ + unsigned num = 1u << (FIRSTBITS - l); + unsigned j; + for(j = 0; j < num; ++j) { + /*bit reader will read the l bits of symbol first, the remaining FIRSTBITS - l bits go to the MSB's*/ + unsigned index = reverse | (j << l); + if(tree->table_len[index] != 16) return 55; /*invalid tree: long symbol shares prefix with short symbol*/ + tree->table_len[index] = l; + tree->table_value[index] = i; + } + } else { + /*long symbol, shares prefix with other long symbols in first lookup table, needs second lookup*/ + /*the FIRSTBITS MSBs of the symbol are the first table index*/ + unsigned index = reverse & mask; + unsigned maxlen = tree->table_len[index]; + /*log2 of secondary table length, should be >= l - FIRSTBITS*/ + unsigned tablelen = maxlen - FIRSTBITS; + unsigned start = tree->table_value[index]; /*starting index in secondary table*/ + unsigned num = 1u << (tablelen - (l - FIRSTBITS)); /*amount of entries of this symbol in secondary table*/ + unsigned j; + if(maxlen < l) return 55; /*invalid tree: long symbol shares prefix with short symbol*/ + for(j = 0; j < num; ++j) { + unsigned reverse2 = reverse >> FIRSTBITS; /* l - FIRSTBITS bits */ + unsigned index2 = start + (reverse2 | (j << (l - FIRSTBITS))); + tree->table_len[index2] = l; + tree->table_value[index2] = i; + } + } + } + + if(numpresent < 2) { + /* In case of exactly 1 symbol, in theory the huffman symbol needs 0 bits, + but deflate uses 1 bit instead. In case of 0 symbols, no symbols can + appear at all, but such huffman tree could still exist (e.g. if distance + codes are never used). In both cases, not all symbols of the table will be + filled in. Fill them in with an invalid symbol value so returning them from + huffmanDecodeSymbol will cause error. */ + for(i = 0; i < size; ++i) { + if(tree->table_len[i] == 16) { + /* As length, use a value smaller than FIRSTBITS for the head table, + and a value larger than FIRSTBITS for the secondary table, to ensure + valid behavior for advanceBits when reading this symbol. */ + tree->table_len[i] = (i < headsize) ? 1 : (FIRSTBITS + 1); + tree->table_value[i] = INVALIDSYMBOL; + } + } + } else { + /* A good huffman tree has N * 2 - 1 nodes, of which N - 1 are internal nodes. + If that is not the case (due to too long length codes), the table will not + have been fully used, and this is an error (not all bit combinations can be + decoded): an oversubscribed huffman tree, indicated by error 55. */ + for(i = 0; i < size; ++i) { + if(tree->table_len[i] == 16) return 55; + } + } + + return 0; +} + +/* +Second step for the ...makeFromLengths and ...makeFromFrequencies functions. +numcodes, lengths and maxbitlen must already be filled in correctly. return +value is error. 
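A small worked example of the three steps in the function below: for code lengths {2, 1, 3, 3}, step 1 counts blcount = {0, 1, 1, 2}, step 2 produces nextcode = {0, 0, 2, 6}, and step 3 assigns the codes 10, 0, 110 and 111 (in binary), i.e. the canonical prefix-free code that RFC 1951 defines for those lengths.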
+*/ +static unsigned HuffmanTree_makeFromLengths2(HuffmanTree* tree) { + unsigned* blcount; + unsigned* nextcode; + unsigned error = 0; + unsigned bits, n; + + tree->codes = (unsigned*)lodepng_malloc(tree->numcodes * sizeof(unsigned)); + blcount = (unsigned*)lodepng_malloc((tree->maxbitlen + 1) * sizeof(unsigned)); + nextcode = (unsigned*)lodepng_malloc((tree->maxbitlen + 1) * sizeof(unsigned)); + if(!tree->codes || !blcount || !nextcode) error = 83; /*alloc fail*/ + + if(!error) { + for(n = 0; n != tree->maxbitlen + 1; n++) blcount[n] = nextcode[n] = 0; + /*step 1: count number of instances of each code length*/ + for(bits = 0; bits != tree->numcodes; ++bits) ++blcount[tree->lengths[bits]]; + /*step 2: generate the nextcode values*/ + for(bits = 1; bits <= tree->maxbitlen; ++bits) { + nextcode[bits] = (nextcode[bits - 1] + blcount[bits - 1]) << 1u; + } + /*step 3: generate all the codes*/ + for(n = 0; n != tree->numcodes; ++n) { + if(tree->lengths[n] != 0) { + tree->codes[n] = nextcode[tree->lengths[n]]++; + /*remove superfluous bits from the code*/ + tree->codes[n] &= ((1u << tree->lengths[n]) - 1u); + } + } + } + + lodepng_free(blcount); + lodepng_free(nextcode); + + if(!error) error = HuffmanTree_makeTable(tree); + return error; +} + +/* +given the code lengths (as stored in the PNG file), generate the tree as defined +by Deflate. maxbitlen is the maximum bits that a code in the tree can have. +return value is error. +*/ +static unsigned HuffmanTree_makeFromLengths(HuffmanTree* tree, const unsigned* bitlen, + size_t numcodes, unsigned maxbitlen) { + unsigned i; + tree->lengths = (unsigned*)lodepng_malloc(numcodes * sizeof(unsigned)); + if(!tree->lengths) return 83; /*alloc fail*/ + for(i = 0; i != numcodes; ++i) tree->lengths[i] = bitlen[i]; + tree->numcodes = (unsigned)numcodes; /*number of symbols*/ + tree->maxbitlen = maxbitlen; + return HuffmanTree_makeFromLengths2(tree); +} + +#ifdef LODEPNG_COMPILE_ENCODER + +/*BPM: Boundary Package Merge, see "A Fast and Space-Economical Algorithm for Length-Limited Coding", +Jyrki Katajainen, Alistair Moffat, Andrew Turpin, 1995.*/ + +/*chain node for boundary package merge*/ +typedef struct BPMNode { + int weight; /*the sum of all weights in this chain*/ + unsigned index; /*index of this leaf node (called "count" in the paper)*/ + struct BPMNode* tail; /*the next nodes in this chain (null if last)*/ + int in_use; +} BPMNode; + +/*lists of chains*/ +typedef struct BPMLists { + /*memory pool*/ + unsigned memsize; + BPMNode* memory; + unsigned numfree; + unsigned nextfree; + BPMNode** freelist; + /*two heads of lookahead chains per list*/ + unsigned listsize; + BPMNode** chains0; + BPMNode** chains1; +} BPMLists; + +/*creates a new chain node with the given parameters, from the memory in the lists */ +static BPMNode* bpmnode_create(BPMLists* lists, int weight, unsigned index, BPMNode* tail) { + unsigned i; + BPMNode* result; + + /*memory full, so garbage collect*/ + if(lists->nextfree >= lists->numfree) { + /*mark only those that are in use*/ + for(i = 0; i != lists->memsize; ++i) lists->memory[i].in_use = 0; + for(i = 0; i != lists->listsize; ++i) { + BPMNode* node; + for(node = lists->chains0[i]; node != 0; node = node->tail) node->in_use = 1; + for(node = lists->chains1[i]; node != 0; node = node->tail) node->in_use = 1; + } + /*collect those that are free*/ + lists->numfree = 0; + for(i = 0; i != lists->memsize; ++i) { + if(!lists->memory[i].in_use) lists->freelist[lists->numfree++] = &lists->memory[i]; + } + lists->nextfree = 0; + } + + 
result = lists->freelist[lists->nextfree++]; + result->weight = weight; + result->index = index; + result->tail = tail; + return result; +} + +/*sort the leaves with stable mergesort*/ +static void bpmnode_sort(BPMNode* leaves, size_t num) { + BPMNode* mem = (BPMNode*)lodepng_malloc(sizeof(*leaves) * num); + size_t width, counter = 0; + for(width = 1; width < num; width *= 2) { + BPMNode* a = (counter & 1) ? mem : leaves; + BPMNode* b = (counter & 1) ? leaves : mem; + size_t p; + for(p = 0; p < num; p += 2 * width) { + size_t q = (p + width > num) ? num : (p + width); + size_t r = (p + 2 * width > num) ? num : (p + 2 * width); + size_t i = p, j = q, k; + for(k = p; k < r; k++) { + if(i < q && (j >= r || a[i].weight <= a[j].weight)) b[k] = a[i++]; + else b[k] = a[j++]; + } + } + counter++; + } + if(counter & 1) lodepng_memcpy(leaves, mem, sizeof(*leaves) * num); + lodepng_free(mem); +} + +/*Boundary Package Merge step, numpresent is the amount of leaves, and c is the current chain.*/ +static void boundaryPM(BPMLists* lists, BPMNode* leaves, size_t numpresent, int c, int num) { + unsigned lastindex = lists->chains1[c]->index; + + if(c == 0) { + if(lastindex >= numpresent) return; + lists->chains0[c] = lists->chains1[c]; + lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, 0); + } else { + /*sum of the weights of the head nodes of the previous lookahead chains.*/ + int sum = lists->chains0[c - 1]->weight + lists->chains1[c - 1]->weight; + lists->chains0[c] = lists->chains1[c]; + if(lastindex < numpresent && sum > leaves[lastindex].weight) { + lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, lists->chains1[c]->tail); + return; + } + lists->chains1[c] = bpmnode_create(lists, sum, lastindex, lists->chains1[c - 1]); + /*in the end we are only interested in the chain of the last list, so no + need to recurse if we're at the last one (this gives measurable speedup)*/ + if(num + 1 < (int)(2 * numpresent - 2)) { + boundaryPM(lists, leaves, numpresent, c - 1, num); + boundaryPM(lists, leaves, numpresent, c - 1, num); + } + } +} + +unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies, + size_t numcodes, unsigned maxbitlen) { + unsigned error = 0; + unsigned i; + size_t numpresent = 0; /*number of symbols with non-zero frequency*/ + BPMNode* leaves; /*the symbols, only those with > 0 frequency*/ + + if(numcodes == 0) return 80; /*error: a tree of 0 symbols is not supposed to be made*/ + if((1u << maxbitlen) < (unsigned)numcodes) return 80; /*error: represent all symbols*/ + + leaves = (BPMNode*)lodepng_malloc(numcodes * sizeof(*leaves)); + if(!leaves) return 83; /*alloc fail*/ + + for(i = 0; i != numcodes; ++i) { + if(frequencies[i] > 0) { + leaves[numpresent].weight = (int)frequencies[i]; + leaves[numpresent].index = i; + ++numpresent; + } + } + + lodepng_memset(lengths, 0, numcodes * sizeof(*lengths)); + + /*ensure at least two present symbols. There should be at least one symbol + according to RFC 1951 section 3.2.7. Some decoders incorrectly require two. To + make these work as well ensure there are at least two symbols. The + Package-Merge code below also doesn't work correctly if there's only one + symbol, it'd give it the theoretical 0 bits but in practice zlib wants 1 bit*/ + if(numpresent == 0) { + lengths[0] = lengths[1] = 1; /*note that for RFC 1951 section 3.2.7, only lengths[0] = 1 is needed*/ + } else if(numpresent == 1) { + lengths[leaves[0].index] = 1; + lengths[leaves[0].index == 0 ? 
1 : 0] = 1; + } else { + BPMLists lists; + BPMNode* node; + + bpmnode_sort(leaves, numpresent); + + lists.listsize = maxbitlen; + lists.memsize = 2 * maxbitlen * (maxbitlen + 1); + lists.nextfree = 0; + lists.numfree = lists.memsize; + lists.memory = (BPMNode*)lodepng_malloc(lists.memsize * sizeof(*lists.memory)); + lists.freelist = (BPMNode**)lodepng_malloc(lists.memsize * sizeof(BPMNode*)); + lists.chains0 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*)); + lists.chains1 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*)); + if(!lists.memory || !lists.freelist || !lists.chains0 || !lists.chains1) error = 83; /*alloc fail*/ + + if(!error) { + for(i = 0; i != lists.memsize; ++i) lists.freelist[i] = &lists.memory[i]; + + bpmnode_create(&lists, leaves[0].weight, 1, 0); + bpmnode_create(&lists, leaves[1].weight, 2, 0); + + for(i = 0; i != lists.listsize; ++i) { + lists.chains0[i] = &lists.memory[0]; + lists.chains1[i] = &lists.memory[1]; + } + + /*each boundaryPM call adds one chain to the last list, and we need 2 * numpresent - 2 chains.*/ + for(i = 2; i != 2 * numpresent - 2; ++i) boundaryPM(&lists, leaves, numpresent, (int)maxbitlen - 1, (int)i); + + for(node = lists.chains1[maxbitlen - 1]; node; node = node->tail) { + for(i = 0; i != node->index; ++i) ++lengths[leaves[i].index]; + } + } + + lodepng_free(lists.memory); + lodepng_free(lists.freelist); + lodepng_free(lists.chains0); + lodepng_free(lists.chains1); + } + + lodepng_free(leaves); + return error; +} + +/*Create the Huffman tree given the symbol frequencies*/ +static unsigned HuffmanTree_makeFromFrequencies(HuffmanTree* tree, const unsigned* frequencies, + size_t mincodes, size_t numcodes, unsigned maxbitlen) { + unsigned error = 0; + while(!frequencies[numcodes - 1] && numcodes > mincodes) --numcodes; /*trim zeroes*/ + tree->lengths = (unsigned*)lodepng_malloc(numcodes * sizeof(unsigned)); + if(!tree->lengths) return 83; /*alloc fail*/ + tree->maxbitlen = maxbitlen; + tree->numcodes = (unsigned)numcodes; /*number of symbols*/ + + error = lodepng_huffman_code_lengths(tree->lengths, frequencies, numcodes, maxbitlen); + if(!error) error = HuffmanTree_makeFromLengths2(tree); + return error; +} +#endif /*LODEPNG_COMPILE_ENCODER*/ + +/*get the literal and length code tree of a deflated block with fixed tree, as per the deflate specification*/ +static unsigned generateFixedLitLenTree(HuffmanTree* tree) { + unsigned i, error = 0; + unsigned* bitlen = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned)); + if(!bitlen) return 83; /*alloc fail*/ + + /*288 possible codes: 0-255=literals, 256=endcode, 257-285=lengthcodes, 286-287=unused*/ + for(i = 0; i <= 143; ++i) bitlen[i] = 8; + for(i = 144; i <= 255; ++i) bitlen[i] = 9; + for(i = 256; i <= 279; ++i) bitlen[i] = 7; + for(i = 280; i <= 287; ++i) bitlen[i] = 8; + + error = HuffmanTree_makeFromLengths(tree, bitlen, NUM_DEFLATE_CODE_SYMBOLS, 15); + + lodepng_free(bitlen); + return error; +} + +/*get the distance code tree of a deflated block with fixed tree, as specified in the deflate specification*/ +static unsigned generateFixedDistanceTree(HuffmanTree* tree) { + unsigned i, error = 0; + unsigned* bitlen = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned)); + if(!bitlen) return 83; /*alloc fail*/ + + /*there are 32 distance codes, but 30-31 are unused*/ + for(i = 0; i != NUM_DISTANCE_SYMBOLS; ++i) bitlen[i] = 5; + error = HuffmanTree_makeFromLengths(tree, bitlen, NUM_DISTANCE_SYMBOLS, 15); + + lodepng_free(bitlen); + return error; 
+} + +#ifdef LODEPNG_COMPILE_DECODER + +/* +returns the code. The bit reader must already have been ensured at least 15 bits +*/ +static unsigned huffmanDecodeSymbol(LodePNGBitReader* reader, const HuffmanTree* codetree) { + unsigned short code = peekBits(reader, FIRSTBITS); + unsigned short l = codetree->table_len[code]; + unsigned short value = codetree->table_value[code]; + if(l <= FIRSTBITS) { + advanceBits(reader, l); + return value; + } else { + unsigned index2; + advanceBits(reader, FIRSTBITS); + index2 = value + peekBits(reader, l - FIRSTBITS); + advanceBits(reader, codetree->table_len[index2] - FIRSTBITS); + return codetree->table_value[index2]; + } +} +#endif /*LODEPNG_COMPILE_DECODER*/ + +#ifdef LODEPNG_COMPILE_DECODER + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / Inflator (Decompressor) / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +/*get the tree of a deflated block with fixed tree, as specified in the deflate specification +Returns error code.*/ +static unsigned getTreeInflateFixed(HuffmanTree* tree_ll, HuffmanTree* tree_d) { + unsigned error = generateFixedLitLenTree(tree_ll); + if(error) return error; + return generateFixedDistanceTree(tree_d); +} + +/*get the tree of a deflated block with dynamic tree, the tree itself is also Huffman compressed with a known tree*/ +static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d, + LodePNGBitReader* reader) { + /*make sure that length values that aren't filled in will be 0, or a wrong tree will be generated*/ + unsigned error = 0; + unsigned n, HLIT, HDIST, HCLEN, i; + + /*see comments in deflateDynamic for explanation of the context and these variables, it is analogous*/ + unsigned* bitlen_ll = 0; /*lit,len code lengths*/ + unsigned* bitlen_d = 0; /*dist code lengths*/ + /*code length code lengths ("clcl"), the bit lengths of the huffman tree used to compress bitlen_ll and bitlen_d*/ + unsigned* bitlen_cl = 0; + HuffmanTree tree_cl; /*the code tree for code length codes (the huffman tree for compressed huffman trees)*/ + + if(!ensureBits17(reader, 14)) return 49; /*error: the bit pointer is or will go past the memory*/ + + /*number of literal/length codes + 257. Unlike the spec, the value 257 is added to it here already*/ + HLIT = readBits(reader, 5) + 257; + /*number of distance codes. Unlike the spec, the value 1 is added to it here already*/ + HDIST = readBits(reader, 5) + 1; + /*number of code length codes. 
Unlike the spec, the value 4 is added to it here already*/ + HCLEN = readBits(reader, 4) + 4; + + bitlen_cl = (unsigned*)lodepng_malloc(NUM_CODE_LENGTH_CODES * sizeof(unsigned)); + if(!bitlen_cl) return 83 /*alloc fail*/; + + HuffmanTree_init(&tree_cl); + + while(!error) { + /*read the code length codes out of 3 * (amount of code length codes) bits*/ + if(lodepng_gtofl(reader->bp, HCLEN * 3, reader->bitsize)) { + ERROR_BREAK(50); /*error: the bit pointer is or will go past the memory*/ + } + for(i = 0; i != HCLEN; ++i) { + ensureBits9(reader, 3); /*out of bounds already checked above */ + bitlen_cl[CLCL_ORDER[i]] = readBits(reader, 3); + } + for(i = HCLEN; i != NUM_CODE_LENGTH_CODES; ++i) { + bitlen_cl[CLCL_ORDER[i]] = 0; + } + + error = HuffmanTree_makeFromLengths(&tree_cl, bitlen_cl, NUM_CODE_LENGTH_CODES, 7); + if(error) break; + + /*now we can use this tree to read the lengths for the tree that this function will return*/ + bitlen_ll = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned)); + bitlen_d = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned)); + if(!bitlen_ll || !bitlen_d) ERROR_BREAK(83 /*alloc fail*/); + lodepng_memset(bitlen_ll, 0, NUM_DEFLATE_CODE_SYMBOLS * sizeof(*bitlen_ll)); + lodepng_memset(bitlen_d, 0, NUM_DISTANCE_SYMBOLS * sizeof(*bitlen_d)); + + /*i is the current symbol we're reading in the part that contains the code lengths of lit/len and dist codes*/ + i = 0; + while(i < HLIT + HDIST) { + unsigned code; + ensureBits25(reader, 22); /* up to 15 bits for huffman code, up to 7 extra bits below*/ + code = huffmanDecodeSymbol(reader, &tree_cl); + if(code <= 15) /*a length code*/ { + if(i < HLIT) bitlen_ll[i] = code; + else bitlen_d[i - HLIT] = code; + ++i; + } else if(code == 16) /*repeat previous*/ { + unsigned replength = 3; /*read in the 2 bits that indicate repeat length (3-6)*/ + unsigned value; /*set value to the previous code*/ + + if(i == 0) ERROR_BREAK(54); /*can't repeat previous if i is 0*/ + + replength += readBits(reader, 2); + + if(i < HLIT + 1) value = bitlen_ll[i - 1]; + else value = bitlen_d[i - HLIT - 1]; + /*repeat this value in the next lengths*/ + for(n = 0; n < replength; ++n) { + if(i >= HLIT + HDIST) ERROR_BREAK(13); /*error: i is larger than the amount of codes*/ + if(i < HLIT) bitlen_ll[i] = value; + else bitlen_d[i - HLIT] = value; + ++i; + } + } else if(code == 17) /*repeat "0" 3-10 times*/ { + unsigned replength = 3; /*read in the bits that indicate repeat length*/ + replength += readBits(reader, 3); + + /*repeat this value in the next lengths*/ + for(n = 0; n < replength; ++n) { + if(i >= HLIT + HDIST) ERROR_BREAK(14); /*error: i is larger than the amount of codes*/ + + if(i < HLIT) bitlen_ll[i] = 0; + else bitlen_d[i - HLIT] = 0; + ++i; + } + } else if(code == 18) /*repeat "0" 11-138 times*/ { + unsigned replength = 11; /*read in the bits that indicate repeat length*/ + replength += readBits(reader, 7); + + /*repeat this value in the next lengths*/ + for(n = 0; n < replength; ++n) { + if(i >= HLIT + HDIST) ERROR_BREAK(15); /*error: i is larger than the amount of codes*/ + + if(i < HLIT) bitlen_ll[i] = 0; + else bitlen_d[i - HLIT] = 0; + ++i; + } + } else /*if(code == INVALIDSYMBOL)*/ { + ERROR_BREAK(16); /*error: tried to read disallowed huffman symbol*/ + } + /*check if any of the ensureBits above went out of bounds*/ + if(reader->bp > reader->bitsize) { + /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol + (10=no endcode, 11=wrong jump outside of tree)*/ + /* 
TODO: revise error codes 10,11,50: the above comment is no longer valid */ + ERROR_BREAK(50); /*error, bit pointer jumps past memory*/ + } + } + if(error) break; + + if(bitlen_ll[256] == 0) ERROR_BREAK(64); /*the length of the end code 256 must be larger than 0*/ + + /*now we've finally got HLIT and HDIST, so generate the code trees, and the function is done*/ + error = HuffmanTree_makeFromLengths(tree_ll, bitlen_ll, NUM_DEFLATE_CODE_SYMBOLS, 15); + if(error) break; + error = HuffmanTree_makeFromLengths(tree_d, bitlen_d, NUM_DISTANCE_SYMBOLS, 15); + + break; /*end of error-while*/ + } + + lodepng_free(bitlen_cl); + lodepng_free(bitlen_ll); + lodepng_free(bitlen_d); + HuffmanTree_cleanup(&tree_cl); + + return error; +} + +/*inflate a block with dynamic of fixed Huffman tree. btype must be 1 or 2.*/ +static unsigned inflateHuffmanBlock(ucvector* out, LodePNGBitReader* reader, + unsigned btype, size_t max_output_size) { + unsigned error = 0; + HuffmanTree tree_ll; /*the huffman tree for literal and length codes*/ + HuffmanTree tree_d; /*the huffman tree for distance codes*/ + + HuffmanTree_init(&tree_ll); + HuffmanTree_init(&tree_d); + + if(btype == 1) error = getTreeInflateFixed(&tree_ll, &tree_d); + else /*if(btype == 2)*/ error = getTreeInflateDynamic(&tree_ll, &tree_d, reader); + + while(!error) /*decode all symbols until end reached, breaks at end code*/ { + /*code_ll is literal, length or end code*/ + unsigned code_ll; + ensureBits25(reader, 20); /* up to 15 for the huffman symbol, up to 5 for the length extra bits */ + code_ll = huffmanDecodeSymbol(reader, &tree_ll); + if(code_ll <= 255) /*literal symbol*/ { + if(!ucvector_resize(out, out->size + 1)) ERROR_BREAK(83 /*alloc fail*/); + out->data[out->size - 1] = (unsigned char)code_ll; + } else if(code_ll >= FIRST_LENGTH_CODE_INDEX && code_ll <= LAST_LENGTH_CODE_INDEX) /*length code*/ { + unsigned code_d, distance; + unsigned numextrabits_l, numextrabits_d; /*extra bits for length and distance*/ + size_t start, backward, length; + + /*part 1: get length base*/ + length = LENGTHBASE[code_ll - FIRST_LENGTH_CODE_INDEX]; + + /*part 2: get extra bits and add the value of that to length*/ + numextrabits_l = LENGTHEXTRA[code_ll - FIRST_LENGTH_CODE_INDEX]; + if(numextrabits_l != 0) { + /* bits already ensured above */ + length += readBits(reader, numextrabits_l); + } + + /*part 3: get distance code*/ + ensureBits32(reader, 28); /* up to 15 for the huffman symbol, up to 13 for the extra bits */ + code_d = huffmanDecodeSymbol(reader, &tree_d); + if(code_d > 29) { + if(code_d <= 31) { + ERROR_BREAK(18); /*error: invalid distance code (30-31 are never used)*/ + } else /* if(code_d == INVALIDSYMBOL) */{ + ERROR_BREAK(16); /*error: tried to read disallowed huffman symbol*/ + } + } + distance = DISTANCEBASE[code_d]; + + /*part 4: get extra bits from distance*/ + numextrabits_d = DISTANCEEXTRA[code_d]; + if(numextrabits_d != 0) { + /* bits already ensured above */ + distance += readBits(reader, numextrabits_d); + } + + /*part 5: fill in all the out[n] values based on the length and dist*/ + start = out->size; + if(distance > start) ERROR_BREAK(52); /*too long backward distance*/ + backward = start - distance; + + if(!ucvector_resize(out, out->size + length)) ERROR_BREAK(83 /*alloc fail*/); + if(distance < length) { + size_t forward; + lodepng_memcpy(out->data + start, out->data + backward, distance); + start += distance; + for(forward = distance; forward < length; ++forward) { + out->data[start++] = out->data[backward++]; + } + } else { + 
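+        /*here distance >= length, so the source bytes cannot overlap the bytes
+          being written and a single memcpy is safe; the branch above handles the
+          overlapping case (distance < length), where already-copied output is
+          reused, e.g. distance 1 with length 5 repeats the previous byte 5 times*/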
lodepng_memcpy(out->data + start, out->data + backward, length); + } + } else if(code_ll == 256) { + break; /*end code, break the loop*/ + } else /*if(code_ll == INVALIDSYMBOL)*/ { + ERROR_BREAK(16); /*error: tried to read disallowed huffman symbol*/ + } + /*check if any of the ensureBits above went out of bounds*/ + if(reader->bp > reader->bitsize) { + /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol + (10=no endcode, 11=wrong jump outside of tree)*/ + /* TODO: revise error codes 10,11,50: the above comment is no longer valid */ + ERROR_BREAK(51); /*error, bit pointer jumps past memory*/ + } + if(max_output_size && out->size > max_output_size) { + ERROR_BREAK(109); /*error, larger than max size*/ + } + } + + HuffmanTree_cleanup(&tree_ll); + HuffmanTree_cleanup(&tree_d); + + return error; +} + +static unsigned inflateNoCompression(ucvector* out, LodePNGBitReader* reader, + const LodePNGDecompressSettings* settings) { + size_t bytepos; + size_t size = reader->size; + unsigned LEN, NLEN, error = 0; + + /*go to first boundary of byte*/ + bytepos = (reader->bp + 7u) >> 3u; + + /*read LEN (2 bytes) and NLEN (2 bytes)*/ + if(bytepos + 4 >= size) return 52; /*error, bit pointer will jump past memory*/ + LEN = (unsigned)reader->data[bytepos] + ((unsigned)reader->data[bytepos + 1] << 8u); bytepos += 2; + NLEN = (unsigned)reader->data[bytepos] + ((unsigned)reader->data[bytepos + 1] << 8u); bytepos += 2; + + /*check if 16-bit NLEN is really the one's complement of LEN*/ + if(!settings->ignore_nlen && LEN + NLEN != 65535) { + return 21; /*error: NLEN is not one's complement of LEN*/ + } + + if(!ucvector_resize(out, out->size + LEN)) return 83; /*alloc fail*/ + + /*read the literal data: LEN bytes are now stored in the out buffer*/ + if(bytepos + LEN > size) return 23; /*error: reading outside of in buffer*/ + + lodepng_memcpy(out->data + out->size - LEN, reader->data + bytepos, LEN); + bytepos += LEN; + + reader->bp = bytepos << 3u; + + return error; +} + +static unsigned lodepng_inflatev(ucvector* out, + const unsigned char* in, size_t insize, + const LodePNGDecompressSettings* settings) { + unsigned BFINAL = 0; + LodePNGBitReader reader; + unsigned error = LodePNGBitReader_init(&reader, in, insize); + + if(error) return error; + + while(!BFINAL) { + unsigned BTYPE; + if(!ensureBits9(&reader, 3)) return 52; /*error, bit pointer will jump past memory*/ + BFINAL = readBits(&reader, 1); + BTYPE = readBits(&reader, 2); + + if(BTYPE == 3) return 20; /*error: invalid BTYPE*/ + else if(BTYPE == 0) error = inflateNoCompression(out, &reader, settings); /*no compression*/ + else error = inflateHuffmanBlock(out, &reader, BTYPE, settings->max_output_size); /*compression, BTYPE 01 or 10*/ + if(!error && settings->max_output_size && out->size > settings->max_output_size) error = 109; + if(error) break; + } + + return error; +} + +unsigned lodepng_inflate(unsigned char** out, size_t* outsize, + const unsigned char* in, size_t insize, + const LodePNGDecompressSettings* settings) { + ucvector v = ucvector_init(*out, *outsize); + unsigned error = lodepng_inflatev(&v, in, insize, settings); + *out = v.data; + *outsize = v.size; + return error; +} + +static unsigned inflatev(ucvector* out, const unsigned char* in, size_t insize, + const LodePNGDecompressSettings* settings) { + if(settings->custom_inflate) { + unsigned error = settings->custom_inflate(&out->data, &out->size, in, insize, settings); + out->allocsize = out->size; + if(error) { + /*the custom inflate is allowed to 
have its own error codes, however, we translate it to code 110*/ + error = 110; + /*if there's a max output size, and the custom zlib returned error, then indicate that error instead*/ + if(settings->max_output_size && out->size > settings->max_output_size) error = 109; + } + return error; + } else { + return lodepng_inflatev(out, in, insize, settings); + } +} + +#endif /*LODEPNG_COMPILE_DECODER*/ + +#ifdef LODEPNG_COMPILE_ENCODER + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / Deflator (Compressor) / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +static const size_t MAX_SUPPORTED_DEFLATE_LENGTH = 258; + +/*search the index in the array, that has the largest value smaller than or equal to the given value, +given array must be sorted (if no value is smaller, it returns the size of the given array)*/ +static size_t searchCodeIndex(const unsigned* array, size_t array_size, size_t value) { + /*binary search (only small gain over linear). TODO: use CPU log2 instruction for getting symbols instead*/ + size_t left = 1; + size_t right = array_size - 1; + + while(left <= right) { + size_t mid = (left + right) >> 1; + if(array[mid] >= value) right = mid - 1; + else left = mid + 1; + } + if(left >= array_size || array[left] > value) left--; + return left; +} + +static void addLengthDistance(uivector* values, size_t length, size_t distance) { + /*values in encoded vector are those used by deflate: + 0-255: literal bytes + 256: end + 257-285: length/distance pair (length code, followed by extra length bits, distance code, extra distance bits) + 286-287: invalid*/ + + unsigned length_code = (unsigned)searchCodeIndex(LENGTHBASE, 29, length); + unsigned extra_length = (unsigned)(length - LENGTHBASE[length_code]); + unsigned dist_code = (unsigned)searchCodeIndex(DISTANCEBASE, 30, distance); + unsigned extra_distance = (unsigned)(distance - DISTANCEBASE[dist_code]); + + size_t pos = values->size; + /*TODO: return error when this fails (out of memory)*/ + unsigned ok = uivector_resize(values, values->size + 4); + if(ok) { + values->data[pos + 0] = length_code + FIRST_LENGTH_CODE_INDEX; + values->data[pos + 1] = extra_length; + values->data[pos + 2] = dist_code; + values->data[pos + 3] = extra_distance; + } +} + +/*3 bytes of data get encoded into two bytes. The hash cannot use more than 3 +bytes as input because 3 is the minimum match length for deflate*/ +static const unsigned HASH_NUM_VALUES = 65536; +static const unsigned HASH_BIT_MASK = 65535; /*HASH_NUM_VALUES - 1, but C90 does not like that as initializer*/ + +typedef struct Hash { + int* head; /*hash value to head circular pos - can be outdated if went around window*/ + /*circular pos to prev circular pos*/ + unsigned short* chain; + int* val; /*circular pos to hash value*/ + + /*TODO: do this not only for zeros but for any repeated byte. 
However for PNG + it's always going to be the zeros that dominate, so not important for PNG*/ + int* headz; /*similar to head, but for chainz*/ + unsigned short* chainz; /*those with same amount of zeros*/ + unsigned short* zeros; /*length of zeros streak, used as a second hash chain*/ +} Hash; + +static unsigned hash_init(Hash* hash, unsigned windowsize) { + unsigned i; + hash->head = (int*)lodepng_malloc(sizeof(int) * HASH_NUM_VALUES); + hash->val = (int*)lodepng_malloc(sizeof(int) * windowsize); + hash->chain = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize); + + hash->zeros = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize); + hash->headz = (int*)lodepng_malloc(sizeof(int) * (MAX_SUPPORTED_DEFLATE_LENGTH + 1)); + hash->chainz = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize); + + if(!hash->head || !hash->chain || !hash->val || !hash->headz|| !hash->chainz || !hash->zeros) { + return 83; /*alloc fail*/ + } + + /*initialize hash table*/ + for(i = 0; i != HASH_NUM_VALUES; ++i) hash->head[i] = -1; + for(i = 0; i != windowsize; ++i) hash->val[i] = -1; + for(i = 0; i != windowsize; ++i) hash->chain[i] = i; /*same value as index indicates uninitialized*/ + + for(i = 0; i <= MAX_SUPPORTED_DEFLATE_LENGTH; ++i) hash->headz[i] = -1; + for(i = 0; i != windowsize; ++i) hash->chainz[i] = i; /*same value as index indicates uninitialized*/ + + return 0; +} + +static void hash_cleanup(Hash* hash) { + lodepng_free(hash->head); + lodepng_free(hash->val); + lodepng_free(hash->chain); + + lodepng_free(hash->zeros); + lodepng_free(hash->headz); + lodepng_free(hash->chainz); +} + + + +static unsigned getHash(const unsigned char* data, size_t size, size_t pos) { + unsigned result = 0; + if(pos + 2 < size) { + /*A simple shift and xor hash is used. Since the data of PNGs is dominated + by zeroes due to the filters, a better hash does not have a significant + effect on speed in traversing the chain, and causes more time spend on + calculating the hash.*/ + result ^= ((unsigned)data[pos + 0] << 0u); + result ^= ((unsigned)data[pos + 1] << 4u); + result ^= ((unsigned)data[pos + 2] << 8u); + } else { + size_t amount, i; + if(pos >= size) return 0; + amount = size - pos; + for(i = 0; i != amount; ++i) result ^= ((unsigned)data[pos + i] << (i * 8u)); + } + return result & HASH_BIT_MASK; +} + +static unsigned countZeros(const unsigned char* data, size_t size, size_t pos) { + const unsigned char* start = data + pos; + const unsigned char* end = start + MAX_SUPPORTED_DEFLATE_LENGTH; + if(end > data + size) end = data + size; + data = start; + while(data != end && *data == 0) ++data; + /*subtracting two addresses returned as 32-bit number (max value is MAX_SUPPORTED_DEFLATE_LENGTH)*/ + return (unsigned)(data - start); +} + +/*wpos = pos & (windowsize - 1)*/ +static void updateHashChain(Hash* hash, size_t wpos, unsigned hashval, unsigned short numzeros) { + hash->val[wpos] = (int)hashval; + if(hash->head[hashval] != -1) hash->chain[wpos] = hash->head[hashval]; + hash->head[hashval] = (int)wpos; + + hash->zeros[wpos] = numzeros; + if(hash->headz[numzeros] != -1) hash->chainz[wpos] = hash->headz[numzeros]; + hash->headz[numzeros] = (int)wpos; +} + +/* +LZ77-encode the data. Return value is error code. The input are raw bytes, the output +is in the form of unsigned integers with codes representing for example literal bytes, or +length/distance pairs. +It uses a hash table technique to let it encode faster. 
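+Each entry of the output vector out is either a literal byte value (0-255) or the
+first of a group of four entries appended by addLengthDistance above: the length
+code (257-285), the extra length bits, the distance code (0-29) and the extra
+distance bits; writeLZ77data later serializes exactly this layout to the bit stream.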
When doing LZ77 encoding, a +sliding window (of windowsize) is used, and all past bytes in that window can be used as +the "dictionary". A brute force search through all possible distances would be slow, and +this hash technique is one out of several ways to speed this up. +*/ +static unsigned encodeLZ77(uivector* out, Hash* hash, + const unsigned char* in, size_t inpos, size_t insize, unsigned windowsize, + unsigned minmatch, unsigned nicematch, unsigned lazymatching) { + size_t pos; + unsigned i, error = 0; + /*for large window lengths, assume the user wants no compression loss. Otherwise, max hash chain length speedup.*/ + unsigned maxchainlength = windowsize >= 8192 ? windowsize : windowsize / 8u; + unsigned maxlazymatch = windowsize >= 8192 ? MAX_SUPPORTED_DEFLATE_LENGTH : 64; + + unsigned usezeros = 1; /*not sure if setting it to false for windowsize < 8192 is better or worse*/ + unsigned numzeros = 0; + + unsigned offset; /*the offset represents the distance in LZ77 terminology*/ + unsigned length; + unsigned lazy = 0; + unsigned lazylength = 0, lazyoffset = 0; + unsigned hashval; + unsigned current_offset, current_length; + unsigned prev_offset; + const unsigned char *lastptr, *foreptr, *backptr; + unsigned hashpos; + + if(windowsize == 0 || windowsize > 32768) return 60; /*error: windowsize smaller/larger than allowed*/ + if((windowsize & (windowsize - 1)) != 0) return 90; /*error: must be power of two*/ + + if(nicematch > MAX_SUPPORTED_DEFLATE_LENGTH) nicematch = MAX_SUPPORTED_DEFLATE_LENGTH; + + for(pos = inpos; pos < insize; ++pos) { + size_t wpos = pos & (windowsize - 1); /*position for in 'circular' hash buffers*/ + unsigned chainlength = 0; + + hashval = getHash(in, insize, pos); + + if(usezeros && hashval == 0) { + if(numzeros == 0) numzeros = countZeros(in, insize, pos); + else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros; + } else { + numzeros = 0; + } + + updateHashChain(hash, wpos, hashval, numzeros); + + /*the length and offset found for the current position*/ + length = 0; + offset = 0; + + hashpos = hash->chain[wpos]; + + lastptr = &in[insize < pos + MAX_SUPPORTED_DEFLATE_LENGTH ? insize : pos + MAX_SUPPORTED_DEFLATE_LENGTH]; + + /*search for the longest string*/ + prev_offset = 0; + for(;;) { + if(chainlength++ >= maxchainlength) break; + current_offset = (unsigned)(hashpos <= wpos ? wpos - hashpos : wpos - hashpos + windowsize); + + if(current_offset < prev_offset) break; /*stop when went completely around the circular buffer*/ + prev_offset = current_offset; + if(current_offset > 0) { + /*test the next characters*/ + foreptr = &in[pos]; + backptr = &in[pos - current_offset]; + + /*common case in PNGs is lots of zeros. Quickly skip over them as a speedup*/ + if(numzeros >= 3) { + unsigned skip = hash->zeros[hashpos]; + if(skip > numzeros) skip = numzeros; + backptr += skip; + foreptr += skip; + } + + while(foreptr != lastptr && *backptr == *foreptr) /*maximum supported length by deflate is max length*/ { + ++backptr; + ++foreptr; + } + current_length = (unsigned)(foreptr - &in[pos]); + + if(current_length > length) { + length = current_length; /*the longest length*/ + offset = current_offset; /*the offset that is related to this longest length*/ + /*jump out once a length of max length is found (speed gain). 
This also jumps + out if length is MAX_SUPPORTED_DEFLATE_LENGTH*/ + if(current_length >= nicematch) break; + } + } + + if(hashpos == hash->chain[hashpos]) break; + + if(numzeros >= 3 && length > numzeros) { + hashpos = hash->chainz[hashpos]; + if(hash->zeros[hashpos] != numzeros) break; + } else { + hashpos = hash->chain[hashpos]; + /*outdated hash value, happens if particular value was not encountered in whole last window*/ + if(hash->val[hashpos] != (int)hashval) break; + } + } + + if(lazymatching) { + if(!lazy && length >= 3 && length <= maxlazymatch && length < MAX_SUPPORTED_DEFLATE_LENGTH) { + lazy = 1; + lazylength = length; + lazyoffset = offset; + continue; /*try the next byte*/ + } + if(lazy) { + lazy = 0; + if(pos == 0) ERROR_BREAK(81); + if(length > lazylength + 1) { + /*push the previous character as literal*/ + if(!uivector_push_back(out, in[pos - 1])) ERROR_BREAK(83 /*alloc fail*/); + } else { + length = lazylength; + offset = lazyoffset; + hash->head[hashval] = -1; /*the same hashchain update will be done, this ensures no wrong alteration*/ + hash->headz[numzeros] = -1; /*idem*/ + --pos; + } + } + } + if(length >= 3 && offset > windowsize) ERROR_BREAK(86 /*too big (or overflown negative) offset*/); + + /*encode it as length/distance pair or literal value*/ + if(length < 3) /*only lengths of 3 or higher are supported as length/distance pair*/ { + if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/); + } else if(length < minmatch || (length == 3 && offset > 4096)) { + /*compensate for the fact that longer offsets have more extra bits, a + length of only 3 may be not worth it then*/ + if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/); + } else { + addLengthDistance(out, length, offset); + for(i = 1; i < length; ++i) { + ++pos; + wpos = pos & (windowsize - 1); + hashval = getHash(in, insize, pos); + if(usezeros && hashval == 0) { + if(numzeros == 0) numzeros = countZeros(in, insize, pos); + else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros; + } else { + numzeros = 0; + } + updateHashChain(hash, wpos, hashval, numzeros); + } + } + } /*end of the loop through each character of input*/ + + return error; +} + +/* /////////////////////////////////////////////////////////////////////////// */ + +static unsigned deflateNoCompression(ucvector* out, const unsigned char* data, size_t datasize) { + /*non compressed deflate block data: 1 bit BFINAL,2 bits BTYPE,(5 bits): it jumps to start of next byte, + 2 bytes LEN, 2 bytes NLEN, LEN bytes literal DATA*/ + + size_t i, numdeflateblocks = (datasize + 65534u) / 65535u; + unsigned datapos = 0; + for(i = 0; i != numdeflateblocks; ++i) { + unsigned BFINAL, BTYPE, LEN, NLEN; + unsigned char firstbyte; + size_t pos = out->size; + + BFINAL = (i == numdeflateblocks - 1); + BTYPE = 0; + + LEN = 65535; + if(datasize - datapos < 65535u) LEN = (unsigned)datasize - datapos; + NLEN = 65535 - LEN; + + if(!ucvector_resize(out, out->size + LEN + 5)) return 83; /*alloc fail*/ + + firstbyte = (unsigned char)(BFINAL + ((BTYPE & 1u) << 1u) + ((BTYPE & 2u) << 1u)); + out->data[pos + 0] = firstbyte; + out->data[pos + 1] = (unsigned char)(LEN & 255); + out->data[pos + 2] = (unsigned char)(LEN >> 8u); + out->data[pos + 3] = (unsigned char)(NLEN & 255); + out->data[pos + 4] = (unsigned char)(NLEN >> 8u); + lodepng_memcpy(out->data + pos + 5, data + datapos, LEN); + datapos += LEN; + } + + return 0; +} + +/* +write the lz77-encoded data, which has lit, len and dist codes, to compressed stream using huffman 
trees. +tree_ll: the tree for lit and len codes. +tree_d: the tree for distance codes. +*/ +static void writeLZ77data(LodePNGBitWriter* writer, const uivector* lz77_encoded, + const HuffmanTree* tree_ll, const HuffmanTree* tree_d) { + size_t i = 0; + for(i = 0; i != lz77_encoded->size; ++i) { + unsigned val = lz77_encoded->data[i]; + writeBitsReversed(writer, tree_ll->codes[val], tree_ll->lengths[val]); + if(val > 256) /*for a length code, 3 more things have to be added*/ { + unsigned length_index = val - FIRST_LENGTH_CODE_INDEX; + unsigned n_length_extra_bits = LENGTHEXTRA[length_index]; + unsigned length_extra_bits = lz77_encoded->data[++i]; + + unsigned distance_code = lz77_encoded->data[++i]; + + unsigned distance_index = distance_code; + unsigned n_distance_extra_bits = DISTANCEEXTRA[distance_index]; + unsigned distance_extra_bits = lz77_encoded->data[++i]; + + writeBits(writer, length_extra_bits, n_length_extra_bits); + writeBitsReversed(writer, tree_d->codes[distance_code], tree_d->lengths[distance_code]); + writeBits(writer, distance_extra_bits, n_distance_extra_bits); + } + } +} + +/*Deflate for a block of type "dynamic", that is, with freely, optimally, created huffman trees*/ +static unsigned deflateDynamic(LodePNGBitWriter* writer, Hash* hash, + const unsigned char* data, size_t datapos, size_t dataend, + const LodePNGCompressSettings* settings, unsigned final) { + unsigned error = 0; + + /* + A block is compressed as follows: The PNG data is lz77 encoded, resulting in + literal bytes and length/distance pairs. This is then huffman compressed with + two huffman trees. One huffman tree is used for the lit and len values ("ll"), + another huffman tree is used for the dist values ("d"). These two trees are + stored using their code lengths, and to compress even more these code lengths + are also run-length encoded and huffman compressed. This gives a huffman tree + of code lengths "cl". The code lengths used to describe this third tree are + the code length code lengths ("clcl"). + */ + + /*The lz77 encoded data, represented with integers since there will also be length and distance codes in it*/ + uivector lz77_encoded; + HuffmanTree tree_ll; /*tree for lit,len values*/ + HuffmanTree tree_d; /*tree for distance codes*/ + HuffmanTree tree_cl; /*tree for encoding the code lengths representing tree_ll and tree_d*/ + unsigned* frequencies_ll = 0; /*frequency of lit,len codes*/ + unsigned* frequencies_d = 0; /*frequency of dist codes*/ + unsigned* frequencies_cl = 0; /*frequency of code length codes*/ + unsigned* bitlen_lld = 0; /*lit,len,dist code lengths (int bits), literally (without repeat codes).*/ + unsigned* bitlen_lld_e = 0; /*bitlen_lld encoded with repeat codes (this is a rudimentary run length compression)*/ + size_t datasize = dataend - datapos; + + /* + If we could call "bitlen_cl" the the code length code lengths ("clcl"), that is the bit lengths of codes to represent + tree_cl in CLCL_ORDER, then due to the huffman compression of huffman tree representations ("two levels"), there are + some analogies: + bitlen_lld is to tree_cl what data is to tree_ll and tree_d. + bitlen_lld_e is to bitlen_lld what lz77_encoded is to data. + bitlen_cl is to bitlen_lld_e what bitlen_lld is to lz77_encoded. 
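+  As an illustration of that run-length layer: a bitlen_lld run of
+  8 8 8 8 8 0 0 0 0 0 0 0 0 (five 8's followed by eight 0's) becomes
+  bitlen_lld_e = 8, 16, 1, 17, 5: a literal 8, repeat code 16 with extra value 1
+  ("repeat the previous length 3 + 1 more times"), then repeat code 17 with extra
+  value 5 (3 + 5 = 8 zeroes). The extra values are later written with 2, 3 or 7
+  bits for codes 16, 17 and 18 respectively.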
+ */ + + unsigned BFINAL = final; + size_t i; + size_t numcodes_ll, numcodes_d, numcodes_lld, numcodes_lld_e, numcodes_cl; + unsigned HLIT, HDIST, HCLEN; + + uivector_init(&lz77_encoded); + HuffmanTree_init(&tree_ll); + HuffmanTree_init(&tree_d); + HuffmanTree_init(&tree_cl); + /* could fit on stack, but >1KB is on the larger side so allocate instead */ + frequencies_ll = (unsigned*)lodepng_malloc(286 * sizeof(*frequencies_ll)); + frequencies_d = (unsigned*)lodepng_malloc(30 * sizeof(*frequencies_d)); + frequencies_cl = (unsigned*)lodepng_malloc(NUM_CODE_LENGTH_CODES * sizeof(*frequencies_cl)); + + if(!frequencies_ll || !frequencies_d || !frequencies_cl) error = 83; /*alloc fail*/ + + /*This while loop never loops due to a break at the end, it is here to + allow breaking out of it to the cleanup phase on error conditions.*/ + while(!error) { + lodepng_memset(frequencies_ll, 0, 286 * sizeof(*frequencies_ll)); + lodepng_memset(frequencies_d, 0, 30 * sizeof(*frequencies_d)); + lodepng_memset(frequencies_cl, 0, NUM_CODE_LENGTH_CODES * sizeof(*frequencies_cl)); + + if(settings->use_lz77) { + error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize, + settings->minmatch, settings->nicematch, settings->lazymatching); + if(error) break; + } else { + if(!uivector_resize(&lz77_encoded, datasize)) ERROR_BREAK(83 /*alloc fail*/); + for(i = datapos; i < dataend; ++i) lz77_encoded.data[i - datapos] = data[i]; /*no LZ77, but still will be Huffman compressed*/ + } + + /*Count the frequencies of lit, len and dist codes*/ + for(i = 0; i != lz77_encoded.size; ++i) { + unsigned symbol = lz77_encoded.data[i]; + ++frequencies_ll[symbol]; + if(symbol > 256) { + unsigned dist = lz77_encoded.data[i + 2]; + ++frequencies_d[dist]; + i += 3; + } + } + frequencies_ll[256] = 1; /*there will be exactly 1 end code, at the end of the block*/ + + /*Make both huffman trees, one for the lit and len codes, one for the dist codes*/ + error = HuffmanTree_makeFromFrequencies(&tree_ll, frequencies_ll, 257, 286, 15); + if(error) break; + /*2, not 1, is chosen for mincodes: some buggy PNG decoders require at least 2 symbols in the dist tree*/ + error = HuffmanTree_makeFromFrequencies(&tree_d, frequencies_d, 2, 30, 15); + if(error) break; + + numcodes_ll = LODEPNG_MIN(tree_ll.numcodes, 286); + numcodes_d = LODEPNG_MIN(tree_d.numcodes, 30); + /*store the code lengths of both generated trees in bitlen_lld*/ + numcodes_lld = numcodes_ll + numcodes_d; + bitlen_lld = (unsigned*)lodepng_malloc(numcodes_lld * sizeof(*bitlen_lld)); + /*numcodes_lld_e never needs more size than bitlen_lld*/ + bitlen_lld_e = (unsigned*)lodepng_malloc(numcodes_lld * sizeof(*bitlen_lld_e)); + if(!bitlen_lld || !bitlen_lld_e) ERROR_BREAK(83); /*alloc fail*/ + numcodes_lld_e = 0; + + for(i = 0; i != numcodes_ll; ++i) bitlen_lld[i] = tree_ll.lengths[i]; + for(i = 0; i != numcodes_d; ++i) bitlen_lld[numcodes_ll + i] = tree_d.lengths[i]; + + /*run-length compress bitlen_ldd into bitlen_lld_e by using repeat codes 16 (copy length 3-6 times), + 17 (3-10 zeroes), 18 (11-138 zeroes)*/ + for(i = 0; i != numcodes_lld; ++i) { + unsigned j = 0; /*amount of repetitions*/ + while(i + j + 1 < numcodes_lld && bitlen_lld[i + j + 1] == bitlen_lld[i]) ++j; + + if(bitlen_lld[i] == 0 && j >= 2) /*repeat code for zeroes*/ { + ++j; /*include the first zero*/ + if(j <= 10) /*repeat code 17 supports max 10 zeroes*/ { + bitlen_lld_e[numcodes_lld_e++] = 17; + bitlen_lld_e[numcodes_lld_e++] = j - 3; + } else /*repeat code 18 supports max 138 zeroes*/ { + if(j > 
138) j = 138; + bitlen_lld_e[numcodes_lld_e++] = 18; + bitlen_lld_e[numcodes_lld_e++] = j - 11; + } + i += (j - 1); + } else if(j >= 3) /*repeat code for value other than zero*/ { + size_t k; + unsigned num = j / 6u, rest = j % 6u; + bitlen_lld_e[numcodes_lld_e++] = bitlen_lld[i]; + for(k = 0; k < num; ++k) { + bitlen_lld_e[numcodes_lld_e++] = 16; + bitlen_lld_e[numcodes_lld_e++] = 6 - 3; + } + if(rest >= 3) { + bitlen_lld_e[numcodes_lld_e++] = 16; + bitlen_lld_e[numcodes_lld_e++] = rest - 3; + } + else j -= rest; + i += j; + } else /*too short to benefit from repeat code*/ { + bitlen_lld_e[numcodes_lld_e++] = bitlen_lld[i]; + } + } + + /*generate tree_cl, the huffmantree of huffmantrees*/ + for(i = 0; i != numcodes_lld_e; ++i) { + ++frequencies_cl[bitlen_lld_e[i]]; + /*after a repeat code come the bits that specify the number of repetitions, + those don't need to be in the frequencies_cl calculation*/ + if(bitlen_lld_e[i] >= 16) ++i; + } + + error = HuffmanTree_makeFromFrequencies(&tree_cl, frequencies_cl, + NUM_CODE_LENGTH_CODES, NUM_CODE_LENGTH_CODES, 7); + if(error) break; + + /*compute amount of code-length-code-lengths to output*/ + numcodes_cl = NUM_CODE_LENGTH_CODES; + /*trim zeros at the end (using CLCL_ORDER), but minimum size must be 4 (see HCLEN below)*/ + while(numcodes_cl > 4u && tree_cl.lengths[CLCL_ORDER[numcodes_cl - 1u]] == 0) { + numcodes_cl--; + } + + /* + Write everything into the output + + After the BFINAL and BTYPE, the dynamic block consists out of the following: + - 5 bits HLIT, 5 bits HDIST, 4 bits HCLEN + - (HCLEN+4)*3 bits code lengths of code length alphabet + - HLIT + 257 code lengths of lit/length alphabet (encoded using the code length + alphabet, + possible repetition codes 16, 17, 18) + - HDIST + 1 code lengths of distance alphabet (encoded using the code length + alphabet, + possible repetition codes 16, 17, 18) + - compressed data + - 256 (end code) + */ + + /*Write block type*/ + writeBits(writer, BFINAL, 1); + writeBits(writer, 0, 1); /*first bit of BTYPE "dynamic"*/ + writeBits(writer, 1, 1); /*second bit of BTYPE "dynamic"*/ + + /*write the HLIT, HDIST and HCLEN values*/ + /*all three sizes take trimmed ending zeroes into account, done either by HuffmanTree_makeFromFrequencies + or in the loop for numcodes_cl above, which saves space. 
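+    For example, with 260 lit/len codes, 5 distance codes and 15 code length
+    codes remaining after trimming, the values written below are HLIT = 3,
+    HDIST = 4 and HCLEN = 11; getTreeInflateDynamic adds the 257, 1 and 4 back
+    when decoding.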
*/ + HLIT = (unsigned)(numcodes_ll - 257); + HDIST = (unsigned)(numcodes_d - 1); + HCLEN = (unsigned)(numcodes_cl - 4); + writeBits(writer, HLIT, 5); + writeBits(writer, HDIST, 5); + writeBits(writer, HCLEN, 4); + + /*write the code lengths of the code length alphabet ("bitlen_cl")*/ + for(i = 0; i != numcodes_cl; ++i) writeBits(writer, tree_cl.lengths[CLCL_ORDER[i]], 3); + + /*write the lengths of the lit/len AND the dist alphabet*/ + for(i = 0; i != numcodes_lld_e; ++i) { + writeBitsReversed(writer, tree_cl.codes[bitlen_lld_e[i]], tree_cl.lengths[bitlen_lld_e[i]]); + /*extra bits of repeat codes*/ + if(bitlen_lld_e[i] == 16) writeBits(writer, bitlen_lld_e[++i], 2); + else if(bitlen_lld_e[i] == 17) writeBits(writer, bitlen_lld_e[++i], 3); + else if(bitlen_lld_e[i] == 18) writeBits(writer, bitlen_lld_e[++i], 7); + } + + /*write the compressed data symbols*/ + writeLZ77data(writer, &lz77_encoded, &tree_ll, &tree_d); + /*error: the length of the end code 256 must be larger than 0*/ + if(tree_ll.lengths[256] == 0) ERROR_BREAK(64); + + /*write the end code*/ + writeBitsReversed(writer, tree_ll.codes[256], tree_ll.lengths[256]); + + break; /*end of error-while*/ + } + + /*cleanup*/ + uivector_cleanup(&lz77_encoded); + HuffmanTree_cleanup(&tree_ll); + HuffmanTree_cleanup(&tree_d); + HuffmanTree_cleanup(&tree_cl); + lodepng_free(frequencies_ll); + lodepng_free(frequencies_d); + lodepng_free(frequencies_cl); + lodepng_free(bitlen_lld); + lodepng_free(bitlen_lld_e); + + return error; +} + +static unsigned deflateFixed(LodePNGBitWriter* writer, Hash* hash, + const unsigned char* data, + size_t datapos, size_t dataend, + const LodePNGCompressSettings* settings, unsigned final) { + HuffmanTree tree_ll; /*tree for literal values and length codes*/ + HuffmanTree tree_d; /*tree for distance codes*/ + + unsigned BFINAL = final; + unsigned error = 0; + size_t i; + + HuffmanTree_init(&tree_ll); + HuffmanTree_init(&tree_d); + + error = generateFixedLitLenTree(&tree_ll); + if(!error) error = generateFixedDistanceTree(&tree_d); + + if(!error) { + writeBits(writer, BFINAL, 1); + writeBits(writer, 1, 1); /*first bit of BTYPE*/ + writeBits(writer, 0, 1); /*second bit of BTYPE*/ + + if(settings->use_lz77) /*LZ77 encoded*/ { + uivector lz77_encoded; + uivector_init(&lz77_encoded); + error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize, + settings->minmatch, settings->nicematch, settings->lazymatching); + if(!error) writeLZ77data(writer, &lz77_encoded, &tree_ll, &tree_d); + uivector_cleanup(&lz77_encoded); + } else /*no LZ77, but still will be Huffman compressed*/ { + for(i = datapos; i < dataend; ++i) { + writeBitsReversed(writer, tree_ll.codes[data[i]], tree_ll.lengths[data[i]]); + } + } + /*add END code*/ + if(!error) writeBitsReversed(writer,tree_ll.codes[256], tree_ll.lengths[256]); + } + + /*cleanup*/ + HuffmanTree_cleanup(&tree_ll); + HuffmanTree_cleanup(&tree_d); + + return error; +} + +static unsigned lodepng_deflatev(ucvector* out, const unsigned char* in, size_t insize, + const LodePNGCompressSettings* settings) { + unsigned error = 0; + size_t i, blocksize, numdeflateblocks; + Hash hash; + LodePNGBitWriter writer; + + LodePNGBitWriter_init(&writer, out); + + if(settings->btype > 2) return 61; + else if(settings->btype == 0) return deflateNoCompression(out, in, insize); + else if(settings->btype == 1) blocksize = insize; + else /*if(settings->btype == 2)*/ { + /*on PNGs, deflate blocks of 65-262k seem to give most dense encoding*/ + blocksize = insize / 8u + 8; + 
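+    /*i.e. aim for roughly 8 blocks; the clamps below keep each block in the
+      64KiB - 256KiB range mentioned above*/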
if(blocksize < 65536) blocksize = 65536; + if(blocksize > 262144) blocksize = 262144; + } + + numdeflateblocks = (insize + blocksize - 1) / blocksize; + if(numdeflateblocks == 0) numdeflateblocks = 1; + + error = hash_init(&hash, settings->windowsize); + + if(!error) { + for(i = 0; i != numdeflateblocks && !error; ++i) { + unsigned final = (i == numdeflateblocks - 1); + size_t start = i * blocksize; + size_t end = start + blocksize; + if(end > insize) end = insize; + + if(settings->btype == 1) error = deflateFixed(&writer, &hash, in, start, end, settings, final); + else if(settings->btype == 2) error = deflateDynamic(&writer, &hash, in, start, end, settings, final); + } + } + + hash_cleanup(&hash); + + return error; +} + +unsigned lodepng_deflate(unsigned char** out, size_t* outsize, + const unsigned char* in, size_t insize, + const LodePNGCompressSettings* settings) { + ucvector v = ucvector_init(*out, *outsize); + unsigned error = lodepng_deflatev(&v, in, insize, settings); + *out = v.data; + *outsize = v.size; + return error; +} + +static unsigned deflate(unsigned char** out, size_t* outsize, + const unsigned char* in, size_t insize, + const LodePNGCompressSettings* settings) { + if(settings->custom_deflate) { + unsigned error = settings->custom_deflate(out, outsize, in, insize, settings); + /*the custom deflate is allowed to have its own error codes, however, we translate it to code 111*/ + return error ? 111 : 0; + } else { + return lodepng_deflate(out, outsize, in, insize, settings); + } +} + +#endif /*LODEPNG_COMPILE_DECODER*/ + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / Adler32 / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +static unsigned update_adler32(unsigned adler, const unsigned char* data, unsigned len) { + unsigned s1 = adler & 0xffffu; + unsigned s2 = (adler >> 16u) & 0xffffu; + + while(len != 0u) { + unsigned i; + /*at least 5552 sums can be done before the sums overflow, saving a lot of module divisions*/ + unsigned amount = len > 5552u ? 
5552u : len; + len -= amount; + for(i = 0; i != amount; ++i) { + s1 += (*data++); + s2 += s1; + } + s1 %= 65521u; + s2 %= 65521u; + } + + return (s2 << 16u) | s1; +} + +/*Return the adler32 of the bytes data[0..len-1]*/ +static unsigned adler32(const unsigned char* data, unsigned len) { + return update_adler32(1u, data, len); +} + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / Zlib / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +#ifdef LODEPNG_COMPILE_DECODER + +static unsigned lodepng_zlib_decompressv(ucvector* out, + const unsigned char* in, size_t insize, + const LodePNGDecompressSettings* settings) { + unsigned error = 0; + unsigned CM, CINFO, FDICT; + + if(insize < 2) return 53; /*error, size of zlib data too small*/ + /*read information from zlib header*/ + if((in[0] * 256 + in[1]) % 31 != 0) { + /*error: 256 * in[0] + in[1] must be a multiple of 31, the FCHECK value is supposed to be made that way*/ + return 24; + } + + CM = in[0] & 15; + CINFO = (in[0] >> 4) & 15; + /*FCHECK = in[1] & 31;*/ /*FCHECK is already tested above*/ + FDICT = (in[1] >> 5) & 1; + /*FLEVEL = (in[1] >> 6) & 3;*/ /*FLEVEL is not used here*/ + + if(CM != 8 || CINFO > 7) { + /*error: only compression method 8: inflate with sliding window of 32k is supported by the PNG spec*/ + return 25; + } + if(FDICT != 0) { + /*error: the specification of PNG says about the zlib stream: + "The additional flags shall not specify a preset dictionary."*/ + return 26; + } + + error = inflatev(out, in + 2, insize - 2, settings); + if(error) return error; + + if(!settings->ignore_adler32) { + unsigned ADLER32 = lodepng_read32bitInt(&in[insize - 4]); + unsigned checksum = adler32(out->data, (unsigned)(out->size)); + if(checksum != ADLER32) return 58; /*error, adler checksum not correct, data must be corrupted*/ + } + + return 0; /*no error*/ +} + + +unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in, + size_t insize, const LodePNGDecompressSettings* settings) { + ucvector v = ucvector_init(*out, *outsize); + unsigned error = lodepng_zlib_decompressv(&v, in, insize, settings); + *out = v.data; + *outsize = v.size; + return error; +} + +/*expected_size is expected output size, to avoid intermediate allocations. Set to 0 if not known. 
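+For example, a caller that knows the output will be about 100000 bytes can pass
+expected_size = 100000 so that the single reserve below avoids intermediate
+reallocations; passing 0 keeps the plain grow-as-needed behavior.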
*/ +static unsigned zlib_decompress(unsigned char** out, size_t* outsize, size_t expected_size, + const unsigned char* in, size_t insize, const LodePNGDecompressSettings* settings) { + unsigned error; + if(settings->custom_zlib) { + error = settings->custom_zlib(out, outsize, in, insize, settings); + if(error) { + /*the custom zlib is allowed to have its own error codes, however, we translate it to code 110*/ + error = 110; + /*if there's a max output size, and the custom zlib returned error, then indicate that error instead*/ + if(settings->max_output_size && *outsize > settings->max_output_size) error = 109; + } + } else { + ucvector v = ucvector_init(*out, *outsize); + if(expected_size) { + /*reserve the memory to avoid intermediate reallocations*/ + ucvector_resize(&v, *outsize + expected_size); + v.size = *outsize; + } + error = lodepng_zlib_decompressv(&v, in, insize, settings); + *out = v.data; + *outsize = v.size; + } + return error; +} + +#endif /*LODEPNG_COMPILE_DECODER*/ + +#ifdef LODEPNG_COMPILE_ENCODER + +unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in, + size_t insize, const LodePNGCompressSettings* settings) { + size_t i; + unsigned error; + unsigned char* deflatedata = 0; + size_t deflatesize = 0; + + error = deflate(&deflatedata, &deflatesize, in, insize, settings); + + *out = NULL; + *outsize = 0; + if(!error) { + *outsize = deflatesize + 6; + *out = (unsigned char*)lodepng_malloc(*outsize); + if(!*out) error = 83; /*alloc fail*/ + } + + if(!error) { + unsigned ADLER32 = adler32(in, (unsigned)insize); + /*zlib data: 1 byte CMF (CM+CINFO), 1 byte FLG, deflate data, 4 byte ADLER32 checksum of the Decompressed data*/ + unsigned CMF = 120; /*0b01111000: CM 8, CINFO 7. With CINFO 7, any window size up to 32768 can be used.*/ + unsigned FLEVEL = 0; + unsigned FDICT = 0; + unsigned CMFFLG = 256 * CMF + FDICT * 32 + FLEVEL * 64; + unsigned FCHECK = 31 - CMFFLG % 31; + CMFFLG += FCHECK; + + (*out)[0] = (unsigned char)(CMFFLG >> 8); + (*out)[1] = (unsigned char)(CMFFLG & 255); + for(i = 0; i != deflatesize; ++i) (*out)[i + 2] = deflatedata[i]; + lodepng_set32bitInt(&(*out)[*outsize - 4], ADLER32); + } + + lodepng_free(deflatedata); + return error; +} + +/* compress using the default or custom zlib function */ +static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in, + size_t insize, const LodePNGCompressSettings* settings) { + if(settings->custom_zlib) { + unsigned error = settings->custom_zlib(out, outsize, in, insize, settings); + /*the custom zlib is allowed to have its own error codes, however, we translate it to code 111*/ + return error ? 
111 : 0; + } else { + return lodepng_zlib_compress(out, outsize, in, insize, settings); + } +} + +#endif /*LODEPNG_COMPILE_ENCODER*/ + +#else /*no LODEPNG_COMPILE_ZLIB*/ + +#ifdef LODEPNG_COMPILE_DECODER +static unsigned zlib_decompress(unsigned char** out, size_t* outsize, size_t expected_size, + const unsigned char* in, size_t insize, const LodePNGDecompressSettings* settings) { + if(!settings->custom_zlib) return 87; /*no custom zlib function provided */ + (void)expected_size; + return settings->custom_zlib(out, outsize, in, insize, settings); +} +#endif /*LODEPNG_COMPILE_DECODER*/ +#ifdef LODEPNG_COMPILE_ENCODER +static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in, + size_t insize, const LodePNGCompressSettings* settings) { + if(!settings->custom_zlib) return 87; /*no custom zlib function provided */ + return settings->custom_zlib(out, outsize, in, insize, settings); +} +#endif /*LODEPNG_COMPILE_ENCODER*/ + +#endif /*LODEPNG_COMPILE_ZLIB*/ + +/* ////////////////////////////////////////////////////////////////////////// */ + +#ifdef LODEPNG_COMPILE_ENCODER + +/*this is a good tradeoff between speed and compression ratio*/ +#define DEFAULT_WINDOWSIZE 2048 + +void lodepng_compress_settings_init(LodePNGCompressSettings* settings) { + /*compress with dynamic huffman tree (not in the mathematical sense, just not the predefined one)*/ + settings->btype = 2; + settings->use_lz77 = 1; + settings->windowsize = DEFAULT_WINDOWSIZE; + settings->minmatch = 3; + settings->nicematch = 128; + settings->lazymatching = 1; + + settings->custom_zlib = 0; + settings->custom_deflate = 0; + settings->custom_context = 0; +} + +const LodePNGCompressSettings lodepng_default_compress_settings = {2, 1, DEFAULT_WINDOWSIZE, 3, 128, 1, 0, 0, 0}; + + +#endif /*LODEPNG_COMPILE_ENCODER*/ + +#ifdef LODEPNG_COMPILE_DECODER + +void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings) { + settings->ignore_adler32 = 0; + settings->ignore_nlen = 0; + settings->max_output_size = 0; + + settings->custom_zlib = 0; + settings->custom_inflate = 0; + settings->custom_context = 0; +} + +const LodePNGDecompressSettings lodepng_default_decompress_settings = {0, 0, 0, 0, 0, 0}; + +#endif /*LODEPNG_COMPILE_DECODER*/ + +/* ////////////////////////////////////////////////////////////////////////// */ +/* ////////////////////////////////////////////////////////////////////////// */ +/* // End of Zlib related code. Begin of PNG related code. 
// */ +/* ////////////////////////////////////////////////////////////////////////// */ +/* ////////////////////////////////////////////////////////////////////////// */ + +#ifdef LODEPNG_COMPILE_PNG + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / CRC32 / */ +/* ////////////////////////////////////////////////////////////////////////// */ + + +#ifndef LODEPNG_NO_COMPILE_CRC +/* CRC polynomial: 0xedb88320 */ +static unsigned lodepng_crc32_table[256] = { + 0u, 1996959894u, 3993919788u, 2567524794u, 124634137u, 1886057615u, 3915621685u, 2657392035u, + 249268274u, 2044508324u, 3772115230u, 2547177864u, 162941995u, 2125561021u, 3887607047u, 2428444049u, + 498536548u, 1789927666u, 4089016648u, 2227061214u, 450548861u, 1843258603u, 4107580753u, 2211677639u, + 325883990u, 1684777152u, 4251122042u, 2321926636u, 335633487u, 1661365465u, 4195302755u, 2366115317u, + 997073096u, 1281953886u, 3579855332u, 2724688242u, 1006888145u, 1258607687u, 3524101629u, 2768942443u, + 901097722u, 1119000684u, 3686517206u, 2898065728u, 853044451u, 1172266101u, 3705015759u, 2882616665u, + 651767980u, 1373503546u, 3369554304u, 3218104598u, 565507253u, 1454621731u, 3485111705u, 3099436303u, + 671266974u, 1594198024u, 3322730930u, 2970347812u, 795835527u, 1483230225u, 3244367275u, 3060149565u, + 1994146192u, 31158534u, 2563907772u, 4023717930u, 1907459465u, 112637215u, 2680153253u, 3904427059u, + 2013776290u, 251722036u, 2517215374u, 3775830040u, 2137656763u, 141376813u, 2439277719u, 3865271297u, + 1802195444u, 476864866u, 2238001368u, 4066508878u, 1812370925u, 453092731u, 2181625025u, 4111451223u, + 1706088902u, 314042704u, 2344532202u, 4240017532u, 1658658271u, 366619977u, 2362670323u, 4224994405u, + 1303535960u, 984961486u, 2747007092u, 3569037538u, 1256170817u, 1037604311u, 2765210733u, 3554079995u, + 1131014506u, 879679996u, 2909243462u, 3663771856u, 1141124467u, 855842277u, 2852801631u, 3708648649u, + 1342533948u, 654459306u, 3188396048u, 3373015174u, 1466479909u, 544179635u, 3110523913u, 3462522015u, + 1591671054u, 702138776u, 2966460450u, 3352799412u, 1504918807u, 783551873u, 3082640443u, 3233442989u, + 3988292384u, 2596254646u, 62317068u, 1957810842u, 3939845945u, 2647816111u, 81470997u, 1943803523u, + 3814918930u, 2489596804u, 225274430u, 2053790376u, 3826175755u, 2466906013u, 167816743u, 2097651377u, + 4027552580u, 2265490386u, 503444072u, 1762050814u, 4150417245u, 2154129355u, 426522225u, 1852507879u, + 4275313526u, 2312317920u, 282753626u, 1742555852u, 4189708143u, 2394877945u, 397917763u, 1622183637u, + 3604390888u, 2714866558u, 953729732u, 1340076626u, 3518719985u, 2797360999u, 1068828381u, 1219638859u, + 3624741850u, 2936675148u, 906185462u, 1090812512u, 3747672003u, 2825379669u, 829329135u, 1181335161u, + 3412177804u, 3160834842u, 628085408u, 1382605366u, 3423369109u, 3138078467u, 570562233u, 1426400815u, + 3317316542u, 2998733608u, 733239954u, 1555261956u, 3268935591u, 3050360625u, 752459403u, 1541320221u, + 2607071920u, 3965973030u, 1969922972u, 40735498u, 2617837225u, 3943577151u, 1913087877u, 83908371u, + 2512341634u, 3803740692u, 2075208622u, 213261112u, 2463272603u, 3855990285u, 2094854071u, 198958881u, + 2262029012u, 4057260610u, 1759359992u, 534414190u, 2176718541u, 4139329115u, 1873836001u, 414664567u, + 2282248934u, 4279200368u, 1711684554u, 285281116u, 2405801727u, 4167216745u, 1634467795u, 376229701u, + 2685067896u, 3608007406u, 1308918612u, 956543938u, 2808555105u, 3495958263u, 1231636301u, 1047427035u, + 2932959818u, 3654703836u, 1088359270u, 
936918000u, 2847714899u, 3736837829u, 1202900863u, 817233897u, + 3183342108u, 3401237130u, 1404277552u, 615818150u, 3134207493u, 3453421203u, 1423857449u, 601450431u, + 3009837614u, 3294710456u, 1567103746u, 711928724u, 3020668471u, 3272380065u, 1510334235u, 755167117u +}; + +/*Return the CRC of the bytes buf[0..len-1].*/ +unsigned lodepng_crc32(const unsigned char* data, size_t length) { + unsigned r = 0xffffffffu; + size_t i; + for(i = 0; i < length; ++i) { + r = lodepng_crc32_table[(r ^ data[i]) & 0xffu] ^ (r >> 8u); + } + return r ^ 0xffffffffu; +} +#else /* !LODEPNG_NO_COMPILE_CRC */ +unsigned lodepng_crc32(const unsigned char* data, size_t length); +#endif /* !LODEPNG_NO_COMPILE_CRC */ + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / Reading and writing PNG color channel bits / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +/* The color channel bits of less-than-8-bit pixels are read with the MSB of bytes first, +so LodePNGBitWriter and LodePNGBitReader can't be used for those. */ + +static unsigned char readBitFromReversedStream(size_t* bitpointer, const unsigned char* bitstream) { + unsigned char result = (unsigned char)((bitstream[(*bitpointer) >> 3] >> (7 - ((*bitpointer) & 0x7))) & 1); + ++(*bitpointer); + return result; +} + +/* TODO: make this faster */ +static unsigned readBitsFromReversedStream(size_t* bitpointer, const unsigned char* bitstream, size_t nbits) { + unsigned result = 0; + size_t i; + for(i = 0 ; i < nbits; ++i) { + result <<= 1u; + result |= (unsigned)readBitFromReversedStream(bitpointer, bitstream); + } + return result; +} + +static void setBitOfReversedStream(size_t* bitpointer, unsigned char* bitstream, unsigned char bit) { + /*the current bit in bitstream may be 0 or 1 for this to work*/ + if(bit == 0) bitstream[(*bitpointer) >> 3u] &= (unsigned char)(~(1u << (7u - ((*bitpointer) & 7u)))); + else bitstream[(*bitpointer) >> 3u] |= (1u << (7u - ((*bitpointer) & 7u))); + ++(*bitpointer); +} + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / PNG chunks / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +unsigned lodepng_chunk_length(const unsigned char* chunk) { + return lodepng_read32bitInt(&chunk[0]); +} + +void lodepng_chunk_type(char type[5], const unsigned char* chunk) { + unsigned i; + for(i = 0; i != 4; ++i) type[i] = (char)chunk[4 + i]; + type[4] = 0; /*null termination char*/ +} + +unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type) { + if(lodepng_strlen(type) != 4) return 0; + return (chunk[4] == type[0] && chunk[5] == type[1] && chunk[6] == type[2] && chunk[7] == type[3]); +} + +unsigned char lodepng_chunk_ancillary(const unsigned char* chunk) { + return((chunk[4] & 32) != 0); +} + +unsigned char lodepng_chunk_private(const unsigned char* chunk) { + return((chunk[6] & 32) != 0); +} + +unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk) { + return((chunk[7] & 32) != 0); +} + +unsigned char* lodepng_chunk_data(unsigned char* chunk) { + return &chunk[8]; +} + +const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk) { + return &chunk[8]; +} + +unsigned lodepng_chunk_check_crc(const unsigned char* chunk) { + unsigned length = lodepng_chunk_length(chunk); + unsigned CRC = lodepng_read32bitInt(&chunk[length + 8]); + /*the CRC is taken of the data and the 4 chunk type letters, not the length*/ + unsigned checksum = 
lodepng_crc32(&chunk[4], length + 4); + if(CRC != checksum) return 1; + else return 0; +} + +void lodepng_chunk_generate_crc(unsigned char* chunk) { + unsigned length = lodepng_chunk_length(chunk); + unsigned CRC = lodepng_crc32(&chunk[4], length + 4); + lodepng_set32bitInt(chunk + 8 + length, CRC); +} + +unsigned char* lodepng_chunk_next(unsigned char* chunk, unsigned char* end) { + if(chunk >= end || end - chunk < 12) return end; /*too small to contain a chunk*/ + if(chunk[0] == 0x89 && chunk[1] == 0x50 && chunk[2] == 0x4e && chunk[3] == 0x47 + && chunk[4] == 0x0d && chunk[5] == 0x0a && chunk[6] == 0x1a && chunk[7] == 0x0a) { + /* Is PNG magic header at start of PNG file. Jump to first actual chunk. */ + return chunk + 8; + } else { + size_t total_chunk_length; + unsigned char* result; + if(lodepng_addofl(lodepng_chunk_length(chunk), 12, &total_chunk_length)) return end; + result = chunk + total_chunk_length; + if(result < chunk) return end; /*pointer overflow*/ + return result; + } +} + +const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk, const unsigned char* end) { + if(chunk >= end || end - chunk < 12) return end; /*too small to contain a chunk*/ + if(chunk[0] == 0x89 && chunk[1] == 0x50 && chunk[2] == 0x4e && chunk[3] == 0x47 + && chunk[4] == 0x0d && chunk[5] == 0x0a && chunk[6] == 0x1a && chunk[7] == 0x0a) { + /* Is PNG magic header at start of PNG file. Jump to first actual chunk. */ + return chunk + 8; + } else { + size_t total_chunk_length; + const unsigned char* result; + if(lodepng_addofl(lodepng_chunk_length(chunk), 12, &total_chunk_length)) return end; + result = chunk + total_chunk_length; + if(result < chunk) return end; /*pointer overflow*/ + return result; + } +} + +unsigned char* lodepng_chunk_find(unsigned char* chunk, unsigned char* end, const char type[5]) { + for(;;) { + if(chunk >= end || end - chunk < 12) return 0; /* past file end: chunk + 12 > end */ + if(lodepng_chunk_type_equals(chunk, type)) return chunk; + chunk = lodepng_chunk_next(chunk, end); + } +} + +const unsigned char* lodepng_chunk_find_const(const unsigned char* chunk, const unsigned char* end, const char type[5]) { + for(;;) { + if(chunk >= end || end - chunk < 12) return 0; /* past file end: chunk + 12 > end */ + if(lodepng_chunk_type_equals(chunk, type)) return chunk; + chunk = lodepng_chunk_next_const(chunk, end); + } +} + +unsigned lodepng_chunk_append(unsigned char** out, size_t* outsize, const unsigned char* chunk) { + unsigned i; + size_t total_chunk_length, new_length; + unsigned char *chunk_start, *new_buffer; + + if(lodepng_addofl(lodepng_chunk_length(chunk), 12, &total_chunk_length)) return 77; + if(lodepng_addofl(*outsize, total_chunk_length, &new_length)) return 77; + + new_buffer = (unsigned char*)lodepng_realloc(*out, new_length); + if(!new_buffer) return 83; /*alloc fail*/ + (*out) = new_buffer; + (*outsize) = new_length; + chunk_start = &(*out)[new_length - total_chunk_length]; + + for(i = 0; i != total_chunk_length; ++i) chunk_start[i] = chunk[i]; + + return 0; +} + +/*Sets length and name and allocates the space for data and crc but does not +set data or crc yet. Returns the start of the chunk in chunk. The start of +the data is at chunk + 8. 
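+For orientation, an illustrative sketch of the chunk layout these helpers assume,
+with offsets relative to the returned chunk pointer:
+  chunk[0..3]            big-endian length of the data field
+  chunk[4..7]            the four chunk type letters
+  chunk[8..8+len-1]      the data
+  chunk[8+len..11+len]   CRC32 over the type letters and the data, never the length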
To finalize chunk, add the data, then use +lodepng_chunk_generate_crc */ +static unsigned lodepng_chunk_init(unsigned char** chunk, + ucvector* out, + unsigned length, const char* type) { + size_t new_length = out->size; + if(lodepng_addofl(new_length, length, &new_length)) return 77; + if(lodepng_addofl(new_length, 12, &new_length)) return 77; + if(!ucvector_resize(out, new_length)) return 83; /*alloc fail*/ + *chunk = out->data + new_length - length - 12u; + + /*1: length*/ + lodepng_set32bitInt(*chunk, length); + + /*2: chunk name (4 letters)*/ + lodepng_memcpy(*chunk + 4, type, 4); + + return 0; +} + +/* like lodepng_chunk_create but with custom allocsize */ +static unsigned lodepng_chunk_createv(ucvector* out, + unsigned length, const char* type, const unsigned char* data) { + unsigned char* chunk; + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, length, type)); + + /*3: the data*/ + lodepng_memcpy(chunk + 8, data, length); + + /*4: CRC (of the chunkname characters and the data)*/ + lodepng_chunk_generate_crc(chunk); + + return 0; +} + +unsigned lodepng_chunk_create(unsigned char** out, size_t* outsize, + unsigned length, const char* type, const unsigned char* data) { + ucvector v = ucvector_init(*out, *outsize); + unsigned error = lodepng_chunk_createv(&v, length, type, data); + *out = v.data; + *outsize = v.size; + return error; +} + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / Color types, channels, bits / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +/*checks if the colortype is valid and the bitdepth bd is allowed for this colortype. +Return value is a LodePNG error code.*/ +static unsigned checkColorValidity(LodePNGColorType colortype, unsigned bd) { + switch(colortype) { + case LCT_GREY: if(!(bd == 1 || bd == 2 || bd == 4 || bd == 8 || bd == 16)) return 37; break; + case LCT_RGB: if(!( bd == 8 || bd == 16)) return 37; break; + case LCT_PALETTE: if(!(bd == 1 || bd == 2 || bd == 4 || bd == 8 )) return 37; break; + case LCT_GREY_ALPHA: if(!( bd == 8 || bd == 16)) return 37; break; + case LCT_RGBA: if(!( bd == 8 || bd == 16)) return 37; break; + case LCT_MAX_OCTET_VALUE: return 31; /* invalid color type */ + default: return 31; /* invalid color type */ + } + return 0; /*allowed color type / bits combination*/ +} + +static unsigned getNumColorChannels(LodePNGColorType colortype) { + switch(colortype) { + case LCT_GREY: return 1; + case LCT_RGB: return 3; + case LCT_PALETTE: return 1; + case LCT_GREY_ALPHA: return 2; + case LCT_RGBA: return 4; + case LCT_MAX_OCTET_VALUE: return 0; /* invalid color type */ + default: return 0; /*invalid color type*/ + } +} + +static unsigned lodepng_get_bpp_lct(LodePNGColorType colortype, unsigned bitdepth) { + /*bits per pixel is amount of channels * bits per channel*/ + return getNumColorChannels(colortype) * bitdepth; +} + +/* ////////////////////////////////////////////////////////////////////////// */ + +void lodepng_color_mode_init(LodePNGColorMode* info) { + info->key_defined = 0; + info->key_r = info->key_g = info->key_b = 0; + info->colortype = LCT_RGBA; + info->bitdepth = 8; + info->palette = 0; + info->palettesize = 0; +} + +/*allocates palette memory if needed, and initializes all colors to black*/ +static void lodepng_color_mode_alloc_palette(LodePNGColorMode* info) { + size_t i; + /*if the palette is already allocated, it will have size 1024 so no reallocation needed in that case*/ + /*the palette must have room for up to 256 colors with 4 bytes 
each.*/ + if(!info->palette) info->palette = (unsigned char*)lodepng_malloc(1024); + if(!info->palette) return; /*alloc fail*/ + for(i = 0; i != 256; ++i) { + /*Initialize all unused colors with black, the value used for invalid palette indices. + This is an error according to the PNG spec, but common PNG decoders make it black instead. + That makes color conversion slightly faster due to no error handling needed.*/ + info->palette[i * 4 + 0] = 0; + info->palette[i * 4 + 1] = 0; + info->palette[i * 4 + 2] = 0; + info->palette[i * 4 + 3] = 255; + } +} + +void lodepng_color_mode_cleanup(LodePNGColorMode* info) { + lodepng_palette_clear(info); +} + +unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source) { + lodepng_color_mode_cleanup(dest); + lodepng_memcpy(dest, source, sizeof(LodePNGColorMode)); + if(source->palette) { + dest->palette = (unsigned char*)lodepng_malloc(1024); + if(!dest->palette && source->palettesize) return 83; /*alloc fail*/ + lodepng_memcpy(dest->palette, source->palette, source->palettesize * 4); + } + return 0; +} + +LodePNGColorMode lodepng_color_mode_make(LodePNGColorType colortype, unsigned bitdepth) { + LodePNGColorMode result; + lodepng_color_mode_init(&result); + result.colortype = colortype; + result.bitdepth = bitdepth; + return result; +} + +static int lodepng_color_mode_equal(const LodePNGColorMode* a, const LodePNGColorMode* b) { + size_t i; + if(a->colortype != b->colortype) return 0; + if(a->bitdepth != b->bitdepth) return 0; + if(a->key_defined != b->key_defined) return 0; + if(a->key_defined) { + if(a->key_r != b->key_r) return 0; + if(a->key_g != b->key_g) return 0; + if(a->key_b != b->key_b) return 0; + } + if(a->palettesize != b->palettesize) return 0; + for(i = 0; i != a->palettesize * 4; ++i) { + if(a->palette[i] != b->palette[i]) return 0; + } + return 1; +} + +void lodepng_palette_clear(LodePNGColorMode* info) { + if(info->palette) lodepng_free(info->palette); + info->palette = 0; + info->palettesize = 0; +} + +unsigned lodepng_palette_add(LodePNGColorMode* info, + unsigned char r, unsigned char g, unsigned char b, unsigned char a) { + if(!info->palette) /*allocate palette if empty*/ { + lodepng_color_mode_alloc_palette(info); + if(!info->palette) return 83; /*alloc fail*/ + } + if(info->palettesize >= 256) { + return 108; /*too many palette values*/ + } + info->palette[4 * info->palettesize + 0] = r; + info->palette[4 * info->palettesize + 1] = g; + info->palette[4 * info->palettesize + 2] = b; + info->palette[4 * info->palettesize + 3] = a; + ++info->palettesize; + return 0; +} + +/*calculate bits per pixel out of colortype and bitdepth*/ +unsigned lodepng_get_bpp(const LodePNGColorMode* info) { + return lodepng_get_bpp_lct(info->colortype, info->bitdepth); +} + +unsigned lodepng_get_channels(const LodePNGColorMode* info) { + return getNumColorChannels(info->colortype); +} + +unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info) { + return info->colortype == LCT_GREY || info->colortype == LCT_GREY_ALPHA; +} + +unsigned lodepng_is_alpha_type(const LodePNGColorMode* info) { + return (info->colortype & 4) != 0; /*4 or 6*/ +} + +unsigned lodepng_is_palette_type(const LodePNGColorMode* info) { + return info->colortype == LCT_PALETTE; +} + +unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info) { + size_t i; + for(i = 0; i != info->palettesize; ++i) { + if(info->palette[i * 4 + 3] < 255) return 1; + } + return 0; +} + +unsigned lodepng_can_have_alpha(const LodePNGColorMode* info) { + return 
info->key_defined + || lodepng_is_alpha_type(info) + || lodepng_has_palette_alpha(info); +} + +static size_t lodepng_get_raw_size_lct(unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth) { + size_t bpp = lodepng_get_bpp_lct(colortype, bitdepth); + size_t n = (size_t)w * (size_t)h; + return ((n / 8u) * bpp) + ((n & 7u) * bpp + 7u) / 8u; +} + +size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color) { + return lodepng_get_raw_size_lct(w, h, color->colortype, color->bitdepth); +} + + +#ifdef LODEPNG_COMPILE_PNG + +/*in an idat chunk, each scanline is a multiple of 8 bits, unlike the lodepng output buffer, +and in addition has one extra byte per line: the filter byte. So this gives a larger +result than lodepng_get_raw_size. Set h to 1 to get the size of 1 row including filter byte. */ +static size_t lodepng_get_raw_size_idat(unsigned w, unsigned h, unsigned bpp) { + /* + 1 for the filter byte, and possibly plus padding bits per line. */ + /* Ignoring casts, the expression is equal to (w * bpp + 7) / 8 + 1, but avoids overflow of w * bpp */ + size_t line = ((size_t)(w / 8u) * bpp) + 1u + ((w & 7u) * bpp + 7u) / 8u; + return (size_t)h * line; +} + +#ifdef LODEPNG_COMPILE_DECODER +/*Safely checks whether size_t overflow can be caused due to amount of pixels. +This check is overcautious rather than precise. If this check indicates no overflow, +you can safely compute in a size_t (but not an unsigned): +-(size_t)w * (size_t)h * 8 +-amount of bytes in IDAT (including filter, padding and Adam7 bytes) +-amount of bytes in raw color model +Returns 1 if overflow possible, 0 if not. +*/ +static int lodepng_pixel_overflow(unsigned w, unsigned h, + const LodePNGColorMode* pngcolor, const LodePNGColorMode* rawcolor) { + size_t bpp = LODEPNG_MAX(lodepng_get_bpp(pngcolor), lodepng_get_bpp(rawcolor)); + size_t numpixels, total; + size_t line; /* bytes per line in worst case */ + + if(lodepng_mulofl((size_t)w, (size_t)h, &numpixels)) return 1; + if(lodepng_mulofl(numpixels, 8, &total)) return 1; /* bit pointer with 8-bit color, or 8 bytes per channel color */ + + /* Bytes per scanline with the expression "(w / 8u) * bpp) + ((w & 7u) * bpp + 7u) / 8u" */ + if(lodepng_mulofl((size_t)(w / 8u), bpp, &line)) return 1; + if(lodepng_addofl(line, ((w & 7u) * bpp + 7u) / 8u, &line)) return 1; + + if(lodepng_addofl(line, 5, &line)) return 1; /* 5 bytes overhead per line: 1 filterbyte, 4 for Adam7 worst case */ + if(lodepng_mulofl(line, h, &total)) return 1; /* Total bytes in worst case */ + + return 0; /* no overflow */ +} +#endif /*LODEPNG_COMPILE_DECODER*/ +#endif /*LODEPNG_COMPILE_PNG*/ + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + +static void LodePNGUnknownChunks_init(LodePNGInfo* info) { + unsigned i; + for(i = 0; i != 3; ++i) info->unknown_chunks_data[i] = 0; + for(i = 0; i != 3; ++i) info->unknown_chunks_size[i] = 0; +} + +static void LodePNGUnknownChunks_cleanup(LodePNGInfo* info) { + unsigned i; + for(i = 0; i != 3; ++i) lodepng_free(info->unknown_chunks_data[i]); +} + +static unsigned LodePNGUnknownChunks_copy(LodePNGInfo* dest, const LodePNGInfo* src) { + unsigned i; + + LodePNGUnknownChunks_cleanup(dest); + + for(i = 0; i != 3; ++i) { + size_t j; + dest->unknown_chunks_size[i] = src->unknown_chunks_size[i]; + dest->unknown_chunks_data[i] = (unsigned char*)lodepng_malloc(src->unknown_chunks_size[i]); + if(!dest->unknown_chunks_data[i] && dest->unknown_chunks_size[i]) return 83; /*alloc fail*/ + for(j = 0; j < src->unknown_chunks_size[i]; ++j) { + 
dest->unknown_chunks_data[i][j] = src->unknown_chunks_data[i][j]; + } + } + + return 0; +} + +/******************************************************************************/ + +static void LodePNGText_init(LodePNGInfo* info) { + info->text_num = 0; + info->text_keys = NULL; + info->text_strings = NULL; +} + +static void LodePNGText_cleanup(LodePNGInfo* info) { + size_t i; + for(i = 0; i != info->text_num; ++i) { + string_cleanup(&info->text_keys[i]); + string_cleanup(&info->text_strings[i]); + } + lodepng_free(info->text_keys); + lodepng_free(info->text_strings); +} + +static unsigned LodePNGText_copy(LodePNGInfo* dest, const LodePNGInfo* source) { + size_t i = 0; + dest->text_keys = NULL; + dest->text_strings = NULL; + dest->text_num = 0; + for(i = 0; i != source->text_num; ++i) { + CERROR_TRY_RETURN(lodepng_add_text(dest, source->text_keys[i], source->text_strings[i])); + } + return 0; +} + +static unsigned lodepng_add_text_sized(LodePNGInfo* info, const char* key, const char* str, size_t size) { + char** new_keys = (char**)(lodepng_realloc(info->text_keys, sizeof(char*) * (info->text_num + 1))); + char** new_strings = (char**)(lodepng_realloc(info->text_strings, sizeof(char*) * (info->text_num + 1))); + + if(new_keys) info->text_keys = new_keys; + if(new_strings) info->text_strings = new_strings; + + if(!new_keys || !new_strings) return 83; /*alloc fail*/ + + ++info->text_num; + info->text_keys[info->text_num - 1] = alloc_string(key); + info->text_strings[info->text_num - 1] = alloc_string_sized(str, size); + if(!info->text_keys[info->text_num - 1] || !info->text_strings[info->text_num - 1]) return 83; /*alloc fail*/ + + return 0; +} + +unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str) { + return lodepng_add_text_sized(info, key, str, lodepng_strlen(str)); +} + +void lodepng_clear_text(LodePNGInfo* info) { + LodePNGText_cleanup(info); +} + +/******************************************************************************/ + +static void LodePNGIText_init(LodePNGInfo* info) { + info->itext_num = 0; + info->itext_keys = NULL; + info->itext_langtags = NULL; + info->itext_transkeys = NULL; + info->itext_strings = NULL; +} + +static void LodePNGIText_cleanup(LodePNGInfo* info) { + size_t i; + for(i = 0; i != info->itext_num; ++i) { + string_cleanup(&info->itext_keys[i]); + string_cleanup(&info->itext_langtags[i]); + string_cleanup(&info->itext_transkeys[i]); + string_cleanup(&info->itext_strings[i]); + } + lodepng_free(info->itext_keys); + lodepng_free(info->itext_langtags); + lodepng_free(info->itext_transkeys); + lodepng_free(info->itext_strings); +} + +static unsigned LodePNGIText_copy(LodePNGInfo* dest, const LodePNGInfo* source) { + size_t i = 0; + dest->itext_keys = NULL; + dest->itext_langtags = NULL; + dest->itext_transkeys = NULL; + dest->itext_strings = NULL; + dest->itext_num = 0; + for(i = 0; i != source->itext_num; ++i) { + CERROR_TRY_RETURN(lodepng_add_itext(dest, source->itext_keys[i], source->itext_langtags[i], + source->itext_transkeys[i], source->itext_strings[i])); + } + return 0; +} + +void lodepng_clear_itext(LodePNGInfo* info) { + LodePNGIText_cleanup(info); +} + +static unsigned lodepng_add_itext_sized(LodePNGInfo* info, const char* key, const char* langtag, + const char* transkey, const char* str, size_t size) { + char** new_keys = (char**)(lodepng_realloc(info->itext_keys, sizeof(char*) * (info->itext_num + 1))); + char** new_langtags = (char**)(lodepng_realloc(info->itext_langtags, sizeof(char*) * (info->itext_num + 1))); + char** 
new_transkeys = (char**)(lodepng_realloc(info->itext_transkeys, sizeof(char*) * (info->itext_num + 1))); + char** new_strings = (char**)(lodepng_realloc(info->itext_strings, sizeof(char*) * (info->itext_num + 1))); + + if(new_keys) info->itext_keys = new_keys; + if(new_langtags) info->itext_langtags = new_langtags; + if(new_transkeys) info->itext_transkeys = new_transkeys; + if(new_strings) info->itext_strings = new_strings; + + if(!new_keys || !new_langtags || !new_transkeys || !new_strings) return 83; /*alloc fail*/ + + ++info->itext_num; + + info->itext_keys[info->itext_num - 1] = alloc_string(key); + info->itext_langtags[info->itext_num - 1] = alloc_string(langtag); + info->itext_transkeys[info->itext_num - 1] = alloc_string(transkey); + info->itext_strings[info->itext_num - 1] = alloc_string_sized(str, size); + + return 0; +} + +unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag, + const char* transkey, const char* str) { + return lodepng_add_itext_sized(info, key, langtag, transkey, str, lodepng_strlen(str)); +} + +/* same as set but does not delete */ +static unsigned lodepng_assign_icc(LodePNGInfo* info, const char* name, const unsigned char* profile, unsigned profile_size) { + if(profile_size == 0) return 100; /*invalid ICC profile size*/ + + info->iccp_name = alloc_string(name); + info->iccp_profile = (unsigned char*)lodepng_malloc(profile_size); + + if(!info->iccp_name || !info->iccp_profile) return 83; /*alloc fail*/ + + lodepng_memcpy(info->iccp_profile, profile, profile_size); + info->iccp_profile_size = profile_size; + + return 0; /*ok*/ +} + +unsigned lodepng_set_icc(LodePNGInfo* info, const char* name, const unsigned char* profile, unsigned profile_size) { + if(info->iccp_name) lodepng_clear_icc(info); + info->iccp_defined = 1; + + return lodepng_assign_icc(info, name, profile, profile_size); +} + +void lodepng_clear_icc(LodePNGInfo* info) { + string_cleanup(&info->iccp_name); + lodepng_free(info->iccp_profile); + info->iccp_profile = NULL; + info->iccp_profile_size = 0; + info->iccp_defined = 0; +} +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + +void lodepng_info_init(LodePNGInfo* info) { + lodepng_color_mode_init(&info->color); + info->interlace_method = 0; + info->compression_method = 0; + info->filter_method = 0; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + info->background_defined = 0; + info->background_r = info->background_g = info->background_b = 0; + + LodePNGText_init(info); + LodePNGIText_init(info); + + info->time_defined = 0; + info->phys_defined = 0; + + info->gama_defined = 0; + info->chrm_defined = 0; + info->srgb_defined = 0; + info->iccp_defined = 0; + info->iccp_name = NULL; + info->iccp_profile = NULL; + + LodePNGUnknownChunks_init(info); +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ +} + +void lodepng_info_cleanup(LodePNGInfo* info) { + lodepng_color_mode_cleanup(&info->color); +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + LodePNGText_cleanup(info); + LodePNGIText_cleanup(info); + + lodepng_clear_icc(info); + + LodePNGUnknownChunks_cleanup(info); +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ +} + +unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source) { + lodepng_info_cleanup(dest); + lodepng_memcpy(dest, source, sizeof(LodePNGInfo)); + lodepng_color_mode_init(&dest->color); + CERROR_TRY_RETURN(lodepng_color_mode_copy(&dest->color, &source->color)); + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + CERROR_TRY_RETURN(LodePNGText_copy(dest, source)); + CERROR_TRY_RETURN(LodePNGIText_copy(dest, source)); + 
if(source->iccp_defined) { + CERROR_TRY_RETURN(lodepng_assign_icc(dest, source->iccp_name, source->iccp_profile, source->iccp_profile_size)); + } + + LodePNGUnknownChunks_init(dest); + CERROR_TRY_RETURN(LodePNGUnknownChunks_copy(dest, source)); +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + return 0; +} + +/* ////////////////////////////////////////////////////////////////////////// */ + +/*index: bitgroup index, bits: bitgroup size(1, 2 or 4), in: bitgroup value, out: octet array to add bits to*/ +static void addColorBits(unsigned char* out, size_t index, unsigned bits, unsigned in) { + unsigned m = bits == 1 ? 7 : bits == 2 ? 3 : 1; /*8 / bits - 1*/ + /*p = the partial index in the byte, e.g. with 4 palettebits it is 0 for first half or 1 for second half*/ + unsigned p = index & m; + in &= (1u << bits) - 1u; /*filter out any other bits of the input value*/ + in = in << (bits * (m - p)); + if(p == 0) out[index * bits / 8u] = in; + else out[index * bits / 8u] |= in; +} + +typedef struct ColorTree ColorTree; + +/* +One node of a color tree +This is the data structure used to count the number of unique colors and to get a palette +index for a color. It's like an octree, but because the alpha channel is used too, each +node has 16 instead of 8 children. +*/ +struct ColorTree { + ColorTree* children[16]; /*up to 16 pointers to ColorTree of next level*/ + int index; /*the payload. Only has a meaningful value if this is in the last level*/ +}; + +static void color_tree_init(ColorTree* tree) { + lodepng_memset(tree->children, 0, 16 * sizeof(*tree->children)); + tree->index = -1; +} + +static void color_tree_cleanup(ColorTree* tree) { + int i; + for(i = 0; i != 16; ++i) { + if(tree->children[i]) { + color_tree_cleanup(tree->children[i]); + lodepng_free(tree->children[i]); + } + } +} + +/*returns -1 if color not present, its index otherwise*/ +static int color_tree_get(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a) { + int bit = 0; + for(bit = 0; bit < 8; ++bit) { + int i = 8 * ((r >> bit) & 1) + 4 * ((g >> bit) & 1) + 2 * ((b >> bit) & 1) + 1 * ((a >> bit) & 1); + if(!tree->children[i]) return -1; + else tree = tree->children[i]; + } + return tree ? tree->index : -1; +} + +#ifdef LODEPNG_COMPILE_ENCODER +static int color_tree_has(ColorTree* tree, unsigned char r, unsigned char g, unsigned char b, unsigned char a) { + return color_tree_get(tree, r, g, b, a) >= 0; +} +#endif /*LODEPNG_COMPILE_ENCODER*/ + +/*color is not allowed to already exist. 
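+As an illustration of the descent performed by color_tree_get and color_tree_add below:
+at each of the 8 levels one bit of every channel picks a child via
+  i = 8 * r_bit + 4 * g_bit + 2 * b_bit + 1 * a_bit
+so the color r=g=b=a=255 follows child 15 on every level, while r=255, g=b=0, a=255
+follows child 9 on every level.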
+Index should be >= 0 (it's signed to be compatible with using -1 for "doesn't exist") +Returns error code, or 0 if ok*/ +static unsigned color_tree_add(ColorTree* tree, + unsigned char r, unsigned char g, unsigned char b, unsigned char a, unsigned index) { + int bit; + for(bit = 0; bit < 8; ++bit) { + int i = 8 * ((r >> bit) & 1) + 4 * ((g >> bit) & 1) + 2 * ((b >> bit) & 1) + 1 * ((a >> bit) & 1); + if(!tree->children[i]) { + tree->children[i] = (ColorTree*)lodepng_malloc(sizeof(ColorTree)); + if(!tree->children[i]) return 83; /*alloc fail*/ + color_tree_init(tree->children[i]); + } + tree = tree->children[i]; + } + tree->index = (int)index; + return 0; +} + +/*put a pixel, given its RGBA color, into image of any color type*/ +static unsigned rgba8ToPixel(unsigned char* out, size_t i, + const LodePNGColorMode* mode, ColorTree* tree /*for palette*/, + unsigned char r, unsigned char g, unsigned char b, unsigned char a) { + if(mode->colortype == LCT_GREY) { + unsigned char gray = r; /*((unsigned short)r + g + b) / 3u;*/ + if(mode->bitdepth == 8) out[i] = gray; + else if(mode->bitdepth == 16) out[i * 2 + 0] = out[i * 2 + 1] = gray; + else { + /*take the most significant bits of gray*/ + gray = ((unsigned)gray >> (8u - mode->bitdepth)) & ((1u << mode->bitdepth) - 1u); + addColorBits(out, i, mode->bitdepth, gray); + } + } else if(mode->colortype == LCT_RGB) { + if(mode->bitdepth == 8) { + out[i * 3 + 0] = r; + out[i * 3 + 1] = g; + out[i * 3 + 2] = b; + } else { + out[i * 6 + 0] = out[i * 6 + 1] = r; + out[i * 6 + 2] = out[i * 6 + 3] = g; + out[i * 6 + 4] = out[i * 6 + 5] = b; + } + } else if(mode->colortype == LCT_PALETTE) { + int index = color_tree_get(tree, r, g, b, a); + if(index < 0) return 82; /*color not in palette*/ + if(mode->bitdepth == 8) out[i] = index; + else addColorBits(out, i, mode->bitdepth, (unsigned)index); + } else if(mode->colortype == LCT_GREY_ALPHA) { + unsigned char gray = r; /*((unsigned short)r + g + b) / 3u;*/ + if(mode->bitdepth == 8) { + out[i * 2 + 0] = gray; + out[i * 2 + 1] = a; + } else if(mode->bitdepth == 16) { + out[i * 4 + 0] = out[i * 4 + 1] = gray; + out[i * 4 + 2] = out[i * 4 + 3] = a; + } + } else if(mode->colortype == LCT_RGBA) { + if(mode->bitdepth == 8) { + out[i * 4 + 0] = r; + out[i * 4 + 1] = g; + out[i * 4 + 2] = b; + out[i * 4 + 3] = a; + } else { + out[i * 8 + 0] = out[i * 8 + 1] = r; + out[i * 8 + 2] = out[i * 8 + 3] = g; + out[i * 8 + 4] = out[i * 8 + 5] = b; + out[i * 8 + 6] = out[i * 8 + 7] = a; + } + } + + return 0; /*no error*/ +} + +/*put a pixel, given its RGBA16 color, into image of any color 16-bitdepth type*/ +static void rgba16ToPixel(unsigned char* out, size_t i, + const LodePNGColorMode* mode, + unsigned short r, unsigned short g, unsigned short b, unsigned short a) { + if(mode->colortype == LCT_GREY) { + unsigned short gray = r; /*((unsigned)r + g + b) / 3u;*/ + out[i * 2 + 0] = (gray >> 8) & 255; + out[i * 2 + 1] = gray & 255; + } else if(mode->colortype == LCT_RGB) { + out[i * 6 + 0] = (r >> 8) & 255; + out[i * 6 + 1] = r & 255; + out[i * 6 + 2] = (g >> 8) & 255; + out[i * 6 + 3] = g & 255; + out[i * 6 + 4] = (b >> 8) & 255; + out[i * 6 + 5] = b & 255; + } else if(mode->colortype == LCT_GREY_ALPHA) { + unsigned short gray = r; /*((unsigned)r + g + b) / 3u;*/ + out[i * 4 + 0] = (gray >> 8) & 255; + out[i * 4 + 1] = gray & 255; + out[i * 4 + 2] = (a >> 8) & 255; + out[i * 4 + 3] = a & 255; + } else if(mode->colortype == LCT_RGBA) { + out[i * 8 + 0] = (r >> 8) & 255; + out[i * 8 + 1] = r & 255; + out[i * 8 + 2] = (g >> 8) & 255; + 
out[i * 8 + 3] = g & 255; + out[i * 8 + 4] = (b >> 8) & 255; + out[i * 8 + 5] = b & 255; + out[i * 8 + 6] = (a >> 8) & 255; + out[i * 8 + 7] = a & 255; + } +} + +/*Get RGBA8 color of pixel with index i (y * width + x) from the raw image with given color type.*/ +static void getPixelColorRGBA8(unsigned char* r, unsigned char* g, + unsigned char* b, unsigned char* a, + const unsigned char* in, size_t i, + const LodePNGColorMode* mode) { + if(mode->colortype == LCT_GREY) { + if(mode->bitdepth == 8) { + *r = *g = *b = in[i]; + if(mode->key_defined && *r == mode->key_r) *a = 0; + else *a = 255; + } else if(mode->bitdepth == 16) { + *r = *g = *b = in[i * 2 + 0]; + if(mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r) *a = 0; + else *a = 255; + } else { + unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/ + size_t j = i * mode->bitdepth; + unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth); + *r = *g = *b = (value * 255) / highest; + if(mode->key_defined && value == mode->key_r) *a = 0; + else *a = 255; + } + } else if(mode->colortype == LCT_RGB) { + if(mode->bitdepth == 8) { + *r = in[i * 3 + 0]; *g = in[i * 3 + 1]; *b = in[i * 3 + 2]; + if(mode->key_defined && *r == mode->key_r && *g == mode->key_g && *b == mode->key_b) *a = 0; + else *a = 255; + } else { + *r = in[i * 6 + 0]; + *g = in[i * 6 + 2]; + *b = in[i * 6 + 4]; + if(mode->key_defined && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r + && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g + && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b) *a = 0; + else *a = 255; + } + } else if(mode->colortype == LCT_PALETTE) { + unsigned index; + if(mode->bitdepth == 8) index = in[i]; + else { + size_t j = i * mode->bitdepth; + index = readBitsFromReversedStream(&j, in, mode->bitdepth); + } + /*out of bounds of palette not checked: see lodepng_color_mode_alloc_palette.*/ + *r = mode->palette[index * 4 + 0]; + *g = mode->palette[index * 4 + 1]; + *b = mode->palette[index * 4 + 2]; + *a = mode->palette[index * 4 + 3]; + } else if(mode->colortype == LCT_GREY_ALPHA) { + if(mode->bitdepth == 8) { + *r = *g = *b = in[i * 2 + 0]; + *a = in[i * 2 + 1]; + } else { + *r = *g = *b = in[i * 4 + 0]; + *a = in[i * 4 + 2]; + } + } else if(mode->colortype == LCT_RGBA) { + if(mode->bitdepth == 8) { + *r = in[i * 4 + 0]; + *g = in[i * 4 + 1]; + *b = in[i * 4 + 2]; + *a = in[i * 4 + 3]; + } else { + *r = in[i * 8 + 0]; + *g = in[i * 8 + 2]; + *b = in[i * 8 + 4]; + *a = in[i * 8 + 6]; + } + } +} + +/*Similar to getPixelColorRGBA8, but with all the for loops inside of the color +mode test cases, optimized to convert the colors much faster, when converting +to the common case of RGBA with 8 bit per channel. 
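+As a rough sizing example: converting a w x h image needs numpixels = (size_t)w * (size_t)h
+and a buffer of numpixels * 4 bytes, which is what lodepng_get_raw_size(w, h, mode_out)
+evaluates to for 8-bit RGBA.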
buffer must be RGBA with +enough memory.*/ +static void getPixelColorsRGBA8(unsigned char* LODEPNG_RESTRICT buffer, size_t numpixels, + const unsigned char* LODEPNG_RESTRICT in, + const LodePNGColorMode* mode) { + unsigned num_channels = 4; + size_t i; + if(mode->colortype == LCT_GREY) { + if(mode->bitdepth == 8) { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = buffer[1] = buffer[2] = in[i]; + buffer[3] = 255; + } + if(mode->key_defined) { + buffer -= numpixels * num_channels; + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + if(buffer[0] == mode->key_r) buffer[3] = 0; + } + } + } else if(mode->bitdepth == 16) { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = buffer[1] = buffer[2] = in[i * 2]; + buffer[3] = mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r ? 0 : 255; + } + } else { + unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/ + size_t j = 0; + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth); + buffer[0] = buffer[1] = buffer[2] = (value * 255) / highest; + buffer[3] = mode->key_defined && value == mode->key_r ? 0 : 255; + } + } + } else if(mode->colortype == LCT_RGB) { + if(mode->bitdepth == 8) { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + lodepng_memcpy(buffer, &in[i * 3], 3); + buffer[3] = 255; + } + if(mode->key_defined) { + buffer -= numpixels * num_channels; + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + if(buffer[0] == mode->key_r && buffer[1]== mode->key_g && buffer[2] == mode->key_b) buffer[3] = 0; + } + } + } else { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = in[i * 6 + 0]; + buffer[1] = in[i * 6 + 2]; + buffer[2] = in[i * 6 + 4]; + buffer[3] = mode->key_defined + && 256U * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r + && 256U * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g + && 256U * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b ? 
0 : 255; + } + } + } else if(mode->colortype == LCT_PALETTE) { + if(mode->bitdepth == 8) { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + unsigned index = in[i]; + /*out of bounds of palette not checked: see lodepng_color_mode_alloc_palette.*/ + lodepng_memcpy(buffer, &mode->palette[index * 4], 4); + } + } else { + size_t j = 0; + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + unsigned index = readBitsFromReversedStream(&j, in, mode->bitdepth); + /*out of bounds of palette not checked: see lodepng_color_mode_alloc_palette.*/ + lodepng_memcpy(buffer, &mode->palette[index * 4], 4); + } + } + } else if(mode->colortype == LCT_GREY_ALPHA) { + if(mode->bitdepth == 8) { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = buffer[1] = buffer[2] = in[i * 2 + 0]; + buffer[3] = in[i * 2 + 1]; + } + } else { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = buffer[1] = buffer[2] = in[i * 4 + 0]; + buffer[3] = in[i * 4 + 2]; + } + } + } else if(mode->colortype == LCT_RGBA) { + if(mode->bitdepth == 8) { + lodepng_memcpy(buffer, in, numpixels * 4); + } else { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = in[i * 8 + 0]; + buffer[1] = in[i * 8 + 2]; + buffer[2] = in[i * 8 + 4]; + buffer[3] = in[i * 8 + 6]; + } + } + } +} + +/*Similar to getPixelColorsRGBA8, but with 3-channel RGB output.*/ +static void getPixelColorsRGB8(unsigned char* LODEPNG_RESTRICT buffer, size_t numpixels, + const unsigned char* LODEPNG_RESTRICT in, + const LodePNGColorMode* mode) { + const unsigned num_channels = 3; + size_t i; + if(mode->colortype == LCT_GREY) { + if(mode->bitdepth == 8) { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = buffer[1] = buffer[2] = in[i]; + } + } else if(mode->bitdepth == 16) { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = buffer[1] = buffer[2] = in[i * 2]; + } + } else { + unsigned highest = ((1U << mode->bitdepth) - 1U); /*highest possible value for this bit depth*/ + size_t j = 0; + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + unsigned value = readBitsFromReversedStream(&j, in, mode->bitdepth); + buffer[0] = buffer[1] = buffer[2] = (value * 255) / highest; + } + } + } else if(mode->colortype == LCT_RGB) { + if(mode->bitdepth == 8) { + lodepng_memcpy(buffer, in, numpixels * 3); + } else { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = in[i * 6 + 0]; + buffer[1] = in[i * 6 + 2]; + buffer[2] = in[i * 6 + 4]; + } + } + } else if(mode->colortype == LCT_PALETTE) { + if(mode->bitdepth == 8) { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + unsigned index = in[i]; + /*out of bounds of palette not checked: see lodepng_color_mode_alloc_palette.*/ + lodepng_memcpy(buffer, &mode->palette[index * 4], 3); + } + } else { + size_t j = 0; + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + unsigned index = readBitsFromReversedStream(&j, in, mode->bitdepth); + /*out of bounds of palette not checked: see lodepng_color_mode_alloc_palette.*/ + lodepng_memcpy(buffer, &mode->palette[index * 4], 3); + } + } + } else if(mode->colortype == LCT_GREY_ALPHA) { + if(mode->bitdepth == 8) { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = buffer[1] = buffer[2] = in[i * 2 + 0]; + } + } else { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = buffer[1] = buffer[2] = in[i * 4 + 0]; + } + } + } else if(mode->colortype == LCT_RGBA) { + if(mode->bitdepth == 8) { + for(i = 
0; i != numpixels; ++i, buffer += num_channels) { + //lodepng_memcpy(buffer, &in[i * 4], 3); // rgb -> rgba, don't call func, this is hot spot + buffer[0] = in[i * 4 + 0]; + buffer[1] = in[i * 4 + 1]; + buffer[2] = in[i * 4 + 2]; + } + } else { + for(i = 0; i != numpixels; ++i, buffer += num_channels) { + buffer[0] = in[i * 8 + 0]; + buffer[1] = in[i * 8 + 2]; + buffer[2] = in[i * 8 + 4]; + } + } + } +} + +/*Get RGBA16 color of pixel with index i (y * width + x) from the raw image with +given color type, but the given color type must be 16-bit itself.*/ +static void getPixelColorRGBA16(unsigned short* r, unsigned short* g, unsigned short* b, unsigned short* a, + const unsigned char* in, size_t i, const LodePNGColorMode* mode) { + if(mode->colortype == LCT_GREY) { + *r = *g = *b = 256 * in[i * 2 + 0] + in[i * 2 + 1]; + if(mode->key_defined && 256U * in[i * 2 + 0] + in[i * 2 + 1] == mode->key_r) *a = 0; + else *a = 65535; + } else if(mode->colortype == LCT_RGB) { + *r = 256u * in[i * 6 + 0] + in[i * 6 + 1]; + *g = 256u * in[i * 6 + 2] + in[i * 6 + 3]; + *b = 256u * in[i * 6 + 4] + in[i * 6 + 5]; + if(mode->key_defined + && 256u * in[i * 6 + 0] + in[i * 6 + 1] == mode->key_r + && 256u * in[i * 6 + 2] + in[i * 6 + 3] == mode->key_g + && 256u * in[i * 6 + 4] + in[i * 6 + 5] == mode->key_b) *a = 0; + else *a = 65535; + } else if(mode->colortype == LCT_GREY_ALPHA) { + *r = *g = *b = 256u * in[i * 4 + 0] + in[i * 4 + 1]; + *a = 256u * in[i * 4 + 2] + in[i * 4 + 3]; + } else if(mode->colortype == LCT_RGBA) { + *r = 256u * in[i * 8 + 0] + in[i * 8 + 1]; + *g = 256u * in[i * 8 + 2] + in[i * 8 + 3]; + *b = 256u * in[i * 8 + 4] + in[i * 8 + 5]; + *a = 256u * in[i * 8 + 6] + in[i * 8 + 7]; + } +} + +unsigned lodepng_convert(unsigned char* out, const unsigned char* in, + const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in, + unsigned w, unsigned h) { + size_t i; + ColorTree tree; + size_t numpixels = (size_t)w * (size_t)h; + unsigned error = 0; + + if(mode_in->colortype == LCT_PALETTE && !mode_in->palette) { + return 107; /* error: must provide palette if input mode is palette */ + } + + if(lodepng_color_mode_equal(mode_out, mode_in)) { + size_t numbytes = lodepng_get_raw_size(w, h, mode_in); + lodepng_memcpy(out, in, numbytes); + return 0; + } + + if(mode_out->colortype == LCT_PALETTE) { + size_t palettesize = mode_out->palettesize; + const unsigned char* palette = mode_out->palette; + size_t palsize = (size_t)1u << mode_out->bitdepth; + /*if the user specified output palette but did not give the values, assume + they want the values of the input color type (assuming that one is palette). + Note that we never create a new palette ourselves.*/ + if(palettesize == 0) { + palettesize = mode_in->palettesize; + palette = mode_in->palette; + /*if the input was also palette with same bitdepth, then the color types are also + equal, so copy literally. 
This to preserve the exact indices that were in the PNG + even in case there are duplicate colors in the palette.*/ + if(mode_in->colortype == LCT_PALETTE && mode_in->bitdepth == mode_out->bitdepth) { + size_t numbytes = lodepng_get_raw_size(w, h, mode_in); + lodepng_memcpy(out, in, numbytes); + return 0; + } + } + if(palettesize < palsize) palsize = palettesize; + color_tree_init(&tree); + for(i = 0; i != palsize; ++i) { + const unsigned char* p = &palette[i * 4]; + error = color_tree_add(&tree, p[0], p[1], p[2], p[3], (unsigned)i); + if(error) break; + } + } + + if(!error) { + if(mode_in->bitdepth == 16 && mode_out->bitdepth == 16) { + for(i = 0; i != numpixels; ++i) { + unsigned short r = 0, g = 0, b = 0, a = 0; + getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in); + rgba16ToPixel(out, i, mode_out, r, g, b, a); + } + } else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGBA) { + getPixelColorsRGBA8(out, numpixels, in, mode_in); + } else if(mode_out->bitdepth == 8 && mode_out->colortype == LCT_RGB) { + getPixelColorsRGB8(out, numpixels, in, mode_in); + } else { + unsigned char r = 0, g = 0, b = 0, a = 0; + for(i = 0; i != numpixels; ++i) { + getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in); + error = rgba8ToPixel(out, i, mode_out, &tree, r, g, b, a); + if(error) break; + } + } + } + + if(mode_out->colortype == LCT_PALETTE) { + color_tree_cleanup(&tree); + } + + return error; +} + + +/* Converts a single rgb color without alpha from one type to another, color bits truncated to +their bitdepth. In case of single channel (gray or palette), only the r channel is used. Slow +function, do not use to process all pixels of an image. Alpha channel not supported on purpose: +this is for bKGD, supporting alpha may prevent it from finding a color in the palette, from the +specification it looks like bKGD should ignore the alpha values of the palette since it can use +any palette index but doesn't have an alpha channel. Idem with ignoring color key. 
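+A small worked example of the scaling used below: an 8-bit gray input of 128 is first
+widened to 16 bits as 128 * 257 = 32896; for an 8-bit output mode the shift is 16 - 8 = 8,
+so 32896 >> 8 yields 128 again.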
*/ +unsigned lodepng_convert_rgb( + unsigned* r_out, unsigned* g_out, unsigned* b_out, + unsigned r_in, unsigned g_in, unsigned b_in, + const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in) { + unsigned r = 0, g = 0, b = 0; + unsigned mul = 65535 / ((1u << mode_in->bitdepth) - 1u); /*65535, 21845, 4369, 257, 1*/ + unsigned shift = 16 - mode_out->bitdepth; + + if(mode_in->colortype == LCT_GREY || mode_in->colortype == LCT_GREY_ALPHA) { + r = g = b = r_in * mul; + } else if(mode_in->colortype == LCT_RGB || mode_in->colortype == LCT_RGBA) { + r = r_in * mul; + g = g_in * mul; + b = b_in * mul; + } else if(mode_in->colortype == LCT_PALETTE) { + if(r_in >= mode_in->palettesize) return 82; + r = mode_in->palette[r_in * 4 + 0] * 257u; + g = mode_in->palette[r_in * 4 + 1] * 257u; + b = mode_in->palette[r_in * 4 + 2] * 257u; + } else { + return 31; + } + + /* now convert to output format */ + if(mode_out->colortype == LCT_GREY || mode_out->colortype == LCT_GREY_ALPHA) { + *r_out = r >> shift ; + } else if(mode_out->colortype == LCT_RGB || mode_out->colortype == LCT_RGBA) { + *r_out = r >> shift ; + *g_out = g >> shift ; + *b_out = b >> shift ; + } else if(mode_out->colortype == LCT_PALETTE) { + unsigned i; + /* a 16-bit color cannot be in the palette */ + if((r >> 8) != (r & 255) || (g >> 8) != (g & 255) || (b >> 8) != (b & 255)) return 82; + for(i = 0; i < mode_out->palettesize; i++) { + unsigned j = i * 4; + if((r >> 8) == mode_out->palette[j + 0] && (g >> 8) == mode_out->palette[j + 1] && + (b >> 8) == mode_out->palette[j + 2]) { + *r_out = i; + return 0; + } + } + return 82; + } else { + return 31; + } + + return 0; +} + +#ifdef LODEPNG_COMPILE_ENCODER + +void lodepng_color_stats_init(LodePNGColorStats* stats) { + /*stats*/ + stats->colored = 0; + stats->key = 0; + stats->key_r = stats->key_g = stats->key_b = 0; + stats->alpha = 0; + stats->numcolors = 0; + stats->bits = 1; + stats->numpixels = 0; + /*settings*/ + stats->allow_palette = 1; + stats->allow_greyscale = 1; +} + +/*function used for debug purposes with C++*/ +/*void printColorStats(LodePNGColorStats* p) { + std::cout << "colored: " << (int)p->colored << ", "; + std::cout << "key: " << (int)p->key << ", "; + std::cout << "key_r: " << (int)p->key_r << ", "; + std::cout << "key_g: " << (int)p->key_g << ", "; + std::cout << "key_b: " << (int)p->key_b << ", "; + std::cout << "alpha: " << (int)p->alpha << ", "; + std::cout << "numcolors: " << (int)p->numcolors << ", "; + std::cout << "bits: " << (int)p->bits << std::endl; +}*/ + +/*Returns how many bits needed to represent given value (max 8 bit)*/ +static unsigned getValueRequiredBits(unsigned char value) { + if(value == 0 || value == 255) return 1; + /*The scaling of 2-bit and 4-bit values uses multiples of 85 and 17*/ + if(value % 17 == 0) return value % 85 == 0 ? 2 : 4; + return 8; +} + +/*stats must already have been inited. */ +unsigned lodepng_compute_color_stats(LodePNGColorStats* stats, + const unsigned char* in, unsigned w, unsigned h, + const LodePNGColorMode* mode_in) { + size_t i; + ColorTree tree; + size_t numpixels = (size_t)w * (size_t)h; + unsigned error = 0; + + /* mark things as done already if it would be impossible to have a more expensive case */ + unsigned colored_done = lodepng_is_greyscale_type(mode_in) ? 1 : 0; + unsigned alpha_done = lodepng_can_have_alpha(mode_in) ? 0 : 1; + unsigned numcolors_done = 0; + unsigned bpp = lodepng_get_bpp(mode_in); + unsigned bits_done = (stats->bits == 1 && bpp == 1) ? 
1 : 0; + unsigned sixteen = 0; /* whether the input image is 16 bit */ + unsigned maxnumcolors = 257; + if(bpp <= 8) maxnumcolors = LODEPNG_MIN(257, stats->numcolors + (1u << bpp)); + + stats->numpixels += numpixels; + + /*if palette not allowed, no need to compute numcolors*/ + if(!stats->allow_palette) numcolors_done = 1; + + color_tree_init(&tree); + + /*If the stats was already filled in from previous data, fill its palette in tree + and mark things as done already if we know they are the most expensive case already*/ + if(stats->alpha) alpha_done = 1; + if(stats->colored) colored_done = 1; + if(stats->bits == 16) numcolors_done = 1; + if(stats->bits >= bpp) bits_done = 1; + if(stats->numcolors >= maxnumcolors) numcolors_done = 1; + + if(!numcolors_done) { + for(i = 0; i < stats->numcolors; i++) { + const unsigned char* color = &stats->palette[i * 4]; + error = color_tree_add(&tree, color[0], color[1], color[2], color[3], i); + if(error) goto cleanup; + } + } + + /*Check if the 16-bit input is truly 16-bit*/ + if(mode_in->bitdepth == 16 && !sixteen) { + unsigned short r = 0, g = 0, b = 0, a = 0; + for(i = 0; i != numpixels; ++i) { + getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in); + if((r & 255) != ((r >> 8) & 255) || (g & 255) != ((g >> 8) & 255) || + (b & 255) != ((b >> 8) & 255) || (a & 255) != ((a >> 8) & 255)) /*first and second byte differ*/ { + stats->bits = 16; + sixteen = 1; + bits_done = 1; + numcolors_done = 1; /*counting colors no longer useful, palette doesn't support 16-bit*/ + break; + } + } + } + + if(sixteen) { + unsigned short r = 0, g = 0, b = 0, a = 0; + + for(i = 0; i != numpixels; ++i) { + getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in); + + if(!colored_done && (r != g || r != b)) { + stats->colored = 1; + colored_done = 1; + } + + if(!alpha_done) { + unsigned matchkey = (r == stats->key_r && g == stats->key_g && b == stats->key_b); + if(a != 65535 && (a != 0 || (stats->key && !matchkey))) { + stats->alpha = 1; + stats->key = 0; + alpha_done = 1; + } else if(a == 0 && !stats->alpha && !stats->key) { + stats->key = 1; + stats->key_r = r; + stats->key_g = g; + stats->key_b = b; + } else if(a == 65535 && stats->key && matchkey) { + /* Color key cannot be used if an opaque pixel also has that RGB color. */ + stats->alpha = 1; + stats->key = 0; + alpha_done = 1; + } + } + if(alpha_done && numcolors_done && colored_done && bits_done) break; + } + + if(stats->key && !stats->alpha) { + for(i = 0; i != numpixels; ++i) { + getPixelColorRGBA16(&r, &g, &b, &a, in, i, mode_in); + if(a != 0 && r == stats->key_r && g == stats->key_g && b == stats->key_b) { + /* Color key cannot be used if an opaque pixel also has that RGB color. 
*/ + stats->alpha = 1; + stats->key = 0; + alpha_done = 1; + } + } + } + } else /* < 16-bit */ { + unsigned char r = 0, g = 0, b = 0, a = 0; + for(i = 0; i != numpixels; ++i) { + getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in); + + if(!bits_done && stats->bits < 8) { + /*only r is checked, < 8 bits is only relevant for grayscale*/ + unsigned bits = getValueRequiredBits(r); + if(bits > stats->bits) stats->bits = bits; + } + bits_done = (stats->bits >= bpp); + + if(!colored_done && (r != g || r != b)) { + stats->colored = 1; + colored_done = 1; + if(stats->bits < 8) stats->bits = 8; /*PNG has no colored modes with less than 8-bit per channel*/ + } + + if(!alpha_done) { + unsigned matchkey = (r == stats->key_r && g == stats->key_g && b == stats->key_b); + if(a != 255 && (a != 0 || (stats->key && !matchkey))) { + stats->alpha = 1; + stats->key = 0; + alpha_done = 1; + if(stats->bits < 8) stats->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/ + } else if(a == 0 && !stats->alpha && !stats->key) { + stats->key = 1; + stats->key_r = r; + stats->key_g = g; + stats->key_b = b; + } else if(a == 255 && stats->key && matchkey) { + /* Color key cannot be used if an opaque pixel also has that RGB color. */ + stats->alpha = 1; + stats->key = 0; + alpha_done = 1; + if(stats->bits < 8) stats->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/ + } + } + + if(!numcolors_done) { + if(!color_tree_has(&tree, r, g, b, a)) { + error = color_tree_add(&tree, r, g, b, a, stats->numcolors); + if(error) goto cleanup; + if(stats->numcolors < 256) { + unsigned char* p = stats->palette; + unsigned n = stats->numcolors; + p[n * 4 + 0] = r; + p[n * 4 + 1] = g; + p[n * 4 + 2] = b; + p[n * 4 + 3] = a; + } + ++stats->numcolors; + numcolors_done = stats->numcolors >= maxnumcolors; + } + } + + if(alpha_done && numcolors_done && colored_done && bits_done) break; + } + + if(stats->key && !stats->alpha) { + for(i = 0; i != numpixels; ++i) { + getPixelColorRGBA8(&r, &g, &b, &a, in, i, mode_in); + if(a != 0 && r == stats->key_r && g == stats->key_g && b == stats->key_b) { + /* Color key cannot be used if an opaque pixel also has that RGB color. */ + stats->alpha = 1; + stats->key = 0; + alpha_done = 1; + if(stats->bits < 8) stats->bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/ + } + } + } + + /*make the stats's key always 16-bit for consistency - repeat each byte twice*/ + stats->key_r += (stats->key_r << 8); + stats->key_g += (stats->key_g << 8); + stats->key_b += (stats->key_b << 8); + } + +cleanup: + color_tree_cleanup(&tree); + return error; +} + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS +/*Adds a single color to the color stats. The stats must already have been inited. The color must be given as 16-bit +(with 2 bytes repeating for 8-bit and 65535 for opaque alpha channel). This function is expensive, do not call it for +all pixels of an image but only for a few additional values. 
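+For instance, registering an opaque 8-bit red as an extra color would pass the value with
+both bytes repeated, roughly lodepng_color_stats_add(&stats, 0xFFFF, 0, 0, 0xFFFF), where
+stats is a hypothetical LodePNGColorStats that was already inited.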
*/ +static unsigned lodepng_color_stats_add(LodePNGColorStats* stats, + unsigned r, unsigned g, unsigned b, unsigned a) { + unsigned error = 0; + unsigned char image[8]; + LodePNGColorMode mode; + lodepng_color_mode_init(&mode); + image[0] = r >> 8; image[1] = r; image[2] = g >> 8; image[3] = g; + image[4] = b >> 8; image[5] = b; image[6] = a >> 8; image[7] = a; + mode.bitdepth = 16; + mode.colortype = LCT_RGBA; + error = lodepng_compute_color_stats(stats, image, 1, 1, &mode); + lodepng_color_mode_cleanup(&mode); + return error; +} +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + +/*Computes a minimal PNG color model that can contain all colors as indicated by the stats. +The stats should be computed with lodepng_compute_color_stats. +mode_in is raw color profile of the image the stats were computed on, to copy palette order from when relevant. +Minimal PNG color model means the color type and bit depth that gives smallest amount of bits in the output image, +e.g. gray if only grayscale pixels, palette if less than 256 colors, color key if only single transparent color, ... +This is used if auto_convert is enabled (it is by default). +*/ +static unsigned auto_choose_color(LodePNGColorMode* mode_out, + const LodePNGColorMode* mode_in, + const LodePNGColorStats* stats) { + unsigned error = 0; + unsigned palettebits; + size_t i, n; + size_t numpixels = stats->numpixels; + unsigned palette_ok, gray_ok; + + unsigned alpha = stats->alpha; + unsigned key = stats->key; + unsigned bits = stats->bits; + + mode_out->key_defined = 0; + + if(key && numpixels <= 16) { + alpha = 1; /*too few pixels to justify tRNS chunk overhead*/ + key = 0; + if(bits < 8) bits = 8; /*PNG has no alphachannel modes with less than 8-bit per channel*/ + } + + gray_ok = !stats->colored; + if(!stats->allow_greyscale) gray_ok = 0; + if(!gray_ok && bits < 8) bits = 8; + + n = stats->numcolors; + palettebits = n <= 2 ? 1 : (n <= 4 ? 2 : (n <= 16 ? 4 : 8)); + palette_ok = n <= 256 && bits <= 8 && n != 0; /*n==0 means likely numcolors wasn't computed*/ + if(numpixels < n * 2) palette_ok = 0; /*don't add palette overhead if image has only a few pixels*/ + if(gray_ok && !alpha && bits <= palettebits) palette_ok = 0; /*gray is less overhead*/ + if(!stats->allow_palette) palette_ok = 0; + + if(palette_ok) { + const unsigned char* p = stats->palette; + lodepng_palette_clear(mode_out); /*remove potential earlier palette*/ + for(i = 0; i != stats->numcolors; ++i) { + error = lodepng_palette_add(mode_out, p[i * 4 + 0], p[i * 4 + 1], p[i * 4 + 2], p[i * 4 + 3]); + if(error) break; + } + + mode_out->colortype = LCT_PALETTE; + mode_out->bitdepth = palettebits; + + if(mode_in->colortype == LCT_PALETTE && mode_in->palettesize >= mode_out->palettesize + && mode_in->bitdepth == mode_out->bitdepth) { + /*If input should have same palette colors, keep original to preserve its order and prevent conversion*/ + lodepng_color_mode_cleanup(mode_out); + lodepng_color_mode_copy(mode_out, mode_in); + } + } else /*8-bit or 16-bit per channel*/ { + mode_out->bitdepth = bits; + mode_out->colortype = alpha ? (gray_ok ? LCT_GREY_ALPHA : LCT_RGBA) + : (gray_ok ? 
LCT_GREY : LCT_RGB); + if(key) { + unsigned mask = (1u << mode_out->bitdepth) - 1u; /*stats always uses 16-bit, mask converts it*/ + mode_out->key_r = stats->key_r & mask; + mode_out->key_g = stats->key_g & mask; + mode_out->key_b = stats->key_b & mask; + mode_out->key_defined = 1; + } + } + + return error; +} + +#endif /* #ifdef LODEPNG_COMPILE_ENCODER */ + +/* +Paeth predictor, used by PNG filter type 4 +The parameters are of type short, but should come from unsigned chars, the shorts +are only needed to make the paeth calculation correct. +*/ +static unsigned char paethPredictor(short a, short b, short c) { + short pa = LODEPNG_ABS(b - c); + short pb = LODEPNG_ABS(a - c); + short pc = LODEPNG_ABS(a + b - c - c); + /* return input value associated with smallest of pa, pb, pc (with certain priority if equal) */ + if(pb < pa) { a = b; pa = pb; } + return (pc < pa) ? c : a; +} + +/*shared values used by multiple Adam7 related functions*/ + +static const unsigned ADAM7_IX[7] = { 0, 4, 0, 2, 0, 1, 0 }; /*x start values*/ +static const unsigned ADAM7_IY[7] = { 0, 0, 4, 0, 2, 0, 1 }; /*y start values*/ +static const unsigned ADAM7_DX[7] = { 8, 8, 4, 4, 2, 2, 1 }; /*x delta values*/ +static const unsigned ADAM7_DY[7] = { 8, 8, 8, 4, 4, 2, 2 }; /*y delta values*/ + +/* +Outputs various dimensions and positions in the image related to the Adam7 reduced images. +passw: output containing the width of the 7 passes +passh: output containing the height of the 7 passes +filter_passstart: output containing the index of the start and end of each + reduced image with filter bytes +padded_passstart output containing the index of the start and end of each + reduced image when without filter bytes but with padded scanlines +passstart: output containing the index of the start and end of each reduced + image without padding between scanlines, but still padding between the images +w, h: width and height of non-interlaced image +bpp: bits per pixel +"padded" is only relevant if bpp is less than 8 and a scanline or image does not + end at a full byte +*/ +static void Adam7_getpassvalues(unsigned passw[7], unsigned passh[7], size_t filter_passstart[8], + size_t padded_passstart[8], size_t passstart[8], unsigned w, unsigned h, unsigned bpp) { + /*the passstart values have 8 values: the 8th one indicates the byte after the end of the 7th (= last) pass*/ + unsigned i; + + /*calculate width and height in pixels of each pass*/ + for(i = 0; i != 7; ++i) { + passw[i] = (w + ADAM7_DX[i] - ADAM7_IX[i] - 1) / ADAM7_DX[i]; + passh[i] = (h + ADAM7_DY[i] - ADAM7_IY[i] - 1) / ADAM7_DY[i]; + if(passw[i] == 0) passh[i] = 0; + if(passh[i] == 0) passw[i] = 0; + } + + filter_passstart[0] = padded_passstart[0] = passstart[0] = 0; + for(i = 0; i != 7; ++i) { + /*if passw[i] is 0, it's 0 bytes, not 1 (no filtertype-byte)*/ + filter_passstart[i + 1] = filter_passstart[i] + + ((passw[i] && passh[i]) ? passh[i] * (1u + (passw[i] * bpp + 7u) / 8u) : 0); + /*bits padded if needed to fill full byte at end of each scanline*/ + padded_passstart[i + 1] = padded_passstart[i] + passh[i] * ((passw[i] * bpp + 7u) / 8u); + /*only padded at end of reduced image*/ + passstart[i + 1] = passstart[i] + (passh[i] * passw[i] * bpp + 7u) / 8u; + } +} + +#ifdef LODEPNG_COMPILE_DECODER + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / PNG Decoder / */ +/* ////////////////////////////////////////////////////////////////////////// */ + +/*read the information from the header and store it in the LodePNGInfo. 
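+Concretely, the fields read below sit at fixed offsets in a well-formed file: the signature
+in bytes 0..7, the IHDR length at 8..11 (it must be 13) and type at 12..15, then width at
+16..19, height at 20..23, bit depth at 24, color type at 25, compression at 26, filter at 27,
+interlace at 28, and the IHDR CRC at 29..32, computed over the 17 bytes starting at offset 12.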
return value is error*/ +unsigned lodepng_inspect(unsigned* w, unsigned* h, LodePNGState* state, + const unsigned char* in, size_t insize) { + unsigned width, height; + LodePNGInfo* info = &state->info_png; + if(insize == 0 || in == 0) { + CERROR_RETURN_ERROR(state->error, 48); /*error: the given data is empty*/ + } + if(insize < 33) { + CERROR_RETURN_ERROR(state->error, 27); /*error: the data length is smaller than the length of a PNG header*/ + } + + /*when decoding a new PNG image, make sure all parameters created after previous decoding are reset*/ + /* TODO: remove this. One should use a new LodePNGState for new sessions */ + lodepng_info_cleanup(info); + lodepng_info_init(info); + + if(in[0] != 137 || in[1] != 80 || in[2] != 78 || in[3] != 71 + || in[4] != 13 || in[5] != 10 || in[6] != 26 || in[7] != 10) { + CERROR_RETURN_ERROR(state->error, 28); /*error: the first 8 bytes are not the correct PNG signature*/ + } + if(lodepng_chunk_length(in + 8) != 13) { + CERROR_RETURN_ERROR(state->error, 94); /*error: header size must be 13 bytes*/ + } + if(!lodepng_chunk_type_equals(in + 8, "IHDR")) { + CERROR_RETURN_ERROR(state->error, 29); /*error: it doesn't start with a IHDR chunk!*/ + } + + /*read the values given in the header*/ + width = lodepng_read32bitInt(&in[16]); + height = lodepng_read32bitInt(&in[20]); + /*TODO: remove the undocumented feature that allows to give null pointers to width or height*/ + if(w) *w = width; + if(h) *h = height; + info->color.bitdepth = in[24]; + info->color.colortype = (LodePNGColorType)in[25]; + info->compression_method = in[26]; + info->filter_method = in[27]; + info->interlace_method = in[28]; + + /*errors returned only after the parsing so other values are still output*/ + + /*error: invalid image size*/ + if(width == 0 || height == 0) CERROR_RETURN_ERROR(state->error, 93); + /*error: invalid colortype or bitdepth combination*/ + state->error = checkColorValidity(info->color.colortype, info->color.bitdepth); + if(state->error) return state->error; + /*error: only compression method 0 is allowed in the specification*/ + if(info->compression_method != 0) CERROR_RETURN_ERROR(state->error, 32); + /*error: only filter method 0 is allowed in the specification*/ + if(info->filter_method != 0) CERROR_RETURN_ERROR(state->error, 33); + /*error: only interlace methods 0 and 1 exist in the specification*/ + if(info->interlace_method > 1) CERROR_RETURN_ERROR(state->error, 34); + + if(!state->decoder.ignore_crc) { + unsigned CRC = lodepng_read32bitInt(&in[29]); + unsigned checksum = lodepng_crc32(&in[12], 17); + if(CRC != checksum) { + CERROR_RETURN_ERROR(state->error, 57); /*invalid CRC*/ + } + } + + return state->error; +} + +static unsigned unfilterScanline(unsigned char* recon, const unsigned char* scanline, const unsigned char* precon, + size_t bytewidth, unsigned char filterType, size_t length) { + /* + For PNG filter method 0 + unfilter a PNG image scanline by scanline. when the pixels are smaller than 1 byte, + the filter works byte per byte (bytewidth = 1) + precon is the previous unfiltered scanline, recon the result, scanline the current one + the incoming scanlines do NOT include the filtertype byte, that one is given in the parameter filterType instead + recon and scanline MAY be the same memory address! precon must be disjoint. 
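+  A small worked example (illustrative only): with bytewidth 1, filterType 2 (Up),
+  precon = {10, 20} and scanline = {5, 7}, the reconstruction below yields
+  recon = {15, 27}, since each output byte is scanline[i] + precon[i] modulo 256.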
+ */ + + size_t i; + switch(filterType) { + case 0: + for(i = 0; i != length; ++i) recon[i] = scanline[i]; + break; + case 1: + for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i]; + for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + recon[i - bytewidth]; + break; + case 2: + if(precon) { + for(i = 0; i != length; ++i) recon[i] = scanline[i] + precon[i]; + } else { + for(i = 0; i != length; ++i) recon[i] = scanline[i]; + } + break; + case 3: + if(precon) { + for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i] + (precon[i] >> 1u); + for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + ((recon[i - bytewidth] + precon[i]) >> 1u); + } else { + for(i = 0; i != bytewidth; ++i) recon[i] = scanline[i]; + for(i = bytewidth; i < length; ++i) recon[i] = scanline[i] + (recon[i - bytewidth] >> 1u); + } + break; + case 4: + if(precon) { + for(i = 0; i != bytewidth; ++i) { + recon[i] = (scanline[i] + precon[i]); /*paethPredictor(0, precon[i], 0) is always precon[i]*/ + } + + /* Unroll independent paths of the paeth predictor. A 6x and 8x version would also be possible but that + adds too much code. Whether this actually speeds anything up at all depends on compiler and settings. */ + if(bytewidth >= 4) { + for(; i + 3 < length; i += 4) { + size_t j = i - bytewidth; + unsigned char s0 = scanline[i + 0], s1 = scanline[i + 1], s2 = scanline[i + 2], s3 = scanline[i + 3]; + unsigned char r0 = recon[j + 0], r1 = recon[j + 1], r2 = recon[j + 2], r3 = recon[j + 3]; + unsigned char p0 = precon[i + 0], p1 = precon[i + 1], p2 = precon[i + 2], p3 = precon[i + 3]; + unsigned char q0 = precon[j + 0], q1 = precon[j + 1], q2 = precon[j + 2], q3 = precon[j + 3]; + recon[i + 0] = s0 + paethPredictor(r0, p0, q0); + recon[i + 1] = s1 + paethPredictor(r1, p1, q1); + recon[i + 2] = s2 + paethPredictor(r2, p2, q2); + recon[i + 3] = s3 + paethPredictor(r3, p3, q3); + } + } else if(bytewidth >= 3) { + for(; i + 2 < length; i += 3) { + size_t j = i - bytewidth; + unsigned char s0 = scanline[i + 0], s1 = scanline[i + 1], s2 = scanline[i + 2]; + unsigned char r0 = recon[j + 0], r1 = recon[j + 1], r2 = recon[j + 2]; + unsigned char p0 = precon[i + 0], p1 = precon[i + 1], p2 = precon[i + 2]; + unsigned char q0 = precon[j + 0], q1 = precon[j + 1], q2 = precon[j + 2]; + recon[i + 0] = s0 + paethPredictor(r0, p0, q0); + recon[i + 1] = s1 + paethPredictor(r1, p1, q1); + recon[i + 2] = s2 + paethPredictor(r2, p2, q2); + } + } else if(bytewidth >= 2) { + for(; i + 1 < length; i += 2) { + size_t j = i - bytewidth; + unsigned char s0 = scanline[i + 0], s1 = scanline[i + 1]; + unsigned char r0 = recon[j + 0], r1 = recon[j + 1]; + unsigned char p0 = precon[i + 0], p1 = precon[i + 1]; + unsigned char q0 = precon[j + 0], q1 = precon[j + 1]; + recon[i + 0] = s0 + paethPredictor(r0, p0, q0); + recon[i + 1] = s1 + paethPredictor(r1, p1, q1); + } + } + + for(; i != length; ++i) { + recon[i] = (scanline[i] + paethPredictor(recon[i - bytewidth], precon[i], precon[i - bytewidth])); + } + } else { + for(i = 0; i != bytewidth; ++i) { + recon[i] = scanline[i]; + } + for(i = bytewidth; i < length; ++i) { + /*paethPredictor(recon[i - bytewidth], 0, 0) is always recon[i - bytewidth]*/ + recon[i] = (scanline[i] + recon[i - bytewidth]); + } + } + break; + default: return 36; /*error: invalid filter type given*/ + } + return 0; +} + +static unsigned unfilter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) { + /* + For PNG filter method 0 + this function unfilters a single image (e.g. 
without interlacing this is called once, with Adam7 seven times) + out must have enough bytes allocated already, in must have the scanlines + 1 filtertype byte per scanline + w and h are image dimensions or dimensions of reduced image, bpp is bits per pixel + in and out are allowed to be the same memory address (but aren't the same size since in has the extra filter bytes) + */ + + unsigned y; + unsigned char* prevline = 0; + + /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/ + size_t bytewidth = (bpp + 7u) / 8u; + /*the width of a scanline in bytes, not including the filter type*/ + size_t linebytes = lodepng_get_raw_size_idat(w, 1, bpp) - 1u; + + for(y = 0; y < h; ++y) { + size_t outindex = linebytes * y; + size_t inindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/ + unsigned char filterType = in[inindex]; + + CERROR_TRY_RETURN(unfilterScanline(&out[outindex], &in[inindex + 1], prevline, bytewidth, filterType, linebytes)); + + prevline = &out[outindex]; + } + + return 0; +} + +/* +in: Adam7 interlaced image, with no padding bits between scanlines, but between + reduced images so that each reduced image starts at a byte. +out: the same pixels, but re-ordered so that they're now a non-interlaced image with size w*h +bpp: bits per pixel +out has the following size in bits: w * h * bpp. +in is possibly bigger due to padding bits between reduced images. +out must be big enough AND must be 0 everywhere if bpp < 8 in the current implementation +(because that's likely a little bit faster) +NOTE: comments about padding bits are only relevant if bpp < 8 +*/ +static void Adam7_deinterlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) { + unsigned passw[7], passh[7]; + size_t filter_passstart[8], padded_passstart[8], passstart[8]; + unsigned i; + + Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); + + if(bpp >= 8) { + for(i = 0; i != 7; ++i) { + unsigned x, y, b; + size_t bytewidth = bpp / 8u; + for(y = 0; y < passh[i]; ++y) + for(x = 0; x < passw[i]; ++x) { + size_t pixelinstart = passstart[i] + (y * passw[i] + x) * bytewidth; + size_t pixeloutstart = ((ADAM7_IY[i] + (size_t)y * ADAM7_DY[i]) * (size_t)w + + ADAM7_IX[i] + (size_t)x * ADAM7_DX[i]) * bytewidth; + for(b = 0; b < bytewidth; ++b) { + out[pixeloutstart + b] = in[pixelinstart + b]; + } + } + } + } else /*bpp < 8: Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/ { + for(i = 0; i != 7; ++i) { + unsigned x, y, b; + unsigned ilinebits = bpp * passw[i]; + unsigned olinebits = bpp * w; + size_t obp, ibp; /*bit pointers (for out and in buffer)*/ + for(y = 0; y < passh[i]; ++y) + for(x = 0; x < passw[i]; ++x) { + ibp = (8 * passstart[i]) + (y * ilinebits + x * bpp); + obp = (ADAM7_IY[i] + (size_t)y * ADAM7_DY[i]) * olinebits + (ADAM7_IX[i] + (size_t)x * ADAM7_DX[i]) * bpp; + for(b = 0; b < bpp; ++b) { + unsigned char bit = readBitFromReversedStream(&ibp, in); + setBitOfReversedStream(&obp, out, bit); + } + } + } + } +} + +static void removePaddingBits(unsigned char* out, const unsigned char* in, + size_t olinebits, size_t ilinebits, unsigned h) { + /* + After filtering there are still padding bits if scanlines have non multiple of 8 bit amounts. They need + to be removed (except at last scanline of (Adam7-reduced) image) before working with pure image buffers + for the Adam7 code, the color convert code and the output to the user. 
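+  For example (illustrative): in a 1 bit-per-pixel image that is 10 pixels wide, each
+  unfiltered scanline still occupies 16 bits (2 whole bytes) but only 10 of those bits are
+  pixel data, so this is called with ilinebits = 16 and olinebits = 10 to repack the rows
+  back to back.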
+ in and out are allowed to be the same buffer, in may also be higher but still overlapping; in must + have >= ilinebits*h bits, out must have >= olinebits*h bits, olinebits must be <= ilinebits + also used to move bits after earlier such operations happened, e.g. in a sequence of reduced images from Adam7 + only useful if (ilinebits - olinebits) is a value in the range 1..7 + */ + unsigned y; + size_t diff = ilinebits - olinebits; + size_t ibp = 0, obp = 0; /*input and output bit pointers*/ + for(y = 0; y < h; ++y) { + size_t x; + for(x = 0; x < olinebits; ++x) { + unsigned char bit = readBitFromReversedStream(&ibp, in); + setBitOfReversedStream(&obp, out, bit); + } + ibp += diff; + } +} + +/*out must be buffer big enough to contain full image, and in must contain the full decompressed data from +the IDAT chunks (with filter index bytes and possible padding bits) +return value is error*/ +static unsigned postProcessScanlines(unsigned char* out, unsigned char* in, + unsigned w, unsigned h, const LodePNGInfo* info_png) { + /* + This function converts the filtered-padded-interlaced data into pure 2D image buffer with the PNG's colortype. + Steps: + *) if no Adam7: 1) unfilter 2) remove padding bits (= possible extra bits per scanline if bpp < 8) + *) if adam7: 1) 7x unfilter 2) 7x remove padding bits 3) Adam7_deinterlace + NOTE: the in buffer will be overwritten with intermediate data! + */ + unsigned bpp = lodepng_get_bpp(&info_png->color); + if(bpp == 0) return 31; /*error: invalid colortype*/ + + if(info_png->interlace_method == 0) { + if(bpp < 8 && w * bpp != ((w * bpp + 7u) / 8u) * 8u) { + CERROR_TRY_RETURN(unfilter(in, in, w, h, bpp)); + removePaddingBits(out, in, w * bpp, ((w * bpp + 7u) / 8u) * 8u, h); + } + /*we can immediately filter into the out buffer, no other steps needed*/ + else CERROR_TRY_RETURN(unfilter(out, in, w, h, bpp)); + } else /*interlace_method is 1 (Adam7)*/ { + unsigned passw[7], passh[7]; size_t filter_passstart[8], padded_passstart[8], passstart[8]; + unsigned i; + + Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); + + for(i = 0; i != 7; ++i) { + CERROR_TRY_RETURN(unfilter(&in[padded_passstart[i]], &in[filter_passstart[i]], passw[i], passh[i], bpp)); + /*TODO: possible efficiency improvement: if in this reduced image the bits fit nicely in 1 scanline, + move bytes instead of bits or move not at all*/ + if(bpp < 8) { + /*remove padding bits in scanlines; after this there still may be padding + bits between the different reduced images: each reduced image still starts nicely at a byte*/ + removePaddingBits(&in[passstart[i]], &in[padded_passstart[i]], passw[i] * bpp, + ((passw[i] * bpp + 7u) / 8u) * 8u, passh[i]); + } + } + + Adam7_deinterlace(out, in, w, h, bpp); + } + + return 0; +} + +static unsigned readChunk_PLTE(LodePNGColorMode* color, const unsigned char* data, size_t chunkLength) { + unsigned pos = 0, i; + color->palettesize = chunkLength / 3u; + if(color->palettesize == 0 || color->palettesize > 256) return 38; /*error: palette too small or big*/ + lodepng_color_mode_alloc_palette(color); + if(!color->palette && color->palettesize) { + color->palettesize = 0; + return 83; /*alloc fail*/ + } + + for(i = 0; i != color->palettesize; ++i) { + color->palette[4 * i + 0] = data[pos++]; /*R*/ + color->palette[4 * i + 1] = data[pos++]; /*G*/ + color->palette[4 * i + 2] = data[pos++]; /*B*/ + color->palette[4 * i + 3] = 255; /*alpha*/ + } + + return 0; /* OK */ +} + +static unsigned readChunk_tRNS(LodePNGColorMode* color, 
const unsigned char* data, size_t chunkLength) { + unsigned i; + if(color->colortype == LCT_PALETTE) { + /*error: more alpha values given than there are palette entries*/ + if(chunkLength > color->palettesize) return 39; + + for(i = 0; i != chunkLength; ++i) color->palette[4 * i + 3] = data[i]; + } else if(color->colortype == LCT_GREY) { + /*error: this chunk must be 2 bytes for grayscale image*/ + if(chunkLength != 2) return 30; + + color->key_defined = 1; + color->key_r = color->key_g = color->key_b = 256u * data[0] + data[1]; + } else if(color->colortype == LCT_RGB) { + /*error: this chunk must be 6 bytes for RGB image*/ + if(chunkLength != 6) return 41; + + color->key_defined = 1; + color->key_r = 256u * data[0] + data[1]; + color->key_g = 256u * data[2] + data[3]; + color->key_b = 256u * data[4] + data[5]; + } + else return 42; /*error: tRNS chunk not allowed for other color models*/ + + return 0; /* OK */ +} + + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS +/*background color chunk (bKGD)*/ +static unsigned readChunk_bKGD(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) { + if(info->color.colortype == LCT_PALETTE) { + /*error: this chunk must be 1 byte for indexed color image*/ + if(chunkLength != 1) return 43; + + /*error: invalid palette index, or maybe this chunk appeared before PLTE*/ + if(data[0] >= info->color.palettesize) return 103; + + info->background_defined = 1; + info->background_r = info->background_g = info->background_b = data[0]; + } else if(info->color.colortype == LCT_GREY || info->color.colortype == LCT_GREY_ALPHA) { + /*error: this chunk must be 2 bytes for grayscale image*/ + if(chunkLength != 2) return 44; + + /*the values are truncated to bitdepth in the PNG file*/ + info->background_defined = 1; + info->background_r = info->background_g = info->background_b = 256u * data[0] + data[1]; + } else if(info->color.colortype == LCT_RGB || info->color.colortype == LCT_RGBA) { + /*error: this chunk must be 6 bytes for grayscale image*/ + if(chunkLength != 6) return 45; + + /*the values are truncated to bitdepth in the PNG file*/ + info->background_defined = 1; + info->background_r = 256u * data[0] + data[1]; + info->background_g = 256u * data[2] + data[3]; + info->background_b = 256u * data[4] + data[5]; + } + + return 0; /* OK */ +} + +/*text chunk (tEXt)*/ +static unsigned readChunk_tEXt(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) { + unsigned error = 0; + char *key = 0, *str = 0; + + while(!error) /*not really a while loop, only used to break on error*/ { + unsigned length, string2_begin; + + length = 0; + while(length < chunkLength && data[length] != 0) ++length; + /*even though it's not allowed by the standard, no error is thrown if + there's no null termination char, if the text is empty*/ + if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/ + + key = (char*)lodepng_malloc(length + 1); + if(!key) CERROR_BREAK(error, 83); /*alloc fail*/ + + lodepng_memcpy(key, data, length); + key[length] = 0; + + string2_begin = length + 1; /*skip keyword null terminator*/ + + length = (unsigned)(chunkLength < string2_begin ? 
0 : chunkLength - string2_begin); + str = (char*)lodepng_malloc(length + 1); + if(!str) CERROR_BREAK(error, 83); /*alloc fail*/ + + lodepng_memcpy(str, data + string2_begin, length); + str[length] = 0; + + error = lodepng_add_text(info, key, str); + + break; + } + + lodepng_free(key); + lodepng_free(str); + + return error; +} + +/*compressed text chunk (zTXt)*/ +static unsigned readChunk_zTXt(LodePNGInfo* info, const LodePNGDecoderSettings* decoder, + const unsigned char* data, size_t chunkLength) { + unsigned error = 0; + + /*copy the object to change parameters in it*/ + LodePNGDecompressSettings zlibsettings = decoder->zlibsettings; + + unsigned length, string2_begin; + char *key = 0; + unsigned char* str = 0; + size_t size = 0; + + while(!error) /*not really a while loop, only used to break on error*/ { + for(length = 0; length < chunkLength && data[length] != 0; ++length) ; + if(length + 2 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/ + if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/ + + key = (char*)lodepng_malloc(length + 1); + if(!key) CERROR_BREAK(error, 83); /*alloc fail*/ + + lodepng_memcpy(key, data, length); + key[length] = 0; + + if(data[length + 1] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/ + + string2_begin = length + 2; + if(string2_begin > chunkLength) CERROR_BREAK(error, 75); /*no null termination, corrupt?*/ + + length = (unsigned)chunkLength - string2_begin; + zlibsettings.max_output_size = decoder->max_text_size; + /*will fail if zlib error, e.g. if length is too small*/ + error = zlib_decompress(&str, &size, 0, &data[string2_begin], + length, &zlibsettings); + /*error: compressed text larger than decoder->max_text_size*/ + if(error && size > zlibsettings.max_output_size) error = 112; + if(error) break; + error = lodepng_add_text_sized(info, key, (char*)str, size); + break; + } + + lodepng_free(key); + lodepng_free(str); + + return error; +} + +/*international text chunk (iTXt)*/ +static unsigned readChunk_iTXt(LodePNGInfo* info, const LodePNGDecoderSettings* decoder, + const unsigned char* data, size_t chunkLength) { + unsigned error = 0; + unsigned i; + + /*copy the object to change parameters in it*/ + LodePNGDecompressSettings zlibsettings = decoder->zlibsettings; + + unsigned length, begin, compressed; + char *key = 0, *langtag = 0, *transkey = 0; + + while(!error) /*not really a while loop, only used to break on error*/ { + /*Quick check if the chunk length isn't too small. Even without check + it'd still fail with other error checks below if it's too short. 
This just gives a different error code.*/ + if(chunkLength < 5) CERROR_BREAK(error, 30); /*iTXt chunk too short*/ + + /*read the key*/ + for(length = 0; length < chunkLength && data[length] != 0; ++length) ; + if(length + 3 >= chunkLength) CERROR_BREAK(error, 75); /*no null termination char, corrupt?*/ + if(length < 1 || length > 79) CERROR_BREAK(error, 89); /*keyword too short or long*/ + + key = (char*)lodepng_malloc(length + 1); + if(!key) CERROR_BREAK(error, 83); /*alloc fail*/ + + lodepng_memcpy(key, data, length); + key[length] = 0; + + /*read the compression method*/ + compressed = data[length + 1]; + if(data[length + 2] != 0) CERROR_BREAK(error, 72); /*the 0 byte indicating compression must be 0*/ + + /*even though it's not allowed by the standard, no error is thrown if + there's no null termination char, if the text is empty for the next 3 texts*/ + + /*read the langtag*/ + begin = length + 3; + length = 0; + for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length; + + langtag = (char*)lodepng_malloc(length + 1); + if(!langtag) CERROR_BREAK(error, 83); /*alloc fail*/ + + lodepng_memcpy(langtag, data + begin, length); + langtag[length] = 0; + + /*read the transkey*/ + begin += length + 1; + length = 0; + for(i = begin; i < chunkLength && data[i] != 0; ++i) ++length; + + transkey = (char*)lodepng_malloc(length + 1); + if(!transkey) CERROR_BREAK(error, 83); /*alloc fail*/ + + lodepng_memcpy(transkey, data + begin, length); + transkey[length] = 0; + + /*read the actual text*/ + begin += length + 1; + + length = (unsigned)chunkLength < begin ? 0 : (unsigned)chunkLength - begin; + + if(compressed) { + unsigned char* str = 0; + size_t size = 0; + zlibsettings.max_output_size = decoder->max_text_size; + /*will fail if zlib error, e.g. if length is too small*/ + error = zlib_decompress(&str, &size, 0, &data[begin], + length, &zlibsettings); + /*error: compressed text larger than decoder->max_text_size*/ + if(error && size > zlibsettings.max_output_size) error = 112; + if(!error) error = lodepng_add_itext_sized(info, key, langtag, transkey, (char*)str, size); + lodepng_free(str); + } else { + error = lodepng_add_itext_sized(info, key, langtag, transkey, (char*)(data + begin), length); + } + + break; + } + + lodepng_free(key); + lodepng_free(langtag); + lodepng_free(transkey); + + return error; +} + +static unsigned readChunk_tIME(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) { + if(chunkLength != 7) return 73; /*invalid tIME chunk size*/ + + info->time_defined = 1; + info->time.year = 256u * data[0] + data[1]; + info->time.month = data[2]; + info->time.day = data[3]; + info->time.hour = data[4]; + info->time.minute = data[5]; + info->time.second = data[6]; + + return 0; /* OK */ +} + +static unsigned readChunk_pHYs(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) { + if(chunkLength != 9) return 74; /*invalid pHYs chunk size*/ + + info->phys_defined = 1; + info->phys_x = 16777216u * data[0] + 65536u * data[1] + 256u * data[2] + data[3]; + info->phys_y = 16777216u * data[4] + 65536u * data[5] + 256u * data[6] + data[7]; + info->phys_unit = data[8]; + + return 0; /* OK */ +} + +static unsigned readChunk_gAMA(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) { + if(chunkLength != 4) return 96; /*invalid gAMA chunk size*/ + + info->gama_defined = 1; + info->gama_gamma = 16777216u * data[0] + 65536u * data[1] + 256u * data[2] + data[3]; + + return 0; /* OK */ +} + +static unsigned readChunk_cHRM(LodePNGInfo* info, const unsigned char* 
data, size_t chunkLength) { + if(chunkLength != 32) return 97; /*invalid cHRM chunk size*/ + + info->chrm_defined = 1; + info->chrm_white_x = 16777216u * data[ 0] + 65536u * data[ 1] + 256u * data[ 2] + data[ 3]; + info->chrm_white_y = 16777216u * data[ 4] + 65536u * data[ 5] + 256u * data[ 6] + data[ 7]; + info->chrm_red_x = 16777216u * data[ 8] + 65536u * data[ 9] + 256u * data[10] + data[11]; + info->chrm_red_y = 16777216u * data[12] + 65536u * data[13] + 256u * data[14] + data[15]; + info->chrm_green_x = 16777216u * data[16] + 65536u * data[17] + 256u * data[18] + data[19]; + info->chrm_green_y = 16777216u * data[20] + 65536u * data[21] + 256u * data[22] + data[23]; + info->chrm_blue_x = 16777216u * data[24] + 65536u * data[25] + 256u * data[26] + data[27]; + info->chrm_blue_y = 16777216u * data[28] + 65536u * data[29] + 256u * data[30] + data[31]; + + return 0; /* OK */ +} + +static unsigned readChunk_sRGB(LodePNGInfo* info, const unsigned char* data, size_t chunkLength) { + if(chunkLength != 1) return 98; /*invalid sRGB chunk size (this one is never ignored)*/ + + info->srgb_defined = 1; + info->srgb_intent = data[0]; + + return 0; /* OK */ +} + +static unsigned readChunk_iCCP(LodePNGInfo* info, const LodePNGDecoderSettings* decoder, + const unsigned char* data, size_t chunkLength) { + unsigned error = 0; + unsigned i; + size_t size = 0; + /*copy the object to change parameters in it*/ + LodePNGDecompressSettings zlibsettings = decoder->zlibsettings; + + unsigned length, string2_begin; + + info->iccp_defined = 1; + if(info->iccp_name) lodepng_clear_icc(info); + + for(length = 0; length < chunkLength && data[length] != 0; ++length) ; + if(length + 2 >= chunkLength) return 75; /*no null termination, corrupt?*/ + if(length < 1 || length > 79) return 89; /*keyword too short or long*/ + + info->iccp_name = (char*)lodepng_malloc(length + 1); + if(!info->iccp_name) return 83; /*alloc fail*/ + + info->iccp_name[length] = 0; + for(i = 0; i != length; ++i) info->iccp_name[i] = (char)data[i]; + + if(data[length + 1] != 0) return 72; /*the 0 byte indicating compression must be 0*/ + + string2_begin = length + 2; + if(string2_begin > chunkLength) return 75; /*no null termination, corrupt?*/ + + length = (unsigned)chunkLength - string2_begin; + zlibsettings.max_output_size = decoder->max_icc_size; + error = zlib_decompress(&info->iccp_profile, &size, 0, + &data[string2_begin], + length, &zlibsettings); + /*error: ICC profile larger than decoder->max_icc_size*/ + if(error && size > zlibsettings.max_output_size) error = 113; + info->iccp_profile_size = size; + if(!error && !info->iccp_profile_size) error = 100; /*invalid ICC profile size*/ + return error; +} +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + +unsigned lodepng_inspect_chunk(LodePNGState* state, size_t pos, + const unsigned char* in, size_t insize) { + const unsigned char* chunk = in + pos; + unsigned chunkLength; + const unsigned char* data; + unsigned unhandled = 0; + unsigned error = 0; + + if(pos + 4 > insize) return 30; + chunkLength = lodepng_chunk_length(chunk); + if(chunkLength > 2147483647) return 63; + data = lodepng_chunk_data_const(chunk); + if(data + chunkLength + 4 > in + insize) return 30; + + if(lodepng_chunk_type_equals(chunk, "PLTE")) { + error = readChunk_PLTE(&state->info_png.color, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "tRNS")) { + error = readChunk_tRNS(&state->info_png.color, data, chunkLength); +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + } else if(lodepng_chunk_type_equals(chunk, "bKGD")) 
{ + error = readChunk_bKGD(&state->info_png, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "tEXt")) { + error = readChunk_tEXt(&state->info_png, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "zTXt")) { + error = readChunk_zTXt(&state->info_png, &state->decoder, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "iTXt")) { + error = readChunk_iTXt(&state->info_png, &state->decoder, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "tIME")) { + error = readChunk_tIME(&state->info_png, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "pHYs")) { + error = readChunk_pHYs(&state->info_png, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "gAMA")) { + error = readChunk_gAMA(&state->info_png, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "cHRM")) { + error = readChunk_cHRM(&state->info_png, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "sRGB")) { + error = readChunk_sRGB(&state->info_png, data, chunkLength); + } else if(lodepng_chunk_type_equals(chunk, "iCCP")) { + error = readChunk_iCCP(&state->info_png, &state->decoder, data, chunkLength); +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + } else { + /* unhandled chunk is ok (is not an error) */ + unhandled = 1; + } + + if(!error && !unhandled && !state->decoder.ignore_crc) { + if(lodepng_chunk_check_crc(chunk)) return 57; /*invalid CRC*/ + } + + return error; +} + +/*read a PNG, the result will be in the same color type as the PNG (hence "generic")*/ +static void decodeGeneric(unsigned char** out, unsigned* w, unsigned* h, + LodePNGState* state, + const unsigned char* in, size_t insize) { + unsigned char IEND = 0; + const unsigned char* chunk; + unsigned char* idat; /*the data from idat chunks, zlib compressed*/ + size_t idatsize = 0; + unsigned char* scanlines = 0; + size_t scanlines_size = 0, expected_size = 0; + size_t outsize = 0; + + /*for unknown chunk order*/ + unsigned unknown = 0; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + unsigned critical_pos = 1; /*1 = after IHDR, 2 = after PLTE, 3 = after IDAT*/ +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + + + /* safe output values in case error happens */ + *out = 0; + *w = *h = 0; + + state->error = lodepng_inspect(w, h, state, in, insize); /*reads header and resets other parameters in state->info_png*/ + if(state->error) return; + + if(lodepng_pixel_overflow(*w, *h, &state->info_png.color, &state->info_raw)) { + CERROR_RETURN(state->error, 92); /*overflow possible due to amount of pixels*/ + } + + /*the input filesize is a safe upper bound for the sum of idat chunks size*/ + idat = (unsigned char*)lodepng_malloc(insize); + if(!idat) CERROR_RETURN(state->error, 83); /*alloc fail*/ + + chunk = &in[33]; /*first byte of the first chunk after the header*/ + + /*loop through the chunks, ignoring unknown chunks and stopping at IEND chunk. 
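+  Each chunk on disk is 4 length bytes, a 4-byte type, the data itself, and a 4-byte CRC,
+  which is why the bounds checks below require chunkLength + 12 bytes to remain in the buffer.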
+ IDAT data is put at the start of the in buffer*/ + while(!IEND && !state->error) { + unsigned chunkLength; + const unsigned char* data; /*the data in the chunk*/ + + /*error: size of the in buffer too small to contain next chunk*/ + if((size_t)((chunk - in) + 12) > insize || chunk < in) { + if(state->decoder.ignore_end) break; /*other errors may still happen though*/ + CERROR_BREAK(state->error, 30); + } + + /*length of the data of the chunk, excluding the length bytes, chunk type and CRC bytes*/ + chunkLength = lodepng_chunk_length(chunk); + /*error: chunk length larger than the max PNG chunk size*/ + if(chunkLength > 2147483647) { + if(state->decoder.ignore_end) break; /*other errors may still happen though*/ + CERROR_BREAK(state->error, 63); + } + + if((size_t)((chunk - in) + chunkLength + 12) > insize || (chunk + chunkLength + 12) < in) { + CERROR_BREAK(state->error, 64); /*error: size of the in buffer too small to contain next chunk*/ + } + + data = lodepng_chunk_data_const(chunk); + + unknown = 0; + + /*IDAT chunk, containing compressed image data*/ + if(lodepng_chunk_type_equals(chunk, "IDAT")) { + size_t newsize; + if(lodepng_addofl(idatsize, chunkLength, &newsize)) CERROR_BREAK(state->error, 95); + if(newsize > insize) CERROR_BREAK(state->error, 95); + lodepng_memcpy(idat + idatsize, data, chunkLength); + idatsize += chunkLength; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + critical_pos = 3; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + } else if(lodepng_chunk_type_equals(chunk, "IEND")) { + /*IEND chunk*/ + IEND = 1; + } else if(lodepng_chunk_type_equals(chunk, "PLTE")) { + /*palette chunk (PLTE)*/ + state->error = readChunk_PLTE(&state->info_png.color, data, chunkLength); + if(state->error) break; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + critical_pos = 2; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + } else if(lodepng_chunk_type_equals(chunk, "tRNS")) { + /*palette transparency chunk (tRNS). Even though this one is an ancillary chunk , it is still compiled + in without 'LODEPNG_COMPILE_ANCILLARY_CHUNKS' because it contains essential color information that + affects the alpha channel of pixels. 
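+      For an RGB image, for example, tRNS holds one 16-bit sample per channel naming the single
+      color that is to be treated as fully transparent, which readChunk_tRNS above parses into
+      key_r, key_g and key_b.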
*/ + state->error = readChunk_tRNS(&state->info_png.color, data, chunkLength); + if(state->error) break; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + /*background color chunk (bKGD)*/ + } else if(lodepng_chunk_type_equals(chunk, "bKGD")) { + state->error = readChunk_bKGD(&state->info_png, data, chunkLength); + if(state->error) break; + } else if(lodepng_chunk_type_equals(chunk, "tEXt")) { + /*text chunk (tEXt)*/ + if(state->decoder.read_text_chunks) { + state->error = readChunk_tEXt(&state->info_png, data, chunkLength); + if(state->error) break; + } + } else if(lodepng_chunk_type_equals(chunk, "zTXt")) { + /*compressed text chunk (zTXt)*/ + if(state->decoder.read_text_chunks) { + state->error = readChunk_zTXt(&state->info_png, &state->decoder, data, chunkLength); + if(state->error) break; + } + } else if(lodepng_chunk_type_equals(chunk, "iTXt")) { + /*international text chunk (iTXt)*/ + if(state->decoder.read_text_chunks) { + state->error = readChunk_iTXt(&state->info_png, &state->decoder, data, chunkLength); + if(state->error) break; + } + } else if(lodepng_chunk_type_equals(chunk, "tIME")) { + state->error = readChunk_tIME(&state->info_png, data, chunkLength); + if(state->error) break; + } else if(lodepng_chunk_type_equals(chunk, "pHYs")) { + state->error = readChunk_pHYs(&state->info_png, data, chunkLength); + if(state->error) break; + } else if(lodepng_chunk_type_equals(chunk, "gAMA")) { + state->error = readChunk_gAMA(&state->info_png, data, chunkLength); + if(state->error) break; + } else if(lodepng_chunk_type_equals(chunk, "cHRM")) { + state->error = readChunk_cHRM(&state->info_png, data, chunkLength); + if(state->error) break; + } else if(lodepng_chunk_type_equals(chunk, "sRGB")) { + state->error = readChunk_sRGB(&state->info_png, data, chunkLength); + if(state->error) break; + } else if(lodepng_chunk_type_equals(chunk, "iCCP")) { + state->error = readChunk_iCCP(&state->info_png, &state->decoder, data, chunkLength); + if(state->error) break; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + } else /*it's not an implemented chunk type, so ignore it: skip over the data*/ { + /*error: unknown critical chunk (5th bit of first byte of chunk type is 0)*/ + if(!state->decoder.ignore_critical && !lodepng_chunk_ancillary(chunk)) { + CERROR_BREAK(state->error, 69); + } + + unknown = 1; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + if(state->decoder.remember_unknown_chunks) { + state->error = lodepng_chunk_append(&state->info_png.unknown_chunks_data[critical_pos - 1], + &state->info_png.unknown_chunks_size[critical_pos - 1], chunk); + if(state->error) break; + } +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + } + + if(!state->decoder.ignore_crc && !unknown) /*check CRC if wanted, only on known chunk types*/ { + if(lodepng_chunk_check_crc(chunk)) CERROR_BREAK(state->error, 57); /*invalid CRC*/ + } + + if(!IEND) chunk = lodepng_chunk_next_const(chunk, in + insize); + } + + if(!state->error && state->info_png.color.colortype == LCT_PALETTE && !state->info_png.color.palette) { + state->error = 106; /* error: PNG file must have PLTE chunk if color type is palette */ + } + + if(!state->error) { + /*predict output size, to allocate exact size for output buffer to avoid more dynamic allocation. 
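+    For a non-interlaced image the prediction is simply h scanlines of ceil(w * bpp / 8) bytes
+    plus one filter-type byte each; e.g. (illustrative) a 32x32 RGBA 8-bit image (bpp 32) is
+    expected to inflate to 32 * (128 + 1) = 4128 bytes.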
+ If the decompressed size does not match the prediction, the image must be corrupt.*/ + if(state->info_png.interlace_method == 0) { + size_t bpp = lodepng_get_bpp(&state->info_png.color); + expected_size = lodepng_get_raw_size_idat(*w, *h, bpp); + } else { + size_t bpp = lodepng_get_bpp(&state->info_png.color); + /*Adam-7 interlaced: expected size is the sum of the 7 sub-images sizes*/ + expected_size = 0; + expected_size += lodepng_get_raw_size_idat((*w + 7) >> 3, (*h + 7) >> 3, bpp); + if(*w > 4) expected_size += lodepng_get_raw_size_idat((*w + 3) >> 3, (*h + 7) >> 3, bpp); + expected_size += lodepng_get_raw_size_idat((*w + 3) >> 2, (*h + 3) >> 3, bpp); + if(*w > 2) expected_size += lodepng_get_raw_size_idat((*w + 1) >> 2, (*h + 3) >> 2, bpp); + expected_size += lodepng_get_raw_size_idat((*w + 1) >> 1, (*h + 1) >> 2, bpp); + if(*w > 1) expected_size += lodepng_get_raw_size_idat((*w + 0) >> 1, (*h + 1) >> 1, bpp); + expected_size += lodepng_get_raw_size_idat((*w + 0), (*h + 0) >> 1, bpp); + } + + state->error = zlib_decompress(&scanlines, &scanlines_size, expected_size, idat, idatsize, &state->decoder.zlibsettings); + } + if(!state->error && scanlines_size != expected_size) state->error = 91; /*decompressed size doesn't match prediction*/ + lodepng_free(idat); + + if(!state->error) { + outsize = lodepng_get_raw_size(*w, *h, &state->info_png.color); + *out = (unsigned char*)lodepng_malloc(outsize); + if(!*out) state->error = 83; /*alloc fail*/ + } + if(!state->error) { + lodepng_memset(*out, 0, outsize); + state->error = postProcessScanlines(*out, scanlines, *w, *h, &state->info_png); + } + lodepng_free(scanlines); +} + +unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h, + LodePNGState* state, + const unsigned char* in, size_t insize) { + *out = 0; + decodeGeneric(out, w, h, state, in, insize); + if(state->error) return state->error; + if(!state->decoder.color_convert || lodepng_color_mode_equal(&state->info_raw, &state->info_png.color)) { + /*same color type, no copying or converting of data needed*/ + /*store the info_png color settings on the info_raw so that the info_raw still reflects what colortype + the raw image has to the end user*/ + if(!state->decoder.color_convert) { + state->error = lodepng_color_mode_copy(&state->info_raw, &state->info_png.color); + if(state->error) return state->error; + } + } else { /*color conversion needed*/ + unsigned char* data = *out; + size_t outsize; + + /*TODO: check if this works according to the statement in the documentation: "The converter can convert + from grayscale input color type, to 8-bit grayscale or grayscale with alpha"*/ + if(!(state->info_raw.colortype == LCT_RGB || state->info_raw.colortype == LCT_RGBA) + && !(state->info_raw.bitdepth == 8)) { + return 56; /*unsupported color mode conversion*/ + } + + outsize = lodepng_get_raw_size(*w, *h, &state->info_raw); + *out = (unsigned char*)lodepng_malloc(outsize); + if(!(*out)) { + state->error = 83; /*alloc fail*/ + } + else state->error = lodepng_convert(*out, data, &state->info_raw, + &state->info_png.color, *w, *h); + lodepng_free(data); + } + return state->error; +} + +unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, + size_t insize, LodePNGColorType colortype, unsigned bitdepth) { + unsigned error; + LodePNGState state; + lodepng_state_init(&state); + state.info_raw.colortype = colortype; + state.info_raw.bitdepth = bitdepth; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + /*disable reading things that this function 
doesn't output*/ + state.decoder.read_text_chunks = 0; + state.decoder.remember_unknown_chunks = 0; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + error = lodepng_decode(out, w, h, &state, in, insize); + lodepng_state_cleanup(&state); + return error; +} + +unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize) { + return lodepng_decode_memory(out, w, h, in, insize, LCT_RGBA, 8); +} + +unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h, const unsigned char* in, size_t insize) { + return lodepng_decode_memory(out, w, h, in, insize, LCT_RGB, 8); +} + +#ifdef LODEPNG_COMPILE_DISK +unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename, + LodePNGColorType colortype, unsigned bitdepth) { + unsigned char* buffer = 0; + size_t buffersize; + unsigned error; + /* safe output values in case error happens */ + *out = 0; + *w = *h = 0; + error = lodepng_load_file(&buffer, &buffersize, filename); + if(!error) error = lodepng_decode_memory(out, w, h, buffer, buffersize, colortype, bitdepth); + lodepng_free(buffer); + return error; +} + +unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename) { + return lodepng_decode_file(out, w, h, filename, LCT_RGBA, 8); +} + +unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h, const char* filename) { + return lodepng_decode_file(out, w, h, filename, LCT_RGB, 8); +} +#endif /*LODEPNG_COMPILE_DISK*/ + +void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings) { + settings->color_convert = 1; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + settings->read_text_chunks = 1; + settings->remember_unknown_chunks = 0; + settings->max_text_size = 16777216; + settings->max_icc_size = 16777216; /* 16MB is much more than enough for any reasonable ICC profile */ +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + settings->ignore_crc = 0; + settings->ignore_critical = 0; + settings->ignore_end = 0; + lodepng_decompress_settings_init(&settings->zlibsettings); +} + +#endif /*LODEPNG_COMPILE_DECODER*/ + +#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) + +void lodepng_state_init(LodePNGState* state) { +#ifdef LODEPNG_COMPILE_DECODER + lodepng_decoder_settings_init(&state->decoder); +#endif /*LODEPNG_COMPILE_DECODER*/ +#ifdef LODEPNG_COMPILE_ENCODER + lodepng_encoder_settings_init(&state->encoder); +#endif /*LODEPNG_COMPILE_ENCODER*/ + lodepng_color_mode_init(&state->info_raw); + lodepng_info_init(&state->info_png); + state->error = 1; +} + +void lodepng_state_cleanup(LodePNGState* state) { + lodepng_color_mode_cleanup(&state->info_raw); + lodepng_info_cleanup(&state->info_png); +} + +void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source) { + lodepng_state_cleanup(dest); + *dest = *source; + lodepng_color_mode_init(&dest->info_raw); + lodepng_info_init(&dest->info_png); + dest->error = lodepng_color_mode_copy(&dest->info_raw, &source->info_raw); if(dest->error) return; + dest->error = lodepng_info_copy(&dest->info_png, &source->info_png); if(dest->error) return; +} + +#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */ + +#ifdef LODEPNG_COMPILE_ENCODER + +/* ////////////////////////////////////////////////////////////////////////// */ +/* / PNG Encoder / */ +/* ////////////////////////////////////////////////////////////////////////// */ + + +static unsigned writeSignature(ucvector* out) { + size_t pos = out->size; + const 
unsigned char signature[] = {137, 80, 78, 71, 13, 10, 26, 10}; + /*8 bytes PNG signature, aka the magic bytes*/ + if(!ucvector_resize(out, out->size + 8)) return 83; /*alloc fail*/ + lodepng_memcpy(out->data + pos, signature, 8); + return 0; +} + +static unsigned addChunk_IHDR(ucvector* out, unsigned w, unsigned h, + LodePNGColorType colortype, unsigned bitdepth, unsigned interlace_method) { + unsigned char *chunk, *data; + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 13, "IHDR")); + data = chunk + 8; + + lodepng_set32bitInt(data + 0, w); /*width*/ + lodepng_set32bitInt(data + 4, h); /*height*/ + data[8] = (unsigned char)bitdepth; /*bit depth*/ + data[9] = (unsigned char)colortype; /*color type*/ + data[10] = 0; /*compression method*/ + data[11] = 0; /*filter method*/ + data[12] = interlace_method; /*interlace method*/ + + lodepng_chunk_generate_crc(chunk); + return 0; +} + +/* only adds the chunk if needed (there is a key or palette with alpha) */ +static unsigned addChunk_PLTE(ucvector* out, const LodePNGColorMode* info) { + unsigned char* chunk; + size_t i, j = 8; + + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, info->palettesize * 3, "PLTE")); + + for(i = 0; i != info->palettesize; ++i) { + /*add all channels except alpha channel*/ + chunk[j++] = info->palette[i * 4 + 0]; + chunk[j++] = info->palette[i * 4 + 1]; + chunk[j++] = info->palette[i * 4 + 2]; + } + + lodepng_chunk_generate_crc(chunk); + return 0; +} + +static unsigned addChunk_tRNS(ucvector* out, const LodePNGColorMode* info) { + unsigned char* chunk = 0; + + if(info->colortype == LCT_PALETTE) { + size_t i, amount = info->palettesize; + /*the tail of palette values that all have 255 as alpha, does not have to be encoded*/ + for(i = info->palettesize; i != 0; --i) { + if(info->palette[4 * (i - 1) + 3] != 255) break; + --amount; + } + if(amount) { + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, amount, "tRNS")); + /*add the alpha channel values from the palette*/ + for(i = 0; i != amount; ++i) chunk[8 + i] = info->palette[4 * i + 3]; + } + } else if(info->colortype == LCT_GREY) { + if(info->key_defined) { + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 2, "tRNS")); + chunk[8] = (unsigned char)(info->key_r >> 8); + chunk[9] = (unsigned char)(info->key_r & 255); + } + } else if(info->colortype == LCT_RGB) { + if(info->key_defined) { + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 6, "tRNS")); + chunk[8] = (unsigned char)(info->key_r >> 8); + chunk[9] = (unsigned char)(info->key_r & 255); + chunk[10] = (unsigned char)(info->key_g >> 8); + chunk[11] = (unsigned char)(info->key_g & 255); + chunk[12] = (unsigned char)(info->key_b >> 8); + chunk[13] = (unsigned char)(info->key_b & 255); + } + } + + if(chunk) lodepng_chunk_generate_crc(chunk); + return 0; +} + +static unsigned addChunk_IDAT(ucvector* out, const unsigned char* data, size_t datasize, + LodePNGCompressSettings* zlibsettings) { + unsigned error = 0; + unsigned char* zlib = 0; + size_t zlibsize = 0; + + error = zlib_compress(&zlib, &zlibsize, data, datasize, zlibsettings); + if(!error) { + error = lodepng_chunk_createv(out, zlibsize, "IDAT", zlib); + } + lodepng_free(zlib); + return error; +} + +static unsigned addChunk_IEND(ucvector* out) { + return lodepng_chunk_createv(out, 0, "IEND", 0); +} + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + +static unsigned addChunk_tEXt(ucvector* out, const char* keyword, const char* textstring) { + unsigned char* chunk = 0; + size_t keysize = lodepng_strlen(keyword), textsize = lodepng_strlen(textstring); + size_t size 
= keysize + 1 + textsize; + if(keysize < 1 || keysize > 79) return 89; /*error: invalid keyword size*/ + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, size, "tEXt")); + lodepng_memcpy(chunk + 8, keyword, keysize); + chunk[8 + keysize] = 0; /*null termination char*/ + lodepng_memcpy(chunk + 9 + keysize, textstring, textsize); + lodepng_chunk_generate_crc(chunk); + return 0; +} + +static unsigned addChunk_zTXt(ucvector* out, const char* keyword, const char* textstring, + LodePNGCompressSettings* zlibsettings) { + unsigned error = 0; + unsigned char* chunk = 0; + unsigned char* compressed = 0; + size_t compressedsize = 0; + size_t textsize = lodepng_strlen(textstring); + size_t keysize = lodepng_strlen(keyword); + if(keysize < 1 || keysize > 79) return 89; /*error: invalid keyword size*/ + + error = zlib_compress(&compressed, &compressedsize, + (const unsigned char*)textstring, textsize, zlibsettings); + if(!error) { + size_t size = keysize + 2 + compressedsize; + error = lodepng_chunk_init(&chunk, out, size, "zTXt"); + } + if(!error) { + lodepng_memcpy(chunk + 8, keyword, keysize); + chunk[8 + keysize] = 0; /*null termination char*/ + chunk[9 + keysize] = 0; /*compression method: 0*/ + lodepng_memcpy(chunk + 10 + keysize, compressed, compressedsize); + lodepng_chunk_generate_crc(chunk); + } + + lodepng_free(compressed); + return error; +} + +static unsigned addChunk_iTXt(ucvector* out, unsigned compress, const char* keyword, const char* langtag, + const char* transkey, const char* textstring, LodePNGCompressSettings* zlibsettings) { + unsigned error = 0; + unsigned char* chunk = 0; + unsigned char* compressed = 0; + size_t compressedsize = 0; + size_t textsize = lodepng_strlen(textstring); + size_t keysize = lodepng_strlen(keyword), langsize = lodepng_strlen(langtag), transsize = lodepng_strlen(transkey); + + if(keysize < 1 || keysize > 79) return 89; /*error: invalid keyword size*/ + + if(compress) { + error = zlib_compress(&compressed, &compressedsize, + (const unsigned char*)textstring, textsize, zlibsettings); + } + if(!error) { + size_t size = keysize + 3 + langsize + 1 + transsize + 1 + (compress ? compressedsize : textsize); + error = lodepng_chunk_init(&chunk, out, size, "iTXt"); + } + if(!error) { + size_t pos = 8; + lodepng_memcpy(chunk + pos, keyword, keysize); + pos += keysize; + chunk[pos++] = 0; /*null termination char*/ + chunk[pos++] = (compress ? 
1 : 0); /*compression flag*/ + chunk[pos++] = 0; /*compression method: 0*/ + lodepng_memcpy(chunk + pos, langtag, langsize); + pos += langsize; + chunk[pos++] = 0; /*null termination char*/ + lodepng_memcpy(chunk + pos, transkey, transsize); + pos += transsize; + chunk[pos++] = 0; /*null termination char*/ + if(compress) { + lodepng_memcpy(chunk + pos, compressed, compressedsize); + } else { + lodepng_memcpy(chunk + pos, textstring, textsize); + } + lodepng_chunk_generate_crc(chunk); + } + + lodepng_free(compressed); + return error; +} + +static unsigned addChunk_bKGD(ucvector* out, const LodePNGInfo* info) { + unsigned char* chunk = 0; + if(info->color.colortype == LCT_GREY || info->color.colortype == LCT_GREY_ALPHA) { + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 2, "bKGD")); + chunk[8] = (unsigned char)(info->background_r >> 8); + chunk[9] = (unsigned char)(info->background_r & 255); + } else if(info->color.colortype == LCT_RGB || info->color.colortype == LCT_RGBA) { + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 6, "bKGD")); + chunk[8] = (unsigned char)(info->background_r >> 8); + chunk[9] = (unsigned char)(info->background_r & 255); + chunk[10] = (unsigned char)(info->background_g >> 8); + chunk[11] = (unsigned char)(info->background_g & 255); + chunk[12] = (unsigned char)(info->background_b >> 8); + chunk[13] = (unsigned char)(info->background_b & 255); + } else if(info->color.colortype == LCT_PALETTE) { + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 1, "bKGD")); + chunk[8] = (unsigned char)(info->background_r & 255); /*palette index*/ + } + if(chunk) lodepng_chunk_generate_crc(chunk); + return 0; +} + +static unsigned addChunk_tIME(ucvector* out, const LodePNGTime* time) { + unsigned char* chunk; + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 7, "tIME")); + chunk[8] = (unsigned char)(time->year >> 8); + chunk[9] = (unsigned char)(time->year & 255); + chunk[10] = (unsigned char)time->month; + chunk[11] = (unsigned char)time->day; + chunk[12] = (unsigned char)time->hour; + chunk[13] = (unsigned char)time->minute; + chunk[14] = (unsigned char)time->second; + lodepng_chunk_generate_crc(chunk); + return 0; +} + +static unsigned addChunk_pHYs(ucvector* out, const LodePNGInfo* info) { + unsigned char* chunk; + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 9, "pHYs")); + lodepng_set32bitInt(chunk + 8, info->phys_x); + lodepng_set32bitInt(chunk + 12, info->phys_y); + chunk[16] = info->phys_unit; + lodepng_chunk_generate_crc(chunk); + return 0; +} + +static unsigned addChunk_gAMA(ucvector* out, const LodePNGInfo* info) { + unsigned char* chunk; + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 4, "gAMA")); + lodepng_set32bitInt(chunk + 8, info->gama_gamma); + lodepng_chunk_generate_crc(chunk); + return 0; +} + +static unsigned addChunk_cHRM(ucvector* out, const LodePNGInfo* info) { + unsigned char* chunk; + CERROR_TRY_RETURN(lodepng_chunk_init(&chunk, out, 32, "cHRM")); + lodepng_set32bitInt(chunk + 8, info->chrm_white_x); + lodepng_set32bitInt(chunk + 12, info->chrm_white_y); + lodepng_set32bitInt(chunk + 16, info->chrm_red_x); + lodepng_set32bitInt(chunk + 20, info->chrm_red_y); + lodepng_set32bitInt(chunk + 24, info->chrm_green_x); + lodepng_set32bitInt(chunk + 28, info->chrm_green_y); + lodepng_set32bitInt(chunk + 32, info->chrm_blue_x); + lodepng_set32bitInt(chunk + 36, info->chrm_blue_y); + lodepng_chunk_generate_crc(chunk); + return 0; +} + +static unsigned addChunk_sRGB(ucvector* out, const LodePNGInfo* info) { + unsigned char data = info->srgb_intent; + 
return lodepng_chunk_createv(out, 1, "sRGB", &data); +} + +static unsigned addChunk_iCCP(ucvector* out, const LodePNGInfo* info, LodePNGCompressSettings* zlibsettings) { + unsigned error = 0; + unsigned char* chunk = 0; + unsigned char* compressed = 0; + size_t compressedsize = 0; + size_t keysize = lodepng_strlen(info->iccp_name); + + if(keysize < 1 || keysize > 79) return 89; /*error: invalid keyword size*/ + error = zlib_compress(&compressed, &compressedsize, + info->iccp_profile, info->iccp_profile_size, zlibsettings); + if(!error) { + size_t size = keysize + 2 + compressedsize; + error = lodepng_chunk_init(&chunk, out, size, "iCCP"); + } + if(!error) { + lodepng_memcpy(chunk + 8, info->iccp_name, keysize); + chunk[8 + keysize] = 0; /*null termination char*/ + chunk[9 + keysize] = 0; /*compression method: 0*/ + lodepng_memcpy(chunk + 10 + keysize, compressed, compressedsize); + lodepng_chunk_generate_crc(chunk); + } + + lodepng_free(compressed); + return error; +} + +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + +static void filterScanline(unsigned char* out, const unsigned char* scanline, const unsigned char* prevline, + size_t length, size_t bytewidth, unsigned char filterType) { + size_t i; + switch(filterType) { + case 0: /*None*/ + for(i = 0; i != length; ++i) out[i] = scanline[i]; + break; + case 1: /*Sub*/ + for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; + for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - scanline[i - bytewidth]; + break; + case 2: /*Up*/ + if(prevline) { + for(i = 0; i != length; ++i) out[i] = scanline[i] - prevline[i]; + } else { + for(i = 0; i != length; ++i) out[i] = scanline[i]; + } + break; + case 3: /*Average*/ + if(prevline) { + for(i = 0; i != bytewidth; ++i) out[i] = scanline[i] - (prevline[i] >> 1); + for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - ((scanline[i - bytewidth] + prevline[i]) >> 1); + } else { + for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; + for(i = bytewidth; i < length; ++i) out[i] = scanline[i] - (scanline[i - bytewidth] >> 1); + } + break; + case 4: /*Paeth*/ + if(prevline) { + /*paethPredictor(0, prevline[i], 0) is always prevline[i]*/ + for(i = 0; i != bytewidth; ++i) out[i] = (scanline[i] - prevline[i]); + for(i = bytewidth; i < length; ++i) { + out[i] = (scanline[i] - paethPredictor(scanline[i - bytewidth], prevline[i], prevline[i - bytewidth])); + } + } else { + for(i = 0; i != bytewidth; ++i) out[i] = scanline[i]; + /*paethPredictor(scanline[i - bytewidth], 0, 0) is always scanline[i - bytewidth]*/ + for(i = bytewidth; i < length; ++i) out[i] = (scanline[i] - scanline[i - bytewidth]); + } + break; + default: return; /*invalid filter type given*/ + } +} + +/* integer binary logarithm, max return value is 31 */ +static size_t ilog2(size_t i) { + size_t result = 0; + if(i >= 65536) { result += 16; i >>= 16; } + if(i >= 256) { result += 8; i >>= 8; } + if(i >= 16) { result += 4; i >>= 4; } + if(i >= 4) { result += 2; i >>= 2; } + if(i >= 2) { result += 1; /*i >>= 1;*/ } + return result; +} + +/* integer approximation for i * log2(i), helper function for LFS_ENTROPY */ +static size_t ilog2i(size_t i) { + size_t l; + if(i == 0) return 0; + l = ilog2(i); + /* approximate i*log2(i): l is integer logarithm, ((i - (1u << l)) << 1u) + linearly approximates the missing fractional part multiplied by i */ + return i * l + ((i - (1u << l)) << 1u); +} + +static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, + const LodePNGColorMode* color, const LodePNGEncoderSettings* 
settings) { + /* + For PNG filter method 0 + out must be a buffer with as size: h + (w * h * bpp + 7u) / 8u, because there are + the scanlines with 1 extra byte per scanline + */ + + unsigned bpp = lodepng_get_bpp(color); + /*the width of a scanline in bytes, not including the filter type*/ + size_t linebytes = lodepng_get_raw_size_idat(w, 1, bpp) - 1u; + + /*bytewidth is used for filtering, is 1 when bpp < 8, number of bytes per pixel otherwise*/ + size_t bytewidth = (bpp + 7u) / 8u; + const unsigned char* prevline = 0; + unsigned x, y; + unsigned error = 0; + LodePNGFilterStrategy strategy = settings->filter_strategy; + + /* + There is a heuristic called the minimum sum of absolute differences heuristic, suggested by the PNG standard: + * If the image type is Palette, or the bit depth is smaller than 8, then do not filter the image (i.e. + use fixed filtering, with the filter None). + * (The other case) If the image type is Grayscale or RGB (with or without Alpha), and the bit depth is + not smaller than 8, then use adaptive filtering heuristic as follows: independently for each row, apply + all five filters and select the filter that produces the smallest sum of absolute values per row. + This heuristic is used if filter strategy is LFS_MINSUM and filter_palette_zero is true. + + If filter_palette_zero is true and filter_strategy is not LFS_MINSUM, the above heuristic is followed, + but for "the other case", whatever strategy filter_strategy is set to instead of the minimum sum + heuristic is used. + */ + if(settings->filter_palette_zero && + (color->colortype == LCT_PALETTE || color->bitdepth < 8)) strategy = LFS_ZERO; + + if(bpp == 0) return 31; /*error: invalid color type*/ + + if(strategy >= LFS_ZERO && strategy <= LFS_FOUR) { + unsigned char type = (unsigned char)strategy; + for(y = 0; y != h; ++y) { + size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/ + size_t inindex = linebytes * y; + out[outindex] = type; /*filter type byte*/ + filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, type); + prevline = &in[inindex]; + } + } else if(strategy == LFS_MINSUM) { + /*adaptive filtering*/ + unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ + size_t smallest = 0; + unsigned char type, bestType = 0; + + for(type = 0; type != 5; ++type) { + attempt[type] = (unsigned char*)lodepng_malloc(linebytes); + if(!attempt[type]) error = 83; /*alloc fail*/ + } + + if(!error) { + for(y = 0; y != h; ++y) { + /*try the 5 filter types*/ + for(type = 0; type != 5; ++type) { + size_t sum = 0; + filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); + + /*calculate the sum of the result*/ + if(type == 0) { + for(x = 0; x != linebytes; ++x) sum += (unsigned char)(attempt[type][x]); + } else { + for(x = 0; x != linebytes; ++x) { + /*For differences, each byte should be treated as signed, values above 127 are negative + (converted to signed char). Filtertype 0 isn't a difference though, so use unsigned there. + This means filtertype 0 is almost never chosen, but that is justified.*/ + unsigned char s = attempt[type][x]; + sum += s < 128 ? 
s : (255U - s); + } + } + + /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/ + if(type == 0 || sum < smallest) { + bestType = type; + smallest = sum; + } + } + + prevline = &in[y * linebytes]; + + /*now fill the out values*/ + out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ + for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; + } + } + + for(type = 0; type != 5; ++type) lodepng_free(attempt[type]); + } else if(strategy == LFS_ENTROPY) { + unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ + size_t bestSum = 0; + unsigned type, bestType = 0; + unsigned count[256]; + + for(type = 0; type != 5; ++type) { + attempt[type] = (unsigned char*)lodepng_malloc(linebytes); + if(!attempt[type]) error = 83; /*alloc fail*/ + } + + if(!error) { + for(y = 0; y != h; ++y) { + /*try the 5 filter types*/ + for(type = 0; type != 5; ++type) { + size_t sum = 0; + filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); + lodepng_memset(count, 0, 256 * sizeof(*count)); + for(x = 0; x != linebytes; ++x) ++count[attempt[type][x]]; + ++count[type]; /*the filter type itself is part of the scanline*/ + for(x = 0; x != 256; ++x) { + sum += ilog2i(count[x]); + } + /*check if this is smallest sum (or if type == 0 it's the first case so always store the values)*/ + if(type == 0 || sum > bestSum) { + bestType = type; + bestSum = sum; + } + } + + prevline = &in[y * linebytes]; + + /*now fill the out values*/ + out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ + for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; + } + } + + for(type = 0; type != 5; ++type) lodepng_free(attempt[type]); + } else if(strategy == LFS_PREDEFINED) { + for(y = 0; y != h; ++y) { + size_t outindex = (1 + linebytes) * y; /*the extra filterbyte added to each row*/ + size_t inindex = linebytes * y; + unsigned char type = settings->predefined_filters[y]; + out[outindex] = type; /*filter type byte*/ + filterScanline(&out[outindex + 1], &in[inindex], prevline, linebytes, bytewidth, type); + prevline = &in[inindex]; + } + } else if(strategy == LFS_BRUTE_FORCE) { + /*brute force filter chooser. + deflate the scanline after every filter attempt to see which one deflates best. + This is very slow and gives only slightly smaller, sometimes even larger, result*/ + size_t size[5]; + unsigned char* attempt[5]; /*five filtering attempts, one for each filter type*/ + size_t smallest = 0; + unsigned type = 0, bestType = 0; + unsigned char* dummy; + LodePNGCompressSettings zlibsettings; + lodepng_memcpy(&zlibsettings, &settings->zlibsettings, sizeof(LodePNGCompressSettings)); + /*use fixed tree on the attempts so that the tree is not adapted to the filtertype on purpose, + to simulate the true case where the tree is the same for the whole image. Sometimes it gives + better result with dynamic tree anyway. Using the fixed tree sometimes gives worse, but in rare + cases better compression. 
It does make this a bit less slow, so it's worth doing this.*/ + zlibsettings.btype = 1; + /*a custom encoder likely doesn't read the btype setting and is optimized for complete PNG + images only, so disable it*/ + zlibsettings.custom_zlib = 0; + zlibsettings.custom_deflate = 0; + for(type = 0; type != 5; ++type) { + attempt[type] = (unsigned char*)lodepng_malloc(linebytes); + if(!attempt[type]) error = 83; /*alloc fail*/ + } + if(!error) { + for(y = 0; y != h; ++y) /*try the 5 filter types*/ { + for(type = 0; type != 5; ++type) { + unsigned testsize = (unsigned)linebytes; + /*if(testsize > 8) testsize /= 8;*/ /*it already works good enough by testing a part of the row*/ + + filterScanline(attempt[type], &in[y * linebytes], prevline, linebytes, bytewidth, type); + size[type] = 0; + dummy = 0; + zlib_compress(&dummy, &size[type], attempt[type], testsize, &zlibsettings); + lodepng_free(dummy); + /*check if this is smallest size (or if type == 0 it's the first case so always store the values)*/ + if(type == 0 || size[type] < smallest) { + bestType = type; + smallest = size[type]; + } + } + prevline = &in[y * linebytes]; + out[y * (linebytes + 1)] = bestType; /*the first byte of a scanline will be the filter type*/ + for(x = 0; x != linebytes; ++x) out[y * (linebytes + 1) + 1 + x] = attempt[bestType][x]; + } + } + for(type = 0; type != 5; ++type) lodepng_free(attempt[type]); + } + else return 88; /* unknown filter strategy */ + + return error; +} + +static void addPaddingBits(unsigned char* out, const unsigned char* in, + size_t olinebits, size_t ilinebits, unsigned h) { + /*The opposite of the removePaddingBits function + olinebits must be >= ilinebits*/ + unsigned y; + size_t diff = olinebits - ilinebits; + size_t obp = 0, ibp = 0; /*bit pointers*/ + for(y = 0; y != h; ++y) { + size_t x; + for(x = 0; x < ilinebits; ++x) { + unsigned char bit = readBitFromReversedStream(&ibp, in); + setBitOfReversedStream(&obp, out, bit); + } + /*obp += diff; --> no, fill in some value in the padding bits too, to avoid + "Use of uninitialised value of size ###" warning from valgrind*/ + for(x = 0; x != diff; ++x) setBitOfReversedStream(&obp, out, 0); + } +} + +/* +in: non-interlaced image with size w*h +out: the same pixels, but re-ordered according to PNG's Adam7 interlacing, with + no padding bits between scanlines, but between reduced images so that each + reduced image starts at a byte. +bpp: bits per pixel +there are no padding bits, not between scanlines, not between reduced images +in has the following size in bits: w * h * bpp. 
+out is possibly bigger due to padding bits between reduced images +NOTE: comments about padding bits are only relevant if bpp < 8 +*/ +static void Adam7_interlace(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, unsigned bpp) { + unsigned passw[7], passh[7]; + size_t filter_passstart[8], padded_passstart[8], passstart[8]; + unsigned i; + + Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); + + if(bpp >= 8) { + for(i = 0; i != 7; ++i) { + unsigned x, y, b; + size_t bytewidth = bpp / 8u; + for(y = 0; y < passh[i]; ++y) + for(x = 0; x < passw[i]; ++x) { + size_t pixelinstart = ((ADAM7_IY[i] + y * ADAM7_DY[i]) * w + ADAM7_IX[i] + x * ADAM7_DX[i]) * bytewidth; + size_t pixeloutstart = passstart[i] + (y * passw[i] + x) * bytewidth; + for(b = 0; b < bytewidth; ++b) { + out[pixeloutstart + b] = in[pixelinstart + b]; + } + } + } + } else /*bpp < 8: Adam7 with pixels < 8 bit is a bit trickier: with bit pointers*/ { + for(i = 0; i != 7; ++i) { + unsigned x, y, b; + unsigned ilinebits = bpp * passw[i]; + unsigned olinebits = bpp * w; + size_t obp, ibp; /*bit pointers (for out and in buffer)*/ + for(y = 0; y < passh[i]; ++y) + for(x = 0; x < passw[i]; ++x) { + ibp = (ADAM7_IY[i] + y * ADAM7_DY[i]) * olinebits + (ADAM7_IX[i] + x * ADAM7_DX[i]) * bpp; + obp = (8 * passstart[i]) + (y * ilinebits + x * bpp); + for(b = 0; b < bpp; ++b) { + unsigned char bit = readBitFromReversedStream(&ibp, in); + setBitOfReversedStream(&obp, out, bit); + } + } + } + } +} + +/*out must be buffer big enough to contain uncompressed IDAT chunk data, and in must contain the full image. +return value is error**/ +static unsigned preProcessScanlines(unsigned char** out, size_t* outsize, const unsigned char* in, + unsigned w, unsigned h, + const LodePNGInfo* info_png, const LodePNGEncoderSettings* settings) { + /* + This function converts the pure 2D image with the PNG's colortype, into filtered-padded-interlaced data. 
Steps: + *) if no Adam7: 1) add padding bits (= possible extra bits per scanline if bpp < 8) 2) filter + *) if adam7: 1) Adam7_interlace 2) 7x add padding bits 3) 7x filter + */ + unsigned bpp = lodepng_get_bpp(&info_png->color); + unsigned error = 0; + + if(info_png->interlace_method == 0) { + *outsize = h + (h * ((w * bpp + 7u) / 8u)); /*image size plus an extra byte per scanline + possible padding bits*/ + *out = (unsigned char*)lodepng_malloc(*outsize); + if(!(*out) && (*outsize)) error = 83; /*alloc fail*/ + + if(!error) { + /*non multiple of 8 bits per scanline, padding bits needed per scanline*/ + if(bpp < 8 && w * bpp != ((w * bpp + 7u) / 8u) * 8u) { + unsigned char* padded = (unsigned char*)lodepng_malloc(h * ((w * bpp + 7u) / 8u)); + if(!padded) error = 83; /*alloc fail*/ + if(!error) { + addPaddingBits(padded, in, ((w * bpp + 7u) / 8u) * 8u, w * bpp, h); + error = filter(*out, padded, w, h, &info_png->color, settings); + } + lodepng_free(padded); + } else { + /*we can immediately filter into the out buffer, no other steps needed*/ + error = filter(*out, in, w, h, &info_png->color, settings); + } + } + } else /*interlace_method is 1 (Adam7)*/ { + unsigned passw[7], passh[7]; + size_t filter_passstart[8], padded_passstart[8], passstart[8]; + unsigned char* adam7; + + Adam7_getpassvalues(passw, passh, filter_passstart, padded_passstart, passstart, w, h, bpp); + + *outsize = filter_passstart[7]; /*image size plus an extra byte per scanline + possible padding bits*/ + *out = (unsigned char*)lodepng_malloc(*outsize); + if(!(*out)) error = 83; /*alloc fail*/ + + adam7 = (unsigned char*)lodepng_malloc(passstart[7]); + if(!adam7 && passstart[7]) error = 83; /*alloc fail*/ + + if(!error) { + unsigned i; + + Adam7_interlace(adam7, in, w, h, bpp); + for(i = 0; i != 7; ++i) { + if(bpp < 8) { + unsigned char* padded = (unsigned char*)lodepng_malloc(padded_passstart[i + 1] - padded_passstart[i]); + if(!padded) ERROR_BREAK(83); /*alloc fail*/ + addPaddingBits(padded, &adam7[passstart[i]], + ((passw[i] * bpp + 7u) / 8u) * 8u, passw[i] * bpp, passh[i]); + error = filter(&(*out)[filter_passstart[i]], padded, + passw[i], passh[i], &info_png->color, settings); + lodepng_free(padded); + } else { + error = filter(&(*out)[filter_passstart[i]], &adam7[padded_passstart[i]], + passw[i], passh[i], &info_png->color, settings); + } + + if(error) break; + } + } + + lodepng_free(adam7); + } + + return error; +} + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS +static unsigned addUnknownChunks(ucvector* out, unsigned char* data, size_t datasize) { + unsigned char* inchunk = data; + while((size_t)(inchunk - data) < datasize) { + CERROR_TRY_RETURN(lodepng_chunk_append(&out->data, &out->size, inchunk)); + out->allocsize = out->size; /*fix the allocsize again*/ + inchunk = lodepng_chunk_next(inchunk, data + datasize); + } + return 0; +} + +static unsigned isGrayICCProfile(const unsigned char* profile, unsigned size) { + /* + It is a gray profile if bytes 16-19 are "GRAY", rgb profile if bytes 16-19 + are "RGB ". We do not perform any full parsing of the ICC profile here, other + than check those 4 bytes to grayscale profile. Other than that, validity of + the profile is not checked. This is needed only because the PNG specification + requires using a non-gray color model if there is an ICC profile with "RGB " + (sadly limiting compression opportunities if the input data is grayscale RGB + data), and requires using a gray color model if it is "GRAY". 
+ */ + if(size < 20) return 0; + return profile[16] == 'G' && profile[17] == 'R' && profile[18] == 'A' && profile[19] == 'Y'; +} + +static unsigned isRGBICCProfile(const unsigned char* profile, unsigned size) { + /* See comment in isGrayICCProfile*/ + if(size < 20) return 0; + return profile[16] == 'R' && profile[17] == 'G' && profile[18] == 'B' && profile[19] == ' '; +} +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + +unsigned lodepng_encode(unsigned char** out, size_t* outsize, + const unsigned char* image, unsigned w, unsigned h, + LodePNGState* state) { + unsigned char* data = 0; /*uncompressed version of the IDAT chunk data*/ + size_t datasize = 0; + ucvector outv = ucvector_init(NULL, 0); + LodePNGInfo info; + const LodePNGInfo* info_png = &state->info_png; + + lodepng_info_init(&info); + + /*provide some proper output values if error will happen*/ + *out = 0; + *outsize = 0; + state->error = 0; + + /*check input values validity*/ + if((info_png->color.colortype == LCT_PALETTE || state->encoder.force_palette) + && (info_png->color.palettesize == 0 || info_png->color.palettesize > 256)) { + state->error = 68; /*invalid palette size, it is only allowed to be 1-256*/ + goto cleanup; + } + if(state->encoder.zlibsettings.btype > 2) { + state->error = 61; /*error: invalid btype*/ + goto cleanup; + } + if(info_png->interlace_method > 1) { + state->error = 71; /*error: invalid interlace mode*/ + goto cleanup; + } + state->error = checkColorValidity(info_png->color.colortype, info_png->color.bitdepth); + if(state->error) goto cleanup; /*error: invalid color type given*/ + state->error = checkColorValidity(state->info_raw.colortype, state->info_raw.bitdepth); + if(state->error) goto cleanup; /*error: invalid color type given*/ + + /* color convert and compute scanline filter types */ + lodepng_info_copy(&info, &state->info_png); + if(state->encoder.auto_convert) { + LodePNGColorStats stats; + lodepng_color_stats_init(&stats); +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + if(info_png->iccp_defined && + isGrayICCProfile(info_png->iccp_profile, info_png->iccp_profile_size)) { + /*the PNG specification does not allow to use palette with a GRAY ICC profile, even + if the palette has only gray colors, so disallow it.*/ + stats.allow_palette = 0; + } + if(info_png->iccp_defined && + isRGBICCProfile(info_png->iccp_profile, info_png->iccp_profile_size)) { + /*the PNG specification does not allow to use grayscale color with RGB ICC profile, so disallow gray.*/ + stats.allow_greyscale = 0; + } +#endif /* LODEPNG_COMPILE_ANCILLARY_CHUNKS */ + state->error = lodepng_compute_color_stats(&stats, image, w, h, &state->info_raw); + if(state->error) goto cleanup; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + if(info_png->background_defined) { + /*the background chunk's color must be taken into account as well*/ + unsigned r = 0, g = 0, b = 0; + LodePNGColorMode mode16 = lodepng_color_mode_make(LCT_RGB, 16); + lodepng_convert_rgb(&r, &g, &b, info_png->background_r, info_png->background_g, info_png->background_b, &mode16, &info_png->color); + state->error = lodepng_color_stats_add(&stats, r, g, b, 65535); + if(state->error) goto cleanup; + } +#endif /* LODEPNG_COMPILE_ANCILLARY_CHUNKS */ + state->error = auto_choose_color(&info.color, &state->info_raw, &stats); + if(state->error) goto cleanup; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + /*also convert the background chunk*/ + if(info_png->background_defined) { + if(lodepng_convert_rgb(&info.background_r, &info.background_g, &info.background_b, + info_png->background_r, 
info_png->background_g, info_png->background_b, &info.color, &info_png->color)) { + state->error = 104; + goto cleanup; + } + } +#endif /* LODEPNG_COMPILE_ANCILLARY_CHUNKS */ + } +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + if(info_png->iccp_defined) { + unsigned gray_icc = isGrayICCProfile(info_png->iccp_profile, info_png->iccp_profile_size); + unsigned rgb_icc = isRGBICCProfile(info_png->iccp_profile, info_png->iccp_profile_size); + unsigned gray_png = info.color.colortype == LCT_GREY || info.color.colortype == LCT_GREY_ALPHA; + if(!gray_icc && !rgb_icc) { + state->error = 100; /* Disallowed profile color type for PNG */ + goto cleanup; + } + if(gray_icc != gray_png) { + /*Not allowed to use RGB/RGBA/palette with GRAY ICC profile or vice versa, + or in case of auto_convert, it wasn't possible to find appropriate model*/ + state->error = state->encoder.auto_convert ? 102 : 101; + goto cleanup; + } + } +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + if(!lodepng_color_mode_equal(&state->info_raw, &info.color)) { + unsigned char* converted; + size_t size = ((size_t)w * (size_t)h * (size_t)lodepng_get_bpp(&info.color) + 7u) / 8u; + + converted = (unsigned char*)lodepng_malloc(size); + if(!converted && size) state->error = 83; /*alloc fail*/ + if(!state->error) { + state->error = lodepng_convert(converted, image, &info.color, &state->info_raw, w, h); + } + if(!state->error) { + state->error = preProcessScanlines(&data, &datasize, converted, w, h, &info, &state->encoder); + } + lodepng_free(converted); + if(state->error) goto cleanup; + } else { + state->error = preProcessScanlines(&data, &datasize, image, w, h, &info, &state->encoder); + if(state->error) goto cleanup; + } + + /* output all PNG chunks */ { +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + size_t i; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + /*write signature and chunks*/ + state->error = writeSignature(&outv); + if(state->error) goto cleanup; + /*IHDR*/ + state->error = addChunk_IHDR(&outv, w, h, info.color.colortype, info.color.bitdepth, info.interlace_method); + if(state->error) goto cleanup; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + /*unknown chunks between IHDR and PLTE*/ + if(info.unknown_chunks_data[0]) { + state->error = addUnknownChunks(&outv, info.unknown_chunks_data[0], info.unknown_chunks_size[0]); + if(state->error) goto cleanup; + } + /*color profile chunks must come before PLTE */ + if(info.iccp_defined) { + state->error = addChunk_iCCP(&outv, &info, &state->encoder.zlibsettings); + if(state->error) goto cleanup; + } + if(info.srgb_defined) { + state->error = addChunk_sRGB(&outv, &info); + if(state->error) goto cleanup; + } + if(info.gama_defined) { + state->error = addChunk_gAMA(&outv, &info); + if(state->error) goto cleanup; + } + if(info.chrm_defined) { + state->error = addChunk_cHRM(&outv, &info); + if(state->error) goto cleanup; + } +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + /*PLTE*/ + if(info.color.colortype == LCT_PALETTE) { + state->error = addChunk_PLTE(&outv, &info.color); + if(state->error) goto cleanup; + } + if(state->encoder.force_palette && (info.color.colortype == LCT_RGB || info.color.colortype == LCT_RGBA)) { + /*force_palette means: write suggested palette for truecolor in PLTE chunk*/ + state->error = addChunk_PLTE(&outv, &info.color); + if(state->error) goto cleanup; + } + /*tRNS (this will only add if when necessary) */ + state->error = addChunk_tRNS(&outv, &info.color); + if(state->error) goto cleanup; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + /*bKGD (must come between PLTE and the IDAt 
chunks*/ + if(info.background_defined) { + state->error = addChunk_bKGD(&outv, &info); + if(state->error) goto cleanup; + } + /*pHYs (must come before the IDAT chunks)*/ + if(info.phys_defined) { + state->error = addChunk_pHYs(&outv, &info); + if(state->error) goto cleanup; + } + + /*unknown chunks between PLTE and IDAT*/ + if(info.unknown_chunks_data[1]) { + state->error = addUnknownChunks(&outv, info.unknown_chunks_data[1], info.unknown_chunks_size[1]); + if(state->error) goto cleanup; + } +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + /*IDAT (multiple IDAT chunks must be consecutive)*/ + state->error = addChunk_IDAT(&outv, data, datasize, &state->encoder.zlibsettings); + if(state->error) goto cleanup; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + /*tIME*/ + if(info.time_defined) { + state->error = addChunk_tIME(&outv, &info.time); + if(state->error) goto cleanup; + } + /*tEXt and/or zTXt*/ + for(i = 0; i != info.text_num; ++i) { + if(lodepng_strlen(info.text_keys[i]) > 79) { + state->error = 66; /*text chunk too large*/ + goto cleanup; + } + if(lodepng_strlen(info.text_keys[i]) < 1) { + state->error = 67; /*text chunk too small*/ + goto cleanup; + } + if(state->encoder.text_compression) { + state->error = addChunk_zTXt(&outv, info.text_keys[i], info.text_strings[i], &state->encoder.zlibsettings); + if(state->error) goto cleanup; + } else { + state->error = addChunk_tEXt(&outv, info.text_keys[i], info.text_strings[i]); + if(state->error) goto cleanup; + } + } + /*LodePNG version id in text chunk*/ + if(state->encoder.add_id) { + unsigned already_added_id_text = 0; + for(i = 0; i != info.text_num; ++i) { + const char* k = info.text_keys[i]; + /* Could use strcmp, but we're not calling or reimplementing this C library function for this use only */ + if(k[0] == 'L' && k[1] == 'o' && k[2] == 'd' && k[3] == 'e' && + k[4] == 'P' && k[5] == 'N' && k[6] == 'G' && k[7] == '\0') { + already_added_id_text = 1; + break; + } + } + if(already_added_id_text == 0) { + state->error = addChunk_tEXt(&outv, "LodePNG", LODEPNG_VERSION_STRING); /*it's shorter as tEXt than as zTXt chunk*/ + if(state->error) goto cleanup; + } + } + /*iTXt*/ + for(i = 0; i != info.itext_num; ++i) { + if(lodepng_strlen(info.itext_keys[i]) > 79) { + state->error = 66; /*text chunk too large*/ + goto cleanup; + } + if(lodepng_strlen(info.itext_keys[i]) < 1) { + state->error = 67; /*text chunk too small*/ + goto cleanup; + } + state->error = addChunk_iTXt( + &outv, state->encoder.text_compression, + info.itext_keys[i], info.itext_langtags[i], info.itext_transkeys[i], info.itext_strings[i], + &state->encoder.zlibsettings); + if(state->error) goto cleanup; + } + + /*unknown chunks between IDAT and IEND*/ + if(info.unknown_chunks_data[2]) { + state->error = addUnknownChunks(&outv, info.unknown_chunks_data[2], info.unknown_chunks_size[2]); + if(state->error) goto cleanup; + } +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + state->error = addChunk_IEND(&outv); + if(state->error) goto cleanup; + } + +cleanup: + lodepng_info_cleanup(&info); + lodepng_free(data); + + /*instead of cleaning the vector up, give it to the output*/ + *out = outv.data; + *outsize = outv.size; + + return state->error; +} + +unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize, const unsigned char* image, + unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth) { + unsigned error; + LodePNGState state; + lodepng_state_init(&state); + state.info_raw.colortype = colortype; + state.info_raw.bitdepth = bitdepth; + 
state.info_png.color.colortype = colortype; + state.info_png.color.bitdepth = bitdepth; + lodepng_encode(out, outsize, image, w, h, &state); + error = state.error; + lodepng_state_cleanup(&state); + return error; +} + +unsigned lodepng_encode32(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h) { + return lodepng_encode_memory(out, outsize, image, w, h, LCT_RGBA, 8); +} + +unsigned lodepng_encode24(unsigned char** out, size_t* outsize, const unsigned char* image, unsigned w, unsigned h) { + return lodepng_encode_memory(out, outsize, image, w, h, LCT_RGB, 8); +} + +#ifdef LODEPNG_COMPILE_DISK +unsigned lodepng_encode_file(const char* filename, const unsigned char* image, unsigned w, unsigned h, + LodePNGColorType colortype, unsigned bitdepth) { + unsigned char* buffer; + size_t buffersize; + unsigned error = lodepng_encode_memory(&buffer, &buffersize, image, w, h, colortype, bitdepth); + if(!error) error = lodepng_save_file(buffer, buffersize, filename); + lodepng_free(buffer); + return error; +} + +unsigned lodepng_encode32_file(const char* filename, const unsigned char* image, unsigned w, unsigned h) { + return lodepng_encode_file(filename, image, w, h, LCT_RGBA, 8); +} + +unsigned lodepng_encode24_file(const char* filename, const unsigned char* image, unsigned w, unsigned h) { + return lodepng_encode_file(filename, image, w, h, LCT_RGB, 8); +} +#endif /*LODEPNG_COMPILE_DISK*/ + +void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings) { + lodepng_compress_settings_init(&settings->zlibsettings); + settings->filter_palette_zero = 1; + settings->filter_strategy = LFS_MINSUM; + settings->auto_convert = 1; + settings->force_palette = 0; + settings->predefined_filters = 0; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + settings->add_id = 0; + settings->text_compression = 1; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ +} + +#endif /*LODEPNG_COMPILE_ENCODER*/ +#endif /*LODEPNG_COMPILE_PNG*/ + +#ifdef LODEPNG_COMPILE_ERROR_TEXT +/* +This returns the description of a numerical error code in English. This is also +the documentation of all the error codes. +*/ +const char* lodepng_error_text(unsigned code) { + switch(code) { + case 0: return "no error, everything went ok"; + case 1: return "nothing done yet"; /*the Encoder/Decoder has done nothing yet, error checking makes no sense yet*/ + case 10: return "end of input memory reached without huffman end code"; /*while huffman decoding*/ + case 11: return "error in code tree made it jump outside of huffman tree"; /*while huffman decoding*/ + case 13: return "problem while processing dynamic deflate block"; + case 14: return "problem while processing dynamic deflate block"; + case 15: return "problem while processing dynamic deflate block"; + /*this error could happen if there are only 0 or 1 symbols present in the huffman code:*/ + case 16: return "invalid code while processing dynamic deflate block"; + case 17: return "end of out buffer memory reached while inflating"; + case 18: return "invalid distance code while inflating"; + case 19: return "end of out buffer memory reached while inflating"; + case 20: return "invalid deflate block BTYPE encountered while decoding"; + case 21: return "NLEN is not ones complement of LEN in a deflate block"; + + /*end of out buffer memory reached while inflating: + This can happen if the inflated deflate data is longer than the amount of bytes required to fill up + all the pixels of the image, given the color depth and image dimensions. 
Something that doesn't + happen in a normal, well encoded, PNG image.*/ + case 22: return "end of out buffer memory reached while inflating"; + case 23: return "end of in buffer memory reached while inflating"; + case 24: return "invalid FCHECK in zlib header"; + case 25: return "invalid compression method in zlib header"; + case 26: return "FDICT encountered in zlib header while it's not used for PNG"; + case 27: return "PNG file is smaller than a PNG header"; + /*Checks the magic file header, the first 8 bytes of the PNG file*/ + case 28: return "incorrect PNG signature, it's no PNG or corrupted"; + case 29: return "first chunk is not the header chunk"; + case 30: return "chunk length too large, chunk broken off at end of file"; + case 31: return "illegal PNG color type or bpp"; + case 32: return "illegal PNG compression method"; + case 33: return "illegal PNG filter method"; + case 34: return "illegal PNG interlace method"; + case 35: return "chunk length of a chunk is too large or the chunk too small"; + case 36: return "illegal PNG filter type encountered"; + case 37: return "illegal bit depth for this color type given"; + case 38: return "the palette is too small or too big"; /*0, or more than 256 colors*/ + case 39: return "tRNS chunk before PLTE or has more entries than palette size"; + case 40: return "tRNS chunk has wrong size for grayscale image"; + case 41: return "tRNS chunk has wrong size for RGB image"; + case 42: return "tRNS chunk appeared while it was not allowed for this color type"; + case 43: return "bKGD chunk has wrong size for palette image"; + case 44: return "bKGD chunk has wrong size for grayscale image"; + case 45: return "bKGD chunk has wrong size for RGB image"; + case 48: return "empty input buffer given to decoder. Maybe caused by non-existing file?"; + case 49: return "jumped past memory while generating dynamic huffman tree"; + case 50: return "jumped past memory while generating dynamic huffman tree"; + case 51: return "jumped past memory while inflating huffman block"; + case 52: return "jumped past memory while inflating"; + case 53: return "size of zlib data too small"; + case 54: return "repeat symbol in tree while there was no value symbol yet"; + /*jumped past tree while generating huffman tree, this could be when the + tree will have more leaves than symbols after generating it out of the + given lengths. They call this an oversubscribed dynamic bit lengths tree in zlib.*/ + case 55: return "jumped past tree while generating huffman tree"; + case 56: return "given output image colortype or bitdepth not supported for color conversion"; + case 57: return "invalid CRC encountered (checking CRC can be disabled)"; + case 58: return "invalid ADLER32 encountered (checking ADLER32 can be disabled)"; + case 59: return "requested color conversion not supported"; + case 60: return "invalid window size given in the settings of the encoder (must be 0-32768)"; + case 61: return "invalid BTYPE given in the settings of the encoder (only 0, 1 and 2 are allowed)"; + /*LodePNG leaves the choice of RGB to grayscale conversion formula to the user.*/ + case 62: return "conversion from color to grayscale not supported"; + /*(2^31-1)*/ + case 63: return "length of a chunk too long, max allowed for PNG is 2147483647 bytes per chunk"; + /*this would result in the inability of a deflated block to ever contain an end code. 
It must be at least 1.*/ + case 64: return "the length of the END symbol 256 in the Huffman tree is 0"; + case 66: return "the length of a text chunk keyword given to the encoder is longer than the maximum of 79 bytes"; + case 67: return "the length of a text chunk keyword given to the encoder is smaller than the minimum of 1 byte"; + case 68: return "tried to encode a PLTE chunk with a palette that has less than 1 or more than 256 colors"; + case 69: return "unknown chunk type with 'critical' flag encountered by the decoder"; + case 71: return "invalid interlace mode given to encoder (must be 0 or 1)"; + case 72: return "while decoding, invalid compression method encountering in zTXt or iTXt chunk (it must be 0)"; + case 73: return "invalid tIME chunk size"; + case 74: return "invalid pHYs chunk size"; + /*length could be wrong, or data chopped off*/ + case 75: return "no null termination char found while decoding text chunk"; + case 76: return "iTXt chunk too short to contain required bytes"; + case 77: return "integer overflow in buffer size"; + case 78: return "failed to open file for reading"; /*file doesn't exist or couldn't be opened for reading*/ + case 79: return "failed to open file for writing"; + case 80: return "tried creating a tree of 0 symbols"; + case 81: return "lazy matching at pos 0 is impossible"; + case 82: return "color conversion to palette requested while a color isn't in palette, or index out of bounds"; + case 83: return "memory allocation failed"; + case 84: return "given image too small to contain all pixels to be encoded"; + case 86: return "impossible offset in lz77 encoding (internal bug)"; + case 87: return "must provide custom zlib function pointer if LODEPNG_COMPILE_ZLIB is not defined"; + case 88: return "invalid filter strategy given for LodePNGEncoderSettings.filter_strategy"; + case 89: return "text chunk keyword too short or long: must have size 1-79"; + /*the windowsize in the LodePNGCompressSettings. Requiring POT(==> & instead of %) makes encoding 12% faster.*/ + case 90: return "windowsize must be a power of two"; + case 91: return "invalid decompressed idat size"; + case 92: return "integer overflow due to too many pixels"; + case 93: return "zero width or height is invalid"; + case 94: return "header chunk must have a size of 13 bytes"; + case 95: return "integer overflow with combined idat chunk size"; + case 96: return "invalid gAMA chunk size"; + case 97: return "invalid cHRM chunk size"; + case 98: return "invalid sRGB chunk size"; + case 99: return "invalid sRGB rendering intent"; + case 100: return "invalid ICC profile color type, the PNG specification only allows RGB or GRAY"; + case 101: return "PNG specification does not allow RGB ICC profile on gray color types and vice versa"; + case 102: return "not allowed to set grayscale ICC profile with colored pixels by PNG specification"; + case 103: return "invalid palette index in bKGD chunk. Maybe it came before PLTE chunk?"; + case 104: return "invalid bKGD color while encoding (e.g. 
palette index out of range)"; + case 105: return "integer overflow of bitsize"; + case 106: return "PNG file must have PLTE chunk if color type is palette"; + case 107: return "color convert from palette mode requested without setting the palette data in it"; + case 108: return "tried to add more than 256 values to a palette"; + /*this limit can be configured in LodePNGDecompressSettings*/ + case 109: return "tried to decompress zlib or deflate data larger than desired max_output_size"; + case 110: return "custom zlib or inflate decompression failed"; + case 111: return "custom zlib or deflate compression failed"; + /*max text size limit can be configured in LodePNGDecoderSettings. This error prevents + unreasonable memory consumption when decoding due to impossibly large text sizes.*/ + case 112: return "compressed text unreasonably large"; + /*max ICC size limit can be configured in LodePNGDecoderSettings. This error prevents + unreasonable memory consumption when decoding due to impossibly large ICC profile*/ + case 113: return "ICC profile unreasonably large"; + } + return "unknown error code"; +} +#endif /*LODEPNG_COMPILE_ERROR_TEXT*/ + +/* ////////////////////////////////////////////////////////////////////////// */ +/* ////////////////////////////////////////////////////////////////////////// */ +/* // C++ Wrapper // */ +/* ////////////////////////////////////////////////////////////////////////// */ +/* ////////////////////////////////////////////////////////////////////////// */ + +#ifdef LODEPNG_COMPILE_CPP +namespace lodepng { + +#ifdef LODEPNG_COMPILE_DISK +unsigned load_file(std::vector& buffer, const std::string& filename) { + long size = lodepng_filesize(filename.c_str()); + if(size < 0) return 78; + buffer.resize((size_t)size); + return size == 0 ? 0 : lodepng_buffer_file(&buffer[0], (size_t)size, filename.c_str()); +} + +/*write given buffer to the file, overwriting the file, it doesn't append to it.*/ +unsigned save_file(const std::vector& buffer, const std::string& filename) { + return lodepng_save_file(buffer.empty() ? 0 : &buffer[0], buffer.size(), filename.c_str()); +} +#endif /* LODEPNG_COMPILE_DISK */ + +#ifdef LODEPNG_COMPILE_ZLIB +#ifdef LODEPNG_COMPILE_DECODER +unsigned decompress(std::vector& out, const unsigned char* in, size_t insize, + const LodePNGDecompressSettings& settings) { + unsigned char* buffer = 0; + size_t buffersize = 0; + unsigned error = zlib_decompress(&buffer, &buffersize, 0, in, insize, &settings); + if(buffer) { + out.insert(out.end(), &buffer[0], &buffer[buffersize]); + lodepng_free(buffer); + } + return error; +} + +unsigned decompress(std::vector& out, const std::vector& in, + const LodePNGDecompressSettings& settings) { + return decompress(out, in.empty() ? 0 : &in[0], in.size(), settings); +} +#endif /* LODEPNG_COMPILE_DECODER */ + +#ifdef LODEPNG_COMPILE_ENCODER +unsigned compress(std::vector& out, const unsigned char* in, size_t insize, + const LodePNGCompressSettings& settings) { + unsigned char* buffer = 0; + size_t buffersize = 0; + unsigned error = zlib_compress(&buffer, &buffersize, in, insize, &settings); + if(buffer) { + out.insert(out.end(), &buffer[0], &buffer[buffersize]); + lodepng_free(buffer); + } + return error; +} + +unsigned compress(std::vector& out, const std::vector& in, + const LodePNGCompressSettings& settings) { + return compress(out, in.empty() ? 
0 : &in[0], in.size(), settings); +} +#endif /* LODEPNG_COMPILE_ENCODER */ +#endif /* LODEPNG_COMPILE_ZLIB */ + + +#ifdef LODEPNG_COMPILE_PNG + +State::State() { + lodepng_state_init(this); +} + +State::State(const State& other) { + lodepng_state_init(this); + lodepng_state_copy(this, &other); +} + +State::~State() { + lodepng_state_cleanup(this); +} + +State& State::operator=(const State& other) { + lodepng_state_copy(this, &other); + return *this; +} + +#ifdef LODEPNG_COMPILE_DECODER + +unsigned decode(std::vector& out, unsigned& w, unsigned& h, const unsigned char* in, + size_t insize, LodePNGColorType colortype, unsigned bitdepth) { + unsigned char* buffer = 0; + unsigned error = lodepng_decode_memory(&buffer, &w, &h, in, insize, colortype, bitdepth); + if(buffer && !error) { + State state; + state.info_raw.colortype = colortype; + state.info_raw.bitdepth = bitdepth; + size_t buffersize = lodepng_get_raw_size(w, h, &state.info_raw); + out.insert(out.end(), &buffer[0], &buffer[buffersize]); + } + lodepng_free(buffer); + return error; +} + +unsigned decode(std::vector& out, unsigned& w, unsigned& h, + const std::vector& in, LodePNGColorType colortype, unsigned bitdepth) { + return decode(out, w, h, in.empty() ? 0 : &in[0], (unsigned)in.size(), colortype, bitdepth); +} + +unsigned decode(std::vector& out, unsigned& w, unsigned& h, + State& state, + const unsigned char* in, size_t insize) { + unsigned char* buffer = NULL; + unsigned error = lodepng_decode(&buffer, &w, &h, &state, in, insize); + if(buffer && !error) { + size_t buffersize = lodepng_get_raw_size(w, h, &state.info_raw); + out.insert(out.end(), &buffer[0], &buffer[buffersize]); + } + lodepng_free(buffer); + return error; +} + +unsigned decode(std::vector& out, unsigned& w, unsigned& h, + State& state, + const std::vector& in) { + return decode(out, w, h, state, in.empty() ? 0 : &in[0], in.size()); +} + +#ifdef LODEPNG_COMPILE_DISK +unsigned decode(std::vector& out, unsigned& w, unsigned& h, const std::string& filename, + LodePNGColorType colortype, unsigned bitdepth) { + std::vector buffer; + /* safe output values in case error happens */ + w = h = 0; + unsigned error = load_file(buffer, filename); + if(error) return error; + return decode(out, w, h, buffer, colortype, bitdepth); +} +#endif /* LODEPNG_COMPILE_DECODER */ +#endif /* LODEPNG_COMPILE_DISK */ + +#ifdef LODEPNG_COMPILE_ENCODER +unsigned encode(std::vector& out, const unsigned char* in, unsigned w, unsigned h, + LodePNGColorType colortype, unsigned bitdepth) { + unsigned char* buffer; + size_t buffersize; + unsigned error = lodepng_encode_memory(&buffer, &buffersize, in, w, h, colortype, bitdepth); + if(buffer) { + out.insert(out.end(), &buffer[0], &buffer[buffersize]); + lodepng_free(buffer); + } + return error; +} + +unsigned encode(std::vector& out, + const std::vector& in, unsigned w, unsigned h, + LodePNGColorType colortype, unsigned bitdepth) { + if(lodepng_get_raw_size_lct(w, h, colortype, bitdepth) > in.size()) return 84; + return encode(out, in.empty() ? 
0 : &in[0], w, h, colortype, bitdepth); +} + +unsigned encode(std::vector& out, + const unsigned char* in, unsigned w, unsigned h, + State& state) { + unsigned char* buffer; + size_t buffersize; + unsigned error = lodepng_encode(&buffer, &buffersize, in, w, h, &state); + if(buffer) { + out.insert(out.end(), &buffer[0], &buffer[buffersize]); + lodepng_free(buffer); + } + return error; +} + +unsigned encode(std::vector& out, + const std::vector& in, unsigned w, unsigned h, + State& state) { + if(lodepng_get_raw_size(w, h, &state.info_raw) > in.size()) return 84; + return encode(out, in.empty() ? 0 : &in[0], w, h, state); +} + +#ifdef LODEPNG_COMPILE_DISK +unsigned encode(const std::string& filename, + const unsigned char* in, unsigned w, unsigned h, + LodePNGColorType colortype, unsigned bitdepth) { + std::vector buffer; + unsigned error = encode(buffer, in, w, h, colortype, bitdepth); + if(!error) error = save_file(buffer, filename); + return error; +} + +unsigned encode(const std::string& filename, + const std::vector& in, unsigned w, unsigned h, + LodePNGColorType colortype, unsigned bitdepth) { + if(lodepng_get_raw_size_lct(w, h, colortype, bitdepth) > in.size()) return 84; + return encode(filename, in.empty() ? 0 : &in[0], w, h, colortype, bitdepth); +} +#endif /* LODEPNG_COMPILE_DISK */ +#endif /* LODEPNG_COMPILE_ENCODER */ +#endif /* LODEPNG_COMPILE_PNG */ +} /* namespace lodepng */ +#endif /*LODEPNG_COMPILE_CPP*/ diff --git a/libkram/lodepng/lodepng.h b/libkram/lodepng/lodepng.h index 1f0bdfd8..6801cb78 100644 --- a/libkram/lodepng/lodepng.h +++ b/libkram/lodepng/lodepng.h @@ -1,1761 +1,1977 @@ -/* -LodePNG version 20160124 - -Copyright (c) 2005-2016 Lode Vandevenne - -This software is provided 'as-is', without any express or implied -warranty. In no event will the authors be held liable for any damages -arising from the use of this software. - -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it -freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - - 3. This notice may not be removed or altered from any source - distribution. -*/ - -#ifndef LODEPNG_H -#define LODEPNG_H - -#include /*for size_t*/ - -extern const char* LODEPNG_VERSION_STRING; - -// TODO: move to Cmake -#define LODEPNG_NO_COMPILE_ENCODER -#define LODEPNG_NO_COMPILE_DISK -#define LODEPNG_NO_COMPILE_ANCILLARY_CHUNKS - -/* -The following #defines are used to create code sections. They can be disabled -to disable code sections, which can give faster compile time and smaller binary. -The "NO_COMPILE" defines are designed to be used to pass as defines to the -compiler command to disable them without modifying this header, e.g. --DLODEPNG_NO_COMPILE_ZLIB for gcc. -In addition to those below, you can also define LODEPNG_NO_COMPILE_CRC to -allow implementing a custom lodepng_crc32. -*/ -/*deflate & zlib. 
If disabled, you must specify alternative zlib functions in -the custom_zlib field of the compress and decompress settings*/ -#ifndef LODEPNG_NO_COMPILE_ZLIB -#define LODEPNG_COMPILE_ZLIB -#endif -/*png encoder and png decoder*/ -#ifndef LODEPNG_NO_COMPILE_PNG -#define LODEPNG_COMPILE_PNG -#endif -/*deflate&zlib decoder and png decoder*/ -#ifndef LODEPNG_NO_COMPILE_DECODER -#define LODEPNG_COMPILE_DECODER -#endif -/*deflate&zlib encoder and png encoder*/ -#ifndef LODEPNG_NO_COMPILE_ENCODER -#define LODEPNG_COMPILE_ENCODER -#endif -/*the optional built in harddisk file loading and saving functions*/ -#ifndef LODEPNG_NO_COMPILE_DISK -#define LODEPNG_COMPILE_DISK -#endif -/*support for chunks other than IHDR, IDAT, PLTE, tRNS, IEND: ancillary and unknown chunks*/ -#ifndef LODEPNG_NO_COMPILE_ANCILLARY_CHUNKS -#define LODEPNG_COMPILE_ANCILLARY_CHUNKS -#endif -/*ability to convert error numerical codes to English text string*/ -#ifndef LODEPNG_NO_COMPILE_ERROR_TEXT -#define LODEPNG_COMPILE_ERROR_TEXT -#endif -/*Compile the default allocators (C's free, malloc and realloc). If you disable this, -you can define the functions lodepng_free, lodepng_malloc and lodepng_realloc in your -source files with custom allocators.*/ -#ifndef LODEPNG_NO_COMPILE_ALLOCATORS -#define LODEPNG_COMPILE_ALLOCATORS -#endif -/*compile the C++ version (you can disable the C++ wrapper here even when compiling for C++)*/ -#ifdef __cplusplus -#ifndef LODEPNG_NO_COMPILE_CPP -#define LODEPNG_COMPILE_CPP -#endif -#endif - -#ifdef LODEPNG_COMPILE_CPP -#include -#include -#endif /*LODEPNG_COMPILE_CPP*/ - -#ifdef LODEPNG_COMPILE_PNG -/*The PNG color types (also used for raw).*/ -typedef enum LodePNGColorType -{ - LCT_GREY = 0, /*greyscale: 1,2,4,8,16 bit*/ - LCT_RGB = 2, /*RGB: 8,16 bit*/ - LCT_PALETTE = 3, /*palette: 1,2,4,8 bit*/ - LCT_GREY_ALPHA = 4, /*greyscale with alpha: 8,16 bit*/ - LCT_RGBA = 6 /*RGB with alpha: 8,16 bit*/ -} LodePNGColorType; - -#ifdef LODEPNG_COMPILE_DECODER -/* -Converts PNG data in memory to raw pixel data. -out: Output parameter. Pointer to buffer that will contain the raw pixel data. - After decoding, its size is w * h * (bytes per pixel) bytes larger than - initially. Bytes per pixel depends on colortype and bitdepth. - Must be freed after usage with free(*out). - Note: for 16-bit per channel colors, uses big endian format like PNG does. -w: Output parameter. Pointer to width of pixel data. -h: Output parameter. Pointer to height of pixel data. -in: Memory buffer with the PNG file. -insize: size of the in buffer. -colortype: the desired color type for the raw output image. See explanation on PNG color types. -bitdepth: the desired bit depth for the raw output image. See explanation on PNG color types. -Return value: LodePNG error code (0 means no error). -*/ -unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h, - const unsigned char* in, size_t insize, - LodePNGColorType colortype, unsigned bitdepth); - -/*Same as lodepng_decode_memory, but always decodes to 32-bit RGBA raw image*/ -unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h, - const unsigned char* in, size_t insize); - -/*Same as lodepng_decode_memory, but always decodes to 24-bit RGB raw image*/ -unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h, - const unsigned char* in, size_t insize); - -#ifdef LODEPNG_COMPILE_DISK -/* -Load PNG from disk, from file with given name. -Same as the other decode functions, but instead takes a filename as input. 
-*/ -unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h, - const char* filename, - LodePNGColorType colortype, unsigned bitdepth); - -/*Same as lodepng_decode_file, but always decodes to 32-bit RGBA raw image.*/ -unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h, - const char* filename); - -/*Same as lodepng_decode_file, but always decodes to 24-bit RGB raw image.*/ -unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h, - const char* filename); -#endif /*LODEPNG_COMPILE_DISK*/ -#endif /*LODEPNG_COMPILE_DECODER*/ - - -#ifdef LODEPNG_COMPILE_ENCODER -/* -Converts raw pixel data into a PNG image in memory. The colortype and bitdepth - of the output PNG image cannot be chosen, they are automatically determined - by the colortype, bitdepth and content of the input pixel data. - Note: for 16-bit per channel colors, needs big endian format like PNG does. -out: Output parameter. Pointer to buffer that will contain the PNG image data. - Must be freed after usage with free(*out). -outsize: Output parameter. Pointer to the size in bytes of the out buffer. -image: The raw pixel data to encode. The size of this buffer should be - w * h * (bytes per pixel), bytes per pixel depends on colortype and bitdepth. -w: width of the raw pixel data in pixels. -h: height of the raw pixel data in pixels. -colortype: the color type of the raw input image. See explanation on PNG color types. -bitdepth: the bit depth of the raw input image. See explanation on PNG color types. -Return value: LodePNG error code (0 means no error). -*/ -unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize, - const unsigned char* image, unsigned w, unsigned h, - LodePNGColorType colortype, unsigned bitdepth); - -/*Same as lodepng_encode_memory, but always encodes from 32-bit RGBA raw image.*/ -unsigned lodepng_encode32(unsigned char** out, size_t* outsize, - const unsigned char* image, unsigned w, unsigned h); - -/*Same as lodepng_encode_memory, but always encodes from 24-bit RGB raw image.*/ -unsigned lodepng_encode24(unsigned char** out, size_t* outsize, - const unsigned char* image, unsigned w, unsigned h); - -#ifdef LODEPNG_COMPILE_DISK -/* -Converts raw pixel data into a PNG file on disk. -Same as the other encode functions, but instead takes a filename as output. -NOTE: This overwrites existing files without warning! -*/ -unsigned lodepng_encode_file(const char* filename, - const unsigned char* image, unsigned w, unsigned h, - LodePNGColorType colortype, unsigned bitdepth); - -/*Same as lodepng_encode_file, but always encodes from 32-bit RGBA raw image.*/ -unsigned lodepng_encode32_file(const char* filename, - const unsigned char* image, unsigned w, unsigned h); - -/*Same as lodepng_encode_file, but always encodes from 24-bit RGB raw image.*/ -unsigned lodepng_encode24_file(const char* filename, - const unsigned char* image, unsigned w, unsigned h); -#endif /*LODEPNG_COMPILE_DISK*/ -#endif /*LODEPNG_COMPILE_ENCODER*/ - - -#ifdef LODEPNG_COMPILE_CPP -namespace lodepng -{ -#ifdef LODEPNG_COMPILE_DECODER -/*Same as lodepng_decode_memory, but decodes to an std::vector. The colortype -is the format to output the pixels to. 
Default is RGBA 8-bit per channel.*/ -unsigned decode(std::vector& out, unsigned& w, unsigned& h, - const unsigned char* in, size_t insize, - LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); -unsigned decode(std::vector& out, unsigned& w, unsigned& h, - const std::vector& in, - LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); -#ifdef LODEPNG_COMPILE_DISK -/* -Converts PNG file from disk to raw pixel data in memory. -Same as the other decode functions, but instead takes a filename as input. -*/ -unsigned decode(std::vector& out, unsigned& w, unsigned& h, - const std::string& filename, - LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); -#endif /* LODEPNG_COMPILE_DISK */ -#endif /* LODEPNG_COMPILE_DECODER */ - -#ifdef LODEPNG_COMPILE_ENCODER -/*Same as lodepng_encode_memory, but encodes to an std::vector. colortype -is that of the raw input data. The output PNG color type will be auto chosen.*/ -unsigned encode(std::vector& out, - const unsigned char* in, unsigned w, unsigned h, - LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); -unsigned encode(std::vector& out, - const std::vector& in, unsigned w, unsigned h, - LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); -#ifdef LODEPNG_COMPILE_DISK -/* -Converts 32-bit RGBA raw pixel data into a PNG file on disk. -Same as the other encode functions, but instead takes a filename as output. -NOTE: This overwrites existing files without warning! -*/ -unsigned encode(const std::string& filename, - const unsigned char* in, unsigned w, unsigned h, - LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); -unsigned encode(const std::string& filename, - const std::vector& in, unsigned w, unsigned h, - LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); -#endif /* LODEPNG_COMPILE_DISK */ -#endif /* LODEPNG_COMPILE_ENCODER */ -} /* namespace lodepng */ -#endif /*LODEPNG_COMPILE_CPP*/ -#endif /*LODEPNG_COMPILE_PNG*/ - -#ifdef LODEPNG_COMPILE_ERROR_TEXT -/*Returns an English description of the numerical error code.*/ -const char* lodepng_error_text(unsigned code); -#endif /*LODEPNG_COMPILE_ERROR_TEXT*/ - -#ifdef LODEPNG_COMPILE_DECODER -/*Settings for zlib decompression*/ -typedef struct LodePNGDecompressSettings LodePNGDecompressSettings; -struct LodePNGDecompressSettings -{ - unsigned ignore_adler32; /*if 1, continue and don't give an error message if the Adler32 checksum is corrupted*/ - - /*use custom zlib decoder instead of built in one (default: null)*/ - unsigned (*custom_zlib)(unsigned char**, size_t*, - const unsigned char*, size_t, - const LodePNGDecompressSettings*); - /*use custom deflate decoder instead of built in one (default: null) - if custom_zlib is used, custom_deflate is ignored since only the built in - zlib function will call custom_deflate*/ - unsigned (*custom_inflate)(unsigned char**, size_t*, - const unsigned char*, size_t, - const LodePNGDecompressSettings*); - - const void* custom_context; /*optional custom settings for custom functions*/ -}; - -extern const LodePNGDecompressSettings lodepng_default_decompress_settings; -void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings); -#endif /*LODEPNG_COMPILE_DECODER*/ - -#ifdef LODEPNG_COMPILE_ENCODER -/* -Settings for zlib compression. Tweaking these settings tweaks the balance -between speed and compression ratio. 
-*/ -typedef struct LodePNGCompressSettings LodePNGCompressSettings; -struct LodePNGCompressSettings /*deflate = compress*/ -{ - /*LZ77 related settings*/ - unsigned btype; /*the block type for LZ (0, 1, 2 or 3, see zlib standard). Should be 2 for proper compression.*/ - unsigned use_lz77; /*whether or not to use LZ77. Should be 1 for proper compression.*/ - unsigned windowsize; /*must be a power of two <= 32768. higher compresses more but is slower. Default value: 2048.*/ - unsigned minmatch; /*mininum lz77 length. 3 is normally best, 6 can be better for some PNGs. Default: 0*/ - unsigned nicematch; /*stop searching if >= this length found. Set to 258 for best compression. Default: 128*/ - unsigned lazymatching; /*use lazy matching: better compression but a bit slower. Default: true*/ - - /*use custom zlib encoder instead of built in one (default: null)*/ - unsigned (*custom_zlib)(unsigned char**, size_t*, - const unsigned char*, size_t, - const LodePNGCompressSettings*); - /*use custom deflate encoder instead of built in one (default: null) - if custom_zlib is used, custom_deflate is ignored since only the built in - zlib function will call custom_deflate*/ - unsigned (*custom_deflate)(unsigned char**, size_t*, - const unsigned char*, size_t, - const LodePNGCompressSettings*); - - const void* custom_context; /*optional custom settings for custom functions*/ -}; - -extern const LodePNGCompressSettings lodepng_default_compress_settings; -void lodepng_compress_settings_init(LodePNGCompressSettings* settings); -#endif /*LODEPNG_COMPILE_ENCODER*/ - -#ifdef LODEPNG_COMPILE_PNG -/* -Color mode of an image. Contains all information required to decode the pixel -bits to RGBA colors. This information is the same as used in the PNG file -format, and is used both for PNG and raw image data in LodePNG. -*/ -typedef struct LodePNGColorMode -{ - /*header (IHDR)*/ - LodePNGColorType colortype; /*color type, see PNG standard or documentation further in this header file*/ - unsigned bitdepth; /*bits per sample, see PNG standard or documentation further in this header file*/ - - /* - palette (PLTE and tRNS) - - Dynamically allocated with the colors of the palette, including alpha. - When encoding a PNG, to store your colors in the palette of the LodePNGColorMode, first use - lodepng_palette_clear, then for each color use lodepng_palette_add. - If you encode an image without alpha with palette, don't forget to put value 255 in each A byte of the palette. - - When decoding, by default you can ignore this palette, since LodePNG already - fills the palette colors in the pixels of the raw RGBA output. - - The palette is only supported for color type 3. - */ - unsigned char* palette; /*palette in RGBARGBA... order. When allocated, must be either 0, or have size 1024*/ - size_t palettesize; /*palette size in number of colors (amount of bytes is 4 * palettesize)*/ - - /* - transparent color key (tRNS) - - This color uses the same bit depth as the bitdepth value in this struct, which can be 1-bit to 16-bit. - For greyscale PNGs, r, g and b will all 3 be set to the same. - - When decoding, by default you can ignore this information, since LodePNG sets - pixels with this key to transparent already in the raw RGBA output. - - The color key is only supported for color types 0 and 2. - */ - unsigned key_defined; /*is a transparent color key given? 
0 = false, 1 = true*/ - unsigned key_r; /*red/greyscale component of color key*/ - unsigned key_g; /*green component of color key*/ - unsigned key_b; /*blue component of color key*/ -} LodePNGColorMode; - -/*init, cleanup and copy functions to use with this struct*/ -void lodepng_color_mode_init(LodePNGColorMode* info); -void lodepng_color_mode_cleanup(LodePNGColorMode* info); -/*return value is error code (0 means no error)*/ -unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source); - -void lodepng_palette_clear(LodePNGColorMode* info); -/*add 1 color to the palette*/ -unsigned lodepng_palette_add(LodePNGColorMode* info, - unsigned char r, unsigned char g, unsigned char b, unsigned char a); - -/*get the total amount of bits per pixel, based on colortype and bitdepth in the struct*/ -unsigned lodepng_get_bpp(const LodePNGColorMode* info); -/*get the amount of color channels used, based on colortype in the struct. -If a palette is used, it counts as 1 channel.*/ -unsigned lodepng_get_channels(const LodePNGColorMode* info); -/*is it a greyscale type? (only colortype 0 or 4)*/ -unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info); -/*has it got an alpha channel? (only colortype 2 or 6)*/ -unsigned lodepng_is_alpha_type(const LodePNGColorMode* info); -/*has it got a palette? (only colortype 3)*/ -unsigned lodepng_is_palette_type(const LodePNGColorMode* info); -/*only returns true if there is a palette and there is a value in the palette with alpha < 255. -Loops through the palette to check this.*/ -unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info); -/* -Check if the given color info indicates the possibility of having non-opaque pixels in the PNG image. -Returns true if the image can have translucent or invisible pixels (it still be opaque if it doesn't use such pixels). -Returns false if the image can only have opaque pixels. -In detail, it returns true only if it's a color type with alpha, or has a palette with non-opaque values, -or if "key_defined" is true. -*/ -unsigned lodepng_can_have_alpha(const LodePNGColorMode* info); -/*Returns the byte size of a raw image buffer with given width, height and color mode*/ -size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color); - -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS -/*The information of a Time chunk in PNG.*/ -typedef struct LodePNGTime -{ - unsigned year; /*2 bytes used (0-65535)*/ - unsigned month; /*1-12*/ - unsigned day; /*1-31*/ - unsigned hour; /*0-23*/ - unsigned minute; /*0-59*/ - unsigned second; /*0-60 (to allow for leap seconds)*/ -} LodePNGTime; -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - -/*Information about the PNG image, except pixels, width and height.*/ -typedef struct LodePNGInfo -{ - /*header (IHDR), palette (PLTE) and transparency (tRNS) chunks*/ - unsigned compression_method;/*compression method of the original file. Always 0.*/ - unsigned filter_method; /*filter method of the original file*/ - unsigned interlace_method; /*interlace method of the original file*/ - LodePNGColorMode color; /*color type and bits, palette and transparency of the PNG file*/ - -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - /* - suggested background color chunk (bKGD) - This color uses the same color mode as the PNG (except alpha channel), which can be 1-bit to 16-bit. - - For greyscale PNGs, r, g and b will all 3 be set to the same. When encoding - the encoder writes the red one. 
For palette PNGs: When decoding, the RGB value - will be stored, not a palette index. But when encoding, specify the index of - the palette in background_r, the other two are then ignored. - - The decoder does not use this background color to edit the color of pixels. - */ - unsigned background_defined; /*is a suggested background color given?*/ - unsigned background_r; /*red component of suggested background color*/ - unsigned background_g; /*green component of suggested background color*/ - unsigned background_b; /*blue component of suggested background color*/ - - /* - non-international text chunks (tEXt and zTXt) - - The char** arrays each contain num strings. The actual messages are in - text_strings, while text_keys are keywords that give a short description what - the actual text represents, e.g. Title, Author, Description, or anything else. - - A keyword is minimum 1 character and maximum 79 characters long. It's - discouraged to use a single line length longer than 79 characters for texts. - - Don't allocate these text buffers yourself. Use the init/cleanup functions - correctly and use lodepng_add_text and lodepng_clear_text. - */ - size_t text_num; /*the amount of texts in these char** buffers (there may be more texts in itext)*/ - char** text_keys; /*the keyword of a text chunk (e.g. "Comment")*/ - char** text_strings; /*the actual text*/ - - /* - international text chunks (iTXt) - Similar to the non-international text chunks, but with additional strings - "langtags" and "transkeys". - */ - size_t itext_num; /*the amount of international texts in this PNG*/ - char** itext_keys; /*the English keyword of the text chunk (e.g. "Comment")*/ - char** itext_langtags; /*language tag for this text's language, ISO/IEC 646 string, e.g. ISO 639 language tag*/ - char** itext_transkeys; /*keyword translated to the international language - UTF-8 string*/ - char** itext_strings; /*the actual international text - UTF-8 string*/ - - /*time chunk (tIME)*/ - unsigned time_defined; /*set to 1 to make the encoder generate a tIME chunk*/ - LodePNGTime time; - - /*phys chunk (pHYs)*/ - unsigned phys_defined; /*if 0, there is no pHYs chunk and the values below are undefined, if 1 else there is one*/ - unsigned phys_x; /*pixels per unit in x direction*/ - unsigned phys_y; /*pixels per unit in y direction*/ - unsigned phys_unit; /*may be 0 (unknown unit) or 1 (metre)*/ - - /* - unknown chunks - There are 3 buffers, one for each position in the PNG where unknown chunks can appear - each buffer contains all unknown chunks for that position consecutively - The 3 buffers are the unknown chunks between certain critical chunks: - 0: IHDR-PLTE, 1: PLTE-IDAT, 2: IDAT-IEND - Do not allocate or traverse this data yourself. Use the chunk traversing functions declared - later, such as lodepng_chunk_next and lodepng_chunk_append, to read/write this struct. 
- */ - unsigned char* unknown_chunks_data[3]; - size_t unknown_chunks_size[3]; /*size in bytes of the unknown chunks, given for protection*/ -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ -} LodePNGInfo; - -/*init, cleanup and copy functions to use with this struct*/ -void lodepng_info_init(LodePNGInfo* info); -void lodepng_info_cleanup(LodePNGInfo* info); -/*return value is error code (0 means no error)*/ -unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source); - -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS -void lodepng_clear_text(LodePNGInfo* info); /*use this to clear the texts again after you filled them in*/ -unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str); /*push back both texts at once*/ - -void lodepng_clear_itext(LodePNGInfo* info); /*use this to clear the itexts again after you filled them in*/ -unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag, - const char* transkey, const char* str); /*push back the 4 texts of 1 chunk at once*/ -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ - -/* -Converts raw buffer from one color type to another color type, based on -LodePNGColorMode structs to describe the input and output color type. -See the reference manual at the end of this header file to see which color conversions are supported. -return value = LodePNG error code (0 if all went ok, an error if the conversion isn't supported) -The out buffer must have size (w * h * bpp + 7) / 8, where bpp is the bits per pixel -of the output color type (lodepng_get_bpp). -For < 8 bpp images, there should not be padding bits at the end of scanlines. -For 16-bit per channel colors, uses big endian format like PNG does. -Return value is LodePNG error code -*/ -unsigned lodepng_convert(unsigned char* out, const unsigned char* in, - const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in, - unsigned w, unsigned h); - -#ifdef LODEPNG_COMPILE_DECODER -/* -Settings for the decoder. This contains settings for the PNG and the Zlib -decoder, but not the Info settings from the Info structs. -*/ -typedef struct LodePNGDecoderSettings -{ - LodePNGDecompressSettings zlibsettings; /*in here is the setting to ignore Adler32 checksums*/ - - unsigned ignore_crc; /*ignore CRC checksums*/ - - unsigned color_convert; /*whether to convert the PNG to the color type you want. Default: yes*/ - -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - unsigned read_text_chunks; /*if false but remember_unknown_chunks is true, they're stored in the unknown chunks*/ - /*store all bytes from unknown chunks in the LodePNGInfo (off by default, useful for a png editor)*/ - unsigned remember_unknown_chunks; -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ -} LodePNGDecoderSettings; - -void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings); -#endif /*LODEPNG_COMPILE_DECODER*/ - -#ifdef LODEPNG_COMPILE_ENCODER -/*automatically use color type with less bits per pixel if losslessly possible. Default: AUTO*/ -typedef enum LodePNGFilterStrategy -{ - /*every filter at zero*/ - LFS_ZERO, - /*Use filter that gives minimum sum, as described in the official PNG filter heuristic.*/ - LFS_MINSUM, - /*Use the filter type that gives smallest Shannon entropy for this scanline. Depending - on the image, this is better or worse than minsum.*/ - LFS_ENTROPY, - /* - Brute-force-search PNG filters by compressing each filter for each scanline. - Experimental, very slow, and only rarely gives better compression than MINSUM. 
- */ - LFS_BRUTE_FORCE, - /*use predefined_filters buffer: you specify the filter type for each scanline*/ - LFS_PREDEFINED -} LodePNGFilterStrategy; - -/*Gives characteristics about the colors of the image, which helps decide which color model to use for encoding. -Used internally by default if "auto_convert" is enabled. Public because it's useful for custom algorithms.*/ -typedef struct LodePNGColorProfile -{ - unsigned colored; /*not greyscale*/ - unsigned key; /*if true, image is not opaque. Only if true and alpha is false, color key is possible.*/ - unsigned short key_r; /*these values are always in 16-bit bitdepth in the profile*/ - unsigned short key_g; - unsigned short key_b; - unsigned alpha; /*alpha channel or alpha palette required*/ - unsigned numcolors; /*amount of colors, up to 257. Not valid if bits == 16.*/ - unsigned char palette[1024]; /*Remembers up to the first 256 RGBA colors, in no particular order*/ - unsigned bits; /*bits per channel (not for palette). 1,2 or 4 for greyscale only. 16 if 16-bit per channel required.*/ -} LodePNGColorProfile; - -void lodepng_color_profile_init(LodePNGColorProfile* profile); - -/*Get a LodePNGColorProfile of the image.*/ -unsigned lodepng_get_color_profile(LodePNGColorProfile* profile, - const unsigned char* image, unsigned w, unsigned h, - const LodePNGColorMode* mode_in); -/*The function LodePNG uses internally to decide the PNG color with auto_convert. -Chooses an optimal color model, e.g. grey if only grey pixels, palette if < 256 colors, ...*/ -unsigned lodepng_auto_choose_color(LodePNGColorMode* mode_out, - const unsigned char* image, unsigned w, unsigned h, - const LodePNGColorMode* mode_in); - -/*Settings for the encoder.*/ -typedef struct LodePNGEncoderSettings -{ - LodePNGCompressSettings zlibsettings; /*settings for the zlib encoder, such as window size, ...*/ - - unsigned auto_convert; /*automatically choose output PNG color type. Default: true*/ - - /*If true, follows the official PNG heuristic: if the PNG uses a palette or lower than - 8 bit depth, set all filters to zero. Otherwise use the filter_strategy. Note that to - completely follow the official PNG heuristic, filter_palette_zero must be true and - filter_strategy must be LFS_MINSUM*/ - unsigned filter_palette_zero; - /*Which filter strategy to use when not using zeroes due to filter_palette_zero. - Set filter_palette_zero to 0 to ensure always using your chosen strategy. Default: LFS_MINSUM*/ - LodePNGFilterStrategy filter_strategy; - /*used if filter_strategy is LFS_PREDEFINED. In that case, this must point to a buffer with - the same length as the amount of scanlines in the image, and each value must <= 5. You - have to cleanup this buffer, LodePNG will never free it. Don't forget that filter_palette_zero - must be set to 0 to ensure this is also used on palette or low bitdepth images.*/ - const unsigned char* predefined_filters; - - /*force creating a PLTE chunk if colortype is 2 or 6 (= a suggested palette). 
- If colortype is 3, PLTE is _always_ created.*/ - unsigned force_palette; -#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS - /*add LodePNG identifier and version as a text chunk, for debugging*/ - unsigned add_id; - /*encode text chunks as zTXt chunks instead of tEXt chunks, and use compression in iTXt chunks*/ - unsigned text_compression; -#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ -} LodePNGEncoderSettings; - -void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings); -#endif /*LODEPNG_COMPILE_ENCODER*/ - - -#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) -/*The settings, state and information for extended encoding and decoding.*/ -typedef struct LodePNGState -{ -#ifdef LODEPNG_COMPILE_DECODER - LodePNGDecoderSettings decoder; /*the decoding settings*/ -#endif /*LODEPNG_COMPILE_DECODER*/ -#ifdef LODEPNG_COMPILE_ENCODER - LodePNGEncoderSettings encoder; /*the encoding settings*/ -#endif /*LODEPNG_COMPILE_ENCODER*/ - LodePNGColorMode info_raw; /*specifies the format in which you would like to get the raw pixel buffer*/ - LodePNGInfo info_png; /*info of the PNG image obtained after decoding*/ - unsigned error; -#ifdef LODEPNG_COMPILE_CPP - /* For the lodepng::State subclass. */ - virtual ~LodePNGState(){} -#endif -} LodePNGState; - -/*init, cleanup and copy functions to use with this struct*/ -void lodepng_state_init(LodePNGState* state); -void lodepng_state_cleanup(LodePNGState* state); -void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source); -#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */ - -#ifdef LODEPNG_COMPILE_DECODER -/* -Same as lodepng_decode_memory, but uses a LodePNGState to allow custom settings and -getting much more information about the PNG image and color mode. -*/ -unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h, - LodePNGState* state, - const unsigned char* in, size_t insize); - -/* -Read the PNG header, but not the actual data. This returns only the information -that is in the header chunk of the PNG, such as width, height and color type. The -information is placed in the info_png field of the LodePNGState. -*/ -unsigned lodepng_inspect(unsigned* w, unsigned* h, - LodePNGState* state, - const unsigned char* in, size_t insize); -#endif /*LODEPNG_COMPILE_DECODER*/ - - -#ifdef LODEPNG_COMPILE_ENCODER -/*This function allocates the out buffer with standard malloc and stores the size in *outsize.*/ -unsigned lodepng_encode(unsigned char** out, size_t* outsize, - const unsigned char* image, unsigned w, unsigned h, - LodePNGState* state); -#endif /*LODEPNG_COMPILE_ENCODER*/ - -/* -The lodepng_chunk functions are normally not needed, except to traverse the -unknown chunks stored in the LodePNGInfo struct, or add new ones to it. -It also allows traversing the chunks of an encoded PNG file yourself. - -PNG standard chunk naming conventions: -First byte: uppercase = critical, lowercase = ancillary -Second byte: uppercase = public, lowercase = private -Third byte: must be uppercase -Fourth byte: uppercase = unsafe to copy, lowercase = safe to copy -*/ - -/* -Gets the length of the data of the chunk. Total chunk length has 12 bytes more. -There must be at least 4 bytes to read from. If the result value is too large, -it may be corrupt data. 
-*/ -unsigned lodepng_chunk_length(const unsigned char* chunk); - -/*puts the 4-byte type in null terminated string*/ -void lodepng_chunk_type(char type[5], const unsigned char* chunk); - -/*check if the type is the given type*/ -unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type); - -/*0: it's one of the critical chunk types, 1: it's an ancillary chunk (see PNG standard)*/ -unsigned char lodepng_chunk_ancillary(const unsigned char* chunk); - -/*0: public, 1: private (see PNG standard)*/ -unsigned char lodepng_chunk_private(const unsigned char* chunk); - -/*0: the chunk is unsafe to copy, 1: the chunk is safe to copy (see PNG standard)*/ -unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk); - -/*get pointer to the data of the chunk, where the input points to the header of the chunk*/ -unsigned char* lodepng_chunk_data(unsigned char* chunk); -const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk); - -/*returns 0 if the crc is correct, 1 if it's incorrect (0 for OK as usual!)*/ -unsigned lodepng_chunk_check_crc(const unsigned char* chunk); - -/*generates the correct CRC from the data and puts it in the last 4 bytes of the chunk*/ -void lodepng_chunk_generate_crc(unsigned char* chunk); - -/*iterate to next chunks. don't use on IEND chunk, as there is no next chunk then*/ -unsigned char* lodepng_chunk_next(unsigned char* chunk); -const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk); - -/* -Appends chunk to the data in out. The given chunk should already have its chunk header. -The out variable and outlength are updated to reflect the new reallocated buffer. -Returns error code (0 if it went ok) -*/ -unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk); - -/* -Appends new chunk to out. The chunk to append is given by giving its length, type -and data separately. The type is a 4-letter string. -The out variable and outlength are updated to reflect the new reallocated buffer. -Returne error code (0 if it went ok) -*/ -unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length, - const char* type, const unsigned char* data); - - -/*Calculate CRC32 of buffer*/ -unsigned lodepng_crc32(const unsigned char* buf, size_t len); -#endif /*LODEPNG_COMPILE_PNG*/ - - -#ifdef LODEPNG_COMPILE_ZLIB -/* -This zlib part can be used independently to zlib compress and decompress a -buffer. It cannot be used to create gzip files however, and it only supports the -part of zlib that is required for PNG, it does not support dictionaries. -*/ - -#ifdef LODEPNG_COMPILE_DECODER -/*Inflate a buffer. Inflate is the decompression step of deflate. Out buffer must be freed after use.*/ -unsigned lodepng_inflate(unsigned char** out, size_t* outsize, - const unsigned char* in, size_t insize, - const LodePNGDecompressSettings* settings); - -/* -Decompresses Zlib data. Reallocates the out buffer and appends the data. The -data must be according to the zlib specification. -Either, *out must be NULL and *outsize must be 0, or, *out must be a valid -buffer and *outsize its size in bytes. out must be freed by user after usage. -*/ -unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize, - const unsigned char* in, size_t insize, - const LodePNGDecompressSettings* settings); -#endif /*LODEPNG_COMPILE_DECODER*/ - -#ifdef LODEPNG_COMPILE_ENCODER -/* -Compresses data with Zlib. Reallocates the out buffer and appends the data. 
-Zlib adds a small header and trailer around the deflate data. -The data is output in the format of the zlib specification. -Either, *out must be NULL and *outsize must be 0, or, *out must be a valid -buffer and *outsize its size in bytes. out must be freed by user after usage. -*/ -unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize, - const unsigned char* in, size_t insize, - const LodePNGCompressSettings* settings); - -/* -Find length-limited Huffman code for given frequencies. This function is in the -public interface only for tests, it's used internally by lodepng_deflate. -*/ -unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies, - size_t numcodes, unsigned maxbitlen); - -/*Compress a buffer with deflate. See RFC 1951. Out buffer must be freed after use.*/ -unsigned lodepng_deflate(unsigned char** out, size_t* outsize, - const unsigned char* in, size_t insize, - const LodePNGCompressSettings* settings); - -#endif /*LODEPNG_COMPILE_ENCODER*/ -#endif /*LODEPNG_COMPILE_ZLIB*/ - -#ifdef LODEPNG_COMPILE_DISK -/* -Load a file from disk into buffer. The function allocates the out buffer, and -after usage you should free it. -out: output parameter, contains pointer to loaded buffer. -outsize: output parameter, size of the allocated out buffer -filename: the path to the file to load -return value: error code (0 means ok) -*/ -unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename); - -/* -Save a file from buffer to disk. Warning, if it exists, this function overwrites -the file without warning! -buffer: the buffer to write -buffersize: size of the buffer to write -filename: the path to the file to save to -return value: error code (0 means ok) -*/ -unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename); -#endif /*LODEPNG_COMPILE_DISK*/ - -#ifdef LODEPNG_COMPILE_CPP -/* The LodePNG C++ wrapper uses std::vectors instead of manually allocated memory buffers. */ -namespace lodepng -{ -#ifdef LODEPNG_COMPILE_PNG -class State : public LodePNGState -{ - public: - State(); - State(const State& other); - virtual ~State(); - State& operator=(const State& other); -}; - -#ifdef LODEPNG_COMPILE_DECODER -/* Same as other lodepng::decode, but using a State for more settings and information. */ -unsigned decode(std::vector& out, unsigned& w, unsigned& h, - State& state, - const unsigned char* in, size_t insize); -unsigned decode(std::vector& out, unsigned& w, unsigned& h, - State& state, - const std::vector& in); -#endif /*LODEPNG_COMPILE_DECODER*/ - -#ifdef LODEPNG_COMPILE_ENCODER -/* Same as other lodepng::encode, but using a State for more settings and information. */ -unsigned encode(std::vector& out, - const unsigned char* in, unsigned w, unsigned h, - State& state); -unsigned encode(std::vector& out, - const std::vector& in, unsigned w, unsigned h, - State& state); -#endif /*LODEPNG_COMPILE_ENCODER*/ - -#ifdef LODEPNG_COMPILE_DISK -/* -Load a file from disk into an std::vector. -return value: error code (0 means ok) -*/ -unsigned load_file(std::vector& buffer, const std::string& filename); - -/* -Save the binary data in an std::vector to a file on disk. The file is overwritten -without warning. 
-*/ -unsigned save_file(const std::vector& buffer, const std::string& filename); -#endif /* LODEPNG_COMPILE_DISK */ -#endif /* LODEPNG_COMPILE_PNG */ - -#ifdef LODEPNG_COMPILE_ZLIB -#ifdef LODEPNG_COMPILE_DECODER -/* Zlib-decompress an unsigned char buffer */ -unsigned decompress(std::vector& out, const unsigned char* in, size_t insize, - const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings); - -/* Zlib-decompress an std::vector */ -unsigned decompress(std::vector& out, const std::vector& in, - const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings); -#endif /* LODEPNG_COMPILE_DECODER */ - -#ifdef LODEPNG_COMPILE_ENCODER -/* Zlib-compress an unsigned char buffer */ -unsigned compress(std::vector& out, const unsigned char* in, size_t insize, - const LodePNGCompressSettings& settings = lodepng_default_compress_settings); - -/* Zlib-compress an std::vector */ -unsigned compress(std::vector& out, const std::vector& in, - const LodePNGCompressSettings& settings = lodepng_default_compress_settings); -#endif /* LODEPNG_COMPILE_ENCODER */ -#endif /* LODEPNG_COMPILE_ZLIB */ -} /* namespace lodepng */ -#endif /*LODEPNG_COMPILE_CPP*/ - -/* -TODO: -[.] test if there are no memory leaks or security exploits - done a lot but needs to be checked often -[.] check compatibility with various compilers - done but needs to be redone for every newer version -[X] converting color to 16-bit per channel types -[ ] read all public PNG chunk types (but never let the color profile and gamma ones touch RGB values) -[ ] make sure encoder generates no chunks with size > (2^31)-1 -[ ] partial decoding (stream processing) -[X] let the "isFullyOpaque" function check color keys and transparent palettes too -[X] better name for the variables "codes", "codesD", "codelengthcodes", "clcl" and "lldl" -[ ] don't stop decoding on errors like 69, 57, 58 (make warnings) -[ ] let the C++ wrapper catch exceptions coming from the standard library and return LodePNG error codes -[ ] allow user to provide custom color conversion functions, e.g. for premultiplied alpha, padding bits or not, ... -[ ] allow user to give data (void*) to custom allocator -*/ - -#endif /*LODEPNG_H inclusion guard*/ - -/* -LodePNG Documentation ---------------------- - -0. table of contents --------------------- - - 1. about - 1.1. supported features - 1.2. features not supported - 2. C and C++ version - 3. security - 4. decoding - 5. encoding - 6. color conversions - 6.1. PNG color types - 6.2. color conversions - 6.3. padding bits - 6.4. A note about 16-bits per channel and endianness - 7. error values - 8. chunks and PNG editing - 9. compiler support - 10. examples - 10.1. decoder C++ example - 10.2. decoder C example - 11. state settings reference - 12. changes - 13. contact information - - -1. about --------- - -PNG is a file format to store raster images losslessly with good compression, -supporting different color types and alpha channel. - -LodePNG is a PNG codec according to the Portable Network Graphics (PNG) -Specification (Second Edition) - W3C Recommendation 10 November 2003. 
- -The specifications used are: - -*) Portable Network Graphics (PNG) Specification (Second Edition): - http://www.w3.org/TR/2003/REC-PNG-20031110 -*) RFC 1950 ZLIB Compressed Data Format version 3.3: - http://www.gzip.org/zlib/rfc-zlib.html -*) RFC 1951 DEFLATE Compressed Data Format Specification ver 1.3: - http://www.gzip.org/zlib/rfc-deflate.html - -The most recent version of LodePNG can currently be found at -http://lodev.org/lodepng/ - -LodePNG works both in C (ISO C90) and C++, with a C++ wrapper that adds -extra functionality. - -LodePNG exists out of two files: --lodepng.h: the header file for both C and C++ --lodepng.c(pp): give it the name lodepng.c or lodepng.cpp (or .cc) depending on your usage - -If you want to start using LodePNG right away without reading this doc, get the -examples from the LodePNG website to see how to use it in code, or check the -smaller examples in chapter 13 here. - -LodePNG is simple but only supports the basic requirements. To achieve -simplicity, the following design choices were made: There are no dependencies -on any external library. There are functions to decode and encode a PNG with -a single function call, and extended versions of these functions taking a -LodePNGState struct allowing to specify or get more information. By default -the colors of the raw image are always RGB or RGBA, no matter what color type -the PNG file uses. To read and write files, there are simple functions to -convert the files to/from buffers in memory. - -This all makes LodePNG suitable for loading textures in games, demos and small -programs, ... It's less suitable for full fledged image editors, loading PNGs -over network (it requires all the image data to be available before decoding can -begin), life-critical systems, ... - -1.1. supported features ------------------------ - -The following features are supported by the decoder: - -*) decoding of PNGs with any color type, bit depth and interlace mode, to a 24- or 32-bit color raw image, - or the same color type as the PNG -*) encoding of PNGs, from any raw image to 24- or 32-bit color, or the same color type as the raw image -*) Adam7 interlace and deinterlace for any color type -*) loading the image from harddisk or decoding it from a buffer from other sources than harddisk -*) support for alpha channels, including RGBA color model, translucent palettes and color keying -*) zlib decompression (inflate) -*) zlib compression (deflate) -*) CRC32 and ADLER32 checksums -*) handling of unknown chunks, allowing making a PNG editor that stores custom and unknown chunks. -*) the following chunks are supported (generated/interpreted) by both encoder and decoder: - IHDR: header information - PLTE: color palette - IDAT: pixel data - IEND: the final chunk - tRNS: transparency for palettized images - tEXt: textual information - zTXt: compressed textual information - iTXt: international textual information - bKGD: suggested background color - pHYs: physical dimensions - tIME: modification time - -1.2. features not supported ---------------------------- - -The following features are _not_ supported: - -*) some features needed to make a conformant PNG-Editor might be still missing. -*) partial loading/stream processing. All data must be available and is processed in one call. 
-*) The following public chunks are not supported but treated as unknown chunks by LodePNG - cHRM, gAMA, iCCP, sRGB, sBIT, hIST, sPLT - Some of these are not supported on purpose: LodePNG wants to provide the RGB values - stored in the pixels, not values modified by system dependent gamma or color models. - - -2. C and C++ version --------------------- - -The C version uses buffers allocated with alloc that you need to free() -yourself. You need to use init and cleanup functions for each struct whenever -using a struct from the C version to avoid exploits and memory leaks. - -The C++ version has extra functions with std::vectors in the interface and the -lodepng::State class which is a LodePNGState with constructor and destructor. - -These files work without modification for both C and C++ compilers because all -the additional C++ code is in "#ifdef __cplusplus" blocks that make C-compilers -ignore it, and the C code is made to compile both with strict ISO C90 and C++. - -To use the C++ version, you need to rename the source file to lodepng.cpp -(instead of lodepng.c), and compile it with a C++ compiler. - -To use the C version, you need to rename the source file to lodepng.c (instead -of lodepng.cpp), and compile it with a C compiler. - - -3. Security ------------ - -Even if carefully designed, it's always possible that LodePNG contains possible -exploits. If you discover one, please let me know, and it will be fixed. - -When using LodePNG, care has to be taken with the C version of LodePNG, as well -as the C-style structs when working with C++. The following conventions are used -for all C-style structs: - --if a struct has a corresponding init function, always call the init function when making a new one --if a struct has a corresponding cleanup function, call it before the struct disappears to avoid memory leaks --if a struct has a corresponding copy function, use the copy function instead of "=". - The destination must also be inited already. - - -4. Decoding ------------ - -Decoding converts a PNG compressed image to a raw pixel buffer. - -Most documentation on using the decoder is at its declarations in the header -above. For C, simple decoding can be done with functions such as -lodepng_decode32, and more advanced decoding can be done with the struct -LodePNGState and lodepng_decode. For C++, all decoding can be done with the -various lodepng::decode functions, and lodepng::State can be used for advanced -features. - -When using the LodePNGState, it uses the following fields for decoding: -*) LodePNGInfo info_png: it stores extra information about the PNG (the input) in here -*) LodePNGColorMode info_raw: here you can say what color mode of the raw image (the output) you want to get -*) LodePNGDecoderSettings decoder: you can specify a few extra settings for the decoder to use - -LodePNGInfo info_png --------------------- - -After decoding, this contains extra information of the PNG image, except the actual -pixels, width and height because these are already gotten directly from the decoder -functions. - -It contains for example the original color type of the PNG image, text comments, -suggested background color, etc... More details about the LodePNGInfo struct are -at its declaration documentation. - -LodePNGColorMode info_raw -------------------------- - -When decoding, here you can specify which color type you want -the resulting raw image to be. If this is different from the colortype of the -PNG, then the decoder will automatically convert the result. 
This conversion -always works, except if you want it to convert a color PNG to greyscale or to -a palette with missing colors. - -By default, 32-bit color is used for the result. - -LodePNGDecoderSettings decoder ------------------------------- - -The settings can be used to ignore the errors created by invalid CRC and Adler32 -chunks, and to disable the decoding of tEXt chunks. - -There's also a setting color_convert, true by default. If false, no conversion -is done, the resulting data will be as it was in the PNG (after decompression) -and you'll have to puzzle the colors of the pixels together yourself using the -color type information in the LodePNGInfo. - - -5. Encoding ------------ - -Encoding converts a raw pixel buffer to a PNG compressed image. - -Most documentation on using the encoder is at its declarations in the header -above. For C, simple encoding can be done with functions such as -lodepng_encode32, and more advanced decoding can be done with the struct -LodePNGState and lodepng_encode. For C++, all encoding can be done with the -various lodepng::encode functions, and lodepng::State can be used for advanced -features. - -Like the decoder, the encoder can also give errors. However it gives less errors -since the encoder input is trusted, the decoder input (a PNG image that could -be forged by anyone) is not trusted. - -When using the LodePNGState, it uses the following fields for encoding: -*) LodePNGInfo info_png: here you specify how you want the PNG (the output) to be. -*) LodePNGColorMode info_raw: here you say what color type of the raw image (the input) has -*) LodePNGEncoderSettings encoder: you can specify a few settings for the encoder to use - -LodePNGInfo info_png --------------------- - -When encoding, you use this the opposite way as when decoding: for encoding, -you fill in the values you want the PNG to have before encoding. By default it's -not needed to specify a color type for the PNG since it's automatically chosen, -but it's possible to choose it yourself given the right settings. - -The encoder will not always exactly match the LodePNGInfo struct you give, -it tries as close as possible. Some things are ignored by the encoder. The -encoder uses, for example, the following settings from it when applicable: -colortype and bitdepth, text chunks, time chunk, the color key, the palette, the -background color, the interlace method, unknown chunks, ... - -When encoding to a PNG with colortype 3, the encoder will generate a PLTE chunk. -If the palette contains any colors for which the alpha channel is not 255 (so -there are translucent colors in the palette), it'll add a tRNS chunk. - -LodePNGColorMode info_raw -------------------------- - -You specify the color type of the raw image that you give to the input here, -including a possible transparent color key and palette you happen to be using in -your raw image data. - -By default, 32-bit color is assumed, meaning your input has to be in RGBA -format with 4 bytes (unsigned chars) per pixel. - -LodePNGEncoderSettings encoder ------------------------------- - -The following settings are supported (some are in sub-structs): -*) auto_convert: when this option is enabled, the encoder will -automatically choose the smallest possible color mode (including color key) that -can encode the colors of all pixels without information loss. -*) btype: the block type for LZ77. 0 = uncompressed, 1 = fixed huffman tree, - 2 = dynamic huffman tree (best compression). Should be 2 for proper - compression. 
-*) use_lz77: whether or not to use LZ77 for compressed block types. Should be - true for proper compression. -*) windowsize: the window size used by the LZ77 encoder (1 - 32768). Has value - 2048 by default, but can be set to 32768 for better, but slow, compression. -*) force_palette: if colortype is 2 or 6, you can make the encoder write a PLTE - chunk if force_palette is true. This can used as suggested palette to convert - to by viewers that don't support more than 256 colors (if those still exist) -*) add_id: add text chunk "Encoder: LodePNG " to the image. -*) text_compression: default 1. If 1, it'll store texts as zTXt instead of tEXt chunks. - zTXt chunks use zlib compression on the text. This gives a smaller result on - large texts but a larger result on small texts (such as a single program name). - It's all tEXt or all zTXt though, there's no separate setting per text yet. - - -6. color conversions --------------------- - -An important thing to note about LodePNG, is that the color type of the PNG, and -the color type of the raw image, are completely independent. By default, when -you decode a PNG, you get the result as a raw image in the color type you want, -no matter whether the PNG was encoded with a palette, greyscale or RGBA color. -And if you encode an image, by default LodePNG will automatically choose the PNG -color type that gives good compression based on the values of colors and amount -of colors in the image. It can be configured to let you control it instead as -well, though. - -To be able to do this, LodePNG does conversions from one color mode to another. -It can convert from almost any color type to any other color type, except the -following conversions: RGB to greyscale is not supported, and converting to a -palette when the palette doesn't have a required color is not supported. This is -not supported on purpose: this is information loss which requires a color -reduction algorithm that is beyong the scope of a PNG encoder (yes, RGB to grey -is easy, but there are multiple ways if you want to give some channels more -weight). - -By default, when decoding, you get the raw image in 32-bit RGBA or 24-bit RGB -color, no matter what color type the PNG has. And by default when encoding, -LodePNG automatically picks the best color model for the output PNG, and expects -the input image to be 32-bit RGBA or 24-bit RGB. So, unless you want to control -the color format of the images yourself, you can skip this chapter. - -6.1. PNG color types --------------------- - -A PNG image can have many color types, ranging from 1-bit color to 64-bit color, -as well as palettized color modes. After the zlib decompression and unfiltering -in the PNG image is done, the raw pixel data will have that color type and thus -a certain amount of bits per pixel. If you want the output raw image after -decoding to have another color type, a conversion is done by LodePNG. - -The PNG specification gives the following color types: - -0: greyscale, bit depths 1, 2, 4, 8, 16 -2: RGB, bit depths 8 and 16 -3: palette, bit depths 1, 2, 4 and 8 -4: greyscale with alpha, bit depths 8 and 16 -6: RGBA, bit depths 8 and 16 - -Bit depth is the amount of bits per pixel per color channel. So the total amount -of bits per pixel is: amount of channels * bitdepth. - -6.2. color conversions ----------------------- - -As explained in the sections about the encoder and decoder, you can specify -color types and bit depths in info_png and info_raw to change the default -behaviour. 
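For example, here is a minimal C sketch of the state-based decoding path described next; the file name "test.png" is a placeholder, and it assumes lodepng is built with the disk and error-text sections enabled. It requests 16-bit RGBA raw output (one of the conversions listed as supported below) instead of the default 8-bit RGBA:

#include "lodepng.h"
#include <stdio.h>
#include <stdlib.h>

int main(void) {
  unsigned char* png = 0;
  size_t pngsize = 0;
  unsigned char* image = 0;
  unsigned w = 0, h = 0;
  unsigned error;
  LodePNGState state;

  lodepng_state_init(&state);
  /* ask the decoder for 16-bit RGBA raw output instead of the default 8-bit RGBA */
  state.info_raw.colortype = LCT_RGBA;
  state.info_raw.bitdepth = 16;

  error = lodepng_load_file(&png, &pngsize, "test.png");
  if(!error) error = lodepng_decode(&image, &w, &h, &state, png, pngsize);
  if(error) printf("error %u: %s\n", error, lodepng_error_text(error));

  /* image now holds w * h * 8 bytes: big-endian 16-bit RGBA samples */
  free(png);
  free(image);
  lodepng_state_cleanup(&state);
  return 0;
}

Freeing the decoded buffer with free() follows the allocation convention of the C interface described in chapter 2.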
- -If, when decoding, you want the raw image to be something else than the default, -you need to set the color type and bit depth you want in the LodePNGColorMode, -or the parameters colortype and bitdepth of the simple decoding function. - -If, when encoding, you use another color type than the default in the raw input -image, you need to specify its color type and bit depth in the LodePNGColorMode -of the raw image, or use the parameters colortype and bitdepth of the simple -encoding function. - -If, when encoding, you don't want LodePNG to choose the output PNG color type -but control it yourself, you need to set auto_convert in the encoder settings -to false, and specify the color type you want in the LodePNGInfo of the -encoder (including palette: it can generate a palette if auto_convert is true, -otherwise not). - -If the input and output color type differ (whether user chosen or auto chosen), -LodePNG will do a color conversion, which follows the rules below, and may -sometimes result in an error. - -To avoid some confusion: --the decoder converts from PNG to raw image --the encoder converts from raw image to PNG --the colortype and bitdepth in LodePNGColorMode info_raw, are those of the raw image --the colortype and bitdepth in the color field of LodePNGInfo info_png, are those of the PNG --when encoding, the color type in LodePNGInfo is ignored if auto_convert - is enabled, it is automatically generated instead --when decoding, the color type in LodePNGInfo is set by the decoder to that of the original - PNG image, but it can be ignored since the raw image has the color type you requested instead --if the color type of the LodePNGColorMode and PNG image aren't the same, a conversion - between the color types is done if the color types are supported. If it is not - supported, an error is returned. If the types are the same, no conversion is done. --even though some conversions aren't supported, LodePNG supports loading PNGs from any - colortype and saving PNGs to any colortype, sometimes it just requires preparing - the raw image correctly before encoding. --both encoder and decoder use the same color converter. - -Non supported color conversions: --color to greyscale: no error is thrown, but the result will look ugly because -only the red channel is taken --anything to palette when that palette does not have that color in it: in this -case an error is thrown - -Supported color conversions: --anything to 8-bit RGB, 8-bit RGBA, 16-bit RGB, 16-bit RGBA --any grey or grey+alpha, to grey or grey+alpha --anything to a palette, as long as the palette has the requested colors in it --removing alpha channel --higher to smaller bitdepth, and vice versa - -If you want no color conversion to be done (e.g. for speed or control): --In the encoder, you can make it save a PNG with any color type by giving the -raw color mode and LodePNGInfo the same color mode, and setting auto_convert to -false. --In the decoder, you can make it store the pixel data in the same color type -as the PNG has, by setting the color_convert setting to false. Settings in -info_raw are then ignored. - -The function lodepng_convert does the color conversion. It is available in the -interface but normally isn't needed since the encoder and decoder already call -it. - -6.3. padding bits ------------------ - -In the PNG file format, if a less than 8-bit per pixel color type is used and the scanlines -have a bit amount that isn't a multiple of 8, then padding bits are used so that each -scanline starts at a fresh byte. 
But that is NOT true for the LodePNG raw input and output. -The raw input image you give to the encoder, and the raw output image you get from the decoder -will NOT have these padding bits, e.g. in the case of a 1-bit image with a width -of 7 pixels, the first pixel of the second scanline will the the 8th bit of the first byte, -not the first bit of a new byte. - -6.4. A note about 16-bits per channel and endianness ----------------------------------------------------- - -LodePNG uses unsigned char arrays for 16-bit per channel colors too, just like -for any other color format. The 16-bit values are stored in big endian (most -significant byte first) in these arrays. This is the opposite order of the -little endian used by x86 CPU's. - -LodePNG always uses big endian because the PNG file format does so internally. -Conversions to other formats than PNG uses internally are not supported by -LodePNG on purpose, there are myriads of formats, including endianness of 16-bit -colors, the order in which you store R, G, B and A, and so on. Supporting and -converting to/from all that is outside the scope of LodePNG. - -This may mean that, depending on your use case, you may want to convert the big -endian output of LodePNG to little endian with a for loop. This is certainly not -always needed, many applications and libraries support big endian 16-bit colors -anyway, but it means you cannot simply cast the unsigned char* buffer to an -unsigned short* buffer on x86 CPUs. - - -7. error values ---------------- - -All functions in LodePNG that return an error code, return 0 if everything went -OK, or a non-zero code if there was an error. - -The meaning of the LodePNG error values can be retrieved with the function -lodepng_error_text: given the numerical error code, it returns a description -of the error in English as a string. - -Check the implementation of lodepng_error_text to see the meaning of each code. - - -8. chunks and PNG editing -------------------------- - -If you want to add extra chunks to a PNG you encode, or use LodePNG for a PNG -editor that should follow the rules about handling of unknown chunks, or if your -program is able to read other types of chunks than the ones handled by LodePNG, -then that's possible with the chunk functions of LodePNG. - -A PNG chunk has the following layout: - -4 bytes length -4 bytes type name -length bytes data -4 bytes CRC - -8.1. iterating through chunks ------------------------------ - -If you have a buffer containing the PNG image data, then the first chunk (the -IHDR chunk) starts at byte number 8 of that buffer. The first 8 bytes are the -signature of the PNG and are not part of a chunk. But if you start at byte 8 -then you have a chunk, and can check the following things of it. - -NOTE: none of these functions check for memory buffer boundaries. To avoid -exploits, always make sure the buffer contains all the data of the chunks. -When using lodepng_chunk_next, make sure the returned value is within the -allocated memory. - -unsigned lodepng_chunk_length(const unsigned char* chunk): - -Get the length of the chunk's data. The total chunk length is this length + 12. 
- -void lodepng_chunk_type(char type[5], const unsigned char* chunk): -unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type): - -Get the type of the chunk or compare if it's a certain type - -unsigned char lodepng_chunk_critical(const unsigned char* chunk): -unsigned char lodepng_chunk_private(const unsigned char* chunk): -unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk): - -Check if the chunk is critical in the PNG standard (only IHDR, PLTE, IDAT and IEND are). -Check if the chunk is private (public chunks are part of the standard, private ones not). -Check if the chunk is safe to copy. If it's not, then, when modifying data in a critical -chunk, unsafe to copy chunks of the old image may NOT be saved in the new one if your -program doesn't handle that type of unknown chunk. - -unsigned char* lodepng_chunk_data(unsigned char* chunk): -const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk): - -Get a pointer to the start of the data of the chunk. - -unsigned lodepng_chunk_check_crc(const unsigned char* chunk): -void lodepng_chunk_generate_crc(unsigned char* chunk): - -Check if the crc is correct or generate a correct one. - -unsigned char* lodepng_chunk_next(unsigned char* chunk): -const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk): - -Iterate to the next chunk. This works if you have a buffer with consecutive chunks. Note that these -functions do no boundary checking of the allocated data whatsoever, so make sure there is enough -data available in the buffer to be able to go to the next chunk. - -unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk): -unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length, - const char* type, const unsigned char* data): - -These functions are used to create new chunks that are appended to the data in *out that has -length *outlength. The append function appends an existing chunk to the new data. The create -function creates a new chunk with the given parameters and appends it. Type is the 4-letter -name of the chunk. - -8.2. chunks in info_png ------------------------ - -The LodePNGInfo struct contains fields with the unknown chunk in it. It has 3 -buffers (each with size) to contain 3 types of unknown chunks: -the ones that come before the PLTE chunk, the ones that come between the PLTE -and the IDAT chunks, and the ones that come after the IDAT chunks. -It's necessary to make the distionction between these 3 cases because the PNG -standard forces to keep the ordering of unknown chunks compared to the critical -chunks, but does not force any other ordering rules. - -info_png.unknown_chunks_data[0] is the chunks before PLTE -info_png.unknown_chunks_data[1] is the chunks after PLTE, before IDAT -info_png.unknown_chunks_data[2] is the chunks after IDAT - -The chunks in these 3 buffers can be iterated through and read by using the same -way described in the previous subchapter. - -When using the decoder to decode a PNG, you can make it store all unknown chunks -if you set the option settings.remember_unknown_chunks to 1. By default, this -option is off (0). - -The encoder will always encode unknown chunks that are stored in the info_png. -If you need it to add a particular chunk that isn't known by LodePNG, you can -use lodepng_chunk_append or lodepng_chunk_create to the chunk data in -info_png.unknown_chunks_data[x]. - -Chunks that are known by LodePNG should not be added in that way. E.g. 
to make -LodePNG add a bKGD chunk, set background_defined to true and add the correct -parameters there instead. - - -9. compiler support -------------------- - -No libraries other than the current standard C library are needed to compile -LodePNG. For the C++ version, only the standard C++ library is needed on top. -Add the files lodepng.c(pp) and lodepng.h to your project, include -lodepng.h where needed, and your program can read/write PNG files. - -It is compatible with C90 and up, and C++03 and up. - -If performance is important, use optimization when compiling! For both the -encoder and decoder, this makes a large difference. - -Make sure that LodePNG is compiled with the same compiler of the same version -and with the same settings as the rest of the program, or the interfaces with -std::vectors and std::strings in C++ can be incompatible. - -CHAR_BITS must be 8 or higher, because LodePNG uses unsigned chars for octets. - -*) gcc and g++ - -LodePNG is developed in gcc so this compiler is natively supported. It gives no -warnings with compiler options "-Wall -Wextra -pedantic -ansi", with gcc and g++ -version 4.7.1 on Linux, 32-bit and 64-bit. - -*) Clang - -Fully supported and warning-free. - -*) Mingw - -The Mingw compiler (a port of gcc for Windows) should be fully supported by -LodePNG. - -*) Visual Studio and Visual C++ Express Edition - -LodePNG should be warning-free with warning level W4. Two warnings were disabled -with pragmas though: warning 4244 about implicit conversions, and warning 4996 -where it wants to use a non-standard function fopen_s instead of the standard C -fopen. - -Visual Studio may want "stdafx.h" files to be included in each source file and -give an error "unexpected end of file while looking for precompiled header". -This is not standard C++ and will not be added to the stock LodePNG. You can -disable it for lodepng.cpp only by right clicking it, Properties, C/C++, -Precompiled Headers, and set it to Not Using Precompiled Headers there. - -NOTE: Modern versions of VS should be fully supported, but old versions, e.g. -VS6, are not guaranteed to work. - -*) Compilers on Macintosh - -LodePNG has been reported to work both with gcc and LLVM for Macintosh, both for -C and C++. - -*) Other Compilers - -If you encounter problems on any compilers, feel free to let me know and I may -try to fix it if the compiler is modern and standards complient. - - -10. examples ------------- - -This decoder example shows the most basic usage of LodePNG. More complex -examples can be found on the LodePNG website. - -10.1. decoder C++ example -------------------------- - -#include "lodepng.h" -#include - -int main(int argc, char *argv[]) -{ - const char* filename = argc > 1 ? argv[1] : "test.png"; - - //load and decode - std::vector image; - unsigned width, height; - unsigned error = lodepng::decode(image, width, height, filename); - - //if there's an error, display it - if(error) std::cout << "decoder error " << error << ": " << lodepng_error_text(error) << std::endl; - - //the pixels are now in the vector "image", 4 bytes per pixel, ordered RGBARGBA..., use it as texture, draw it, ... -} - -10.2. decoder C example ------------------------ - -#include "lodepng.h" - -int main(int argc, char *argv[]) -{ - unsigned error; - unsigned char* image; - size_t width, height; - const char* filename = argc > 1 ? 
argv[1] : "test.png"; - - error = lodepng_decode32_file(&image, &width, &height, filename); - - if(error) printf("decoder error %u: %s\n", error, lodepng_error_text(error)); - - / * use image here * / - - free(image); - return 0; -} - -11. state settings reference ----------------------------- - -A quick reference of some settings to set on the LodePNGState - -For decoding: - -state.decoder.zlibsettings.ignore_adler32: ignore ADLER32 checksums -state.decoder.zlibsettings.custom_...: use custom inflate function -state.decoder.ignore_crc: ignore CRC checksums -state.decoder.color_convert: convert internal PNG color to chosen one -state.decoder.read_text_chunks: whether to read in text metadata chunks -state.decoder.remember_unknown_chunks: whether to read in unknown chunks -state.info_raw.colortype: desired color type for decoded image -state.info_raw.bitdepth: desired bit depth for decoded image -state.info_raw....: more color settings, see struct LodePNGColorMode -state.info_png....: no settings for decoder but ouput, see struct LodePNGInfo - -For encoding: - -state.encoder.zlibsettings.btype: disable compression by setting it to 0 -state.encoder.zlibsettings.use_lz77: use LZ77 in compression -state.encoder.zlibsettings.windowsize: tweak LZ77 windowsize -state.encoder.zlibsettings.minmatch: tweak min LZ77 length to match -state.encoder.zlibsettings.nicematch: tweak LZ77 match where to stop searching -state.encoder.zlibsettings.lazymatching: try one more LZ77 matching -state.encoder.zlibsettings.custom_...: use custom deflate function -state.encoder.auto_convert: choose optimal PNG color type, if 0 uses info_png -state.encoder.filter_palette_zero: PNG filter strategy for palette -state.encoder.filter_strategy: PNG filter strategy to encode with -state.encoder.force_palette: add palette even if not encoding to one -state.encoder.add_id: add LodePNG identifier and version as a text chunk -state.encoder.text_compression: use compressed text chunks for metadata -state.info_raw.colortype: color type of raw input image you provide -state.info_raw.bitdepth: bit depth of raw input image you provide -state.info_raw: more color settings, see struct LodePNGColorMode -state.info_png.color.colortype: desired color type if auto_convert is false -state.info_png.color.bitdepth: desired bit depth if auto_convert is false -state.info_png.color....: more color settings, see struct LodePNGColorMode -state.info_png....: more PNG related settings, see struct LodePNGInfo - - -12. changes ------------ - -The version number of LodePNG is the date of the change given in the format -yyyymmdd. - -Some changes aren't backwards compatible. Those are indicated with a (!) -symbol. - -*) 08 dec 2015: Made load_file function return error if file can't be opened. -*) 24 okt 2015: Bugfix with decoding to palette output. -*) 18 apr 2015: Boundary PM instead of just package-merge for faster encoding. -*) 23 aug 2014: Reduced needless memory usage of decoder. -*) 28 jun 2014: Removed fix_png setting, always support palette OOB for - simplicity. Made ColorProfile public. -*) 09 jun 2014: Faster encoder by fixing hash bug and more zeros optimization. -*) 22 dec 2013: Power of two windowsize required for optimization. -*) 15 apr 2013: Fixed bug with LAC_ALPHA and color key. -*) 25 mar 2013: Added an optional feature to ignore some PNG errors (fix_png). -*) 11 mar 2013 (!): Bugfix with custom free. 
Changed from "my" to "lodepng_" - prefix for the custom allocators and made it possible with a new #define to - use custom ones in your project without needing to change lodepng's code. -*) 28 jan 2013: Bugfix with color key. -*) 27 okt 2012: Tweaks in text chunk keyword length error handling. -*) 8 okt 2012 (!): Added new filter strategy (entropy) and new auto color mode. - (no palette). Better deflate tree encoding. New compression tweak settings. - Faster color conversions while decoding. Some internal cleanups. -*) 23 sep 2012: Reduced warnings in Visual Studio a little bit. -*) 1 sep 2012 (!): Removed #define's for giving custom (de)compression functions - and made it work with function pointers instead. -*) 23 jun 2012: Added more filter strategies. Made it easier to use custom alloc - and free functions and toggle #defines from compiler flags. Small fixes. -*) 6 may 2012 (!): Made plugging in custom zlib/deflate functions more flexible. -*) 22 apr 2012 (!): Made interface more consistent, renaming a lot. Removed - redundant C++ codec classes. Reduced amount of structs. Everything changed, - but it is cleaner now imho and functionality remains the same. Also fixed - several bugs and shrunk the implementation code. Made new samples. -*) 6 nov 2011 (!): By default, the encoder now automatically chooses the best - PNG color model and bit depth, based on the amount and type of colors of the - raw image. For this, autoLeaveOutAlphaChannel replaced by auto_choose_color. -*) 9 okt 2011: simpler hash chain implementation for the encoder. -*) 8 sep 2011: lz77 encoder lazy matching instead of greedy matching. -*) 23 aug 2011: tweaked the zlib compression parameters after benchmarking. - A bug with the PNG filtertype heuristic was fixed, so that it chooses much - better ones (it's quite significant). A setting to do an experimental, slow, - brute force search for PNG filter types is added. -*) 17 aug 2011 (!): changed some C zlib related function names. -*) 16 aug 2011: made the code less wide (max 120 characters per line). -*) 17 apr 2011: code cleanup. Bugfixes. Convert low to 16-bit per sample colors. -*) 21 feb 2011: fixed compiling for C90. Fixed compiling with sections disabled. -*) 11 dec 2010: encoding is made faster, based on suggestion by Peter Eastman - to optimize long sequences of zeros. -*) 13 nov 2010: added LodePNG_InfoColor_hasPaletteAlpha and - LodePNG_InfoColor_canHaveAlpha functions for convenience. -*) 7 nov 2010: added LodePNG_error_text function to get error code description. -*) 30 okt 2010: made decoding slightly faster -*) 26 okt 2010: (!) changed some C function and struct names (more consistent). - Reorganized the documentation and the declaration order in the header. -*) 08 aug 2010: only changed some comments and external samples. -*) 05 jul 2010: fixed bug thanks to warnings in the new gcc version. -*) 14 mar 2010: fixed bug where too much memory was allocated for char buffers. -*) 02 sep 2008: fixed bug where it could create empty tree that linux apps could - read by ignoring the problem but windows apps couldn't. -*) 06 jun 2008: added more error checks for out of memory cases. -*) 26 apr 2008: added a few more checks here and there to ensure more safety. -*) 06 mar 2008: crash with encoding of strings fixed -*) 02 feb 2008: support for international text chunks added (iTXt) -*) 23 jan 2008: small cleanups, and #defines to divide code in sections -*) 20 jan 2008: support for unknown chunks allowing using LodePNG for an editor. 
-*) 18 jan 2008: support for tIME and pHYs chunks added to encoder and decoder. -*) 17 jan 2008: ability to encode and decode compressed zTXt chunks added - Also various fixes, such as in the deflate and the padding bits code. -*) 13 jan 2008: Added ability to encode Adam7-interlaced images. Improved - filtering code of encoder. -*) 07 jan 2008: (!) changed LodePNG to use ISO C90 instead of C++. A - C++ wrapper around this provides an interface almost identical to before. - Having LodePNG be pure ISO C90 makes it more portable. The C and C++ code - are together in these files but it works both for C and C++ compilers. -*) 29 dec 2007: (!) changed most integer types to unsigned int + other tweaks -*) 30 aug 2007: bug fixed which makes this Borland C++ compatible -*) 09 aug 2007: some VS2005 warnings removed again -*) 21 jul 2007: deflate code placed in new namespace separate from zlib code -*) 08 jun 2007: fixed bug with 2- and 4-bit color, and small interlaced images -*) 04 jun 2007: improved support for Visual Studio 2005: crash with accessing - invalid std::vector element [0] fixed, and level 3 and 4 warnings removed -*) 02 jun 2007: made the encoder add a tag with version by default -*) 27 may 2007: zlib and png code separated (but still in the same file), - simple encoder/decoder functions added for more simple usage cases -*) 19 may 2007: minor fixes, some code cleaning, new error added (error 69), - moved some examples from here to lodepng_examples.cpp -*) 12 may 2007: palette decoding bug fixed -*) 24 apr 2007: changed the license from BSD to the zlib license -*) 11 mar 2007: very simple addition: ability to encode bKGD chunks. -*) 04 mar 2007: (!) tEXt chunk related fixes, and support for encoding - palettized PNG images. Plus little interface change with palette and texts. -*) 03 mar 2007: Made it encode dynamic Huffman shorter with repeat codes. - Fixed a bug where the end code of a block had length 0 in the Huffman tree. -*) 26 feb 2007: Huffman compression with dynamic trees (BTYPE 2) now implemented - and supported by the encoder, resulting in smaller PNGs at the output. -*) 27 jan 2007: Made the Adler-32 test faster so that a timewaste is gone. -*) 24 jan 2007: gave encoder an error interface. Added color conversion from any - greyscale type to 8-bit greyscale with or without alpha. -*) 21 jan 2007: (!) Totally changed the interface. It allows more color types - to convert to and is more uniform. See the manual for how it works now. -*) 07 jan 2007: Some cleanup & fixes, and a few changes over the last days: - encode/decode custom tEXt chunks, separate classes for zlib & deflate, and - at last made the decoder give errors for incorrect Adler32 or Crc. -*) 01 jan 2007: Fixed bug with encoding PNGs with less than 8 bits per channel. -*) 29 dec 2006: Added support for encoding images without alpha channel, and - cleaned out code as well as making certain parts faster. -*) 28 dec 2006: Added "Settings" to the encoder. -*) 26 dec 2006: The encoder now does LZ77 encoding and produces much smaller files now. - Removed some code duplication in the decoder. Fixed little bug in an example. -*) 09 dec 2006: (!) Placed output parameters of public functions as first parameter. - Fixed a bug of the decoder with 16-bit per color. -*) 15 okt 2006: Changed documentation structure -*) 09 okt 2006: Encoder class added. It encodes a valid PNG image from the - given image buffer, however for now it's not compressed. -*) 08 sep 2006: (!) 
Changed to interface with a Decoder class -*) 30 jul 2006: (!) LodePNG_InfoPng , width and height are now retrieved in different - way. Renamed decodePNG to decodePNGGeneric. -*) 29 jul 2006: (!) Changed the interface: image info is now returned as a - struct of type LodePNG::LodePNG_Info, instead of a vector, which was a bit clumsy. -*) 28 jul 2006: Cleaned the code and added new error checks. - Corrected terminology "deflate" into "inflate". -*) 23 jun 2006: Added SDL example in the documentation in the header, this - example allows easy debugging by displaying the PNG and its transparency. -*) 22 jun 2006: (!) Changed way to obtain error value. Added - loadFile function for convenience. Made decodePNG32 faster. -*) 21 jun 2006: (!) Changed type of info vector to unsigned. - Changed position of palette in info vector. Fixed an important bug that - happened on PNGs with an uncompressed block. -*) 16 jun 2006: Internally changed unsigned into unsigned where - needed, and performed some optimizations. -*) 07 jun 2006: (!) Renamed functions to decodePNG and placed them - in LodePNG namespace. Changed the order of the parameters. Rewrote the - documentation in the header. Renamed files to lodepng.cpp and lodepng.h -*) 22 apr 2006: Optimized and improved some code -*) 07 sep 2005: (!) Changed to std::vector interface -*) 12 aug 2005: Initial release (C++, decoder only) - - -13. contact information ------------------------ - -Feel free to contact me with suggestions, problems, comments, ... concerning -LodePNG. If you encounter a PNG image that doesn't work properly with this -decoder, feel free to send it and I'll use it to find and fix the problem. - -My email address is (puzzle the account and domain together with an @ symbol): -Domain: gmail dot com. -Account: lode dot vandevenne. - - -Copyright (c) 2005-2016 Lode Vandevenne -*/ +/* +LodePNG version 20201017 + +Copyright (c) 2005-2020 Lode Vandevenne + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source + distribution. +*/ + +#ifndef LODEPNG_H +#define LODEPNG_H + +#include /*for size_t*/ + +extern const char* LODEPNG_VERSION_STRING; + +/* +The following #defines are used to create code sections. They can be disabled +to disable code sections, which can give faster compile time and smaller binary. +The "NO_COMPILE" defines are designed to be used to pass as defines to the +compiler command to disable them without modifying this header, e.g. +-DLODEPNG_NO_COMPILE_ZLIB for gcc. +In addition to those below, you can also define LODEPNG_NO_COMPILE_CRC to +allow implementing a custom lodepng_crc32. +*/ +/*deflate & zlib. 
If disabled, you must specify alternative zlib functions in +the custom_zlib field of the compress and decompress settings*/ +#ifndef LODEPNG_NO_COMPILE_ZLIB +#define LODEPNG_COMPILE_ZLIB +#endif + +/*png encoder and png decoder*/ +#ifndef LODEPNG_NO_COMPILE_PNG +#define LODEPNG_COMPILE_PNG +#endif + +/*deflate&zlib decoder and png decoder*/ +#ifndef LODEPNG_NO_COMPILE_DECODER +#define LODEPNG_COMPILE_DECODER +#endif + +/*deflate&zlib encoder and png encoder*/ +#ifndef LODEPNG_NO_COMPILE_ENCODER +#define LODEPNG_COMPILE_ENCODER +#endif + +/*the optional built in harddisk file loading and saving functions*/ +#ifndef LODEPNG_NO_COMPILE_DISK +#define LODEPNG_COMPILE_DISK +#endif + +/*support for chunks other than IHDR, IDAT, PLTE, tRNS, IEND: ancillary and unknown chunks*/ +#ifndef LODEPNG_NO_COMPILE_ANCILLARY_CHUNKS +#define LODEPNG_COMPILE_ANCILLARY_CHUNKS +#endif + +/*ability to convert error numerical codes to English text string*/ +#ifndef LODEPNG_NO_COMPILE_ERROR_TEXT +#define LODEPNG_COMPILE_ERROR_TEXT +#endif + +/*Compile the default allocators (C's free, malloc and realloc). If you disable this, +you can define the functions lodepng_free, lodepng_malloc and lodepng_realloc in your +source files with custom allocators.*/ +#ifndef LODEPNG_NO_COMPILE_ALLOCATORS +#define LODEPNG_COMPILE_ALLOCATORS +#endif + +/*compile the C++ version (you can disable the C++ wrapper here even when compiling for C++)*/ +#ifdef __cplusplus +#ifndef LODEPNG_NO_COMPILE_CPP +#define LODEPNG_COMPILE_CPP +#endif +#endif + +#ifdef LODEPNG_COMPILE_CPP +#include +#include +#endif /*LODEPNG_COMPILE_CPP*/ + +#ifdef LODEPNG_COMPILE_PNG +/*The PNG color types (also used for raw image).*/ +typedef enum LodePNGColorType { + LCT_GREY = 0, /*grayscale: 1,2,4,8,16 bit*/ + LCT_RGB = 2, /*RGB: 8,16 bit*/ + LCT_PALETTE = 3, /*palette: 1,2,4,8 bit*/ + LCT_GREY_ALPHA = 4, /*grayscale with alpha: 8,16 bit*/ + LCT_RGBA = 6, /*RGB with alpha: 8,16 bit*/ + /*LCT_MAX_OCTET_VALUE lets the compiler allow this enum to represent any invalid + byte value from 0 to 255 that could be present in an invalid PNG file header. Do + not use, compare with or set the name LCT_MAX_OCTET_VALUE, instead either use + the valid color type names above, or numeric values like 1 or 7 when checking for + particular disallowed color type byte values, or cast to integer to print it.*/ + LCT_MAX_OCTET_VALUE = 255 +} LodePNGColorType; + +#ifdef LODEPNG_COMPILE_DECODER +/* +Converts PNG data in memory to raw pixel data. +out: Output parameter. Pointer to buffer that will contain the raw pixel data. + After decoding, its size is w * h * (bytes per pixel) bytes larger than + initially. Bytes per pixel depends on colortype and bitdepth. + Must be freed after usage with free(*out). + Note: for 16-bit per channel colors, uses big endian format like PNG does. +w: Output parameter. Pointer to width of pixel data. +h: Output parameter. Pointer to height of pixel data. +in: Memory buffer with the PNG file. +insize: size of the in buffer. +colortype: the desired color type for the raw output image. See explanation on PNG color types. +bitdepth: the desired bit depth for the raw output image. See explanation on PNG color types. +Return value: LodePNG error code (0 means no error). 
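
Purely as an illustrative sketch (editorial note, not part of the patch): the lodepng_decode_memory declaration that immediately follows this comment can be driven as below. png and pngsize are assumed to hold a PNG file already read into memory, and the caller asks for 32-bit RGBA output.

  #include <stdio.h>
  #include <stdlib.h>
  #include "lodepng.h"

  /* Decode an in-memory PNG (png/pngsize assumed to come from elsewhere)
     into a tightly packed RGBA8 buffer that the caller must free(). */
  int decode_rgba(const unsigned char* png, size_t pngsize) {
    unsigned char* pixels = NULL;
    unsigned w = 0, h = 0;
    unsigned error = lodepng_decode_memory(&pixels, &w, &h, png, pngsize, LCT_RGBA, 8);
    if (error) {
      printf("decode error %u\n", error);
      return 1;
    }
    printf("decoded %u x %u, %zu bytes\n", w, h, (size_t)w * h * 4);
    free(pixels); /* buffer was allocated by LodePNG */
    return 0;
  }
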
+*/ +unsigned lodepng_decode_memory(unsigned char** out, unsigned* w, unsigned* h, + const unsigned char* in, size_t insize, + LodePNGColorType colortype, unsigned bitdepth); + +/*Same as lodepng_decode_memory, but always decodes to 32-bit RGBA raw image*/ +unsigned lodepng_decode32(unsigned char** out, unsigned* w, unsigned* h, + const unsigned char* in, size_t insize); + +/*Same as lodepng_decode_memory, but always decodes to 24-bit RGB raw image*/ +unsigned lodepng_decode24(unsigned char** out, unsigned* w, unsigned* h, + const unsigned char* in, size_t insize); + +#ifdef LODEPNG_COMPILE_DISK +/* +Load PNG from disk, from file with given name. +Same as the other decode functions, but instead takes a filename as input. +*/ +unsigned lodepng_decode_file(unsigned char** out, unsigned* w, unsigned* h, + const char* filename, + LodePNGColorType colortype, unsigned bitdepth); + +/*Same as lodepng_decode_file, but always decodes to 32-bit RGBA raw image.*/ +unsigned lodepng_decode32_file(unsigned char** out, unsigned* w, unsigned* h, + const char* filename); + +/*Same as lodepng_decode_file, but always decodes to 24-bit RGB raw image.*/ +unsigned lodepng_decode24_file(unsigned char** out, unsigned* w, unsigned* h, + const char* filename); +#endif /*LODEPNG_COMPILE_DISK*/ +#endif /*LODEPNG_COMPILE_DECODER*/ + + +#ifdef LODEPNG_COMPILE_ENCODER +/* +Converts raw pixel data into a PNG image in memory. The colortype and bitdepth + of the output PNG image cannot be chosen, they are automatically determined + by the colortype, bitdepth and content of the input pixel data. + Note: for 16-bit per channel colors, needs big endian format like PNG does. +out: Output parameter. Pointer to buffer that will contain the PNG image data. + Must be freed after usage with free(*out). +outsize: Output parameter. Pointer to the size in bytes of the out buffer. +image: The raw pixel data to encode. The size of this buffer should be + w * h * (bytes per pixel), bytes per pixel depends on colortype and bitdepth. +w: width of the raw pixel data in pixels. +h: height of the raw pixel data in pixels. +colortype: the color type of the raw input image. See explanation on PNG color types. +bitdepth: the bit depth of the raw input image. See explanation on PNG color types. +Return value: LodePNG error code (0 means no error). +*/ +unsigned lodepng_encode_memory(unsigned char** out, size_t* outsize, + const unsigned char* image, unsigned w, unsigned h, + LodePNGColorType colortype, unsigned bitdepth); + +/*Same as lodepng_encode_memory, but always encodes from 32-bit RGBA raw image.*/ +unsigned lodepng_encode32(unsigned char** out, size_t* outsize, + const unsigned char* image, unsigned w, unsigned h); + +/*Same as lodepng_encode_memory, but always encodes from 24-bit RGB raw image.*/ +unsigned lodepng_encode24(unsigned char** out, size_t* outsize, + const unsigned char* image, unsigned w, unsigned h); + +#ifdef LODEPNG_COMPILE_DISK +/* +Converts raw pixel data into a PNG file on disk. +Same as the other encode functions, but instead takes a filename as output. +NOTE: This overwrites existing files without warning! 
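
For illustration only (editorial note, not part of the patch): a minimal sketch of the encode side using lodepng_encode32, declared above, assuming image points to a valid w*h RGBA8 buffer. lodepng_error_text, declared further down in this header, is used for the message.

  #include <stdio.h>
  #include <stdlib.h>
  #include "lodepng.h"

  /* Encode a raw RGBA8 buffer (image, w, h assumed valid) to a PNG in memory.
     The out buffer is allocated by LodePNG and must be free()d by the caller. */
  int encode_rgba(const unsigned char* image, unsigned w, unsigned h) {
    unsigned char* png = NULL;
    size_t pngsize = 0;
    unsigned error = lodepng_encode32(&png, &pngsize, image, w, h);
    if (error) {
      printf("encode error %u: %s\n", error, lodepng_error_text(error));
      free(png);
      return 1;
    }
    printf("encoded %zu bytes\n", pngsize);
    free(png);
    return 0;
  }
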
+*/ +unsigned lodepng_encode_file(const char* filename, + const unsigned char* image, unsigned w, unsigned h, + LodePNGColorType colortype, unsigned bitdepth); + +/*Same as lodepng_encode_file, but always encodes from 32-bit RGBA raw image.*/ +unsigned lodepng_encode32_file(const char* filename, + const unsigned char* image, unsigned w, unsigned h); + +/*Same as lodepng_encode_file, but always encodes from 24-bit RGB raw image.*/ +unsigned lodepng_encode24_file(const char* filename, + const unsigned char* image, unsigned w, unsigned h); +#endif /*LODEPNG_COMPILE_DISK*/ +#endif /*LODEPNG_COMPILE_ENCODER*/ + + +#ifdef LODEPNG_COMPILE_CPP +namespace lodepng { +#ifdef LODEPNG_COMPILE_DECODER +/*Same as lodepng_decode_memory, but decodes to an std::vector. The colortype +is the format to output the pixels to. Default is RGBA 8-bit per channel.*/ +unsigned decode(std::vector& out, unsigned& w, unsigned& h, + const unsigned char* in, size_t insize, + LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); +unsigned decode(std::vector& out, unsigned& w, unsigned& h, + const std::vector& in, + LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); +#ifdef LODEPNG_COMPILE_DISK +/* +Converts PNG file from disk to raw pixel data in memory. +Same as the other decode functions, but instead takes a filename as input. +*/ +unsigned decode(std::vector& out, unsigned& w, unsigned& h, + const std::string& filename, + LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); +#endif /* LODEPNG_COMPILE_DISK */ +#endif /* LODEPNG_COMPILE_DECODER */ + +#ifdef LODEPNG_COMPILE_ENCODER +/*Same as lodepng_encode_memory, but encodes to an std::vector. colortype +is that of the raw input data. The output PNG color type will be auto chosen.*/ +unsigned encode(std::vector& out, + const unsigned char* in, unsigned w, unsigned h, + LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); +unsigned encode(std::vector& out, + const std::vector& in, unsigned w, unsigned h, + LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); +#ifdef LODEPNG_COMPILE_DISK +/* +Converts 32-bit RGBA raw pixel data into a PNG file on disk. +Same as the other encode functions, but instead takes a filename as output. +NOTE: This overwrites existing files without warning! 
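
Illustrative sketch (editorial note, not part of the patch): the C++ wrapper overloads above work on std::vector buffers, so no manual free() is needed. The vector element type was lost in the extracted text above, so the sketch assumes std::vector<unsigned char>; pngfile is assumed to already hold a PNG file's bytes.

  #include <cstdio>
  #include <vector>
  #include "lodepng.h"

  // Decode a PNG already loaded into `pngfile`, then re-encode the pixels.
  int roundtrip(const std::vector<unsigned char>& pngfile) {
    std::vector<unsigned char> pixels;  // RGBA8 output, w * h * 4 bytes
    unsigned w = 0, h = 0;
    unsigned error = lodepng::decode(pixels, w, h, pngfile); // defaults: LCT_RGBA, 8
    if (error) {
      std::printf("decode error %u: %s\n", error, lodepng_error_text(error));
      return 1;
    }

    std::vector<unsigned char> pngout;
    error = lodepng::encode(pngout, pixels, w, h); // PNG color type chosen automatically by default
    if (error) {
      std::printf("encode error %u: %s\n", error, lodepng_error_text(error));
      return 1;
    }
    std::printf("re-encoded %u x %u into %zu bytes\n", w, h, pngout.size());
    return 0;
  }
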
+*/ +unsigned encode(const std::string& filename, + const unsigned char* in, unsigned w, unsigned h, + LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); +unsigned encode(const std::string& filename, + const std::vector& in, unsigned w, unsigned h, + LodePNGColorType colortype = LCT_RGBA, unsigned bitdepth = 8); +#endif /* LODEPNG_COMPILE_DISK */ +#endif /* LODEPNG_COMPILE_ENCODER */ +} /* namespace lodepng */ +#endif /*LODEPNG_COMPILE_CPP*/ +#endif /*LODEPNG_COMPILE_PNG*/ + +#ifdef LODEPNG_COMPILE_ERROR_TEXT +/*Returns an English description of the numerical error code.*/ +const char* lodepng_error_text(unsigned code); +#endif /*LODEPNG_COMPILE_ERROR_TEXT*/ + +#ifdef LODEPNG_COMPILE_DECODER +/*Settings for zlib decompression*/ +typedef struct LodePNGDecompressSettings LodePNGDecompressSettings; +struct LodePNGDecompressSettings { + /* Check LodePNGDecoderSettings for more ignorable errors such as ignore_crc */ + unsigned ignore_adler32; /*if 1, continue and don't give an error message if the Adler32 checksum is corrupted*/ + unsigned ignore_nlen; /*ignore complement of len checksum in uncompressed blocks*/ + + /*Maximum decompressed size, beyond this the decoder may (and is encouraged to) stop decoding, + return an error, output a data size > max_output_size and all the data up to that point. This is + not hard limit nor a guarantee, but can prevent excessive memory usage. This setting is + ignored by the PNG decoder, but is used by the deflate/zlib decoder and can be used by custom ones. + Set to 0 to impose no limit (the default).*/ + size_t max_output_size; + + /*use custom zlib decoder instead of built in one (default: null). + Should return 0 if success, any non-0 if error (numeric value not exposed).*/ + unsigned (*custom_zlib)(unsigned char**, size_t*, + const unsigned char*, size_t, + const LodePNGDecompressSettings*); + /*use custom deflate decoder instead of built in one (default: null) + if custom_zlib is not null, custom_inflate is ignored (the zlib format uses deflate). + Should return 0 if success, any non-0 if error (numeric value not exposed).*/ + unsigned (*custom_inflate)(unsigned char**, size_t*, + const unsigned char*, size_t, + const LodePNGDecompressSettings*); + + const void* custom_context; /*optional custom settings for custom functions*/ +}; + +extern const LodePNGDecompressSettings lodepng_default_decompress_settings; +void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings); +#endif /*LODEPNG_COMPILE_DECODER*/ + +#ifdef LODEPNG_COMPILE_ENCODER +/* +Settings for zlib compression. Tweaking these settings tweaks the balance +between speed and compression ratio. +*/ +typedef struct LodePNGCompressSettings LodePNGCompressSettings; +struct LodePNGCompressSettings /*deflate = compress*/ { + /*LZ77 related settings*/ + unsigned btype; /*the block type for LZ (0, 1, 2 or 3, see zlib standard). Should be 2 for proper compression.*/ + unsigned use_lz77; /*whether or not to use LZ77. Should be 1 for proper compression.*/ + unsigned windowsize; /*must be a power of two <= 32768. higher compresses more but is slower. Default value: 2048.*/ + unsigned minmatch; /*minimum lz77 length. 3 is normally best, 6 can be better for some PNGs. Default: 0*/ + unsigned nicematch; /*stop searching if >= this length found. Set to 258 for best compression. Default: 128*/ + unsigned lazymatching; /*use lazy matching: better compression but a bit slower. 
Default: true*/ + + /*use custom zlib encoder instead of built in one (default: null)*/ + unsigned (*custom_zlib)(unsigned char**, size_t*, + const unsigned char*, size_t, + const LodePNGCompressSettings*); + /*use custom deflate encoder instead of built in one (default: null) + if custom_zlib is used, custom_deflate is ignored since only the built in + zlib function will call custom_deflate*/ + unsigned (*custom_deflate)(unsigned char**, size_t*, + const unsigned char*, size_t, + const LodePNGCompressSettings*); + + const void* custom_context; /*optional custom settings for custom functions*/ +}; + +extern const LodePNGCompressSettings lodepng_default_compress_settings; +void lodepng_compress_settings_init(LodePNGCompressSettings* settings); +#endif /*LODEPNG_COMPILE_ENCODER*/ + +#ifdef LODEPNG_COMPILE_PNG +/* +Color mode of an image. Contains all information required to decode the pixel +bits to RGBA colors. This information is the same as used in the PNG file +format, and is used both for PNG and raw image data in LodePNG. +*/ +typedef struct LodePNGColorMode { + /*header (IHDR)*/ + LodePNGColorType colortype; /*color type, see PNG standard or documentation further in this header file*/ + unsigned bitdepth; /*bits per sample, see PNG standard or documentation further in this header file*/ + + /* + palette (PLTE and tRNS) + + Dynamically allocated with the colors of the palette, including alpha. + This field may not be allocated directly, use lodepng_color_mode_init first, + then lodepng_palette_add per color to correctly initialize it (to ensure size + of exactly 1024 bytes). + + The alpha channels must be set as well, set them to 255 for opaque images. + + When decoding, by default you can ignore this palette, since LodePNG already + fills the palette colors in the pixels of the raw RGBA output. + + The palette is only supported for color type 3. + */ + unsigned char* palette; /*palette in RGBARGBA... order. Must be either 0, or when allocated must have 1024 bytes*/ + size_t palettesize; /*palette size in number of colors (amount of used bytes is 4 * palettesize)*/ + + /* + transparent color key (tRNS) + + This color uses the same bit depth as the bitdepth value in this struct, which can be 1-bit to 16-bit. + For grayscale PNGs, r, g and b will all 3 be set to the same. + + When decoding, by default you can ignore this information, since LodePNG sets + pixels with this key to transparent already in the raw RGBA output. + + The color key is only supported for color types 0 and 2. + */ + unsigned key_defined; /*is a transparent color key given? 
0 = false, 1 = true*/ + unsigned key_r; /*red/grayscale component of color key*/ + unsigned key_g; /*green component of color key*/ + unsigned key_b; /*blue component of color key*/ +} LodePNGColorMode; + +/*init, cleanup and copy functions to use with this struct*/ +void lodepng_color_mode_init(LodePNGColorMode* info); +void lodepng_color_mode_cleanup(LodePNGColorMode* info); +/*return value is error code (0 means no error)*/ +unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source); +/* Makes a temporary LodePNGColorMode that does not need cleanup (no palette) */ +LodePNGColorMode lodepng_color_mode_make(LodePNGColorType colortype, unsigned bitdepth); + +void lodepng_palette_clear(LodePNGColorMode* info); +/*add 1 color to the palette*/ +unsigned lodepng_palette_add(LodePNGColorMode* info, + unsigned char r, unsigned char g, unsigned char b, unsigned char a); + +/*get the total amount of bits per pixel, based on colortype and bitdepth in the struct*/ +unsigned lodepng_get_bpp(const LodePNGColorMode* info); +/*get the amount of color channels used, based on colortype in the struct. +If a palette is used, it counts as 1 channel.*/ +unsigned lodepng_get_channels(const LodePNGColorMode* info); +/*is it a grayscale type? (only colortype 0 or 4)*/ +unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info); +/*has it got an alpha channel? (only colortype 2 or 6)*/ +unsigned lodepng_is_alpha_type(const LodePNGColorMode* info); +/*has it got a palette? (only colortype 3)*/ +unsigned lodepng_is_palette_type(const LodePNGColorMode* info); +/*only returns true if there is a palette and there is a value in the palette with alpha < 255. +Loops through the palette to check this.*/ +unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info); +/* +Check if the given color info indicates the possibility of having non-opaque pixels in the PNG image. +Returns true if the image can have translucent or invisible pixels (it still be opaque if it doesn't use such pixels). +Returns false if the image can only have opaque pixels. +In detail, it returns true only if it's a color type with alpha, or has a palette with non-opaque values, +or if "key_defined" is true. +*/ +unsigned lodepng_can_have_alpha(const LodePNGColorMode* info); +/*Returns the byte size of a raw image buffer with given width, height and color mode*/ +size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color); + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS +/*The information of a Time chunk in PNG.*/ +typedef struct LodePNGTime { + unsigned year; /*2 bytes used (0-65535)*/ + unsigned month; /*1-12*/ + unsigned day; /*1-31*/ + unsigned hour; /*0-23*/ + unsigned minute; /*0-59*/ + unsigned second; /*0-60 (to allow for leap seconds)*/ +} LodePNGTime; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + +/*Information about the PNG image, except pixels, width and height.*/ +typedef struct LodePNGInfo { + /*header (IHDR), palette (PLTE) and transparency (tRNS) chunks*/ + unsigned compression_method;/*compression method of the original file. 
Always 0.*/ + unsigned filter_method; /*filter method of the original file*/ + unsigned interlace_method; /*interlace method of the original file: 0=none, 1=Adam7*/ + LodePNGColorMode color; /*color type and bits, palette and transparency of the PNG file*/ + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + /* + Suggested background color chunk (bKGD) + + This uses the same color mode and bit depth as the PNG (except no alpha channel), + with values truncated to the bit depth in the unsigned integer. + + For grayscale and palette PNGs, the value is stored in background_r. The values + in background_g and background_b are then unused. + + So when decoding, you may get these in a different color mode than the one you requested + for the raw pixels. + + When encoding with auto_convert, you must use the color model defined in info_png.color for + these values. The encoder normally ignores info_png.color when auto_convert is on, but will + use it to interpret these values (and convert copies of them to its chosen color model). + + When encoding, avoid setting this to an expensive color, such as a non-gray value + when the image is gray, or the compression will be worse since it will be forced to + write the PNG with a more expensive color mode (when auto_convert is on). + + The decoder does not use this background color to edit the color of pixels. This is a + completely optional metadata feature. + */ + unsigned background_defined; /*is a suggested background color given?*/ + unsigned background_r; /*red/gray/palette component of suggested background color*/ + unsigned background_g; /*green component of suggested background color*/ + unsigned background_b; /*blue component of suggested background color*/ + + /* + Non-international text chunks (tEXt and zTXt) + + The char** arrays each contain num strings. The actual messages are in + text_strings, while text_keys are keywords that give a short description what + the actual text represents, e.g. Title, Author, Description, or anything else. + + All the string fields below including strings, keys, names and language tags are null terminated. + The PNG specification uses null characters for the keys, names and tags, and forbids null + characters to appear in the main text which is why we can use null termination everywhere here. + + A keyword is minimum 1 character and maximum 79 characters long (plus the + additional null terminator). It's discouraged to use a single line length + longer than 79 characters for texts. + + Don't allocate these text buffers yourself. Use the init/cleanup functions + correctly and use lodepng_add_text and lodepng_clear_text. + + Standard text chunk keywords and strings are encoded using Latin-1. + */ + size_t text_num; /*the amount of texts in these char** buffers (there may be more texts in itext)*/ + char** text_keys; /*the keyword of a text chunk (e.g. "Comment")*/ + char** text_strings; /*the actual text*/ + + /* + International text chunks (iTXt) + Similar to the non-international text chunks, but with additional strings + "langtags" and "transkeys", and the following text encodings are used: + keys: Latin-1, langtags: ASCII, transkeys and strings: UTF-8. + keys must be 1-79 characters (plus the additional null terminator), the other + strings are any length. + */ + size_t itext_num; /*the amount of international texts in this PNG*/ + char** itext_keys; /*the English keyword of the text chunk (e.g. "Comment")*/ + char** itext_langtags; /*language tag for this text's language, ISO/IEC 646 string, e.g. 
ISO 639 language tag*/ + char** itext_transkeys; /*keyword translated to the international language - UTF-8 string*/ + char** itext_strings; /*the actual international text - UTF-8 string*/ + + /*time chunk (tIME)*/ + unsigned time_defined; /*set to 1 to make the encoder generate a tIME chunk*/ + LodePNGTime time; + + /*phys chunk (pHYs)*/ + unsigned phys_defined; /*if 0, there is no pHYs chunk and the values below are undefined, if 1 else there is one*/ + unsigned phys_x; /*pixels per unit in x direction*/ + unsigned phys_y; /*pixels per unit in y direction*/ + unsigned phys_unit; /*may be 0 (unknown unit) or 1 (metre)*/ + + /* + Color profile related chunks: gAMA, cHRM, sRGB, iCPP + + LodePNG does not apply any color conversions on pixels in the encoder or decoder and does not interpret these color + profile values. It merely passes on the information. If you wish to use color profiles and convert colors, please + use these values with a color management library. + + See the PNG, ICC and sRGB specifications for more information about the meaning of these values. + */ + + /* gAMA chunk: optional, overridden by sRGB or iCCP if those are present. */ + unsigned gama_defined; /* Whether a gAMA chunk is present (0 = not present, 1 = present). */ + unsigned gama_gamma; /* Gamma exponent times 100000 */ + + /* cHRM chunk: optional, overridden by sRGB or iCCP if those are present. */ + unsigned chrm_defined; /* Whether a cHRM chunk is present (0 = not present, 1 = present). */ + unsigned chrm_white_x; /* White Point x times 100000 */ + unsigned chrm_white_y; /* White Point y times 100000 */ + unsigned chrm_red_x; /* Red x times 100000 */ + unsigned chrm_red_y; /* Red y times 100000 */ + unsigned chrm_green_x; /* Green x times 100000 */ + unsigned chrm_green_y; /* Green y times 100000 */ + unsigned chrm_blue_x; /* Blue x times 100000 */ + unsigned chrm_blue_y; /* Blue y times 100000 */ + + /* + sRGB chunk: optional. May not appear at the same time as iCCP. + If gAMA is also present gAMA must contain value 45455. + If cHRM is also present cHRM must contain respectively 31270,32900,64000,33000,30000,60000,15000,6000. + */ + unsigned srgb_defined; /* Whether an sRGB chunk is present (0 = not present, 1 = present). */ + unsigned srgb_intent; /* Rendering intent: 0=perceptual, 1=rel. colorimetric, 2=saturation, 3=abs. colorimetric */ + + /* + iCCP chunk: optional. May not appear at the same time as sRGB. + + LodePNG does not parse or use the ICC profile (except its color space header field for an edge case), a + separate library to handle the ICC data (not included in LodePNG) format is needed to use it for color + management and conversions. + + For encoding, if iCCP is present, gAMA and cHRM are recommended to be added as well with values that match the ICC + profile as closely as possible, if you wish to do this you should provide the correct values for gAMA and cHRM and + enable their '_defined' flags since LodePNG will not automatically compute them from the ICC profile. + + For encoding, the ICC profile is required by the PNG specification to be an "RGB" profile for non-gray + PNG color types and a "GRAY" profile for gray PNG color types. If you disable auto_convert, you must ensure + the ICC profile type matches your requested color type, else the encoder gives an error. 
If auto_convert is + enabled (the default), and the ICC profile is not a good match for the pixel data, this will result in an encoder + error if the pixel data has non-gray pixels for a GRAY profile, or a silent less-optimal compression of the pixel + data if the pixels could be encoded as grayscale but the ICC profile is RGB. + + To avoid this do not set an ICC profile in the image unless there is a good reason for it, and when doing so + make sure you compute it carefully to avoid the above problems. + */ + unsigned iccp_defined; /* Whether an iCCP chunk is present (0 = not present, 1 = present). */ + char* iccp_name; /* Null terminated string with profile name, 1-79 bytes */ + /* + The ICC profile in iccp_profile_size bytes. + Don't allocate this buffer yourself. Use the init/cleanup functions + correctly and use lodepng_set_icc and lodepng_clear_icc. + */ + unsigned char* iccp_profile; + unsigned iccp_profile_size; /* The size of iccp_profile in bytes */ + + /* End of color profile related chunks */ + + + /* + unknown chunks: chunks not known by LodePNG, passed on byte for byte. + + There are 3 buffers, one for each position in the PNG where unknown chunks can appear. + Each buffer contains all unknown chunks for that position consecutively. + The 3 positions are: + 0: between IHDR and PLTE, 1: between PLTE and IDAT, 2: between IDAT and IEND. + + For encoding, do not store critical chunks or known chunks that are enabled with a "_defined" flag + above in here, since the encoder will blindly follow this and could then encode an invalid PNG file + (such as one with two IHDR chunks or the disallowed combination of sRGB with iCCP). But do use + this if you wish to store an ancillary chunk that is not supported by LodePNG (such as sPLT or hIST), + or any non-standard PNG chunk. + + Do not allocate or traverse this data yourself. Use the chunk traversing functions declared + later, such as lodepng_chunk_next and lodepng_chunk_append, to read/write this struct. + */ + unsigned char* unknown_chunks_data[3]; + size_t unknown_chunks_size[3]; /*size in bytes of the unknown chunks, given for protection*/ +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ +} LodePNGInfo; + +/*init, cleanup and copy functions to use with this struct*/ +void lodepng_info_init(LodePNGInfo* info); +void lodepng_info_cleanup(LodePNGInfo* info); +/*return value is error code (0 means no error)*/ +unsigned lodepng_info_copy(LodePNGInfo* dest, const LodePNGInfo* source); + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS +unsigned lodepng_add_text(LodePNGInfo* info, const char* key, const char* str); /*push back both texts at once*/ +void lodepng_clear_text(LodePNGInfo* info); /*use this to clear the texts again after you filled them in*/ + +unsigned lodepng_add_itext(LodePNGInfo* info, const char* key, const char* langtag, + const char* transkey, const char* str); /*push back the 4 texts of 1 chunk at once*/ +void lodepng_clear_itext(LodePNGInfo* info); /*use this to clear the itexts again after you filled them in*/ + +/*replaces if exists*/ +unsigned lodepng_set_icc(LodePNGInfo* info, const char* name, const unsigned char* profile, unsigned profile_size); +void lodepng_clear_icc(LodePNGInfo* info); /*use this to clear the texts again after you filled them in*/ +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ + +/* +Converts raw buffer from one color type to another color type, based on +LodePNGColorMode structs to describe the input and output color type. 
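
Illustrative sketch (editorial note, not part of the patch): lodepng_convert, declared just below, repacks a raw buffer between two color modes described by LodePNGColorMode. The sketch assumes rgb points to a valid w*h RGB8 image and expands it to RGBA8 (adding an opaque alpha channel), using the lodepng_color_mode_make and lodepng_get_raw_size helpers declared earlier.

  #include <stdio.h>
  #include <stdlib.h>
  #include "lodepng.h"

  /* Expand an RGB8 buffer to RGBA8 with lodepng_convert. */
  int add_alpha(const unsigned char* rgb, unsigned w, unsigned h) {
    LodePNGColorMode mode_in  = lodepng_color_mode_make(LCT_RGB, 8);
    LodePNGColorMode mode_out = lodepng_color_mode_make(LCT_RGBA, 8);

    size_t outsize = lodepng_get_raw_size(w, h, &mode_out); /* w * h * 4 here */
    unsigned char* rgba = (unsigned char*)malloc(outsize);
    if (!rgba) return 1;

    unsigned error = lodepng_convert(rgba, rgb, &mode_out, &mode_in, w, h);
    if (error) printf("convert error %u: %s\n", error, lodepng_error_text(error));

    /* ... use rgba ... */
    free(rgba);
    return error ? 1 : 0;
  }
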
+See the reference manual at the end of this header file to see which color conversions are supported. +return value = LodePNG error code (0 if all went ok, an error if the conversion isn't supported) +The out buffer must have size (w * h * bpp + 7) / 8, where bpp is the bits per pixel +of the output color type (lodepng_get_bpp). +For < 8 bpp images, there should not be padding bits at the end of scanlines. +For 16-bit per channel colors, uses big endian format like PNG does. +Return value is LodePNG error code +*/ +unsigned lodepng_convert(unsigned char* out, const unsigned char* in, + const LodePNGColorMode* mode_out, const LodePNGColorMode* mode_in, + unsigned w, unsigned h); + +#ifdef LODEPNG_COMPILE_DECODER +/* +Settings for the decoder. This contains settings for the PNG and the Zlib +decoder, but not the Info settings from the Info structs. +*/ +typedef struct LodePNGDecoderSettings { + LodePNGDecompressSettings zlibsettings; /*in here is the setting to ignore Adler32 checksums*/ + + /* Check LodePNGDecompressSettings for more ignorable errors such as ignore_adler32 */ + unsigned ignore_crc; /*ignore CRC checksums*/ + unsigned ignore_critical; /*ignore unknown critical chunks*/ + unsigned ignore_end; /*ignore issues at end of file if possible (missing IEND chunk, too large chunk, ...)*/ + /* TODO: make a system involving warnings with levels and a strict mode instead. Other potentially recoverable + errors: srgb rendering intent value, size of content of ancillary chunks, more than 79 characters for some + strings, placement/combination rules for ancillary chunks, crc of unknown chunks, allowed characters + in string keys, etc... */ + + unsigned color_convert; /*whether to convert the PNG to the color type you want. Default: yes*/ + +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + unsigned read_text_chunks; /*if false but remember_unknown_chunks is true, they're stored in the unknown chunks*/ + + /*store all bytes from unknown chunks in the LodePNGInfo (off by default, useful for a png editor)*/ + unsigned remember_unknown_chunks; + + /* maximum size for decompressed text chunks. If a text chunk's text is larger than this, an error is returned, + unless reading text chunks is disabled or this limit is set higher or disabled. Set to 0 to allow any size. + By default it is a value that prevents unreasonably large strings from hogging memory. */ + size_t max_text_size; + + /* maximum size for compressed ICC chunks. If the ICC profile is larger than this, an error will be returned. Set to + 0 to allow any size. By default this is a value that prevents ICC profiles that would be much larger than any + legitimate profile could be to hog memory. */ + size_t max_icc_size; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ +} LodePNGDecoderSettings; + +void lodepng_decoder_settings_init(LodePNGDecoderSettings* settings); +#endif /*LODEPNG_COMPILE_DECODER*/ + +#ifdef LODEPNG_COMPILE_ENCODER +/*automatically use color type with less bits per pixel if losslessly possible. Default: AUTO*/ +typedef enum LodePNGFilterStrategy { + /*every filter at zero*/ + LFS_ZERO = 0, + /*every filter at 1, 2, 3 or 4 (paeth), unlike LFS_ZERO not a good choice, but for testing*/ + LFS_ONE = 1, + LFS_TWO = 2, + LFS_THREE = 3, + LFS_FOUR = 4, + /*Use filter that gives minimum sum, as described in the official PNG filter heuristic.*/ + LFS_MINSUM, + /*Use the filter type that gives smallest Shannon entropy for this scanline. 
Depending + on the image, this is better or worse than minsum.*/ + LFS_ENTROPY, + /* + Brute-force-search PNG filters by compressing each filter for each scanline. + Experimental, very slow, and only rarely gives better compression than MINSUM. + */ + LFS_BRUTE_FORCE, + /*use predefined_filters buffer: you specify the filter type for each scanline*/ + LFS_PREDEFINED +} LodePNGFilterStrategy; + +/*Gives characteristics about the integer RGBA colors of the image (count, alpha channel usage, bit depth, ...), +which helps decide which color model to use for encoding. +Used internally by default if "auto_convert" is enabled. Public because it's useful for custom algorithms.*/ +typedef struct LodePNGColorStats { + unsigned colored; /*not grayscale*/ + unsigned key; /*image is not opaque and color key is possible instead of full alpha*/ + unsigned short key_r; /*key values, always as 16-bit, in 8-bit case the byte is duplicated, e.g. 65535 means 255*/ + unsigned short key_g; + unsigned short key_b; + unsigned alpha; /*image is not opaque and alpha channel or alpha palette required*/ + unsigned numcolors; /*amount of colors, up to 257. Not valid if bits == 16 or allow_palette is disabled.*/ + unsigned char palette[1024]; /*Remembers up to the first 256 RGBA colors, in no particular order, only valid when numcolors is valid*/ + unsigned bits; /*bits per channel (not for palette). 1,2 or 4 for grayscale only. 16 if 16-bit per channel required.*/ + size_t numpixels; + + /*user settings for computing/using the stats*/ + unsigned allow_palette; /*default 1. if 0, disallow choosing palette colortype in auto_choose_color, and don't count numcolors*/ + unsigned allow_greyscale; /*default 1. if 0, choose RGB or RGBA even if the image only has gray colors*/ +} LodePNGColorStats; + +void lodepng_color_stats_init(LodePNGColorStats* stats); + +/*Get a LodePNGColorStats of the image. The stats must already have been inited. +Returns error code (e.g. alloc fail) or 0 if ok.*/ +unsigned lodepng_compute_color_stats(LodePNGColorStats* stats, + const unsigned char* image, unsigned w, unsigned h, + const LodePNGColorMode* mode_in); + +/*Settings for the encoder.*/ +typedef struct LodePNGEncoderSettings { + LodePNGCompressSettings zlibsettings; /*settings for the zlib encoder, such as window size, ...*/ + + unsigned auto_convert; /*automatically choose output PNG color type. Default: true*/ + + /*If true, follows the official PNG heuristic: if the PNG uses a palette or lower than + 8 bit depth, set all filters to zero. Otherwise use the filter_strategy. Note that to + completely follow the official PNG heuristic, filter_palette_zero must be true and + filter_strategy must be LFS_MINSUM*/ + unsigned filter_palette_zero; + /*Which filter strategy to use when not using zeroes due to filter_palette_zero. + Set filter_palette_zero to 0 to ensure always using your chosen strategy. Default: LFS_MINSUM*/ + LodePNGFilterStrategy filter_strategy; + /*used if filter_strategy is LFS_PREDEFINED. In that case, this must point to a buffer with + the same length as the amount of scanlines in the image, and each value must <= 5. You + have to cleanup this buffer, LodePNG will never free it. Don't forget that filter_palette_zero + must be set to 0 to ensure this is also used on palette or low bitdepth images.*/ + const unsigned char* predefined_filters; + + /*force creating a PLTE chunk if colortype is 2 or 6 (= a suggested palette). 
+ If colortype is 3, PLTE is _always_ created.*/ + unsigned force_palette; +#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS + /*add LodePNG identifier and version as a text chunk, for debugging*/ + unsigned add_id; + /*encode text chunks as zTXt chunks instead of tEXt chunks, and use compression in iTXt chunks*/ + unsigned text_compression; +#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/ +} LodePNGEncoderSettings; + +void lodepng_encoder_settings_init(LodePNGEncoderSettings* settings); +#endif /*LODEPNG_COMPILE_ENCODER*/ + + +#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) +/*The settings, state and information for extended encoding and decoding.*/ +typedef struct LodePNGState { +#ifdef LODEPNG_COMPILE_DECODER + LodePNGDecoderSettings decoder; /*the decoding settings*/ +#endif /*LODEPNG_COMPILE_DECODER*/ +#ifdef LODEPNG_COMPILE_ENCODER + LodePNGEncoderSettings encoder; /*the encoding settings*/ +#endif /*LODEPNG_COMPILE_ENCODER*/ + LodePNGColorMode info_raw; /*specifies the format in which you would like to get the raw pixel buffer*/ + LodePNGInfo info_png; /*info of the PNG image obtained after decoding*/ + unsigned error; +} LodePNGState; + +/*init, cleanup and copy functions to use with this struct*/ +void lodepng_state_init(LodePNGState* state); +void lodepng_state_cleanup(LodePNGState* state); +void lodepng_state_copy(LodePNGState* dest, const LodePNGState* source); +#endif /* defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_ENCODER) */ + +#ifdef LODEPNG_COMPILE_DECODER +/* +Same as lodepng_decode_memory, but uses a LodePNGState to allow custom settings and +getting much more information about the PNG image and color mode. +*/ +unsigned lodepng_decode(unsigned char** out, unsigned* w, unsigned* h, + LodePNGState* state, + const unsigned char* in, size_t insize); + +/* +Read the PNG header, but not the actual data. This returns only the information +that is in the IHDR chunk of the PNG, such as width, height and color type. The +information is placed in the info_png field of the LodePNGState. +*/ +unsigned lodepng_inspect(unsigned* w, unsigned* h, + LodePNGState* state, + const unsigned char* in, size_t insize); +#endif /*LODEPNG_COMPILE_DECODER*/ + +/* +Reads one metadata chunk (other than IHDR) of the PNG file and outputs what it +read in the state. Returns error code on failure. +Use lodepng_inspect first with a new state, then e.g. lodepng_chunk_find_const +to find the desired chunk type, and if non null use lodepng_inspect_chunk (with +chunk_pointer - start_of_file as pos). +Supports most metadata chunks from the PNG standard (gAMA, bKGD, tEXt, ...). +Ignores unsupported, unknown, non-metadata or IHDR chunks (without error). +Requirements: &in[pos] must point to start of a chunk, must use regular +lodepng_inspect first since format of most other chunks depends on IHDR, and if +there is a PLTE chunk, that one must be inspected before tRNS or bKGD. +*/ +unsigned lodepng_inspect_chunk(LodePNGState* state, size_t pos, + const unsigned char* in, size_t insize); + +#ifdef LODEPNG_COMPILE_ENCODER +/*This function allocates the out buffer with standard malloc and stores the size in *outsize.*/ +unsigned lodepng_encode(unsigned char** out, size_t* outsize, + const unsigned char* image, unsigned w, unsigned h, + LodePNGState* state); +#endif /*LODEPNG_COMPILE_ENCODER*/ + +/* +The lodepng_chunk functions are normally not needed, except to traverse the +unknown chunks stored in the LodePNGInfo struct, or add new ones to it. 
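
Illustrative sketch (editorial note, not part of the patch): reading only the header of an in-memory PNG with lodepng_inspect and the LodePNGState declared above; png and pngsize are assumed to hold the file's bytes. No pixel data is decoded.

  #include <stdio.h>
  #include "lodepng.h"

  /* Print the IHDR information (width, height, color type, bit depth). */
  int print_header(const unsigned char* png, size_t pngsize) {
    LodePNGState state;
    lodepng_state_init(&state);

    unsigned w = 0, h = 0;
    unsigned error = lodepng_inspect(&w, &h, &state, png, pngsize);
    if (!error) {
      printf("%u x %u, color type %d, %u bits per sample\n",
             w, h, (int)state.info_png.color.colortype, state.info_png.color.bitdepth);
    } else {
      printf("inspect error %u: %s\n", error, lodepng_error_text(error));
    }

    lodepng_state_cleanup(&state);
    return error ? 1 : 0;
  }
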
+It also allows traversing the chunks of an encoded PNG file yourself. + +The chunk pointer always points to the beginning of the chunk itself, that is +the first byte of the 4 length bytes. + +In the PNG file format, chunks have the following format: +-4 bytes length: length of the data of the chunk in bytes (chunk itself is 12 bytes longer) +-4 bytes chunk type (ASCII a-z,A-Z only, see below) +-length bytes of data (may be 0 bytes if length was 0) +-4 bytes of CRC, computed on chunk name + data + +The first chunk starts at the 8th byte of the PNG file, the entire rest of the file +exists out of concatenated chunks with the above format. + +PNG standard chunk ASCII naming conventions: +-First byte: uppercase = critical, lowercase = ancillary +-Second byte: uppercase = public, lowercase = private +-Third byte: must be uppercase +-Fourth byte: uppercase = unsafe to copy, lowercase = safe to copy +*/ + +/* +Gets the length of the data of the chunk. Total chunk length has 12 bytes more. +There must be at least 4 bytes to read from. If the result value is too large, +it may be corrupt data. +*/ +unsigned lodepng_chunk_length(const unsigned char* chunk); + +/*puts the 4-byte type in null terminated string*/ +void lodepng_chunk_type(char type[5], const unsigned char* chunk); + +/*check if the type is the given type*/ +unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type); + +/*0: it's one of the critical chunk types, 1: it's an ancillary chunk (see PNG standard)*/ +unsigned char lodepng_chunk_ancillary(const unsigned char* chunk); + +/*0: public, 1: private (see PNG standard)*/ +unsigned char lodepng_chunk_private(const unsigned char* chunk); + +/*0: the chunk is unsafe to copy, 1: the chunk is safe to copy (see PNG standard)*/ +unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk); + +/*get pointer to the data of the chunk, where the input points to the header of the chunk*/ +unsigned char* lodepng_chunk_data(unsigned char* chunk); +const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk); + +/*returns 0 if the crc is correct, 1 if it's incorrect (0 for OK as usual!)*/ +unsigned lodepng_chunk_check_crc(const unsigned char* chunk); + +/*generates the correct CRC from the data and puts it in the last 4 bytes of the chunk*/ +void lodepng_chunk_generate_crc(unsigned char* chunk); + +/* +Iterate to next chunks, allows iterating through all chunks of the PNG file. +Input must be at the beginning of a chunk (result of a previous lodepng_chunk_next call, +or the 8th byte of a PNG file which always has the first chunk), or alternatively may +point to the first byte of the PNG file (which is not a chunk but the magic header, the +function will then skip over it and return the first real chunk). +Will output pointer to the start of the next chunk, or at or beyond end of the file if there +is no more chunk after this or possibly if the chunk is corrupt. +Start this process at the 8th byte of the PNG file. +In a non-corrupt PNG file, the last chunk should have name "IEND". 
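
Illustrative sketch (editorial note, not part of the patch): walking the chunks of an in-memory PNG with lodepng_chunk_length, lodepng_chunk_type and lodepng_chunk_type_equals above, plus lodepng_chunk_next_const, which is declared immediately below; png and pngsize are assumed to hold a complete, well-formed PNG file.

  #include <stdio.h>
  #include "lodepng.h"

  /* List each chunk's type and data length. The first chunk starts 8 bytes
     in, after the PNG signature. */
  void list_chunks(const unsigned char* png, size_t pngsize) {
    const unsigned char* end = png + pngsize;
    const unsigned char* chunk = png + 8;

    while (chunk && chunk + 12 <= end) {   /* 12 = length + type + CRC */
      char type[5];
      lodepng_chunk_type(type, chunk);
      printf("%s: %u bytes of data\n", type, lodepng_chunk_length(chunk));
      if (lodepng_chunk_type_equals(chunk, "IEND")) break;
      chunk = lodepng_chunk_next_const(chunk, end);
    }
  }
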
+*/ +unsigned char* lodepng_chunk_next(unsigned char* chunk, unsigned char* end); +const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk, const unsigned char* end); + +/*Finds the first chunk with the given type in the range [chunk, end), or returns NULL if not found.*/ +unsigned char* lodepng_chunk_find(unsigned char* chunk, unsigned char* end, const char type[5]); +const unsigned char* lodepng_chunk_find_const(const unsigned char* chunk, const unsigned char* end, const char type[5]); + +/* +Appends chunk to the data in out. The given chunk should already have its chunk header. +The out variable and outsize are updated to reflect the new reallocated buffer. +Returns error code (0 if it went ok) +*/ +unsigned lodepng_chunk_append(unsigned char** out, size_t* outsize, const unsigned char* chunk); + +/* +Appends new chunk to out. The chunk to append is given by giving its length, type +and data separately. The type is a 4-letter string. +The out variable and outsize are updated to reflect the new reallocated buffer. +Returne error code (0 if it went ok) +*/ +unsigned lodepng_chunk_create(unsigned char** out, size_t* outsize, unsigned length, + const char* type, const unsigned char* data); + + +/*Calculate CRC32 of buffer*/ +unsigned lodepng_crc32(const unsigned char* buf, size_t len); +#endif /*LODEPNG_COMPILE_PNG*/ + + +#ifdef LODEPNG_COMPILE_ZLIB +/* +This zlib part can be used independently to zlib compress and decompress a +buffer. It cannot be used to create gzip files however, and it only supports the +part of zlib that is required for PNG, it does not support dictionaries. +*/ + +#ifdef LODEPNG_COMPILE_DECODER +/*Inflate a buffer. Inflate is the decompression step of deflate. Out buffer must be freed after use.*/ +unsigned lodepng_inflate(unsigned char** out, size_t* outsize, + const unsigned char* in, size_t insize, + const LodePNGDecompressSettings* settings); + +/* +Decompresses Zlib data. Reallocates the out buffer and appends the data. The +data must be according to the zlib specification. +Either, *out must be NULL and *outsize must be 0, or, *out must be a valid +buffer and *outsize its size in bytes. out must be freed by user after usage. +*/ +unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize, + const unsigned char* in, size_t insize, + const LodePNGDecompressSettings* settings); +#endif /*LODEPNG_COMPILE_DECODER*/ + +#ifdef LODEPNG_COMPILE_ENCODER +/* +Compresses data with Zlib. Reallocates the out buffer and appends the data. +Zlib adds a small header and trailer around the deflate data. +The data is output in the format of the zlib specification. +Either, *out must be NULL and *outsize must be 0, or, *out must be a valid +buffer and *outsize its size in bytes. out must be freed by user after usage. +*/ +unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize, + const unsigned char* in, size_t insize, + const LodePNGCompressSettings* settings); + +/* +Find length-limited Huffman code for given frequencies. This function is in the +public interface only for tests, it's used internally by lodepng_deflate. +*/ +unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies, + size_t numcodes, unsigned maxbitlen); + +/*Compress a buffer with deflate. See RFC 1951. 
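
Illustrative sketch (editorial note, not part of the patch): a zlib round trip through lodepng_zlib_compress and lodepng_zlib_decompress above, using the default settings objects declared earlier in this header; data and size are a placeholder input buffer. Per the comments above, both out buffers start as NULL/0 and are freed by the caller.

  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include "lodepng.h"

  int zlib_roundtrip(const unsigned char* data, size_t size) {
    unsigned char* comp = NULL; size_t compsize = 0;
    unsigned error = lodepng_zlib_compress(&comp, &compsize, data, size,
                                           &lodepng_default_compress_settings);
    if (error) { printf("compress error %u\n", error); return 1; }

    unsigned char* back = NULL; size_t backsize = 0;
    error = lodepng_zlib_decompress(&back, &backsize, comp, compsize,
                                    &lodepng_default_decompress_settings);
    if (!error && backsize == size && memcmp(back, data, size) == 0) {
      printf("ok: %zu bytes -> %zu compressed\n", size, compsize);
    }

    free(comp);
    free(back);
    return error ? 1 : 0;
  }
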
Out buffer must be freed after use.*/ +unsigned lodepng_deflate(unsigned char** out, size_t* outsize, + const unsigned char* in, size_t insize, + const LodePNGCompressSettings* settings); + +#endif /*LODEPNG_COMPILE_ENCODER*/ +#endif /*LODEPNG_COMPILE_ZLIB*/ + +#ifdef LODEPNG_COMPILE_DISK +/* +Load a file from disk into buffer. The function allocates the out buffer, and +after usage you should free it. +out: output parameter, contains pointer to loaded buffer. +outsize: output parameter, size of the allocated out buffer +filename: the path to the file to load +return value: error code (0 means ok) +*/ +unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename); + +/* +Save a file from buffer to disk. Warning, if it exists, this function overwrites +the file without warning! +buffer: the buffer to write +buffersize: size of the buffer to write +filename: the path to the file to save to +return value: error code (0 means ok) +*/ +unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename); +#endif /*LODEPNG_COMPILE_DISK*/ + +#ifdef LODEPNG_COMPILE_CPP +/* The LodePNG C++ wrapper uses std::vectors instead of manually allocated memory buffers. */ +namespace lodepng { +#ifdef LODEPNG_COMPILE_PNG +class State : public LodePNGState { + public: + State(); + State(const State& other); + ~State(); + State& operator=(const State& other); +}; + +#ifdef LODEPNG_COMPILE_DECODER +/* Same as other lodepng::decode, but using a State for more settings and information. */ +unsigned decode(std::vector& out, unsigned& w, unsigned& h, + State& state, + const unsigned char* in, size_t insize); +unsigned decode(std::vector& out, unsigned& w, unsigned& h, + State& state, + const std::vector& in); +#endif /*LODEPNG_COMPILE_DECODER*/ + +#ifdef LODEPNG_COMPILE_ENCODER +/* Same as other lodepng::encode, but using a State for more settings and information. */ +unsigned encode(std::vector& out, + const unsigned char* in, unsigned w, unsigned h, + State& state); +unsigned encode(std::vector& out, + const std::vector& in, unsigned w, unsigned h, + State& state); +#endif /*LODEPNG_COMPILE_ENCODER*/ + +#ifdef LODEPNG_COMPILE_DISK +/* +Load a file from disk into an std::vector. +return value: error code (0 means ok) +*/ +unsigned load_file(std::vector& buffer, const std::string& filename); + +/* +Save the binary data in an std::vector to a file on disk. The file is overwritten +without warning. 
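
Illustrative sketch (editorial note, not part of the patch): the disk helpers lodepng_load_file and lodepng_save_file above, with lodepng_error_text for the message. "in.png" and "copy.png" are placeholder paths, not files referenced by this patch.

  #include <stdio.h>
  #include <stdlib.h>
  #include "lodepng.h"

  /* Load a file into a malloc'd buffer and write it back out under a new name. */
  int copy_file(void) {
    unsigned char* buffer = NULL;
    size_t buffersize = 0;

    unsigned error = lodepng_load_file(&buffer, &buffersize, "in.png");
    if (!error) error = lodepng_save_file(buffer, buffersize, "copy.png");
    if (error) printf("file error %u: %s\n", error, lodepng_error_text(error));

    free(buffer);
    return error ? 1 : 0;
  }
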
+*/ +unsigned save_file(const std::vector& buffer, const std::string& filename); +#endif /* LODEPNG_COMPILE_DISK */ +#endif /* LODEPNG_COMPILE_PNG */ + +#ifdef LODEPNG_COMPILE_ZLIB +#ifdef LODEPNG_COMPILE_DECODER +/* Zlib-decompress an unsigned char buffer */ +unsigned decompress(std::vector& out, const unsigned char* in, size_t insize, + const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings); + +/* Zlib-decompress an std::vector */ +unsigned decompress(std::vector& out, const std::vector& in, + const LodePNGDecompressSettings& settings = lodepng_default_decompress_settings); +#endif /* LODEPNG_COMPILE_DECODER */ + +#ifdef LODEPNG_COMPILE_ENCODER +/* Zlib-compress an unsigned char buffer */ +unsigned compress(std::vector& out, const unsigned char* in, size_t insize, + const LodePNGCompressSettings& settings = lodepng_default_compress_settings); + +/* Zlib-compress an std::vector */ +unsigned compress(std::vector& out, const std::vector& in, + const LodePNGCompressSettings& settings = lodepng_default_compress_settings); +#endif /* LODEPNG_COMPILE_ENCODER */ +#endif /* LODEPNG_COMPILE_ZLIB */ +} /* namespace lodepng */ +#endif /*LODEPNG_COMPILE_CPP*/ + +/* +TODO: +[.] test if there are no memory leaks or security exploits - done a lot but needs to be checked often +[.] check compatibility with various compilers - done but needs to be redone for every newer version +[X] converting color to 16-bit per channel types +[X] support color profile chunk types (but never let them touch RGB values by default) +[ ] support all public PNG chunk types (almost done except sBIT, sPLT and hIST) +[ ] make sure encoder generates no chunks with size > (2^31)-1 +[ ] partial decoding (stream processing) +[X] let the "isFullyOpaque" function check color keys and transparent palettes too +[X] better name for the variables "codes", "codesD", "codelengthcodes", "clcl" and "lldl" +[ ] allow treating some errors like warnings, when image is recoverable (e.g. 69, 57, 58) +[ ] make warnings like: oob palette, checksum fail, data after iend, wrong/unknown crit chunk, no null terminator in text, ... +[ ] error messages with line numbers (and version) +[ ] errors in state instead of as return code? +[ ] new errors/warnings like suspiciously big decompressed ztxt or iccp chunk +[ ] let the C++ wrapper catch exceptions coming from the standard library and return LodePNG error codes +[ ] allow user to provide custom color conversion functions, e.g. for premultiplied alpha, padding bits or not, ... +[ ] allow user to give data (void*) to custom allocator +[X] provide alternatives for C library functions not present on some platforms (memcpy, ...) +*/ + +#endif /*LODEPNG_H inclusion guard*/ + +/* +LodePNG Documentation +--------------------- + +0. table of contents +-------------------- + + 1. about + 1.1. supported features + 1.2. features not supported + 2. C and C++ version + 3. security + 4. decoding + 5. encoding + 6. color conversions + 6.1. PNG color types + 6.2. color conversions + 6.3. padding bits + 6.4. A note about 16-bits per channel and endianness + 7. error values + 8. chunks and PNG editing + 9. compiler support + 10. examples + 10.1. decoder C++ example + 10.2. decoder C example + 11. state settings reference + 12. changes + 13. contact information + + +1. about +-------- + +PNG is a file format to store raster images losslessly with good compression, +supporting different color types and alpha channel. 
+ +LodePNG is a PNG codec according to the Portable Network Graphics (PNG) +Specification (Second Edition) - W3C Recommendation 10 November 2003. + +The specifications used are: + +*) Portable Network Graphics (PNG) Specification (Second Edition): + http://www.w3.org/TR/2003/REC-PNG-20031110 +*) RFC 1950 ZLIB Compressed Data Format version 3.3: + http://www.gzip.org/zlib/rfc-zlib.html +*) RFC 1951 DEFLATE Compressed Data Format Specification ver 1.3: + http://www.gzip.org/zlib/rfc-deflate.html + +The most recent version of LodePNG can currently be found at +http://lodev.org/lodepng/ + +LodePNG works both in C (ISO C90) and C++, with a C++ wrapper that adds +extra functionality. + +LodePNG exists out of two files: +-lodepng.h: the header file for both C and C++ +-lodepng.c(pp): give it the name lodepng.c or lodepng.cpp (or .cc) depending on your usage + +If you want to start using LodePNG right away without reading this doc, get the +examples from the LodePNG website to see how to use it in code, or check the +smaller examples in chapter 13 here. + +LodePNG is simple but only supports the basic requirements. To achieve +simplicity, the following design choices were made: There are no dependencies +on any external library. There are functions to decode and encode a PNG with +a single function call, and extended versions of these functions taking a +LodePNGState struct allowing to specify or get more information. By default +the colors of the raw image are always RGB or RGBA, no matter what color type +the PNG file uses. To read and write files, there are simple functions to +convert the files to/from buffers in memory. + +This all makes LodePNG suitable for loading textures in games, demos and small +programs, ... It's less suitable for full fledged image editors, loading PNGs +over network (it requires all the image data to be available before decoding can +begin), life-critical systems, ... + +1.1. supported features +----------------------- + +The following features are supported by the decoder: + +*) decoding of PNGs with any color type, bit depth and interlace mode, to a 24- or 32-bit color raw image, + or the same color type as the PNG +*) encoding of PNGs, from any raw image to 24- or 32-bit color, or the same color type as the raw image +*) Adam7 interlace and deinterlace for any color type +*) loading the image from harddisk or decoding it from a buffer from other sources than harddisk +*) support for alpha channels, including RGBA color model, translucent palettes and color keying +*) zlib decompression (inflate) +*) zlib compression (deflate) +*) CRC32 and ADLER32 checksums +*) colorimetric color profile conversions: currently experimentally available in lodepng_util.cpp only, + plus alternatively ability to pass on chroma/gamma/ICC profile information to other color management system. +*) handling of unknown chunks, allowing making a PNG editor that stores custom and unknown chunks. +*) the following chunks are supported by both encoder and decoder: + IHDR: header information + PLTE: color palette + IDAT: pixel data + IEND: the final chunk + tRNS: transparency for palettized images + tEXt: textual information + zTXt: compressed textual information + iTXt: international textual information + bKGD: suggested background color + pHYs: physical dimensions + tIME: modification time + cHRM: RGB chromaticities + gAMA: RGB gamma correction + iCCP: ICC color profile + sRGB: rendering intent + +1.2. 
features not supported +--------------------------- + +The following features are _not_ supported: + +*) some features needed to make a conformant PNG-Editor might be still missing. +*) partial loading/stream processing. All data must be available and is processed in one call. +*) The following public chunks are not (yet) supported but treated as unknown chunks by LodePNG: + sBIT + hIST + sPLT + + +2. C and C++ version +-------------------- + +The C version uses buffers allocated with alloc that you need to free() +yourself. You need to use init and cleanup functions for each struct whenever +using a struct from the C version to avoid exploits and memory leaks. + +The C++ version has extra functions with std::vectors in the interface and the +lodepng::State class which is a LodePNGState with constructor and destructor. + +These files work without modification for both C and C++ compilers because all +the additional C++ code is in "#ifdef __cplusplus" blocks that make C-compilers +ignore it, and the C code is made to compile both with strict ISO C90 and C++. + +To use the C++ version, you need to rename the source file to lodepng.cpp +(instead of lodepng.c), and compile it with a C++ compiler. + +To use the C version, you need to rename the source file to lodepng.c (instead +of lodepng.cpp), and compile it with a C compiler. + + +3. Security +----------- + +Even if carefully designed, it's always possible that LodePNG contains possible +exploits. If you discover one, please let me know, and it will be fixed. + +When using LodePNG, care has to be taken with the C version of LodePNG, as well +as the C-style structs when working with C++. The following conventions are used +for all C-style structs: + +-if a struct has a corresponding init function, always call the init function when making a new one +-if a struct has a corresponding cleanup function, call it before the struct disappears to avoid memory leaks +-if a struct has a corresponding copy function, use the copy function instead of "=". + The destination must also be inited already. + + +4. Decoding +----------- + +Decoding converts a PNG compressed image to a raw pixel buffer. + +Most documentation on using the decoder is at its declarations in the header +above. For C, simple decoding can be done with functions such as +lodepng_decode32, and more advanced decoding can be done with the struct +LodePNGState and lodepng_decode. For C++, all decoding can be done with the +various lodepng::decode functions, and lodepng::State can be used for advanced +features. + +When using the LodePNGState, it uses the following fields for decoding: +*) LodePNGInfo info_png: it stores extra information about the PNG (the input) in here +*) LodePNGColorMode info_raw: here you can say what color mode of the raw image (the output) you want to get +*) LodePNGDecoderSettings decoder: you can specify a few extra settings for the decoder to use + +LodePNGInfo info_png +-------------------- + +After decoding, this contains extra information of the PNG image, except the actual +pixels, width and height because these are already gotten directly from the decoder +functions. + +It contains for example the original color type of the PNG image, text comments, +suggested background color, etc... More details about the LodePNGInfo struct are +at its declaration documentation. + +LodePNGColorMode info_raw +------------------------- + +When decoding, here you can specify which color type you want +the resulting raw image to be. 
If this is different from the colortype of the +PNG, then the decoder will automatically convert the result. This conversion +always works, except if you want it to convert a color PNG to grayscale or to +a palette with missing colors. + +By default, 32-bit color is used for the result. + +LodePNGDecoderSettings decoder +------------------------------ + +The settings can be used to ignore the errors created by invalid CRC and Adler32 +chunks, and to disable the decoding of tEXt chunks. + +There's also a setting color_convert, true by default. If false, no conversion +is done, the resulting data will be as it was in the PNG (after decompression) +and you'll have to puzzle the colors of the pixels together yourself using the +color type information in the LodePNGInfo. + + +5. Encoding +----------- + +Encoding converts a raw pixel buffer to a PNG compressed image. + +Most documentation on using the encoder is at its declarations in the header +above. For C, simple encoding can be done with functions such as +lodepng_encode32, and more advanced decoding can be done with the struct +LodePNGState and lodepng_encode. For C++, all encoding can be done with the +various lodepng::encode functions, and lodepng::State can be used for advanced +features. + +Like the decoder, the encoder can also give errors. However it gives less errors +since the encoder input is trusted, the decoder input (a PNG image that could +be forged by anyone) is not trusted. + +When using the LodePNGState, it uses the following fields for encoding: +*) LodePNGInfo info_png: here you specify how you want the PNG (the output) to be. +*) LodePNGColorMode info_raw: here you say what color type of the raw image (the input) has +*) LodePNGEncoderSettings encoder: you can specify a few settings for the encoder to use + +LodePNGInfo info_png +-------------------- + +When encoding, you use this the opposite way as when decoding: for encoding, +you fill in the values you want the PNG to have before encoding. By default it's +not needed to specify a color type for the PNG since it's automatically chosen, +but it's possible to choose it yourself given the right settings. + +The encoder will not always exactly match the LodePNGInfo struct you give, +it tries as close as possible. Some things are ignored by the encoder. The +encoder uses, for example, the following settings from it when applicable: +colortype and bitdepth, text chunks, time chunk, the color key, the palette, the +background color, the interlace method, unknown chunks, ... + +When encoding to a PNG with colortype 3, the encoder will generate a PLTE chunk. +If the palette contains any colors for which the alpha channel is not 255 (so +there are translucent colors in the palette), it'll add a tRNS chunk. + +LodePNGColorMode info_raw +------------------------- + +You specify the color type of the raw image that you give to the input here, +including a possible transparent color key and palette you happen to be using in +your raw image data. + +By default, 32-bit color is assumed, meaning your input has to be in RGBA +format with 4 bytes (unsigned chars) per pixel. + +LodePNGEncoderSettings encoder +------------------------------ + +The following settings are supported (some are in sub-structs): +*) auto_convert: when this option is enabled, the encoder will +automatically choose the smallest possible color mode (including color key) that +can encode the colors of all pixels without information loss. +*) btype: the block type for LZ77. 
0 = uncompressed, 1 = fixed huffman tree, + 2 = dynamic huffman tree (best compression). Should be 2 for proper + compression. +*) use_lz77: whether or not to use LZ77 for compressed block types. Should be + true for proper compression. +*) windowsize: the window size used by the LZ77 encoder (1 - 32768). Has value + 2048 by default, but can be set to 32768 for better, but slow, compression. +*) force_palette: if colortype is 2 or 6, you can make the encoder write a PLTE + chunk if force_palette is true. This can used as suggested palette to convert + to by viewers that don't support more than 256 colors (if those still exist) +*) add_id: add text chunk "Encoder: LodePNG " to the image. +*) text_compression: default 1. If 1, it'll store texts as zTXt instead of tEXt chunks. + zTXt chunks use zlib compression on the text. This gives a smaller result on + large texts but a larger result on small texts (such as a single program name). + It's all tEXt or all zTXt though, there's no separate setting per text yet. + + +6. color conversions +-------------------- + +An important thing to note about LodePNG, is that the color type of the PNG, and +the color type of the raw image, are completely independent. By default, when +you decode a PNG, you get the result as a raw image in the color type you want, +no matter whether the PNG was encoded with a palette, grayscale or RGBA color. +And if you encode an image, by default LodePNG will automatically choose the PNG +color type that gives good compression based on the values of colors and amount +of colors in the image. It can be configured to let you control it instead as +well, though. + +To be able to do this, LodePNG does conversions from one color mode to another. +It can convert from almost any color type to any other color type, except the +following conversions: RGB to grayscale is not supported, and converting to a +palette when the palette doesn't have a required color is not supported. This is +not supported on purpose: this is information loss which requires a color +reduction algorithm that is beyond the scope of a PNG encoder (yes, RGB to gray +is easy, but there are multiple ways if you want to give some channels more +weight). + +By default, when decoding, you get the raw image in 32-bit RGBA or 24-bit RGB +color, no matter what color type the PNG has. And by default when encoding, +LodePNG automatically picks the best color model for the output PNG, and expects +the input image to be 32-bit RGBA or 24-bit RGB. So, unless you want to control +the color format of the images yourself, you can skip this chapter. + +6.1. PNG color types +-------------------- + +A PNG image can have many color types, ranging from 1-bit color to 64-bit color, +as well as palettized color modes. After the zlib decompression and unfiltering +in the PNG image is done, the raw pixel data will have that color type and thus +a certain amount of bits per pixel. If you want the output raw image after +decoding to have another color type, a conversion is done by LodePNG. + +The PNG specification gives the following color types: + +0: grayscale, bit depths 1, 2, 4, 8, 16 +2: RGB, bit depths 8 and 16 +3: palette, bit depths 1, 2, 4 and 8 +4: grayscale with alpha, bit depths 8 and 16 +6: RGBA, bit depths 8 and 16 + +Bit depth is the amount of bits per pixel per color channel. So the total amount +of bits per pixel is: amount of channels * bitdepth. + +6.2. 
color conversions +---------------------- + +As explained in the sections about the encoder and decoder, you can specify +color types and bit depths in info_png and info_raw to change the default +behaviour. + +If, when decoding, you want the raw image to be something else than the default, +you need to set the color type and bit depth you want in the LodePNGColorMode, +or the parameters colortype and bitdepth of the simple decoding function. + +If, when encoding, you use another color type than the default in the raw input +image, you need to specify its color type and bit depth in the LodePNGColorMode +of the raw image, or use the parameters colortype and bitdepth of the simple +encoding function. + +If, when encoding, you don't want LodePNG to choose the output PNG color type +but control it yourself, you need to set auto_convert in the encoder settings +to false, and specify the color type you want in the LodePNGInfo of the +encoder (including palette: it can generate a palette if auto_convert is true, +otherwise not). + +If the input and output color type differ (whether user chosen or auto chosen), +LodePNG will do a color conversion, which follows the rules below, and may +sometimes result in an error. + +To avoid some confusion: +-the decoder converts from PNG to raw image +-the encoder converts from raw image to PNG +-the colortype and bitdepth in LodePNGColorMode info_raw, are those of the raw image +-the colortype and bitdepth in the color field of LodePNGInfo info_png, are those of the PNG +-when encoding, the color type in LodePNGInfo is ignored if auto_convert + is enabled, it is automatically generated instead +-when decoding, the color type in LodePNGInfo is set by the decoder to that of the original + PNG image, but it can be ignored since the raw image has the color type you requested instead +-if the color type of the LodePNGColorMode and PNG image aren't the same, a conversion + between the color types is done if the color types are supported. If it is not + supported, an error is returned. If the types are the same, no conversion is done. +-even though some conversions aren't supported, LodePNG supports loading PNGs from any + colortype and saving PNGs to any colortype, sometimes it just requires preparing + the raw image correctly before encoding. +-both encoder and decoder use the same color converter. + +The function lodepng_convert does the color conversion. It is available in the +interface but normally isn't needed since the encoder and decoder already call +it. + +Non supported color conversions: +-color to grayscale when non-gray pixels are present: no error is thrown, but +the result will look ugly because only the red channel is taken (it assumes all +three channels are the same in this case so ignores green and blue). The reason +no error is given is to allow converting from three-channel grayscale images to +one-channel even if there are numerical imprecisions. +-anything to palette when the palette does not have an exact match for a from-color +in it: in this case an error is thrown + +Supported color conversions: +-anything to 8-bit RGB, 8-bit RGBA, 16-bit RGB, 16-bit RGBA +-any gray or gray+alpha, to gray or gray+alpha +-anything to a palette, as long as the palette has the requested colors in it +-removing alpha channel +-higher to smaller bitdepth, and vice versa + +If you want no color conversion to be done (e.g. 
for speed or control): +-In the encoder, you can make it save a PNG with any color type by giving the +raw color mode and LodePNGInfo the same color mode, and setting auto_convert to +false. +-In the decoder, you can make it store the pixel data in the same color type +as the PNG has, by setting the color_convert setting to false. Settings in +info_raw are then ignored. + +6.3. padding bits +----------------- + +In the PNG file format, if a less than 8-bit per pixel color type is used and the scanlines +have a bit amount that isn't a multiple of 8, then padding bits are used so that each +scanline starts at a fresh byte. But that is NOT true for the LodePNG raw input and output. +The raw input image you give to the encoder, and the raw output image you get from the decoder +will NOT have these padding bits, e.g. in the case of a 1-bit image with a width +of 7 pixels, the first pixel of the second scanline will the 8th bit of the first byte, +not the first bit of a new byte. + +6.4. A note about 16-bits per channel and endianness +---------------------------------------------------- + +LodePNG uses unsigned char arrays for 16-bit per channel colors too, just like +for any other color format. The 16-bit values are stored in big endian (most +significant byte first) in these arrays. This is the opposite order of the +little endian used by x86 CPU's. + +LodePNG always uses big endian because the PNG file format does so internally. +Conversions to other formats than PNG uses internally are not supported by +LodePNG on purpose, there are myriads of formats, including endianness of 16-bit +colors, the order in which you store R, G, B and A, and so on. Supporting and +converting to/from all that is outside the scope of LodePNG. + +This may mean that, depending on your use case, you may want to convert the big +endian output of LodePNG to little endian with a for loop. This is certainly not +always needed, many applications and libraries support big endian 16-bit colors +anyway, but it means you cannot simply cast the unsigned char* buffer to an +unsigned short* buffer on x86 CPUs. + + +7. error values +--------------- + +All functions in LodePNG that return an error code, return 0 if everything went +OK, or a non-zero code if there was an error. + +The meaning of the LodePNG error values can be retrieved with the function +lodepng_error_text: given the numerical error code, it returns a description +of the error in English as a string. + +Check the implementation of lodepng_error_text to see the meaning of each code. + +It is not recommended to use the numerical values to programmatically make +different decisions based on error types as the numbers are not guaranteed to +stay backwards compatible. They are for human consumption only. Programmatically +only 0 or non-0 matter. + + +8. chunks and PNG editing +------------------------- + +If you want to add extra chunks to a PNG you encode, or use LodePNG for a PNG +editor that should follow the rules about handling of unknown chunks, or if your +program is able to read other types of chunks than the ones handled by LodePNG, +then that's possible with the chunk functions of LodePNG. + +A PNG chunk has the following layout: + +4 bytes length +4 bytes type name +length bytes data +4 bytes CRC + +8.1. iterating through chunks +----------------------------- + +If you have a buffer containing the PNG image data, then the first chunk (the +IHDR chunk) starts at byte number 8 of that buffer. 
The first 8 bytes are the +signature of the PNG and are not part of a chunk. But if you start at byte 8 +then you have a chunk, and can check the following things of it. + +NOTE: none of these functions check for memory buffer boundaries. To avoid +exploits, always make sure the buffer contains all the data of the chunks. +When using lodepng_chunk_next, make sure the returned value is within the +allocated memory. + +unsigned lodepng_chunk_length(const unsigned char* chunk): + +Get the length of the chunk's data. The total chunk length is this length + 12. + +void lodepng_chunk_type(char type[5], const unsigned char* chunk): +unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type): + +Get the type of the chunk or compare if it's a certain type + +unsigned char lodepng_chunk_critical(const unsigned char* chunk): +unsigned char lodepng_chunk_private(const unsigned char* chunk): +unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk): + +Check if the chunk is critical in the PNG standard (only IHDR, PLTE, IDAT and IEND are). +Check if the chunk is private (public chunks are part of the standard, private ones not). +Check if the chunk is safe to copy. If it's not, then, when modifying data in a critical +chunk, unsafe to copy chunks of the old image may NOT be saved in the new one if your +program doesn't handle that type of unknown chunk. + +unsigned char* lodepng_chunk_data(unsigned char* chunk): +const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk): + +Get a pointer to the start of the data of the chunk. + +unsigned lodepng_chunk_check_crc(const unsigned char* chunk): +void lodepng_chunk_generate_crc(unsigned char* chunk): + +Check if the crc is correct or generate a correct one. + +unsigned char* lodepng_chunk_next(unsigned char* chunk): +const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk): + +Iterate to the next chunk. This works if you have a buffer with consecutive chunks. Note that these +functions do no boundary checking of the allocated data whatsoever, so make sure there is enough +data available in the buffer to be able to go to the next chunk. + +unsigned lodepng_chunk_append(unsigned char** out, size_t* outsize, const unsigned char* chunk): +unsigned lodepng_chunk_create(unsigned char** out, size_t* outsize, unsigned length, + const char* type, const unsigned char* data): + +These functions are used to create new chunks that are appended to the data in *out that has +length *outsize. The append function appends an existing chunk to the new data. The create +function creates a new chunk with the given parameters and appends it. Type is the 4-letter +name of the chunk. + +8.2. chunks in info_png +----------------------- + +The LodePNGInfo struct contains fields with the unknown chunk in it. It has 3 +buffers (each with size) to contain 3 types of unknown chunks: +the ones that come before the PLTE chunk, the ones that come between the PLTE +and the IDAT chunks, and the ones that come after the IDAT chunks. +It's necessary to make the distinction between these 3 cases because the PNG +standard forces to keep the ordering of unknown chunks compared to the critical +chunks, but does not force any other ordering rules. 
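To make the chunk helpers from 8.1 concrete, the loop below walks all chunks of a PNG that is already in memory and prints each chunk's type and data length. It is a sketch under the assumption that png points at a complete, well-formed PNG of pngsize bytes; the bounds check is done by hand using the "data length + 12" rule quoted above, rather than relying on lodepng_chunk_next.

#include <stdio.h>
#include "lodepng.h"

static void list_chunks(const unsigned char* png, size_t pngsize) {
  size_t pos = 8; // skip the 8-byte PNG signature; the IHDR chunk starts here
  while(pos + 12 <= pngsize) { // 12 = 4 (length) + 4 (type) + 4 (CRC)
    const unsigned char* chunk = png + pos;
    unsigned length = lodepng_chunk_length(chunk); // data bytes only
    char type[5];
    lodepng_chunk_type(type, chunk); // e.g. "IHDR", "IDAT", "IEND"
    printf("%s: %u data bytes\n", type, length);
    if(lodepng_chunk_type_equals(chunk, "IEND")) break;
    pos += (size_t)length + 12; // total chunk size = data length + 12
  }
}

The same idea applies to the three unknown_chunks_data buffers described below, except that those buffers hold bare consecutive chunks, so the scan starts at offset 0 rather than 8.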
+ +info_png.unknown_chunks_data[0] is the chunks before PLTE +info_png.unknown_chunks_data[1] is the chunks after PLTE, before IDAT +info_png.unknown_chunks_data[2] is the chunks after IDAT + +The chunks in these 3 buffers can be iterated through and read by using the same +way described in the previous subchapter. + +When using the decoder to decode a PNG, you can make it store all unknown chunks +if you set the option settings.remember_unknown_chunks to 1. By default, this +option is off (0). + +The encoder will always encode unknown chunks that are stored in the info_png. +If you need it to add a particular chunk that isn't known by LodePNG, you can +use lodepng_chunk_append or lodepng_chunk_create to the chunk data in +info_png.unknown_chunks_data[x]. + +Chunks that are known by LodePNG should not be added in that way. E.g. to make +LodePNG add a bKGD chunk, set background_defined to true and add the correct +parameters there instead. + + +9. compiler support +------------------- + +No libraries other than the current standard C library are needed to compile +LodePNG. For the C++ version, only the standard C++ library is needed on top. +Add the files lodepng.c(pp) and lodepng.h to your project, include +lodepng.h where needed, and your program can read/write PNG files. + +It is compatible with C90 and up, and C++03 and up. + +If performance is important, use optimization when compiling! For both the +encoder and decoder, this makes a large difference. + +Make sure that LodePNG is compiled with the same compiler of the same version +and with the same settings as the rest of the program, or the interfaces with +std::vectors and std::strings in C++ can be incompatible. + +CHAR_BITS must be 8 or higher, because LodePNG uses unsigned chars for octets. + +*) gcc and g++ + +LodePNG is developed in gcc so this compiler is natively supported. It gives no +warnings with compiler options "-Wall -Wextra -pedantic -ansi", with gcc and g++ +version 4.7.1 on Linux, 32-bit and 64-bit. + +*) Clang + +Fully supported and warning-free. + +*) Mingw + +The Mingw compiler (a port of gcc for Windows) should be fully supported by +LodePNG. + +*) Visual Studio and Visual C++ Express Edition + +LodePNG should be warning-free with warning level W4. Two warnings were disabled +with pragmas though: warning 4244 about implicit conversions, and warning 4996 +where it wants to use a non-standard function fopen_s instead of the standard C +fopen. + +Visual Studio may want "stdafx.h" files to be included in each source file and +give an error "unexpected end of file while looking for precompiled header". +This is not standard C++ and will not be added to the stock LodePNG. You can +disable it for lodepng.cpp only by right clicking it, Properties, C/C++, +Precompiled Headers, and set it to Not Using Precompiled Headers there. + +NOTE: Modern versions of VS should be fully supported, but old versions, e.g. +VS6, are not guaranteed to work. + +*) Compilers on Macintosh + +LodePNG has been reported to work both with gcc and LLVM for Macintosh, both for +C and C++. + +*) Other Compilers + +If you encounter problems on any compilers, feel free to let me know and I may +try to fix it if the compiler is modern and standards compliant. + + +10. examples +------------ + +This decoder example shows the most basic usage of LodePNG. More complex +examples can be found on the LodePNG website. + +10.1. 
decoder C++ example
+-------------------------
+
+#include "lodepng.h"
+#include <iostream>
+
+int main(int argc, char *argv[]) {
+  const char* filename = argc > 1 ? argv[1] : "test.png";
+
+  //load and decode
+  std::vector<unsigned char> image;
+  unsigned width, height;
+  unsigned error = lodepng::decode(image, width, height, filename);
+
+  //if there's an error, display it
+  if(error) std::cout << "decoder error " << error << ": " << lodepng_error_text(error) << std::endl;
+
+  //the pixels are now in the vector "image", 4 bytes per pixel, ordered RGBARGBA..., use it as texture, draw it, ...
+}
+
+10.2. decoder C example
+-----------------------
+
+#include "lodepng.h"
+
+int main(int argc, char *argv[]) {
+  unsigned error;
+  unsigned char* image;
+  unsigned width, height;
+  const char* filename = argc > 1 ? argv[1] : "test.png";
+
+  error = lodepng_decode32_file(&image, &width, &height, filename);
+
+  if(error) printf("decoder error %u: %s\n", error, lodepng_error_text(error));
+
+  / * use image here * /
+
+  free(image);
+  return 0;
+}
+
+11. state settings reference
+----------------------------
+
+A quick reference of some settings to set on the LodePNGState
+
+For decoding:
+
+state.decoder.zlibsettings.ignore_adler32: ignore ADLER32 checksums
+state.decoder.zlibsettings.custom_...: use custom inflate function
+state.decoder.ignore_crc: ignore CRC checksums
+state.decoder.ignore_critical: ignore unknown critical chunks
+state.decoder.ignore_end: ignore missing IEND chunk. May fail if this corruption causes other errors
+state.decoder.color_convert: convert internal PNG color to chosen one
+state.decoder.read_text_chunks: whether to read in text metadata chunks
+state.decoder.remember_unknown_chunks: whether to read in unknown chunks
+state.info_raw.colortype: desired color type for decoded image
+state.info_raw.bitdepth: desired bit depth for decoded image
+state.info_raw....: more color settings, see struct LodePNGColorMode
+state.info_png....: no settings for decoder but output, see struct LodePNGInfo
+
+For encoding:
+
+state.encoder.zlibsettings.btype: disable compression by setting it to 0
+state.encoder.zlibsettings.use_lz77: use LZ77 in compression
+state.encoder.zlibsettings.windowsize: tweak LZ77 windowsize
+state.encoder.zlibsettings.minmatch: tweak min LZ77 length to match
+state.encoder.zlibsettings.nicematch: tweak LZ77 match where to stop searching
+state.encoder.zlibsettings.lazymatching: try one more LZ77 matching
+state.encoder.zlibsettings.custom_...: use custom deflate function
+state.encoder.auto_convert: choose optimal PNG color type, if 0 uses info_png
+state.encoder.filter_palette_zero: PNG filter strategy for palette
+state.encoder.filter_strategy: PNG filter strategy to encode with
+state.encoder.force_palette: add palette even if not encoding to one
+state.encoder.add_id: add LodePNG identifier and version as a text chunk
+state.encoder.text_compression: use compressed text chunks for metadata
+state.info_raw.colortype: color type of raw input image you provide
+state.info_raw.bitdepth: bit depth of raw input image you provide
+state.info_raw: more color settings, see struct LodePNGColorMode
+state.info_png.color.colortype: desired color type if auto_convert is false
+state.info_png.color.bitdepth: desired bit depth if auto_convert is false
+state.info_png.color....: more color settings, see struct LodePNGColorMode
+state.info_png....: more PNG related settings, see struct LodePNGInfo
+
+
+12.
changes +----------- + +The version number of LodePNG is the date of the change given in the format +yyyymmdd. + +Some changes aren't backwards compatible. Those are indicated with a (!) +symbol. + +Not all changes are listed here, the commit history in github lists more: +https://github.com/lvandeve/lodepng + +*) 17 okt 2020: prevent decoding too large text/icc chunks by default. +*) 06 mar 2020: simplified some of the dynamic memory allocations. +*) 12 jan 2020: (!) added 'end' argument to lodepng_chunk_next to allow correct + overflow checks. +*) 14 aug 2019: around 25% faster decoding thanks to huffman lookup tables. +*) 15 jun 2019: (!) auto_choose_color API changed (for bugfix: don't use palette + if gray ICC profile) and non-ICC LodePNGColorProfile renamed to + LodePNGColorStats. +*) 30 dec 2018: code style changes only: removed newlines before opening braces. +*) 10 sep 2018: added way to inspect metadata chunks without full decoding. +*) 19 aug 2018: (!) fixed color mode bKGD is encoded with and made it use + palette index in case of palette. +*) 10 aug 2018: (!) added support for gAMA, cHRM, sRGB and iCCP chunks. This + change is backwards compatible unless you relied on unknown_chunks for those. +*) 11 jun 2018: less restrictive check for pixel size integer overflow +*) 14 jan 2018: allow optionally ignoring a few more recoverable errors +*) 17 sep 2017: fix memory leak for some encoder input error cases +*) 27 nov 2016: grey+alpha auto color model detection bugfix +*) 18 apr 2016: Changed qsort to custom stable sort (for platforms w/o qsort). +*) 09 apr 2016: Fixed colorkey usage detection, and better file loading (within + the limits of pure C90). +*) 08 dec 2015: Made load_file function return error if file can't be opened. +*) 24 okt 2015: Bugfix with decoding to palette output. +*) 18 apr 2015: Boundary PM instead of just package-merge for faster encoding. +*) 24 aug 2014: Moved to github +*) 23 aug 2014: Reduced needless memory usage of decoder. +*) 28 jun 2014: Removed fix_png setting, always support palette OOB for + simplicity. Made ColorProfile public. +*) 09 jun 2014: Faster encoder by fixing hash bug and more zeros optimization. +*) 22 dec 2013: Power of two windowsize required for optimization. +*) 15 apr 2013: Fixed bug with LAC_ALPHA and color key. +*) 25 mar 2013: Added an optional feature to ignore some PNG errors (fix_png). +*) 11 mar 2013: (!) Bugfix with custom free. Changed from "my" to "lodepng_" + prefix for the custom allocators and made it possible with a new #define to + use custom ones in your project without needing to change lodepng's code. +*) 28 jan 2013: Bugfix with color key. +*) 27 okt 2012: Tweaks in text chunk keyword length error handling. +*) 8 okt 2012: (!) Added new filter strategy (entropy) and new auto color mode. + (no palette). Better deflate tree encoding. New compression tweak settings. + Faster color conversions while decoding. Some internal cleanups. +*) 23 sep 2012: Reduced warnings in Visual Studio a little bit. +*) 1 sep 2012: (!) Removed #define's for giving custom (de)compression functions + and made it work with function pointers instead. +*) 23 jun 2012: Added more filter strategies. Made it easier to use custom alloc + and free functions and toggle #defines from compiler flags. Small fixes. +*) 6 may 2012: (!) Made plugging in custom zlib/deflate functions more flexible. +*) 22 apr 2012: (!) Made interface more consistent, renaming a lot. Removed + redundant C++ codec classes. Reduced amount of structs. 
Everything changed, + but it is cleaner now imho and functionality remains the same. Also fixed + several bugs and shrunk the implementation code. Made new samples. +*) 6 nov 2011: (!) By default, the encoder now automatically chooses the best + PNG color model and bit depth, based on the amount and type of colors of the + raw image. For this, autoLeaveOutAlphaChannel replaced by auto_choose_color. +*) 9 okt 2011: simpler hash chain implementation for the encoder. +*) 8 sep 2011: lz77 encoder lazy matching instead of greedy matching. +*) 23 aug 2011: tweaked the zlib compression parameters after benchmarking. + A bug with the PNG filtertype heuristic was fixed, so that it chooses much + better ones (it's quite significant). A setting to do an experimental, slow, + brute force search for PNG filter types is added. +*) 17 aug 2011: (!) changed some C zlib related function names. +*) 16 aug 2011: made the code less wide (max 120 characters per line). +*) 17 apr 2011: code cleanup. Bugfixes. Convert low to 16-bit per sample colors. +*) 21 feb 2011: fixed compiling for C90. Fixed compiling with sections disabled. +*) 11 dec 2010: encoding is made faster, based on suggestion by Peter Eastman + to optimize long sequences of zeros. +*) 13 nov 2010: added LodePNG_InfoColor_hasPaletteAlpha and + LodePNG_InfoColor_canHaveAlpha functions for convenience. +*) 7 nov 2010: added LodePNG_error_text function to get error code description. +*) 30 okt 2010: made decoding slightly faster +*) 26 okt 2010: (!) changed some C function and struct names (more consistent). + Reorganized the documentation and the declaration order in the header. +*) 08 aug 2010: only changed some comments and external samples. +*) 05 jul 2010: fixed bug thanks to warnings in the new gcc version. +*) 14 mar 2010: fixed bug where too much memory was allocated for char buffers. +*) 02 sep 2008: fixed bug where it could create empty tree that linux apps could + read by ignoring the problem but windows apps couldn't. +*) 06 jun 2008: added more error checks for out of memory cases. +*) 26 apr 2008: added a few more checks here and there to ensure more safety. +*) 06 mar 2008: crash with encoding of strings fixed +*) 02 feb 2008: support for international text chunks added (iTXt) +*) 23 jan 2008: small cleanups, and #defines to divide code in sections +*) 20 jan 2008: support for unknown chunks allowing using LodePNG for an editor. +*) 18 jan 2008: support for tIME and pHYs chunks added to encoder and decoder. +*) 17 jan 2008: ability to encode and decode compressed zTXt chunks added + Also various fixes, such as in the deflate and the padding bits code. +*) 13 jan 2008: Added ability to encode Adam7-interlaced images. Improved + filtering code of encoder. +*) 07 jan 2008: (!) changed LodePNG to use ISO C90 instead of C++. A + C++ wrapper around this provides an interface almost identical to before. + Having LodePNG be pure ISO C90 makes it more portable. The C and C++ code + are together in these files but it works both for C and C++ compilers. +*) 29 dec 2007: (!) 
changed most integer types to unsigned int + other tweaks +*) 30 aug 2007: bug fixed which makes this Borland C++ compatible +*) 09 aug 2007: some VS2005 warnings removed again +*) 21 jul 2007: deflate code placed in new namespace separate from zlib code +*) 08 jun 2007: fixed bug with 2- and 4-bit color, and small interlaced images +*) 04 jun 2007: improved support for Visual Studio 2005: crash with accessing + invalid std::vector element [0] fixed, and level 3 and 4 warnings removed +*) 02 jun 2007: made the encoder add a tag with version by default +*) 27 may 2007: zlib and png code separated (but still in the same file), + simple encoder/decoder functions added for more simple usage cases +*) 19 may 2007: minor fixes, some code cleaning, new error added (error 69), + moved some examples from here to lodepng_examples.cpp +*) 12 may 2007: palette decoding bug fixed +*) 24 apr 2007: changed the license from BSD to the zlib license +*) 11 mar 2007: very simple addition: ability to encode bKGD chunks. +*) 04 mar 2007: (!) tEXt chunk related fixes, and support for encoding + palettized PNG images. Plus little interface change with palette and texts. +*) 03 mar 2007: Made it encode dynamic Huffman shorter with repeat codes. + Fixed a bug where the end code of a block had length 0 in the Huffman tree. +*) 26 feb 2007: Huffman compression with dynamic trees (BTYPE 2) now implemented + and supported by the encoder, resulting in smaller PNGs at the output. +*) 27 jan 2007: Made the Adler-32 test faster so that a timewaste is gone. +*) 24 jan 2007: gave encoder an error interface. Added color conversion from any + greyscale type to 8-bit greyscale with or without alpha. +*) 21 jan 2007: (!) Totally changed the interface. It allows more color types + to convert to and is more uniform. See the manual for how it works now. +*) 07 jan 2007: Some cleanup & fixes, and a few changes over the last days: + encode/decode custom tEXt chunks, separate classes for zlib & deflate, and + at last made the decoder give errors for incorrect Adler32 or Crc. +*) 01 jan 2007: Fixed bug with encoding PNGs with less than 8 bits per channel. +*) 29 dec 2006: Added support for encoding images without alpha channel, and + cleaned out code as well as making certain parts faster. +*) 28 dec 2006: Added "Settings" to the encoder. +*) 26 dec 2006: The encoder now does LZ77 encoding and produces much smaller files now. + Removed some code duplication in the decoder. Fixed little bug in an example. +*) 09 dec 2006: (!) Placed output parameters of public functions as first parameter. + Fixed a bug of the decoder with 16-bit per color. +*) 15 okt 2006: Changed documentation structure +*) 09 okt 2006: Encoder class added. It encodes a valid PNG image from the + given image buffer, however for now it's not compressed. +*) 08 sep 2006: (!) Changed to interface with a Decoder class +*) 30 jul 2006: (!) LodePNG_InfoPng , width and height are now retrieved in different + way. Renamed decodePNG to decodePNGGeneric. +*) 29 jul 2006: (!) Changed the interface: image info is now returned as a + struct of type LodePNG::LodePNG_Info, instead of a vector, which was a bit clumsy. +*) 28 jul 2006: Cleaned the code and added new error checks. + Corrected terminology "deflate" into "inflate". +*) 23 jun 2006: Added SDL example in the documentation in the header, this + example allows easy debugging by displaying the PNG and its transparency. +*) 22 jun 2006: (!) Changed way to obtain error value. Added + loadFile function for convenience. 
Made decodePNG32 faster. +*) 21 jun 2006: (!) Changed type of info vector to unsigned. + Changed position of palette in info vector. Fixed an important bug that + happened on PNGs with an uncompressed block. +*) 16 jun 2006: Internally changed unsigned into unsigned where + needed, and performed some optimizations. +*) 07 jun 2006: (!) Renamed functions to decodePNG and placed them + in LodePNG namespace. Changed the order of the parameters. Rewrote the + documentation in the header. Renamed files to lodepng.cpp and lodepng.h +*) 22 apr 2006: Optimized and improved some code +*) 07 sep 2005: (!) Changed to std::vector interface +*) 12 aug 2005: Initial release (C++, decoder only) + + +13. contact information +----------------------- + +Feel free to contact me with suggestions, problems, comments, ... concerning +LodePNG. If you encounter a PNG image that doesn't work properly with this +decoder, feel free to send it and I'll use it to find and fix the problem. + +My email address is (puzzle the account and domain together with an @ symbol): +Domain: gmail dot com. +Account: lode dot vandevenne. + + +Copyright (c) 2005-2020 Lode Vandevenne +*/ From d11773c5fe237e70a42fb8634cd5aff31614e5e9 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 27 Mar 2021 11:52:57 -0700 Subject: [PATCH 020/901] astc-encoder - switch to v2.5 sources A few small changes, but mostly a drop-in replacement. Updated makefiles. --- libkram/CMakeLists.txt | 5 - libkram/astc-encoder/astcenc.h | 361 +++- .../astcenc_averages_and_directions.cpp | 873 +++++---- libkram/astc-encoder/astcenc_block_sizes2.cpp | 588 +++--- .../astc-encoder/astcenc_color_quantize.cpp | 1200 +++++------- .../astc-encoder/astcenc_color_unquantize.cpp | 764 +++----- .../astcenc_compress_symbolic.cpp | 1305 ++++++++------ .../astc-encoder/astcenc_compute_variance.cpp | 314 ++-- .../astcenc_decompress_symbolic.cpp | 407 +++-- .../astc-encoder/astcenc_diagnostic_trace.cpp | 219 +++ .../astc-encoder/astcenc_diagnostic_trace.h | 225 +++ .../astcenc_encoding_choice_error.cpp | 363 ++-- libkram/astc-encoder/astcenc_entry.cpp | 636 +++++-- .../astcenc_find_best_partitioning.cpp | 852 ++++----- .../astcenc_ideal_endpoints_and_weights.cpp | 1605 +++++++++-------- libkram/astc-encoder/astcenc_image.cpp | 821 +++------ .../astc-encoder/astcenc_integer_sequence.cpp | 455 ++--- libkram/astc-encoder/astcenc_internal.h | 744 +++++--- .../astcenc_kmeans_partitioning.cpp | 238 +-- libkram/astc-encoder/astcenc_mathlib.cpp | 37 +- libkram/astc-encoder/astcenc_mathlib.h | 546 +++--- .../astcenc_mathlib_softfloat.cpp | 34 +- .../astc-encoder/astcenc_partition_tables.cpp | 14 +- .../astcenc_percentile_tables.cpp | 6 +- .../astcenc_pick_best_endpoint_format.cpp | 456 ++--- .../astcenc_platform_isa_detection.cpp | 57 +- libkram/astc-encoder/astcenc_quantization.cpp | 29 +- .../astcenc_symbolic_physical.cpp | 71 +- libkram/astc-encoder/astcenc_vecmathlib.h | 977 ++++------ .../astc-encoder/astcenc_vecmathlib_avx2_8.h | 943 ++++++++++ .../astcenc_vecmathlib_common_4.h | 352 ++++ .../astc-encoder/astcenc_vecmathlib_neon_4.h | 915 ++++++++++ .../astcenc_vecmathlib_neon_armv7_4.h | 186 ++ .../astc-encoder/astcenc_vecmathlib_none_4.h | 1025 +++++++++++ .../astc-encoder/astcenc_vecmathlib_sse_4.h | 1008 +++++++++++ libkram/astc-encoder/astcenc_weight_align.cpp | 219 +-- .../astcenc_weight_quant_xfer_tables.cpp | 2 +- libkram/kram/Kram.cpp | 7 + libkram/kram/KramConfig.h | 7 + libkram/kram/KramImage.cpp | 150 +- libkram/kram/KramImage.h | 2 +- plugin/kps/KPS.cpp | 2 - 42 files 
changed, 11729 insertions(+), 7291 deletions(-) create mode 100644 libkram/astc-encoder/astcenc_diagnostic_trace.cpp create mode 100644 libkram/astc-encoder/astcenc_diagnostic_trace.h create mode 100755 libkram/astc-encoder/astcenc_vecmathlib_avx2_8.h create mode 100755 libkram/astc-encoder/astcenc_vecmathlib_common_4.h create mode 100755 libkram/astc-encoder/astcenc_vecmathlib_neon_4.h create mode 100644 libkram/astc-encoder/astcenc_vecmathlib_neon_armv7_4.h create mode 100644 libkram/astc-encoder/astcenc_vecmathlib_none_4.h create mode 100755 libkram/astc-encoder/astcenc_vecmathlib_sse_4.h diff --git a/libkram/CMakeLists.txt b/libkram/CMakeLists.txt index 3c3b3be5..0b501ee3 100644 --- a/libkram/CMakeLists.txt +++ b/libkram/CMakeLists.txt @@ -174,11 +174,6 @@ elseif (UNIXBUILD) endif() - -target_compile_definitions(${myTargetLib} PUBLIC - -DASTCENC_SSE=42 -DASTCENC_AVX=1 -DASTCENC_POPCNT=0 - -DASTCENC_VECALIGN=16 -DASTCENC_ISA_INVARIANCE=0) - target_compile_definitions(${myTargetLib} PUBLIC "-DCOMPILE_ATE=${COMPILE_ATE}" "-DCOMPILE_BCENC=${COMPILE_BCENC}" diff --git a/libkram/astc-encoder/astcenc.h b/libkram/astc-encoder/astcenc.h index 2a9a6e56..618ded49 100644 --- a/libkram/astc-encoder/astcenc.h +++ b/libkram/astc-encoder/astcenc.h @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2020 Arm Limited +// Copyright 2020-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -43,9 +43,9 @@ * allocate multiple contexts and assign each context to a thread. * * An application wishing to process a single image in using multiple * threads can configure the context for multi-threaded use, and invoke - * astcenc_compress() once per thread for faster compression. The caller - * is responsible for creating the worker threads. Note that - * decompression is always single-threaded. + * astcenc_compress/decompress() once per thread for faster processing. + * The caller is responsible for creating the worker threads, and + * synchronizing between images. * * Threading * ========= @@ -76,21 +76,21 @@ * Images * ====== * - * Images are passed in as a astcenc_image structure. Inputs can be either - * 8-bit unorm inputs (passed in via the data8 pointer), or 16-bit floating - * point inputs (passed in via the data16 pointer). The unused pointer should - * be set to nullptr. + * Images are passed in as an astcenc_image structure. Inputs can be either + * 8-bit unorm, 16-bit half-float, or 32-bit float, as indicated by the + * data_type field. * * Images can be any dimension; there is no requirement for them to be a * multiple of the ASTC block size. * - * Data is always passed in as 4 color channels, and accessed as 3D array - * indexed using e.g. + * Data is always passed in as 4 color channels, and accessed as an array of + * 2D image slices. Data within an image slice is always tightly packed without + * padding. 
Addresing looks like this: * - * data8[z_coord][y_coord][x_coord * 4 ] // Red - * data8[z_coord][y_coord][x_coord * 4 + 1] // Green - * data8[z_coord][y_coord][x_coord * 4 + 2] // Blue - * data8[z_coord][y_coord][x_coord * 4 + 3] // Alpha + * data[z_coord][y_coord * x_dim * 4 + x_coord * 4 ] // Red + * data[z_coord][y_coord * x_dim * 4 + x_coord * 4 + 1] // Green + * data[z_coord][y_coord * x_dim * 4 + x_coord * 4 + 2] // Blue + * data[z_coord][y_coord * x_dim * 4 + x_coord * 4 + 3] // Alpha * * Common compressor usage * ======================= @@ -144,6 +144,16 @@ #include #include +#if defined(ASTCENC_DYNAMIC_LIBRARY) + #if defined(_MSC_VER) + #define ASTCENC_PUBLIC extern "C" __declspec(dllexport) + #else + #define ASTCENC_PUBLIC extern "C" __attribute__ ((visibility ("default"))) + #endif +#else + #define ASTCENC_PUBLIC +#endif + /* ============================================================================ Data declarations ============================================================================ */ @@ -171,8 +181,8 @@ enum astcenc_error { ASTCENC_ERR_BAD_BLOCK_SIZE, /** @brief The call failed due to an out-of-spec color profile. */ ASTCENC_ERR_BAD_PROFILE, - /** @brief The call failed due to an out-of-spec quality preset. */ - ASTCENC_ERR_BAD_PRESET, + /** @brief The call failed due to an out-of-spec quality value. */ + ASTCENC_ERR_BAD_QUALITY, /** @brief The call failed due to an out-of-spec channel swizzle. */ ASTCENC_ERR_BAD_SWIZZLE, /** @brief The call failed due to an out-of-spec flag set. */ @@ -180,7 +190,11 @@ enum astcenc_error { /** @brief The call failed due to the context not supporting the operation. */ ASTCENC_ERR_BAD_CONTEXT, /** @brief The call failed due to unimplemented functionality. */ - ASTCENC_ERR_NOT_IMPLEMENTED + ASTCENC_ERR_NOT_IMPLEMENTED, +#if defined(ASTCENC_DIAGNOSTICS) + /** @brief The call failed due to an issue with diagnostic tracing. */ + ASTCENC_ERR_DTRACE_FAILURE, +#endif }; /** @@ -197,21 +211,20 @@ enum astcenc_profile { ASTCENC_PRF_HDR }; -/** - * @brief A codec quality preset. - */ -enum astcenc_preset { - /** @brief The fastest, lowest quality, search preset. */ - ASTCENC_PRE_FASTEST = 0, - /** @brief The fast search preset. */ - ASTCENC_PRE_FAST, - /** @brief The medium quality search preset. */ - ASTCENC_PRE_MEDIUM, - /** @brief The throrough quality search preset. */ - ASTCENC_PRE_THOROUGH, - /** @brief The exhaustive, highest quality, search preset. */ - ASTCENC_PRE_EXHAUSTIVE -}; +/** @brief The fastest, lowest quality, search preset. */ +static const float ASTCENC_PRE_FASTEST = 0.0f; + +/** @brief The fast search preset. */ +static const float ASTCENC_PRE_FAST = 10.0f; + +/** @brief The medium quality search preset. */ +static const float ASTCENC_PRE_MEDIUM = 60.0f; + +/** @brief The throrough quality search preset. */ +static const float ASTCENC_PRE_THOROUGH = 98.0f; + +/** @brief The exhaustive, highest quality, search preset. */ +static const float ASTCENC_PRE_EXHAUSTIVE = 100.0f; /** * @brief A codec channel swizzle selector. @@ -276,7 +289,32 @@ static const unsigned int ASTCENC_FLG_MAP_NORMAL = 1 << 0; * the color channels to be treated independently for the purposes of error * analysis. */ -static const unsigned int ASTCENC_FLG_MAP_MASK = 1 << 1; +static const unsigned int ASTCENC_FLG_MAP_MASK = 1 << 1; + +/** + * @brief Enable RGBM map compression. + * + * Input data will be treated as HDR data that has been stored in an LDR + * RGBM-encoded wrapper format. 
Data must be preprocessed by the user to be in + * LDR RGBM format before calling the compression function, this flag is only + * used to control the use of RGBM-specific heuristics and error metrics. + * + * IMPORTANT: The ASTC format is prone to bad failure modes with unconstrained + * RGBM data; very small M values can round to zero due to quantization and + * result in black or white pixels. It is *highly* recommended that the minimum + * value of M used in the encoding is kept above a lower threshold (try 16 or + * 32). Applying this threshold reduces the number of very dark colors that can + * be represented, but is still slightly higher precision than 8-bit LDR. + * + * When this flag is set the value of @c rgbm_m_scale in the context must be + * set to the RGBM scale factor used during reconstruction. This defaults to 5 + * when in RGBM mode. + * + * It is recommended that the value of @c cw_a_weight is set to twice the value + * of the multiplier scale, ensuring that the M value is accurately encoded. + * This defaults to 10 when in RGBM mode, matching the default scale factor. + */ +static const unsigned int ASTCENC_FLG_MAP_RGBM = 1 << 6; /** * @brief Enable alpha weighting. @@ -286,7 +324,7 @@ static const unsigned int ASTCENC_FLG_MAP_MASK = 1 << 1; * more accurately encode the alpha value in areas where the color value * is less significant. */ -static const unsigned int ASTCENC_FLG_USE_ALPHA_WEIGHT = 1 << 2; +static const unsigned int ASTCENC_FLG_USE_ALPHA_WEIGHT = 1 << 2; /** * @brief Enable perceptual error metrics. @@ -295,25 +333,38 @@ static const unsigned int ASTCENC_FLG_USE_ALPHA_WEIGHT = 1 << 2; * perceptual error rather than best PSNR. Only some input modes support * perceptual error metrics. */ -static const unsigned int ASTCENC_FLG_USE_PERCEPTUAL = 1 << 3; +static const unsigned int ASTCENC_FLG_USE_PERCEPTUAL = 1 << 3; /** * @brief Create a decompression-only context. * - * This mode enables context allocation to skip some transient buffer - * allocation, resulting in a lower-memory footprint. + * This mode disables support for compression. This enables context allocation + * to skip some transient buffer allocation, resulting in lower memory usage. + */ +static const unsigned int ASTCENC_FLG_DECOMPRESS_ONLY = 1 << 4; + +/** + * @brief Create a self-decompression context. + * + * This mode configures the compressor so that it is only guaranteed to be + * able to decompress images that were actually created using the current + * context. This is the common case for compression use cases, and setting this + * flag enables additional optimizations, but does mean that the context cannot + * reliably decompress arbitrary ASTC images. */ -static const unsigned int ASTCENC_FLG_DECOMPRESS_ONLY = 1 << 4; +static const unsigned int ASTCENC_FLG_SELF_DECOMPRESS_ONLY = 1 << 5; /** * @brief The bit mask of all valid flags. */ static const unsigned int ASTCENC_ALL_FLAGS = - ASTCENC_FLG_MAP_NORMAL | ASTCENC_FLG_MAP_MASK | + ASTCENC_FLG_MAP_NORMAL | + ASTCENC_FLG_MAP_RGBM | ASTCENC_FLG_USE_ALPHA_WEIGHT | ASTCENC_FLG_USE_PERCEPTUAL | - ASTCENC_FLG_DECOMPRESS_ONLY; + ASTCENC_FLG_DECOMPRESS_ONLY | + ASTCENC_FLG_SELF_DECOMPRESS_ONLY; /** * @brief The config structure. @@ -327,7 +378,8 @@ static const unsigned int ASTCENC_ALL_FLAGS = * the value in the config applies to the channel that exists after any * compression data swizzle is applied. */ -struct astcenc_config { +struct astcenc_config +{ /** @brief The color profile. 
*/ astcenc_profile profile; @@ -402,12 +454,22 @@ struct astcenc_config { */ float b_deblock_weight; + /** @brief The RGBM scale factor for the shared multiplier (-rgbm). */ + float rgbm_m_scale; + + /** + * @brief The maximum number of partitions searched (-partitioncountlimit). + * + * Valid values are between 1 and 4. + */ + unsigned int tune_partition_count_limit; + /** - * @brief The maximum number of partitions searched (-partitionlimit). + * @brief The maximum number of partitions searched (-partitionindexlimit). * * Valid values are between 1 and 1024. */ - unsigned int tune_partition_limit; + unsigned int tune_partition_index_limit; /** * @brief The maximum centile for block modes searched (-blockmodelimit). @@ -438,6 +500,30 @@ struct astcenc_config { */ float tune_db_limit; + /** + * @brief The amount of overshoot needed to early-out mode 0 fast path. + * + * We have a fast-path for mode 0 (1 partition, 1 plane) which uses only + * essential block modes as an initital search. This can short-cut + * compression for simple blocks, but to avoid shortcutting too much we + * force this to overshoot the MSE threshold needed to hit the block-local + * db_limit e.g. 1.0 = no overshoot, 2.0 = need half the error to trigger. + */ + float tune_mode0_mse_overshoot; + + /** + * @brief The amount of overshoot needed to early-out refinement. + * + * The codec will refine block candidates iteratively to improve the + * encoding, based on the @c tune_refinement_limit count. Earlier + * implementations will use all refinement iterations, even if the target + * threshold is reached. This tuning parameter allows an early out, but + * with an overshoot MSE threshold. Setting this to 1.0 will early-out as + * soon as the target is hit, but does reduce image quality vs the + * default behavior of over-refinement. + */ + float tune_refinement_mse_overshoot; + /** * @brief The threshold for skipping 3+ partitions (-partitionearlylimit). * @@ -451,27 +537,116 @@ struct astcenc_config { * This option is ineffective for normal maps. */ float tune_two_plane_early_out_limit; + +#if defined(ASTCENC_DIAGNOSTICS) + /** + * @brief The path to save the diagnostic trace data to. + * + * This option is not part of the public API, and requires special builds + * of the library. + */ + const char* trace_file_path; +#endif }; /** * @brief An uncompressed 2D or 3D image. * - * Inputs can be either 8-bit unorm inputs (passed in via the data8 pointer), - * or 16-bit floating point inputs (passed in via the data16 pointer). The - * unused pointer must be set to nullptr. Data is always passed in as 4 color - * channels, and accessed as 3D array indexed using [Z][Y][(X * 4) + (0..3)]. + * 3D image are passed in as an array of 2D slices. Each slice has identical + * size and color format. */ -struct astcenc_image { +struct astcenc_image +{ /** @brief The X dimension of the image, in texels. */ unsigned int dim_x; + /** @brief The Y dimension of the image, in texels. */ unsigned int dim_y; - /** @brief The X dimension of the image, in texels. */ + + /** @brief The Z dimension of the image, in texels. */ unsigned int dim_z; + /** @brief The data type per channel. */ astcenc_type data_type; - /** @brief The data; actually of type ***. */ - void *data; + + /** @brief The array of 2D slices, of length @c dim_z. */ + void** data; +}; + +/** + * @brief A block encoding metadata query result. 
+ * + * If the block is an error block or a constant color block or an error block + * all fields other than the profile, block dimensions, and error/constant + * indicator will be zero. + */ +struct astcenc_block_info +{ + /** @brief The block encoding color profile. */ + astcenc_profile profile; + + /** @brief The number of texels in the X dimension. */ + int block_x; + + /** @brief The number of texels in the Y dimension. */ + int block_y; + + /** @brief The number of texel in the Z dimension. */ + int block_z; + + /** @brief The number of texels in the block. */ + int texel_count; + + /** @brief True if this block is an error block. */ + bool is_error_block; + + /** @brief True if this block is a constant color block. */ + bool is_constant_block; + + /** @brief True if this block is an HDR block. */ + bool is_hdr_block; + + /** @brief True if this block uses two weight planes. */ + bool is_dual_plane_block; + + /** @brief The number of partitions if not constant color. */ + int partition_count; + + /** @brief The partition index if 2 - 4 partitions used. */ + int partition_index; + + /** @brief The component index of the second plane if dual plane. */ + int dual_plane_component; + + /** @brief The color endpoint encoding mode for each partition. */ + int color_endpoint_modes[4]; + + /** @brief The number of color endpoint quantization levels. */ + int color_level_count; + + /** @brief The number of weight quantization levels. */ + int weight_level_count; + + /** @brief The number of weights in the X dimension. */ + int weight_x; + + /** @brief The number of weights in the Y dimension. */ + int weight_y; + + /** @brief The number of weights in the Z dimension. */ + int weight_z; + + /** @brief The unpacked color endpoints for each partition. */ + float color_endpoints[4][2][4]; + + /** @brief The per-texel interpolation weights for the block. */ + float weight_values_plane1[216]; + + /** @brief The per-texel interpolation weights for the block. */ + float weight_values_plane2[216]; + + /** @brief The per-texel partition assignments for the block. */ + uint8_t partition_assignment[216]; }; /** @@ -484,21 +659,24 @@ struct astcenc_image { * @param block_x ASTC block size X dimension. * @param block_y ASTC block size Y dimension. * @param block_z ASTC block size Z dimension. - * @param preset Search quality preset. + * @param quality Search quality preset / effort level. Either an + * @c ASTCENC_PRE_* value, or a effort level between 0 + * and 100. Performance is not linear between 0 and 100. + * @param flags A valid set of ASTCENC_FLG_* flag bits. * @param[out] config Output config struct to populate. * * @return ASTCENC_SUCCESS on success, or an error if the inputs are invalid * either individually, or in combination. */ -astcenc_error astcenc_config_init( +ASTCENC_PUBLIC astcenc_error astcenc_config_init( astcenc_profile profile, unsigned int block_x, unsigned int block_y, unsigned int block_z, - astcenc_preset preset, + float quality, unsigned int flags, - astcenc_config& config); + astcenc_config* config); /** * @brief Allocate a new codec context based on a config. @@ -515,14 +693,13 @@ astcenc_error astcenc_config_init( * be set when creating ay context. * * @param[in] config Codec config. - * @param thread_count Thread count to configure for. Decompress-only - * contexts must have a thread_count of 1. + * @param thread_count Thread count to configure for. * @param[out] context Location to store an opaque context pointer. 
* * @return ASTCENC_SUCCESS on success, or an error if context creation failed. */ -astcenc_error astcenc_context_alloc( - const astcenc_config& config, +ASTCENC_PUBLIC astcenc_error astcenc_context_alloc( + const astcenc_config* config, unsigned int thread_count, astcenc_context** context); @@ -536,7 +713,7 @@ astcenc_error astcenc_context_alloc( * available. Each thread must have a unique thread_index. * * @param context Codec context. - * @param[in,out] image Input image. + * @param[in,out] image An input image, in 2D slices. * @param swizzle Compression data swizzle. * @param[out] data_out Pointer to output data array. * @param data_len Length of the output data array. @@ -544,16 +721,16 @@ astcenc_error astcenc_context_alloc( * * @return ASTCENC_SUCCESS on success, or an error if compression failed. */ -astcenc_error astcenc_compress_image( +ASTCENC_PUBLIC astcenc_error astcenc_compress_image( astcenc_context* context, - astcenc_image& image, + astcenc_image* image, astcenc_swizzle swizzle, uint8_t* data_out, size_t data_len, unsigned int thread_index); /** - * @brief Reset the compressor state for a new compression. + * @brief Reset the codec state for a new compression. * * The caller is responsible for synchronizing threads in the worker thread * pool. This function must only be called when all threads have exited the @@ -564,35 +741,71 @@ astcenc_error astcenc_compress_image( * * @return ASTCENC_SUCCESS on success, or an error if reset failed. */ -astcenc_error astcenc_compress_reset( +ASTCENC_PUBLIC astcenc_error astcenc_compress_reset( astcenc_context* context); /** * @brief Decompress an image. * - * @param context Codec context. - * @param[in] data Pointer to compressed data. - * @param data_len Length of the compressed data, in bytes. - * @param[in,out] image_out Output image. - * @param swizzle Decompression data swizzle. + * @param context Codec context. + * @param[in] data Pointer to compressed data. + * @param data_len Length of the compressed data, in bytes. + * @param[in,out] image_out Output image. + * @param swizzle Decompression data swizzle. + * @param thread_index Thread index [0..N-1] of calling thread. * * @return ASTCENC_SUCCESS on success, or an error if decompression failed. */ -astcenc_error astcenc_decompress_image( +ASTCENC_PUBLIC astcenc_error astcenc_decompress_image( astcenc_context* context, const uint8_t* data, size_t data_len, - astcenc_image& image_out, - astcenc_swizzle swizzle); + astcenc_image* image_out, + astcenc_swizzle swizzle, + unsigned int thread_index); + +/** + * @brief Reset the codec state for a new decompression. + * + * The caller is responsible for synchronizing threads in the worker thread + * pool. This function must only be called when all threads have exited the + * astcenc_decompress_image() function for image N, but before any thread + * enters it for image N + 1. + * + * @param context Codec context. + * + * @return ASTCENC_SUCCESS on success, or an error if reset failed. + */ +ASTCENC_PUBLIC astcenc_error astcenc_decompress_reset( + astcenc_context* context); /** * Free the compressor context. * * @param context The codec context. */ -void astcenc_context_free( +ASTCENC_PUBLIC void astcenc_context_free( astcenc_context* context); +/** + * @brief Provide a high level summary of a block's encoding. + * + * This feature is primarily useful for codec developers but may be useful + * for developers building advanced content packaging pipelines. + * + * @param context Codec context. 
+ * @param data One block of compressesd ASTC data. + * @param info The output info structure to populate. + * + * @return ASTCENC_SUCCESS if the block was decoded, or an error otherwise. + * Note that this function will return success even if the block itself + * was an error block encoding, as the decode was correctly handled. + */ +ASTCENC_PUBLIC astcenc_error astcenc_get_block_info( + astcenc_context* context, + const uint8_t data[16], + astcenc_block_info* info); + /** * @brief Get a printable string for specific status code. * @@ -600,7 +813,7 @@ void astcenc_context_free( * * @return A human readable nul-terminated string. */ -const char* astcenc_get_error_string( +ASTCENC_PUBLIC const char* astcenc_get_error_string( astcenc_error status); #endif diff --git a/libkram/astc-encoder/astcenc_averages_and_directions.cpp b/libkram/astc-encoder/astcenc_averages_and_directions.cpp index 8e34ccec..048f0881 100644 --- a/libkram/astc-encoder/astcenc_averages_and_directions.cpp +++ b/libkram/astc-encoder/astcenc_averages_and_directions.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -37,81 +37,71 @@ // We have separate versions for blocks with and without alpha, since the // processing for blocks with alpha is significantly more expensive. The // direction vectors it produces are NOT normalized. -void compute_averages_and_directions_rgba( +void compute_avgs_and_dirs_4_comp( const partition_info* pt, const imageblock* blk, const error_weight_block* ewb, - const float4* color_scalefactors, - float4* averages, - float4* directions_rgba + partition_metrics pms[4] ) { int partition_count = pt->partition_count; + promise(partition_count > 0); + for (int partition = 0; partition < partition_count; partition++) { const uint8_t *weights = pt->texels_of_partition[partition]; - int texelcount = pt->texels_per_partition[partition]; - float4 base_sum = float4(0.0f); + vfloat4 base_sum = vfloat4::zero(); float partition_weight = 0.0f; - for (int i = 0; i < texelcount; i++) + int texel_count = pt->partition_texel_count[partition]; + promise(texel_count > 0); + + for (int i = 0; i < texel_count; i++) { int iwt = weights[i]; float weight = ewb->texel_weight[iwt]; - float4 texel_datum = float4(blk->data_r[iwt], - blk->data_g[iwt], - blk->data_b[iwt], - blk->data_a[iwt]) * weight; - partition_weight += weight; + vfloat4 texel_datum = blk->texel(iwt); - base_sum = base_sum + texel_datum; + partition_weight += weight; + base_sum = base_sum + texel_datum * weight; } - float4 average = base_sum * (1.0f / MAX(partition_weight, 1e-7f)); - averages[partition] = average * color_scalefactors[partition]; + vfloat4 average = base_sum * (1.0f / astc::max(partition_weight, 1e-7f)); + pms[partition].avg = average * pms[partition].color_scale; - float4 sum_xp = float4(0.0f); - float4 sum_yp = float4(0.0f); - float4 sum_zp = float4(0.0f); - float4 sum_wp = float4(0.0f); + vfloat4 sum_xp = vfloat4::zero(); + vfloat4 sum_yp = vfloat4::zero(); + vfloat4 sum_zp = vfloat4::zero(); + vfloat4 sum_wp = vfloat4::zero(); - for (int i = 0; i < texelcount; i++) + for (int i = 0; i < texel_count; i++) { int iwt = weights[i]; float weight = ewb->texel_weight[iwt]; - float4 texel_datum = float4(blk->data_r[iwt], - 
blk->data_g[iwt], - blk->data_b[iwt], - blk->data_a[iwt]); + vfloat4 texel_datum = blk->texel(iwt); texel_datum = (texel_datum - average) * weight; - if (texel_datum.r > 0.0f) - { - sum_xp = sum_xp + texel_datum; - } + vfloat4 zero = vfloat4::zero(); - if (texel_datum.g > 0.0f) - { - sum_yp = sum_yp + texel_datum; - } + vmask4 tdm0 = vfloat4(texel_datum.lane<0>()) > zero; + sum_xp += select(zero, texel_datum, tdm0); - if (texel_datum.b > 0.0f) - { - sum_zp = sum_zp + texel_datum; - } + vmask4 tdm1 = vfloat4(texel_datum.lane<1>()) > zero; + sum_yp += select(zero, texel_datum, tdm1); - if (texel_datum.a > 0.0f) - { - sum_wp = sum_wp + texel_datum; - } + vmask4 tdm2 = vfloat4(texel_datum.lane<2>()) > zero; + sum_zp += select(zero, texel_datum, tdm2); + + vmask4 tdm3 = vfloat4(texel_datum.lane<3>()) > zero; + sum_wp += select(zero, texel_datum, tdm3); } - float prod_xp = dot(sum_xp, sum_xp); - float prod_yp = dot(sum_yp, sum_yp); - float prod_zp = dot(sum_zp, sum_zp); - float prod_wp = dot(sum_wp, sum_wp); + float prod_xp = dot_s(sum_xp, sum_xp); + float prod_yp = dot_s(sum_yp, sum_yp); + float prod_zp = dot_s(sum_zp, sum_zp); + float prod_wp = dot_s(sum_wp, sum_wp); - float4 best_vector = sum_xp; + vfloat4 best_vector = sum_xp; float best_sum = prod_xp; if (prod_yp > best_sum) @@ -131,109 +121,21 @@ void compute_averages_and_directions_rgba( best_vector = sum_wp; } - directions_rgba[partition] = best_vector; - } -} - -void compute_averages_and_directions_rgb( - const partition_info* pt, - const imageblock* blk, - const error_weight_block* ewb, - const float4* color_scalefactors, - float3* averages, - float3* directions_rgb -) { - int partition_count = pt->partition_count; - const float *texel_weights = ewb->texel_weight_rgb; - - for (int partition = 0; partition < partition_count; partition++) - { - const uint8_t *weights = pt->texels_of_partition[partition]; - int texelcount = pt->texels_per_partition[partition]; - - float3 base_sum = float3(0.0f, 0.0f, 0.0f); - float partition_weight = 0.0f; - - for (int i = 0; i < texelcount; i++) - { - int iwt = weights[i]; - float weight = texel_weights[iwt]; - float3 texel_datum = float3(blk->data_r[iwt], - blk->data_g[iwt], - blk->data_b[iwt]) * weight; - partition_weight += weight; - - base_sum = base_sum + texel_datum; - } - - float4 csf = color_scalefactors[partition]; - float3 average = base_sum * (1.0f / MAX(partition_weight, 1e-7f)); - averages[partition] = average * float3(csf.r, csf.g, csf.b); - - float3 sum_xp = float3(0.0f); - float3 sum_yp = float3(0.0f); - float3 sum_zp = float3(0.0f); - - for (int i = 0; i < texelcount; i++) - { - int iwt = weights[i]; - float weight = texel_weights[iwt]; - float3 texel_datum = float3(blk->data_r[iwt], - blk->data_g[iwt], - blk->data_b[iwt]); - texel_datum = (texel_datum - average) * weight; - - if (texel_datum.r > 0.0f) - { - sum_xp = sum_xp + texel_datum; - } - - if (texel_datum.g > 0.0f) - { - sum_yp = sum_yp + texel_datum; - } - - if (texel_datum.b > 0.0f) - { - sum_zp = sum_zp + texel_datum; - } - } - - float prod_xp = dot(sum_xp, sum_xp); - float prod_yp = dot(sum_yp, sum_yp); - float prod_zp = dot(sum_zp, sum_zp); - - float3 best_vector = sum_xp; - float best_sum = prod_xp; - - if (prod_yp > best_sum) - { - best_vector = sum_yp; - best_sum = prod_yp; - } - - if (prod_zp > best_sum) - { - best_vector = sum_zp; - } - - directions_rgb[partition] = best_vector; + pms[partition].dir = best_vector; } } -void compute_averages_and_directions_3_components( +void compute_avgs_and_dirs_3_comp( const 
partition_info* pt, const imageblock* blk, const error_weight_block* ewb, - const float3* color_scalefactors, int omitted_component, - float3* averages, - float3* directions + partition_metrics pm[4] ) { const float *texel_weights; - const float* data_vr; - const float* data_vg; - const float* data_vb; + const float* data_vr = blk->data_r; + const float* data_vg = blk->data_g; + const float* data_vb = blk->data_b; if (omitted_component == 0) { @@ -245,86 +147,82 @@ void compute_averages_and_directions_3_components( else if (omitted_component == 1) { texel_weights = ewb->texel_weight_rba; - data_vr = blk->data_r; data_vg = blk->data_b; data_vb = blk->data_a; } else if (omitted_component == 2) { texel_weights = ewb->texel_weight_rga; - data_vr = blk->data_r; - data_vg = blk->data_g; data_vb = blk->data_a; } else { assert(omitted_component == 3); texel_weights = ewb->texel_weight_rgb; - data_vr = blk->data_r; - data_vg = blk->data_g; - data_vb = blk->data_b; } int partition_count = pt->partition_count; + promise(partition_count > 0); + for (int partition = 0; partition < partition_count; partition++) { const uint8_t *weights = pt->texels_of_partition[partition]; - int texelcount = pt->texels_per_partition[partition]; - float3 base_sum = float3(0.0f); + vfloat4 base_sum = vfloat4::zero(); float partition_weight = 0.0f; - for (int i = 0; i < texelcount; i++) + int texel_count = pt->partition_texel_count[partition]; + promise(texel_count > 0); + + for (int i = 0; i < texel_count; i++) { int iwt = weights[i]; float weight = texel_weights[iwt]; - float3 texel_datum = float3(data_vr[iwt], - data_vg[iwt], - data_vb[iwt]) * weight; - partition_weight += weight; + vfloat4 texel_datum(data_vr[iwt], + data_vg[iwt], + data_vb[iwt], + 0.0f); - base_sum = base_sum + texel_datum; + partition_weight += weight; + base_sum = base_sum + texel_datum * weight; } - float3 csf = color_scalefactors[partition]; + vfloat4 csf = pm[partition].color_scale; - float3 average = base_sum * (1.0f / MAX(partition_weight, 1e-7f)); - averages[partition] = average * float3(csf.r, csf.g, csf.b); + vfloat4 average = base_sum * (1.0f / astc::max(partition_weight, 1e-7f)); + pm[partition].avg = average * csf; - float3 sum_xp = float3(0.0f); - float3 sum_yp = float3(0.0f); - float3 sum_zp = float3(0.0f); + vfloat4 sum_xp = vfloat4::zero(); + vfloat4 sum_yp = vfloat4::zero(); + vfloat4 sum_zp = vfloat4::zero(); - for (int i = 0; i < texelcount; i++) + for (int i = 0; i < texel_count; i++) { int iwt = weights[i]; float weight = texel_weights[iwt]; - float3 texel_datum = float3(data_vr[iwt], - data_vg[iwt], - data_vb[iwt]); + vfloat4 texel_datum = vfloat4(data_vr[iwt], + data_vg[iwt], + data_vb[iwt], + 0.0f); texel_datum = (texel_datum - average) * weight; - if (texel_datum.r > 0.0f) - { - sum_xp = sum_xp + texel_datum; - } + vfloat4 zero = vfloat4::zero(); - if (texel_datum.g > 0.0f) - { - sum_yp = sum_yp + texel_datum; - } + vmask4 tdm0 = vfloat4(texel_datum.lane<0>()) > zero; + sum_xp += select(zero, texel_datum, tdm0); - if (texel_datum.b > 0.0f) - { - sum_zp = sum_zp + texel_datum; - } + vmask4 tdm1 = vfloat4(texel_datum.lane<1>()) > zero; + sum_yp += select(zero, texel_datum, tdm1); + + vmask4 tdm2 = vfloat4(texel_datum.lane<2>()) > zero; + sum_zp += select(zero, texel_datum, tdm2); } - float prod_xp = dot(sum_xp, sum_xp); - float prod_yp = dot(sum_yp, sum_yp); - float prod_zp = dot(sum_zp, sum_zp); + float prod_xp = dot3_s(sum_xp, sum_xp); + float prod_yp = dot3_s(sum_yp, sum_yp); + float prod_zp = dot3_s(sum_zp, sum_zp); - 
float3 best_vector = sum_xp; + vfloat4 best_vector = sum_xp; float best_sum = prod_xp; if (prod_yp > best_sum) @@ -338,17 +236,16 @@ void compute_averages_and_directions_3_components( best_vector = sum_zp; } - if (dot(best_vector, best_vector) < 1e-18f) + if (dot3_s(best_vector, best_vector) < 1e-18f) { - best_vector = float3(1.0f, 1.0f, 1.0f); + best_vector = vfloat4(1.0f, 1.0f, 1.0f, 0.0f); } - directions[partition] = best_vector; + pm[partition].dir = best_vector; } - } -void compute_averages_and_directions_2_components( +void compute_avgs_and_dirs_2_comp( const partition_info* pt, const imageblock* blk, const error_weight_block* ewb, @@ -383,15 +280,19 @@ void compute_averages_and_directions_2_components( } int partition_count = pt->partition_count; + promise(partition_count > 0); + for (int partition = 0; partition < partition_count; partition++) { const uint8_t *weights = pt->texels_of_partition[partition]; - int texelcount = pt->texels_per_partition[partition]; float2 base_sum = float2(0.0f); float partition_weight = 0.0f; - for (int i = 0; i < texelcount; i++) + int texel_count = pt->partition_texel_count[partition]; + promise(texel_count > 0); + + for (int i = 0; i < texel_count; i++) { int iwt = weights[i]; float weight = texel_weights[iwt]; @@ -403,13 +304,13 @@ void compute_averages_and_directions_2_components( float2 csf = color_scalefactors[partition]; - float2 average = base_sum * (1.0f / MAX(partition_weight, 1e-7f)); + float2 average = base_sum * (1.0f / astc::max(partition_weight, 1e-7f)); averages[partition] = average * float2(csf.r, csf.g); float2 sum_xp = float2(0.0f); float2 sum_yp = float2(0.0f); - for (int i = 0; i < texelcount; i++) + for (int i = 0; i < texel_count; i++) { int iwt = weights[i]; float weight = texel_weights[iwt]; @@ -443,340 +344,406 @@ void compute_averages_and_directions_2_components( } void compute_error_squared_rgba( - const partition_info* pt, // the partition that we use when computing the squared-error. + const partition_info* pt, const imageblock* blk, const error_weight_block* ewb, - const processed_line4* plines_uncorr, - const processed_line4* plines_samechroma, - const processed_line3* plines_separate_red, - const processed_line3* plines_separate_green, - const processed_line3* plines_separate_blue, - const processed_line3* plines_separate_alpha, - float* lengths_uncorr, - float* lengths_samechroma, - float4* lengths_separate, - float* uncorr_errors, - float* samechroma_errors, - float4* separate_color_errors + const processed_line4* uncor_plines, + const processed_line4* samec_plines, + float* uncor_lengths, + float* samec_lengths, + float* uncor_errors, + float* samec_errors ) { - float uncorr_errorsum = 0.0f; - float samechroma_errorsum = 0.0f; - float red_errorsum = 0.0f; - float green_errorsum = 0.0f; - float blue_errorsum = 0.0f; - float alpha_errorsum = 0.0f; - - for (int partition = 0; partition < pt->partition_count; partition++) + float uncor_errorsum = 0.0f; + float samec_errorsum = 0.0f; + + int partition_count = pt->partition_count; + promise(partition_count > 0); + + for (int partition = 0; partition < partition_count; partition++) { - // TODO: sort partitions by number of texels. For warp-architectures, - // this can reduce the running time by about 25-50%. 
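
The rewritten compute_avgs_and_dirs_* functions above all follow the same pattern: compute a weighted average for the partition, accumulate the weighted deviations into one candidate sum per channel (a deviation is added to channel c's sum only when its component c is positive), and keep the candidate with the largest squared length as the dominant direction. A minimal scalar sketch of that idea, using plain arrays instead of the patch's vfloat4 type (hypothetical helper, not part of the patch):

    #include <algorithm>
    #include <array>

    using Vec4 = std::array<float, 4>;

    // Scalar sketch of the direction estimate used by compute_avgs_and_dirs_*.
    static Vec4 estimate_direction(const Vec4* texels, const float* weights, int count)
    {
        // Weighted average of the partition; the 1e-7 floor avoids divide-by-zero,
        // matching astc::max(partition_weight, 1e-7f) in the patch.
        Vec4 avg = {};
        float wsum = 0.0f;
        for (int i = 0; i < count; i++)
        {
            for (int c = 0; c < 4; c++) avg[c] += texels[i][c] * weights[i];
            wsum += weights[i];
        }
        for (int c = 0; c < 4; c++) avg[c] /= std::max(wsum, 1e-7f);

        // One candidate sum per channel: a texel's weighted deviation is added
        // to channel c's candidate only if that channel's deviation is positive.
        Vec4 sums[4] = {};
        for (int i = 0; i < count; i++)
        {
            Vec4 d;
            for (int c = 0; c < 4; c++) d[c] = (texels[i][c] - avg[c]) * weights[i];
            for (int c = 0; c < 4; c++)
            {
                if (d[c] > 0.0f)
                {
                    for (int k = 0; k < 4; k++) sums[c][k] += d[k];
                }
            }
        }

        // The longest candidate approximates the principal axis without an eigen
        // solve; the 3-component variant in this patch additionally falls back to
        // (1,1,1,0) when every candidate is near zero.
        int best = 0;
        float best_len = -1.0f;
        for (int c = 0; c < 4; c++)
        {
            float len = sums[c][0] * sums[c][0] + sums[c][1] * sums[c][1] +
                        sums[c][2] * sums[c][2] + sums[c][3] * sums[c][3];
            if (len > best_len) { best_len = len; best = c; }
        }
        return sums[best];
    }
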
const uint8_t *weights = pt->texels_of_partition[partition]; - int texelcount = pt->texels_per_partition[partition]; - float uncorr_lowparam = 1e10f; - float uncorr_highparam = -1e10f; + float uncor_loparam = 1e10f; + float uncor_hiparam = -1e10f; - float samechroma_lowparam = 1e10f; - float samechroma_highparam = -1e10f; + float samec_loparam = 1e10f; + float samec_hiparam = -1e10f; - float4 separate_lowparam = float4(1e10f); - float4 separate_highparam = float4(-1e10f); + processed_line4 l_uncor = uncor_plines[partition]; + processed_line4 l_samec = samec_plines[partition]; - processed_line4 l_uncorr = plines_uncorr[partition]; - processed_line4 l_samechroma = plines_samechroma[partition]; - processed_line3 l_red = plines_separate_red[partition]; - processed_line3 l_green = plines_separate_green[partition]; - processed_line3 l_blue = plines_separate_blue[partition]; - processed_line3 l_alpha = plines_separate_alpha[partition]; + int texel_count = pt->partition_texel_count[partition]; + promise(texel_count > 0); - // TODO: split up this loop due to too many temporaries; in particular, - // the six line functions will consume 18 vector registers - for (int i = 0; i < texelcount; i++) - { - int iwt = weights[i]; + int i = 0; - float texel_weight_rgba = ewb->texel_weight[iwt]; - if (texel_weight_rgba > 1e-20f) - { - float4 dat = float4(blk->data_r[iwt], - blk->data_g[iwt], - blk->data_b[iwt], - blk->data_a[iwt]); - - float4 ews = ewb->error_weights[iwt]; - - float uncorr_param = dot(dat, l_uncorr.bs); - uncorr_lowparam = MIN(uncorr_param, uncorr_lowparam); - uncorr_highparam = MAX(uncorr_param, uncorr_highparam); - - float samechroma_param = dot(dat, l_samechroma.bs); - samechroma_lowparam = MIN(samechroma_param, samechroma_lowparam); - samechroma_highparam = MAX(samechroma_param, samechroma_highparam); - - float4 separate_param = float4(dot(float3(dat.g, dat.b, dat.a), l_red.bs), - dot(float3(dat.r, dat.b, dat.a), l_green.bs), - dot(float3(dat.r, dat.g, dat.a), l_blue.bs), - dot(float3(dat.r, dat.g, dat.b), l_alpha.bs)); - - separate_lowparam = float4(MIN(separate_param.r, separate_lowparam.r), - MIN(separate_param.g, separate_lowparam.g), - MIN(separate_param.b, separate_lowparam.b), - MIN(separate_param.a, separate_lowparam.a)); - - separate_highparam = float4(MAX(separate_param.r, separate_highparam.r), - MAX(separate_param.g, separate_highparam.g), - MAX(separate_param.b, separate_highparam.b), - MAX(separate_param.a, separate_highparam.a)); - - float4 uncorr_dist = (l_uncorr.amod - dat) + (uncorr_param * l_uncorr.bis); - uncorr_errorsum += dot(ews, uncorr_dist * uncorr_dist); - - float4 samechroma_dist = (l_samechroma.amod - dat) + - (samechroma_param * l_samechroma.bis); - samechroma_errorsum += dot(ews, samechroma_dist * samechroma_dist); - - float3 red_dist = (l_red.amod - float3(dat.g, dat.b, dat.a)) + - (separate_param.r * l_red.bis); - red_errorsum += dot(float3(ews.g, ews.b, ews.a), red_dist * red_dist); - - float3 green_dist = (l_green.amod - float3(dat.r, dat.b, dat.a)) + - (separate_param.g * l_green.bis); - green_errorsum += dot(float3(ews.r, ews.b, ews.a), green_dist * green_dist); - - float3 blue_dist = (l_blue.amod - float3(dat.r, dat.g, dat.a)) + - (separate_param.b * l_blue.bis); - blue_errorsum += dot(float3(ews.r, ews.g, ews.a), blue_dist * blue_dist); - - float3 alpha_dist = (l_alpha.amod - float3(dat.r, dat.g, dat.b)) + - (separate_param.a * l_alpha.bis); - alpha_errorsum += dot(float3(ews.r, ews.g, ews.b), alpha_dist * alpha_dist); - } - } + // Vectorize some 
useful scalar inputs + vfloat l_uncor_bs0(l_uncor.bs.lane<0>()); + vfloat l_uncor_bs1(l_uncor.bs.lane<1>()); + vfloat l_uncor_bs2(l_uncor.bs.lane<2>()); + vfloat l_uncor_bs3(l_uncor.bs.lane<3>()); - float uncorr_linelen = uncorr_highparam - uncorr_lowparam; - float samechroma_linelen = samechroma_highparam - samechroma_lowparam; - float4 separate_linelen = separate_highparam - separate_lowparam; + vfloat l_uncor_amod0(l_uncor.amod.lane<0>()); + vfloat l_uncor_amod1(l_uncor.amod.lane<1>()); + vfloat l_uncor_amod2(l_uncor.amod.lane<2>()); + vfloat l_uncor_amod3(l_uncor.amod.lane<3>()); - // Turn very small numbers and NaNs into a small number - if (!(uncorr_linelen > 1e-7f)) - { - uncorr_linelen = 1e-7f; - } + vfloat l_uncor_bis0(l_uncor.bis.lane<0>()); + vfloat l_uncor_bis1(l_uncor.bis.lane<1>()); + vfloat l_uncor_bis2(l_uncor.bis.lane<2>()); + vfloat l_uncor_bis3(l_uncor.bis.lane<3>()); - if (!(samechroma_linelen > 1e-7f)) - { - samechroma_linelen = 1e-7f; - } + vfloat l_samec_bs0(l_samec.bs.lane<0>()); + vfloat l_samec_bs1(l_samec.bs.lane<1>()); + vfloat l_samec_bs2(l_samec.bs.lane<2>()); + vfloat l_samec_bs3(l_samec.bs.lane<3>()); - if (!(separate_linelen.r > 1e-7f)) - { - separate_linelen.r = 1e-7f; - } + assert(all(l_samec.amod == vfloat4(0.0f))); - if (!(separate_linelen.g > 1e-7f)) - { - separate_linelen.g = 1e-7f; - } + vfloat l_samec_bis0(l_samec.bis.lane<0>()); + vfloat l_samec_bis1(l_samec.bis.lane<1>()); + vfloat l_samec_bis2(l_samec.bis.lane<2>()); + vfloat l_samec_bis3(l_samec.bis.lane<3>()); + + vfloat uncor_loparamv(1e10f); + vfloat uncor_hiparamv(-1e10f); + vfloat4 uncor_errorsumv = vfloat4::zero(); - if (!(separate_linelen.b > 1e-7f)) + vfloat samec_loparamv(1e10f); + vfloat samec_hiparamv(-1e10f); + vfloat4 samec_errorsumv = vfloat4::zero(); + + int clipped_texel_count = round_down_to_simd_multiple_vla(texel_count); + for (/* */; i < clipped_texel_count; i += ASTCENC_SIMD_WIDTH) { - separate_linelen.b = 1e-7f; + vint texel_idxs(&(weights[i])); + + vfloat data_r = gatherf(blk->data_r, texel_idxs); + vfloat data_g = gatherf(blk->data_g, texel_idxs); + vfloat data_b = gatherf(blk->data_b, texel_idxs); + vfloat data_a = gatherf(blk->data_a, texel_idxs); + + vfloat ew_r = gatherf(ewb->texel_weight_r, texel_idxs); + vfloat ew_g = gatherf(ewb->texel_weight_g, texel_idxs); + vfloat ew_b = gatherf(ewb->texel_weight_b, texel_idxs); + vfloat ew_a = gatherf(ewb->texel_weight_a, texel_idxs); + + vfloat uncor_param = (data_r * l_uncor_bs0) + + (data_g * l_uncor_bs1) + + (data_b * l_uncor_bs2) + + (data_a * l_uncor_bs3); + + uncor_loparamv = min(uncor_param, uncor_loparamv); + uncor_hiparamv = max(uncor_param, uncor_hiparamv); + + vfloat uncor_dist0 = (l_uncor_amod0 - data_r) + + (uncor_param * l_uncor_bis0); + vfloat uncor_dist1 = (l_uncor_amod1 - data_g) + + (uncor_param * l_uncor_bis1); + vfloat uncor_dist2 = (l_uncor_amod2 - data_b) + + (uncor_param * l_uncor_bis2); + vfloat uncor_dist3 = (l_uncor_amod3 - data_a) + + (uncor_param * l_uncor_bis3); + + vfloat uncor_error = (ew_r * uncor_dist0 * uncor_dist0) + + (ew_g * uncor_dist1 * uncor_dist1) + + (ew_b * uncor_dist2 * uncor_dist2) + + (ew_a * uncor_dist3 * uncor_dist3); + + haccumulate(uncor_errorsumv, uncor_error); + + // Process samechroma data + vfloat samec_param = (data_r * l_samec_bs0) + + (data_g * l_samec_bs1) + + (data_b * l_samec_bs2) + + (data_a * l_samec_bs3); + + samec_loparamv = min(samec_param, samec_loparamv); + samec_hiparamv = max(samec_param, samec_hiparamv); + + + vfloat samec_dist0 = samec_param * l_samec_bis0 
- data_r; + vfloat samec_dist1 = samec_param * l_samec_bis1 - data_g; + vfloat samec_dist2 = samec_param * l_samec_bis2 - data_b; + vfloat samec_dist3 = samec_param * l_samec_bis3 - data_a; + + vfloat samec_error = (ew_r * samec_dist0 * samec_dist0) + + (ew_g * samec_dist1 * samec_dist1) + + (ew_b * samec_dist2 * samec_dist2) + + (ew_a * samec_dist3 * samec_dist3); + + haccumulate(samec_errorsumv, samec_error); } - if (!(separate_linelen.a > 1e-7f)) + uncor_loparam = hmin_s(uncor_loparamv); + uncor_hiparam = hmax_s(uncor_hiparamv); + + samec_loparam = hmin_s(samec_loparamv); + samec_hiparam = hmax_s(samec_hiparamv); + + // Loop tail + // Error is buffered and accumulated in blocks of 4 to ensure that + // the partial sums added to the accumulator are invariant with the + // vector implementation, irrespective of vector size ... + alignas(16) float uncor_errorsum_tmp[4] { 0 }; + alignas(16) float samec_errorsum_tmp[4] { 0 }; + for (/* */; i < texel_count; i++) { - separate_linelen.a = 1e-7f; + int iwt = weights[i]; + + vfloat4 dat = blk->texel(iwt); + vfloat4 ews = ewb->error_weights[iwt]; + + float uncor_param = dot_s(dat, l_uncor.bs); + uncor_loparam = astc::min(uncor_param, uncor_loparam); + uncor_hiparam = astc::max(uncor_param, uncor_hiparam); + + float samec_param = dot_s(dat, l_samec.bs); + samec_loparam = astc::min(samec_param, samec_loparam); + samec_hiparam = astc::max(samec_param, samec_hiparam); + + vfloat4 uncor_dist = (l_uncor.amod - dat) + + (uncor_param * l_uncor.bis); + float uncor_error_tmp = dot_s(ews, uncor_dist * uncor_dist); + + vfloat4 samec_dist = samec_param * l_samec.bis - dat; + float samec_error_tmp = dot_s(ews, samec_dist * samec_dist); + + // Accumulate error sum in the temporary array + int error_index = i & 0x3; + uncor_errorsum_tmp[error_index] = uncor_error_tmp; + samec_errorsum_tmp[error_index] = samec_error_tmp; + +#if ASTCENC_SIMD_WIDTH == 8 + // Zero the temporary staging buffer every 4 items unless last iter + if ((i & 0x7) == 0x03) + { + haccumulate(uncor_errorsumv, vfloat4::loada(uncor_errorsum_tmp)); + storea(vfloat4::zero(), uncor_errorsum_tmp); + + haccumulate(samec_errorsumv, vfloat4::loada(samec_errorsum_tmp)); + storea(vfloat4::zero(), samec_errorsum_tmp); + } +#endif } - lengths_uncorr[partition] = uncorr_linelen; - lengths_samechroma[partition] = samechroma_linelen; - lengths_separate[partition] = separate_linelen; + // Accumulate the loop tail using the vfloat4 swizzle + haccumulate(uncor_errorsumv, vfloat4::loada(uncor_errorsum_tmp)); + haccumulate(samec_errorsumv, vfloat4::loada(samec_errorsum_tmp)); + + // Resolve the final scalar accumulator sum + haccumulate(uncor_errorsum, uncor_errorsumv); + haccumulate(samec_errorsum, samec_errorsumv); + + float uncor_linelen = uncor_hiparam - uncor_loparam; + float samec_linelen = samec_hiparam - samec_loparam; - *uncorr_errors = uncorr_errorsum; - *samechroma_errors = samechroma_errorsum; - *separate_color_errors = float4(red_errorsum, green_errorsum, blue_errorsum, alpha_errorsum); + // Turn very small numbers and NaNs into a small number + uncor_linelen = astc::max(uncor_linelen, 1e-7f); + samec_linelen = astc::max(samec_linelen, 1e-7f); + + uncor_lengths[partition] = uncor_linelen; + samec_lengths[partition] = samec_linelen; } + + *uncor_errors = uncor_errorsum; + *samec_errors = samec_errorsum; } void compute_error_squared_rgb( - const partition_info *pt, // the partition that we use when computing the squared-error. 
+ const partition_info *pt, const imageblock *blk, const error_weight_block *ewb, - const processed_line3 *plines_uncorr, - const processed_line3 *plines_samechroma, - const processed_line2 *plines_separate_red, - const processed_line2 *plines_separate_green, - const processed_line2 *plines_separate_blue, - float *lengths_uncorr, - float *lengths_samechroma, - float3 *lengths_separate, - float *uncorr_errors, - float *samechroma_errors, - float3 *separate_color_errors + partition_lines3 plines[4], + float& uncor_error, + float& samec_error ) { - float uncorr_errorsum = 0.0f; - float samechroma_errorsum = 0.0f; - float red_errorsum = 0.0f; - float green_errorsum = 0.0f; - float blue_errorsum = 0.0f; + float uncor_errorsum = 0.0f; + float samec_errorsum = 0.0f; - for (int partition = 0; partition < pt->partition_count; partition++) + int partition_count = pt->partition_count; + promise(partition_count > 0); + + for (int partition = 0; partition < partition_count; partition++) { - // TODO: sort partitions by number of texels. For warp-architectures, - // this can reduce the running time by about 25-50%. + partition_lines3& pl = plines[partition]; const uint8_t *weights = pt->texels_of_partition[partition]; - int texelcount = pt->texels_per_partition[partition]; + int texel_count = pt->partition_texel_count[partition]; + promise(texel_count > 0); + + float uncor_loparam = 1e10f; + float uncor_hiparam = -1e10f; + + float samec_loparam = 1e10f; + float samec_hiparam = -1e10f; + + processed_line3 l_uncor = pl.uncor_pline; + processed_line3 l_samec = pl.samec_pline; - float uncorr_lowparam = 1e10f; - float uncorr_highparam = -1e10f; + int i = 0; - float samechroma_lowparam = 1e10f; - float samechroma_highparam = -1e10f; + // This implementation is an example vectorization of this function. 
+ // It works for - the codec is a 2-4% faster than not vectorizing - but + // the benefit is limited by the use of gathers and register pressure - float3 separate_lowparam = float3(1e10f); - float3 separate_highparam = float3(-1e10f); + // Vectorize some useful scalar inputs + vfloat l_uncor_bs0(l_uncor.bs.lane<0>()); + vfloat l_uncor_bs1(l_uncor.bs.lane<1>()); + vfloat l_uncor_bs2(l_uncor.bs.lane<2>()); - processed_line3 l_uncorr = plines_uncorr[partition]; - processed_line3 l_samechroma = plines_samechroma[partition]; - processed_line2 l_red = plines_separate_red[partition]; - processed_line2 l_green = plines_separate_green[partition]; - processed_line2 l_blue = plines_separate_blue[partition]; + vfloat l_uncor_amod0(l_uncor.amod.lane<0>()); + vfloat l_uncor_amod1(l_uncor.amod.lane<1>()); + vfloat l_uncor_amod2(l_uncor.amod.lane<2>()); - // TODO: split up this loop due to too many temporaries; in - // particular, the six line functions will consume 18 vector registers + vfloat l_uncor_bis0(l_uncor.bis.lane<0>()); + vfloat l_uncor_bis1(l_uncor.bis.lane<1>()); + vfloat l_uncor_bis2(l_uncor.bis.lane<2>()); - for (int i = 0; i < texelcount; i++) + vfloat l_samec_bs0(l_samec.bs.lane<0>()); + vfloat l_samec_bs1(l_samec.bs.lane<1>()); + vfloat l_samec_bs2(l_samec.bs.lane<2>()); + + assert(all(l_samec.amod == vfloat4(0.0f))); + + vfloat l_samec_bis0(l_samec.bis.lane<0>()); + vfloat l_samec_bis1(l_samec.bis.lane<1>()); + vfloat l_samec_bis2(l_samec.bis.lane<2>()); + + vfloat uncor_loparamv(1e10f); + vfloat uncor_hiparamv(-1e10f); + vfloat4 uncor_errorsumv = vfloat4::zero(); + + vfloat samec_loparamv(1e10f); + vfloat samec_hiparamv(-1e10f); + vfloat4 samec_errorsumv = vfloat4::zero(); + + int clipped_texel_count = round_down_to_simd_multiple_vla(texel_count); + for (/* */; i < clipped_texel_count; i += ASTCENC_SIMD_WIDTH) { - int iwt = weights[i]; + vint texel_idxs(&(weights[i])); - float texel_weight_rgb = ewb->texel_weight_rgb[iwt]; - if (texel_weight_rgb > 1e-20f) - { - float3 dat = float3(blk->data_r[iwt], - blk->data_g[iwt], - blk->data_b[iwt]); + vfloat data_r = gatherf(blk->data_r, texel_idxs); + vfloat data_g = gatherf(blk->data_g, texel_idxs); + vfloat data_b = gatherf(blk->data_b, texel_idxs); - float3 ews = float3(ewb->error_weights[iwt].r, - ewb->error_weights[iwt].g, - ewb->error_weights[iwt].b); + vfloat ew_r = gatherf(ewb->texel_weight_r, texel_idxs); + vfloat ew_g = gatherf(ewb->texel_weight_g, texel_idxs); + vfloat ew_b = gatherf(ewb->texel_weight_b, texel_idxs); - float uncorr_param = dot(dat, l_uncorr.bs); - uncorr_lowparam = MIN(uncorr_param, uncorr_lowparam); - uncorr_highparam = MAX(uncorr_param, uncorr_highparam); + vfloat uncor_param = (data_r * l_uncor_bs0) + + (data_g * l_uncor_bs1) + + (data_b * l_uncor_bs2); - float samechroma_param = dot(dat, l_samechroma.bs); - samechroma_lowparam = MIN(samechroma_param, samechroma_lowparam); - samechroma_highparam = MAX(samechroma_param, samechroma_highparam); + uncor_loparamv = min(uncor_param, uncor_loparamv); + uncor_hiparamv = max(uncor_param, uncor_hiparamv); - float3 separate_param = float3(dot(float2(dat.g, dat.b), l_red.bs), - dot(float2(dat.r, dat.b), l_green.bs), - dot(float2(dat.r, dat.g), l_blue.bs)); + vfloat uncor_dist0 = (l_uncor_amod0 - data_r) + + (uncor_param * l_uncor_bis0); + vfloat uncor_dist1 = (l_uncor_amod1 - data_g) + + (uncor_param * l_uncor_bis1); + vfloat uncor_dist2 = (l_uncor_amod2 - data_b) + + (uncor_param * l_uncor_bis2); - separate_lowparam = float3(MIN(separate_param.r, separate_lowparam.r), - 
MIN(separate_param.g, separate_lowparam.g), - MIN(separate_param.b, separate_lowparam.b)); + vfloat uncor_err = (ew_r * uncor_dist0 * uncor_dist0) + + (ew_g * uncor_dist1 * uncor_dist1) + + (ew_b * uncor_dist2 * uncor_dist2); - separate_highparam = float3(MAX(separate_param.r, separate_highparam.r), - MAX(separate_param.g, separate_highparam.g), - MAX(separate_param.b, separate_highparam.b)); + haccumulate(uncor_errorsumv, uncor_err); - float3 uncorr_dist = (l_uncorr.amod - dat) + - (uncorr_param * l_uncorr.bis); - uncorr_errorsum += dot(ews, uncorr_dist * uncorr_dist); + // Process samechroma data + vfloat samec_param = (data_r * l_samec_bs0) + + (data_g * l_samec_bs1) + + (data_b * l_samec_bs2); - float3 samechroma_dist = (l_samechroma.amod - dat) + - (samechroma_param * l_samechroma.bis); - samechroma_errorsum += dot(ews, samechroma_dist * samechroma_dist); + samec_loparamv = min(samec_param, samec_loparamv); + samec_hiparamv = max(samec_param, samec_hiparamv); - float2 red_dist = (l_red.amod - float2(dat.g, dat.b)) + - (separate_param.r * l_red.bis); - red_errorsum += dot(float2(ews.g, ews.b), red_dist * red_dist); - float2 green_dist = (l_green.amod - float2(dat.r, dat.b)) + - (separate_param.g * l_green.bis); - green_errorsum += dot(float2(ews.r, ews.b), green_dist * green_dist); + vfloat samec_dist0 = samec_param * l_samec_bis0 - data_r; + vfloat samec_dist1 = samec_param * l_samec_bis1 - data_g; + vfloat samec_dist2 = samec_param * l_samec_bis2 - data_b; - float2 blue_dist = (l_blue.amod - float2(dat.r, dat.g)) + - (separate_param.b * l_blue.bis); - blue_errorsum += dot(float2(ews.r, ews.g), blue_dist * blue_dist); - } + vfloat samec_err = (ew_r * samec_dist0 * samec_dist0) + + (ew_g * samec_dist1 * samec_dist1) + + (ew_b * samec_dist2 * samec_dist2); + + haccumulate(samec_errorsumv, samec_err); } - float uncorr_linelen = uncorr_highparam - uncorr_lowparam; - float samechroma_linelen = samechroma_highparam - samechroma_lowparam; - float3 separate_linelen = separate_highparam - separate_lowparam; + uncor_loparam = hmin_s(uncor_loparamv); + uncor_hiparam = hmax_s(uncor_hiparamv); - // Turn very small numbers and NaNs into a small number - if (!(uncorr_linelen > 1e-7f)) - { - uncorr_linelen = 1e-7f; - } + samec_loparam = hmin_s(samec_loparamv); + samec_hiparam = hmax_s(samec_hiparamv); - if (!(samechroma_linelen > 1e-7f)) + // Loop tail + // Error is buffered and accumulated in blocks of 4 to ensure that + // the partial sums added to the accumulator are invariant with the + // vector implementation, irrespective of vector size ... 
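
Both vectorized loops in this hunk compute the same quantity per texel: project the texel onto a processed line, reconstruct the closest point on that line, and accumulate the channel-weighted squared distance, while tracking the low and high projection parameters for the later line-length calculation. A scalar reference of that per-texel work, hypothetical and not part of the patch, shown for the three-channel case:

    // Scalar reference of the per-texel work done by the gathered SIMD loops
    // in compute_error_squared_rgb/_rgba above.
    struct Line3
    {
        float amod[3];  // offset of the line (zero for the same-chroma line)
        float bs[3];    // direction used for the projection dot product
        float bis[3];   // direction scaled for reconstruction
    };

    // The caller seeds loparam/hiparam with +1e10 / -1e10, as the patch does.
    static float squared_error_rgb(const float rgb[3], const float ew[3],
                                   const Line3& line, float& loparam, float& hiparam)
    {
        // Parametric position of this texel along the line.
        float t = rgb[0] * line.bs[0] + rgb[1] * line.bs[1] + rgb[2] * line.bs[2];
        loparam = t < loparam ? t : loparam;
        hiparam = t > hiparam ? t : hiparam;

        // Channel-weighted squared distance between the texel and its
        // reconstruction amod + t * bis on the line.
        float err = 0.0f;
        for (int c = 0; c < 3; c++)
        {
            float dist = (line.amod[c] - rgb[c]) + t * line.bis[c];
            err += ew[c] * dist * dist;
        }
        return err;
    }

The line length fed back to the weight search is then hiparam - loparam, clamped to 1e-7 exactly as the astc::max() calls in the patch do, which keeps NaNs and degenerate lines out of the later divisions.
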
+ alignas(16) float uncor_errorsum_tmp[4] { 0 }; + alignas(16) float samec_errorsum_tmp[4] { 0 }; + for (/* */; i < texel_count; i++) { - samechroma_linelen = 1e-7f; - } + int iwt = weights[i]; - if (!(separate_linelen.r > 1e-7f)) - { - separate_linelen.r = 1e-7f; - } + vfloat4 dat = blk->texel3(iwt); + vfloat4 ews = ewb->error_weights[iwt]; - if (!(separate_linelen.g > 1e-7f)) - { - separate_linelen.g = 1e-7f; - } + float uncor_param = dot3_s(dat, l_uncor.bs); + uncor_loparam = astc::min(uncor_param, uncor_loparam); + uncor_hiparam = astc::max(uncor_param, uncor_hiparam); - if (!(separate_linelen.b > 1e-7f)) - { - separate_linelen.b = 1e-7f; - } + float samec_param = dot3_s(dat, l_samec.bs); + samec_loparam = astc::min(samec_param, samec_loparam); + samec_hiparam = astc::max(samec_param, samec_hiparam); - lengths_uncorr[partition] = uncorr_linelen; - lengths_samechroma[partition] = samechroma_linelen; - lengths_separate[partition] = separate_linelen; + vfloat4 uncor_dist = (l_uncor.amod - dat) + + (uncor_param * l_uncor.bis); + float uncor_error_tmp = dot3_s(ews, uncor_dist * uncor_dist); - *uncorr_errors = uncorr_errorsum; - *samechroma_errors = samechroma_errorsum; - *separate_color_errors = float3(red_errorsum, green_errorsum, blue_errorsum); - } -} + vfloat4 samec_dist = samec_param * l_samec.bis - dat; + float samec_error_tmp = dot3_s(ews, samec_dist * samec_dist); -// function to compute the error across a tile when using a particular line for -// a particular partition. -float compute_error_squared_rgb_single_partition( - int partition_to_test, - const block_size_descriptor* bsd, - const partition_info* pt, // the partition that we use when computing the squared-error. - const imageblock* blk, - const error_weight_block* ewb, - const processed_line3* lin // the line for the partition. 
-) { - int texels_per_block = bsd->texel_count; - float errorsum = 0.0f; + // Accumulate error sum in the temporary array + int error_index = i & 0x3; + uncor_errorsum_tmp[error_index] = uncor_error_tmp; + samec_errorsum_tmp[error_index] = samec_error_tmp; - for (int i = 0; i < texels_per_block; i++) - { - int partition = pt->partition_of_texel[i]; - float texel_weight = ewb->texel_weight_rgb[i]; +#if ASTCENC_SIMD_WIDTH == 8 + // Emit the staging buffer every 4 items unless last iteration + if ((i & 0x7) == 0x03) + { + haccumulate(uncor_errorsumv, vfloat4::loada(uncor_errorsum_tmp)); + storea(vfloat4::zero(), uncor_errorsum_tmp); - if (partition != partition_to_test || texel_weight < 1e-20f) - { - continue; + haccumulate(samec_errorsumv, vfloat4::loada(samec_errorsum_tmp)); + storea(vfloat4::zero(), samec_errorsum_tmp); + } +#endif } - float3 point = float3(blk->data_r[i], - blk->data_g[i], - blk->data_b[i]); - float param = dot(point, lin->bs); - float3 rp1 = lin->amod + param * lin->bis; - float3 dist = rp1 - point; - float4 ews = ewb->error_weights[i]; - float3 ews3 = float3(ews.r, ews.g, ews.b); - errorsum += dot(ews3, dist * dist); + // Accumulate the loop tail using the vfloat4 swizzle + haccumulate(uncor_errorsumv, vfloat4::loada(uncor_errorsum_tmp)); + haccumulate(samec_errorsumv, vfloat4::loada(samec_errorsum_tmp)); + + // Resolve the final scalar accumulator sum + haccumulate(uncor_errorsum, uncor_errorsumv); + haccumulate(samec_errorsum, samec_errorsumv); + + float uncor_linelen = uncor_hiparam - uncor_loparam; + float samec_linelen = samec_hiparam - samec_loparam; + + // Turn very small numbers and NaNs into a small number + uncor_linelen = astc::max(uncor_linelen, 1e-7f); + samec_linelen = astc::max(samec_linelen, 1e-7f); + + pl.uncor_line_len = uncor_linelen; + pl.samec_line_len = samec_linelen; } - return errorsum; + uncor_error = uncor_errorsum; + samec_error = samec_errorsum; } #endif diff --git a/libkram/astc-encoder/astcenc_block_sizes2.cpp b/libkram/astc-encoder/astcenc_block_sizes2.cpp index 04089a2c..b37892d8 100644 --- a/libkram/astc-encoder/astcenc_block_sizes2.cpp +++ b/libkram/astc-encoder/astcenc_block_sizes2.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. 
You may obtain a copy @@ -117,8 +117,10 @@ static int decode_block_mode_2d( int weight_count = N * M * (D + 1); int qmode = (base_quant_mode - 2) + 6 * H; - int weightbits = compute_ise_bitcount(weight_count, (quantization_method) qmode); - if (weight_count > MAX_WEIGHTS_PER_BLOCK || weightbits < MIN_WEIGHT_BITS_PER_BLOCK || weightbits > MAX_WEIGHT_BITS_PER_BLOCK) + int weightbits = get_ise_sequence_bitcount(weight_count, (quant_method)qmode); + if (weight_count > MAX_WEIGHTS_PER_BLOCK || + weightbits < MIN_WEIGHT_BITS_PER_BLOCK || + weightbits > MAX_WEIGHT_BITS_PER_BLOCK) { return 0; } @@ -211,7 +213,7 @@ static int decode_block_mode_3d( int weight_count = N * M * Q * (D + 1); int qmode = (base_quant_mode - 2) + 6 * H; - int weightbits = compute_ise_bitcount(weight_count, (quantization_method) qmode); + int weightbits = get_ise_sequence_bitcount(weight_count, (quant_method)qmode); if (weight_count > MAX_WEIGHTS_PER_BLOCK || weightbits < MIN_WEIGHT_BITS_PER_BLOCK || weightbits > MAX_WEIGHT_BITS_PER_BLOCK) @@ -237,12 +239,12 @@ static void initialize_decimation_table_2d( int texels_per_block = xdim * ydim; int weights_per_block = x_weights * y_weights; - int weightcount_of_texel[MAX_TEXELS_PER_BLOCK]; - int grid_weights_of_texel[MAX_TEXELS_PER_BLOCK][4]; - int weights_of_texel[MAX_TEXELS_PER_BLOCK][4]; + uint8_t weightcount_of_texel[MAX_TEXELS_PER_BLOCK]; + uint8_t grid_weights_of_texel[MAX_TEXELS_PER_BLOCK][4]; + uint8_t weights_of_texel[MAX_TEXELS_PER_BLOCK][4]; - int texelcount_of_weight[MAX_WEIGHTS_PER_BLOCK]; - int texels_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; + uint8_t texelcount_of_weight[MAX_WEIGHTS_PER_BLOCK]; + uint8_t texels_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; int texelweights_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; for (int i = 0; i < weights_per_block; i++) @@ -269,7 +271,6 @@ static void initialize_decimation_table_2d( int x_weight_int = x_weight >> 4; int y_weight_int = y_weight >> 4; int qweight[4]; - int weight[4]; qweight[0] = x_weight_int + y_weight_int * x_weights; qweight[1] = qweight[0] + 1; qweight[2] = qweight[0] + x_weights; @@ -278,6 +279,7 @@ static void initialize_decimation_table_2d( // truncated-precision bilinear interpolation. int prod = x_weight_frac * y_weight_frac; + int weight[4]; weight[3] = (prod + 8) >> 4; weight[1] = x_weight_frac - weight[3]; weight[2] = y_weight_frac - weight[3]; @@ -300,35 +302,44 @@ static void initialize_decimation_table_2d( for (int i = 0; i < texels_per_block; i++) { - dt->texel_num_weights[i] = weightcount_of_texel[i]; + dt->texel_weight_count[i] = weightcount_of_texel[i]; - // ensure that all 4 entries are actually initialized. 
- // This allows a branch-free implementation of compute_value_of_texel_flt() + // Init all 4 entries so we can rely on zeros for vectorization for (int j = 0; j < 4; j++) { - dt->texel_weights_int[i][j] = 0; - dt->texel_weights_float[i][j] = 0.0f; - dt->texel_weights[i][j] = 0; + dt->texel_weights_int_t4[i][j] = 0; + dt->texel_weights_float_t4[i][j] = 0.0f; + dt->texel_weights_t4[i][j] = 0; + + dt->texel_weights_float_4t[j][i] = 0.0f; + dt->texel_weights_4t[j][i] = 0; + } for (int j = 0; j < weightcount_of_texel[i]; j++) { - dt->texel_weights_int[i][j] = (uint8_t)weights_of_texel[i][j]; - dt->texel_weights_float[i][j] = ((float)weights_of_texel[i][j]) * (1.0f / TEXEL_WEIGHT_SUM); - dt->texel_weights[i][j] = (uint8_t)grid_weights_of_texel[i][j]; + dt->texel_weights_int_t4[i][j] = weights_of_texel[i][j]; + dt->texel_weights_float_t4[i][j] = ((float)weights_of_texel[i][j]) * (1.0f / TEXEL_WEIGHT_SUM); + dt->texel_weights_t4[i][j] = grid_weights_of_texel[i][j]; + + dt->texel_weights_float_4t[j][i] = ((float)weights_of_texel[i][j]) * (1.0f / TEXEL_WEIGHT_SUM); + dt->texel_weights_4t[j][i] = grid_weights_of_texel[i][j]; } } for (int i = 0; i < weights_per_block; i++) { - dt->weight_num_texels[i] = texelcount_of_weight[i]; + dt->weight_texel_count[i] = texelcount_of_weight[i]; for (int j = 0; j < texelcount_of_weight[i]; j++) { - int texel = texels_of_weight[i][j]; - dt->weight_texel[i][j] = (uint8_t)texel; - dt->weights_int[i][j] = (uint8_t)texelweights_of_weight[i][j]; - dt->weights_flt[i][j] = (float)texelweights_of_weight[i][j]; + uint8_t texel = texels_of_weight[i][j]; + + dt->weights_int[i][j] = texelweights_of_weight[i][j]; + + // Create transposed versions of these for better vectorization + dt->weight_texel[j][i] = texel; + dt->weights_flt[j][i] = (float)texelweights_of_weight[i][j]; // perform a layer of array unrolling. 
An aspect of this unrolling is that // one of the texel-weight indexes is an identity-mapped index; we will use this @@ -336,30 +347,33 @@ static void initialize_decimation_table_2d( int swap_idx = -1; for (int k = 0; k < 4; k++) { - int dttw = dt->texel_weights[texel][k]; - float dttwf = dt->texel_weights_float[texel][k]; + uint8_t dttw = dt->texel_weights_t4[texel][k]; + float dttwf = dt->texel_weights_float_t4[texel][k]; if (dttw == i && dttwf != 0.0f) { swap_idx = k; } - dt->texel_weights_texel[i][j][k] = (uint8_t)dttw; + dt->texel_weights_texel[i][j][k] = dttw; dt->texel_weights_float_texel[i][j][k] = dttwf; } if (swap_idx != 0) { - int vi = dt->texel_weights_texel[i][j][0]; + uint8_t vi = dt->texel_weights_texel[i][j][0]; float vf = dt->texel_weights_float_texel[i][j][0]; dt->texel_weights_texel[i][j][0] = dt->texel_weights_texel[i][j][swap_idx]; dt->texel_weights_float_texel[i][j][0] = dt->texel_weights_float_texel[i][j][swap_idx]; - dt->texel_weights_texel[i][j][swap_idx] = (uint8_t)vi; + dt->texel_weights_texel[i][j][swap_idx] = vi; dt->texel_weights_float_texel[i][j][swap_idx] = vf; } } } - dt->num_texels = texels_per_block; - dt->num_weights = weights_per_block; + dt->texel_count = texels_per_block; + dt->weight_count = weights_per_block; + dt->weight_x = x_weights; + dt->weight_y = y_weights; + dt->weight_z = 1; } static void initialize_decimation_table_3d( @@ -374,12 +388,12 @@ static void initialize_decimation_table_3d( int texels_per_block = xdim * ydim * zdim; int weights_per_block = x_weights * y_weights * z_weights; - int weightcount_of_texel[MAX_TEXELS_PER_BLOCK]; - int grid_weights_of_texel[MAX_TEXELS_PER_BLOCK][4]; - int weights_of_texel[MAX_TEXELS_PER_BLOCK][4]; + uint8_t weightcount_of_texel[MAX_TEXELS_PER_BLOCK]; + uint8_t grid_weights_of_texel[MAX_TEXELS_PER_BLOCK][4]; + uint8_t weights_of_texel[MAX_TEXELS_PER_BLOCK][4]; - int texelcount_of_weight[MAX_WEIGHTS_PER_BLOCK]; - int texels_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; + uint8_t texelcount_of_weight[MAX_WEIGHTS_PER_BLOCK]; + uint8_t texels_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; int texelweights_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; for (int i = 0; i < weights_per_block; i++) @@ -510,34 +524,42 @@ static void initialize_decimation_table_3d( for (int i = 0; i < texels_per_block; i++) { - dt->texel_num_weights[i] = weightcount_of_texel[i]; + dt->texel_weight_count[i] = weightcount_of_texel[i]; - // ensure that all 4 entries are actually initialized. 
- // This allows a branch-free implementation of compute_value_of_texel_flt() + // Init all 4 entries so we can rely on zeros for vectorization for (int j = 0; j < 4; j++) { - dt->texel_weights_int[i][j] = 0; - dt->texel_weights_float[i][j] = 0.0f; - dt->texel_weights[i][j] = 0; + dt->texel_weights_int_t4[i][j] = 0; + dt->texel_weights_float_t4[i][j] = 0.0f; + dt->texel_weights_t4[i][j] = 0; + + dt->texel_weights_float_4t[j][i] = 0.0f; + dt->texel_weights_4t[j][i] = 0; } for (int j = 0; j < weightcount_of_texel[i]; j++) { - dt->texel_weights_int[i][j] = (uint8_t)weights_of_texel[i][j]; - dt->texel_weights_float[i][j] = ((float)weights_of_texel[i][j]) * (1.0f / TEXEL_WEIGHT_SUM); - dt->texel_weights[i][j] = (uint8_t)grid_weights_of_texel[i][j]; + dt->texel_weights_int_t4[i][j] = weights_of_texel[i][j]; + dt->texel_weights_float_t4[i][j] = ((float)weights_of_texel[i][j]) * (1.0f / TEXEL_WEIGHT_SUM); + dt->texel_weights_t4[i][j] = grid_weights_of_texel[i][j]; + + dt->texel_weights_float_4t[j][i] = ((float)weights_of_texel[i][j]) * (1.0f / TEXEL_WEIGHT_SUM); + dt->texel_weights_4t[j][i] = grid_weights_of_texel[i][j]; } } for (int i = 0; i < weights_per_block; i++) { - dt->weight_num_texels[i] = texelcount_of_weight[i]; + dt->weight_texel_count[i] = texelcount_of_weight[i]; for (int j = 0; j < texelcount_of_weight[i]; j++) { int texel = texels_of_weight[i][j]; - dt->weight_texel[i][j] = (uint8_t)texel; - dt->weights_int[i][j] = (uint8_t)texelweights_of_weight[i][j]; - dt->weights_flt[i][j] = (float)texelweights_of_weight[i][j]; + + dt->weights_int[i][j] = texelweights_of_weight[i][j]; + + // Create transposed versions of these for better vectorization + dt->weight_texel[j][i] = texel; + dt->weights_flt[j][i] = (float)texelweights_of_weight[i][j]; // perform a layer of array unrolling. An aspect of this unrolling is that // one of the texel-weight indexes is an identity-mapped index; we will use this @@ -545,219 +567,281 @@ static void initialize_decimation_table_3d( int swap_idx = -1; for (int k = 0; k < 4; k++) { - int dttw = dt->texel_weights[texel][k]; - float dttwf = dt->texel_weights_float[texel][k]; + uint8_t dttw = dt->texel_weights_t4[texel][k]; + float dttwf = dt->texel_weights_float_t4[texel][k]; if (dttw == i && dttwf != 0.0f) { swap_idx = k; } - dt->texel_weights_texel[i][j][k] = (uint8_t)dttw; + dt->texel_weights_texel[i][j][k] = dttw; dt->texel_weights_float_texel[i][j][k] = dttwf; } if (swap_idx != 0) { - int vi = dt->texel_weights_texel[i][j][0]; + uint8_t vi = dt->texel_weights_texel[i][j][0]; float vf = dt->texel_weights_float_texel[i][j][0]; dt->texel_weights_texel[i][j][0] = dt->texel_weights_texel[i][j][swap_idx]; dt->texel_weights_float_texel[i][j][0] = dt->texel_weights_float_texel[i][j][swap_idx]; - dt->texel_weights_texel[i][j][swap_idx] = (uint8_t)vi; + dt->texel_weights_texel[i][j][swap_idx] = vi; dt->texel_weights_float_texel[i][j][swap_idx] = vf; } } } - dt->num_texels = texels_per_block; - dt->num_weights = weights_per_block; + dt->texel_count = texels_per_block; + dt->weight_count = weights_per_block; + dt->weight_x = x_weights; + dt->weight_y = y_weights; + dt->weight_z = z_weights; } -static void construct_block_size_descriptor_2d( - int xdim, - int ydim, - block_size_descriptor* bsd +/** + * @brief Assign the texels to use for kmeans clustering. + * + * The max limit is MAX_KMEANS_TEXELS; above this a random selection is used. + * The @c bsd.texel_count is an input and must be populated beforehand. + * + * @param bsd The block size descriptor to populate. 
+ */ +static void assign_kmeans_texels( + block_size_descriptor& bsd ) { - int decimation_mode_index[256]; // for each of the 256 entries in the decim_table_array, its index - int decimation_mode_count = 0; + // Use all texels for kmeans on a small block + if (bsd.texel_count <= MAX_KMEANS_TEXELS) + { + for (int i = 0; i < bsd.texel_count; i++) + { + bsd.kmeans_texels[i] = i; + } - bsd->xdim = xdim; - bsd->ydim = ydim; - bsd->zdim = 1; - bsd->texel_count = xdim * ydim; + bsd.kmeans_texel_count = bsd.texel_count; + return; + } + + // Select a random subset of texels for kmeans on a large block + uint64_t rng_state[2]; + astc::rand_init(rng_state); - for (int i = 0; i < 256; i++) + // Pick 64 random texels for use with bitmap partitioning. + bool seen[MAX_TEXELS_PER_BLOCK]; + for (int i = 0; i < bsd.texel_count; i++) { - decimation_mode_index[i] = -1; + seen[i] = false; } - // gather all the infill-modes that can be used with the current block size - for (int x_weights = 2; x_weights <= 12; x_weights++) + // Assign 64 random indices, retrying if we see repeats + int arr_elements_set = 0; + while (arr_elements_set < MAX_KMEANS_TEXELS) { - for (int y_weights = 2; y_weights <= 12; y_weights++) + unsigned int idx = (unsigned int)astc::rand(rng_state); + idx %= bsd.texel_count; + if (!seen[idx]) { - if (x_weights * y_weights > MAX_WEIGHTS_PER_BLOCK) - { - continue; - } + bsd.kmeans_texels[arr_elements_set++] = idx; + seen[idx] = true; + } + } - decimation_table *dt = new decimation_table; - decimation_mode_index[y_weights * 16 + x_weights] = decimation_mode_count; - initialize_decimation_table_2d(xdim, ydim, x_weights, y_weights, dt); + bsd.kmeans_texel_count = MAX_KMEANS_TEXELS; +} - int weight_count = x_weights * y_weights; +/** + * @brief Allocate a single 2D decimation table entry. + * + * @param x_dim The block X dimension. + * @param y_dim The block Y dimension. + * @param x_weights The weight grid X dimension. + * @param y_weights The weight grid Y dimension. + * + * @return The new entry's index in the compacted decimation_table array. 
+ */ +static int construct_dt_entry_2d( + int x_dim, + int y_dim, + int x_weights, + int y_weights, + block_size_descriptor& bsd +) { + int dm_index = bsd.decimation_mode_count; + int weight_count = x_weights * y_weights; + assert(weight_count <= MAX_WEIGHTS_PER_BLOCK); - int maxprec_1plane = -1; - int maxprec_2planes = -1; - for (int i = 0; i < 12; i++) - { - int bits_1plane = compute_ise_bitcount(weight_count, (quantization_method) i); - int bits_2planes = compute_ise_bitcount(2 * weight_count, (quantization_method) i); + bool try_2planes = (2 * weight_count) <= MAX_WEIGHTS_PER_BLOCK; - if (bits_1plane >= MIN_WEIGHT_BITS_PER_BLOCK && bits_1plane <= MAX_WEIGHT_BITS_PER_BLOCK) - { - maxprec_1plane = i; - } + decimation_table *dt = aligned_malloc(sizeof(decimation_table), ASTCENC_VECALIGN); + initialize_decimation_table_2d(x_dim, y_dim, x_weights, y_weights, dt); - if (bits_2planes >= MIN_WEIGHT_BITS_PER_BLOCK && bits_2planes <= MAX_WEIGHT_BITS_PER_BLOCK) - { - maxprec_2planes = i; - } - } + int maxprec_1plane = -1; + int maxprec_2planes = -1; + for (int i = 0; i < 12; i++) + { + int bits_1plane = get_ise_sequence_bitcount(weight_count, (quant_method)i); + if (bits_1plane >= MIN_WEIGHT_BITS_PER_BLOCK && bits_1plane <= MAX_WEIGHT_BITS_PER_BLOCK) + { + maxprec_1plane = i; + } - if (2 * x_weights * y_weights > MAX_WEIGHTS_PER_BLOCK) + if (try_2planes) + { + int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, (quant_method)i); + if (bits_2planes >= MIN_WEIGHT_BITS_PER_BLOCK && bits_2planes <= MAX_WEIGHT_BITS_PER_BLOCK) { - maxprec_2planes = -1; + maxprec_2planes = i; } + } + } - bsd->permit_encode[decimation_mode_count] = (x_weights <= xdim && y_weights <= ydim); + // At least one of the two should be valid ... + assert(maxprec_1plane >= 0 || maxprec_2planes >= 0); + bsd.decimation_modes[dm_index].maxprec_1plane = maxprec_1plane; + bsd.decimation_modes[dm_index].maxprec_2planes = maxprec_2planes; + bsd.decimation_modes[dm_index].percentile_hit = false; + bsd.decimation_modes[dm_index].percentile_always = false; + bsd.decimation_tables[dm_index] = dt; - bsd->decimation_mode_samples[decimation_mode_count] = weight_count; - bsd->decimation_mode_maxprec_1plane[decimation_mode_count] = maxprec_1plane; - bsd->decimation_mode_maxprec_2planes[decimation_mode_count] = maxprec_2planes; - bsd->decimation_tables[decimation_mode_count] = dt; + bsd.decimation_mode_count++; + return dm_index; +} - decimation_mode_count++; - } - } +/** + * @brief Allocate block modes and decimation tables for a single BSD. + * + * @param x_dim The block X dimension. + * @param y_dim The block Y dimension. + * @param can_omit_modes True if we are allowed to discard modes that + * compression won't use, even if they are legal. + * @param mode_cutoff Block mode percentile cut off, between [0,1]. + * @param bsd The BSD to populate. + */ +static void construct_block_size_descriptor_2d( + int x_dim, + int y_dim, + bool can_omit_modes, + float mode_cutoff, + block_size_descriptor& bsd +) { + // Store a remap table for storing packed decimation modes. + // Indexing uses [Y * 16 + X] and max block size for each axis is 12. 
+ static const int MAX_DMI = 12 * 16 + 12; + int decimation_mode_index[MAX_DMI]; - for (int i = 0; i < MAX_DECIMATION_MODES; i++) - { - bsd->decimation_mode_percentile[i] = 1.0f; - } + bsd.xdim = x_dim; + bsd.ydim = y_dim; + bsd.zdim = 1; + bsd.texel_count = x_dim * y_dim; + bsd.decimation_mode_count = 0; - for (int i = decimation_mode_count; i < MAX_DECIMATION_MODES; i++) + for (int i = 0; i < MAX_DMI; i++) { - bsd->permit_encode[i] = 0; - bsd->decimation_mode_samples[i] = 0; - bsd->decimation_mode_maxprec_1plane[i] = -1; - bsd->decimation_mode_maxprec_2planes[i] = -1; + decimation_mode_index[i] = -1; } - bsd->decimation_mode_count = decimation_mode_count; - + // Gather all the decimation grids that can be used with the current block. #if !defined(ASTCENC_DECOMPRESS_ONLY) - const float *percentiles = get_2d_percentile_table(xdim, ydim); + const float *percentiles = get_2d_percentile_table(x_dim, y_dim); +#else + // Unused in decompress-only builds + (void)can_omit_modes; + (void)mode_cutoff; #endif - // then construct the list of block formats + // Construct the list of block formats referencing the decimation tables int packed_idx = 0; for (int i = 0; i < MAX_WEIGHT_MODES; i++) { int x_weights, y_weights; int is_dual_plane; - int quantization_mode; - int permit_encode = 1; + int quant_mode; - if (decode_block_mode_2d(i, &x_weights, &y_weights, &is_dual_plane, &quantization_mode)) - { - if (x_weights > xdim || y_weights > ydim) - { - permit_encode = 0; - } - } - else - { - permit_encode = 0; - } - - bsd->block_mode_to_packed[i] = -1; - if (!permit_encode) // also disallow decode of grid size larger than block size. - continue; - int decimation_mode = decimation_mode_index[y_weights * 16 + x_weights]; - bsd->block_modes_packed[packed_idx].decimation_mode = decimation_mode; - bsd->block_modes_packed[packed_idx].quantization_mode = quantization_mode; - bsd->block_modes_packed[packed_idx].is_dual_plane = is_dual_plane; - bsd->block_modes_packed[packed_idx].mode_index = i; + bool valid = decode_block_mode_2d(i, &x_weights, &y_weights, &is_dual_plane, &quant_mode); #if !defined(ASTCENC_DECOMPRESS_ONLY) - bsd->block_modes_packed[packed_idx].percentile = percentiles[i]; - if (bsd->decimation_mode_percentile[decimation_mode] > percentiles[i]) - { - bsd->decimation_mode_percentile[decimation_mode] = percentiles[i]; - } + float percentile = percentiles[i]; + bool selected = (percentile <= mode_cutoff) || !can_omit_modes; #else - bsd->block_modes_packed[packed_idx].percentile = 0.0f; + // Decompressor builds can never discard modes, as we cannot make any + // assumptions about the modes the original compressor used + bool selected = true; #endif - bsd->block_mode_to_packed[i] = packed_idx; - ++packed_idx; - } - bsd->block_mode_packed_count = packed_idx; -#if !defined(ASTCENC_DECOMPRESS_ONLY) - delete[] percentiles; -#endif + // ASSUMPTION: No compressor will use more weights in a dimension than + // the block has actual texels, because it wastes bits. Decompression + // of an image which violates this assumption will fail, even though it + // is technically permitted by the specification. 
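In compressor builds, the validity test, the texel-count assumption above, and the percentile cutoff collapse into one predicate per block mode; a compact restatement using the locals from this hunk (illustrative only):

// Illustrative only: the combined filter applied to each block mode index.
bool keep_mode = valid
              && (x_weights <= x_dim)
              && (y_weights <= y_dim)
              && (!can_omit_modes || (percentile <= mode_cutoff));
// Rejected modes get block_mode_packed_index[i] = -1 and contribute nothing
// to the packed arrays.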
- if (xdim * ydim <= 64) - { - bsd->texelcount_for_bitmap_partitioning = xdim * ydim; - for (int i = 0; i < xdim * ydim; i++) + // Skip modes that are invalid, too large, or not selected by heuristic + if (!valid || !selected || (x_weights > x_dim) || (y_weights > y_dim)) { - bsd->texels_for_bitmap_partitioning[i] = i; + bsd.block_mode_packed_index[i] = -1; + continue; } - } - else - { - uint64_t rng_state[2]; - astc::rand_init(rng_state); - // pick 64 random texels for use with bitmap partitioning. - int arr[MAX_TEXELS_PER_BLOCK]; - for (int i = 0; i < xdim * ydim; i++) + // Allocate and initialize the DT entry if we've not used it yet. + int decimation_mode = decimation_mode_index[y_weights * 16 + x_weights]; + if (decimation_mode == -1) { - arr[i] = 0; + decimation_mode = construct_dt_entry_2d(x_dim, y_dim, x_weights, y_weights, bsd); + decimation_mode_index[y_weights * 16 + x_weights] = decimation_mode; } - int arr_elements_set = 0; - while (arr_elements_set < 64) +#if !defined(ASTCENC_DECOMPRESS_ONLY) + // Flatten the block mode heuristic into some precomputed flags + if (percentile == 0.0f) { - unsigned int idx = (unsigned int)astc::rand(rng_state); - idx %= xdim * ydim; - if (arr[idx] == 0) - { - arr_elements_set++; - arr[idx] = 1; - } + bsd.block_modes[packed_idx].percentile_always = true; + bsd.decimation_modes[decimation_mode].percentile_always = true; + + bsd.block_modes[packed_idx].percentile_hit = true; + bsd.decimation_modes[decimation_mode].percentile_hit = true; } + else if (percentile <= mode_cutoff) + { + bsd.block_modes[packed_idx].percentile_always = false; - int texel_weights_written = 0; - int idx = 0; - while (texel_weights_written < 64) + bsd.block_modes[packed_idx].percentile_hit = true; + bsd.decimation_modes[decimation_mode].percentile_hit = true; + } + else { - if (arr[idx]) - { - bsd->texels_for_bitmap_partitioning[texel_weights_written++] = idx; - } - idx++; + bsd.block_modes[packed_idx].percentile_always = false; + bsd.block_modes[packed_idx].percentile_hit = false; } +#endif - bsd->texelcount_for_bitmap_partitioning = 64; + bsd.block_modes[packed_idx].decimation_mode = decimation_mode; + bsd.block_modes[packed_idx].quant_mode = quant_mode; + bsd.block_modes[packed_idx].is_dual_plane = is_dual_plane ? 1 : 0; + bsd.block_modes[packed_idx].mode_index = i; + bsd.block_mode_packed_index[i] = packed_idx; + ++packed_idx; } + + bsd.block_mode_count = packed_idx; + +#if !defined(ASTCENC_DECOMPRESS_ONLY) + delete[] percentiles; +#endif + + // Ensure the end of the array contains valid data (should never get read) + for (int i = bsd.decimation_mode_count; i < MAX_DECIMATION_MODES; i++) + { + bsd.decimation_modes[i].maxprec_1plane = -1; + bsd.decimation_modes[i].maxprec_2planes = -1; + bsd.decimation_modes[i].percentile_hit = false; + bsd.decimation_modes[i].percentile_always = false; + bsd.decimation_tables[i] = nullptr; + } + + // Determine the texels to use for kmeans clustering. 
+ assign_kmeans_texels(bsd); } static void construct_block_size_descriptor_3d( int xdim, int ydim, int zdim, - block_size_descriptor * bsd + block_size_descriptor* bsd ) { int decimation_mode_index[512]; // for each of the 512 entries in the decim_table_array, its index int decimation_mode_count = 0; @@ -773,29 +857,28 @@ static void construct_block_size_descriptor_3d( } // gather all the infill-modes that can be used with the current block size - for (int x_weights = 2; x_weights <= 6; x_weights++) + for (int x_weights = 2; x_weights <= xdim; x_weights++) { - for (int y_weights = 2; y_weights <= 6; y_weights++) + for (int y_weights = 2; y_weights <= ydim; y_weights++) { - for (int z_weights = 2; z_weights <= 6; z_weights++) + for (int z_weights = 2; z_weights <= zdim; z_weights++) { - if ((x_weights * y_weights * z_weights) > MAX_WEIGHTS_PER_BLOCK) + int weight_count = x_weights * y_weights * z_weights; + if (weight_count > MAX_WEIGHTS_PER_BLOCK) { continue; } - decimation_table *dt = new decimation_table; + decimation_table *dt = aligned_malloc(sizeof(decimation_table), ASTCENC_VECALIGN); decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights] = decimation_mode_count; initialize_decimation_table_3d(xdim, ydim, zdim, x_weights, y_weights, z_weights, dt); - int weight_count = x_weights * y_weights * z_weights; - int maxprec_1plane = -1; int maxprec_2planes = -1; for (int i = 0; i < 12; i++) { - int bits_1plane = compute_ise_bitcount(weight_count, (quantization_method) i); - int bits_2planes = compute_ise_bitcount(2 * weight_count, (quantization_method) i); + int bits_1plane = get_ise_sequence_bitcount(weight_count, (quant_method)i); + int bits_2planes = get_ise_sequence_bitcount(2 * weight_count, (quant_method)i); if (bits_1plane >= MIN_WEIGHT_BITS_PER_BLOCK && bits_1plane <= MAX_WEIGHT_BITS_PER_BLOCK) { @@ -808,34 +891,28 @@ static void construct_block_size_descriptor_3d( } } - if ((2 * x_weights * y_weights * z_weights) > MAX_WEIGHTS_PER_BLOCK) + if ((2 * weight_count) > MAX_WEIGHTS_PER_BLOCK) { maxprec_2planes = -1; } - bsd->permit_encode[decimation_mode_count] = (x_weights <= xdim && y_weights <= ydim && z_weights <= zdim); - - bsd->decimation_mode_samples[decimation_mode_count] = weight_count; - bsd->decimation_mode_maxprec_1plane[decimation_mode_count] = maxprec_1plane; - bsd->decimation_mode_maxprec_2planes[decimation_mode_count] = maxprec_2planes; + bsd->decimation_modes[decimation_mode_count].maxprec_1plane = maxprec_1plane; + bsd->decimation_modes[decimation_mode_count].maxprec_2planes = maxprec_2planes; + bsd->decimation_modes[decimation_mode_count].percentile_hit = false; + bsd->decimation_modes[decimation_mode_count].percentile_always = false; bsd->decimation_tables[decimation_mode_count] = dt; - decimation_mode_count++; } } } - for (int i = 0; i < MAX_DECIMATION_MODES; i++) - { - bsd->decimation_mode_percentile[i] = 1.0f; - } - for (int i = decimation_mode_count; i < MAX_DECIMATION_MODES; i++) { - bsd->permit_encode[i] = 0; - bsd->decimation_mode_samples[i] = 0; - bsd->decimation_mode_maxprec_1plane[i] = -1; - bsd->decimation_mode_maxprec_2planes[i] = -1; + bsd->decimation_modes[i].maxprec_1plane = -1; + bsd->decimation_modes[i].maxprec_2planes = -1; + bsd->decimation_modes[i].percentile_hit = false; + bsd->decimation_modes[i].percentile_always = false; + bsd->decimation_tables[i] = nullptr; } bsd->decimation_mode_count = decimation_mode_count; @@ -846,10 +923,10 @@ static void construct_block_size_descriptor_3d( { int x_weights, y_weights, z_weights; int 
is_dual_plane; - int quantization_mode; + int quant_mode; int permit_encode = 1; - if (decode_block_mode_3d(i, &x_weights, &y_weights, &z_weights, &is_dual_plane, &quantization_mode)) + if (decode_block_mode_3d(i, &x_weights, &y_weights, &z_weights, &is_dual_plane, &quant_mode)) { if (x_weights > xdim || y_weights > ydim || z_weights > zdim) { @@ -860,70 +937,33 @@ static void construct_block_size_descriptor_3d( { permit_encode = 0; } - bsd->block_mode_to_packed[i] = -1; - if (!permit_encode) - continue; - - int decimation_mode = decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights]; - bsd->block_modes_packed[packed_idx].decimation_mode = decimation_mode; - bsd->block_modes_packed[packed_idx].quantization_mode = quantization_mode; - bsd->block_modes_packed[packed_idx].is_dual_plane = is_dual_plane; - bsd->block_modes_packed[packed_idx].mode_index = i; - bsd->block_modes_packed[packed_idx].percentile = 0.0f; // No percentile table - if (bsd->decimation_mode_percentile[decimation_mode] > 0.0f) + bsd->block_mode_packed_index[i] = -1; + if (!permit_encode) { - bsd->decimation_mode_percentile[decimation_mode] = 0.0f; + continue; } - bsd->block_mode_to_packed[i] = packed_idx; - ++packed_idx; - } - bsd->block_mode_packed_count = packed_idx; - if (xdim * ydim * zdim <= 64) - { - bsd->texelcount_for_bitmap_partitioning = xdim * ydim * zdim; - for (int i = 0; i < xdim * ydim * zdim; i++) - { - bsd->texels_for_bitmap_partitioning[i] = i; - } + int decimation_mode = decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights]; + bsd->block_modes[packed_idx].decimation_mode = decimation_mode; + bsd->block_modes[packed_idx].quant_mode = quant_mode; + bsd->block_modes[packed_idx].is_dual_plane = is_dual_plane ? 1 : 0; + bsd->block_modes[packed_idx].mode_index = i; + + // No percentile table, so enable everything all the time ... + bsd->block_modes[packed_idx].percentile_hit = true; + bsd->block_modes[packed_idx].percentile_always = true; + bsd->decimation_modes[decimation_mode].percentile_hit = true; + bsd->decimation_modes[decimation_mode].percentile_always = true; + + bsd->block_mode_packed_index[i] = packed_idx; + ++packed_idx; } - else - { - uint64_t rng_state[2]; - astc::rand_init(rng_state); - // pick 64 random texels for use with bitmap partitioning. - int arr[MAX_TEXELS_PER_BLOCK]; - for (int i = 0; i < xdim * ydim * zdim; i++) - { - arr[i] = 0; - } - - int arr_elements_set = 0; - while (arr_elements_set < 64) - { - unsigned int idx = (unsigned int)astc::rand(rng_state); - idx %= xdim * ydim * zdim; - if (arr[idx] == 0) - { - arr_elements_set++; - arr[idx] = 1; - } - } + bsd->block_mode_count = packed_idx; - int texel_weights_written = 0; - int idx = 0; - while (texel_weights_written < 64) - { - if (arr[idx]) - { - bsd->texels_for_bitmap_partitioning[texel_weights_written++] = idx; - } - idx++; - } - bsd->texelcount_for_bitmap_partitioning = 64; - } + // Determine the texels to use for kmeans clustering. 
+ assign_kmeans_texels(*bsd); } /* Public function, see header file for detailed documentation */ @@ -931,6 +971,8 @@ void init_block_size_descriptor( int xdim, int ydim, int zdim, + bool can_omit_modes, + float mode_cutoff, block_size_descriptor* bsd ) { if (zdim > 1) @@ -939,17 +981,17 @@ void init_block_size_descriptor( } else { - construct_block_size_descriptor_2d(xdim, ydim, bsd); + construct_block_size_descriptor_2d(xdim, ydim, can_omit_modes, mode_cutoff, *bsd); } init_partition_tables(bsd); } void term_block_size_descriptor( - block_size_descriptor* bsd) -{ + block_size_descriptor* bsd +) { for (int i = 0; i < bsd->decimation_mode_count; i++) { - delete bsd->decimation_tables[i]; + aligned_free(bsd->decimation_tables[i]); } } diff --git a/libkram/astc-encoder/astcenc_color_quantize.cpp b/libkram/astc-encoder/astcenc_color_quantize.cpp index 8c402986..b3592657 100644 --- a/libkram/astc-encoder/astcenc_color_quantize.cpp +++ b/libkram/astc-encoder/astcenc_color_quantize.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -35,36 +35,29 @@ increased until color0 is no longer larger than color1. */ static inline int cqt_lookup( - int quantization_level, + int quant_level, int value ) { - if (value < 0) - { - value = 0; - } - else if (value > 255) - { - value = 255; - } - - return color_quantization_tables[quantization_level][value]; + // TODO: Make this unsigned and avoid the low clamp + value = astc::clamp(value, 0, 255); + return color_quant_tables[quant_level][value]; } static void quantize_rgb( - float4 color0, // LDR: 0=lowest, 255=highest - float4 color1, + vfloat4 color0, + vfloat4 color1, int output[6], - int quantization_level + int quant_level ) { float scale = 1.0f / 257.0f; - float r0 = astc::clamp255f(color0.r * scale); - float g0 = astc::clamp255f(color0.g * scale); - float b0 = astc::clamp255f(color0.b * scale); + float r0 = astc::clamp255f(color0.lane<0>() * scale); + float g0 = astc::clamp255f(color0.lane<1>() * scale); + float b0 = astc::clamp255f(color0.lane<2>() * scale); - float r1 = astc::clamp255f(color1.r * scale); - float g1 = astc::clamp255f(color1.g * scale); - float b1 = astc::clamp255f(color1.b * scale); + float r1 = astc::clamp255f(color1.lane<0>() * scale); + float g1 = astc::clamp255f(color1.lane<1>() * scale); + float b1 = astc::clamp255f(color1.lane<2>() * scale); int ri0, gi0, bi0, ri1, gi1, bi1; int ri0b, gi0b, bi0b, ri1b, gi1b, bi1b; @@ -73,19 +66,19 @@ static void quantize_rgb( int iters = 0; do { - ri0 = cqt_lookup(quantization_level, astc::flt2int_rd(r0 + rgb0_addon)); - gi0 = cqt_lookup(quantization_level, astc::flt2int_rd(g0 + rgb0_addon)); - bi0 = cqt_lookup(quantization_level, astc::flt2int_rd(b0 + rgb0_addon)); - ri1 = cqt_lookup(quantization_level, astc::flt2int_rd(r1 + rgb1_addon)); - gi1 = cqt_lookup(quantization_level, astc::flt2int_rd(g1 + rgb1_addon)); - bi1 = cqt_lookup(quantization_level, astc::flt2int_rd(b1 + rgb1_addon)); - - ri0b = color_unquantization_tables[quantization_level][ri0]; - gi0b = color_unquantization_tables[quantization_level][gi0]; - bi0b = color_unquantization_tables[quantization_level][bi0]; - ri1b = color_unquantization_tables[quantization_level][ri1]; - gi1b = 
color_unquantization_tables[quantization_level][gi1]; - bi1b = color_unquantization_tables[quantization_level][bi1]; + ri0 = cqt_lookup(quant_level, astc::flt2int_rd(r0 + rgb0_addon)); + gi0 = cqt_lookup(quant_level, astc::flt2int_rd(g0 + rgb0_addon)); + bi0 = cqt_lookup(quant_level, astc::flt2int_rd(b0 + rgb0_addon)); + ri1 = cqt_lookup(quant_level, astc::flt2int_rd(r1 + rgb1_addon)); + gi1 = cqt_lookup(quant_level, astc::flt2int_rd(g1 + rgb1_addon)); + bi1 = cqt_lookup(quant_level, astc::flt2int_rd(b1 + rgb1_addon)); + + ri0b = color_unquant_tables[quant_level][ri0]; + gi0b = color_unquant_tables[quant_level][gi0]; + bi0b = color_unquant_tables[quant_level][bi0]; + ri1b = color_unquant_tables[quant_level][ri1]; + gi1b = color_unquant_tables[quant_level][gi1]; + bi1b = color_unquant_tables[quant_level][bi1]; rgb0_addon -= 0.2f; rgb1_addon += 0.2f; @@ -102,47 +95,41 @@ static void quantize_rgb( /* quantize an RGBA color. */ static void quantize_rgba( - float4 color0, - float4 color1, + vfloat4 color0, + vfloat4 color1, int output[8], - int quantization_level + int quant_level ) { - color0.a *= (1.0f / 257.0f); - color1.a *= (1.0f / 257.0f); + float scale = 1.0f / 257.0f; + + float a0 = astc::clamp255f(color0.lane<3>() * scale); + float a1 = astc::clamp255f(color1.lane<3>() * scale); - float a0 = astc::clamp255f(color0.a); - float a1 = astc::clamp255f(color1.a); - int ai0 = color_quantization_tables[quantization_level][astc::flt2int_rtn(a0)]; - int ai1 = color_quantization_tables[quantization_level][astc::flt2int_rtn(a1)]; + int ai0 = color_quant_tables[quant_level][astc::flt2int_rtn(a0)]; + int ai1 = color_quant_tables[quant_level][astc::flt2int_rtn(a1)]; output[6] = ai0; output[7] = ai1; - quantize_rgb(color0, color1, output, quantization_level); + quantize_rgb(color0, color1, output, quant_level); } /* attempt to quantize RGB endpoint values with blue-contraction. Returns 1 on failure, 0 on success. */ -static int try_quantize_rgb_blue_contract( - float4 color0, // assumed to be the smaller color - float4 color1, // assumed to be the larger color +static bool try_quantize_rgb_blue_contract( + vfloat4 color0, // assumed to be the smaller color + vfloat4 color1, // assumed to be the larger color int output[6], - int quantization_level + int quant_level ) { - color0.r *= (1.0f / 257.0f); - color0.g *= (1.0f / 257.0f); - color0.b *= (1.0f / 257.0f); - - color1.r *= (1.0f / 257.0f); - color1.g *= (1.0f / 257.0f); - color1.b *= (1.0f / 257.0f); + float scale = 1.0f / 257.0f; - float r0 = color0.r; - float g0 = color0.g; - float b0 = color0.b; + float r0 = color0.lane<0>() * scale; + float g0 = color0.lane<1>() * scale; + float b0 = color0.lane<2>() * scale; - float r1 = color1.r; - float g1 = color1.g; - float b1 = color1.b; + float r1 = color1.lane<0>() * scale; + float g1 = color1.lane<1>() * scale; + float b1 = color1.lane<2>() * scale; // inverse blue-contraction. This can produce an overflow; // just bail out immediately if this is the case. 
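The inverse step itself sits in unchanged context that this hunk elides. A sketch of what those lines compute, assuming the decoder's blue-contraction averages blue into red and green (r' = (r + b) / 2, g' = (g + b) / 2), so the encoder must apply the inverse before quantizing:

// Sketch of the elided inverse blue-contraction (context, not changed here).
r0 += (r0 - b0);   // r0 = 2 * r0 - b0
g0 += (g0 - b0);
r1 += (r1 - b1);
g1 += (g1 - b1);
// Any channel pushed outside [0, 255] makes the mode unusable, which is what
// the range check at the start of the next hunk rejects.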
@@ -154,31 +141,31 @@ static int try_quantize_rgb_blue_contract( if (r0 < 0.0f || r0 > 255.0f || g0 < 0.0f || g0 > 255.0f || b0 < 0.0f || b0 > 255.0f || r1 < 0.0f || r1 > 255.0f || g1 < 0.0f || g1 > 255.0f || b1 < 0.0f || b1 > 255.0f) { - return 0; + return false; } // quantize the inverse-blue-contracted color - int ri0 = color_quantization_tables[quantization_level][astc::flt2int_rtn(r0)]; - int gi0 = color_quantization_tables[quantization_level][astc::flt2int_rtn(g0)]; - int bi0 = color_quantization_tables[quantization_level][astc::flt2int_rtn(b0)]; - int ri1 = color_quantization_tables[quantization_level][astc::flt2int_rtn(r1)]; - int gi1 = color_quantization_tables[quantization_level][astc::flt2int_rtn(g1)]; - int bi1 = color_quantization_tables[quantization_level][astc::flt2int_rtn(b1)]; + int ri0 = color_quant_tables[quant_level][astc::flt2int_rtn(r0)]; + int gi0 = color_quant_tables[quant_level][astc::flt2int_rtn(g0)]; + int bi0 = color_quant_tables[quant_level][astc::flt2int_rtn(b0)]; + int ri1 = color_quant_tables[quant_level][astc::flt2int_rtn(r1)]; + int gi1 = color_quant_tables[quant_level][astc::flt2int_rtn(g1)]; + int bi1 = color_quant_tables[quant_level][astc::flt2int_rtn(b1)]; // then unquantize again - int ru0 = color_unquantization_tables[quantization_level][ri0]; - int gu0 = color_unquantization_tables[quantization_level][gi0]; - int bu0 = color_unquantization_tables[quantization_level][bi0]; - int ru1 = color_unquantization_tables[quantization_level][ri1]; - int gu1 = color_unquantization_tables[quantization_level][gi1]; - int bu1 = color_unquantization_tables[quantization_level][bi1]; + int ru0 = color_unquant_tables[quant_level][ri0]; + int gu0 = color_unquant_tables[quant_level][gi0]; + int bu0 = color_unquant_tables[quant_level][bi0]; + int ru1 = color_unquant_tables[quant_level][ri1]; + int gu1 = color_unquant_tables[quant_level][gi1]; + int bu1 = color_unquant_tables[quant_level][bi1]; // if color #1 is not larger than color #0, then blue-contraction is not a valid approach. // note that blue-contraction and quantization may itself change this order, which is why // we must only test AFTER blue-contraction. if (ru1 + gu1 + bu1 <= ru0 + gu0 + bu0) { - return 0; + return false; } output[0] = ri1; @@ -188,26 +175,25 @@ static int try_quantize_rgb_blue_contract( output[4] = bi1; output[5] = bi0; - return 1; + return true; } /* quantize an RGBA color with blue-contraction */ static int try_quantize_rgba_blue_contract( - float4 color0, - float4 color1, + vfloat4 color0, + vfloat4 color1, int output[8], - int quantization_level + int quant_level ) { - color0.a *= (1.0f / 257.0f); - color1.a *= (1.0f / 257.0f); + float scale = 1.0f / 257.0f; - float a0 = astc::clamp255f(color0.a); - float a1 = astc::clamp255f(color1.a); + float a0 = astc::clamp255f(color0.lane<3>() * scale); + float a1 = astc::clamp255f(color1.lane<3>() * scale); - output[7] = color_quantization_tables[quantization_level][astc::flt2int_rtn(a0)]; - output[6] = color_quantization_tables[quantization_level][astc::flt2int_rtn(a1)]; + output[7] = color_quant_tables[quant_level][astc::flt2int_rtn(a0)]; + output[6] = color_quant_tables[quant_level][astc::flt2int_rtn(a1)]; - return try_quantize_rgb_blue_contract(color0, color1, output, quantization_level); + return try_quantize_rgb_blue_contract(color0, color1, output, quant_level); } @@ -218,32 +204,27 @@ static int try_quantize_rgba_blue_contract( // if the sum of the offsets is nonnegative, then we encode a regular delta. 
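The comments above describe the delta layout that try_quantize_rgb_delta() validates; a summary sketch with the field widths taken from the checks in this function (delta_fits is illustrative, not part of the patch):

// Illustrative only:
//   base   : color0 channel promoted to unorm9 (8-bit value << 1, range 0..510)
//   offset : color1 (unorm9) minus the requantized base, a signed 7-bit field
static bool delta_fits(int base_unorm9, int other_unorm9)
{
	int offset = other_unorm9 - base_unorm9;
	return offset >= -64 && offset <= 63;   // same bounds as the checks below
}
// The stored offset byte also carries bit 8 of the base, so quantization must
// preserve the top two bits (base MSB and offset sign) or the mode is rejected.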
/* attempt to quantize an RGB endpoint value with delta-encoding. */ -static int try_quantize_rgb_delta( - float4 color0, - float4 color1, +static bool try_quantize_rgb_delta( + vfloat4 color0, + vfloat4 color1, int output[6], - int quantization_level + int quant_level ) { - color0.r *= (1.0f / 257.0f); - color0.g *= (1.0f / 257.0f); - color0.b *= (1.0f / 257.0f); - - color1.r *= (1.0f / 257.0f); - color1.g *= (1.0f / 257.0f); - color1.b *= (1.0f / 257.0f); + float scale = 1.0f / 257.0f; - float r0 = astc::clamp255f(color0.r); - float g0 = astc::clamp255f(color0.g); - float b0 = astc::clamp255f(color0.b); + float r0 = astc::clamp255f(color0.lane<0>() * scale); + float g0 = astc::clamp255f(color0.lane<1>() * scale); + float b0 = astc::clamp255f(color0.lane<2>() * scale); - float r1 = astc::clamp255f(color1.r); - float g1 = astc::clamp255f(color1.g); - float b1 = astc::clamp255f(color1.b); + float r1 = astc::clamp255f(color1.lane<0>() * scale); + float g1 = astc::clamp255f(color1.lane<1>() * scale); + float b1 = astc::clamp255f(color1.lane<2>() * scale); // transform r0 to unorm9 int r0a = astc::flt2int_rtn(r0); int g0a = astc::flt2int_rtn(g0); int b0a = astc::flt2int_rtn(b0); + r0a <<= 1; g0a <<= 1; b0a <<= 1; @@ -255,13 +236,13 @@ static int try_quantize_rgb_delta( // quantize, then unquantize in order to get a value that we take // differences against. - int r0be = color_quantization_tables[quantization_level][r0b]; - int g0be = color_quantization_tables[quantization_level][g0b]; - int b0be = color_quantization_tables[quantization_level][b0b]; + int r0be = color_quant_tables[quant_level][r0b]; + int g0be = color_quant_tables[quant_level][g0b]; + int b0be = color_quant_tables[quant_level][b0b]; - r0b = color_unquantization_tables[quantization_level][r0be]; - g0b = color_unquantization_tables[quantization_level][g0be]; - b0b = color_unquantization_tables[quantization_level][b0be]; + r0b = color_unquant_tables[quant_level][r0be]; + g0b = color_unquant_tables[quant_level][g0be]; + b0b = color_unquant_tables[quant_level][b0be]; r0b |= r0a & 0x100; // final unquantized-values for endpoint 0. g0b |= g0a & 0x100; b0b |= b0a & 0x100; @@ -282,7 +263,7 @@ static int try_quantize_rgb_delta( // check if the difference is too large to be encodable. if (r1d > 63 || g1d > 63 || b1d > 63 || r1d < -64 || g1d < -64 || b1d < -64) { - return 0; + return false; } // insert top bit of the base into the offset @@ -297,17 +278,17 @@ static int try_quantize_rgb_delta( // then quantize & unquantize; if this causes any of the top two bits to flip, // then encoding fails, since we have then corrupted either the top bit of the base // or the sign bit of the offset. 
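The flip test referenced in the comment above is an XOR against the top-two-bit mask; a named form of it (illustrative only):

// Illustrative only: 0xC0 masks bit 7 (base MSB) and bit 6 (offset sign).
static inline bool top_two_bits_flipped(int before_quant, int after_unquant)
{
	return ((before_quant ^ after_unquant) & 0xC0) != 0;
}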
- int r1de = color_quantization_tables[quantization_level][r1d]; - int g1de = color_quantization_tables[quantization_level][g1d]; - int b1de = color_quantization_tables[quantization_level][b1d]; + int r1de = color_quant_tables[quant_level][r1d]; + int g1de = color_quant_tables[quant_level][g1d]; + int b1de = color_quant_tables[quant_level][b1d]; - int r1du = color_unquantization_tables[quantization_level][r1de]; - int g1du = color_unquantization_tables[quantization_level][g1de]; - int b1du = color_unquantization_tables[quantization_level][b1de]; + int r1du = color_unquant_tables[quant_level][r1de]; + int g1du = color_unquant_tables[quant_level][g1de]; + int b1du = color_unquant_tables[quant_level][b1de]; if (((r1d ^ r1du) | (g1d ^ g1du) | (b1d ^ b1du)) & 0xC0) { - return 0; + return false; } // check that the sum of the encoded offsets is nonnegative, else encoding fails @@ -332,7 +313,7 @@ static int try_quantize_rgb_delta( if (r1du + g1du + b1du < 0) { - return 0; + return false; } // check that the offsets produce legitimate sums as well. @@ -341,7 +322,7 @@ static int try_quantize_rgb_delta( b1du += b0b; if (r1du < 0 || r1du > 0x1FF || g1du < 0 || g1du > 0x1FF || b1du < 0 || b1du > 0x1FF) { - return 0; + return false; } // OK, we've come this far; we can now encode legitimate values. @@ -352,31 +333,25 @@ static int try_quantize_rgb_delta( output[4] = b0be; output[5] = b1de; - return 1; + return true; } -static int try_quantize_rgb_delta_blue_contract( - float4 color0, - float4 color1, +static bool try_quantize_rgb_delta_blue_contract( + vfloat4 color0, + vfloat4 color1, int output[6], - int quantization_level + int quant_level ) { - color0.r *= (1.0f / 257.0f); - color0.g *= (1.0f / 257.0f); - color0.b *= (1.0f / 257.0f); - - color1.r *= (1.0f / 257.0f); - color1.g *= (1.0f / 257.0f); - color1.b *= (1.0f / 257.0f); + // Note: Switch around endpoint colors already at start + float scale = 1.0f / 257.0f; - // switch around endpoint colors already at start. - float r0 = color1.r; - float g0 = color1.g; - float b0 = color1.b; + float r1 = color0.lane<0>() * scale; + float g1 = color0.lane<1>() * scale; + float b1 = color0.lane<2>() * scale; - float r1 = color0.r; - float g1 = color0.g; - float b1 = color0.b; + float r0 = color1.lane<0>() * scale; + float g0 = color1.lane<1>() * scale; + float b0 = color1.lane<2>() * scale; // inverse blue-contraction. This step can perform an overflow, in which case // we will bail out immediately. @@ -388,7 +363,7 @@ static int try_quantize_rgb_delta_blue_contract( if (r0 < 0.0f || r0 > 255.0f || g0 < 0.0f || g0 > 255.0f || b0 < 0.0f || b0 > 255.0f || r1 < 0.0f || r1 > 255.0f || g1 < 0.0f || g1 > 255.0f || b1 < 0.0f || b1 > 255.0f) { - return 0; + return false; } // transform r0 to unorm9 @@ -406,13 +381,13 @@ static int try_quantize_rgb_delta_blue_contract( // quantize, then unquantize in order to get a value that we take // differences against. 
- int r0be = color_quantization_tables[quantization_level][r0b]; - int g0be = color_quantization_tables[quantization_level][g0b]; - int b0be = color_quantization_tables[quantization_level][b0b]; + int r0be = color_quant_tables[quant_level][r0b]; + int g0be = color_quant_tables[quant_level][g0b]; + int b0be = color_quant_tables[quant_level][b0b]; - r0b = color_unquantization_tables[quantization_level][r0be]; - g0b = color_unquantization_tables[quantization_level][g0be]; - b0b = color_unquantization_tables[quantization_level][b0be]; + r0b = color_unquant_tables[quant_level][r0be]; + g0b = color_unquant_tables[quant_level][g0be]; + b0b = color_unquant_tables[quant_level][b0be]; r0b |= r0a & 0x100; // final unquantized-values for endpoint 0. g0b |= g0a & 0x100; b0b |= b0a & 0x100; @@ -433,7 +408,7 @@ static int try_quantize_rgb_delta_blue_contract( // check if the difference is too large to be encodable. if (r1d > 63 || g1d > 63 || b1d > 63 || r1d < -64 || g1d < -64 || b1d < -64) { - return 0; + return false; } // insert top bit of the base into the offset @@ -448,17 +423,17 @@ static int try_quantize_rgb_delta_blue_contract( // then quantize & unquantize; if this causes any of the top two bits to flip, // then encoding fails, since we have then corrupted either the top bit of the base // or the sign bit of the offset. - int r1de = color_quantization_tables[quantization_level][r1d]; - int g1de = color_quantization_tables[quantization_level][g1d]; - int b1de = color_quantization_tables[quantization_level][b1d]; + int r1de = color_quant_tables[quant_level][r1d]; + int g1de = color_quant_tables[quant_level][g1d]; + int b1de = color_quant_tables[quant_level][b1d]; - int r1du = color_unquantization_tables[quantization_level][r1de]; - int g1du = color_unquantization_tables[quantization_level][g1de]; - int b1du = color_unquantization_tables[quantization_level][b1de]; + int r1du = color_unquant_tables[quant_level][r1de]; + int g1du = color_unquant_tables[quant_level][g1de]; + int b1du = color_unquant_tables[quant_level][b1de]; if (((r1d ^ r1du) | (g1d ^ g1du) | (b1d ^ b1du)) & 0xC0) { - return 0; + return false; } // check that the sum of the encoded offsets is negative, else encoding fails @@ -484,7 +459,7 @@ static int try_quantize_rgb_delta_blue_contract( if (r1du + g1du + b1du >= 0) { - return 0; + return false; } // check that the offsets produce legitimate sums as well. @@ -494,7 +469,7 @@ static int try_quantize_rgb_delta_blue_contract( if (r1du < 0 || r1du > 0x1FF || g1du < 0 || g1du > 0x1FF || b1du < 0 || b1du > 0x1FF) { - return 0; + return false; } // OK, we've come this far; we can now encode legitimate values. @@ -505,43 +480,40 @@ static int try_quantize_rgb_delta_blue_contract( output[4] = b0be; output[5] = b1de; - return 1; + return true; } -static int try_quantize_alpha_delta( - float4 color0, - float4 color1, +static bool try_quantize_alpha_delta( + vfloat4 color0, + vfloat4 color1, int output[8], - int quantization_level + int quant_level ) { - color0.a *= (1.0f / 257.0f); - color1.a *= (1.0f / 257.0f); + float scale = 1.0f / 257.0f; - // the calculation for alpha-delta is exactly the same as for RGB-delta; see - // the RGB-delta function for comments. 
- float a0 = astc::clamp255f(color0.a); - float a1 = astc::clamp255f(color1.a); + float a0 = astc::clamp255f(color0.lane<3>() * scale); + float a1 = astc::clamp255f(color1.lane<3>() * scale); int a0a = astc::flt2int_rtn(a0); a0a <<= 1; int a0b = a0a & 0xFF; - int a0be = color_quantization_tables[quantization_level][a0b]; - a0b = color_unquantization_tables[quantization_level][a0be]; + int a0be = color_quant_tables[quant_level][a0b]; + a0b = color_unquant_tables[quant_level][a0be]; a0b |= a0a & 0x100; int a1d = astc::flt2int_rtn(a1); a1d <<= 1; a1d -= a0b; if (a1d > 63 || a1d < -64) { - return 0; + return false; } a1d &= 0x7F; a1d |= (a0b & 0x100) >> 1; - int a1de = color_quantization_tables[quantization_level][a1d]; - int a1du = color_unquantization_tables[quantization_level][a1de]; + int a1de = color_quant_tables[quant_level][a1d]; + int a1du = color_unquant_tables[quant_level][a1de]; if ((a1d ^ a1du) & 0xC0) { - return 0; + return false; } a1du &= 0x7F; if (a1du & 0x40) @@ -551,23 +523,26 @@ static int try_quantize_alpha_delta( a1du += a0b; if (a1du < 0 || a1du > 0x1FF) { - return 0; + return false; } output[6] = a0be; output[7] = a1de; - return 1; + return true; } -int try_quantize_luminance_alpha_delta( - float4 color0, - float4 color1, +static bool try_quantize_luminance_alpha_delta( + vfloat4 color0, + vfloat4 color1, int output[8], - int quantization_level + int quant_level ) { - float l0 = astc::clamp255f((color0.r + color0.g + color0.b) * ((1.0f / 3.0f) * (1.0f / 257.0f))); - float l1 = astc::clamp255f((color1.r + color1.g + color1.b) * ((1.0f / 3.0f) * (1.0f / 257.0f))); - float a0 = astc::clamp255f(color0.a * (1.0f / 257.0f)); - float a1 = astc::clamp255f(color1.a * (1.0f / 257.0f)); + float scale = 1.0f / 257.0f; + + float l0 = astc::clamp255f(hadd_rgb_s(color0) * ((1.0f / 3.0f) * scale)); + float l1 = astc::clamp255f(hadd_rgb_s(color1) * ((1.0f / 3.0f) * scale)); + + float a0 = astc::clamp255f(color0.lane<3>() * scale); + float a1 = astc::clamp255f(color1.lane<3>() * scale); int l0a = astc::flt2int_rtn(l0); int a0a = astc::flt2int_rtn(a0); @@ -575,10 +550,10 @@ int try_quantize_luminance_alpha_delta( a0a <<= 1; int l0b = l0a & 0xFF; int a0b = a0a & 0xFF; - int l0be = color_quantization_tables[quantization_level][l0b]; - int a0be = color_quantization_tables[quantization_level][a0b]; - l0b = color_unquantization_tables[quantization_level][l0be]; - a0b = color_unquantization_tables[quantization_level][a0be]; + int l0be = color_quant_tables[quant_level][l0b]; + int a0be = color_quant_tables[quant_level][a0b]; + l0b = color_unquant_tables[quant_level][l0be]; + a0b = color_unquant_tables[quant_level][a0be]; l0b |= l0a & 0x100; a0b |= a0a & 0x100; int l1d = astc::flt2int_rtn(l1); @@ -589,28 +564,28 @@ int try_quantize_luminance_alpha_delta( a1d -= a0b; if (l1d > 63 || l1d < -64) { - return 0; + return false; } if (a1d > 63 || a1d < -64) { - return 0; + return false; } l1d &= 0x7F; a1d &= 0x7F; l1d |= (l0b & 0x100) >> 1; a1d |= (a0b & 0x100) >> 1; - int l1de = color_quantization_tables[quantization_level][l1d]; - int a1de = color_quantization_tables[quantization_level][a1d]; - int l1du = color_unquantization_tables[quantization_level][l1de]; - int a1du = color_unquantization_tables[quantization_level][a1de]; + int l1de = color_quant_tables[quant_level][l1d]; + int a1de = color_quant_tables[quant_level][a1d]; + int l1du = color_unquant_tables[quant_level][l1de]; + int a1du = color_unquant_tables[quant_level][a1de]; if ((l1d ^ l1du) & 0xC0) { - return 0; + return false; } if ((a1d ^ 
a1du) & 0xC0) { - return 0; + return false; } l1du &= 0x7F; a1du &= 0x7F; @@ -626,126 +601,120 @@ int try_quantize_luminance_alpha_delta( a1du += a0b; if (l1du < 0 || l1du > 0x1FF) { - return 0; + return false; } if (a1du < 0 || a1du > 0x1FF) { - return 0; + return false; } output[0] = l0be; output[1] = l1de; output[2] = a0be; output[3] = a1de; - return 1; + return true; } -static int try_quantize_rgba_delta( - float4 color0, - float4 color1, +static bool try_quantize_rgba_delta( + vfloat4 color0, + vfloat4 color1, int output[8], - int quantization_level + int quant_level ) { - int alpha_delta_res = try_quantize_alpha_delta(color0, color1, output, quantization_level); + bool alpha_delta_res = try_quantize_alpha_delta(color0, color1, output, quant_level); - if (alpha_delta_res == 0) + if (alpha_delta_res == false) { - return 0; + return false; } - return try_quantize_rgb_delta(color0, color1, output, quantization_level); + return try_quantize_rgb_delta(color0, color1, output, quant_level); } -static int try_quantize_rgba_delta_blue_contract( - float4 color0, - float4 color1, +static bool try_quantize_rgba_delta_blue_contract( + vfloat4 color0, + vfloat4 color1, int output[8], - int quantization_level + int quant_level ) { // notice that for the alpha encoding, we are swapping around color0 and color1; // this is because blue-contraction involves swapping around the two colors. - int alpha_delta_res = try_quantize_alpha_delta(color1, color0, output, quantization_level); + int alpha_delta_res = try_quantize_alpha_delta(color1, color0, output, quant_level); if (alpha_delta_res == 0) { - return 0; + return false; } - return try_quantize_rgb_delta_blue_contract(color0, color1, output, quantization_level); + return try_quantize_rgb_delta_blue_contract(color0, color1, output, quant_level); } static void quantize_rgbs_new( - float4 rgbs_color, // W component is a desired-scale to apply, in the range 0..1 + vfloat4 rgbs_color, // W component is a desired-scale to apply, in the range 0..1 int output[4], - int quantization_level + int quant_level ) { - rgbs_color.r *= (1.0f / 257.0f); - rgbs_color.g *= (1.0f / 257.0f); - rgbs_color.b *= (1.0f / 257.0f); + float scale = 1.0f / 257.0f; - float r = astc::clamp255f(rgbs_color.r); - float g = astc::clamp255f(rgbs_color.g); - float b = astc::clamp255f(rgbs_color.b); + float r = astc::clamp255f(rgbs_color.lane<0>() * scale); + float g = astc::clamp255f(rgbs_color.lane<1>() * scale); + float b = astc::clamp255f(rgbs_color.lane<2>() * scale); - int ri = color_quantization_tables[quantization_level][astc::flt2int_rtn(r)]; - int gi = color_quantization_tables[quantization_level][astc::flt2int_rtn(g)]; - int bi = color_quantization_tables[quantization_level][astc::flt2int_rtn(b)]; + int ri = color_quant_tables[quant_level][astc::flt2int_rtn(r)]; + int gi = color_quant_tables[quant_level][astc::flt2int_rtn(g)]; + int bi = color_quant_tables[quant_level][astc::flt2int_rtn(b)]; - int ru = color_unquantization_tables[quantization_level][ri]; - int gu = color_unquantization_tables[quantization_level][gi]; - int bu = color_unquantization_tables[quantization_level][bi]; + int ru = color_unquant_tables[quant_level][ri]; + int gu = color_unquant_tables[quant_level][gi]; + int bu = color_unquant_tables[quant_level][bi]; - float oldcolorsum = rgbs_color.r + rgbs_color.g + rgbs_color.b; + float oldcolorsum = hadd_rgb_s(rgbs_color) * scale; float newcolorsum = (float)(ru + gu + bu); - float scale = astc::clamp1f(rgbs_color.a * (oldcolorsum + 1e-10f) / (newcolorsum + 1e-10f)); 
- int scale_idx = astc::flt2int_rtn(scale * 256.0f); - scale_idx = astc::clampi(scale_idx, 0, 255); + float scalea = astc::clamp1f(rgbs_color.lane<3>() * (oldcolorsum + 1e-10f) / (newcolorsum + 1e-10f)); + int scale_idx = astc::flt2int_rtn(scalea * 256.0f); + scale_idx = astc::clamp(scale_idx, 0, 255); output[0] = ri; output[1] = gi; output[2] = bi; - output[3] = color_quantization_tables[quantization_level][scale_idx]; + output[3] = color_quant_tables[quant_level][scale_idx]; } static void quantize_rgbs_alpha_new( - float4 color0, - float4 color1, - float4 rgbs_color, + vfloat4 color0, + vfloat4 color1, + vfloat4 rgbs_color, int output[6], - int quantization_level + int quant_level ) { - color0.a *= (1.0f / 257.0f); - color1.a *= (1.0f / 257.0f); + float scale = 1.0f / 257.0f; - float a0 = astc::clamp255f(color0.a); - float a1 = astc::clamp255f(color1.a); + float a0 = astc::clamp255f(color0.lane<3>() * scale); + float a1 = astc::clamp255f(color1.lane<3>() * scale); - int ai0 = color_quantization_tables[quantization_level][astc::flt2int_rtn(a0)]; - int ai1 = color_quantization_tables[quantization_level][astc::flt2int_rtn(a1)]; + int ai0 = color_quant_tables[quant_level][astc::flt2int_rtn(a0)]; + int ai1 = color_quant_tables[quant_level][astc::flt2int_rtn(a1)]; output[4] = ai0; output[5] = ai1; - quantize_rgbs_new(rgbs_color, output, quantization_level); + quantize_rgbs_new(rgbs_color, output, quant_level); } static void quantize_luminance( - float4 color0, - float4 color1, + vfloat4 color0, + vfloat4 color1, int output[2], - int quantization_level + int quant_level ) { - color0.r *= (1.0f / 257.0f); - color0.g *= (1.0f / 257.0f); - color0.b *= (1.0f / 257.0f); + float scale = 1.0f / 257.0f; - color1.r *= (1.0f / 257.0f); - color1.g *= (1.0f / 257.0f); - color1.b *= (1.0f / 257.0f); + color0 = color0 * scale; + color1 = color1 * scale; - float lum0 = astc::clamp255f((color0.r + color0.g + color0.b) * (1.0f / 3.0f)); - float lum1 = astc::clamp255f((color1.r + color1.g + color1.b) * (1.0f / 3.0f)); + float lum0 = astc::clamp255f(hadd_rgb_s(color0) * (1.0f / 3.0f)); + float lum1 = astc::clamp255f(hadd_rgb_s(color1) * (1.0f / 3.0f)); if (lum0 > lum1) { @@ -754,66 +723,73 @@ static void quantize_luminance( lum1 = avg; } - output[0] = color_quantization_tables[quantization_level][astc::flt2int_rtn(lum0)]; - output[1] = color_quantization_tables[quantization_level][astc::flt2int_rtn(lum1)]; + output[0] = color_quant_tables[quant_level][astc::flt2int_rtn(lum0)]; + output[1] = color_quant_tables[quant_level][astc::flt2int_rtn(lum1)]; } static void quantize_luminance_alpha( - float4 color0, - float4 color1, + vfloat4 color0, + vfloat4 color1, int output[4], - int quantization_level + int quant_level ) { - color0 = color0 * (1.0f / 257.0f); - color1 = color1 * (1.0f / 257.0f); + float scale = 1.0f / 257.0f; + + color0 = color0 * scale; + color1 = color1 * scale; + + float lum0 = astc::clamp255f(hadd_rgb_s(color0) * (1.0f / 3.0f)); + float lum1 = astc::clamp255f(hadd_rgb_s(color1) * (1.0f / 3.0f)); - float lum0 = astc::clamp255f((color0.r + color0.g + color0.b) * (1.0f / 3.0f)); - float lum1 = astc::clamp255f((color1.r + color1.g + color1.b) * (1.0f / 3.0f)); - float a0 = astc::clamp255f(color0.a); - float a1 = astc::clamp255f(color1.a); + float a0 = astc::clamp255f(color0.lane<3>()); + float a1 = astc::clamp255f(color1.lane<3>()); // if the endpoints are *really* close, then pull them apart slightly; // this affords for >8 bits precision for normal maps. 
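The restructuring just below only hoists the shared quant_level > 18 test; the nudge itself is unchanged and, for either pair of endpoints, amounts to this (illustrative helper, not in the patch; values are on the 0..255 scale):

// Illustrative only: pull two nearly-equal endpoints half a step apart.
static inline void nudge_apart(float& v0, float& v1)
{
	if (fabsf(v0 - v1) < 3.0f)
	{
		float step = (v0 < v1) ? 0.5f : -0.5f;
		v0 = astc::clamp255f(v0 - step);
		v1 = astc::clamp255f(v1 + step);
	}
}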
- if (quantization_level > 18 && fabsf(lum0 - lum1) < 3.0f) - { - if (lum0 < lum1) - { - lum0 -= 0.5f; - lum1 += 0.5f; - } - else - { - lum0 += 0.5f; - lum1 -= 0.5f; - } - lum0 = astc::clamp255f(lum0); - lum1 = astc::clamp255f(lum1); - } - if (quantization_level > 18 && fabsf(a0 - a1) < 3.0f) + if (quant_level > 18) { - if (a0 < a1) + if (fabsf(lum0 - lum1) < 3.0f) { - a0 -= 0.5f; - a1 += 0.5f; + if (lum0 < lum1) + { + lum0 -= 0.5f; + lum1 += 0.5f; + } + else + { + lum0 += 0.5f; + lum1 -= 0.5f; + } + lum0 = astc::clamp255f(lum0); + lum1 = astc::clamp255f(lum1); } - else + + if (fabsf(a0 - a1) < 3.0f) { - a0 += 0.5f; - a1 -= 0.5f; + if (a0 < a1) + { + a0 -= 0.5f; + a1 += 0.5f; + } + else + { + a0 += 0.5f; + a1 -= 0.5f; + } + a0 = astc::clamp255f(a0); + a1 = astc::clamp255f(a1); } - a0 = astc::clamp255f(a0); - a1 = astc::clamp255f(a1); } - output[0] = color_quantization_tables[quantization_level][astc::flt2int_rtn(lum0)]; - output[1] = color_quantization_tables[quantization_level][astc::flt2int_rtn(lum1)]; - output[2] = color_quantization_tables[quantization_level][astc::flt2int_rtn(a0)]; - output[3] = color_quantization_tables[quantization_level][astc::flt2int_rtn(a1)]; + output[0] = color_quant_tables[quant_level][astc::flt2int_rtn(lum0)]; + output[1] = color_quant_tables[quant_level][astc::flt2int_rtn(lum1)]; + output[2] = color_quant_tables[quant_level][astc::flt2int_rtn(a0)]; + output[3] = color_quant_tables[quant_level][astc::flt2int_rtn(a1)]; } // quantize and unquantize a number, wile making sure to retain the top two bits. static inline void quantize_and_unquantize_retain_top_two_bits( - int quantization_level, + int quant_level, int value_to_quantize, // 0 to 255. int* quantized_value, int* unquantized_value @@ -824,8 +800,8 @@ static inline void quantize_and_unquantize_retain_top_two_bits( do { - quantval = color_quantization_tables[quantization_level][value_to_quantize]; - uquantval = color_unquantization_tables[quantization_level][quantval]; + quantval = color_quant_tables[quant_level][value_to_quantize]; + uquantval = color_unquant_tables[quant_level][quantval]; // perform looping if the top two bits were modified by quant/unquant perform_loop = (value_to_quantize & 0xC0) != (uquantval & 0xC0); @@ -850,7 +826,7 @@ static inline void quantize_and_unquantize_retain_top_two_bits( // quantize and unquantize a number, wile making sure to retain the top four bits. static inline void quantize_and_unquantize_retain_top_four_bits( - int quantization_level, + int quant_level, int value_to_quantize, // 0 to 255. 
int *quantized_value, int *unquantized_value @@ -861,8 +837,8 @@ static inline void quantize_and_unquantize_retain_top_four_bits( do { - quantval = color_quantization_tables[quantization_level][value_to_quantize]; - uquantval = color_unquantization_tables[quantization_level][quantval]; + quantval = color_quant_tables[quant_level][value_to_quantize]; + uquantval = color_unquant_tables[quant_level][quantval]; // perform looping if the top two bits were modified by quant/unquant perform_loop = (value_to_quantize & 0xF0) != (uquantval & 0xF0); @@ -887,39 +863,21 @@ static inline void quantize_and_unquantize_retain_top_four_bits( /* HDR color encoding, take #3 */ static void quantize_hdr_rgbo3( - float4 color, + vfloat4 color, int output[4], - int quantization_level + int quant_level ) { - color.r += color.a; - color.g += color.a; - color.b += color.a; - - if (!(color.r > 0.0f)) - color.r = 0.0f; - else if (color.r > 65535.0f) - color.r = 65535.0f; - - if (!(color.g > 0.0f)) - color.g = 0.0f; - else if (color.g > 65535.0f) - color.g = 65535.0f; - - if (!(color.b > 0.0f)) - color.b = 0.0f; - else if (color.b > 65535.0f) - color.b = 65535.0f; - - if (!(color.a > 0.0f)) - color.a = 0.0f; - else if (color.a > 65535.0f) - color.a = 65535.0f; - - float4 color_bak = color; + color.set_lane<0>(color.lane<0>() + color.lane<3>()); + color.set_lane<1>(color.lane<1>() + color.lane<3>()); + color.set_lane<2>(color.lane<2>() + color.lane<3>()); + + color = clamp(0.0f, 65535.0f, color); + + vfloat4 color_bak = color; int majcomp; - if (color.r > color.g && color.r > color.b) + if (color.lane<0>() > color.lane<1>() && color.lane<0>() > color.lane<2>()) majcomp = 0; // red is largest component - else if (color.g > color.b) + else if (color.lane<1>() > color.lane<2>()) majcomp = 1; // green is largest component else majcomp = 2; // blue is largest component @@ -928,10 +886,10 @@ static void quantize_hdr_rgbo3( switch (majcomp) { case 1: - color = float4(color.g, color.r, color.b, color.a); + color = color.swz<1, 0, 2, 3>(); break; case 2: - color = float4(color.b, color.g, color.r, color.a); + color = color.swz<2, 1, 0, 3>(); break; default: break; @@ -969,10 +927,10 @@ static void quantize_hdr_rgbo3( 1.0f / 256.0f, }; - float r_base = color.r; - float g_base = color.r - color.g; - float b_base = color.r - color.b; - float s_base = color.a; + float r_base = color.lane<0>(); + float g_base = color.lane<0>() - color.lane<1>() ; + float b_base = color.lane<0>() - color.lane<2>() ; + float s_base = color.lane<3>() ; for (int mode = 0; mode < 5; mode++) { @@ -999,36 +957,21 @@ static void quantize_hdr_rgbo3( int r_quantval; int r_uquantval; - quantize_and_unquantize_retain_top_two_bits(quantization_level, r_lowbits, &r_quantval, &r_uquantval); + quantize_and_unquantize_retain_top_two_bits(quant_level, r_lowbits, &r_quantval, &r_uquantval); r_intval = (r_intval & ~0x3f) | (r_uquantval & 0x3f); - float r_fval = r_intval * mode_rscale; + float r_fval = static_cast(r_intval) * mode_rscale; // next, recompute G and B, then quantize and unquantize them. 
- float g_fval = r_fval - color.g; - float b_fval = r_fval - color.b; - if (g_fval < 0.0f) - { - g_fval = 0.0f; - } - else if (g_fval > 65535.0f) - { - g_fval = 65535.0f; - } + float g_fval = r_fval - color.lane<1>() ; + float b_fval = r_fval - color.lane<2>() ; - if (b_fval < 0.0f) - { - b_fval = 0.0f; - } - else if (b_fval > 65535.0f) - { - b_fval = 65535.0f; - } + g_fval = astc::clamp(g_fval, 0.0f, 65535.0f); + b_fval = astc::clamp(b_fval, 0.0f, 65535.0f); int g_intval = astc::flt2int_rtn(g_fval * mode_scale); int b_intval = astc::flt2int_rtn(b_fval * mode_scale); - if (g_intval >= gb_intcutoff || b_intval >= gb_intcutoff) { continue; @@ -1115,32 +1058,25 @@ static void quantize_hdr_rgbo3( int g_uquantval; int b_uquantval; - quantize_and_unquantize_retain_top_four_bits(quantization_level, g_lowbits, &g_quantval, &g_uquantval); + quantize_and_unquantize_retain_top_four_bits(quant_level, g_lowbits, &g_quantval, &g_uquantval); - quantize_and_unquantize_retain_top_four_bits(quantization_level, b_lowbits, &b_quantval, &b_uquantval); + quantize_and_unquantize_retain_top_four_bits(quant_level, b_lowbits, &b_quantval, &b_uquantval); g_intval = (g_intval & ~0x1f) | (g_uquantval & 0x1f); b_intval = (b_intval & ~0x1f) | (b_uquantval & 0x1f); - g_fval = g_intval * mode_rscale; - b_fval = b_intval * mode_rscale; + g_fval = static_cast(g_intval) * mode_rscale; + b_fval = static_cast(b_intval) * mode_rscale; // finally, recompute the scale value, based on the errors // introduced to red, green and blue. // If the error is positive, then the R,G,B errors combined have raised the color // value overall; as such, the scale value needs to be increased. - float rgb_errorsum = (r_fval - color.r) + (r_fval - g_fval - color.g) + (r_fval - b_fval - color.b); + float rgb_errorsum = (r_fval - color.lane<0>() ) + (r_fval - g_fval - color.lane<1>() ) + (r_fval - b_fval - color.lane<2>() ); float s_fval = s_base + rgb_errorsum * (1.0f / 3.0f); - if (s_fval < 0.0f) - { - s_fval = 0.0f; - } - else if (s_fval > 1e9f) - { - s_fval = 1e9f; - } + s_fval = astc::clamp(s_fval, 0.0f, 1e9f); int s_intval = astc::flt2int_rtn(s_fval * mode_scale); @@ -1194,7 +1130,7 @@ static void quantize_hdr_rgbo3( int s_quantval; int s_uquantval; - quantize_and_unquantize_retain_top_four_bits(quantization_level, s_lowbits, &s_quantval, &s_uquantval); + quantize_and_unquantize_retain_top_four_bits(quant_level, s_lowbits, &s_quantval, &s_uquantval); output[0] = r_quantval; output[1] = g_quantval; output[2] = b_quantval; @@ -1205,45 +1141,28 @@ static void quantize_hdr_rgbo3( // failed to encode any of the modes above? In that case, // encode using mode #5. 
float vals[4]; - int ivals[4]; - vals[0] = color_bak.r; - vals[1] = color_bak.g; - vals[2] = color_bak.b; - vals[3] = color_bak.a; + vals[0] = color_bak.lane<0>(); + vals[1] = color_bak.lane<1>(); + vals[2] = color_bak.lane<2>(); + vals[3] = color_bak.lane<3>(); + int ivals[4]; float cvals[3]; for (int i = 0; i < 3; i++) { - if (vals[i] < 0.0f) - { - vals[i] = 0.0f; - } - else if (vals[i] > 65020.0f) - { - vals[i] = 65020.0f; - } - + vals[i] = astc::clamp(vals[i], 0.0f, 65020.0f); ivals[i] = astc::flt2int_rtn(vals[i] * (1.0f / 512.0f)); - cvals[i] = ivals[i] * 512.0f; + cvals[i] = static_cast(ivals[i]) * 512.0f; } float rgb_errorsum = (cvals[0] - vals[0]) + (cvals[1] - vals[1]) + (cvals[2] - vals[2]); vals[3] += rgb_errorsum * (1.0f / 3.0f); - if (vals[3] < 0.0f) - { - vals[3] = 0.0f; - } - else if (vals[3] > 65020.0f) - { - vals[3] = 65020.0f; - } - + vals[3] = astc::clamp(vals[3], 0.0f, 65020.0f); ivals[3] = astc::flt2int_rtn(vals[3] * (1.0f / 512.0f)); int encvals[4]; - encvals[0] = (ivals[0] & 0x3f) | 0xC0; encvals[1] = (ivals[1] & 0x7f) | 0x80; encvals[2] = (ivals[2] & 0x7f) | 0x80; @@ -1252,81 +1171,31 @@ static void quantize_hdr_rgbo3( for (int i = 0; i < 4; i++) { int dummy; - quantize_and_unquantize_retain_top_four_bits(quantization_level, encvals[i], &(output[i]), &dummy); + quantize_and_unquantize_retain_top_four_bits(quant_level, encvals[i], &(output[i]), &dummy); } return; } static void quantize_hdr_rgb3( - float4 color0, - float4 color1, + vfloat4 color0, + vfloat4 color1, int output[6], - int quantization_level + int quant_level ) { - if (!(color0.r > 0.0f)) - { - color0.r = 0.0f; - } - else if (color0.r > 65535.0f) - { - color0.r = 65535.0f; - } - - if (!(color0.g > 0.0f)) - { - color0.g = 0.0f; - } - else if (color0.g > 65535.0f) - { - color0.g = 65535.0f; - } - - if (!(color0.b > 0.0f)) - { - color0.b = 0.0f; - } - else if (color0.b > 65535.0f) - { - color0.b = 65535.0f; - } - - if (!(color1.r > 0.0f)) - { - color1.r = 0.0f; - } - else if (color1.r > 65535.0f) - { - color1.r = 65535.0f; - } - - if (!(color1.g > 0.0f)) - { - color1.g = 0.0f; - } - else if (color1.g > 65535.0f) - { - color1.g = 65535.0f; - } + // Note: color*.lane<3> is not used so we can ignore it + color0 = clamp(0.0f, 65535.0f, color0); + color1 = clamp(0.0f, 65535.0f, color1); - if (!(color1.b > 0.0f)) - { - color1.b = 0.0f; - } - else if (color1.b > 65535.0f) - { - color1.b = 65535.0f; - } - - float4 color0_bak = color0; - float4 color1_bak = color1; + vfloat4 color0_bak = color0; + vfloat4 color1_bak = color1; int majcomp; - if (color1.r > color1.g && color1.r > color1.b) + if (color1.lane<0>() > color1.lane<1>() && color1.lane<0>() > color1.lane<2>()) { majcomp = 0; // red is largest } - else if (color1.g > color1.b) + else if (color1.lane<1>() > color1.lane<2>()) { majcomp = 1; // green is largest } @@ -1339,32 +1208,25 @@ static void quantize_hdr_rgb3( switch (majcomp) { case 1: // red-green swap - color0 = float4(color0.g, color0.r, color0.b, color0.a); - color1 = float4(color1.g, color1.r, color1.b, color1.a); + color0 = color0.swz<1, 0, 2, 3>(); + color1 = color1.swz<1, 0, 2, 3>(); break; case 2: // red-blue swap - color0 = float4(color0.b, color0.g, color0.r, color0.a); - color1 = float4(color1.b, color1.g, color1.r, color1.a); + color0 = color0.swz<2, 1, 0, 3>(); + color1 = color1.swz<2, 1, 0, 3>(); break; default: break; } - float a_base = color1.r; - if (a_base < 0.0f) - { - a_base = 0.0f; - } - else if (a_base > 65535.0f) - { - a_base = 65535.0f; - } + float a_base = color1.lane<0>(); + 
a_base = astc::clamp(a_base, 0.0f, 65535.0f); - float b0_base = a_base - color1.g; - float b1_base = a_base - color1.b; - float c_base = a_base - color0.r; - float d0_base = a_base - b0_base - c_base - color0.g; - float d1_base = a_base - b1_base - c_base - color0.b; + float b0_base = a_base - color1.lane<1>(); + float b1_base = a_base - color1.lane<2>(); + float c_base = a_base - color0.lane<0>(); + float d0_base = a_base - b0_base - c_base - color0.lane<1>(); + float d1_base = a_base - b1_base - c_base - color0.lane<2>(); // number of bits in the various fields in the various modes static const int mode_bits[8][4] = { @@ -1440,17 +1302,14 @@ static void quantize_hdr_rgb3( int a_intval = astc::flt2int_rtn(a_base * mode_scale); int a_lowbits = a_intval & 0xFF; - int a_quantval = color_quantization_tables[quantization_level][a_lowbits]; - int a_uquantval = color_unquantization_tables[quantization_level][a_quantval]; + int a_quantval = color_quant_tables[quant_level][a_lowbits]; + int a_uquantval = color_unquant_tables[quant_level][a_quantval]; a_intval = (a_intval & ~0xFF) | a_uquantval; - float a_fval = a_intval * mode_rscale; + float a_fval = static_cast(a_intval) * mode_rscale; // next, recompute C, then quantize and unquantize it - float c_fval = a_fval - color0.r; - if (c_fval < 0.0f) - c_fval = 0.0f; - else if (c_fval > 65535.0f) - c_fval = 65535.0f; + float c_fval = a_fval - color0.lane<0>(); + c_fval = astc::clamp(c_fval, 0.0f, 65535.0f); int c_intval = astc::flt2int_rtn(c_fval * mode_scale); @@ -1466,31 +1325,16 @@ static void quantize_hdr_rgb3( int c_quantval; int c_uquantval; - quantize_and_unquantize_retain_top_two_bits(quantization_level, c_lowbits, &c_quantval, &c_uquantval); + quantize_and_unquantize_retain_top_two_bits(quant_level, c_lowbits, &c_quantval, &c_uquantval); c_intval = (c_intval & ~0x3F) | (c_uquantval & 0x3F); - c_fval = c_intval * mode_rscale; + c_fval = static_cast(c_intval) * mode_rscale; // next, recompute B0 and B1, then quantize and unquantize them - float b0_fval = a_fval - color1.g; - float b1_fval = a_fval - color1.b; - if (b0_fval < 0.0f) - { - b0_fval = 0.0f; - } - else if (b0_fval > 65535.0f) - { - b0_fval = 65535.0f; - } - - if (b1_fval < 0.0f) - { - b1_fval = 0.0f; - } - else if (b1_fval > 65535.0f) - { - b1_fval = 65535.0f; - } + float b0_fval = a_fval - color1.lane<1>(); + float b1_fval = a_fval - color1.lane<2>(); + b0_fval = astc::clamp(b0_fval, 0.0f, 65535.0f); + b1_fval = astc::clamp(b1_fval, 0.0f, 65535.0f); int b0_intval = astc::flt2int_rtn(b0_fval * mode_scale); int b1_intval = astc::flt2int_rtn(b1_fval * mode_scale); @@ -1549,36 +1393,21 @@ static void quantize_hdr_rgb3( int b0_uquantval; int b1_uquantval; - quantize_and_unquantize_retain_top_two_bits(quantization_level, b0_lowbits, &b0_quantval, &b0_uquantval); + quantize_and_unquantize_retain_top_two_bits(quant_level, b0_lowbits, &b0_quantval, &b0_uquantval); - quantize_and_unquantize_retain_top_two_bits(quantization_level, b1_lowbits, &b1_quantval, &b1_uquantval); + quantize_and_unquantize_retain_top_two_bits(quant_level, b1_lowbits, &b1_quantval, &b1_uquantval); b0_intval = (b0_intval & ~0x3f) | (b0_uquantval & 0x3f); b1_intval = (b1_intval & ~0x3f) | (b1_uquantval & 0x3f); - b0_fval = b0_intval * mode_rscale; - b1_fval = b1_intval * mode_rscale; + b0_fval = static_cast(b0_intval) * mode_rscale; + b1_fval = static_cast(b1_intval) * mode_rscale; // finally, recompute D0 and D1, then quantize and unquantize them - float d0_fval = a_fval - b0_fval - c_fval - color0.g; - float d1_fval = 
a_fval - b1_fval - c_fval - color0.b; + float d0_fval = a_fval - b0_fval - c_fval - color0.lane<1>(); + float d1_fval = a_fval - b1_fval - c_fval - color0.lane<2>(); - if (d0_fval < -65535.0f) - { - d0_fval = -65535.0f; - } - else if (d0_fval > 65535.0f) - { - d0_fval = 65535.0f; - } - - if (d1_fval < -65535.0f) - { - d1_fval = -65535.0f; - } - else if (d1_fval > 65535.0f) - { - d1_fval = 65535.0f; - } + d0_fval = astc::clamp(d0_fval, -65535.0f, 65535.0f); + d1_fval = astc::clamp(d1_fval, -65535.0f, 65535.0f); int d0_intval = astc::flt2int_rtn(d0_fval * mode_scale); int d1_intval = astc::flt2int_rtn(d1_fval * mode_scale); @@ -1660,9 +1489,9 @@ static void quantize_hdr_rgb3( int d0_uquantval; int d1_uquantval; - quantize_and_unquantize_retain_top_four_bits(quantization_level, d0_lowbits, &d0_quantval, &d0_uquantval); + quantize_and_unquantize_retain_top_four_bits(quant_level, d0_lowbits, &d0_quantval, &d0_uquantval); - quantize_and_unquantize_retain_top_four_bits(quantization_level, d1_lowbits, &d1_quantval, &d1_uquantval); + quantize_and_unquantize_retain_top_four_bits(quant_level, d1_lowbits, &d1_quantval, &d1_uquantval); output[0] = a_quantval; output[1] = c_quantval; @@ -1679,70 +1508,62 @@ static void quantize_hdr_rgb3( // but usable. This representation is used if the light color is more than 4x the // color value of the dark color. float vals[6]; - vals[0] = color0_bak.r; - vals[1] = color1_bak.r; - vals[2] = color0_bak.g; - vals[3] = color1_bak.g; - vals[4] = color0_bak.b; - vals[5] = color1_bak.b; + vals[0] = color0_bak.lane<0>(); + vals[1] = color1_bak.lane<0>(); + vals[2] = color0_bak.lane<1>(); + vals[3] = color1_bak.lane<1>(); + vals[4] = color0_bak.lane<2>(); + vals[5] = color1_bak.lane<2>(); for (int i = 0; i < 6; i++) { - if (vals[i] < 0.0f) - { - vals[i] = 0.0f; - } - else if (vals[i] > 65020.0f) - { - vals[i] = 65020.0f; - } + vals[i] = astc::clamp(vals[i], 0.0f, 65020.0f); } for (int i = 0; i < 4; i++) { int idx = astc::flt2int_rtn(vals[i] * 1.0f / 256.0f); - output[i] = color_quantization_tables[quantization_level][idx]; + output[i] = color_quant_tables[quant_level][idx]; } for (int i = 4; i < 6; i++) { int dummy; int idx = astc::flt2int_rtn(vals[i] * 1.0f / 512.0f) + 128; - quantize_and_unquantize_retain_top_two_bits(quantization_level, idx, &(output[i]), &dummy); + quantize_and_unquantize_retain_top_two_bits(quant_level, idx, &(output[i]), &dummy); } return; } static void quantize_hdr_rgb_ldr_alpha3( - float4 color0, - float4 color1, + vfloat4 color0, + vfloat4 color1, int output[8], - int quantization_level + int quant_level ) { - color0.a *= (1.0f / 257.0f); - color1.a *= (1.0f / 257.0f); + float scale = 1.0f / 257.0f; - quantize_hdr_rgb3(color0, color1, output, quantization_level); + float a0 = astc::clamp255f(color0.lane<3>() * scale); + float a1 = astc::clamp255f(color1.lane<3>() * scale); - float a0 = astc::clamp255f(color0.a); - float a1 = astc::clamp255f(color1.a); - int ai0 = color_quantization_tables[quantization_level][astc::flt2int_rtn(a0)]; - int ai1 = color_quantization_tables[quantization_level][astc::flt2int_rtn(a1)]; + int ai0 = color_quant_tables[quant_level][astc::flt2int_rtn(a0)]; + int ai1 = color_quant_tables[quant_level][astc::flt2int_rtn(a1)]; output[6] = ai0; output[7] = ai1; + + quantize_hdr_rgb3(color0, color1, output, quant_level); } static void quantize_hdr_luminance_large_range3( - float4 color0, - float4 color1, + vfloat4 color0, + vfloat4 color1, int output[2], - int quantization_level + int quant_level ) { - - float lum1 = (color1.r + 
color1.g + color1.b) * (1.0f / 3.0f); - float lum0 = (color0.r + color0.g + color0.b) * (1.0f / 3.0f); + float lum0 = hadd_rgb_s(color0) * (1.0f / 3.0f); + float lum1 = hadd_rgb_s(color1) * (1.0f / 3.0f); if (lum1 < lum0) { @@ -1758,45 +1579,15 @@ static void quantize_hdr_luminance_large_range3( int upper_v0 = (ilum0 + 128) >> 8; int upper_v1 = (ilum1 + 128) >> 8; - if (upper_v0 < 0) - { - upper_v0 = 0; - } - else if (upper_v0 > 255) - { - upper_v0 = 255; - } - - if (upper_v1 < 0) - { - upper_v1 = 0; - } - else if (upper_v1 > 255) - { - upper_v1 = 255; - } + upper_v0 = astc::clamp(upper_v0, 0, 255); + upper_v1 = astc::clamp(upper_v1, 0, 255); // find the closest encodable point in the lower half of the code-point space int lower_v0 = (ilum1 + 256) >> 8; int lower_v1 = ilum0 >> 8; - if (lower_v0 < 0) - { - lower_v0 = 0; - } - else if (lower_v0 > 255) - { - lower_v0 = 255; - } - - if (lower_v1 < 0) - { - lower_v1 = 0; - } - else if (lower_v1 > 255) - { - lower_v1 = 255; - } + lower_v0 = astc::clamp(lower_v0, 0, 255); + lower_v1 = astc::clamp(lower_v1, 0, 255); // determine the distance between the point in code-point space and the input value int upper0_dec = upper_v0 << 8; @@ -1825,18 +1616,18 @@ static void quantize_hdr_luminance_large_range3( } // OK; encode. - output[0] = color_quantization_tables[quantization_level][v0]; - output[1] = color_quantization_tables[quantization_level][v1]; + output[0] = color_quant_tables[quant_level][v0]; + output[1] = color_quant_tables[quant_level][v1]; } -static int try_quantize_hdr_luminance_small_range3( - float4 color0, - float4 color1, +static bool try_quantize_hdr_luminance_small_range3( + vfloat4 color0, + vfloat4 color1, int output[2], - int quantization_level + int quant_level ) { - float lum1 = (color1.r + color1.g + color1.b) * (1.0f / 3.0f); - float lum0 = (color0.r + color0.g + color0.b) * (1.0f / 3.0f); + float lum0 = hadd_rgb_s(color0) * (1.0f / 3.0f); + float lum1 = hadd_rgb_s(color1) * (1.0f / 3.0f); if (lum1 < lum0) { @@ -1851,7 +1642,7 @@ static int try_quantize_hdr_luminance_small_range3( // difference of more than a factor-of-2 results in immediate failure. 
if (ilum1 - ilum0 > 2048) { - return 0; + return false; } int lowval, highval, diffval; @@ -1863,126 +1654,76 @@ static int try_quantize_hdr_luminance_small_range3( lowval = (ilum0 + 16) >> 5; highval = (ilum1 + 16) >> 5; - if (lowval < 0) - { - lowval = 0; - } - else if (lowval > 2047) - { - lowval = 2047; - } - - if (highval < 0) - { - highval = 0; - } - else if (highval > 2047) - { - highval = 2047; - } + lowval = astc::clamp(lowval, 0, 2047); + highval = astc::clamp(highval, 0, 2047); v0 = lowval & 0x7F; - v0e = color_quantization_tables[quantization_level][v0]; - v0d = color_unquantization_tables[quantization_level][v0e]; - if ((v0d & 0x80) == 0x80) - { - goto LOW_PRECISION_SUBMODE; - } - - lowval = (lowval & ~0x7F) | (v0d & 0x7F); - diffval = highval - lowval; - if (diffval < 0 || diffval > 15) - { - goto LOW_PRECISION_SUBMODE; - } + v0e = color_quant_tables[quant_level][v0]; + v0d = color_unquant_tables[quant_level][v0e]; - v1 = ((lowval >> 3) & 0xF0) | diffval; - v1e = color_quantization_tables[quantization_level][v1]; - v1d = color_unquantization_tables[quantization_level][v1e]; - if ((v1d & 0xF0) != (v1 & 0xF0)) + if (v0d < 0x80) { - goto LOW_PRECISION_SUBMODE; + lowval = (lowval & ~0x7F) | v0d; + diffval = highval - lowval; + if (diffval >= 0 && diffval <= 15) + { + v1 = ((lowval >> 3) & 0xF0) | diffval; + v1e = color_quant_tables[quant_level][v1]; + v1d = color_unquant_tables[quant_level][v1e]; + if ((v1d & 0xF0) == (v1 & 0xF0)) + { + output[0] = v0e; + output[1] = v1e; + return true; + } + } } - output[0] = v0e; - output[1] = v1e; - return 1; - // failed to encode the high-precision submode; well, then try to encode the // low-precision submode. -LOW_PRECISION_SUBMODE: lowval = (ilum0 + 32) >> 6; highval = (ilum1 + 32) >> 6; - if (lowval < 0) - { - lowval = 0; - } - else if (lowval > 1023) - { - lowval = 1023; - } - if (highval < 0) - { - highval = 0; - } - else if (highval > 1023) - { - highval = 1023; - } + lowval = astc::clamp(lowval, 0, 1023); + highval = astc::clamp(highval, 0, 1023); v0 = (lowval & 0x7F) | 0x80; - v0e = color_quantization_tables[quantization_level][v0]; - v0d = color_unquantization_tables[quantization_level][v0e]; + v0e = color_quant_tables[quant_level][v0]; + v0d = color_unquant_tables[quant_level][v0e]; if ((v0d & 0x80) == 0) { - return 0; + return false; } lowval = (lowval & ~0x7F) | (v0d & 0x7F); diffval = highval - lowval; if (diffval < 0 || diffval > 31) { - return 0; + return false; } v1 = ((lowval >> 2) & 0xE0) | diffval; - v1e = color_quantization_tables[quantization_level][v1]; - v1d = color_unquantization_tables[quantization_level][v1e]; + v1e = color_quant_tables[quant_level][v1]; + v1d = color_unquant_tables[quant_level][v1e]; if ((v1d & 0xE0) != (v1 & 0xE0)) { - return 0;; + return false; } output[0] = v0e; output[1] = v1e; - return 1; + return true; } static void quantize_hdr_alpha3( float alpha0, float alpha1, int output[2], - int quantization_level + int quant_level ) { - if (alpha0 < 0) - { - alpha0 = 0; - } - else if (alpha0 > 65280) - { - alpha0 = 65280; - } - - if (alpha1 < 0) - { - alpha1 = 0; - } - else if (alpha1 > 65280) - { - alpha1 = 65280; - } + alpha0 = astc::clamp(alpha0, 0.0f, 65280.0f); + alpha1 = astc::clamp(alpha1, 0.0f, 65280.0f); int ialpha0 = astc::flt2int_rtn(alpha0); int ialpha1 = astc::flt2int_rtn(alpha1); @@ -1999,8 +1740,8 @@ static void quantize_hdr_alpha3( val1 = (ialpha1 + (128 >> i)) >> (8 - i); v6 = (val0 & 0x7F) | ((i & 1) << 7); - v6e = color_quantization_tables[quantization_level][v6]; - v6d = 
color_unquantization_tables[quantization_level][v6e]; + v6e = color_quant_tables[quant_level][v6]; + v6d = color_unquant_tables[quant_level][v6e]; if ((v6 ^ v6d) & 0x80) { @@ -2018,10 +1759,10 @@ static void quantize_hdr_alpha3( } v7 = ((i & 2) << 6) | ((val0 >> 7) << (6 - i)) | (diffval & mask); - v7e = color_quantization_tables[quantization_level][v7]; - v7d = color_unquantization_tables[quantization_level][v7e]; + v7e = color_quant_tables[quant_level][v7]; + v7d = color_unquant_tables[quant_level][v7e]; - static const int testbits[3] = { 0xE0, 0xF0, 0xF8 }; + static const int testbits[3] { 0xE0, 0xF0, 0xF8 }; if ((v7 ^ v7d) & testbits[i]) { @@ -2039,8 +1780,8 @@ static void quantize_hdr_alpha3( v6 = val0 | 0x80; v7 = val1 | 0x80; - v6e = color_quantization_tables[quantization_level][v6]; - v7e = color_quantization_tables[quantization_level][v7]; + v6e = color_quant_tables[quant_level][v6]; + v7e = color_quant_tables[quant_level][v7]; output[0] = v6e; output[1] = v7e; @@ -2048,13 +1789,13 @@ static void quantize_hdr_alpha3( } static void quantize_hdr_rgb_alpha3( - float4 color0, - float4 color1, + vfloat4 color0, + vfloat4 color1, int output[8], - int quantization_level + int quant_level ) { - quantize_hdr_rgb3(color0, color1, output, quantization_level); - quantize_hdr_alpha3(color0.a, color1.a, output + 6, quantization_level); + quantize_hdr_rgb3(color0, color1, output, quant_level); + quantize_hdr_alpha3(color0.lane<3>(), color1.lane<3>(), output + 6, quant_level); } /* @@ -2062,133 +1803,126 @@ static void quantize_hdr_rgb_alpha3( delta-based representation; as such, it will report back the format it actually used. */ int pack_color_endpoints( - float4 color0, - float4 color1, - float4 rgbs_color, - float4 rgbo_color, + vfloat4 color0, + vfloat4 color1, + vfloat4 rgbs_color, + vfloat4 rgbo_color, int format, int* output, - int quantization_level + int quant_level ) { - assert(quantization_level >= 0 && quantization_level < 21); - // we do not support negative colors. - color0.r = MAX(color0.r, 0.0f); - color0.g = MAX(color0.g, 0.0f); - color0.b = MAX(color0.b, 0.0f); - color0.a = MAX(color0.a, 0.0f); + assert(quant_level >= 0 && quant_level < 21); - color1.r = MAX(color1.r, 0.0f); - color1.g = MAX(color1.g, 0.0f); - color1.b = MAX(color1.b, 0.0f); - color1.a = MAX(color1.a, 0.0f); + // we do not support negative colors. 
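// The non-negativity clamp just below, like the astc::clamp calls throughout
// this hunk, folds the old per-channel if/else chains into single helper
// calls. A self-contained sketch of both idioms, using hypothetical stand-in
// names (clamp_value, max4, vec4f) rather than the library's astc::clamp and
// vfloat4:

// Scalar clamp, equivalent in spirit to astc::clamp(v, lo, hi).
static float clamp_value(float v, float lo, float hi)
{
    return v < lo ? lo : (v > hi ? hi : v);
}

// Lane-wise max against a scalar floor, mirroring max(color, 0.0f).
struct vec4f { float v[4]; };

static vec4f max4(vec4f a, float floor_val)
{
    for (int i = 0; i < 4; i++)
    {
        a.v[i] = (a.v[i] > floor_val) ? a.v[i] : floor_val;
    }
    return a;
}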
+ color0 = max(color0, 0.0f); + color1 = max(color1, 0.0f); int retval = 0; - // TODO: Make format an endpoint_fmt enum type switch (format) { case FMT_RGB: - if (quantization_level <= 18) + if (quant_level <= 18) { - if (try_quantize_rgb_delta_blue_contract(color0, color1, output, quantization_level)) + if (try_quantize_rgb_delta_blue_contract(color0, color1, output, quant_level)) { retval = FMT_RGB_DELTA; break; } - if (try_quantize_rgb_delta(color0, color1, output, quantization_level)) + if (try_quantize_rgb_delta(color0, color1, output, quant_level)) { retval = FMT_RGB_DELTA; break; } } - if (try_quantize_rgb_blue_contract(color0, color1, output, quantization_level)) + if (try_quantize_rgb_blue_contract(color0, color1, output, quant_level)) { retval = FMT_RGB; break; } - quantize_rgb(color0, color1, output, quantization_level); + quantize_rgb(color0, color1, output, quant_level); retval = FMT_RGB; break; case FMT_RGBA: - if (quantization_level <= 18) + if (quant_level <= 18) { - if (try_quantize_rgba_delta_blue_contract(color0, color1, output, quantization_level)) + if (try_quantize_rgba_delta_blue_contract(color0, color1, output, quant_level)) { retval = FMT_RGBA_DELTA; break; } - if (try_quantize_rgba_delta(color0, color1, output, quantization_level)) + if (try_quantize_rgba_delta(color0, color1, output, quant_level)) { retval = FMT_RGBA_DELTA; break; } } - if (try_quantize_rgba_blue_contract(color0, color1, output, quantization_level)) + if (try_quantize_rgba_blue_contract(color0, color1, output, quant_level)) { retval = FMT_RGBA; break; } - quantize_rgba(color0, color1, output, quantization_level); + quantize_rgba(color0, color1, output, quant_level); retval = FMT_RGBA; break; case FMT_RGB_SCALE: - quantize_rgbs_new(rgbs_color, output, quantization_level); + quantize_rgbs_new(rgbs_color, output, quant_level); retval = FMT_RGB_SCALE; break; case FMT_HDR_RGB_SCALE: - quantize_hdr_rgbo3(rgbo_color, output, quantization_level); + quantize_hdr_rgbo3(rgbo_color, output, quant_level); retval = FMT_HDR_RGB_SCALE; break; case FMT_HDR_RGB: - quantize_hdr_rgb3(color0, color1, output, quantization_level); + quantize_hdr_rgb3(color0, color1, output, quant_level); retval = FMT_HDR_RGB; break; case FMT_RGB_SCALE_ALPHA: - quantize_rgbs_alpha_new(color0, color1, rgbs_color, output, quantization_level); + quantize_rgbs_alpha_new(color0, color1, rgbs_color, output, quant_level); retval = FMT_RGB_SCALE_ALPHA; break; case FMT_HDR_LUMINANCE_SMALL_RANGE: case FMT_HDR_LUMINANCE_LARGE_RANGE: - if (try_quantize_hdr_luminance_small_range3(color0, color1, output, quantization_level)) + if (try_quantize_hdr_luminance_small_range3(color0, color1, output, quant_level)) { retval = FMT_HDR_LUMINANCE_SMALL_RANGE; break; } - quantize_hdr_luminance_large_range3(color0, color1, output, quantization_level); + quantize_hdr_luminance_large_range3(color0, color1, output, quant_level); retval = FMT_HDR_LUMINANCE_LARGE_RANGE; break; case FMT_LUMINANCE: - quantize_luminance(color0, color1, output, quantization_level); + quantize_luminance(color0, color1, output, quant_level); retval = FMT_LUMINANCE; break; case FMT_LUMINANCE_ALPHA: - if (quantization_level <= 18) + if (quant_level <= 18) { - if (try_quantize_luminance_alpha_delta(color0, color1, output, quantization_level)) + if (try_quantize_luminance_alpha_delta(color0, color1, output, quant_level)) { retval = FMT_LUMINANCE_ALPHA_DELTA; break; } } - quantize_luminance_alpha(color0, color1, output, quantization_level); + quantize_luminance_alpha(color0, color1, output, 
quant_level); retval = FMT_LUMINANCE_ALPHA; break; case FMT_HDR_RGB_LDR_ALPHA: - quantize_hdr_rgb_ldr_alpha3(color0, color1, output, quantization_level); + quantize_hdr_rgb_ldr_alpha3(color0, color1, output, quant_level); retval = FMT_HDR_RGB_LDR_ALPHA; break; case FMT_HDR_RGBA: - quantize_hdr_rgb_alpha3(color0, color1, output, quantization_level); + quantize_hdr_rgb_alpha3(color0, color1, output, quant_level); retval = FMT_HDR_RGBA; break; } diff --git a/libkram/astc-encoder/astcenc_color_unquantize.cpp b/libkram/astc-encoder/astcenc_color_unquantize.cpp index 2321f0d7..427c8817 100644 --- a/libkram/astc-encoder/astcenc_color_unquantize.cpp +++ b/libkram/astc-encoder/astcenc_color_unquantize.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -15,308 +15,203 @@ // under the License. // ---------------------------------------------------------------------------- +#include + /** * @brief Functions for color unquantization. */ #include "astcenc_internal.h" -static int rgb_delta_unpack( - const int input[6], - int quantization_level, - uint4* output0, - uint4* output1 +static ASTCENC_SIMD_INLINE vint4 unquant_color( + int quant_level, + vint4 inputq ) { - // unquantize the color endpoints - int r0 = color_unquantization_tables[quantization_level][input[0]]; - int g0 = color_unquantization_tables[quantization_level][input[2]]; - int b0 = color_unquantization_tables[quantization_level][input[4]]; - - int r1 = color_unquantization_tables[quantization_level][input[1]]; - int g1 = color_unquantization_tables[quantization_level][input[3]]; - int b1 = color_unquantization_tables[quantization_level][input[5]]; - - // perform the bit-transfer procedure - r0 |= (r1 & 0x80) << 1; - g0 |= (g1 & 0x80) << 1; - b0 |= (b1 & 0x80) << 1; - r1 &= 0x7F; - g1 &= 0x7F; - b1 &= 0x7F; - if (r1 & 0x40) - r1 -= 0x80; - if (g1 & 0x40) - g1 -= 0x80; - if (b1 & 0x40) - b1 -= 0x80; - - r0 >>= 1; - g0 >>= 1; - b0 >>= 1; - r1 >>= 1; - g1 >>= 1; - b1 >>= 1; - - int rgbsum = r1 + g1 + b1; - - r1 += r0; - g1 += g0; - b1 += b0; - - int retval; - - int r0e, g0e, b0e; - int r1e, g1e, b1e; - - if (rgbsum >= 0) - { - r0e = r0; - g0e = g0; - b0e = b0; + const uint8_t* unq = color_unquant_tables[quant_level]; + return vint4(unq[inputq.lane<0>()], unq[inputq.lane<1>()], + unq[inputq.lane<2>()], unq[inputq.lane<3>()]); +} - r1e = r1; - g1e = g1; - b1e = b1; +static ASTCENC_SIMD_INLINE vint4 uncontract_color( + vint4 input +) { + vmask4 mask(true, true, false, false); + vint4 bc0 = asr<1>(input + input.lane<2>()); + return select(input, bc0, mask); +} - retval = 0; - } - else +static void rgba_delta_unpack( + vint4 input0q, + vint4 input1q, + int quant_level, + vint4& output0, + vint4& output1 +) { + // Unquantize color endpoints + vint4 input0 = unquant_color(quant_level, input0q); + vint4 input1 = unquant_color(quant_level, input1q); + + // Perform bit-transfer + input0 = input0 | lsl<1>(input1 & 0x80); + input1 = input1 & 0x7F; + vmask4 mask = (input1 & 0x40) != vint4::zero(); + input1 = select(input1, input1 - 0x80, mask); + + // Scale + input0 = asr<1>(input0); + input1 = asr<1>(input1); + + // Apply blue-uncontraction if needed + int rgb_sum = hadd_rgb_s(input1); + input1 = input1 + input0; + if (rgb_sum < 0) { - 
r0e = (r1 + b1) >> 1; - g0e = (g1 + b1) >> 1; - b0e = b1; - - r1e = (r0 + b0) >> 1; - g1e = (g0 + b0) >> 1; - b1e = b0; - - retval = 1; + input0 = uncontract_color(input0); + input1 = uncontract_color(input1); + std::swap(input0, input1); } - if (r0e < 0) - r0e = 0; - else if (r0e > 255) - r0e = 255; - - if (g0e < 0) - g0e = 0; - else if (g0e > 255) - g0e = 255; - - if (b0e < 0) - b0e = 0; - else if (b0e > 255) - b0e = 255; - - if (r1e < 0) - r1e = 0; - else if (r1e > 255) - r1e = 255; - - if (g1e < 0) - g1e = 0; - else if (g1e > 255) - g1e = 255; - - if (b1e < 0) - b1e = 0; - else if (b1e > 255) - b1e = 255; - - output0->r = r0e; - output0->g = g0e; - output0->b = b0e; - output0->a = 0xFF; - - output1->r = r1e; - output1->g = g1e; - output1->b = b1e; - output1->a = 0xFF; - - return retval; + output0 = clamp(0, 255, input0); + output1 = clamp(0, 255, input1); } -static int rgb_unpack( - const int input[6], - int quantization_level, - uint4* output0, - uint4* output1 +static void rgb_delta_unpack( + vint4 input0q, + vint4 input1q, + int quant_level, + vint4& output0, + vint4& output1 ) { - int ri0b = color_unquantization_tables[quantization_level][input[0]]; - int ri1b = color_unquantization_tables[quantization_level][input[1]]; - int gi0b = color_unquantization_tables[quantization_level][input[2]]; - int gi1b = color_unquantization_tables[quantization_level][input[3]]; - int bi0b = color_unquantization_tables[quantization_level][input[4]]; - int bi1b = color_unquantization_tables[quantization_level][input[5]]; - - if (ri0b + gi0b + bi0b > ri1b + gi1b + bi1b) - { - // blue-contraction - ri0b = (ri0b + bi0b) >> 1; - gi0b = (gi0b + bi0b) >> 1; - ri1b = (ri1b + bi1b) >> 1; - gi1b = (gi1b + bi1b) >> 1; - - output0->r = ri1b; - output0->g = gi1b; - output0->b = bi1b; - output0->a = 255; - - output1->r = ri0b; - output1->g = gi0b; - output1->b = bi0b; - output1->a = 255; - return 1; - } - else - { - output0->r = ri0b; - output0->g = gi0b; - output0->b = bi0b; - output0->a = 255; - - output1->r = ri1b; - output1->g = gi1b; - output1->b = bi1b; - output1->a = 255; - return 0; - } + rgba_delta_unpack(input0q, input1q, quant_level, output0, output1); + output0.set_lane<3>(255); + output1.set_lane<3>(255); } static void rgba_unpack( - const int input[8], - int quantization_level, - uint4* output0, - uint4* output1 + vint4 input0q, + vint4 input1q, + int quant_level, + vint4& output0, + vint4& output1 ) { - int order = rgb_unpack(input, quantization_level, output0, output1); - if (order == 0) - { - output0->a = color_unquantization_tables[quantization_level][input[6]]; - output1->a = color_unquantization_tables[quantization_level][input[7]]; - } - else + // Unquantize color endpoints + vint4 input0 = unquant_color(quant_level, input0q); + vint4 input1 = unquant_color(quant_level, input1q); + + // Apply blue-uncontraction if needed + if (hadd_rgb_s(input0) > hadd_rgb_s(input1)) { - output0->a = color_unquantization_tables[quantization_level][input[7]]; - output1->a = color_unquantization_tables[quantization_level][input[6]]; + input0 = uncontract_color(input0); + input1 = uncontract_color(input1); + std::swap(input0, input1); } + + output0 = input0; + output1 = input1; } -static void rgba_delta_unpack( - const int input[8], - int quantization_level, - uint4* output0, - uint4* output1 +static void rgb_unpack( + vint4 input0q, + vint4 input1q, + int quant_level, + vint4& output0, + vint4& output1 ) { - int a0 = color_unquantization_tables[quantization_level][input[6]]; - int a1 = 
color_unquantization_tables[quantization_level][input[7]]; - a0 |= (a1 & 0x80) << 1; - a1 &= 0x7F; - if (a1 & 0x40) - a1 -= 0x80; - a0 >>= 1; - a1 >>= 1; - a1 += a0; - - if (a1 < 0) - a1 = 0; - else if (a1 > 255) - a1 = 255; - - int order = rgb_delta_unpack(input, quantization_level, output0, output1); - if (order == 0) - { - output0->a = a0; - output1->a = a1; - } - else - { - output0->a = a1; - output1->a = a0; - } + rgba_unpack(input0q, input1q, quant_level, output0, output1); + output0.set_lane<3>(255); + output1.set_lane<3>(255); } -static void rgb_scale_unpack( - const int input[4], - int quantization_level, - uint4* output0, - uint4* output1 +static void rgb_scale_alpha_unpack( + vint4 input0q, + int alpha1q, + int scaleq, + int quant_level, + vint4& output0, + vint4& output1 ) { - int ir = color_unquantization_tables[quantization_level][input[0]]; - int ig = color_unquantization_tables[quantization_level][input[1]]; - int ib = color_unquantization_tables[quantization_level][input[2]]; + // Unquantize color endpoints + vint4 input = unquant_color(quant_level, input0q); + int alpha1 = color_unquant_tables[quant_level][alpha1q]; + int scale = color_unquant_tables[quant_level][scaleq]; - int iscale = color_unquantization_tables[quantization_level][input[3]]; + output1 = input; + output1.set_lane<3>(alpha1); - *output1 = uint4(ir, ig, ib, 255); - *output0 = uint4((ir * iscale) >> 8, (ig * iscale) >> 8, (ib * iscale) >> 8, 255); + output0 = asr<8>(input * scale); + output0.set_lane<3>(input.lane<3>()); } -static void rgb_scale_alpha_unpack( - const int input[6], - int quantization_level, - uint4* output0, - uint4* output1 +static void rgb_scale_unpack( + vint4 input0q, + int scaleq, + int quant_level, + vint4& output0, + vint4& output1 ) { - rgb_scale_unpack(input, quantization_level, output0, output1); - output0->a = color_unquantization_tables[quantization_level][input[4]]; - output1->a = color_unquantization_tables[quantization_level][input[5]]; + vint4 input = unquant_color(quant_level, input0q); + int scale = color_unquant_tables[quant_level][scaleq]; + + output1 = input; + output1.set_lane<3>(255); + + output0 = asr<8>(input * scale); + output0.set_lane<3>(255); } static void luminance_unpack( const int input[2], - int quantization_level, - uint4* output0, - uint4* output1 + int quant_level, + vint4* output0, + vint4* output1 ) { - int lum0 = color_unquantization_tables[quantization_level][input[0]]; - int lum1 = color_unquantization_tables[quantization_level][input[1]]; - *output0 = uint4(lum0, lum0, lum0, 255); - *output1 = uint4(lum1, lum1, lum1, 255); + int lum0 = color_unquant_tables[quant_level][input[0]]; + int lum1 = color_unquant_tables[quant_level][input[1]]; + *output0 = vint4(lum0, lum0, lum0, 255); + *output1 = vint4(lum1, lum1, lum1, 255); } static void luminance_delta_unpack( const int input[2], - int quantization_level, - uint4* output0, - uint4* output1 + int quant_level, + vint4* output0, + vint4* output1 ) { - int v0 = color_unquantization_tables[quantization_level][input[0]]; - int v1 = color_unquantization_tables[quantization_level][input[1]]; + int v0 = color_unquant_tables[quant_level][input[0]]; + int v1 = color_unquant_tables[quant_level][input[1]]; int l0 = (v0 >> 2) | (v1 & 0xC0); int l1 = l0 + (v1 & 0x3F); - if (l1 > 255) - l1 = 255; + l1 = astc::min(l1, 255); - *output0 = uint4(l0, l0, l0, 255); - *output1 = uint4(l1, l1, l1, 255); + *output0 = vint4(l0, l0, l0, 255); + *output1 = vint4(l1, l1, l1, 255); } static void luminance_alpha_unpack( const int 
input[4], - int quantization_level, - uint4* output0, - uint4* output1 + int quant_level, + vint4* output0, + vint4* output1 ) { - int lum0 = color_unquantization_tables[quantization_level][input[0]]; - int lum1 = color_unquantization_tables[quantization_level][input[1]]; - int alpha0 = color_unquantization_tables[quantization_level][input[2]]; - int alpha1 = color_unquantization_tables[quantization_level][input[3]]; - *output0 = uint4(lum0, lum0, lum0, alpha0); - *output1 = uint4(lum1, lum1, lum1, alpha1); + int lum0 = color_unquant_tables[quant_level][input[0]]; + int lum1 = color_unquant_tables[quant_level][input[1]]; + int alpha0 = color_unquant_tables[quant_level][input[2]]; + int alpha1 = color_unquant_tables[quant_level][input[3]]; + *output0 = vint4(lum0, lum0, lum0, alpha0); + *output1 = vint4(lum1, lum1, lum1, alpha1); } static void luminance_alpha_delta_unpack( const int input[4], - int quantization_level, - uint4* output0, - uint4* output1 + int quant_level, + vint4* output0, + vint4* output1 ) { - int lum0 = color_unquantization_tables[quantization_level][input[0]]; - int lum1 = color_unquantization_tables[quantization_level][input[1]]; - int alpha0 = color_unquantization_tables[quantization_level][input[2]]; - int alpha1 = color_unquantization_tables[quantization_level][input[3]]; + int lum0 = color_unquant_tables[quant_level][input[0]]; + int lum1 = color_unquant_tables[quant_level][input[1]]; + int alpha0 = color_unquant_tables[quant_level][input[2]]; + int alpha1 = color_unquant_tables[quant_level][input[3]]; lum0 |= (lum1 & 0x80) << 1; alpha0 |= (alpha1 & 0x80) << 1; @@ -334,31 +229,24 @@ static void luminance_alpha_delta_unpack( lum1 += lum0; alpha1 += alpha0; - if (lum1 < 0) - lum1 = 0; - else if (lum1 > 255) - lum1 = 255; + lum1 = astc::clamp(lum1, 0, 255); + alpha1 = astc::clamp(alpha1, 0, 255); - if (alpha1 < 0) - alpha1 = 0; - else if (alpha1 > 255) - alpha1 = 255; - - *output0 = uint4(lum0, lum0, lum0, alpha0); - *output1 = uint4(lum1, lum1, lum1, alpha1); + *output0 = vint4(lum0, lum0, lum0, alpha0); + *output1 = vint4(lum1, lum1, lum1, alpha1); } // RGB-offset format static void hdr_rgbo_unpack3( const int input[4], - int quantization_level, - uint4* output0, - uint4* output1 + int quant_level, + vint4* output0, + vint4* output1 ) { - int v0 = color_unquantization_tables[quantization_level][input[0]]; - int v1 = color_unquantization_tables[quantization_level][input[1]]; - int v2 = color_unquantization_tables[quantization_level][input[2]]; - int v3 = color_unquantization_tables[quantization_level][input[3]]; + int v0 = color_unquant_tables[quant_level][input[0]]; + int v1 = color_unquant_tables[quant_level][input[1]]; + int v2 = color_unquant_tables[quant_level][input[2]]; + int v3 = color_unquant_tables[quant_level][input[3]]; int modeval = ((v0 & 0xC0) >> 6) | (((v1 & 0x80) >> 7) << 2) | (((v2 & 0x80) >> 7) << 3); @@ -437,7 +325,7 @@ static void hdr_rgbo_unpack3( red |= bit5 << 10; // expand to 12 bits. 
- static const int shamts[6] = { 1, 1, 2, 3, 4, 5 }; + static const int shamts[6] { 1, 1, 2, 3, 4, 5 }; int shamt = shamts[mode]; red <<= shamt; green <<= shamt; @@ -489,23 +377,23 @@ static void hdr_rgbo_unpack3( if (blue0 < 0) blue0 = 0; - *output0 = uint4(red0 << 4, green0 << 4, blue0 << 4, 0x7800); - *output1 = uint4(red << 4, green << 4, blue << 4, 0x7800); + *output0 = vint4(red0 << 4, green0 << 4, blue0 << 4, 0x7800); + *output1 = vint4(red << 4, green << 4, blue << 4, 0x7800); } static void hdr_rgb_unpack3( const int input[6], - int quantization_level, - uint4* output0, - uint4* output1 + int quant_level, + vint4* output0, + vint4* output1 ) { - int v0 = color_unquantization_tables[quantization_level][input[0]]; - int v1 = color_unquantization_tables[quantization_level][input[1]]; - int v2 = color_unquantization_tables[quantization_level][input[2]]; - int v3 = color_unquantization_tables[quantization_level][input[3]]; - int v4 = color_unquantization_tables[quantization_level][input[4]]; - int v5 = color_unquantization_tables[quantization_level][input[5]]; + int v0 = color_unquant_tables[quant_level][input[0]]; + int v1 = color_unquant_tables[quant_level][input[1]]; + int v2 = color_unquant_tables[quant_level][input[2]]; + int v3 = color_unquant_tables[quant_level][input[3]]; + int v4 = color_unquant_tables[quant_level][input[4]]; + int v5 = color_unquant_tables[quant_level][input[5]]; // extract all the fixed-placement bitfields int modeval = ((v1 & 0x80) >> 7) | (((v2 & 0x80) >> 7) << 1) | (((v3 & 0x80) >> 7) << 2); @@ -514,8 +402,8 @@ static void hdr_rgb_unpack3( if (majcomp == 3) { - *output0 = uint4(v0 << 8, v2 << 8, (v4 & 0x7F) << 9, 0x7800); - *output1 = uint4(v1 << 8, v3 << 8, (v5 & 0x7F) << 9, 0x7800); + *output0 = vint4(v0 << 8, v2 << 8, (v4 & 0x7F) << 9, 0x7800); + *output1 = vint4(v1 << 8, v3 << 8, (v5 & 0x7F) << 9, 0x7800); return; } @@ -527,13 +415,12 @@ static void hdr_rgb_unpack3( int d1 = v5 & 0x7f; // get hold of the number of bits in 'd0' and 'd1' - static const int dbits_tab[8] = { 7, 6, 7, 6, 5, 6, 5, 6 }; + static const int dbits_tab[8] { 7, 6, 7, 6, 5, 6, 5, 6 }; int dbits = dbits_tab[modeval]; // extract six variable-placement bits int bit0 = (v2 >> 6) & 1; int bit1 = (v3 >> 6) & 1; - int bit2 = (v4 >> 6) & 1; int bit3 = (v5 >> 6) & 1; int bit4 = (v4 >> 5) & 1; @@ -565,23 +452,28 @@ static void hdr_rgb_unpack3( c |= bit2 << 7; if (ohmod & 0x5B) + { b0 |= bit0 << 6; - if (ohmod & 0x5B) b1 |= bit1 << 6; + } if (ohmod & 0x12) + { b0 |= bit2 << 7; - if (ohmod & 0x12) b1 |= bit3 << 7; + } if (ohmod & 0xAF) + { d0 |= bit4 << 5; - if (ohmod & 0xAF) d1 |= bit5 << 5; + } + if (ohmod & 0x5) + { d0 |= bit2 << 6; - if (ohmod & 0x5) d1 |= bit3 << 6; + } // sign-extend 'd0' and 'd1' // note: this code assumes that signed right-shift actually sign-fills, not zero-fills. 
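// The sign-fill assumption noted above can be sidestepped with a portable
// sign-extension helper; a minimal sketch (sign_extend is a hypothetical
// name, not a function in this codec):

// Sign-extend a value held in the low 'bits' bits of an int, without relying
// on implementation-defined behaviour of signed right-shift.
static int sign_extend(int value, int bits)
{
    int sign_bit = 1 << (bits - 1);
    return (value ^ sign_bit) - sign_bit;
}

// Example: a 6-bit field holding 0x3F decodes to -1: sign_extend(0x3F, 6) == -1.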
@@ -613,35 +505,13 @@ static void hdr_rgb_unpack3( int blue0 = a - b1 - c - d1; // clamp the color components to [0,2^12 - 1] - if (red0 < 0) - red0 = 0; - else if (red0 > 0xFFF) - red0 = 0xFFF; + red0 = astc::clamp(red0, 0, 4095); + green0 = astc::clamp(green0, 0, 4095); + blue0 = astc::clamp(blue0, 0, 4095); - if (green0 < 0) - green0 = 0; - else if (green0 > 0xFFF) - green0 = 0xFFF; - - if (blue0 < 0) - blue0 = 0; - else if (blue0 > 0xFFF) - blue0 = 0xFFF; - - if (red1 < 0) - red1 = 0; - else if (red1 > 0xFFF) - red1 = 0xFFF; - - if (green1 < 0) - green1 = 0; - else if (green1 > 0xFFF) - green1 = 0xFFF; - - if (blue1 < 0) - blue1 = 0; - else if (blue1 > 0xFFF) - blue1 = 0xFFF; + red1 = astc::clamp(red1, 0, 4095); + green1 = astc::clamp(green1, 0, 4095); + blue1 = astc::clamp(blue1, 0, 4095); // switch around the color components int temp0, temp1; @@ -667,32 +537,32 @@ static void hdr_rgb_unpack3( break; } - *output0 = uint4(red0 << 4, green0 << 4, blue0 << 4, 0x7800); - *output1 = uint4(red1 << 4, green1 << 4, blue1 << 4, 0x7800); + *output0 = vint4(red0 << 4, green0 << 4, blue0 << 4, 0x7800); + *output1 = vint4(red1 << 4, green1 << 4, blue1 << 4, 0x7800); } static void hdr_rgb_ldr_alpha_unpack3( const int input[8], - int quantization_level, - uint4* output0, - uint4* output1 + int quant_level, + vint4* output0, + vint4* output1 ) { - hdr_rgb_unpack3(input, quantization_level, output0, output1); + hdr_rgb_unpack3(input, quant_level, output0, output1); - int v6 = color_unquantization_tables[quantization_level][input[6]]; - int v7 = color_unquantization_tables[quantization_level][input[7]]; - output0->a = v6; - output1->a = v7; + int v6 = color_unquant_tables[quant_level][input[6]]; + int v7 = color_unquant_tables[quant_level][input[7]]; + output0->set_lane<3>(v6); + output1->set_lane<3>(v7); } static void hdr_luminance_small_range_unpack( const int input[2], - int quantization_level, - uint4* output0, - uint4* output1 + int quant_level, + vint4* output0, + vint4* output1 ) { - int v0 = color_unquantization_tables[quantization_level][input[0]]; - int v1 = color_unquantization_tables[quantization_level][input[1]]; + int v0 = color_unquant_tables[quant_level][input[0]]; + int v1 = color_unquant_tables[quant_level][input[1]]; int y0, y1; if (v0 & 0x80) @@ -710,18 +580,18 @@ static void hdr_luminance_small_range_unpack( if (y1 > 0xFFF) y1 = 0xFFF; - *output0 = uint4(y0 << 4, y0 << 4, y0 << 4, 0x7800); - *output1 = uint4(y1 << 4, y1 << 4, y1 << 4, 0x7800); + *output0 = vint4(y0 << 4, y0 << 4, y0 << 4, 0x7800); + *output1 = vint4(y1 << 4, y1 << 4, y1 << 4, 0x7800); } static void hdr_luminance_large_range_unpack( const int input[2], - int quantization_level, - uint4* output0, - uint4* output1 + int quant_level, + vint4* output0, + vint4* output1 ) { - int v0 = color_unquantization_tables[quantization_level][input[0]]; - int v1 = color_unquantization_tables[quantization_level][input[1]]; + int v0 = color_unquant_tables[quant_level][input[0]]; + int v1 = color_unquant_tables[quant_level][input[1]]; int y0, y1; if (v1 >= v0) @@ -734,19 +604,19 @@ static void hdr_luminance_large_range_unpack( y0 = (v1 << 4) + 8; y1 = (v0 << 4) - 8; } - *output0 = uint4(y0 << 4, y0 << 4, y0 << 4, 0x7800); - *output1 = uint4(y1 << 4, y1 << 4, y1 << 4, 0x7800); + *output0 = vint4(y0 << 4, y0 << 4, y0 << 4, 0x7800); + *output1 = vint4(y1 << 4, y1 << 4, y1 << 4, 0x7800); } static void hdr_alpha_unpack( const int input[2], - int quantization_level, + int quant_level, int* output0, int* output1 ) { - int v6 = 
color_unquantization_tables[quantization_level][input[0]]; - int v7 = color_unquantization_tables[quantization_level][input[1]]; + int v6 = color_unquant_tables[quant_level][input[0]]; + int v7 = color_unquant_tables[quant_level][input[1]]; int selector = ((v6 >> 7) & 1) | ((v7 >> 6) & 2); v6 &= 0x7F; @@ -781,226 +651,188 @@ static void hdr_alpha_unpack( static void hdr_rgb_hdr_alpha_unpack3( const int input[8], - int quantization_level, - uint4* output0, - uint4* output1 + int quant_level, + vint4* output0, + vint4* output1 ) { - hdr_rgb_unpack3(input, quantization_level, output0, output1); + hdr_rgb_unpack3(input, quant_level, output0, output1); int alpha0, alpha1; - hdr_alpha_unpack(input + 6, quantization_level, &alpha0, &alpha1); + hdr_alpha_unpack(input + 6, quant_level, &alpha0, &alpha1); - output0->a = alpha0; - output1->a = alpha1; + output0->set_lane<3>(alpha0); + output1->set_lane<3>(alpha1); } void unpack_color_endpoints( astcenc_profile decode_mode, int format, - int quantization_level, + int quant_level, const int* input, int* rgb_hdr, int* alpha_hdr, int* nan_endpoint, - uint4* output0, - uint4* output1 + vint4* output0, + vint4* output1 ) { + // TODO: Make these bools ... + + // Assume no NaNs and LDR endpoints + + // TODO: Review use of NaN endpoint. It's never set for HDR images ... *nan_endpoint = 0; + *rgb_hdr = 0; + *alpha_hdr = 0; + - // TODO: Make format the correct enum type switch (format) { case FMT_LUMINANCE: - *rgb_hdr = 0; - *alpha_hdr = 0; - luminance_unpack(input, quantization_level, output0, output1); + luminance_unpack(input, quant_level, output0, output1); break; case FMT_LUMINANCE_DELTA: - *rgb_hdr = 0; - *alpha_hdr = 0; - luminance_delta_unpack(input, quantization_level, output0, output1); + luminance_delta_unpack(input, quant_level, output0, output1); break; case FMT_HDR_LUMINANCE_SMALL_RANGE: *rgb_hdr = 1; *alpha_hdr = -1; - hdr_luminance_small_range_unpack(input, quantization_level, output0, output1); + hdr_luminance_small_range_unpack(input, quant_level, output0, output1); break; case FMT_HDR_LUMINANCE_LARGE_RANGE: *rgb_hdr = 1; *alpha_hdr = -1; - hdr_luminance_large_range_unpack(input, quantization_level, output0, output1); + hdr_luminance_large_range_unpack(input, quant_level, output0, output1); break; case FMT_LUMINANCE_ALPHA: - *rgb_hdr = 0; - *alpha_hdr = 0; - luminance_alpha_unpack(input, quantization_level, output0, output1); + luminance_alpha_unpack(input, quant_level, output0, output1); break; case FMT_LUMINANCE_ALPHA_DELTA: - *rgb_hdr = 0; - *alpha_hdr = 0; - luminance_alpha_delta_unpack(input, quantization_level, output0, output1); + luminance_alpha_delta_unpack(input, quant_level, output0, output1); break; case FMT_RGB_SCALE: - *rgb_hdr = 0; - *alpha_hdr = 0; - rgb_scale_unpack(input, quantization_level, output0, output1); + { + vint4 input0q(input[0], input[1], input[2], 0); + int scale = input[3]; + rgb_scale_unpack(input0q, scale, quant_level, *output0, *output1); + } break; case FMT_RGB_SCALE_ALPHA: - *rgb_hdr = 0; - *alpha_hdr = 0; - rgb_scale_alpha_unpack(input, quantization_level, output0, output1); + { + vint4 input0q(input[0], input[1], input[2], input[4]); + int alpha1q = input[5]; + int scaleq = input[3]; + rgb_scale_alpha_unpack(input0q, alpha1q, scaleq, quant_level, *output0, *output1); + } break; case FMT_HDR_RGB_SCALE: *rgb_hdr = 1; *alpha_hdr = -1; - hdr_rgbo_unpack3(input, quantization_level, output0, output1); + hdr_rgbo_unpack3(input, quant_level, output0, output1); break; case FMT_RGB: - *rgb_hdr = 0; - *alpha_hdr = 0; 
- rgb_unpack(input, quantization_level, output0, output1); + { + vint4 input0q(input[0], input[2], input[4], 0); + vint4 input1q(input[1], input[3], input[5], 0); + rgb_unpack(input0q, input1q, quant_level, *output0, *output1); + } break; case FMT_RGB_DELTA: - *rgb_hdr = 0; - *alpha_hdr = 0; - rgb_delta_unpack(input, quantization_level, output0, output1); + { + vint4 input0q(input[0], input[2], input[4], 0); + vint4 input1q(input[1], input[3], input[5], 0); + rgb_delta_unpack(input0q, input1q, quant_level, *output0, *output1); + } break; case FMT_HDR_RGB: *rgb_hdr = 1; *alpha_hdr = -1; - hdr_rgb_unpack3(input, quantization_level, output0, output1); + hdr_rgb_unpack3(input, quant_level, output0, output1); break; case FMT_RGBA: - *rgb_hdr = 0; - *alpha_hdr = 0; - rgba_unpack(input, quantization_level, output0, output1); + { + vint4 input0q(input[0], input[2], input[4], input[6]); + vint4 input1q(input[1], input[3], input[5], input[7]); + rgba_unpack(input0q, input1q, quant_level, *output0, *output1); + } break; case FMT_RGBA_DELTA: - *rgb_hdr = 0; - *alpha_hdr = 0; - rgba_delta_unpack(input, quantization_level, output0, output1); + { + vint4 input0q(input[0], input[2], input[4], input[6]); + vint4 input1q(input[1], input[3], input[5], input[7]); + rgba_delta_unpack(input0q, input1q, quant_level, *output0, *output1); + } break; case FMT_HDR_RGB_LDR_ALPHA: *rgb_hdr = 1; - *alpha_hdr = 0; - hdr_rgb_ldr_alpha_unpack3(input, quantization_level, output0, output1); + hdr_rgb_ldr_alpha_unpack3(input, quant_level, output0, output1); break; case FMT_HDR_RGBA: *rgb_hdr = 1; *alpha_hdr = 1; - hdr_rgb_hdr_alpha_unpack3(input, quantization_level, output0, output1); + hdr_rgb_hdr_alpha_unpack3(input, quant_level, output0, output1); break; } + // Assign a correct default alpha if (*alpha_hdr == -1) { if (decode_mode == ASTCENC_PRF_HDR) { - output0->a = 0x7800; - output1->a = 0x7800; + output0->set_lane<3>(0x7800); + output1->set_lane<3>(0x7800); *alpha_hdr = 1; } else { - output0->a = 0x00FF; - output1->a = 0x00FF; + output0->set_lane<3>(0x00FF); + output1->set_lane<3>(0x00FF); *alpha_hdr = 0; } } - switch (decode_mode) + vint4 ldr_scale(257); + vint4 hdr_scale(1); + vint4 output_scale = ldr_scale; + + // An LDR profile image + if ((decode_mode == ASTCENC_PRF_LDR) || + (decode_mode == ASTCENC_PRF_LDR_SRGB)) { - case ASTCENC_PRF_LDR_SRGB: + // Also matches HDR alpha, as cannot have HDR alpha without HDR RGB if (*rgb_hdr == 1) { - output0->r = 0xFF00; - output0->g = 0x0000; - output0->b = 0xFF00; - output0->a = 0xFF00; - - output1->r = 0xFF00; - output1->g = 0x0000; - output1->b = 0xFF00; - output1->a = 0xFF00; - } - else - { - output0->r *= 257; - output0->g *= 257; - output0->b *= 257; - output0->a *= 257; - - output1->r *= 257; - output1->g *= 257; - output1->b *= 257; - output1->a *= 257; - } - *rgb_hdr = 0; - *alpha_hdr = 0; - break; + *output0 = vint4(0xFF00, 0x0000, 0xFF00, 0xFF00); + *output1 = vint4(0xFF00, 0x0000, 0xFF00, 0xFF00); + output_scale = hdr_scale; - case ASTCENC_PRF_LDR: - if (*rgb_hdr == 1) - { - output0->r = 0xFFFF; - output0->g = 0xFFFF; - output0->b = 0xFFFF; - output0->a = 0xFFFF; - - output1->r = 0xFFFF; - output1->g = 0xFFFF; - output1->b = 0xFFFF; - output1->a = 0xFFFF; - *nan_endpoint = 1; - } - else - { - output0->r *= 257; - output0->g *= 257; - output0->b *= 257; - output0->a *= 257; - - output1->r *= 257; - output1->g *= 257; - output1->b *= 257; - output1->a *= 257; - } - *rgb_hdr = 0; - *alpha_hdr = 0; - break; - - case ASTCENC_PRF_HDR_RGB_LDR_A: - case ASTCENC_PRF_HDR: - 
if (*rgb_hdr == 0) - { - output0->r *= 257; - output0->g *= 257; - output0->b *= 257; - - output1->r *= 257; - output1->g *= 257; - output1->b *= 257; - } - if (*alpha_hdr == 0) - { - output0->a *= 257; - output1->a *= 257; + *rgb_hdr = 0; + *alpha_hdr = 0; } - break; } + // An HDR profile image + else + { + bool hrgb = *rgb_hdr == 1; + bool ha = *alpha_hdr == 1; + vmask4 hdr_lanes(hrgb, hrgb, hrgb, ha); + output_scale = select(ldr_scale, hdr_scale, hdr_lanes); + } + + *output0 = *output0 * output_scale; + *output1 = *output1 * output_scale; } diff --git a/libkram/astc-encoder/astcenc_compress_symbolic.cpp b/libkram/astc-encoder/astcenc_compress_symbolic.cpp index f75471c7..5b5e5519 100644 --- a/libkram/astc-encoder/astcenc_compress_symbolic.cpp +++ b/libkram/astc-encoder/astcenc_compress_symbolic.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -22,6 +22,7 @@ */ #include "astcenc_internal.h" +#include "astcenc_diagnostic_trace.h" #include #include @@ -57,40 +58,42 @@ static int realign_weights( pt += scb->partition_index; // Get the quantization table - const int packed_index = bsd->block_mode_to_packed[scb->block_mode]; - assert(packed_index >= 0 && packed_index < bsd->block_mode_packed_count); - const block_mode& bm = bsd->block_modes_packed[packed_index]; - int weight_quantization_level = bm.quantization_mode; - const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quantization_level]); + const int packed_index = bsd->block_mode_packed_index[scb->block_mode]; + assert(packed_index >= 0 && packed_index < bsd->block_mode_count); + const block_mode& bm = bsd->block_modes[packed_index]; + int weight_quant_level = bm.quant_mode; + const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quant_level]); // Get the decimation table - const decimation_table *const *ixtab2 = bsd->decimation_tables; - const decimation_table *it = ixtab2[bm.decimation_mode]; - int weight_count = it->num_weights; + const decimation_table* dt = bsd->decimation_tables[bm.decimation_mode]; + int weight_count = dt->weight_count; int max_plane = bm.is_dual_plane; - int plane2_component = max_plane ? scb->plane2_color_component : 0; - int plane_mask = max_plane ? 1 << plane2_component : 0; + int plane2_component = bm.is_dual_plane ? scb->plane2_color_component : -1; + vmask4 plane_mask = vint4::lane_id() == vint4(plane2_component); // Decode the color endpoints int rgb_hdr; int alpha_hdr; int nan_endpoint; - int4 endpnt0[4]; - int4 endpnt1[4]; - float4 endpnt0f[4]; - float4 offset[4]; + vint4 endpnt0[4]; + vint4 endpnt1[4]; + vfloat4 endpnt0f[4]; + vfloat4 offset[4]; + + promise(partition_count > 0); + promise(weight_count > 0); + promise(max_plane >= 0); for (int pa_idx = 0; pa_idx < partition_count; pa_idx++) { unpack_color_endpoints(decode_mode, scb->color_formats[pa_idx], - scb->color_quantization_level, + scb->color_quant_level, scb->color_values[pa_idx], &rgb_hdr, &alpha_hdr, &nan_endpoint, - // TODO: Fix these casts ... 
- reinterpret_cast(&endpnt0[pa_idx]), - reinterpret_cast(&endpnt1[pa_idx])); + &endpnt0[pa_idx], + &endpnt1[pa_idx]); } uint8_t uq_pl_weights[MAX_WEIGHTS_PER_BLOCK]; @@ -103,16 +106,11 @@ static int realign_weights( for (int pa_idx = 0; pa_idx < partition_count; pa_idx++) { // Compute the endpoint delta for all channels in current plane - int4 epd = endpnt1[pa_idx] - endpnt0[pa_idx]; - - if (plane_mask & 1) epd.r = 0; - if (plane_mask & 2) epd.g = 0; - if (plane_mask & 4) epd.b = 0; - if (plane_mask & 8) epd.a = 0; + vint4 epd = endpnt1[pa_idx] - endpnt0[pa_idx]; + epd = select(epd, vint4::zero(), plane_mask); - endpnt0f[pa_idx] = float4((float)endpnt0[pa_idx].r, (float)endpnt0[pa_idx].g, - (float)endpnt0[pa_idx].b, (float)endpnt0[pa_idx].a); - offset[pa_idx] = float4((float)epd.r, (float)epd.g, (float)epd.b, (float)epd.a); + endpnt0f[pa_idx] = int_to_float(endpnt0[pa_idx]); + offset[pa_idx] = int_to_float(epd); offset[pa_idx] = offset[pa_idx] * (1.0f / 64.0f); } @@ -139,42 +137,42 @@ static int realign_weights( float down_error = 0.0f; // Interpolate the colors to create the diffs - int texels_to_evaluate = it->weight_num_texels[we_idx]; + int texels_to_evaluate = dt->weight_texel_count[we_idx]; + promise(texels_to_evaluate > 0); for (int te_idx = 0; te_idx < texels_to_evaluate; te_idx++) { - int texel = it->weight_texel[we_idx][te_idx]; - const uint8_t *texel_weights = it->texel_weights_texel[we_idx][te_idx]; - const float *texel_weights_float = it->texel_weights_float_texel[we_idx][te_idx]; + int texel = dt->weight_texel[te_idx][we_idx]; + const uint8_t *texel_weights = dt->texel_weights_texel[we_idx][te_idx]; + const float *texel_weights_float = dt->texel_weights_float_texel[we_idx][te_idx]; float twf0 = texel_weights_float[0]; float weight_base = - ((uqw * twf0 - + uq_pl_weights[texel_weights[1]] * texel_weights_float[1]) - + (uq_pl_weights[texel_weights[2]] * texel_weights_float[2] - + uq_pl_weights[texel_weights[3]] * texel_weights_float[3])); + ((static_cast(uqw) * twf0 + + static_cast(uq_pl_weights[texel_weights[1]]) * texel_weights_float[1]) + + (static_cast(uq_pl_weights[texel_weights[2]]) * texel_weights_float[2] + + static_cast(uq_pl_weights[texel_weights[3]]) * texel_weights_float[3])); int partition = pt->partition_of_texel[texel]; weight_base = weight_base + 0.5f; float plane_weight = astc::flt_rd(weight_base); - float plane_up_weight = astc::flt_rd(weight_base + uqw_next_dif * twf0) - plane_weight; - float plane_down_weight = astc::flt_rd(weight_base + uqw_prev_dif * twf0) - plane_weight; + float plane_up_weight = astc::flt_rd(weight_base + static_cast(uqw_next_dif) * twf0) - plane_weight; + float plane_down_weight = astc::flt_rd(weight_base + static_cast(uqw_prev_dif) * twf0) - plane_weight; - float4 color_offset = offset[partition]; - float4 color_base = endpnt0f[partition]; + vfloat4 color_offset = offset[partition]; + vfloat4 color_base = endpnt0f[partition]; - float4 color = color_base + color_offset * plane_weight; + vfloat4 color = color_base + color_offset * plane_weight; - float4 origcolor = float4(blk->data_r[texel], blk->data_g[texel], - blk->data_b[texel], blk->data_a[texel]); - float4 error_weight = float4(ewb->texel_weight_r[texel], ewb->texel_weight_g[texel], - ewb->texel_weight_b[texel], ewb->texel_weight_a[texel]); + vfloat4 origcolor = blk->texel(texel); + vfloat4 error_weight = vfloat4(ewb->texel_weight_r[texel], ewb->texel_weight_g[texel], + ewb->texel_weight_b[texel], ewb->texel_weight_a[texel]); - float4 colordiff = color - origcolor; - float4 
color_up_diff = colordiff + color_offset * plane_up_weight; - float4 color_down_diff = colordiff + color_offset * plane_down_weight; - current_error += dot(colordiff * colordiff, error_weight); - up_error += dot(color_up_diff * color_up_diff, error_weight); - down_error += dot(color_down_diff * color_down_diff, error_weight); + vfloat4 colordiff = color - origcolor; + vfloat4 color_up_diff = colordiff + color_offset * plane_up_weight; + vfloat4 color_down_diff = colordiff + color_offset * plane_down_weight; + current_error += dot_s(colordiff * colordiff, error_weight); + up_error += dot_s(color_up_diff * color_up_diff, error_weight); + down_error += dot_s(color_down_diff * color_down_diff, error_weight); } // Check if the prev or next error is better, and if so use it @@ -194,7 +192,7 @@ static int realign_weights( // Prepare iteration for plane 2 weight_set8 = plane2_weight_set8; - plane_mask ^= 0xF; + plane_mask = ~plane_mask; } return adjustments; @@ -203,31 +201,35 @@ static int realign_weights( /* function for compressing a block symbolically, given that we have already decided on a partition */ -static void compress_symbolic_block_fixed_partition_1_plane( - astcenc_profile decode_mode, - float mode_cutoff, +static float compress_symbolic_block_fixed_partition_1_plane( + const astcenc_config& config, + bool only_always, int tune_candidate_limit, + float tune_errorval_threshold, int max_refinement_iters, const block_size_descriptor* bsd, - int partition_count, int partition_index, + int partition_count, + int partition_index, const imageblock* blk, const error_weight_block* ewb, - symbolic_compressed_block* scb, + symbolic_compressed_block& scb, compress_fixed_partition_buffers* tmpbuf ) { - static const int free_bits_for_partition_count[5] = { 0, 115 - 4, 111 - 4 - PARTITION_BITS, 108 - 4 - PARTITION_BITS, 105 - 4 - PARTITION_BITS }; + static const int free_bits_for_partition_count[5] = { + 0, 115 - 4, 111 - 4 - PARTITION_BITS, 108 - 4 - PARTITION_BITS, 105 - 4 - PARTITION_BITS + }; - const partition_info *pi = get_partition_table(bsd, partition_count); - pi += partition_index; + const partition_info *pt = get_partition_table(bsd, partition_count); + pt += partition_index; // first, compute ideal weights and endpoint colors, under the assumption that // there is no quantization or decimation going on. endpoints_and_weights *ei = &tmpbuf->ei1; endpoints_and_weights *eix = tmpbuf->eix1; - compute_endpoints_and_ideal_weights_1_plane(bsd, pi, blk, ewb, ei); + compute_endpoints_and_ideal_weights_1_plane(bsd, pt, blk, ewb, ei); // next, compute ideal weights and endpoint colors for every decimation. 
- const decimation_table *const *ixtab2 = bsd->decimation_tables; + const decimation_table *const *dts = bsd->decimation_tables; float *decimated_quantized_weights = tmpbuf->decimated_quantized_weights; float *decimated_weights = tmpbuf->decimated_weights; @@ -236,65 +238,52 @@ static void compress_symbolic_block_fixed_partition_1_plane( // for each decimation mode, compute an ideal set of weights // (that is, weights computed with the assumption that they are not quantized) - for (int i = 0; i < MAX_DECIMATION_MODES; i++) + for (int i = 0; i < bsd->decimation_mode_count; i++) { - if (bsd->permit_encode[i] == 0 || bsd->decimation_mode_maxprec_1plane[i] < 0 || bsd->decimation_mode_percentile[i] > mode_cutoff) + const decimation_mode& dm = bsd->decimation_modes[i]; + if (dm.maxprec_1plane < 0 || (only_always && !dm.percentile_always) || !dm.percentile_hit) { continue; } - eix[i] = *ei; - compute_ideal_weights_for_decimation_table(&(eix[i]), ixtab2[i], decimated_quantized_weights + i * MAX_WEIGHTS_PER_BLOCK, decimated_weights + i * MAX_WEIGHTS_PER_BLOCK); + compute_ideal_weights_for_decimation_table( + *ei, + eix[i], + *(dts[i]), + decimated_quantized_weights + i * MAX_WEIGHTS_PER_BLOCK, + decimated_weights + i * MAX_WEIGHTS_PER_BLOCK); } // compute maximum colors for the endpoints and ideal weights. // for each endpoint-and-ideal-weight pair, compute the smallest weight value // that will result in a color value greater than 1. - float4 min_ep = float4(10.0f); + vfloat4 min_ep(10.0f); for (int i = 0; i < partition_count; i++) { #ifdef DEBUG_CAPTURE_NAN fedisableexcept(FE_DIVBYZERO | FE_INVALID); #endif - float4 ep = float4( - (1.0f - ei->ep.endpt0[i].r) / (ei->ep.endpt1[i].r - ei->ep.endpt0[i].r), - (1.0f - ei->ep.endpt0[i].g) / (ei->ep.endpt1[i].g - ei->ep.endpt0[i].g), - (1.0f - ei->ep.endpt0[i].b) / (ei->ep.endpt1[i].b - ei->ep.endpt0[i].b), - (1.0f - ei->ep.endpt0[i].a) / (ei->ep.endpt1[i].a - ei->ep.endpt0[i].a)); + vfloat4 ep = (vfloat4(1.0f) - ei->ep.endpt0[i]) / (ei->ep.endpt1[i] - ei->ep.endpt0[i]); - if (ep.r > 0.5f && ep.r < min_ep.r) - { - min_ep.r = ep.r; - } - - if (ep.g > 0.5f && ep.g < min_ep.g) - { - min_ep.g = ep.g; - } - - if (ep.b > 0.5f && ep.b < min_ep.b) - { - min_ep.b = ep.b; - } - - if (ep.a > 0.5f && ep.a < min_ep.a) - { - min_ep.a = ep.a; - } + vmask4 use_ep = (ep > vfloat4(0.5f)) & (ep < min_ep); + min_ep = select(min_ep, ep, use_ep); #ifdef DEBUG_CAPTURE_NAN feenableexcept(FE_DIVBYZERO | FE_INVALID); #endif } - float min_wt_cutoff = MIN(MIN(min_ep.r, min_ep.g), MIN(min_ep.b, min_ep.a)); + float min_wt_cutoff = hmin_s(min_ep); // for each mode, use the angular method to compute a shift. float weight_low_value[MAX_WEIGHT_MODES]; float weight_high_value[MAX_WEIGHT_MODES]; - compute_angular_endpoints_1plane(mode_cutoff, bsd, decimated_quantized_weights, decimated_weights, weight_low_value, weight_high_value); + compute_angular_endpoints_1plane( + only_always, bsd, + decimated_quantized_weights, decimated_weights, + weight_low_value, weight_high_value); // for each mode (which specifies a decimation and a quantization): // * compute number of bits needed for the quantized weights. 
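// The weight bitcount referred to above follows ASTC's integer sequence
// encoding (ISE): each quant level spends a fixed number of plain bits per
// value plus optionally one trit or quint, with 5 trits packed into 8 bits
// and 3 quints packed into 7 bits. A rough standalone sketch of that cost
// (ise_sequence_bits is a hypothetical name, not the get_ise_sequence_bitcount
// call used in the next hunk):

static int ise_sequence_bits(int count, int bits, bool has_trit, bool has_quint)
{
    int total = count * bits;
    if (has_trit)
    {
        total += (count * 8 + 4) / 5;   // ceil(8 * count / 5)
    }
    if (has_quint)
    {
        total += (count * 7 + 2) / 3;   // ceil(7 * count / 3)
    }
    return total;
}

// Example: 16 weights at QUANT_6 (1 bit plus one trit each) cost
// 16 * 1 + ceil(16 * 8 / 5) = 16 + 26 = 42 bits.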
@@ -303,10 +292,10 @@ static void compress_symbolic_block_fixed_partition_1_plane( int qwt_bitcounts[MAX_WEIGHT_MODES]; float qwt_errors[MAX_WEIGHT_MODES]; - for (int i = 0, ni = bsd->block_mode_packed_count; i < ni; ++i) + for (int i = 0; i < bsd->block_mode_count; ++i) { - const block_mode& bm = bsd->block_modes_packed[i]; - if (bm.is_dual_plane != 0 || bm.percentile > mode_cutoff) + const block_mode& bm = bsd->block_modes[i]; + if (bm.is_dual_plane || (only_always && !bm.percentile_always) || !bm.percentile_hit) { qwt_errors[i] = 1e38f; continue; @@ -320,8 +309,9 @@ static void compress_symbolic_block_fixed_partition_1_plane( int decimation_mode = bm.decimation_mode; // compute weight bitcount for the mode - int bits_used_by_weights = compute_ise_bitcount(ixtab2[decimation_mode]->num_weights, - (quantization_method) bm.quantization_mode); + int bits_used_by_weights = get_ise_sequence_bitcount( + dts[decimation_mode]->weight_count, + (quant_method)bm.quant_mode); int bitcount = free_bits_for_partition_count[partition_count] - bits_used_by_weights; if (bitcount <= 0 || bits_used_by_weights < 24 || bits_used_by_weights > 96) { @@ -331,15 +321,19 @@ static void compress_symbolic_block_fixed_partition_1_plane( qwt_bitcounts[i] = bitcount; // then, generate the optimized set of weights for the weight mode. - compute_ideal_quantized_weights_for_decimation_table(ixtab2[decimation_mode], - weight_low_value[i], weight_high_value[i], - decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * decimation_mode, - flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i, - u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i, - bm.quantization_mode); + compute_quantized_weights_for_decimation_table( + dts[decimation_mode], + weight_low_value[i], weight_high_value[i], + decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * decimation_mode, + flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i, + u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i, + bm.quant_mode); // then, compute weight-errors for the weight mode. - qwt_errors[i] = compute_error_of_weight_set(&(eix[decimation_mode]), ixtab2[decimation_mode], flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i); + qwt_errors[i] = compute_error_of_weight_set( + &(eix[decimation_mode]), + dts[decimation_mode], + flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * i); } // for each weighting mode, determine the optimal combination of color endpoint encodings @@ -347,128 +341,233 @@ static void compress_symbolic_block_fixed_partition_1_plane( int partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][4]; int quantized_weight[TUNE_MAX_TRIAL_CANDIDATES]; - int color_quantization_level[TUNE_MAX_TRIAL_CANDIDATES]; - int color_quantization_level_mod[TUNE_MAX_TRIAL_CANDIDATES]; + int color_quant_level[TUNE_MAX_TRIAL_CANDIDATES]; + int color_quant_level_mod[TUNE_MAX_TRIAL_CANDIDATES]; + determine_optimal_set_of_endpoint_formats_to_use( - bsd, pi, blk, ewb, &(ei->ep), -1, qwt_bitcounts, qwt_errors, + bsd, pt, blk, ewb, &(ei->ep), -1, qwt_bitcounts, qwt_errors, tune_candidate_limit, partition_format_specifiers, quantized_weight, - color_quantization_level, color_quantization_level_mod); + color_quant_level, color_quant_level_mod); // then iterate over the tune_candidate_limit believed-to-be-best modes to // find out which one is actually best. 
+ float best_errorval_in_mode = 1e30f; + float best_errorval_in_scb = scb.errorval; + for (int i = 0; i < tune_candidate_limit; i++) { + TRACE_NODE(node0, "candidate"); + uint8_t *u8_weight_src; int weights_to_copy; const int qw_packed_index = quantized_weight[i]; if (qw_packed_index < 0) { - scb->error_block = 1; - scb++; + trace_add_data("failed", "error_block"); continue; } - assert(qw_packed_index >= 0 && qw_packed_index < bsd->block_mode_packed_count); - const block_mode& qw_bm = bsd->block_modes_packed[qw_packed_index]; + assert(qw_packed_index >= 0 && qw_packed_index < bsd->block_mode_count); + const block_mode& qw_bm = bsd->block_modes[qw_packed_index]; int decimation_mode = qw_bm.decimation_mode; - int weight_quantization_mode = qw_bm.quantization_mode; - const decimation_table *it = ixtab2[decimation_mode]; + int weight_quant_mode = qw_bm.quant_mode; + const decimation_table *dt = dts[decimation_mode]; u8_weight_src = u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * qw_packed_index; + weights_to_copy = dt->weight_count; - weights_to_copy = it->num_weights; + trace_add_data("weight_x", dt->weight_x); + trace_add_data("weight_y", dt->weight_y); + trace_add_data("weight_z", dt->weight_z); + trace_add_data("weight_quant", weight_quant_mode); // recompute the ideal color endpoints before storing them. - float4 rgbs_colors[4]; - float4 rgbo_colors[4]; + vfloat4 rgbs_colors[4]; + vfloat4 rgbo_colors[4]; + // TODO: Can we ping-pong between two buffers and make this zero copy? + symbolic_compressed_block workscb; for (int l = 0; l < max_refinement_iters; l++) { - recompute_ideal_colors(weight_quantization_mode, &(eix[decimation_mode].ep), rgbs_colors, rgbo_colors, u8_weight_src, nullptr, -1, pi, it, blk, ewb); + recompute_ideal_colors_1plane( + weight_quant_mode, &(eix[decimation_mode].ep), + rgbs_colors, rgbo_colors, u8_weight_src, pt, dt, blk, ewb); // quantize the chosen color // store the colors for the block for (int j = 0; j < partition_count; j++) { - scb->color_formats[j] = pack_color_endpoints(eix[decimation_mode].ep.endpt0[j], - eix[decimation_mode].ep.endpt1[j], - rgbs_colors[j], rgbo_colors[j], partition_format_specifiers[i][j], scb->color_values[j], color_quantization_level[i]); + workscb.color_formats[j] = pack_color_endpoints( + eix[decimation_mode].ep.endpt0[j], + eix[decimation_mode].ep.endpt1[j], + rgbs_colors[j], + rgbo_colors[j], + partition_format_specifiers[i][j], + workscb.color_values[j], + color_quant_level[i]); } // if all the color endpoint modes are the same, we get a few more // bits to store colors; let's see if we can take advantage of this: // requantize all the colors and see if the endpoint modes remain the same; // if they do, then exploit it. 
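// A minimal illustrative sketch, not taken from the diff above: the
// "matched formats" check that the next lines implement. When every partition
// ends up using the same endpoint format, the format field is cheaper to
// store, so the block is re-packed at the alternative color_quant_level_mod
// and only kept if the formats still agree afterwards. The helper below is a
// hypothetical stand-in for that agreement test.
static bool all_endpoint_formats_match(const int* formats, int partition_count)
{
    for (int j = 1; j < partition_count; j++)
    {
        if (formats[j] != formats[0])
        {
            return false;   // keep the per-partition quant level instead
        }
    }
    return true;            // safe to adopt the matched-format quant level
}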
- scb->color_formats_matched = 0; + workscb.color_formats_matched = 0; - if ((partition_count >= 2 && scb->color_formats[0] == scb->color_formats[1] - && color_quantization_level[i] != color_quantization_level_mod[i]) - && (partition_count == 2 || (scb->color_formats[0] == scb->color_formats[2] && (partition_count == 3 || (scb->color_formats[0] == scb->color_formats[3]))))) + if ((partition_count >= 2 && workscb.color_formats[0] == workscb.color_formats[1] + && color_quant_level[i] != color_quant_level_mod[i]) + && (partition_count == 2 || (workscb.color_formats[0] == workscb.color_formats[2] + && (partition_count == 3 || (workscb.color_formats[0] == workscb.color_formats[3]))))) { int colorvals[4][12]; - int color_formats_mod[4] = { 0 }; + int color_formats_mod[4] { 0 }; for (int j = 0; j < partition_count; j++) { - color_formats_mod[j] = pack_color_endpoints(eix[decimation_mode].ep.endpt0[j], - eix[decimation_mode].ep.endpt1[j], - rgbs_colors[j], rgbo_colors[j], partition_format_specifiers[i][j], colorvals[j], color_quantization_level_mod[i]); + color_formats_mod[j] = pack_color_endpoints( + eix[decimation_mode].ep.endpt0[j], + eix[decimation_mode].ep.endpt1[j], + rgbs_colors[j], + rgbo_colors[j], + partition_format_specifiers[i][j], + colorvals[j], + color_quant_level_mod[i]); } + if (color_formats_mod[0] == color_formats_mod[1] - && (partition_count == 2 || (color_formats_mod[0] == color_formats_mod[2] && (partition_count == 3 || (color_formats_mod[0] == color_formats_mod[3]))))) + && (partition_count == 2 || (color_formats_mod[0] == color_formats_mod[2] + && (partition_count == 3 || (color_formats_mod[0] == color_formats_mod[3]))))) { - scb->color_formats_matched = 1; + workscb.color_formats_matched = 1; for (int j = 0; j < 4; j++) { for (int k = 0; k < 12; k++) { - scb->color_values[j][k] = colorvals[j][k]; + workscb.color_values[j][k] = colorvals[j][k]; } } for (int j = 0; j < 4; j++) { - scb->color_formats[j] = color_formats_mod[j]; + workscb.color_formats[j] = color_formats_mod[j]; } } } // store header fields - scb->partition_count = partition_count; - scb->partition_index = partition_index; - scb->color_quantization_level = scb->color_formats_matched ? color_quantization_level_mod[i] : color_quantization_level[i]; - scb->block_mode = qw_bm.mode_index; - scb->error_block = 0; + workscb.partition_count = partition_count; + workscb.partition_index = partition_index; + workscb.color_quant_level = workscb.color_formats_matched ? color_quant_level_mod[i] : color_quant_level[i]; + workscb.block_mode = qw_bm.mode_index; + workscb.error_block = 0; - if (scb->color_quantization_level < 4) + if (workscb.color_quant_level < 4) { - scb->error_block = 1; // should never happen, but cannot prove it impossible. + workscb.error_block = 1; // should never happen, but cannot prove it impossible. + } + + // Pre-realign test + if (l == 0) + { + for (int j = 0; j < weights_to_copy; j++) + { + workscb.weights[j] = u8_weight_src[j]; + } + + float errorval = compute_symbolic_block_difference(config, bsd, &workscb, blk, ewb); + if (errorval == -1e30f) + { + errorval = -errorval; + workscb.error_block = 1; + } + + + trace_add_data("error_prerealign", errorval); + best_errorval_in_mode = astc::min(errorval, best_errorval_in_mode); + + // Average refinement improvement is 3.5% per iteration + // (allow 5%), but the first iteration can help more so we give + // it a extra 10% leeway. 
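// A minimal illustrative sketch, not taken from the diff above: a worked
// example of the skip heuristic described in this comment and computed just
// below. Assuming max_refinement_iters = 4 and the pre-realign test on
// iteration l = 0, iters_remaining = 4 and threshold = 0.05 * 4 + 1.1 = 1.3,
// so a candidate whose error exceeds 1.3x the best block found so far is
// abandoned: roughly 3.5% improvement per remaining pass will not close that gap.
static float prerealign_skip_threshold(int max_refinement_iters, int l)
{
    int iters_remaining = max_refinement_iters - l;
    // 5% headroom per remaining refinement pass, plus 10% extra because the
    // first pass tends to help the most (the post-realign test drops the 10%).
    return 0.05f * static_cast<float>(iters_remaining) + 1.1f;
}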
Use this knowledge to drive a + // heuristic to skip blocks that are unlikely to catch up with + // the best block we have already. + int iters_remaining = max_refinement_iters - l; + float threshold = (0.05f * static_cast(iters_remaining)) + 1.1f; + if (errorval > (threshold * best_errorval_in_scb)) + { + break; + } + + if (errorval < best_errorval_in_scb) + { + best_errorval_in_scb = errorval; + workscb.errorval = errorval; + scb = workscb; + + if (errorval < tune_errorval_threshold) + { + return errorval; + } + } } // perform a final pass over the weights to try to improve them. int adjustments = realign_weights( - decode_mode, bsd, blk, ewb, scb, u8_weight_src, nullptr); + config.profile, bsd, blk, ewb, &workscb, + u8_weight_src, nullptr); - if (adjustments == 0) + // Post-realign test + for (int j = 0; j < weights_to_copy; j++) + { + workscb.weights[j] = u8_weight_src[j]; + } + + float errorval = compute_symbolic_block_difference(config, bsd, &workscb, blk, ewb); + if (errorval == -1e30f) + { + errorval = -errorval; + workscb.error_block = 1; + } + + trace_add_data("error_postrealign", errorval); + best_errorval_in_mode = astc::min(errorval, best_errorval_in_mode); + + // Average refinement improvement is 3.5% per iteration, so skip + // blocks that are unlikely to catch up with the best block we + // have already. Assume a 5% per step to give benefit of the doubt + int iters_remaining = max_refinement_iters - 1 - l; + float threshold = (0.05f * static_cast(iters_remaining)) + 1.0f; + if (errorval > (threshold * best_errorval_in_scb)) { break; } - } - for (int j = 0; j < weights_to_copy; j++) - { - scb->plane1_weights[j] = u8_weight_src[j]; - } + if (errorval < best_errorval_in_scb) + { + best_errorval_in_scb = errorval; + workscb.errorval = errorval; + scb = workscb; - scb++; + if (errorval < tune_errorval_threshold) + { + return errorval; + } + } + + if (adjustments == 0) + { + break; + } + } } + + return best_errorval_in_mode; } -static void compress_symbolic_block_fixed_partition_2_planes( - astcenc_profile decode_mode, - float mode_cutoff, +static float compress_symbolic_block_fixed_partition_2_planes( + const astcenc_config& config, + bool only_always, int tune_candidate_limit, + float tune_errorval_threshold, int max_refinement_iters, const block_size_descriptor* bsd, int partition_count, @@ -476,24 +575,25 @@ static void compress_symbolic_block_fixed_partition_2_planes( int separate_component, const imageblock* blk, const error_weight_block* ewb, - symbolic_compressed_block* scb, + symbolic_compressed_block& scb, compress_fixed_partition_buffers* tmpbuf ) { - static const int free_bits_for_partition_count[5] = - { 0, 113 - 4, 109 - 4 - PARTITION_BITS, 106 - 4 - PARTITION_BITS, 103 - 4 - PARTITION_BITS }; + static const int free_bits_for_partition_count[5] = { + 0, 113 - 4, 109 - 4 - PARTITION_BITS, 106 - 4 - PARTITION_BITS, 103 - 4 - PARTITION_BITS + }; - const partition_info *pi = get_partition_table(bsd, partition_count); - pi += partition_index; + const partition_info *pt = get_partition_table(bsd, partition_count); + pt += partition_index; // first, compute ideal weights and endpoint colors endpoints_and_weights *ei1 = &tmpbuf->ei1; endpoints_and_weights *ei2 = &tmpbuf->ei2; endpoints_and_weights *eix1 = tmpbuf->eix1; endpoints_and_weights *eix2 = tmpbuf->eix2; - compute_endpoints_and_ideal_weights_2_planes(bsd, pi, blk, ewb, separate_component, ei1, ei2); + compute_endpoints_and_ideal_weights_2_planes(bsd, pt, blk, ewb, separate_component, ei1, ei2); // next, compute 
ideal weights and endpoint colors for every decimation. - const decimation_table *const *ixtab2 = bsd->decimation_tables; + const decimation_table *const *dts = bsd->decimation_tables; float *decimated_quantized_weights = tmpbuf->decimated_quantized_weights; float *decimated_weights = tmpbuf->decimated_weights; @@ -501,119 +601,75 @@ static void compress_symbolic_block_fixed_partition_2_planes( uint8_t *u8_quantized_decimated_quantized_weights = tmpbuf->u8_quantized_decimated_quantized_weights; // for each decimation mode, compute an ideal set of weights - for (int i = 0; i < MAX_DECIMATION_MODES; i++) + for (int i = 0; i < bsd->decimation_mode_count; i++) { - if (bsd->permit_encode[i] == 0 || bsd->decimation_mode_maxprec_2planes[i] < 0 || bsd->decimation_mode_percentile[i] > mode_cutoff) + const decimation_mode& dm = bsd->decimation_modes[i]; + if (dm.maxprec_2planes < 0 || (only_always && !dm.percentile_always) || !dm.percentile_hit) { continue; } - eix1[i] = *ei1; - eix2[i] = *ei2; - compute_ideal_weights_for_decimation_table(&(eix1[i]), ixtab2[i], decimated_quantized_weights + (2 * i) * MAX_WEIGHTS_PER_BLOCK, decimated_weights + (2 * i) * MAX_WEIGHTS_PER_BLOCK); - compute_ideal_weights_for_decimation_table(&(eix2[i]), ixtab2[i], decimated_quantized_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK, decimated_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK); + compute_ideal_weights_for_decimation_table( + *ei1, + eix1[i], + *(dts[i]), + decimated_quantized_weights + (2 * i) * MAX_WEIGHTS_PER_BLOCK, + decimated_weights + (2 * i) * MAX_WEIGHTS_PER_BLOCK); + + compute_ideal_weights_for_decimation_table( + *ei2, + eix2[i], + *(dts[i]), + decimated_quantized_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK, + decimated_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK); } // compute maximum colors for the endpoints and ideal weights. // for each endpoint-and-ideal-weight pair, compute the smallest weight value // that will result in a color value greater than 1. 
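// A minimal illustrative sketch, not taken from the diff above: in the
// two-plane path each decimation mode i owns two MAX_WEIGHTS_PER_BLOCK-sized
// slots of the shared scratch buffer, slot 2*i for plane 1 and slot 2*i + 1
// for plane 2, which is what the offsets above encode. The helper name is
// hypothetical and the 64-weight constant assumes the ASTC per-block limit.
static const int kMaxWeightsPerBlock = 64;

static float* plane_weight_slot(float* scratch, int decimation_mode, int plane)
{
    // plane is 0 or 1; keeping both planes of one mode adjacent means a single
    // stride of 2 * kMaxWeightsPerBlock walks the modes in order.
    return scratch + (2 * decimation_mode + plane) * kMaxWeightsPerBlock;
}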
- float4 min_ep1 = float4(10.0f); - float4 min_ep2 = float4(10.0f); + vfloat4 min_ep1(10.0f); + vfloat4 min_ep2(10.0f); for (int i = 0; i < partition_count; i++) { #ifdef DEBUG_CAPTURE_NAN fedisableexcept(FE_DIVBYZERO | FE_INVALID); #endif - float4 ep1 = float4( - (1.0f - ei1->ep.endpt0[i].r) / (ei1->ep.endpt1[i].r - ei1->ep.endpt0[i].r), - (1.0f - ei1->ep.endpt0[i].g) / (ei1->ep.endpt1[i].g - ei1->ep.endpt0[i].g), - (1.0f - ei1->ep.endpt0[i].b) / (ei1->ep.endpt1[i].b - ei1->ep.endpt0[i].b), - (1.0f - ei1->ep.endpt0[i].a) / (ei1->ep.endpt1[i].a - ei1->ep.endpt0[i].a)); + vfloat4 ep1 = (vfloat4(1.0f) - ei1->ep.endpt0[i]) / (ei1->ep.endpt1[i] - ei1->ep.endpt0[i]); + vmask4 use_ep1 = (ep1 > vfloat4(0.5f)) & (ep1 < min_ep1); + min_ep1 = select(min_ep1, ep1, use_ep1); - if (ep1.r > 0.5f && ep1.r < min_ep1.r) - { - min_ep1.r = ep1.r; - } - - if (ep1.g > 0.5f && ep1.g < min_ep1.g) - { - min_ep1.g = ep1.g; - } - - if (ep1.b > 0.5f && ep1.b < min_ep1.b) - { - min_ep1.b = ep1.b; - } - - if (ep1.a > 0.5f && ep1.a < min_ep1.a) - { - min_ep1.a = ep1.a; - } - - float4 ep2 = float4( - (1.0f - ei2->ep.endpt0[i].r) / (ei2->ep.endpt1[i].r - ei2->ep.endpt0[i].r), - (1.0f - ei2->ep.endpt0[i].g) / (ei2->ep.endpt1[i].g - ei2->ep.endpt0[i].g), - (1.0f - ei2->ep.endpt0[i].b) / (ei2->ep.endpt1[i].b - ei2->ep.endpt0[i].b), - (1.0f - ei2->ep.endpt0[i].a) / (ei2->ep.endpt1[i].a - ei2->ep.endpt0[i].a)); - - if (ep2.r > 0.5f && ep2.r < min_ep2.r) - { - min_ep2.r = ep2.r; - } - - if (ep2.g > 0.5f && ep2.g < min_ep2.g) - { - min_ep2.g = ep2.g; - } - - if (ep2.b > 0.5f && ep2.b < min_ep2.b) - { - min_ep2.b = ep2.b; - } - - if (ep2.a > 0.5f && ep2.a < min_ep2.a) - { - min_ep2.a = ep2.a; - } + vfloat4 ep2 = (vfloat4(1.0f) - ei2->ep.endpt0[i]) / (ei2->ep.endpt1[i] - ei2->ep.endpt0[i]); + vmask4 use_ep2 = (ep2 > vfloat4(0.5f)) & (ep2 < min_ep2); + min_ep2 = select(min_ep2, ep2, use_ep2); #ifdef DEBUG_CAPTURE_NAN feenableexcept(FE_DIVBYZERO | FE_INVALID); #endif } - float min_wt_cutoff1, min_wt_cutoff2; - switch (separate_component) - { - case 0: - min_wt_cutoff2 = min_ep2.r; - min_ep1.r = 1e30f; - break; - case 1: - min_wt_cutoff2 = min_ep2.g; - min_ep1.g = 1e30f; - break; - case 2: - min_wt_cutoff2 = min_ep2.b; - min_ep1.b = 1e30f; - break; - case 3: - min_wt_cutoff2 = min_ep2.a; - min_ep1.a = 1e30f; - break; - default: - min_wt_cutoff2 = 1e30f; - } + vfloat4 err_max(1e30f); + vmask4 err_mask = vint4::lane_id() == vint4(separate_component); + + // Set the separate component to max error in ep1 + min_ep1 = select(min_ep1, err_max, err_mask); - min_wt_cutoff1 = MIN(MIN(min_ep1.r, min_ep1.g), MIN(min_ep1.b, min_ep1.a)); + float min_wt_cutoff1 = hmin_s(min_ep1); + + // Set the minwt2 to the separate component min in ep2 + float min_wt_cutoff2 = hmin_s(select(err_max, min_ep2, err_mask)); float weight_low_value1[MAX_WEIGHT_MODES]; float weight_high_value1[MAX_WEIGHT_MODES]; float weight_low_value2[MAX_WEIGHT_MODES]; float weight_high_value2[MAX_WEIGHT_MODES]; - compute_angular_endpoints_2planes(mode_cutoff, bsd, decimated_quantized_weights, decimated_weights, weight_low_value1, weight_high_value1, weight_low_value2, weight_high_value2); + compute_angular_endpoints_2planes( + only_always, bsd, + decimated_quantized_weights, decimated_weights, + weight_low_value1, weight_high_value1, + weight_low_value2, weight_high_value2); // for each mode (which specifies a decimation and a quantization): // * generate an optimized set of quantized weights. 
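// A minimal illustrative sketch, not taken from the diff above: the lane-mask
// logic in this hunk splits the weight cutoff between the two planes. Plane 1
// ignores the channel carried by plane 2, and plane 2 considers only that
// channel. Scalar restatement with plain arrays; the helper name is hypothetical.
#include <algorithm>

static void split_weight_cutoffs(const float min_ep1[4], const float min_ep2[4],
                                 int separate_component,
                                 float& min_wt_cutoff1, float& min_wt_cutoff2)
{
    min_wt_cutoff1 = 1e30f;
    for (int c = 0; c < 4; c++)
    {
        if (c != separate_component)                  // masked out by err_mask above
        {
            min_wt_cutoff1 = std::min(min_wt_cutoff1, min_ep1[c]);
        }
    }
    min_wt_cutoff2 = min_ep2[separate_component];     // only the plane-2 channel survives
}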
@@ -622,10 +678,10 @@ static void compress_symbolic_block_fixed_partition_2_planes( int qwt_bitcounts[MAX_WEIGHT_MODES]; float qwt_errors[MAX_WEIGHT_MODES]; - for (int i = 0, ni = bsd->block_mode_packed_count; i < ni; ++i) + for (int i = 0; i < bsd->block_mode_count; ++i) { - const block_mode& bm = bsd->block_modes_packed[i]; - if (bm.is_dual_plane != 1 || bm.percentile > mode_cutoff) + const block_mode& bm = bsd->block_modes[i]; + if ((!bm.is_dual_plane) || (only_always && !bm.percentile_always) || !bm.percentile_hit) { qwt_errors[i] = 1e38f; continue; @@ -644,8 +700,9 @@ static void compress_symbolic_block_fixed_partition_2_planes( } // compute weight bitcount for the mode - int bits_used_by_weights = compute_ise_bitcount(2 * ixtab2[decimation_mode]->num_weights, - (quantization_method) bm.quantization_mode); + int bits_used_by_weights = get_ise_sequence_bitcount( + 2 * dts[decimation_mode]->weight_count, + (quant_method)bm.quant_mode); int bitcount = free_bits_for_partition_count[partition_count] - bits_used_by_weights; if (bitcount <= 0 || bits_used_by_weights < 24 || bits_used_by_weights > 96) { @@ -655,55 +712,62 @@ static void compress_symbolic_block_fixed_partition_2_planes( qwt_bitcounts[i] = bitcount; // then, generate the optimized set of weights for the mode. - compute_ideal_quantized_weights_for_decimation_table( - ixtab2[decimation_mode], + compute_quantized_weights_for_decimation_table( + dts[decimation_mode], weight_low_value1[i], weight_high_value1[i], decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * decimation_mode), flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i), - u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i), bm.quantization_mode); + u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i), bm.quant_mode); - compute_ideal_quantized_weights_for_decimation_table( - ixtab2[decimation_mode], + compute_quantized_weights_for_decimation_table( + dts[decimation_mode], weight_low_value2[i], weight_high_value2[i], decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * decimation_mode + 1), flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i + 1), - u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i + 1), bm.quantization_mode); + u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i + 1), bm.quant_mode); // then, compute quantization errors for the block mode. qwt_errors[i] = compute_error_of_weight_set( &(eix1[decimation_mode]), - ixtab2[decimation_mode], + dts[decimation_mode], flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i)) + + compute_error_of_weight_set( &(eix2[decimation_mode]), - ixtab2[decimation_mode], + dts[decimation_mode], flt_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * i + 1)); } // decide the optimal combination of color endpoint encodings and weight encodings. 
int partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][4]; int quantized_weight[TUNE_MAX_TRIAL_CANDIDATES]; - int color_quantization_level[TUNE_MAX_TRIAL_CANDIDATES]; - int color_quantization_level_mod[TUNE_MAX_TRIAL_CANDIDATES]; + int color_quant_level[TUNE_MAX_TRIAL_CANDIDATES]; + int color_quant_level_mod[TUNE_MAX_TRIAL_CANDIDATES]; endpoints epm; merge_endpoints(&(ei1->ep), &(ei2->ep), separate_component, &epm); determine_optimal_set_of_endpoint_formats_to_use( - bsd, pi, blk, ewb, &epm, separate_component, qwt_bitcounts, qwt_errors, + bsd, pt, blk, ewb, &epm, separate_component, qwt_bitcounts, qwt_errors, tune_candidate_limit, partition_format_specifiers, quantized_weight, - color_quantization_level, color_quantization_level_mod); + color_quant_level, color_quant_level_mod); + + // then iterate over the tune_candidate_limit believed-to-be-best modes to + // find out which one is actually best. + float best_errorval_in_mode = 1e30f; + float best_errorval_in_scb = scb.errorval; for (int i = 0; i < tune_candidate_limit; i++) { + TRACE_NODE(node0, "candidate"); + const int qw_packed_index = quantized_weight[i]; if (qw_packed_index < 0) { - scb->error_block = 1; - scb++; + trace_add_data("failed", "error_block"); continue; } @@ -711,107 +775,199 @@ static void compress_symbolic_block_fixed_partition_2_planes( uint8_t *u8_weight2_src; int weights_to_copy; - assert(qw_packed_index >= 0 && qw_packed_index < bsd->block_mode_packed_count); - const block_mode& qw_bm = bsd->block_modes_packed[qw_packed_index]; + assert(qw_packed_index >= 0 && qw_packed_index < bsd->block_mode_count); + const block_mode& qw_bm = bsd->block_modes[qw_packed_index]; int decimation_mode = qw_bm.decimation_mode; - int weight_quantization_mode = qw_bm.quantization_mode; - const decimation_table *it = ixtab2[decimation_mode]; + int weight_quant_mode = qw_bm.quant_mode; + const decimation_table *dt = dts[decimation_mode]; u8_weight1_src = u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * qw_packed_index); u8_weight2_src = u8_quantized_decimated_quantized_weights + MAX_WEIGHTS_PER_BLOCK * (2 * qw_packed_index + 1); + weights_to_copy = dt->weight_count; - weights_to_copy = it->num_weights; + trace_add_data("weight_x", dt->weight_x); + trace_add_data("weight_y", dt->weight_y); + trace_add_data("weight_z", dt->weight_z); + trace_add_data("weight_quant", weight_quant_mode); // recompute the ideal color endpoints before storing them. 
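// A minimal illustrative sketch, not taken from the diff above: roughly what
// the merge_endpoints() calls in this hunk combine before packing. Plane 1
// supplies the endpoints for three channels and plane 2 supplies the
// separately-weighted channel, so a single merged endpoint set can be encoded.
// This scalar stand-in uses plain arrays and a hypothetical name; treat it as
// a reading aid, not the library's implementation.
static void merge_endpoint_channels(const float plane1_ep[4], const float plane2_ep[4],
                                    int separate_component, float merged_ep[4])
{
    for (int c = 0; c < 4; c++)
    {
        merged_ep[c] = (c == separate_component) ? plane2_ep[c] : plane1_ep[c];
    }
}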
merge_endpoints(&(eix1[decimation_mode].ep), &(eix2[decimation_mode].ep), separate_component, &epm); - float4 rgbs_colors[4]; - float4 rgbo_colors[4]; + vfloat4 rgbs_colors[4]; + vfloat4 rgbo_colors[4]; + // TODO: Ping-pong between two buffers and make this zero copy + symbolic_compressed_block workscb; for (int l = 0; l < max_refinement_iters; l++) { - recompute_ideal_colors( - weight_quantization_mode, &epm, rgbs_colors, rgbo_colors, - u8_weight1_src, u8_weight2_src, separate_component, pi, it, blk, ewb); + recompute_ideal_colors_2planes( + weight_quant_mode, &epm, rgbs_colors, rgbo_colors, + u8_weight1_src, u8_weight2_src, separate_component, pt, dt, blk, ewb); // store the colors for the block for (int j = 0; j < partition_count; j++) { - scb->color_formats[j] = pack_color_endpoints( - epm.endpt0[j], epm.endpt1[j], + workscb.color_formats[j] = pack_color_endpoints( + epm.endpt0[j], + epm.endpt1[j], rgbs_colors[j], rgbo_colors[j], partition_format_specifiers[i][j], - scb->color_values[j], - color_quantization_level[i]); + workscb.color_values[j], + color_quant_level[i]); } - scb->color_formats_matched = 0; - if ((partition_count >= 2 && scb->color_formats[0] == scb->color_formats[1] - && color_quantization_level[i] != color_quantization_level_mod[i]) - && (partition_count == 2 || (scb->color_formats[0] == scb->color_formats[2] && (partition_count == 3 || (scb->color_formats[0] == scb->color_formats[3]))))) + workscb.color_formats_matched = 0; + + if ((partition_count >= 2 && workscb.color_formats[0] == workscb.color_formats[1] + && color_quant_level[i] != color_quant_level_mod[i]) + && (partition_count == 2 || (workscb.color_formats[0] == workscb.color_formats[2] + && (partition_count == 3 || (workscb.color_formats[0] == workscb.color_formats[3]))))) { int colorvals[4][12]; - int color_formats_mod[4] = { 0 }; + int color_formats_mod[4] { 0 }; for (int j = 0; j < partition_count; j++) { color_formats_mod[j] = pack_color_endpoints( - epm.endpt0[j], epm.endpt1[j], - rgbs_colors[j], rgbo_colors[j], - partition_format_specifiers[i][j], - colorvals[j], - color_quantization_level_mod[i]); + epm.endpt0[j], + epm.endpt1[j], + rgbs_colors[j], + rgbo_colors[j], + partition_format_specifiers[i][j], + colorvals[j], + color_quant_level_mod[i]); } if (color_formats_mod[0] == color_formats_mod[1] - && (partition_count == 2 || (color_formats_mod[0] == color_formats_mod[2] && (partition_count == 3 || (color_formats_mod[0] == color_formats_mod[3]))))) + && (partition_count == 2 || (color_formats_mod[0] == color_formats_mod[2] + && (partition_count == 3 || (color_formats_mod[0] == color_formats_mod[3]))))) { - scb->color_formats_matched = 1; + workscb.color_formats_matched = 1; for (int j = 0; j < 4; j++) { for (int k = 0; k < 12; k++) { - scb->color_values[j][k] = colorvals[j][k]; + workscb.color_values[j][k] = colorvals[j][k]; } } for (int j = 0; j < 4; j++) { - scb->color_formats[j] = color_formats_mod[j]; + workscb.color_formats[j] = color_formats_mod[j]; } } } // store header fields - scb->partition_count = partition_count; - scb->partition_index = partition_index; - scb->color_quantization_level = scb->color_formats_matched ? color_quantization_level_mod[i] : color_quantization_level[i]; - scb->block_mode = qw_bm.mode_index; - scb->plane2_color_component = separate_component; - scb->error_block = 0; - - if (scb->color_quantization_level < 4) + workscb.partition_count = partition_count; + workscb.partition_index = partition_index; + workscb.color_quant_level = workscb.color_formats_matched ? 
color_quant_level_mod[i] : color_quant_level[i]; + workscb.block_mode = qw_bm.mode_index; + workscb.plane2_color_component = separate_component; + workscb.error_block = 0; + + if (workscb.color_quant_level < 4) { - scb->error_block = 1; // should never happen, but cannot prove it impossible + workscb.error_block = 1; // should never happen, but cannot prove it impossible + } + + // Pre-realign test + if (l == 0) + { + for (int j = 0; j < weights_to_copy; j++) + { + workscb.weights[j] = u8_weight1_src[j]; + workscb.weights[j + PLANE2_WEIGHTS_OFFSET] = u8_weight2_src[j]; + } + + float errorval = compute_symbolic_block_difference(config, bsd, &workscb, blk, ewb); + if (errorval == -1e30f) + { + errorval = -errorval; + workscb.error_block = 1; + } + + + trace_add_data("error_prerealign", errorval); + best_errorval_in_mode = astc::min(errorval, best_errorval_in_mode); + + // Average refinement improvement is 3.5% per iteration + // (allow 5%), but the first iteration can help more so we give + // it a extra 10% leeway. Use this knowledge to drive a + // heuristic to skip blocks that are unlikely to catch up with + // the best block we have already. + int iters_remaining = max_refinement_iters - l; + float threshold = (0.05f * static_cast(iters_remaining)) + 1.1f; + if (errorval > (threshold * best_errorval_in_scb)) + { + break; + } + + if (errorval < best_errorval_in_scb) + { + best_errorval_in_scb = errorval; + workscb.errorval = errorval; + scb = workscb; + + if (errorval < tune_errorval_threshold) + { + return errorval; + } + } } + // perform a final pass over the weights to try to improve them. int adjustments = realign_weights( - decode_mode, bsd, blk, ewb, scb, u8_weight1_src, u8_weight2_src); + config.profile, bsd, blk, ewb, &workscb, + u8_weight1_src, u8_weight2_src); - if (adjustments == 0) + // Post-realign test + for (int j = 0; j < weights_to_copy; j++) + { + workscb.weights[j] = u8_weight1_src[j]; + workscb.weights[j + PLANE2_WEIGHTS_OFFSET] = u8_weight2_src[j]; + } + + float errorval = compute_symbolic_block_difference(config, bsd, &workscb, blk, ewb); + if (errorval == -1e30f) + { + errorval = -errorval; + workscb.error_block = 1; + } + + trace_add_data("error_postrealign", errorval); + best_errorval_in_mode = astc::min(errorval, best_errorval_in_mode); + + // Average refinement improvement is 3.5% per iteration, so skip + // blocks that are unlikely to catch up with the best block we + // have already. 
Assume a 5% per step to give benefit of the doubt + int iters_remaining = max_refinement_iters - 1 - l; + float threshold = (0.05f * static_cast(iters_remaining)) + 1.0f; + if (errorval > (threshold * best_errorval_in_scb)) { break; } - } - for (int j = 0; j < weights_to_copy; j++) - { - scb->plane1_weights[j] = u8_weight1_src[j]; - scb->plane2_weights[j] = u8_weight2_src[j]; - } + if (errorval < best_errorval_in_scb) + { + best_errorval_in_scb = errorval; + workscb.errorval = errorval; + scb = workscb; - scb++; + if (errorval < tune_errorval_threshold) + { + return errorval; + } + } + + if (adjustments == 0) + { + break; + } + } } + + return best_errorval_in_mode; } void expand_deblock_weights( @@ -821,9 +977,9 @@ void expand_deblock_weights( unsigned int ydim = ctx.config.block_y; unsigned int zdim = ctx.config.block_z; - float centerpos_x = (xdim - 1) * 0.5f; - float centerpos_y = (ydim - 1) * 0.5f; - float centerpos_z = (zdim - 1) * 0.5f; + float centerpos_x = static_cast(xdim - 1) * 0.5f; + float centerpos_y = static_cast(ydim - 1) * 0.5f; + float centerpos_z = static_cast(zdim - 1) * 0.5f; float *bef = ctx.deblock_weights; for (unsigned int z = 0; z < zdim; z++) @@ -832,13 +988,13 @@ void expand_deblock_weights( { for (unsigned int x = 0; x < xdim; x++) { - float xdif = (x - centerpos_x) / xdim; - float ydif = (y - centerpos_y) / ydim; - float zdif = (z - centerpos_z) / zdim; + float xdif = (static_cast(x) - centerpos_x) / static_cast(xdim); + float ydif = (static_cast(y) - centerpos_y) / static_cast(ydim); + float zdif = (static_cast(z) - centerpos_z) / static_cast(zdim); float wdif = 0.36f; float dist = astc::sqrt(xdif * xdif + ydif * ydif + zdif * zdif + wdif * wdif); - *bef = powf(dist, ctx.config.b_deblock_weight); + *bef = astc::pow(dist, ctx.config.b_deblock_weight); bef++; } } @@ -859,14 +1015,12 @@ static float prepare_error_weight_block( ctx.config.v_rgb_mean != 0.0f || ctx.config.v_rgb_stdev != 0.0f || \ ctx.config.v_a_mean != 0.0f || ctx.config.v_a_stdev != 0.0f; - float4 derv[MAX_TEXELS_PER_BLOCK]; + vfloat4 derv[MAX_TEXELS_PER_BLOCK]; imageblock_initialize_deriv(blk, bsd->texel_count, derv); - float4 color_weights = float4(ctx.config.cw_r_weight, - ctx.config.cw_g_weight, - ctx.config.cw_b_weight, - ctx.config.cw_a_weight); - - ewb->contains_zeroweight_texels = 0; + vfloat4 color_weights(ctx.config.cw_r_weight, + ctx.config.cw_g_weight, + ctx.config.cw_b_weight, + ctx.config.cw_a_weight); for (int z = 0; z < bsd->zdim; z++) { @@ -880,70 +1034,49 @@ static float prepare_error_weight_block( if (xpos >= input_image.dim_x || ypos >= input_image.dim_y || zpos >= input_image.dim_z) { - float4 weights = float4(1e-11f); - ewb->error_weights[idx] = weights; - ewb->contains_zeroweight_texels = 1; + ewb->error_weights[idx] = vfloat4(1e-11f); } else { - float4 error_weight = float4(ctx.config.v_rgb_base, - ctx.config.v_rgb_base, - ctx.config.v_rgb_base, - ctx.config.v_a_base); + vfloat4 error_weight(ctx.config.v_rgb_base, + ctx.config.v_rgb_base, + ctx.config.v_rgb_base, + ctx.config.v_a_base); int ydt = input_image.dim_x; int zdt = input_image.dim_x * input_image.dim_y; if (any_mean_stdev_weight) { - float4 avg = ctx.input_averages[zpos * zdt + ypos * ydt + xpos]; - if (avg.r < 6e-5f) - avg.r = 6e-5f; - if (avg.g < 6e-5f) - avg.g = 6e-5f; - if (avg.b < 6e-5f) - avg.b = 6e-5f; - if (avg.a < 6e-5f) - avg.a = 6e-5f; - + vfloat4 avg = ctx.input_averages[zpos * zdt + ypos * ydt + xpos]; + avg = max(avg, 6e-5f); avg = avg * avg; - float4 variance = ctx.input_variances[zpos * zdt + ypos 
* ydt + xpos]; + vfloat4 variance = ctx.input_variances[zpos * zdt + ypos * ydt + xpos]; variance = variance * variance; - float favg = (avg.r + avg.g + avg.b) * (1.0f / 3.0f); - float fvar = (variance.r + variance.g + variance.b) * (1.0f / 3.0f); + float favg = hadd_rgb_s(avg) * (1.0f / 3.0f); + float fvar = hadd_rgb_s(variance) * (1.0f / 3.0f); float mixing = ctx.config.v_rgba_mean_stdev_mix; - avg.r = favg * mixing + avg.r * (1.0f - mixing); - avg.g = favg * mixing + avg.g * (1.0f - mixing); - avg.b = favg * mixing + avg.b * (1.0f - mixing); + avg.set_lane<0>(favg * mixing + avg.lane<0>() * (1.0f - mixing)); + avg.set_lane<1>(favg * mixing + avg.lane<1>() * (1.0f - mixing)); + avg.set_lane<2>(favg * mixing + avg.lane<2>() * (1.0f - mixing)); - variance.r = fvar * mixing + variance.r * (1.0f - mixing); - variance.g = fvar * mixing + variance.g * (1.0f - mixing); - variance.b = fvar * mixing + variance.b * (1.0f - mixing); + variance.set_lane<0>(fvar * mixing + variance.lane<0>() * (1.0f - mixing)); + variance.set_lane<1>(fvar * mixing + variance.lane<1>() * (1.0f - mixing)); + variance.set_lane<2>(fvar * mixing + variance.lane<2>() * (1.0f - mixing)); - float4 stdev = float4(astc::sqrt(MAX(variance.r, 0.0f)), - astc::sqrt(MAX(variance.g, 0.0f)), - astc::sqrt(MAX(variance.b, 0.0f)), - astc::sqrt(MAX(variance.a, 0.0f))); + vfloat4 stdev = sqrt(max(variance, 0.0f)); - avg.r *= ctx.config.v_rgb_mean; - avg.g *= ctx.config.v_rgb_mean; - avg.b *= ctx.config.v_rgb_mean; - avg.a *= ctx.config.v_a_mean; + vfloat4 scalea(ctx.config.v_rgb_mean, ctx.config.v_rgb_mean, ctx.config.v_rgb_mean, ctx.config.v_a_mean); + avg = avg * scalea; - stdev.r *= ctx.config.v_rgb_stdev; - stdev.g *= ctx.config.v_rgb_stdev; - stdev.b *= ctx.config.v_rgb_stdev; - stdev.a *= ctx.config.v_a_stdev; + vfloat4 scales(ctx.config.v_rgb_stdev, ctx.config.v_rgb_stdev, ctx.config.v_rgb_stdev, ctx.config.v_a_stdev); + stdev = stdev * scales; error_weight = error_weight + avg + stdev; - - error_weight = float4(1.0f / error_weight.r, - 1.0f / error_weight.g, - 1.0f / error_weight.b, - 1.0f / error_weight.a); + error_weight = 1.0f / error_weight; } if (ctx.config.flags & ASTCENC_FLG_MAP_NORMAL) @@ -953,11 +1086,10 @@ static float prepare_error_weight_block( float yN = ((blk->data_a[idx] * (1.0f / 65535.0f)) - 0.5f) * 2.0f; float denom = 1.0f - xN * xN - yN * yN; - if (denom < 0.1f) - denom = 0.1f; + denom = astc::max(denom, 0.1f); denom = 1.0f / denom; - error_weight.r *= 1.0f + xN * xN * denom; - error_weight.a *= 1.0f + yN * yN * denom; + error_weight.set_lane<0>(error_weight.lane<0>() * (1.0f + xN * xN * denom)); + error_weight.set_lane<3>(error_weight.lane<3>() * (1.0f + yN * yN * denom)); } if (ctx.config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT) @@ -972,15 +1104,12 @@ static float prepare_error_weight_block( alpha_scale = blk->data_a[idx] * (1.0f / 65535.0f); } - if (alpha_scale < 0.0001f) - { - alpha_scale = 0.0001f; - } + alpha_scale = astc::max(alpha_scale, 0.0001f); alpha_scale *= alpha_scale; - error_weight.r *= alpha_scale; - error_weight.g *= alpha_scale; - error_weight.b *= alpha_scale; + error_weight.set_lane<0>(error_weight.lane<0>() * alpha_scale); + error_weight.set_lane<1>(error_weight.lane<1>() * alpha_scale); + error_weight.set_lane<2>(error_weight.lane<2>() * alpha_scale); } error_weight = error_weight * color_weights; @@ -994,47 +1123,44 @@ static float prepare_error_weight_block( // which is equivalent to dividing by the derivative of the transfer // function. 
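// A minimal illustrative sketch, not taken from the diff above: the per-texel
// error weight assembled in this hunk is, per channel, the reciprocal of
// (base + scaled local mean + scaled local standard deviation), so noisy or
// bright regions count for less. This is a simplified scalar restatement (the
// real code also squares and cross-mixes the channel statistics first); the
// parameter names are hypothetical stand-ins for the v_* config fields.
#include <cmath>

static float channel_error_weight(float base, float local_mean, float local_variance,
                                  float mean_scale, float stdev_scale)
{
    float stdev = std::sqrt(local_variance > 0.0f ? local_variance : 0.0f);
    // A larger mean or stdev grows the denominator and shrinks the weight.
    return 1.0f / (base + mean_scale * local_mean + stdev_scale * stdev);
}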
- error_weight.r /= (derv[idx].r * derv[idx].r * 1e-10f); - error_weight.g /= (derv[idx].g * derv[idx].g * 1e-10f); - error_weight.b /= (derv[idx].b * derv[idx].b * 1e-10f); - error_weight.a /= (derv[idx].a * derv[idx].a * 1e-10f); - + error_weight = error_weight / (derv[idx] * derv[idx] * 1e-10f); ewb->error_weights[idx] = error_weight; - if (dot(error_weight, float4(1.0f, 1.0f, 1.0f, 1.0f)) < 1e-10f) - { - ewb->contains_zeroweight_texels = 1; - } } idx++; } } } - float4 error_weight_sum = float4(0.0f, 0.0f, 0.0f, 0.0f); + vfloat4 error_weight_sum = vfloat4::zero(); int texels_per_block = bsd->texel_count; for (int i = 0; i < texels_per_block; i++) { error_weight_sum = error_weight_sum + ewb->error_weights[i]; - ewb->texel_weight_r[i] = ewb->error_weights[i].r; - ewb->texel_weight_g[i] = ewb->error_weights[i].g; - ewb->texel_weight_b[i] = ewb->error_weights[i].b; - ewb->texel_weight_a[i] = ewb->error_weights[i].a; + float wr = ewb->error_weights[i].lane<0>(); + float wg = ewb->error_weights[i].lane<1>(); + float wb = ewb->error_weights[i].lane<2>(); + float wa = ewb->error_weights[i].lane<3>(); - ewb->texel_weight_rg[i] = (ewb->error_weights[i].r + ewb->error_weights[i].g) * 0.5f; - ewb->texel_weight_rb[i] = (ewb->error_weights[i].r + ewb->error_weights[i].b) * 0.5f; - ewb->texel_weight_gb[i] = (ewb->error_weights[i].g + ewb->error_weights[i].b) * 0.5f; - ewb->texel_weight_ra[i] = (ewb->error_weights[i].r + ewb->error_weights[i].a) * 0.5f; + ewb->texel_weight_r[i] = wr; + ewb->texel_weight_g[i] = wg; + ewb->texel_weight_b[i] = wb; + ewb->texel_weight_a[i] = wa; - ewb->texel_weight_gba[i] = (ewb->error_weights[i].g + ewb->error_weights[i].b + ewb->error_weights[i].a) * 0.333333f; - ewb->texel_weight_rba[i] = (ewb->error_weights[i].r + ewb->error_weights[i].b + ewb->error_weights[i].a) * 0.333333f; - ewb->texel_weight_rga[i] = (ewb->error_weights[i].r + ewb->error_weights[i].g + ewb->error_weights[i].a) * 0.333333f; - ewb->texel_weight_rgb[i] = (ewb->error_weights[i].r + ewb->error_weights[i].g + ewb->error_weights[i].b) * 0.333333f; + ewb->texel_weight_rg[i] = (wr + wg) * 0.5f; + ewb->texel_weight_rb[i] = (wr + wb) * 0.5f; + ewb->texel_weight_gb[i] = (wg + wb) * 0.5f; + ewb->texel_weight_ra[i] = (wr + wa) * 0.5f; - ewb->texel_weight[i] = (ewb->error_weights[i].r + ewb->error_weights[i].g + ewb->error_weights[i].b + ewb->error_weights[i].a) * 0.25f; + ewb->texel_weight_gba[i] = (wg + wb + wa) * 0.333333f; + ewb->texel_weight_rba[i] = (wr + wb + wa) * 0.333333f; + ewb->texel_weight_rga[i] = (wr + wg + wa) * 0.333333f; + ewb->texel_weight_rgb[i] = (wr + wg + wb) * 0.333333f; + + ewb->texel_weight[i] = (wr + wg + wb + wa) * 0.25f; } - return dot(error_weight_sum, float4(1.0f, 1.0f, 1.0f, 1.0f)); + return hadd_s(error_weight_sum); } static float prepare_block_statistics( @@ -1096,7 +1222,7 @@ static float prepare_block_statistics( aa_var += a * aw; } - float rpt = 1.0f / MAX(weight_sum, 1e-7f); + float rpt = 1.0f / astc::max(weight_sum, 1e-7f); rr_var -= rs * (rs * rpt); rg_cov -= gs * (rs * rpt); @@ -1112,12 +1238,12 @@ static float prepare_block_statistics( aa_var -= as * (as * rpt); - rg_cov *= astc::rsqrt(MAX(rr_var * gg_var, 1e-30f)); - rb_cov *= astc::rsqrt(MAX(rr_var * bb_var, 1e-30f)); - ra_cov *= astc::rsqrt(MAX(rr_var * aa_var, 1e-30f)); - gb_cov *= astc::rsqrt(MAX(gg_var * bb_var, 1e-30f)); - ga_cov *= astc::rsqrt(MAX(gg_var * aa_var, 1e-30f)); - ba_cov *= astc::rsqrt(MAX(bb_var * aa_var, 1e-30f)); + rg_cov *= astc::rsqrt(astc::max(rr_var * gg_var, 1e-30f)); + rb_cov *= 
astc::rsqrt(astc::max(rr_var * bb_var, 1e-30f)); + ra_cov *= astc::rsqrt(astc::max(rr_var * aa_var, 1e-30f)); + gb_cov *= astc::rsqrt(astc::max(gg_var * bb_var, 1e-30f)); + ga_cov *= astc::rsqrt(astc::max(gg_var * aa_var, 1e-30f)); + ba_cov *= astc::rsqrt(astc::max(bb_var * aa_var, 1e-30f)); if (astc::isnan(rg_cov)) rg_cov = 1.0f; if (astc::isnan(rb_cov)) rb_cov = 1.0f; @@ -1126,11 +1252,28 @@ static float prepare_block_statistics( if (astc::isnan(ga_cov)) ga_cov = 1.0f; if (astc::isnan(ba_cov)) ba_cov = 1.0f; - float lowest_correlation = MIN(fabsf(rg_cov), fabsf(rb_cov)); - lowest_correlation = MIN(lowest_correlation, fabsf(ra_cov)); - lowest_correlation = MIN(lowest_correlation, fabsf(gb_cov)); - lowest_correlation = MIN(lowest_correlation, fabsf(ga_cov)); - lowest_correlation = MIN(lowest_correlation, fabsf(ba_cov)); + float lowest_correlation = astc::min(fabsf(rg_cov), fabsf(rb_cov)); + lowest_correlation = astc::min(lowest_correlation, fabsf(ra_cov)); + lowest_correlation = astc::min(lowest_correlation, fabsf(gb_cov)); + lowest_correlation = astc::min(lowest_correlation, fabsf(ga_cov)); + lowest_correlation = astc::min(lowest_correlation, fabsf(ba_cov)); + + // Diagnostic trace points + trace_add_data("min_r", blk->data_min.lane<0>()); + trace_add_data("max_r", blk->data_max.lane<0>()); + trace_add_data("min_g", blk->data_min.lane<1>()); + trace_add_data("max_g", blk->data_max.lane<1>()); + trace_add_data("min_b", blk->data_min.lane<2>()); + trace_add_data("max_b", blk->data_max.lane<2>()); + trace_add_data("min_a", blk->data_min.lane<3>()); + trace_add_data("max_a", blk->data_max.lane<3>()); + trace_add_data("cov_rg", fabsf(rg_cov)); + trace_add_data("cov_rb", fabsf(rb_cov)); + trace_add_data("cov_ra", fabsf(ra_cov)); + trace_add_data("cov_gb", fabsf(gb_cov)); + trace_add_data("cov_ga", fabsf(ga_cov)); + trace_add_data("cov_ba", fabsf(ba_cov)); + return lowest_correlation; } @@ -1143,71 +1286,92 @@ void compress_block( compress_symbolic_block_buffers* tmpbuf) { astcenc_profile decode_mode = ctx.config.profile; + error_weight_block *ewb = &tmpbuf->ewb; const block_size_descriptor* bsd = ctx.bsd; + float lowest_correl; + + TRACE_NODE(node0, "block"); + trace_add_data("pos_x", blk->xpos); + trace_add_data("pos_y", blk->ypos); + trace_add_data("pos_z", blk->zpos); + + // Set stricter block targets for luminance data as we have more bits to + // play with - fewer endpoints and never need a second weight plane + bool block_is_l = imageblock_is_lum(blk); + float block_is_l_scale = block_is_l ? 1.0f / 1.5f : 1.0f; + + // Set slightly stricter block targets for lumalpha data as we have more + // bits to play with - fewer endpoints but may use a second weight plane + bool block_is_la = imageblock_is_lumalp(blk); + float block_is_la_scale = block_is_la ? 1.0f / 1.05f : 1.0f; + + bool block_skip_two_plane = false; + + // Default max partition, but +1 if only have 1 or 2 active components + int max_partitions = ctx.config.tune_partition_count_limit; + if (block_is_l || block_is_la) + { + max_partitions = astc::min(max_partitions + 1, 4); + } + + +#if defined(ASTCENC_DIAGNOSTICS) + // Do this early in diagnostic builds so we can dump uniform metrics + // for every block. Do it later in release builds to avoid redundant work! 
+ float error_weight_sum = prepare_error_weight_block(ctx, input_image, bsd, blk, ewb); + float error_threshold = ctx.config.tune_db_limit + * error_weight_sum + * block_is_l_scale + * block_is_la_scale; + + lowest_correl = prepare_block_statistics(bsd->texel_count, blk, ewb); + + trace_add_data("tune_error_threshold", error_threshold); +#endif - if (blk->red_min == blk->red_max && blk->green_min == blk->green_max && blk->blue_min == blk->blue_max && blk->alpha_min == blk->alpha_max) + if (all(blk->data_min == blk->data_max)) { + TRACE_NODE(node1, "pass"); + trace_add_data("partition_count", 0); + trace_add_data("plane_count", 1); + // detected a constant-color block. Encode as FP16 if using HDR scb.error_block = 0; + scb.partition_count = 0; if ((decode_mode == ASTCENC_PRF_HDR) || (decode_mode == ASTCENC_PRF_HDR_RGB_LDR_A)) { scb.block_mode = -1; - scb.partition_count = 0; - float4 orig_color = blk->origin_texel; - scb.constant_color[0] = float_to_sf16(orig_color.r, SF_NEARESTEVEN); - scb.constant_color[1] = float_to_sf16(orig_color.g, SF_NEARESTEVEN); - scb.constant_color[2] = float_to_sf16(orig_color.b, SF_NEARESTEVEN); - scb.constant_color[3] = float_to_sf16(orig_color.a, SF_NEARESTEVEN); + vint4 color_f16 = float_to_float16(blk->origin_texel); + store(color_f16, scb.constant_color); } else { // Encode as UNORM16 if NOT using HDR. scb.block_mode = -2; - scb.partition_count = 0; - float4 orig_color = blk->origin_texel; - float red = orig_color.r; - float green = orig_color.g; - float blue = orig_color.b; - float alpha = orig_color.a; - - if (red < 0) - red = 0; - else if (red > 1) - red = 1; - - if (green < 0) - green = 0; - else if (green > 1) - green = 1; - - if (blue < 0) - blue = 0; - else if (blue > 1) - blue = 1; - - if (alpha < 0) - alpha = 0; - else if (alpha > 1) - alpha = 1; - - scb.constant_color[0] = astc::flt2int_rtn(red * 65535.0f); - scb.constant_color[1] = astc::flt2int_rtn(green * 65535.0f); - scb.constant_color[2] = astc::flt2int_rtn(blue * 65535.0f); - scb.constant_color[3] = astc::flt2int_rtn(alpha * 65535.0f); + vfloat4 color_f32 = clamp(0.0f, 1.0f, blk->origin_texel) * 65535.0f; + vint4 color_u16 = float_to_int_rtn(color_f32); + store(color_u16, scb.constant_color); } + trace_add_data("exit", "quality hit"); + symbolic_to_physical(*bsd, scb, pcb); return; } - error_weight_block *ewb = &tmpbuf->ewb; +#if !defined(ASTCENC_DIAGNOSTICS) float error_weight_sum = prepare_error_weight_block(ctx, input_image, bsd, blk, ewb); + float error_threshold = ctx.config.tune_db_limit + * error_weight_sum + * block_is_l_scale + * block_is_la_scale; +#endif - symbolic_compressed_block *tempblocks = tmpbuf->tempblocks; - - float error_of_best_block = 1e20f; + // Set SCB and mode errors to a very high error value + scb.errorval = 1e30f; + scb.error_block = 1; float best_errorvals_in_modes[13]; for (int i = 0; i < 13; i++) @@ -1217,8 +1381,6 @@ void compress_block( int uses_alpha = imageblock_uses_alpha(blk); - float mode_cutoff = ctx.config.tune_block_mode_limit / 100.0f; - // Trial using 1 plane of weights and 1 partition. // Most of the time we test it twice, first with a mode cutoff of 0 and @@ -1227,227 +1389,182 @@ void compress_block( // disabled for 4x4 and 5x4 blocks where it nearly always slows down the // compression and slightly reduces image quality. 
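// A minimal illustrative sketch, not taken from the diff above, of two pieces
// of the compress_block() setup shown in this hunk. First, the per-block error
// threshold: the quality limit is scaled by the block's total error weight and
// tightened for luminance-only (divide by 1.5) and luma+alpha (divide by 1.05)
// blocks, which have spare bits to trade for quality. Second, the
// constant-color LDR fast path, which clamps each channel to [0, 1] and rounds
// to UNORM16. Names, and the example numbers in the comment, are hypothetical.
#include <cmath>
#include <cstdint>

static float block_error_threshold(float tune_db_limit, float error_weight_sum,
                                   bool block_is_l, bool block_is_la)
{
    float scale_l  = block_is_l  ? 1.0f / 1.5f  : 1.0f;
    float scale_la = block_is_la ? 1.0f / 1.05f : 1.0f;
    // e.g. with tune_db_limit = 256 and error_weight_sum = 16 on a luminance
    // block, the threshold becomes 256 * 16 / 1.5, roughly 2731.
    return tune_db_limit * error_weight_sum * scale_l * scale_la;
}

static uint16_t encode_constant_channel_unorm16(float value)
{
    value = value < 0.0f ? 0.0f : (value > 1.0f ? 1.0f : value);   // clamp(0, 1, v)
    return static_cast<uint16_t>(std::lround(value * 65535.0f));   // round to nearest
}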
- float modecutoffs[2]; - float errorval_mult[2] = { 2.5, 1 }; - modecutoffs[0] = 0; - modecutoffs[1] = mode_cutoff; + float errorval_mult[2] = { + 1.0f / ctx.config.tune_mode0_mse_overshoot, + 1.0f + }; - float lowest_correl; - float best_errorval_in_mode; + static const float errorval_overshoot = 1.0f / ctx.config.tune_refinement_mse_overshoot; - int start_trial = bsd->texel_count < TUNE_MAX_TEXELS_MODE0_FASTPATH ? 1 : 0; + int start_trial = bsd->texel_count < (int)TUNE_MAX_TEXELS_MODE0_FASTPATH ? 1 : 0; for (int i = start_trial; i < 2; i++) { - compress_symbolic_block_fixed_partition_1_plane( - decode_mode, modecutoffs[i], + TRACE_NODE(node1, "pass"); + trace_add_data("partition_count", 1); + trace_add_data("plane_count", 1); + trace_add_data("search_mode", i); + + float errorval = compress_symbolic_block_fixed_partition_1_plane( + ctx.config, i == 0, ctx.config.tune_candidate_limit, + error_threshold * errorval_mult[i] * errorval_overshoot, ctx.config.tune_refinement_limit, - bsd, 1, 0, blk, ewb, tempblocks, &tmpbuf->planes); - - best_errorval_in_mode = 1e30f; - for (unsigned int j = 0; j < ctx.config.tune_candidate_limit; j++) - { - if (tempblocks[j].error_block) - { - continue; - } - - float errorval = compute_symbolic_block_difference(decode_mode, bsd, tempblocks + j, blk, ewb); - errorval *= errorval_mult[i]; - if (errorval < best_errorval_in_mode) - { - best_errorval_in_mode = errorval; - } - - if (errorval < error_of_best_block) - { - error_of_best_block = errorval; - scb = tempblocks[j]; - } - } + bsd, 1, 0, blk, ewb, scb, &tmpbuf->planes); // Mode 0 - best_errorvals_in_modes[0] = best_errorval_in_mode; - if ((error_of_best_block / error_weight_sum) < ctx.config.tune_db_limit) + best_errorvals_in_modes[0] = errorval; + if (errorval < (error_threshold * errorval_mult[i])) { + trace_add_data("exit", "quality hit"); goto END_OF_TESTS; } } +#if !defined(ASTCENC_DIAGNOSTICS) lowest_correl = prepare_block_statistics(bsd->texel_count, blk, ewb); +#endif + + block_skip_two_plane = lowest_correl > ctx.config.tune_two_plane_early_out_limit; // next, test the four possible 1-partition, 2-planes modes for (int i = 0; i < 4; i++) { + TRACE_NODE(node1, "pass"); + trace_add_data("partition_count", 1); + trace_add_data("plane_count", 2); + trace_add_data("plane_channel", i); - if (lowest_correl > ctx.config.tune_two_plane_early_out_limit) + if (block_skip_two_plane) { + trace_add_data("skip", "tune_two_plane_early_out_limit"); continue; } if (blk->grayscale && i != 3) { + trace_add_data("skip", "grayscale block"); continue; } if (!uses_alpha && i == 3) { + trace_add_data("skip", "no alpha channel"); continue; } - compress_symbolic_block_fixed_partition_2_planes( - decode_mode, mode_cutoff, + float errorval = compress_symbolic_block_fixed_partition_2_planes( + ctx.config, false, ctx.config.tune_candidate_limit, + error_threshold * errorval_overshoot, ctx.config.tune_refinement_limit, bsd, 1, // partition count 0, // partition index i, // the color component to test a separate plane of weights for. 
- blk, ewb, tempblocks, &tmpbuf->planes); - - best_errorval_in_mode = 1e30f; - for (unsigned int j = 0; j < ctx.config.tune_candidate_limit; j++) - { - if (tempblocks[j].error_block) - { - continue; - } - - float errorval = compute_symbolic_block_difference(decode_mode, bsd, tempblocks + j, blk, ewb); - if (errorval < best_errorval_in_mode) - { - best_errorval_in_mode = errorval; - } - - if (errorval < error_of_best_block) - { - error_of_best_block = errorval; - scb = tempblocks[j]; - } + blk, ewb, scb, &tmpbuf->planes); - // Modes 1-4 - best_errorvals_in_modes[i + 1] = best_errorval_in_mode; - } - - if ((error_of_best_block / error_weight_sum) < ctx.config.tune_db_limit) + // Modes 7, 10 (13 is unreachable) + if (errorval < error_threshold) { + trace_add_data("exit", "quality hit"); goto END_OF_TESTS; } } // find best blocks for 2, 3 and 4 partitions - for (int partition_count = 2; partition_count <= 4; partition_count++) + for (int partition_count = 2; partition_count <= max_partitions; partition_count++) { - int partition_indices_1plane[2]; - int partition_index_2planes; + int partition_indices_1plane[2] { 0, 0 }; + int partition_index_2planes = 0; find_best_partitionings(bsd, blk, ewb, partition_count, - ctx.config.tune_partition_limit, + ctx.config.tune_partition_index_limit, &(partition_indices_1plane[0]), &(partition_indices_1plane[1]), - &partition_index_2planes); + block_skip_two_plane ? nullptr : &partition_index_2planes); for (int i = 0; i < 2; i++) { - compress_symbolic_block_fixed_partition_1_plane( - decode_mode, mode_cutoff, + TRACE_NODE(node1, "pass"); + trace_add_data("partition_count", partition_count); + trace_add_data("partition_index", partition_indices_1plane[i]); + trace_add_data("plane_count", 1); + trace_add_data("search_mode", i); + + float errorval = compress_symbolic_block_fixed_partition_1_plane( + ctx.config, false, ctx.config.tune_candidate_limit, + error_threshold * errorval_overshoot, ctx.config.tune_refinement_limit, bsd, partition_count, partition_indices_1plane[i], - blk, ewb, tempblocks, &tmpbuf->planes); - - best_errorval_in_mode = 1e30f; - for (unsigned int j = 0; j < ctx.config.tune_candidate_limit; j++) - { - if (tempblocks[j].error_block) - { - continue; - } - - float errorval = compute_symbolic_block_difference(decode_mode, bsd, tempblocks + j, blk, ewb); - if (errorval < best_errorval_in_mode) - { - best_errorval_in_mode = errorval; - } - - if (errorval < error_of_best_block) - { - error_of_best_block = errorval; - scb = tempblocks[j]; - } - } + blk, ewb, scb, &tmpbuf->planes); // Modes 5, 6, 8, 9, 11, 12 - best_errorvals_in_modes[3 * (partition_count - 2) + 5 + i] = best_errorval_in_mode; - - if ((error_of_best_block / error_weight_sum) < ctx.config.tune_db_limit) + best_errorvals_in_modes[3 * (partition_count - 2) + 5 + i] = errorval; + if (errorval < error_threshold) { + trace_add_data("exit", "quality hit"); goto END_OF_TESTS; } } - if (partition_count == 2 && MIN(best_errorvals_in_modes[5], best_errorvals_in_modes[6]) > (best_errorvals_in_modes[0] * ctx.config.tune_partition_early_out_limit)) + if (partition_count == 2 && astc::min(best_errorvals_in_modes[5], best_errorvals_in_modes[6]) > (best_errorvals_in_modes[0] * ctx.config.tune_partition_early_out_limit)) { + trace_add_data("skip", "tune_partition_early_out_limit 1"); goto END_OF_TESTS; } // Skip testing dual weight planes for: // * 4 partitions (can't be encoded by the format) - // * Luminance only blocks (never need for a second plane) - // * Blocks with higher component correlation 
than the tuning cutoff - if ((partition_count == 4) || - (blk->grayscale && !uses_alpha) || - (lowest_correl > ctx.config.tune_two_plane_early_out_limit)) + if (partition_count == 4) { continue; } - - if (lowest_correl <= ctx.config.tune_two_plane_early_out_limit) + // * Luminance only blocks (never need for a second plane) + if (blk->grayscale && !uses_alpha) { - compress_symbolic_block_fixed_partition_2_planes( - decode_mode, - mode_cutoff, - ctx.config.tune_candidate_limit, - ctx.config.tune_refinement_limit, - bsd, - partition_count, - partition_index_2planes & (PARTITION_COUNT - 1), - partition_index_2planes >> PARTITION_BITS, - blk, ewb, tempblocks, &tmpbuf->planes); - - best_errorval_in_mode = 1e30f; - for (unsigned int j = 0; j < ctx.config.tune_candidate_limit; j++) - { - if (tempblocks[j].error_block) - { - continue; - } - - float errorval = compute_symbolic_block_difference(decode_mode, bsd, tempblocks + j, blk, ewb); - if (errorval < best_errorval_in_mode) - { - best_errorval_in_mode = errorval; - } + trace_add_data("skip", "grayscale no alpha block "); + continue; + } - if (errorval < error_of_best_block) - { - error_of_best_block = errorval; - scb = tempblocks[j]; - } - } + // * Blocks with higher component correlation than the tuning cutoff + if (block_skip_two_plane) + { + trace_add_data("skip", "tune_two_plane_early_out_limit"); + continue; + } - // Modes 7, 10 (13 is unreachable) - best_errorvals_in_modes[3 * (partition_count - 2) + 5 + 2] = best_errorval_in_mode; - if ((error_of_best_block / error_weight_sum) < ctx.config.tune_db_limit) - { - goto END_OF_TESTS; - } + TRACE_NODE(node1, "pass"); + trace_add_data("partition_count", partition_count); + trace_add_data("partition_index", partition_index_2planes & (PARTITION_COUNT - 1)); + trace_add_data("plane_count", 2); + trace_add_data("plane_channel", partition_index_2planes >> PARTITION_BITS); + + float errorval = compress_symbolic_block_fixed_partition_2_planes( + ctx.config, + false, + ctx.config.tune_candidate_limit, + error_threshold * errorval_overshoot, + ctx.config.tune_refinement_limit, + bsd, + partition_count, + partition_index_2planes & (PARTITION_COUNT - 1), + partition_index_2planes >> PARTITION_BITS, + blk, ewb, scb, &tmpbuf->planes); + + // Modes 7, 10 (13 is unreachable) + if (errorval < error_threshold) + { + trace_add_data("exit", "quality hit"); + goto END_OF_TESTS; } } + trace_add_data("exit", "quality not hit"); + END_OF_TESTS: - // compress/decompress to a physical block + // Compress to a physical block symbolic_to_physical(*bsd, scb, pcb); } diff --git a/libkram/astc-encoder/astcenc_compute_variance.cpp b/libkram/astc-encoder/astcenc_compute_variance.cpp index ae7f1170..eee65704 100644 --- a/libkram/astc-encoder/astcenc_compute_variance.cpp +++ b/libkram/astc-encoder/astcenc_compute_variance.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -34,8 +34,6 @@ #include -#define USE_2DARRAY 1 - /** * @brief Generate a prefix-sum array using Brent-Kung algorithm. * @@ -49,7 +47,7 @@ * @param stride The item spacing in the array; i.e. dense arrays should use 1. 
*/ static void brent_kung_prefix_sum( - float4* d, + vfloat4* d, size_t items, int stride ) { @@ -65,7 +63,7 @@ static void brent_kung_prefix_sum( size_t start = lc_stride - 1; size_t iters = items >> log2_stride; - float4 *da = d + (start * stride); + vfloat4 *da = d + (start * stride); ptrdiff_t ofs = -(ptrdiff_t)(step * stride); size_t ofs_stride = stride << log2_stride; @@ -89,7 +87,7 @@ static void brent_kung_prefix_sum( size_t start = step + lc_stride - 1; size_t iters = (items - step) >> log2_stride; - float4 *da = d + (start * stride); + vfloat4 *da = d + (start * stride); ptrdiff_t ofs = -(ptrdiff_t)(step * stride); size_t ofs_stride = stride << log2_stride; @@ -119,26 +117,26 @@ static void compute_pixel_region_variance( float rgb_power = arg->rgb_power; float alpha_power = arg->alpha_power; astcenc_swizzle swz = arg->swz; - int have_z = arg->have_z; + bool have_z = arg->have_z; - int size_x = arg->size.r; - int size_y = arg->size.g; - int size_z = arg->size.b; + int size_x = arg->size_x; + int size_y = arg->size_y; + int size_z = arg->size_z; - int offset_x = arg->offset.r; - int offset_y = arg->offset.g; - int offset_z = arg->offset.b; + int offset_x = arg->offset_x; + int offset_y = arg->offset_y; + int offset_z = arg->offset_z; int avg_var_kernel_radius = arg->avg_var_kernel_radius; int alpha_kernel_radius = arg->alpha_kernel_radius; float *input_alpha_averages = ctx.input_alpha_averages; - float4 *input_averages = ctx.input_averages; - float4 *input_variances = ctx.input_variances; - float4 *work_memory = arg->work_memory; + vfloat4 *input_averages = ctx.input_averages; + vfloat4 *input_variances = ctx.input_variances; + vfloat4 *work_memory = arg->work_memory; // Compute memory sizes and dimensions that we need - int kernel_radius = MAX(avg_var_kernel_radius, alpha_kernel_radius); + int kernel_radius = astc::max(avg_var_kernel_radius, alpha_kernel_radius); int kerneldim = 2 * kernel_radius + 1; int kernel_radius_xy = kernel_radius; int kernel_radius_z = have_z ? kernel_radius : 0; @@ -151,8 +149,8 @@ static void compute_pixel_region_variance( int zd_start = have_z ? 
1 : 0; int are_powers_1 = (rgb_power == 1.0f) && (alpha_power == 1.0f); - float4 *varbuf1 = work_memory; - float4 *varbuf2 = work_memory + sizeprod; + vfloat4 *varbuf1 = work_memory; + vfloat4 *varbuf2 = work_memory + sizeprod; // Scaling factors to apply to Y and Z for accesses into the work buffers int yst = padsize_x; @@ -166,18 +164,9 @@ static void compute_pixel_region_variance( #define VARBUF1(z, y, x) varbuf1[z * zst + y * yst + x] #define VARBUF2(z, y, x) varbuf2[z * zst + y * yst + x] - // True if any non-identity swizzle - bool needs_swz = (swz.r != ASTCENC_SWZ_R) || (swz.g != ASTCENC_SWZ_G) || - (swz.b != ASTCENC_SWZ_B) || (swz.a != ASTCENC_SWZ_A); - // Load N and N^2 values into the work buffers if (img->data_type == ASTCENC_TYPE_U8) { -#if USE_2DARRAY - uint8_t* data8 = static_cast(img->data); -#else - uint8_t*** data8 = static_cast(img->data); -#endif // Swizzle data structure 4 = ZERO, 5 = ONE uint8_t data[6]; data[ASTCENC_SWZ_0] = 0; @@ -187,6 +176,7 @@ static void compute_pixel_region_variance( { int z_src = (z - zd_start) + offset_z - kernel_radius_z; z_src = astc::clamp(z_src, 0, (int)(img->dim_z - 1)); + uint8_t* data8 = static_cast(img->data[z_src]); for (int y = 1; y < padsize_y; y++) { @@ -198,48 +188,25 @@ static void compute_pixel_region_variance( int x_src = (x - 1) + offset_x - kernel_radius_xy; x_src = astc::clamp(x_src, 0, (int)(img->dim_x - 1)); - float4 d; -#if USE_2DARRAY - int px = (y_src * img->dim_x + x_src) * 4; - - uint8_t r = data8[px + 0]; - uint8_t g = data8[px + 1]; - uint8_t b = data8[px + 2]; - uint8_t a = data8[px + 3]; - - if (needs_swz) - { - data[0] = r; - data[1] = g; - data[2] = b; - data[3] = a; - - r = data[swz.r]; - g = data[swz.g]; - b = data[swz.b]; - a = data[swz.a]; - } -#else - data[0] = data8[z_src][y_src][4 * x_src ]; - data[1] = data8[z_src][y_src][4 * x_src + 1]; - data[2] = data8[z_src][y_src][4 * x_src + 2]; - data[3] = data8[z_src][y_src][4 * x_src + 3]; - - uint8_t r = data[swz.r]; - uint8_t g = data[swz.g]; - uint8_t b = data[swz.b]; - uint8_t a = data[swz.a]; -#endif - // int to float conversion - d = float4((float)r, (float)g, (float)b, float(a)); - d = d * (1.0f / 255.0f); + data[0] = data8[(4 * img->dim_x * y_src) + (4 * x_src )]; + data[1] = data8[(4 * img->dim_x * y_src) + (4 * x_src + 1)]; + data[2] = data8[(4 * img->dim_x * y_src) + (4 * x_src + 2)]; + data[3] = data8[(4 * img->dim_x * y_src) + (4 * x_src + 3)]; + + uint8_t r = data[swz.r]; + uint8_t g = data[swz.g]; + uint8_t b = data[swz.b]; + uint8_t a = data[swz.a]; + + vfloat4 d = vfloat4 (r * (1.0f / 255.0f), + g * (1.0f / 255.0f), + b * (1.0f / 255.0f), + a * (1.0f / 255.0f)); if (!are_powers_1) { - d.r = powf(MAX(d.r, 1e-6f), rgb_power); - d.g = powf(MAX(d.g, 1e-6f), rgb_power); - d.b = powf(MAX(d.b, 1e-6f), rgb_power); - d.a = powf(MAX(d.a, 1e-6f), alpha_power); + vfloat4 exp(rgb_power, rgb_power, rgb_power, alpha_power); + d = pow(max(d, 1e-6f), exp); } VARBUF1(z, y, x) = d; @@ -250,9 +217,6 @@ static void compute_pixel_region_variance( } else if (img->data_type == ASTCENC_TYPE_F16) { -// TODO: apply USE_2DARRAY to FP16 inputs - uint16_t*** data16 = static_cast(img->data); - // Swizzle data structure 4 = ZERO, 5 = ONE (in FP16) uint16_t data[6]; data[ASTCENC_SWZ_0] = 0; @@ -262,6 +226,7 @@ static void compute_pixel_region_variance( { int z_src = (z - zd_start) + offset_z - kernel_radius_z; z_src = astc::clamp(z_src, 0, (int)(img->dim_z - 1)); + uint16_t* data16 = static_cast(img->data[z_src]); for (int y = 1; y < padsize_y; y++) { @@ -273,27 +238,18 @@ 
static void compute_pixel_region_variance( int x_src = (x - 1) + offset_x - kernel_radius_xy; x_src = astc::clamp(x_src, 0, (int)(img->dim_x - 1)); - data[0] = data16[z_src][y_src][4 * x_src ]; - data[1] = data16[z_src][y_src][4 * x_src + 1]; - data[2] = data16[z_src][y_src][4 * x_src + 2]; - data[3] = data16[z_src][y_src][4 * x_src + 3]; - - uint16_t r = data[swz.r]; - uint16_t g = data[swz.g]; - uint16_t b = data[swz.b]; - uint16_t a = data[swz.a]; + data[0] = data16[(4 * img->dim_x * y_src) + (4 * x_src )]; + data[1] = data16[(4 * img->dim_x * y_src) + (4 * x_src + 1)]; + data[2] = data16[(4 * img->dim_x * y_src) + (4 * x_src + 2)]; + data[3] = data16[(4 * img->dim_x * y_src) + (4 * x_src + 3)]; - float4 d = float4(sf16_to_float(r), - sf16_to_float(g), - sf16_to_float(b), - sf16_to_float(a)); + vint4 di(data[swz.r], data[swz.g], data[swz.b], data[swz.a]); + vfloat4 d = float16_to_float(di); if (!are_powers_1) { - d.r = powf(MAX(d.r, 1e-6f), rgb_power); - d.g = powf(MAX(d.g, 1e-6f), rgb_power); - d.b = powf(MAX(d.b, 1e-6f), rgb_power); - d.a = powf(MAX(d.a, 1e-6f), alpha_power); + vfloat4 exp(rgb_power, rgb_power, rgb_power, alpha_power); + d = pow(max(d, 1e-6f), exp); } VARBUF1(z, y, x) = d; @@ -305,11 +261,7 @@ static void compute_pixel_region_variance( else // if (img->data_type == ASTCENC_TYPE_F32) { assert(img->data_type == ASTCENC_TYPE_F32); -#if USE_2DARRAY - float4* data32 = static_cast(img->data); -#else - float*** data32 = static_cast(img->data); -#endif + // Swizzle data structure 4 = ZERO, 5 = ONE (in FP16) float data[6]; data[ASTCENC_SWZ_0] = 0.0f; @@ -319,6 +271,7 @@ static void compute_pixel_region_variance( { int z_src = (z - zd_start) + offset_z - kernel_radius_z; z_src = astc::clamp(z_src, 0, (int)(img->dim_z - 1)); + float* data32 = static_cast(img->data[z_src]); for (int y = 1; y < padsize_y; y++) { @@ -330,44 +283,22 @@ static void compute_pixel_region_variance( int x_src = (x - 1) + offset_x - kernel_radius_xy; x_src = astc::clamp(x_src, 0, (int)(img->dim_x - 1)); -#if USE_2DARRAY - assert(z_src == 0); - float4 d = data32[y_src * img->dim_x + x_src]; - - if (needs_swz) - { - data[0] = d.r; - data[1] = d.g; - data[2] = d.b; - data[3] = d.a; - - float r = data[swz.r]; - float g = data[swz.g]; - float b = data[swz.b]; - float a = data[swz.a]; - - d = float4(r,g,b,a); - } -#else - data[0] = data32[z_src][y_src][4 * x_src ]; - data[1] = data32[z_src][y_src][4 * x_src + 1]; - data[2] = data32[z_src][y_src][4 * x_src + 2]; - data[3] = data32[z_src][y_src][4 * x_src + 3]; - + data[0] = data32[(4 * img->dim_x * y_src) + (4 * x_src )]; + data[1] = data32[(4 * img->dim_x * y_src) + (4 * x_src + 1)]; + data[2] = data32[(4 * img->dim_x * y_src) + (4 * x_src + 2)]; + data[3] = data32[(4 * img->dim_x * y_src) + (4 * x_src + 3)]; + float r = data[swz.r]; float g = data[swz.g]; float b = data[swz.b]; float a = data[swz.a]; - float4 d = float4(r, g, b, a); -#endif + vfloat4 d(r, g, b, a); if (!are_powers_1) { - d.r = powf(MAX(d.r, 1e-6f), rgb_power); - d.g = powf(MAX(d.g, 1e-6f), rgb_power); - d.b = powf(MAX(d.b, 1e-6f), rgb_power); - d.a = powf(MAX(d.a, 1e-6f), alpha_power); + vfloat4 exp(rgb_power, rgb_power, rgb_power, alpha_power); + d = pow(max(d, 1e-6f), exp); } VARBUF1(z, y, x) = d; @@ -378,7 +309,7 @@ static void compute_pixel_region_variance( } // Pad with an extra layer of 0s; this forms the edge of the SAT tables - float4 vbz = float4(0.0f); + vfloat4 vbz = vfloat4::zero(); for (int z = 0; z < padsize_z; z++) { for (int y = 0; y < padsize_y; y++) @@ -479,11 +410,6 @@ 
static void compute_pixel_region_variance( int z_low = z_src - alpha_kernel_radius; int z_high = z_src + alpha_kernel_radius + 1; - astc::clamp(z_src, 0, (int)(img->dim_z - 1)); - astc::clamp(z_low, 0, (int)(img->dim_z - 1)); - astc::clamp(z_high, 0, (int)(img->dim_z - 1)); - - for (int y = 0; y < size_y; y++) { int y_src = y + kernel_radius_xy; @@ -491,10 +417,6 @@ static void compute_pixel_region_variance( int y_low = y_src - alpha_kernel_radius; int y_high = y_src + alpha_kernel_radius + 1; - astc::clamp(y_src, 0, (int)(img->dim_y - 1)); - astc::clamp(y_low, 0, (int)(img->dim_y - 1)); - astc::clamp(y_high, 0, (int)(img->dim_y - 1)); - for (int x = 0; x < size_x; x++) { int x_src = x + kernel_radius_xy; @@ -502,25 +424,21 @@ static void compute_pixel_region_variance( int x_low = x_src - alpha_kernel_radius; int x_high = x_src + alpha_kernel_radius + 1; - astc::clamp(x_src, 0, (int)(img->dim_x - 1)); - astc::clamp(x_low, 0, (int)(img->dim_x - 1)); - astc::clamp(x_high, 0, (int)(img->dim_x - 1)); - // Summed-area table lookups for alpha average - float vasum = ( VARBUF1(z_high, y_low, x_low).a - - VARBUF1(z_high, y_low, x_high).a - - VARBUF1(z_high, y_high, x_low).a - + VARBUF1(z_high, y_high, x_high).a) - - ( VARBUF1(z_low, y_low, x_low).a - - VARBUF1(z_low, y_low, x_high).a - - VARBUF1(z_low, y_high, x_low).a - + VARBUF1(z_low, y_high, x_high).a); + float vasum = ( VARBUF1(z_high, y_low, x_low).lane<3>() + - VARBUF1(z_high, y_low, x_high).lane<3>() + - VARBUF1(z_high, y_high, x_low).lane<3>() + + VARBUF1(z_high, y_high, x_high).lane<3>()) - + ( VARBUF1(z_low, y_low, x_low).lane<3>() + - VARBUF1(z_low, y_low, x_high).lane<3>() + - VARBUF1(z_low, y_high, x_low).lane<3>() + + VARBUF1(z_low, y_high, x_high).lane<3>()); int out_index = z_dst * zdt + y_dst * ydt + x_dst; input_alpha_averages[out_index] = (vasum * alpha_rsamples); // Summed-area table lookups for RGBA average and variance - float4 v1sum = ( VARBUF1(z_high, y_low, x_low) + vfloat4 v1sum = ( VARBUF1(z_high, y_low, x_low) - VARBUF1(z_high, y_low, x_high) - VARBUF1(z_high, y_high, x_low) + VARBUF1(z_high, y_high, x_high)) - @@ -529,7 +447,7 @@ static void compute_pixel_region_variance( - VARBUF1(z_low, y_high, x_low) + VARBUF1(z_low, y_high, x_high)); - float4 v2sum = ( VARBUF2(z_high, y_low, x_low) + vfloat4 v2sum = ( VARBUF2(z_high, y_low, x_low) - VARBUF2(z_high, y_low, x_high) - VARBUF2(z_high, y_high, x_low) + VARBUF2(z_high, y_high, x_high)) - @@ -539,11 +457,11 @@ static void compute_pixel_region_variance( + VARBUF2(z_low, y_high, x_high)); // Compute and emit the average - float4 avg = v1sum * avg_var_rsamples; + vfloat4 avg = v1sum * avg_var_rsamples; input_averages[out_index] = avg; // Compute and emit the actual variance - float4 variance = mul2 * v2sum - mul1 * (v1sum * v1sum); + vfloat4 variance = mul2 * v2sum - mul1 * (v1sum * v1sum); input_variances[out_index] = variance; } } @@ -558,10 +476,6 @@ static void compute_pixel_region_variance( int y_low = y_src - alpha_kernel_radius; int y_high = y_src + alpha_kernel_radius + 1; - astc::clamp(y_src, 0, (int)(img->dim_y - 1)); - astc::clamp(y_low, 0, (int)(img->dim_y - 1)); - astc::clamp(y_high, 0, (int)(img->dim_y - 1)); - for (int x = 0; x < size_x; x++) { int x_src = x + kernel_radius_xy; @@ -569,36 +483,32 @@ static void compute_pixel_region_variance( int x_low = x_src - alpha_kernel_radius; int x_high = x_src + alpha_kernel_radius + 1; - astc::clamp(x_src, 0, (int)(img->dim_x - 1)); - astc::clamp(x_low, 0, (int)(img->dim_x - 1)); - astc::clamp(x_high, 0, 
(int)(img->dim_x - 1)); - // Summed-area table lookups for alpha average - float vasum = VARBUF1(0, y_low, x_low).a - - VARBUF1(0, y_low, x_high).a - - VARBUF1(0, y_high, x_low).a - + VARBUF1(0, y_high, x_high).a; + float vasum = VARBUF1(0, y_low, x_low).lane<3>() + - VARBUF1(0, y_low, x_high).lane<3>() + - VARBUF1(0, y_high, x_low).lane<3>() + + VARBUF1(0, y_high, x_high).lane<3>(); int out_index = y_dst * ydt + x_dst; input_alpha_averages[out_index] = (vasum * alpha_rsamples); // summed-area table lookups for RGBA average and variance - float4 v1sum = VARBUF1(0, y_low, x_low) - - VARBUF1(0, y_low, x_high) - - VARBUF1(0, y_high, x_low) - + VARBUF1(0, y_high, x_high); + vfloat4 v1sum = VARBUF1(0, y_low, x_low) + - VARBUF1(0, y_low, x_high) + - VARBUF1(0, y_high, x_low) + + VARBUF1(0, y_high, x_high); - float4 v2sum = VARBUF2(0, y_low, x_low) - - VARBUF2(0, y_low, x_high) - - VARBUF2(0, y_high, x_low) - + VARBUF2(0, y_high, x_high); + vfloat4 v2sum = VARBUF2(0, y_low, x_low) + - VARBUF2(0, y_low, x_high) + - VARBUF2(0, y_high, x_low) + + VARBUF2(0, y_high, x_high); // Compute and emit the average - float4 avg = v1sum * avg_var_rsamples; + vfloat4 avg = v1sum * avg_var_rsamples; input_averages[out_index] = avg; // Compute and emit the actual variance - float4 variance = mul2 * v2sum - mul1 * (v1sum * v1sum); + vfloat4 variance = mul2 * v2sum - mul1 * (v1sum * v1sum); input_variances[out_index] = variance; } } @@ -610,43 +520,44 @@ void compute_averages_and_variances( const avg_var_args &ag ) { pixel_region_variance_args arg = ag.arg; - arg.work_memory = new float4[ag.work_memory_size]; + arg.work_memory = new vfloat4[ag.work_memory_size]; - int size_x = ag.img_size.r; - int size_y = ag.img_size.g; - int size_z = ag.img_size.b; + int size_x = ag.img_size_x; + int size_y = ag.img_size_y; + int size_z = ag.img_size_z; - int step_x = ag.blk_size.r; - int step_y = ag.blk_size.g; - int step_z = ag.blk_size.b; + int step_xy = ag.blk_size_xy; + int step_z = ag.blk_size_z; - int y_tasks = (size_y + step_y - 1) / step_y; + int y_tasks = (size_y + step_xy - 1) / step_xy; // All threads run this processing loop until there is no work remaining while (true) { unsigned int count; - unsigned int base = ctx.manage_avg_var.get_task_assignment(1, count); + unsigned int base = ctx.manage_avg_var.get_task_assignment(16, count); if (!count) { break; } - assert(count == 1); - int z = (base / (y_tasks)) * step_z; - int y = (base - (z * y_tasks)) * step_y; + for (unsigned int i = base; i < base + count; i++) + { + int z = (i / (y_tasks)) * step_z; + int y = (i - (z * y_tasks)) * step_xy; - arg.size.b = MIN(step_z, size_z - z); - arg.offset.b = z; + arg.size_z = astc::min(step_z, size_z - z); + arg.offset_z = z; - arg.size.g = MIN(step_y, size_y - y); - arg.offset.g = y; + arg.size_y = astc::min(step_xy, size_y - y); + arg.offset_y = y; - for (int x = 0; x < size_x; x += step_x) - { - arg.size.r = MIN(step_x, size_x - x); - arg.offset.r = x; - compute_pixel_region_variance(ctx, &arg); + for (int x = 0; x < size_x; x += step_xy) + { + arg.size_x = astc::min(step_xy, size_x - x); + arg.offset_x = x; + compute_pixel_region_variance(ctx, &arg); + } } ctx.manage_avg_var.complete_task_assignment(count); @@ -671,20 +582,24 @@ unsigned int init_compute_averages_and_variances( int size_z = img.dim_z; // Compute maximum block size and from that the working memory buffer size - int kernel_radius = MAX(avg_var_kernel_radius, alpha_kernel_radius); + int kernel_radius = astc::max(avg_var_kernel_radius, alpha_kernel_radius); int 
kerneldim = 2 * kernel_radius + 1; - int have_z = (size_z > 1); + bool have_z = (size_z > 1); int max_blk_size_xy = have_z ? 16 : 32; - int max_blk_size_z = MIN(size_z, have_z ? 16 : 1); + int max_blk_size_z = astc::min(size_z, have_z ? 16 : 1); int max_padsize_xy = max_blk_size_xy + kerneldim; int max_padsize_z = max_blk_size_z + (have_z ? kerneldim : 0); // Perform block-wise averages-and-variances calculations across the image // Initialize fields which are not populated until later - arg.size = int3(0); - arg.offset = int3(0); + arg.size_x = 0; + arg.size_y = 0; + arg.size_z = 0; + arg.offset_x = 0; + arg.offset_y = 0; + arg.offset_z = 0; arg.work_memory = nullptr; arg.img = &img; @@ -696,8 +611,11 @@ unsigned int init_compute_averages_and_variances( arg.alpha_kernel_radius = alpha_kernel_radius; ag.arg = arg; - ag.img_size = int3(size_x, size_y, size_z); - ag.blk_size = int3(max_blk_size_xy, max_blk_size_xy, max_blk_size_z); + ag.img_size_x = size_x; + ag.img_size_y = size_y; + ag.img_size_z = size_z; + ag.blk_size_xy = max_blk_size_xy; + ag.blk_size_z = max_blk_size_z; ag.work_memory_size = 2 * max_padsize_xy * max_padsize_xy * max_padsize_z; // The parallel task count diff --git a/libkram/astc-encoder/astcenc_decompress_symbolic.cpp b/libkram/astc-encoder/astcenc_decompress_symbolic.cpp index e1038ec1..ac4a1cea 100644 --- a/libkram/astc-encoder/astcenc_decompress_symbolic.cpp +++ b/libkram/astc-encoder/astcenc_decompress_symbolic.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -26,51 +26,114 @@ static int compute_value_of_texel_int( int texel_to_get, - const decimation_table* it, + const decimation_table* dt, const int* weights ) { int summed_value = 8; - int weights_to_evaluate = it->texel_num_weights[texel_to_get]; + int weights_to_evaluate = dt->texel_weight_count[texel_to_get]; for (int i = 0; i < weights_to_evaluate; i++) { - summed_value += weights[it->texel_weights[texel_to_get][i]] * it->texel_weights_int[texel_to_get][i]; + summed_value += weights[dt->texel_weights_t4[texel_to_get][i]] + * dt->texel_weights_int_t4[texel_to_get][i]; } return summed_value >> 4; } -static uint4 lerp_color_int( +static vint4 lerp_color_int( astcenc_profile decode_mode, - uint4 color0, - uint4 color1, + vint4 color0, + vint4 color1, int weight, int plane2_weight, - int plane2_color_component // -1 in 1-plane mode + vmask4 plane2_mask ) { - uint4 weight1 = uint4( - plane2_color_component == 0 ? plane2_weight : weight, - plane2_color_component == 1 ? plane2_weight : weight, - plane2_color_component == 2 ? plane2_weight : weight, - plane2_color_component == 3 ? 
plane2_weight : weight); - - uint4 weight0 = uint4(64, 64, 64, 64) - weight1; + vint4 weight1 = select(vint4(weight), vint4(plane2_weight), plane2_mask); + vint4 weight0 = vint4(64) - weight1; if (decode_mode == ASTCENC_PRF_LDR_SRGB) { - color0 = uint4(color0.r >> 8, color0.g >> 8, color0.b >> 8, color0.a >> 8); - color1 = uint4(color1.r >> 8, color1.g >> 8, color1.b >> 8, color1.a >> 8); + color0 = asr<8>(color0); + color1 = asr<8>(color1); } - uint4 color = (color0 * weight0) + (color1 * weight1) + uint4(32, 32, 32, 32); - color = uint4(color.r >> 6, color.g >> 6, color.b >> 6, color.a >> 6); + vint4 color = (color0 * weight0) + (color1 * weight1) + vint4(32); + color = asr<6>(color); if (decode_mode == ASTCENC_PRF_LDR_SRGB) { - color = color * 257u; + color = color * vint4(257); } return color; } +// Turn packed unorm16 or LNS data into generic float data +static inline vfloat4 decode_texel( + vint4 data, + vmask4 lns_mask +) { + vint4 color_lns = vint4::zero(); + vint4 color_unorm = vint4::zero(); + + if (any(lns_mask)) + { + color_lns = lns_to_sf16(data); + } + + if (!all(lns_mask)) + { + color_unorm = unorm16_to_sf16(data); + } + + // Pick channels and then covert to FP16 + vint4 datai = select(color_unorm, color_lns, lns_mask); + return float16_to_float(datai); +} + +void unpack_weights( + const block_size_descriptor& bsd, + const symbolic_compressed_block& scb, + const decimation_table& dt, + bool is_dual_plane, + int weight_quant_level, + int weights_plane1[MAX_TEXELS_PER_BLOCK], + int weights_plane2[MAX_TEXELS_PER_BLOCK] +) { + // First, unquantize the weights ... + int uq_plane1_weights[MAX_WEIGHTS_PER_BLOCK]; + int uq_plane2_weights[MAX_WEIGHTS_PER_BLOCK]; + int weight_count = dt.weight_count; + + const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quant_level]); + + for (int i = 0; i < weight_count; i++) + { + uq_plane1_weights[i] = qat->unquantized_value[scb.weights[i]]; + } + + if (is_dual_plane) + { + for (int i = 0; i < weight_count; i++) + { + uq_plane2_weights[i] = qat->unquantized_value[scb.weights[i + PLANE2_WEIGHTS_OFFSET]]; + } + } + + // Second, undecimate the weights ... + for (int i = 0; i < bsd.texel_count; i++) + { + weights_plane1[i] = compute_value_of_texel_int(i, &dt, uq_plane1_weights); + } + + if (is_dual_plane) + { + for (int i = 0; i < bsd.texel_count; i++) + { + weights_plane2[i] = compute_value_of_texel_int(i, &dt, uq_plane2_weights); + } + } +} + void decompress_symbolic_block( astcenc_profile decode_mode, const block_size_descriptor* bsd, @@ -84,9 +147,14 @@ void decompress_symbolic_block( blk->ypos = ypos; blk->zpos = zpos; + blk->data_min = vfloat4::zero(); + blk->data_max = vfloat4::zero(); + blk->grayscale = false; + // if we detected an error-block, blow up immediately. if (scb->error_block) { + // TODO: Check this - isn't linear LDR magenta too? Same below ... if (decode_mode == ASTCENC_PRF_LDR_SRGB) { for (int i = 0; i < bsd->texel_count; i++) @@ -119,74 +187,52 @@ void decompress_symbolic_block( if (scb->block_mode < 0) { - float red = 0, green = 0, blue = 0, alpha = 0; + vfloat4 color; int use_lns = 0; int use_nan = 0; if (scb->block_mode == -2) { - int ired = scb->constant_color[0]; - int igreen = scb->constant_color[1]; - int iblue = scb->constant_color[2]; - int ialpha = scb->constant_color[3]; + vint4 colori(scb->constant_color); // For sRGB decoding a real decoder would just use the top 8 bits // for color conversion. 
We don't color convert, so linearly scale // the top 8 bits into the full 16 bit dynamic range if (decode_mode == ASTCENC_PRF_LDR_SRGB) { - ired = (ired >> 8) * 257; - igreen = (igreen >> 8) * 257; - iblue = (iblue >> 8) * 257; - ialpha = (ialpha >> 8) * 257; + colori = asr<8>(colori) * 257; } - red = sf16_to_float(unorm16_to_sf16(ired)); - green = sf16_to_float(unorm16_to_sf16(igreen)); - blue = sf16_to_float(unorm16_to_sf16(iblue)); - alpha = sf16_to_float(unorm16_to_sf16(ialpha)); - use_lns = 0; - use_nan = 0; + vint4 colorf16 = unorm16_to_sf16(colori); + color = float16_to_float(colorf16); } else { switch (decode_mode) { case ASTCENC_PRF_LDR_SRGB: - red = 1.0f; - green = 0.0f; - blue = 1.0f; - alpha = 1.0f; - use_lns = 0; - use_nan = 0; + color = vfloat4(1.0f, 0.0f, 1.0f, 1.0f); break; case ASTCENC_PRF_LDR: - red = 0.0f; - green = 0.0f; - blue = 0.0f; - alpha = 0.0f; - use_lns = 0; + color = vfloat4(0.0f); use_nan = 1; break; case ASTCENC_PRF_HDR_RGB_LDR_A: case ASTCENC_PRF_HDR: // constant-color block; unpack from FP16 to FP32. - red = sf16_to_float(scb->constant_color[0]); - green = sf16_to_float(scb->constant_color[1]); - blue = sf16_to_float(scb->constant_color[2]); - alpha = sf16_to_float(scb->constant_color[3]); + color = float16_to_float(vint4(scb->constant_color)); use_lns = 1; - use_nan = 0; break; } } + // TODO: Skip this and add constant color transfer to img block? for (int i = 0; i < bsd->texel_count; i++) { - blk->data_r[i] = red; - blk->data_g[i] = green; - blk->data_b[i] = blue; - blk->data_a[i] = alpha; + blk->data_r[i] = color.lane<0>(); + blk->data_g[i] = color.lane<1>(); + blk->data_b[i] = color.lane<2>(); + blk->data_a[i] = color.lane<3>(); blk->rgb_lns[i] = use_lns; blk->alpha_lns[i] = use_lns; blk->nan_texel[i] = use_nan; @@ -201,20 +247,20 @@ void decompress_symbolic_block( pt += scb->partition_index; // get the appropriate block descriptor - const decimation_table *const *ixtab2 = bsd->decimation_tables; + const decimation_table *const *dts = bsd->decimation_tables; - const int packed_index = bsd->block_mode_to_packed[scb->block_mode]; - assert(packed_index >= 0 && packed_index < bsd->block_mode_packed_count); - const block_mode& bm = bsd->block_modes_packed[packed_index]; - const decimation_table *it = ixtab2[bm.decimation_mode]; + const int packed_index = bsd->block_mode_packed_index[scb->block_mode]; + assert(packed_index >= 0 && packed_index < bsd->block_mode_count); + const block_mode& bm = bsd->block_modes[packed_index]; + const decimation_table *dt = dts[bm.decimation_mode]; int is_dual_plane = bm.is_dual_plane; - int weight_quantization_level = bm.quantization_mode; + int weight_quant_level = bm.quant_mode; // decode the color endpoints - uint4 color_endpoint0[4]; - uint4 color_endpoint1[4]; + vint4 color_endpoint0[4]; + vint4 color_endpoint1[4]; int rgb_hdr_endpoint[4]; int alpha_hdr_endpoint[4]; int nan_endpoint[4]; @@ -223,7 +269,7 @@ void decompress_symbolic_block( { unpack_color_endpoints(decode_mode, scb->color_formats[i], - scb->color_quantization_level, + scb->color_quant_level, scb->color_values[i], &(rgb_hdr_endpoint[i]), &(alpha_hdr_endpoint[i]), @@ -232,77 +278,53 @@ void decompress_symbolic_block( &(color_endpoint1[i])); } - // first unquantize the weights - int uq_plane1_weights[MAX_WEIGHTS_PER_BLOCK]; - int uq_plane2_weights[MAX_WEIGHTS_PER_BLOCK]; - int weight_count = it->num_weights; - - const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quantization_level]); - - for (int i = 0; i < weight_count; i++) - { - 
uq_plane1_weights[i] = qat->unquantized_value[scb->plane1_weights[i]]; - } - - if (is_dual_plane) - { - for (int i = 0; i < weight_count; i++) - { - uq_plane2_weights[i] = qat->unquantized_value[scb->plane2_weights[i]]; - } - } - - // then undecimate them. + // Unquantize and undecimate the weights int weights[MAX_TEXELS_PER_BLOCK]; int plane2_weights[MAX_TEXELS_PER_BLOCK]; + unpack_weights(*bsd, *scb, *dt, is_dual_plane, weight_quant_level, weights, plane2_weights); - for (int i = 0; i < bsd->texel_count; i++) - { - weights[i] = compute_value_of_texel_int(i, it, uq_plane1_weights); - } + // Now that we have endpoint colors and weights, we can unpack texel colors + int plane2_color_component = is_dual_plane ? scb->plane2_color_component : -1; + vmask4 plane2_mask = vint4::lane_id() == vint4(plane2_color_component); - if (is_dual_plane) + for (int i = 0; i < partition_count; i++) { - for (int i = 0; i < bsd->texel_count; i++) + vint4 ep0 = color_endpoint0[i]; + vint4 ep1 = color_endpoint1[i]; + bool rgb_lns = rgb_hdr_endpoint[i]; + bool nan = nan_endpoint[i]; + bool a_lns = alpha_hdr_endpoint[i]; + vmask4 lns_mask(rgb_lns, rgb_lns, rgb_lns, a_lns); + + int texel_count = pt->partition_texel_count[i]; + for (int j = 0; j < texel_count; j++) { - plane2_weights[i] = compute_value_of_texel_int(i, it, uq_plane2_weights); + int tix = pt->texels_of_partition[i][j]; + vint4 color = lerp_color_int(decode_mode, + ep0, + ep1, + weights[tix], + plane2_weights[tix], + plane2_mask); + + vfloat4 colorf = decode_texel(color, lns_mask); + + blk->nan_texel[tix] = nan; + blk->data_r[tix] = colorf.lane<0>(); + blk->data_g[tix] = colorf.lane<1>(); + blk->data_b[tix] = colorf.lane<2>(); + blk->data_a[tix] = colorf.lane<3>(); } } - - int plane2_color_component = scb->plane2_color_component; - - // now that we have endpoint colors and weights, we can unpack actual colors for - // each texel. - for (int i = 0; i < bsd->texel_count; i++) - { - int partition = pt->partition_of_texel[i]; - - uint4 color = lerp_color_int(decode_mode, - color_endpoint0[partition], - color_endpoint1[partition], - weights[i], - plane2_weights[i], - is_dual_plane ? plane2_color_component : -1); - - blk->rgb_lns[i] = rgb_hdr_endpoint[partition]; - blk->alpha_lns[i] = alpha_hdr_endpoint[partition]; - blk->nan_texel[i] = nan_endpoint[partition]; - - blk->data_r[i] = (float)color.r; - blk->data_g[i] = (float)color.g; - blk->data_b[i] = (float)color.b; - blk->data_a[i] = (float)color.a; - } - - imageblock_initialize_orig_from_work(blk, bsd->texel_count); - update_imageblock_flags(blk, bsd->xdim, bsd->ydim, bsd->zdim); } +// Returns a negative error for encodings we want to reject as a part of a +// heuristic check, e.g. for RGBM textures which have zero M values. float compute_symbolic_block_difference( - astcenc_profile decode_mode, + const astcenc_config& config, const block_size_descriptor* bsd, const symbolic_compressed_block* scb, - const imageblock* pb, + const imageblock* blk, const error_weight_block *ewb ) { // if we detected an error-block, blow up immediately. 
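The hunks above replace the old per-component ternaries in lerp_color_int with a single lane mask built from vint4::lane_id() == vint4(plane2_color_component), and move weight unquantization/undecimation into the shared unpack_weights() helper. A minimal scalar sketch of that select-based blend, using illustrative names rather than the library's vint4/vmask4 types:

#include <cstdio>

// Scalar illustration of the lane-select weight blend performed by
// lerp_color_int: a lane takes the plane-2 weight only when it matches
// the plane-2 color component (-1 means single-plane mode).
static void lerp_color_scalar(const int color0[4], const int color1[4],
                              int weight, int plane2_weight,
                              int plane2_component, int out[4])
{
    for (int lane = 0; lane < 4; lane++)
    {
        int w1 = (lane == plane2_component) ? plane2_weight : weight;
        int w0 = 64 - w1;

        // Same fixed-point blend as the vector code: the weights sum to 64,
        // +32 rounds, and >>6 scales back into the endpoint range.
        out[lane] = (color0[lane] * w0 + color1[lane] * w1 + 32) >> 6;
    }
}

int main()
{
    int ep0[4] = { 0, 0, 0, 65535 };
    int ep1[4] = { 65535, 65535, 65535, 65535 };
    int texel[4];

    // Dual-plane block with the second plane driving lane 2 (blue)
    lerp_color_scalar(ep0, ep1, 16, 48, 2, texel);
    printf("%d %d %d %d\n", texel[0], texel[1], texel[2], texel[3]);
    return 0;
}
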
@@ -319,29 +341,35 @@ float compute_symbolic_block_difference( pt += scb->partition_index; // get the appropriate block descriptor - const decimation_table *const *ixtab2 = bsd->decimation_tables; + const decimation_table *const *dts = bsd->decimation_tables; - const int packed_index = bsd->block_mode_to_packed[scb->block_mode]; - assert(packed_index >= 0 && packed_index < bsd->block_mode_packed_count); - const block_mode& bm = bsd->block_modes_packed[packed_index]; - const decimation_table *it = ixtab2[bm.decimation_mode]; + const int packed_index = bsd->block_mode_packed_index[scb->block_mode]; + assert(packed_index >= 0 && packed_index < bsd->block_mode_count); + const block_mode& bm = bsd->block_modes[packed_index]; + const decimation_table *dt = dts[bm.decimation_mode]; int is_dual_plane = bm.is_dual_plane; + int weight_quant_level = bm.quant_mode; - int weight_quantization_level = bm.quantization_mode; + int weight_count = dt->weight_count; + int texel_count = bsd->texel_count; + + promise(partition_count > 0); + promise(weight_count > 0); + promise(texel_count > 0); // decode the color endpoints - uint4 color_endpoint0[4]; - uint4 color_endpoint1[4]; + vint4 color_endpoint0[4]; + vint4 color_endpoint1[4]; int rgb_hdr_endpoint[4]; int alpha_hdr_endpoint[4]; int nan_endpoint[4]; for (int i = 0; i < partition_count; i++) { - unpack_color_endpoints(decode_mode, + unpack_color_endpoints(config.profile, scb->color_formats[i], - scb->color_quantization_level, + scb->color_quant_level, scb->color_values[i], &(rgb_hdr_endpoint[i]), &(alpha_hdr_endpoint[i]), @@ -350,88 +378,69 @@ float compute_symbolic_block_difference( &(color_endpoint1[i])); } - // first unquantize the weights - int uq_plane1_weights[MAX_WEIGHTS_PER_BLOCK]; - int uq_plane2_weights[MAX_WEIGHTS_PER_BLOCK]; - int weight_count = it->num_weights; - - const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quantization_level]); - - for (int i = 0; i < weight_count; i++) - { - uq_plane1_weights[i] = qat->unquantized_value[scb->plane1_weights[i]]; - } - - if (is_dual_plane) - { - for (int i = 0; i < weight_count; i++) - { - uq_plane2_weights[i] = qat->unquantized_value[scb->plane2_weights[i]]; - } - } - - // then undecimate them. + // Unquantize and undecimate the weights int weights[MAX_TEXELS_PER_BLOCK]; int plane2_weights[MAX_TEXELS_PER_BLOCK]; + unpack_weights(*bsd, *scb, *dt, is_dual_plane, weight_quant_level, weights, plane2_weights); - for (int i = 0; i < bsd->texel_count; i++) - { - weights[i] = compute_value_of_texel_int(i, it, uq_plane1_weights); - } - - if (is_dual_plane) - { - for (int i = 0; i < bsd->texel_count; i++) - { - plane2_weights[i] = compute_value_of_texel_int(i, it, uq_plane2_weights); - } - } - - int plane2_color_component = scb->plane2_color_component; + // Now that we have endpoint colors and weights, we can unpack texel colors + int plane2_color_component = is_dual_plane ? scb->plane2_color_component : -1; + vmask4 plane2_mask = vint4::lane_id() == vint4(plane2_color_component); - // now that we have endpoint colors and weights, we can unpack actual colors for - // each texel. float summa = 0.0f; - for (int i = 0; i < bsd->texel_count; i++) + for (int i = 0; i < texel_count; i++) { int partition = pt->partition_of_texel[i]; - uint4 color = lerp_color_int(decode_mode, - color_endpoint0[partition], - color_endpoint1[partition], - weights[i], - plane2_weights[i], - is_dual_plane ? 
plane2_color_component : -1); + vint4 ep0 = color_endpoint0[partition]; + vint4 ep1 = color_endpoint1[partition]; - float4 newColor = float4((float)color.r, - (float)color.g, - (float)color.b, - (float)color.a); + vint4 colori = lerp_color_int(config.profile, + ep0, + ep1, + weights[i], + plane2_weights[i], + plane2_mask); - float4 oldColor = float4(pb->data_r[i], - pb->data_g[i], - pb->data_b[i], - pb->data_a[i]); + vfloat4 color = int_to_float(colori); + vfloat4 oldColor = blk->texel(i); - float4 error = oldColor - newColor; + if (config.flags & ASTCENC_FLG_MAP_RGBM) + { + // Fail encodings that result in zero weight M pixels. Note that + // this can cause "interesting" artifacts if we reject all useful + // encodings - we typically get max brightness encodings instead + // which look just as bad. We recommend users apply a bias to their + // stored M value, limiting the lower value to 16 or 32 to avoid + // getting small M values post-quantization, but we can't prove it + // would never happen, especially at low bit rates ... + if (color.lane<3>() == 0.0f) + { + return -1e30f; + } - error.r = MIN(fabsf(error.r), 1e15f); - error.g = MIN(fabsf(error.g), 1e15f); - error.b = MIN(fabsf(error.b), 1e15f); - error.a = MIN(fabsf(error.a), 1e15f); + // Compute error based on decoded RGBM color + color = vfloat4( + color.lane<0>() * color.lane<3>() * config.rgbm_m_scale, + color.lane<1>() * color.lane<3>() * config.rgbm_m_scale, + color.lane<2>() * color.lane<3>() * config.rgbm_m_scale, + 1.0f + ); + + oldColor = vfloat4( + oldColor.lane<0>() * oldColor.lane<3>() * config.rgbm_m_scale, + oldColor.lane<1>() * oldColor.lane<3>() * config.rgbm_m_scale, + oldColor.lane<2>() * oldColor.lane<3>() * config.rgbm_m_scale, + 1.0f + ); + } + vfloat4 error = oldColor - color; + error = min(abs(error), 1e15f); error = error * error; - float4 errorWeight = float4(ewb->error_weights[i].r, - ewb->error_weights[i].g, - ewb->error_weights[i].b, - ewb->error_weights[i].a); - - float metric = dot(error, errorWeight); - if (metric >= 1e30f) metric = 1e30f; - if (metric != metric) metric = 0.0f; - - summa += metric; + float metric = dot_s(error, ewb->error_weights[i]); + summa += astc::min(metric, 1e30f); } return summa; diff --git a/libkram/astc-encoder/astcenc_diagnostic_trace.cpp b/libkram/astc-encoder/astcenc_diagnostic_trace.cpp new file mode 100644 index 00000000..2f6c8c3b --- /dev/null +++ b/libkram/astc-encoder/astcenc_diagnostic_trace.cpp @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: Apache-2.0 +// ---------------------------------------------------------------------------- +// Copyright 2021 Arm Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy +// of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. +// ---------------------------------------------------------------------------- + +/** + * @brief Functions for the library entrypoint. + */ + +#if defined(ASTCENC_DIAGNOSTICS) + +#include +#include +#include + +#include "astcenc_diagnostic_trace.h" + +/** @brief The global trace logger. 
*/ +static TraceLog* g_TraceLog = nullptr; + +/** @brief The JSON indentation level. */ +static const int g_trace_indent = 2; + +TraceLog::TraceLog( + const char* file_name): + m_file(file_name, std::ofstream::out | std::ofstream::binary) +{ + assert(!g_TraceLog); + g_TraceLog = this; + m_root = new TraceNode("root"); +} + +TraceNode* TraceLog::get_current_leaf() +{ + if (m_stack.size()) + { + return m_stack.back(); + } + + return nullptr; +} + +int TraceLog::get_depth() +{ + return m_stack.size(); +} + +TraceLog::~TraceLog() +{ + assert(g_TraceLog == this); + delete m_root; + g_TraceLog = nullptr; +} + +TraceNode::TraceNode( + const char* format, + ... +) { + // Format the name string + constexpr size_t bufsz = 256; + char buffer[bufsz]; + + va_list args; + va_start (args, format); + vsnprintf (buffer, bufsz, format, args); + va_end (args); + + // Guarantee there is a nul termintor + buffer[bufsz - 1] = 0; + + // Generate the node + TraceNode* parent = g_TraceLog->get_current_leaf(); + int depth = g_TraceLog->get_depth(); + g_TraceLog->m_stack.push_back(this); + + bool comma = parent && parent->m_attrib_count; + auto& out = g_TraceLog->m_file; + + if (parent) + { + parent->m_attrib_count++; + } + + if (comma) + { + out << ','; + } + + if (depth) + { + out << '\n'; + } + + int out_indent = (depth * 2) * g_trace_indent; + int in_indent = (depth * 2 + 1) * g_trace_indent; + + std::string out_indents(""); + if (out_indent) + { + out_indents = std::string(out_indent, ' '); + } + + std::string in_indents(in_indent, ' '); + + out << out_indents << "[ \"node\", \"" << buffer << "\",\n"; + out << in_indents << "["; +} + +void TraceNode::add_attrib( + std::string type, + std::string key, + std::string value +) { + (void)type; + + int depth = g_TraceLog->get_depth(); + int indent = (depth * 2) * g_trace_indent; + auto& out = g_TraceLog->m_file; + bool comma = m_attrib_count; + m_attrib_count++; + + if (comma) + { + out << ','; + } + + out << '\n'; + out << std::string(indent, ' ') << "[ " + << "\"" << key << "\", " + << value << " ]"; +} + +TraceNode::~TraceNode() +{ + g_TraceLog->m_stack.pop_back(); + + auto& out = g_TraceLog->m_file; + int depth = g_TraceLog->get_depth(); + int out_indent = (depth * 2) * g_trace_indent; + int in_indent = (depth * 2 + 1) * g_trace_indent; + + std::string out_indents(""); + if (out_indent) + { + out_indents = std::string(out_indent, ' '); + } + + std::string in_indents(in_indent, ' '); + + if (m_attrib_count) + { + out << "\n" << in_indents; + } + out << "]\n"; + + out << out_indents << "]"; +} + +void trace_add_data( + const char* key, + const char* format, + ... 
+) { + constexpr size_t bufsz = 256; + char buffer[bufsz]; + + va_list args; + va_start (args, format); + vsnprintf (buffer, bufsz, format, args); + va_end (args); + + // Guarantee there is a nul termintor + buffer[bufsz - 1] = 0; + + std::string value = "\"" + std::string(buffer) + "\""; + + TraceNode* node = g_TraceLog->get_current_leaf(); + node->add_attrib("str", key, value); +} + +void trace_add_data( + const char* key, + float value +) { + char buffer[256]; + sprintf(buffer, "%.20g", (double)value); + TraceNode* node = g_TraceLog->get_current_leaf(); + node->add_attrib("float", key, buffer); +} + +void trace_add_data( + const char* key, + int value +) { + TraceNode* node = g_TraceLog->get_current_leaf(); + node->add_attrib("int", key, std::to_string(value)); +} + +void trace_add_data( + const char* key, + unsigned int value +) { + TraceNode* node = g_TraceLog->get_current_leaf(); + node->add_attrib("int", key, std::to_string(value)); +} + +#endif diff --git a/libkram/astc-encoder/astcenc_diagnostic_trace.h b/libkram/astc-encoder/astcenc_diagnostic_trace.h new file mode 100644 index 00000000..e3c26afb --- /dev/null +++ b/libkram/astc-encoder/astcenc_diagnostic_trace.h @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: Apache-2.0 +// ---------------------------------------------------------------------------- +// Copyright 2021 Arm Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy +// of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. +// ---------------------------------------------------------------------------- + +/** + * @brief This module provides a set of diagnostic tracing utilities. + * + * Overview + * ======== + * + * The built-in diagnostic trace tool generates a hierarchical JSON tree + * structure. The tree hierarchy contains three levels: + * + * - block + * - pass + * - candidate + * + * One block node exists for each compressed block in the image. One pass node + * exists for each major pass (N partition, M planes, O channel) applied to a + * block. One candidate node exists for each encoding candidate trialed for a + * pass. + * + * Each node contains both the hierarchy but also a number of attributes which + * explain the behavior. For example, the block node contains the block + * coordinates in the image, the pass explains the pass configuration, and the + * candidate will explain the candidate encoding such as weight decimation, + * refinement error, etc. + * + * Trace Nodes are designed as scope-managed C++ objects with stack-like + * push/pop behavior. Constructing a trace node on the stack will automatically + * add it to the current node as a child, and then make it the current node. + * Destroying the current node will pop the stack and set the parent to the + * current node. This provides a robust mechanism for ensuring reliable + * nesting in the tree structure. + * + * A set of utility macros are provided to add attribute annotations to the + * current trace node. + * + * Usage + * ===== + * + * Create Trace Nodes on the stack using the TRACE_NODE() macro. 
This will + * compile-out completely in builds with diagnostics disabled. + * + * Add annotations to the current trace node using the trace_add_data() macro. + * This will similarly compile out completely in builds with diagnostics + * disabled. + * + * If you need to add additional code to support diagnostics-only behavior wrap + * it in preprocessor guards: + * + * #if defined(ASTCENC_DIAGNOSTICS) + * #endif + */ + +#ifndef ASTCENC_DIAGNOSTIC_TRACE_INCLUDED +#define ASTCENC_DIAGNOSTIC_TRACE_INCLUDED + +#if defined(ASTCENC_DIAGNOSTICS) + +#include +#include +#include + +/** + * @brief Class representing a single node in the trace hierarchy. + */ +class TraceNode +{ +public: + /** + * @brief Construct a new node. + * + * Constructing a node will push to the the top of the stack, automatically + * making it a child of the current node, and then setting it to become the + * current node. + * + * @param format The format template for the node name. + * @param ... The format parameters. + */ + TraceNode(const char* format, ...); + + /** + * @brief Add an attribute to this node. + * + * Note that no quoting is applied to the @c value, so if quoting is + * needed it must be done by the caller. + * + * @param type The type of the attribute. + * @param key The key of the attribute. + * @param value The value of the attribute. + */ + void add_attrib(std::string type, std::string key, std::string value); + + /** + * @brief Destroy this node. + * + * Destroying a node will pop it from the top of the stack, making its + * parent the current node. It is invalid behavior to destroy a node that + * is not the current node; usage must conform to stack push-pop semantics. + */ + ~TraceNode(); + + /** + * @brief The number of attributes and child nodes in this node. + */ + unsigned int m_attrib_count { 0 }; +}; + +/** + * @brief Class representing the trace log file being written. + */ +class TraceLog +{ +public: + /** + * @brief Create a new trace log. + * + * The trace log is global; there can be only one at a time. + * + * @param file_name The name of the file to write. + */ + TraceLog(const char* file_name); + + /** + * @brief Detroy the trace log. + * + * Trace logs MUST be cleanly destroyed to ensure the file gets written. + */ + ~TraceLog(); + + /** + * @brief Get the current child node. + * + * @return The current leaf node. + */ + TraceNode* get_current_leaf(); + + /** + * @brief Get the stack depth of the current child node. + * + * @return The current leaf node stack depth. + */ + int get_depth(); + + /** + * @brief The file stream to write to. + */ + std::ofstream m_file; + + /** + * @brief The stack of nodes (newest at the back). + */ + std::vector m_stack; + +private: + /** + * @brief The root node in the JSON file. + */ + TraceNode* m_root; +}; + +/** + * @brief Utility macro to create a trace node on the stack. + * + * @param name The variable name to use. + * @param ... The name template and format parameters. + */ +#define TRACE_NODE(name, ...) TraceNode name(__VA_ARGS__); + +/** + * @brief Add a string annotation to the current node. + * + * @param key The name of the attribute. + * @param format The format template for the attribute value. + * @param ... The format parameters. + */ +void trace_add_data(const char* key, const char* format, ...); + +/** + * @brief Add a float annotation to the current node. + * + * @param key The name of the attribute. + * @param value The value of the attribute. 
+ */ +void trace_add_data(const char* key, float value); + +/** + * @brief Add an integer annotation to the current node. + * + * @param key The name of the attribute. + * @param value The value of the attribute. + */ +void trace_add_data(const char* key, int value); + +/** + * @brief Add an unsigned integer annotation to the current node. + * + * @param key The name of the attribute. + * @param value The value of the attribute. + */ +void trace_add_data(const char* key, unsigned int value); + +#else + +#define TRACE_NODE(name, ...) + +#define trace_add_data(...) + +#endif + +#endif diff --git a/libkram/astc-encoder/astcenc_encoding_choice_error.cpp b/libkram/astc-encoder/astcenc_encoding_choice_error.cpp index c58175a3..c9c46d4a 100644 --- a/libkram/astc-encoder/astcenc_encoding_choice_error.cpp +++ b/libkram/astc-encoder/astcenc_encoding_choice_error.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -43,50 +43,101 @@ // helper function to merge two endpoint-colors void merge_endpoints( - const endpoints * ep1, // contains three of the color components - const endpoints * ep2, // contains the remaining color component + const endpoints* ep1, // contains three of the color components + const endpoints* ep2, // contains the remaining color component int separate_component, - endpoints * res + endpoints* res ) { int partition_count = ep1->partition_count; + vmask4 sep_mask = vint4::lane_id() == vint4(separate_component); + res->partition_count = partition_count; + promise(partition_count > 0); for (int i = 0; i < partition_count; i++) { - res->endpt0[i] = ep1->endpt0[i]; - res->endpt1[i] = ep1->endpt1[i]; + res->endpt0[i] = select(ep1->endpt0[i], ep2->endpt0[i], sep_mask); + res->endpt1[i] = select(ep1->endpt1[i], ep2->endpt1[i], sep_mask); } +} - switch (separate_component) +// function to compute the error across a tile when using a particular line for +// a particular partition. +static void compute_error_squared_rgb_single_partition( + int partition_to_test, + const block_size_descriptor* bsd, + const partition_info* pt, // the partition that we use when computing the squared-error. 
+ const imageblock* blk, + const error_weight_block* ewb, + const processed_line3* uncor_pline, + float* uncor_err, + const processed_line3* samec_pline, + float* samec_err, + const processed_line3* rgbl_pline, + float* rgbl_err, + const processed_line3* l_pline, + float* l_err, + float* a_drop_err +) { + int texels_per_block = bsd->texel_count; + float uncor_errorsum = 0.0f; + float samec_errorsum = 0.0f; + float rgbl_errorsum = 0.0f; + float l_errorsum = 0.0f; + float a_drop_errorsum = 0.0f; + + for (int i = 0; i < texels_per_block; i++) { - case 0: - for (int i = 0; i < partition_count; i++) + int partition = pt->partition_of_texel[i]; + float texel_weight = ewb->texel_weight_rgb[i]; + if (partition != partition_to_test || texel_weight < 1e-20f) + { + continue; + } + + vfloat4 point = blk->texel(i); + vfloat4 ews = ewb->error_weights[i]; + + // Compute the error that arises from just ditching alpha + float default_alpha = imageblock_default_alpha(blk); + float omalpha = point.lane<3>() - default_alpha; + a_drop_errorsum += omalpha * omalpha * ews.lane<3>(); + { - res->endpt0[i].r = ep2->endpt0[i].r; - res->endpt1[i].r = ep2->endpt1[i].r; + float param = dot3_s(point, uncor_pline->bs); + vfloat4 rp1 = uncor_pline->amod + param * uncor_pline->bis; + vfloat4 dist = rp1 - point; + uncor_errorsum += dot3_s(ews, dist * dist); } - break; - case 1: - for (int i = 0; i < partition_count; i++) + { - res->endpt0[i].g = ep2->endpt0[i].g; - res->endpt1[i].g = ep2->endpt1[i].g; + float param = dot3_s(point, samec_pline->bs); + // No samec amod - we know it's always zero + vfloat4 rp1 = /* samec_pline->amod + */ param * samec_pline->bis; + vfloat4 dist = rp1 - point; + samec_errorsum += dot3_s(ews, dist * dist); } - break; - case 2: - for (int i = 0; i < partition_count; i++) + { - res->endpt0[i].b = ep2->endpt0[i].b; - res->endpt1[i].b = ep2->endpt1[i].b; + float param = dot3_s(point, rgbl_pline->bs); + vfloat4 rp1 = rgbl_pline->amod + param * rgbl_pline->bis; + vfloat4 dist = rp1 - point; + rgbl_errorsum += dot3_s(ews, dist * dist); } - break; - case 3: - for (int i = 0; i < partition_count; i++) + { - res->endpt0[i].a = ep2->endpt0[i].a; - res->endpt1[i].a = ep2->endpt1[i].a; + float param = dot3_s(point, l_pline->bs); + // No luma amod - we know it's always zero + vfloat4 rp1 = /* l_pline->amod + */ param * l_pline->bis; + vfloat4 dist = rp1 - point; + l_errorsum += dot3_s(ews, dist * dist); } - break; } + + *uncor_err = uncor_errorsum; + *samec_err = samec_errorsum; + *rgbl_err = rgbl_errorsum; + *l_err = l_errorsum; + *a_drop_err = a_drop_errorsum; } /* @@ -100,192 +151,126 @@ void merge_endpoints( */ void compute_encoding_choice_errors( const block_size_descriptor* bsd, - const imageblock* pb, - const partition_info* pi, + const imageblock* blk, + const partition_info* pt, const error_weight_block* ewb, int separate_component, // component that is separated out in 2-plane mode, -1 in 1-plane mode encoding_choice_errors* eci) { - int partition_count = pi->partition_count; + int partition_count = pt->partition_count; int texels_per_block = bsd->texel_count; - float3 averages[4]; - float3 directions_rgb[4]; - float4 error_weightings[4]; - float4 color_scalefactors[4]; - float4 inverse_color_scalefactors[4]; - - compute_partition_error_color_weightings(bsd, ewb, pi, error_weightings, color_scalefactors); - compute_averages_and_directions_rgb(pi, pb, ewb, color_scalefactors, averages, directions_rgb); - - line3 uncorr_rgb_lines[4]; - line3 samechroma_rgb_lines[4]; // for LDR-RGB-scale - line3 
rgb_luma_lines[4]; // for HDR-RGB-scale - line3 luminance_lines[4]; - - processed_line3 proc_uncorr_rgb_lines[4]; - processed_line3 proc_samechroma_rgb_lines[4]; // for LDR-RGB-scale - processed_line3 proc_rgb_luma_lines[4]; // for HDR-RGB-scale - processed_line3 proc_luminance_lines[4]; - - for (int i = 0; i < partition_count; i++) - { - inverse_color_scalefactors[i].r = 1.0f / MAX(color_scalefactors[i].r, 1e-7f); - inverse_color_scalefactors[i].g = 1.0f / MAX(color_scalefactors[i].g, 1e-7f); - inverse_color_scalefactors[i].b = 1.0f / MAX(color_scalefactors[i].b, 1e-7f); - inverse_color_scalefactors[i].a = 1.0f / MAX(color_scalefactors[i].a, 1e-7f); - - float3 csf = float3(color_scalefactors[i].r, color_scalefactors[i].g, color_scalefactors[i].b); - float3 icsf = float3(inverse_color_scalefactors[i].r, inverse_color_scalefactors[i].g, inverse_color_scalefactors[i].b); - - uncorr_rgb_lines[i].a = averages[i]; - if (dot(directions_rgb[i], directions_rgb[i]) == 0.0f) - { - uncorr_rgb_lines[i].b = normalize(csf); - } - else - { - uncorr_rgb_lines[i].b = normalize(directions_rgb[i]); - } - - samechroma_rgb_lines[i].a = float3(0.0f); - if (dot(averages[i], averages[i]) < 1e-20f) - { - samechroma_rgb_lines[i].b = normalize(csf); - } - else - { - samechroma_rgb_lines[i].b = normalize(averages[i]); - } - - rgb_luma_lines[i].a = averages[i]; - rgb_luma_lines[i].b = normalize(csf); + promise(partition_count > 0); + promise(texels_per_block > 0); - luminance_lines[i].a = float3(0.0f); - luminance_lines[i].b = normalize(csf); + partition_metrics pms[4]; - proc_uncorr_rgb_lines[i].amod = (uncorr_rgb_lines[i].a - uncorr_rgb_lines[i].b * dot(uncorr_rgb_lines[i].a, uncorr_rgb_lines[i].b)) * icsf; - proc_uncorr_rgb_lines[i].bs = uncorr_rgb_lines[i].b * csf; - proc_uncorr_rgb_lines[i].bis = uncorr_rgb_lines[i].b * icsf; + compute_partition_error_color_weightings(*ewb, *pt, pms); - proc_samechroma_rgb_lines[i].amod = (samechroma_rgb_lines[i].a - samechroma_rgb_lines[i].b * dot(samechroma_rgb_lines[i].a, samechroma_rgb_lines[i].b)) * icsf; - proc_samechroma_rgb_lines[i].bs = samechroma_rgb_lines[i].b * csf; - proc_samechroma_rgb_lines[i].bis = samechroma_rgb_lines[i].b * icsf; + compute_avgs_and_dirs_3_comp(pt, blk, ewb, 3, pms); - proc_rgb_luma_lines[i].amod = (rgb_luma_lines[i].a - rgb_luma_lines[i].b * dot(rgb_luma_lines[i].a, rgb_luma_lines[i].b)) * icsf; - proc_rgb_luma_lines[i].bs = rgb_luma_lines[i].b * csf; - proc_rgb_luma_lines[i].bis = rgb_luma_lines[i].b * icsf; - - proc_luminance_lines[i].amod = (luminance_lines[i].a - luminance_lines[i].b * dot(luminance_lines[i].a, luminance_lines[i].b)) * icsf; - proc_luminance_lines[i].bs = luminance_lines[i].b * csf; - proc_luminance_lines[i].bis = luminance_lines[i].b * icsf; - } - - float uncorr_rgb_error[4]; - float samechroma_rgb_error[4]; - float rgb_luma_error[4]; - float luminance_rgb_error[4]; - - for (int i = 0; i < partition_count; i++) - { - uncorr_rgb_error[i] = compute_error_squared_rgb_single_partition(i, bsd, pi, pb, ewb, &(proc_uncorr_rgb_lines[i])); - - samechroma_rgb_error[i] = compute_error_squared_rgb_single_partition(i, bsd, pi, pb, ewb, &(proc_samechroma_rgb_lines[i])); - - rgb_luma_error[i] = compute_error_squared_rgb_single_partition(i, bsd, pi, pb, ewb, &(proc_rgb_luma_lines[i])); - - luminance_rgb_error[i] = compute_error_squared_rgb_single_partition(i, bsd, pi, pb, ewb, &(proc_luminance_lines[i])); - } - - // compute the error that arises from just ditching alpha and RGB - float alpha_drop_error[4]; - float rgb_drop_error[4]; - - 
for (int i = 0; i < partition_count; i++) - { - alpha_drop_error[i] = 0; - rgb_drop_error[i] = 0; - } - - for (int i = 0; i < texels_per_block; i++) - { - int partition = pi->partition_of_texel[i]; - float alpha = pb->data_a[i]; - float default_alpha = pb->alpha_lns[i] ? (float)0x7800 : (float)0xFFFF; - - float omalpha = alpha - default_alpha; - alpha_drop_error[partition] += omalpha * omalpha * ewb->error_weights[i].a; - - float red = pb->data_r[i]; - float green = pb->data_g[i]; - float blue = pb->data_b[i]; - rgb_drop_error[partition] += red * red * ewb->error_weights[i].r + - green * green * ewb->error_weights[i].g + - blue * blue * ewb->error_weights[i].b; - } - - // check if we are eligible for blue-contraction and offset-encoding endpoints ep; if (separate_component == -1) { endpoints_and_weights ei; - compute_endpoints_and_ideal_weights_1_plane(bsd, pi, pb, ewb, &ei); + compute_endpoints_and_ideal_weights_1_plane(bsd, pt, blk, ewb, &ei); ep = ei.ep; } else { endpoints_and_weights ei1, ei2; - compute_endpoints_and_ideal_weights_2_planes(bsd, pi, pb, ewb, separate_component, &ei1, &ei2); - + compute_endpoints_and_ideal_weights_2_planes(bsd, pt, blk, ewb, separate_component, &ei1, &ei2); merge_endpoints(&(ei1.ep), &(ei2.ep), separate_component, &ep); } - int eligible_for_offset_encode[4]; - int eligible_for_blue_contraction[4]; - for (int i = 0; i < partition_count; i++) - { - float4 endpt0 = ep.endpt0[i]; - float4 endpt1 = ep.endpt1[i]; - - float4 endpt_dif = endpt1 - endpt0; - if (fabsf(endpt_dif.r) < (0.12f * 65535.0f) && - fabsf(endpt_dif.g) < (0.12f * 65535.0f) && - fabsf(endpt_dif.b) < (0.12f * 65535.0f)) - { - eligible_for_offset_encode[i] = 1; - } - else - { - eligible_for_offset_encode[i] = 0; - } - - endpt0.r += (endpt0.r - endpt0.b); - endpt0.g += (endpt0.g - endpt0.b); - endpt1.r += (endpt1.r - endpt1.b); - endpt1.g += (endpt1.g - endpt1.b); - if (endpt0.r > (0.01f * 65535.0f) && endpt0.r < (0.99f * 65535.0f) && - endpt1.r > (0.01f * 65535.0f) && endpt1.r < (0.99f * 65535.0f) && - endpt0.g > (0.01f * 65535.0f) && endpt0.g < (0.99f * 65535.0f) && - endpt1.g > (0.01f * 65535.0f) && endpt1.g < (0.99f * 65535.0f)) - { - eligible_for_blue_contraction[i] = 1; - } - else - { - eligible_for_blue_contraction[i] = 0; - } - } - - // finally, gather up our results for (int i = 0; i < partition_count; i++) { - eci[i].rgb_scale_error = (samechroma_rgb_error[i] - uncorr_rgb_error[i]) * 0.7f; // empirical - eci[i].rgb_luma_error = (rgb_luma_error[i] - uncorr_rgb_error[i]) * 1.5f; // wild guess - eci[i].luminance_error = (luminance_rgb_error[i] - uncorr_rgb_error[i]) * 3.0f; // empirical - eci[i].alpha_drop_error = alpha_drop_error[i] * 3.0f; - eci[i].rgb_drop_error = rgb_drop_error[i] * 3.0f; - eci[i].can_offset_encode = eligible_for_offset_encode[i]; - eci[i].can_blue_contract = eligible_for_blue_contraction[i]; + partition_metrics& pm = pms[i]; + + // TODO: Can we skip rgb_luma_lines for LDR images? 
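// For each partition this loop fits the candidate color lines (uncorrelated
// RGB, same-chroma, RGB + luma, and luminance), projects the texels onto them
// via compute_error_squared_rgb_single_partition() to obtain per-line squared
// errors, and then derives the offset-encode and blue-contract eligibility
// flags consumed by the encoding-choice heuristics stored into eci[] below.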
+ line3 uncor_rgb_lines; + line3 samec_rgb_lines; // for LDR-RGB-scale + line3 rgb_luma_lines; // for HDR-RGB-scale + + processed_line3 uncor_rgb_plines; + processed_line3 samec_rgb_plines; // for LDR-RGB-scale + processed_line3 rgb_luma_plines; // for HDR-RGB-scale + processed_line3 luminance_plines; + + float uncorr_rgb_error; + float samechroma_rgb_error; + float rgb_luma_error; + float luminance_rgb_error; + float alpha_drop_error; + + vfloat4 csf = pm.color_scale; + csf.set_lane<3>(0.0f); + vfloat4 csfn = normalize(csf); + + vfloat4 icsf = pm.icolor_scale; + icsf.set_lane<3>(0.0f); + + uncor_rgb_lines.a = pm.avg; + uncor_rgb_lines.b = normalize_safe(pm.dir.swz<0, 1, 2>(), csfn); + + samec_rgb_lines.a = vfloat4::zero(); + samec_rgb_lines.b = normalize_safe(pm.avg.swz<0, 1, 2>(), csfn); + + rgb_luma_lines.a = pm.avg; + rgb_luma_lines.b = csfn; + + uncor_rgb_plines.amod = (uncor_rgb_lines.a - uncor_rgb_lines.b * dot3(uncor_rgb_lines.a, uncor_rgb_lines.b)) * icsf; + uncor_rgb_plines.bs = uncor_rgb_lines.b * csf; + uncor_rgb_plines.bis = uncor_rgb_lines.b * icsf; + + // Same chroma always goes though zero, so this is simpler than the others + samec_rgb_plines.amod = vfloat4::zero(); + samec_rgb_plines.bs = samec_rgb_lines.b * csf; + samec_rgb_plines.bis = samec_rgb_lines.b * icsf; + + rgb_luma_plines.amod = (rgb_luma_lines.a - rgb_luma_lines.b * dot3(rgb_luma_lines.a, rgb_luma_lines.b)) * icsf; + rgb_luma_plines.bs = rgb_luma_lines.b * csf; + rgb_luma_plines.bis = rgb_luma_lines.b * icsf; + + // Luminance always goes though zero, so this is simpler than the others + luminance_plines.amod = vfloat4::zero(); + luminance_plines.bs = csfn * csf; + luminance_plines.bis = csfn * icsf; + + compute_error_squared_rgb_single_partition( + i, bsd, pt, blk, ewb, + &uncor_rgb_plines, &uncorr_rgb_error, + &samec_rgb_plines, &samechroma_rgb_error, + &rgb_luma_plines, &rgb_luma_error, + &luminance_plines, &luminance_rgb_error, + &alpha_drop_error); + + // Determine if we can offset encode RGB lanes + vfloat4 endpt0 = ep.endpt0[i]; + vfloat4 endpt1 = ep.endpt1[i]; + vfloat4 endpt_diff = abs(endpt1 - endpt0); + vmask4 endpt_can_offset = endpt_diff < vfloat4(0.12f * 65535.0f); + bool can_offset_encode = (mask(endpt_can_offset) & 0x7) == 0x7; + + // Determine if we can blue contract encode RGB lanes + vfloat4 endpt_diff_bc( + endpt0.lane<0>() + (endpt0.lane<0>() - endpt0.lane<2>()), + endpt1.lane<0>() + (endpt1.lane<0>() - endpt1.lane<2>()), + endpt0.lane<1>() + (endpt0.lane<1>() - endpt0.lane<2>()), + endpt1.lane<1>() + (endpt1.lane<1>() - endpt1.lane<2>()) + ); + + vmask4 endpt_can_bc_lo = endpt_diff_bc > vfloat4(0.01f * 65535.0f); + vmask4 endpt_can_bc_hi = endpt_diff_bc < vfloat4(0.99f * 65535.0f); + bool can_blue_contract = (mask(endpt_can_bc_lo & endpt_can_bc_hi) & 0x7) == 0x7; + + // Store out the settings + eci[i].rgb_scale_error = (samechroma_rgb_error - uncorr_rgb_error) * 0.7f; // empirical + eci[i].rgb_luma_error = (rgb_luma_error - uncorr_rgb_error) * 1.5f; // wild guess + eci[i].luminance_error = (luminance_rgb_error - uncorr_rgb_error) * 3.0f; // empirical + eci[i].alpha_drop_error = alpha_drop_error * 3.0f; + eci[i].can_offset_encode = can_offset_encode; + eci[i].can_blue_contract = can_blue_contract; } } diff --git a/libkram/astc-encoder/astcenc_entry.cpp b/libkram/astc-encoder/astcenc_entry.cpp index 78e0806c..2cd4366c 100644 --- a/libkram/astc-encoder/astcenc_entry.cpp +++ b/libkram/astc-encoder/astcenc_entry.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // 
---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -19,11 +19,13 @@ * @brief Functions for the library entrypoint. */ +#include #include #include #include "astcenc.h" #include "astcenc_internal.h" +#include "astcenc_diagnostic_trace.h" // The ASTC codec is written with the assumption that a float threaded through // the "if32" union will in fact be stored and reloaded as a 32-bit IEEE-754 single-precision @@ -48,8 +50,8 @@ static astcenc_error validate_cpu_float() static astcenc_error validate_cpu_isa() { - #if ASTCENC_SSE >= 42 - if (!cpu_supports_sse42()) + #if ASTCENC_SSE >= 41 + if (!cpu_supports_sse41()) { return ASTCENC_ERR_BAD_CPU_ISA; } @@ -62,6 +64,13 @@ static astcenc_error validate_cpu_isa() } #endif + #if ASTCENC_F16C >= 1 + if (!cpu_supports_f16c()) + { + return ASTCENC_ERR_BAD_CPU_ISA; + } + #endif + #if ASTCENC_AVX >= 2 if (!cpu_supports_avx2()) { @@ -72,10 +81,59 @@ static astcenc_error validate_cpu_isa() return ASTCENC_SUCCESS; } +/** + * @brief Record of the quality tuning parameter values. + * + * See the @c astcenc_config structure for detailed parameter documentation. + * + * Note that the mse_overshoot entries are scaling factors relative to the + * base MSE to hit db_limit. A 20% overshoot is harder to hit for a higher + * base db_limit, so we may actually use lower ratios for the more through + * search presets because the underlying db_limit is so much higher. + */ +struct astcenc_preset_config { + float quality; + unsigned int tune_partition_count_limit; + unsigned int tune_partition_index_limit; + unsigned int tune_block_mode_limit; + unsigned int tune_refinement_limit; + unsigned int tune_candidate_limit; + float tune_db_limit_a_base; + float tune_db_limit_b_base; + float tune_mode0_mse_overshoot; + float tune_refinement_mse_overshoot; + float tune_partition_early_out_limit; + float tune_two_plane_early_out_limit; +}; + +/** + * @brief The static quality presets that are built-in. 
+ */ +static const std::array preset_configs {{ + { + ASTCENC_PRE_FASTEST, + 4, 2, 30, 1, 1, 79, 57, 2.0f, 2.0f, 1.0f, 0.5f + }, { + ASTCENC_PRE_FAST, + 4, 4, 50, 2, 2, 85, 63, 3.5f, 3.5f, 1.0f, 0.5f + }, { + ASTCENC_PRE_MEDIUM, + 4, 25, 75, 2, 2, 95, 70, 1.75f, 1.75f, 1.2f, 0.75f + }, { + ASTCENC_PRE_THOROUGH, + 4, 75, 92, 4, 4, 105, 77, 10.0f, 10.0f, 2.5f, 0.95f + }, { + ASTCENC_PRE_EXHAUSTIVE, + 4, 1024, 100, 4, 4, 200, 200, 10.0f, 10.0f, 10.0f, 0.99f + } +}}; + static astcenc_error validate_profile( astcenc_profile profile ) { - switch(profile) + // Values in this enum are from an external user, so not guaranteed to be + // bounded to the enum values + switch(static_cast(profile)) { case ASTCENC_PRF_LDR_SRGB: case ASTCENC_PRF_LDR: @@ -112,7 +170,9 @@ static astcenc_error validate_flags( } // Flags field must only contain at most a single map type - exMask = ASTCENC_FLG_MAP_MASK | ASTCENC_FLG_MAP_NORMAL; + exMask = ASTCENC_FLG_MAP_MASK + | ASTCENC_FLG_MAP_NORMAL + | ASTCENC_FLG_MAP_RGBM; if (astc::popcount(flags & exMask) > 1) { return ASTCENC_ERR_BAD_FLAGS; @@ -125,7 +185,8 @@ static astcenc_error validate_flags( static astcenc_error validate_compression_swz( astcenc_swz swizzle ) { - switch(swizzle) + // Not all enum values are handled; SWZ_Z is invalid for compression + switch(static_cast(swizzle)) { case ASTCENC_SWZ_R: case ASTCENC_SWZ_G: @@ -157,7 +218,9 @@ static astcenc_error validate_compression_swizzle( static astcenc_error validate_decompression_swz( astcenc_swz swizzle ) { - switch(swizzle) + // Values in this enum are from an external user, so not guaranteed to be + // bounded to the enum values + switch(static_cast(swizzle)) { case ASTCENC_SWZ_R: case ASTCENC_SWZ_G: @@ -197,8 +260,7 @@ static astcenc_error validate_decompression_swizzle( * make no sense algorithmically will return an error. 
*/ static astcenc_error validate_config( - astcenc_config &config, - unsigned int thread_count + astcenc_config &config ) { astcenc_error status; @@ -220,12 +282,6 @@ static astcenc_error validate_config( return status; } - // Decompress-only contexts must be single threaded - if ((config.flags & ASTCENC_FLG_DECOMPRESS_ONLY) && (thread_count > 1)) - { - return ASTCENC_ERR_BAD_PARAM; - } - #if defined(ASTCENC_DECOMPRESS_ONLY) // Decompress-only builds only support decompress-only contexts if (!(config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)) @@ -234,36 +290,41 @@ static astcenc_error validate_config( } #endif - config.v_rgba_mean_stdev_mix = MAX(config.v_rgba_mean_stdev_mix, 0.0f); - config.v_rgb_power = MAX(config.v_rgb_power, 0.0f); - config.v_rgb_base = MAX(config.v_rgb_base, 0.0f); - config.v_rgb_mean = MAX(config.v_rgb_mean, 0.0f); - config.v_rgb_stdev = MAX(config.v_rgb_stdev, 0.0f); - config.v_a_power = MAX(config.v_a_power, 0.0f); - config.v_a_base = MAX(config.v_a_base, 0.0f); - config.v_a_mean = MAX(config.v_a_mean, 0.0f); - config.v_a_stdev = MAX(config.v_a_stdev, 0.0f); - - config.b_deblock_weight = MAX(config.b_deblock_weight, 0.0f); - - config.tune_partition_limit = astc::clampi(config.tune_partition_limit, 1, PARTITION_COUNT); - config.tune_block_mode_limit = astc::clampi(config.tune_block_mode_limit, 1, 100); - config.tune_refinement_limit = MAX(config.tune_refinement_limit, 1); - config.tune_candidate_limit = astc::clampi(config.tune_candidate_limit, 1, TUNE_MAX_TRIAL_CANDIDATES); - config.tune_db_limit = MAX(config.tune_db_limit, 0.0f); - config.tune_partition_early_out_limit = MAX(config.tune_partition_early_out_limit, 0.0f); - config.tune_two_plane_early_out_limit = MAX(config.tune_two_plane_early_out_limit, 0.0f); + config.v_rgba_mean_stdev_mix = astc::max(config.v_rgba_mean_stdev_mix, 0.0f); + config.v_rgb_power = astc::max(config.v_rgb_power, 0.0f); + config.v_rgb_base = astc::max(config.v_rgb_base, 0.0f); + config.v_rgb_mean = astc::max(config.v_rgb_mean, 0.0f); + config.v_rgb_stdev = astc::max(config.v_rgb_stdev, 0.0f); + config.v_a_power = astc::max(config.v_a_power, 0.0f); + config.v_a_base = astc::max(config.v_a_base, 0.0f); + config.v_a_mean = astc::max(config.v_a_mean, 0.0f); + config.v_a_stdev = astc::max(config.v_a_stdev, 0.0f); + + config.b_deblock_weight = astc::max(config.b_deblock_weight, 0.0f); + + config.rgbm_m_scale = astc::max(config.rgbm_m_scale, 1.0f); + + config.tune_partition_count_limit = astc::clamp(config.tune_partition_count_limit, 1u, 4u); + config.tune_partition_index_limit = astc::clamp(config.tune_partition_index_limit, 1u, (unsigned int)PARTITION_COUNT); + config.tune_block_mode_limit = astc::clamp(config.tune_block_mode_limit, 1u, 100u); + config.tune_refinement_limit = astc::max(config.tune_refinement_limit, 1u); + config.tune_candidate_limit = astc::clamp(config.tune_candidate_limit, 1u, TUNE_MAX_TRIAL_CANDIDATES); + config.tune_db_limit = astc::max(config.tune_db_limit, 0.0f); + config.tune_mode0_mse_overshoot = astc::max(config.tune_mode0_mse_overshoot, 1.0f); + config.tune_refinement_mse_overshoot = astc::max(config.tune_refinement_mse_overshoot, 1.0f); + config.tune_partition_early_out_limit = astc::max(config.tune_partition_early_out_limit, 0.0f); + config.tune_two_plane_early_out_limit = astc::max(config.tune_two_plane_early_out_limit, 0.0f); // Specifying a zero weight color component is not allowed; force to small value - float max_weight = MAX(MAX(config.cw_r_weight, config.cw_g_weight), - MAX(config.cw_b_weight, 
config.cw_a_weight)); + float max_weight = astc::max(astc::max(config.cw_r_weight, config.cw_g_weight), + astc::max(config.cw_b_weight, config.cw_a_weight)); if (max_weight > 0.0f) { max_weight /= 1000.0f; - config.cw_r_weight = MAX(config.cw_r_weight, max_weight); - config.cw_g_weight = MAX(config.cw_g_weight, max_weight); - config.cw_b_weight = MAX(config.cw_b_weight, max_weight); - config.cw_a_weight = MAX(config.cw_a_weight, max_weight); + config.cw_r_weight = astc::max(config.cw_r_weight, max_weight); + config.cw_g_weight = astc::max(config.cw_g_weight, max_weight); + config.cw_b_weight = astc::max(config.cw_b_weight, max_weight); + config.cw_a_weight = astc::max(config.cw_a_weight, max_weight); } // If all color components error weights are zero then return an error else @@ -279,17 +340,18 @@ astcenc_error astcenc_config_init( unsigned int block_x, unsigned int block_y, unsigned int block_z, - astcenc_preset preset, + float quality, unsigned int flags, - astcenc_config& config + astcenc_config* configp ) { astcenc_error status; + astcenc_config& config = *configp; // Zero init all config fields; although most of will be over written std::memset(&config, 0, sizeof(config)); // Process the block size - block_z = MAX(block_z, 1); // For 2D blocks Z==0 is accepted, but convert to 1 + block_z = astc::max(block_z, 1u); // For 2D blocks Z==0 is accepted, but convert to 1 status = validate_block_size(block_x, block_y, block_z); if (status != ASTCENC_SUCCESS) { @@ -303,58 +365,84 @@ astcenc_error astcenc_config_init( float texels = static_cast(block_x * block_y * block_z); float ltexels = logf(texels) / logf(10.0f); - // Process the performance preset; note that this must be done before we - // process any additional settings, such as color profile and flags, which - // may replace some of these settings with more use case tuned values - switch(preset) - { - case ASTCENC_PRE_FASTEST: - config.tune_partition_limit = 2; - config.tune_block_mode_limit = 25; - config.tune_refinement_limit = 1; - config.tune_candidate_limit = MIN(1, TUNE_MAX_TRIAL_CANDIDATES); - config.tune_db_limit = MAX(70 - 35 * ltexels, 53 - 19 * ltexels); - config.tune_partition_early_out_limit = 1.0f; - config.tune_two_plane_early_out_limit = 0.5f; - break; - case ASTCENC_PRE_FAST: - config.tune_partition_limit = 4; - config.tune_block_mode_limit = 50; - config.tune_refinement_limit = 1; - config.tune_candidate_limit = MIN(2, TUNE_MAX_TRIAL_CANDIDATES); - config.tune_db_limit = MAX(85 - 35 * ltexels, 63 - 19 * ltexels); - config.tune_partition_early_out_limit = 1.0f; - config.tune_two_plane_early_out_limit = 0.5f; - break; - case ASTCENC_PRE_MEDIUM: - config.tune_partition_limit = 25; - config.tune_block_mode_limit = 75; - config.tune_refinement_limit = 2; - config.tune_candidate_limit = MIN(2, TUNE_MAX_TRIAL_CANDIDATES); - config.tune_db_limit = MAX(95 - 35 * ltexels, 70 - 19 * ltexels); - config.tune_partition_early_out_limit = 1.2f; - config.tune_two_plane_early_out_limit = 0.75f; - break; - case ASTCENC_PRE_THOROUGH: - config.tune_partition_limit = 100; - config.tune_block_mode_limit = 95; - config.tune_refinement_limit = 4; - config.tune_candidate_limit = MIN(3, TUNE_MAX_TRIAL_CANDIDATES); - config.tune_db_limit = MAX(105 - 35 * ltexels, 77 - 19 * ltexels); - config.tune_partition_early_out_limit = 2.5f; - config.tune_two_plane_early_out_limit = 0.95f; - break; - case ASTCENC_PRE_EXHAUSTIVE: - config.tune_partition_limit = 1024; - config.tune_block_mode_limit = 100; - config.tune_refinement_limit = 4; - 
config.tune_candidate_limit = MIN(4, TUNE_MAX_TRIAL_CANDIDATES); - config.tune_db_limit = 999.0f; - config.tune_partition_early_out_limit = 1000.0f; - config.tune_two_plane_early_out_limit = 0.99f; - break; - default: - return ASTCENC_ERR_BAD_PRESET; + // Process the performance quality level or preset; note that this must be + // done before we process any additional settings, such as color profile + // and flags, which may replace some of these settings with more use case + // tuned values + if (quality < ASTCENC_PRE_FASTEST || + quality > ASTCENC_PRE_EXHAUSTIVE) + { + return ASTCENC_ERR_BAD_QUALITY; + } + + // Determine which preset to use, or which pair to interpolate + size_t start; + size_t end; + for (end = 0; end < preset_configs.size(); end++) + { + if (preset_configs[end].quality >= quality) + { + break; + } + } + + start = end == 0 ? 0 : end - 1; + + // Start and end node are the same - so just transfer the values. + if (start == end) + { + config.tune_partition_count_limit = preset_configs[start].tune_partition_count_limit; + config.tune_partition_index_limit = preset_configs[start].tune_partition_index_limit; + config.tune_block_mode_limit = preset_configs[start].tune_block_mode_limit; + config.tune_refinement_limit = preset_configs[start].tune_refinement_limit; + config.tune_candidate_limit = astc::min(preset_configs[start].tune_candidate_limit, + TUNE_MAX_TRIAL_CANDIDATES); + config.tune_db_limit = astc::max(preset_configs[start].tune_db_limit_a_base - 35 * ltexels, + preset_configs[start].tune_db_limit_b_base - 19 * ltexels); + + config.tune_mode0_mse_overshoot = preset_configs[start].tune_mode0_mse_overshoot; + config.tune_refinement_mse_overshoot = preset_configs[start].tune_refinement_mse_overshoot; + + config.tune_partition_early_out_limit = preset_configs[start].tune_partition_early_out_limit; + config.tune_two_plane_early_out_limit = preset_configs[start].tune_two_plane_early_out_limit; + } + // Start and end node are not the same - so interpolate between them + else + { + auto& node_a = preset_configs[start]; + auto& node_b = preset_configs[end]; + + float wt_range = node_b.quality - node_a.quality; + assert(wt_range > 0); + + // Compute interpolation factors + float wt_node_a = (node_b.quality - quality) / wt_range; + float wt_node_b = (quality - node_a.quality) / wt_range; + + #define LERP(param) ((node_a.param * wt_node_a) + (node_b.param * wt_node_b)) + #define LERPI(param) astc::flt2int_rtn(\ + (((float)node_a.param) * wt_node_a) + \ + (((float)node_b.param) * wt_node_b)) + #define LERPUI(param) (unsigned int)LERPI(param) + + config.tune_partition_count_limit = LERPI(tune_partition_count_limit); + config.tune_partition_index_limit = LERPI(tune_partition_index_limit); + config.tune_block_mode_limit = LERPI(tune_block_mode_limit); + config.tune_refinement_limit = LERPI(tune_refinement_limit); + config.tune_candidate_limit = astc::min(LERPUI(tune_candidate_limit), + TUNE_MAX_TRIAL_CANDIDATES); + config.tune_db_limit = astc::max(LERP(tune_db_limit_a_base) - 35 * ltexels, + LERP(tune_db_limit_b_base) - 19 * ltexels); + + config.tune_mode0_mse_overshoot = LERP(tune_mode0_mse_overshoot); + config.tune_refinement_mse_overshoot = LERP(tune_refinement_mse_overshoot); + + config.tune_partition_early_out_limit = LERP(tune_partition_early_out_limit); + config.tune_two_plane_early_out_limit = LERP(tune_two_plane_early_out_limit); + + #undef LERP + #undef LERPI + #undef LERPUI } // Set heuristics to the defaults for each color profile @@ -368,10 +456,13 @@ astcenc_error 
astcenc_config_init( config.a_scale_radius = 0; - config.b_deblock_weight = 0.0f; + config.rgbm_m_scale = 0.0f; config.profile = profile; - switch(profile) + + // Values in this enum are from an external user, so not guaranteed to be + // bounded to the enum values + switch(static_cast(profile)) { case ASTCENC_PRF_LDR: case ASTCENC_PRF_LDR_SRGB: @@ -424,14 +515,12 @@ astcenc_error astcenc_config_init( if (flags & ASTCENC_FLG_MAP_NORMAL) { - config.cw_r_weight = 1.0f; config.cw_g_weight = 0.0f; config.cw_b_weight = 0.0f; - config.cw_a_weight = 1.0f; config.tune_partition_early_out_limit = 1000.0f; config.tune_two_plane_early_out_limit = 0.99f; - if (flags & ASTCENC_FLG_USE_PERCEPTUAL) + if (flags & ASTCENC_FLG_USE_PERCEPTUAL) { config.b_deblock_weight = 1.8f; config.v_rgba_radius = 3; @@ -453,19 +542,26 @@ astcenc_error astcenc_config_init( config.v_a_stdev = 25.0f; } + if (flags & ASTCENC_FLG_MAP_RGBM) + { + config.rgbm_m_scale = 5.0f; + config.cw_a_weight = 2.0f * config.rgbm_m_scale; + } + config.flags = flags; return ASTCENC_SUCCESS; } astcenc_error astcenc_context_alloc( - astcenc_config const& config, + const astcenc_config* configp, unsigned int thread_count, astcenc_context** context ) { astcenc_error status; astcenc_context* ctx = nullptr; block_size_descriptor* bsd = nullptr; + const astcenc_config& config = *configp; status = validate_cpu_float(); if (status != ASTCENC_SUCCESS) @@ -484,6 +580,14 @@ astcenc_error astcenc_context_alloc( return ASTCENC_ERR_BAD_PARAM; } +#if defined(ASTCENC_DIAGNOSTICS) + // Force single threaded compressor use in diagnostic mode. + if (thread_count != 1) + { + return ASTCENC_ERR_BAD_PARAM; + } +#endif + ctx = new astcenc_context; ctx->thread_count = thread_count; ctx->config = config; @@ -495,7 +599,7 @@ astcenc_error astcenc_context_alloc( ctx->input_alpha_averages = nullptr; // Copy the config first and validate the copy (we may modify it) - status = validate_config(ctx->config, thread_count); + status = validate_config(ctx->config); if (status != ASTCENC_SUCCESS) { delete ctx; @@ -503,7 +607,9 @@ astcenc_error astcenc_context_alloc( } bsd = new block_size_descriptor; - init_block_size_descriptor(config.block_x, config.block_y, config.block_z, bsd); + bool can_omit_modes = config.flags & ASTCENC_FLG_SELF_DECOMPRESS_ONLY; + init_block_size_descriptor(config.block_x, config.block_y, config.block_z, + can_omit_modes, static_cast(config.tune_block_mode_limit) / 100.0f, bsd); ctx->bsd = bsd; #if !defined(ASTCENC_DECOMPRESS_ONLY) @@ -516,7 +622,7 @@ astcenc_error astcenc_context_alloc( // Turn a dB limit into a per-texel error for faster use later if ((ctx->config.profile == ASTCENC_PRF_LDR) || (ctx->config.profile == ASTCENC_PRF_LDR_SRGB)) { - ctx->config.tune_db_limit = powf(0.1f, ctx->config.tune_db_limit * 0.1f) * 65535.0f * 65535.0f; + ctx->config.tune_db_limit = astc::pow(0.1f, ctx->config.tune_db_limit * 0.1f) * 65535.0f * 65535.0f; } else { @@ -527,27 +633,36 @@ astcenc_error astcenc_context_alloc( ctx->working_buffers = aligned_malloc(worksize , 32); if (!ctx->working_buffers) { - goto error_oom; + term_block_size_descriptor(bsd); + delete bsd; + delete ctx; + *context = nullptr; + return ASTCENC_ERR_OUT_OF_MEM; } } #endif +#if defined(ASTCENC_DIAGNOSTICS) + ctx->trace_log = new TraceLog(ctx->config.trace_file_path); + if(!ctx->trace_log->m_file) + { + return ASTCENC_ERR_DTRACE_FAILURE; + } + + trace_add_data("block_x", config.block_x); + trace_add_data("block_y", config.block_y); + trace_add_data("block_z", config.block_z); +#endif + *context 
= ctx; // TODO: Currently static memory; should move to context memory #if !defined(ASTCENC_DECOMPRESS_ONLY) prepare_angular_tables(); #endif - build_quantization_mode_table(); + build_quant_mode_table(); return ASTCENC_SUCCESS; - -error_oom: - term_block_size_descriptor(bsd); - delete bsd; - delete ctx; - *context = nullptr; - return ASTCENC_ERR_OUT_OF_MEM; } void astcenc_context_free( @@ -557,6 +672,9 @@ void astcenc_context_free( { aligned_free(ctx->working_buffers); term_block_size_descriptor(ctx->bsd); +#if defined(ASTCENC_DIAGNOSTICS) + delete ctx->trace_log; +#endif delete ctx->bsd; delete ctx; } @@ -571,15 +689,17 @@ static void compress_image( uint8_t* buffer ) { const block_size_descriptor *bsd = ctx.bsd; + astcenc_profile decode_mode = ctx.config.profile; + imageblock blk; + int block_x = bsd->xdim; int block_y = bsd->ydim; int block_z = bsd->zdim; - astcenc_profile decode_mode = ctx.config.profile; - imageblock pb; int dim_x = image.dim_x; int dim_y = image.dim_y; int dim_z = image.dim_z; + int xblocks = (dim_x + block_x - 1) / block_x; int yblocks = (dim_y + block_y - 1) / block_y; int zblocks = (dim_z + block_z - 1) / block_z; @@ -597,7 +717,7 @@ static void compress_image( while (true) { unsigned int count; - unsigned int base = ctx.manage_compress.get_task_assignment(4, count); + unsigned int base = ctx.manage_compress.get_task_assignment(16, count); if (!count) { break; @@ -611,25 +731,74 @@ static void compress_image( int y = rem / row_blocks; int x = rem - (y * row_blocks); - // Decompress - fetch_imageblock(decode_mode, image, &pb, bsd, x * block_x, y * block_y, z * block_z, swizzle); + // Test if we can apply some basic alpha-scale RDO + bool use_full_block = true; + if (ctx.config.a_scale_radius != 0 && block_z == 1) + { + int start_x = x * block_x; + int end_x = astc::min(dim_x, start_x + block_x); + + int start_y = y * block_y; + int end_y = astc::min(dim_y, start_y + block_y); + + // SATs accumulate error, so don't test exactly zero. Test for + // less than 1 alpha in the expanded block footprint that + // includes the alpha radius. + int x_footprint = block_x + + 2 * (ctx.config.a_scale_radius - 1); + + int y_footprint = block_y + + 2 * (ctx.config.a_scale_radius - 1); + + float footprint = (float)(x_footprint * y_footprint); + float threshold = 0.9f / (255.0f * footprint); + + // Do we have any alpha values? 
+ use_full_block = false; + for (int ay = start_y; ay < end_y; ay++) + { + for (int ax = start_x; ax < end_x; ax++) + { + float a_avg = ctx.input_alpha_averages[ay * dim_x + ax]; + if (a_avg > threshold) + { + use_full_block = true; + ax = end_x; + ay = end_y; + } + } + } + } + // Fetch the full block for compression + if (use_full_block) + { + fetch_imageblock(decode_mode, image, &blk, bsd, x * block_x, y * block_y, z * block_z, swizzle); + } + // Apply alpha scale RDO - substitute constant color block + else + { + blk.origin_texel = vfloat4::zero(); + blk.data_min = vfloat4::zero(); + blk.data_max = blk.data_min; + blk.grayscale = false; + } int offset = ((z * yblocks + y) * xblocks + x) * 16; uint8_t *bp = buffer + offset; physical_compressed_block* pcb = reinterpret_cast(bp); symbolic_compressed_block scb; - compress_block(ctx, image, &pb, scb, *pcb, temp_buffers); + compress_block(ctx, image, &blk, scb, *pcb, temp_buffers); } ctx.manage_compress.complete_task_assignment(count); - }; + } } #endif astcenc_error astcenc_compress_image( astcenc_context* ctx, - astcenc_image& image, + astcenc_image* imagep, astcenc_swizzle swizzle, uint8_t* data_out, size_t data_len, @@ -637,7 +806,7 @@ astcenc_error astcenc_compress_image( ) { #if defined(ASTCENC_DECOMPRESS_ONLY) (void)ctx; - (void)image; + (void)imagep; (void)swizzle; (void)data_out; (void)data_len; @@ -645,6 +814,7 @@ astcenc_error astcenc_compress_image( return ASTCENC_ERR_BAD_CONTEXT; #else astcenc_error status; + astcenc_image& image = *imagep; if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY) { @@ -678,15 +848,16 @@ astcenc_error astcenc_compress_image( } if (ctx->config.v_rgb_mean != 0.0f || ctx->config.v_rgb_stdev != 0.0f || - ctx->config.v_a_mean != 0.0f || ctx->config.v_a_stdev != 0.0f) + ctx->config.v_a_mean != 0.0f || ctx->config.v_a_stdev != 0.0f || + ctx->config.a_scale_radius != 0) { // First thread to enter will do setup, other threads will subsequently // enter the critical section but simply skip over the initialization auto init_avg_var = [ctx, &image, swizzle]() { // Perform memory allocations for the destination buffers size_t texel_count = image.dim_x * image.dim_y * image.dim_z; - ctx->input_averages = new float4[texel_count]; - ctx->input_variances = new float4[texel_count]; + ctx->input_averages = new vfloat4[texel_count]; + ctx->input_variances = new vfloat4[texel_count]; ctx->input_alpha_averages = new float[texel_count]; return init_compute_averages_and_variances( @@ -747,13 +918,21 @@ astcenc_error astcenc_compress_reset( } astcenc_error astcenc_decompress_image( - astcenc_context* context, + astcenc_context* ctx, const uint8_t* data, size_t data_len, - astcenc_image& image_out, - astcenc_swizzle swizzle + astcenc_image* image_outp, + astcenc_swizzle swizzle, + unsigned int thread_index ) { astcenc_error status; + astcenc_image& image_out = *image_outp; + + // Today this doesn't matter (working set on stack) but might in future ... 
+ if (thread_index >= ctx->thread_count) + { + return ASTCENC_ERR_BAD_PARAM; + } status = validate_decompression_swizzle(swizzle); if (status != ASTCENC_SUCCESS) @@ -761,14 +940,17 @@ astcenc_error astcenc_decompress_image( return status; } - unsigned int block_x = context->config.block_x; - unsigned int block_y = context->config.block_y; - unsigned int block_z = context->config.block_z; + unsigned int block_x = ctx->config.block_x; + unsigned int block_y = ctx->config.block_y; + unsigned int block_z = ctx->config.block_z; unsigned int xblocks = (image_out.dim_x + block_x - 1) / block_x; unsigned int yblocks = (image_out.dim_y + block_y - 1) / block_y; unsigned int zblocks = (image_out.dim_z + block_z - 1) / block_z; + int row_blocks = xblocks; + int plane_blocks = xblocks * yblocks; + // Check we have enough output space (16 bytes per block) size_t size_needed = xblocks * yblocks * zblocks * 16; if (data_len < size_needed) @@ -776,39 +958,189 @@ astcenc_error astcenc_decompress_image( return ASTCENC_ERR_OUT_OF_MEM; } - imageblock pb; + imageblock blk; - for (unsigned int z = 0; z < zblocks; z++) + // Only the first thread actually runs the initializer + ctx->manage_decompress.init(zblocks * yblocks * xblocks); + + // All threads run this processing loop until there is no work remaining + while (true) { - for (unsigned int y = 0; y < yblocks; y++) + unsigned int count; + unsigned int base = ctx->manage_decompress.get_task_assignment(128, count); + if (!count) { - for (unsigned int x = 0; x < xblocks; x++) - { - unsigned int offset = (((z * yblocks + y) * xblocks) + x) * 16; - const uint8_t* bp = data + offset; - physical_compressed_block pcb = *(physical_compressed_block *) bp; - symbolic_compressed_block scb; + break; + } - physical_to_symbolic(*context->bsd, pcb, scb); + for (unsigned int i = base; i < base + count; i++) + { + // Decode i into x, y, z block indices + int z = i / plane_blocks; + unsigned int rem = i - (z * plane_blocks); + int y = rem / row_blocks; + int x = rem - (y * row_blocks); - decompress_symbolic_block(context->config.profile, context->bsd, - x * block_x, y * block_y, z * block_z, - &scb, &pb); + unsigned int offset = (((z * yblocks + y) * xblocks) + x) * 16; + const uint8_t* bp = data + offset; + physical_compressed_block pcb = *(const physical_compressed_block*)bp; + symbolic_compressed_block scb; - write_imageblock(image_out, &pb, context->bsd, - x * block_x, y * block_y, z * block_z, swizzle); - } + physical_to_symbolic(*ctx->bsd, pcb, scb); + + decompress_symbolic_block(ctx->config.profile, ctx->bsd, + x * block_x, y * block_y, z * block_z, + &scb, &blk); + + write_imageblock(image_out, &blk, ctx->bsd, + x * block_x, y * block_y, z * block_z, swizzle); + } + + ctx->manage_decompress.complete_task_assignment(count); + } + + return ASTCENC_SUCCESS; +} + +astcenc_error astcenc_decompress_reset( + astcenc_context* ctx +) { + ctx->manage_decompress.reset(); + return ASTCENC_SUCCESS; +} + +astcenc_error astcenc_get_block_info( + astcenc_context* ctx, + const uint8_t data[16], + astcenc_block_info* info +) { +#if defined(ASTCENC_DECOMPRESS_ONLY) + (void)ctx; + (void)data; + (void)info; + return ASTCENC_ERR_BAD_CONTEXT; +#else + // Decode the compressed data into a symbolic form + physical_compressed_block pcb = *(const physical_compressed_block*)data; + symbolic_compressed_block scb; + physical_to_symbolic(*ctx->bsd, pcb, scb); + + // Fetch the appropriate partition and decimation tables + block_size_descriptor& bsd = *ctx->bsd; + + // Start from a clean slate + 
memset(info, 0, sizeof(*info)); + + // Basic info we can always populate + info->profile = ctx->config.profile; + + info->block_x = ctx->config.block_x; + info->block_y = ctx->config.block_y; + info->block_z = ctx->config.block_z; + info->texel_count = bsd.texel_count; + + // Check for error blocks first - block_mode will be negative + info->is_error_block = scb.error_block != 0; + if (info->is_error_block) + { + return ASTCENC_SUCCESS; + } + + // Check for constant color blocks second - block_mode will be negative + info->is_constant_block = scb.block_mode < 0; + if (info->is_constant_block) + { + return ASTCENC_SUCCESS; + } + + // Otherwise, handle a full block with partition payload; values are known + // to be valid once the two conditions above have been checked + int partition_count = scb.partition_count; + const partition_info* pt = get_partition_table(&bsd, partition_count); + pt += scb.partition_index; + + const int packed_index = bsd.block_mode_packed_index[scb.block_mode]; + assert(packed_index >= 0 && packed_index < bsd.block_mode_count); + const block_mode& bm = bsd.block_modes[packed_index]; + const decimation_table& dt = *bsd.decimation_tables[bm.decimation_mode]; + + info->weight_x = dt.weight_x; + info->weight_y = dt.weight_y; + info->weight_z = dt.weight_z; + + info->is_dual_plane_block = bm.is_dual_plane != 0; + + info->partition_count = scb.partition_count; + info->partition_index = scb.partition_index; + info->dual_plane_component = scb.plane2_color_component; + + info->color_level_count = get_quant_method_levels((quant_method)scb.color_quant_level); + info->weight_level_count = get_quant_method_levels((quant_method)bm.quant_mode); + + // Unpack color endpoints for each active partition + for (int i = 0; i < scb.partition_count; i++) + { + int rgb_hdr; + int a_hdr; + int nan; + vint4 endpnt[2]; + + unpack_color_endpoints(ctx->config.profile, + scb.color_formats[i], + scb.color_quant_level, + scb.color_values[i], + &rgb_hdr, &a_hdr, &nan, + endpnt, endpnt + 1); + + // Store the color endpoint mode info + info->color_endpoint_modes[i] = scb.color_formats[i]; + info->is_hdr_block |= (rgb_hdr != 0) | (a_hdr != 0); + + // Store the unpacked and decoded color endpoint + vmask4 hdr_mask(rgb_hdr, rgb_hdr, rgb_hdr, a_hdr); + for (int j = 0; j < 2; j++) + { + vint4 color_lns = lns_to_sf16(endpnt[j]); + vint4 color_unorm = unorm16_to_sf16(endpnt[j]); + vint4 datai = select(color_unorm, color_lns, hdr_mask); + store(float16_to_float(datai), info->color_endpoints[i][j]); } } + // Unpack weights for each texel + int weight_plane1[MAX_TEXELS_PER_BLOCK]; + int weight_plane2[MAX_TEXELS_PER_BLOCK]; + + unpack_weights(bsd, scb, dt, bm.is_dual_plane, bm.quant_mode, weight_plane1, weight_plane2); + for (int i = 0; i < bsd.texel_count; i++) + { + info->weight_values_plane1[i] = (float)weight_plane1[i] / (float)TEXEL_WEIGHT_SUM; + if (info->is_dual_plane_block) + { + info->weight_values_plane2[i] = (float)weight_plane2[i] / (float)TEXEL_WEIGHT_SUM; + } + } + + // Unpack partition assignments for each texel + for (int i = 0; i < bsd.texel_count; i++) + { + info->partition_assignment[i] = pt->partition_of_texel[i]; + } + return ASTCENC_SUCCESS; +#endif } + const char* astcenc_get_error_string( astcenc_error status ) { - switch(status) + // Values in this enum are from an external user, so not guaranteed to be + // bounded to the enum values + switch(static_cast(status)) { + case ASTCENC_SUCCESS: + return "ASTCENC_SUCCESS"; case ASTCENC_ERR_OUT_OF_MEM: return "ASTCENC_ERR_OUT_OF_MEM"; case 
ASTCENC_ERR_BAD_CPU_FLOAT: @@ -821,8 +1153,8 @@ const char* astcenc_get_error_string( return "ASTCENC_ERR_BAD_BLOCK_SIZE"; case ASTCENC_ERR_BAD_PROFILE: return "ASTCENC_ERR_BAD_PROFILE"; - case ASTCENC_ERR_BAD_PRESET: - return "ASTCENC_ERR_BAD_PRESET"; + case ASTCENC_ERR_BAD_QUALITY: + return "ASTCENC_ERR_BAD_QUALITY"; case ASTCENC_ERR_BAD_FLAGS: return "ASTCENC_ERR_BAD_FLAGS"; case ASTCENC_ERR_BAD_SWIZZLE: @@ -831,6 +1163,10 @@ const char* astcenc_get_error_string( return "ASTCENC_ERR_BAD_CONTEXT"; case ASTCENC_ERR_NOT_IMPLEMENTED: return "ASTCENC_ERR_NOT_IMPLEMENTED"; +#if defined(ASTCENC_DIAGNOSTICS) + case ASTCENC_ERR_DTRACE_FAILURE: + return "ASTCENC_ERR_DTRACE_FAILURE"; +#endif default: return nullptr; } diff --git a/libkram/astc-encoder/astcenc_find_best_partitioning.cpp b/libkram/astc-encoder/astcenc_find_best_partitioning.cpp index d32d8d32..7658e331 100644 --- a/libkram/astc-encoder/astcenc_find_best_partitioning.cpp +++ b/libkram/astc-encoder/astcenc_find_best_partitioning.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -53,163 +53,68 @@ #include "astcenc_internal.h" -static void compute_alpha_range( - int texels_per_block, - const partition_info* pt, - const imageblock* blk, - const error_weight_block* ewb, - float alpha_range[4] +static void compute_partition_error_color_weightings_and_range( + const imageblock& blk, + const error_weight_block& ewb, + const partition_info& pt, + partition_metrics pm[4] ) { - float alpha_min[4]; - float alpha_max[4]; + int partition_count = pt.partition_count; - int partition_count = pt->partition_count; for (int i = 0; i < partition_count; i++) { - alpha_min[i] = 1e38f; - alpha_max[i] = -1e38f; - } + vfloat4 error_weight(1e-12f); + vfloat4 rgba_min(1e38f); + vfloat4 rgba_max(-1e38f); - for (int i = 0; i < texels_per_block; i++) - { - if (ewb->texel_weight[i] > 1e-10f) + int texel_count = pt.partition_texel_count[i]; + for (int j = 0; j < texel_count; j++) { - int partition = pt->partition_of_texel[i]; - float alphaval = blk->data_a[i]; + int tidx = pt.texels_of_partition[i][j]; + error_weight = error_weight + ewb.error_weights[tidx]; - if (alphaval > alpha_max[partition]) + if (ewb.texel_weight[tidx] > 1e-10f) { - alpha_max[partition] = alphaval; - } - - if (alphaval < alpha_min[partition]) - { - alpha_min[partition] = alphaval; + vfloat4 data = blk.texel(tidx); + rgba_min = min(data, rgba_min); + rgba_max = max(data, rgba_max); } } - } - for (int i = 0; i < partition_count; i++) - { - alpha_range[i] = alpha_max[i] - alpha_min[i]; - if (alpha_range[i] <= 0.0f) - { - alpha_range[i] = 1e-10f; - } + error_weight = error_weight / pt.partition_texel_count[i]; + vfloat4 csf = sqrt(error_weight); + vfloat4 range = max(rgba_max - rgba_min, 1e-10f); + pm[i].error_weight = error_weight; + pm[i].color_scale = csf; + pm[i].icolor_scale = 1.0f / max(csf, 1e-7f); + pm[i].range_sq = range * range; } } -static void compute_rgb_range( - int texels_per_block, - const partition_info* pt, - const imageblock* blk, - const error_weight_block* ewb, - float3 rgb_range[4] +void compute_partition_error_color_weightings( + const error_weight_block& ewb, + const partition_info& pt, + partition_metrics pm[4] ) { - float3 rgb_min[4]; - float3 
rgb_max[4]; - - int partition_count = pt->partition_count; - for (int i = 0; i < partition_count; i++) - { - rgb_min[i] = float3(1e38f); - rgb_max[i] = float3(-1e38f); - } - - for (int i = 0; i < texels_per_block; i++) - { - if (ewb->texel_weight[i] > 1e-10f) - { - int partition = pt->partition_of_texel[i]; - - float redval = blk->data_r[i]; - if (redval > rgb_max[partition].r) - { - rgb_max[partition].r = redval; - } - - if (redval < rgb_min[partition].r) - { - rgb_min[partition].r = redval; - } - - float greenval = blk->data_g[i]; - if (greenval > rgb_max[partition].g) - { - rgb_max[partition].g = greenval; - } - - if (greenval < rgb_min[partition].g) - { - rgb_min[partition].g = greenval; - } - - float blueval = blk->data_b[i]; - if (blueval > rgb_max[partition].b) - { - rgb_max[partition].b = blueval; - } - - if (blueval < rgb_min[partition].b) - { - rgb_min[partition].b = blueval; - } - } - } + int partition_count = pt.partition_count; - // Covert min/max into ranges forcing a min range of 1e-10 - // to avoid divide by zeros later ... for (int i = 0; i < partition_count; i++) { - rgb_range[i].r = rgb_max[i].r - rgb_min[i].r; - if (rgb_range[i].r <= 0.0f) - { - rgb_range[i].r = 1e-10f; - } + vfloat4 error_weight(1e-12f); - rgb_range[i].g = rgb_max[i].g - rgb_min[i].g; - if (rgb_range[i].g <= 0.0f) + int texel_count = pt.partition_texel_count[i]; + for (int j = 0; j < texel_count; j++) { - rgb_range[i].g = 1e-10f; + int tidx = pt.texels_of_partition[i][j]; + error_weight = error_weight + ewb.error_weights[tidx]; } - rgb_range[i].b = rgb_max[i].b - rgb_min[i].b; - if (rgb_range[i].b <= 0.0f) - { - rgb_range[i].b = 1e-10f; - } - } -} - -void compute_partition_error_color_weightings( - const block_size_descriptor* bsd, - const error_weight_block* ewb, - const partition_info* pi, - float4 error_weightings[4], - float4 color_scalefactors[4] -) { - int texels_per_block = bsd->texel_count; - int pcnt = pi->partition_count; - - for (int i = 0; i < pcnt; i++) - { - error_weightings[i] = float4(1e-12f); - } - - for (int i = 0; i < texels_per_block; i++) - { - int part = pi->partition_of_texel[i]; - error_weightings[part] = error_weightings[part] + ewb->error_weights[i]; - } - - for (int i = 0; i < pcnt; i++) - { - error_weightings[i] = error_weightings[i] * (1.0f / pi->texels_per_partition[i]); - } + error_weight = error_weight / pt.partition_texel_count[i]; + vfloat4 csf = sqrt(error_weight); - for (int i = 0; i < pcnt; i++) - { - color_scalefactors[i] = sqrt(error_weightings[i]); + pm[i].error_weight = error_weight; + pm[i].color_scale = csf; + pm[i].icolor_scale = 1.0f / max(csf, 1e-7f); } } @@ -220,15 +125,15 @@ void find_best_partitionings( const error_weight_block* ewb, int partition_count, int partition_search_limit, - int* best_partition_uncorrelated, - int* best_partition_samechroma, + int* best_partition_uncor, + int* best_partition_samec, int* best_partition_dualplane ) { // constant used to estimate quantization error for a given partitioning; // the optimal value for this constant depends on bitrate. // These constants have been determined empirically. 
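// The partition metrics built above boil down to: average the per-texel error
// weights over the partition, take the square root as the per-channel colour
// scale, keep its clamped reciprocal for mapping back to unscaled colour
// space, and (in the "_and_range" variant) record the squared colour range.
// A scalar sketch for one channel of one partition with hypothetical inputs;
// the real code additionally skips texels whose weight is effectively zero
// when computing the range.
#include <algorithm>
#include <cmath>
#include <cstdio>

int main()
{
    // Hypothetical per-texel error weights and values for one channel
    float texel_error_weights[] = { 0.8f, 1.0f, 1.2f, 1.0f };
    float texel_values[]        = { 0.10f, 0.35f, 0.60f, 0.42f };
    int texel_count = 4;

    float error_weight = 1e-12f; // small bias avoids an exactly-zero weight
    float vmin = 1e38f;
    float vmax = -1e38f;

    for (int i = 0; i < texel_count; i++)
    {
        error_weight += texel_error_weights[i];
        vmin = std::min(vmin, texel_values[i]);
        vmax = std::max(vmax, texel_values[i]);
    }

    error_weight /= (float)texel_count;

    float color_scale  = std::sqrt(error_weight);
    float icolor_scale = 1.0f / std::max(color_scale, 1e-7f);
    float range        = std::max(vmax - vmin, 1e-10f);
    float range_sq     = range * range;

    std::printf("ew=%g cs=%g ics=%g range_sq=%g\n",
                error_weight, color_scale, icolor_scale, range_sq);
    return 0;
}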
int texels_per_block = bsd->texel_count; - float weight_imprecision_estim = 100.0f; + float weight_imprecision_estim = 0.055f; if (texels_per_block <= 20) { weight_imprecision_estim = 0.03f; @@ -241,34 +146,32 @@ void find_best_partitionings( { weight_imprecision_estim = 0.05f; } - else - { - weight_imprecision_estim = 0.055f; - } + + weight_imprecision_estim = weight_imprecision_estim * weight_imprecision_estim; int partition_sequence[PARTITION_COUNT]; kmeans_compute_partition_ordering(bsd, partition_count, blk, partition_sequence); - float weight_imprecision_estim_squared = weight_imprecision_estim * weight_imprecision_estim; - int uses_alpha = imageblock_uses_alpha(blk); const partition_info* ptab = get_partition_table(bsd, partition_count); // Partitioning errors assuming uncorrelated-chrominance endpoints - float uncorr_best_error { ERROR_CALC_DEFAULT }; - int uncorr_best_partition { 0 }; + float uncor_best_error { ERROR_CALC_DEFAULT }; + int uncor_best_partition { 0 }; // Partitioning errors assuming same-chrominance endpoints // Store two so we can always return one different to uncorr - float samechroma_best_errors[2] { ERROR_CALC_DEFAULT, ERROR_CALC_DEFAULT }; - int samechroma_best_partitions[2] { 0, 0 }; + float samec_best_errors[2] { ERROR_CALC_DEFAULT, ERROR_CALC_DEFAULT }; + int samec_best_partitions[2] { 0, 0 }; // Partitioning errors assuming that one color component is uncorrelated - float separate_best_error { ERROR_CALC_DEFAULT }; - int separate_best_partition { 0 }; - int separate_best_component { 0 }; + float sep_best_error { ERROR_CALC_DEFAULT }; + int sep_best_partition { 0 }; + int sep_best_component { 0 }; + + bool skip_two_plane = best_partition_dualplane == nullptr; if (uses_alpha) { @@ -280,167 +183,100 @@ void find_best_partitionings( int bk_partition_count = ptab[partition].partition_count; if (bk_partition_count < partition_count) { - continue; + break; } - // compute the weighting to give to each color channel - // in each partition. 
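// The imprecision constant shrinks for smaller blocks (fewer texels means more
// weight bits per texel, so less expected quantization error) and is squared
// once so it can scale squared-error terms directly. A worked example of the
// two cases visible above; the intermediate block sizes select values between
// 0.03 and 0.055 in the same way.
#include <cstdio>

int main()
{
    float estim_small = 0.03f;  // texels_per_block <= 20, e.g. a 4x4 block
    float estim_large = 0.055f; // large blocks keep the default, e.g. 8x8

    std::printf("4x4: %g^2 = %g\n", estim_small, estim_small * estim_small);
    std::printf("8x8: %g^2 = %g\n", estim_large, estim_large * estim_large);
    return 0;
}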
- float4 error_weightings[4]; - float4 color_scalefactors[4]; - float4 inverse_color_scalefactors[4]; - compute_partition_error_color_weightings(bsd, ewb, ptab + partition, error_weightings, color_scalefactors); - - for (int j = 0; j < partition_count; j++) - { - inverse_color_scalefactors[j].r = 1.0f / MAX(color_scalefactors[j].r, 1e-7f); - inverse_color_scalefactors[j].g = 1.0f / MAX(color_scalefactors[j].g, 1e-7f); - inverse_color_scalefactors[j].b = 1.0f / MAX(color_scalefactors[j].b, 1e-7f); - inverse_color_scalefactors[j].a = 1.0f / MAX(color_scalefactors[j].a, 1e-7f); - } + // Compute weighting to give to each channel in each partition + partition_metrics pms[4]; - float4 averages[4]; - float4 directions_rgba[4]; + compute_partition_error_color_weightings_and_range(*blk, *ewb, *(ptab + partition), pms); - compute_averages_and_directions_rgba(ptab + partition, blk, ewb, - color_scalefactors, averages, - directions_rgba); + compute_avgs_and_dirs_4_comp(ptab + partition, blk, ewb, pms); - line4 uncorr_lines[4]; - line4 samechroma_lines[4]; - line3 separate_red_lines[4]; - line3 separate_green_lines[4]; - line3 separate_blue_lines[4]; - line3 separate_alpha_lines[4]; + line4 uncor_lines[4]; + line4 samec_lines[4]; + line3 sep_r_lines[4]; + line3 sep_g_lines[4]; + line3 sep_b_lines[4]; + line3 sep_a_lines[4]; - processed_line4 proc_uncorr_lines[4]; - processed_line4 proc_samechroma_lines[4]; - processed_line3 proc_separate_red_lines[4]; - processed_line3 proc_separate_green_lines[4]; - processed_line3 proc_separate_blue_lines[4]; - processed_line3 proc_separate_alpha_lines[4]; + processed_line4 uncor_plines[4]; + processed_line4 samec_plines[4]; + processed_line3 sep_r_plines[4]; + processed_line3 sep_g_plines[4]; + processed_line3 sep_b_plines[4]; + processed_line3 sep_a_plines[4]; - float uncorr_linelengths[4]; - float samechroma_linelengths[4]; - float4 separate_linelengths[4]; + float uncor_line_lens[4]; + float samec_line_lens[4]; for (int j = 0; j < partition_count; j++) { - uncorr_lines[j].a = averages[j]; - if (dot(directions_rgba[j], directions_rgba[j]) == 0.0f) - { - uncorr_lines[j].b = normalize(float4(1.0f)); - } - else - { - uncorr_lines[j].b = normalize(directions_rgba[j]); - } + partition_metrics& pm = pms[j]; - proc_uncorr_lines[j].amod = (uncorr_lines[j].a - uncorr_lines[j].b * dot(uncorr_lines[j].a, uncorr_lines[j].b)) * inverse_color_scalefactors[j]; - proc_uncorr_lines[j].bs = (uncorr_lines[j].b * color_scalefactors[j]); - proc_uncorr_lines[j].bis = (uncorr_lines[j].b * inverse_color_scalefactors[j]); + uncor_lines[j].a = pm.avg; + uncor_lines[j].b = normalize_safe(pm.dir, unit4()); - samechroma_lines[j].a = float4(0.0f); - if (dot(averages[j], averages[j]) == 0.0f) - { - samechroma_lines[j].b = normalize(float4(1.0f)); - } - else - { - samechroma_lines[j].b = normalize(averages[j]); - } + uncor_plines[j].amod = (uncor_lines[j].a - uncor_lines[j].b * dot(uncor_lines[j].a, uncor_lines[j].b)) * pm.icolor_scale; + uncor_plines[j].bs = uncor_lines[j].b * pm.color_scale; + uncor_plines[j].bis = uncor_lines[j].b * pm.icolor_scale; - proc_samechroma_lines[j].amod = (samechroma_lines[j].a - samechroma_lines[j].b * dot(samechroma_lines[j].a, samechroma_lines[j].b)) * inverse_color_scalefactors[j]; - proc_samechroma_lines[j].bs = (samechroma_lines[j].b * color_scalefactors[j]); - proc_samechroma_lines[j].bis = (samechroma_lines[j].b * inverse_color_scalefactors[j]); + samec_lines[j].a = vfloat4::zero(); + samec_lines[j].b = normalize_safe(pm.avg, unit4()); - 
separate_red_lines[j].a = float3(averages[j].g, averages[j].b, averages[j].a); - float3 dirs_gba = float3(directions_rgba[j].g, directions_rgba[j].b, directions_rgba[j].a); - if (dot(dirs_gba, dirs_gba) == 0.0f) - { - separate_red_lines[j].b = normalize(float3(1.0f, 1.0f, 1.0f)); - } - else - { - separate_red_lines[j].b = normalize(dirs_gba); - } + samec_plines[j].amod = vfloat4::zero(); + samec_plines[j].bs = samec_lines[j].b * pm.color_scale; + samec_plines[j].bis = samec_lines[j].b * pm.icolor_scale; - separate_green_lines[j].a = float3(averages[j].r, averages[j].b, averages[j].a); - float3 dirs_rba = float3(directions_rgba[j].r, directions_rgba[j].b, directions_rgba[j].a); - if (dot(dirs_rba, dirs_rba) == 0.0f) - { - separate_green_lines[j].b = normalize(float3(1.0f, 1.0f, 1.0f)); - } - else + if (!skip_two_plane) { - separate_green_lines[j].b = normalize(dirs_rba); - } + sep_r_lines[j].a = pm.avg.swz<1, 2, 3>(); + vfloat4 dirs_gba = pm.dir.swz<1, 2, 3>(); + sep_r_lines[j].b = normalize_safe(dirs_gba, unit3()); - separate_blue_lines[j].a = float3(averages[j].r, averages[j].g, averages[j].a); - float3 dirs_rga = float3(directions_rgba[j].r, directions_rgba[j].g, directions_rgba[j].a); - if (dot(dirs_rga, dirs_rga) == 0.0f) - { - separate_blue_lines[j].b = normalize(float3(1.0f, 1.0f, 1.0f)); - } - else - { - separate_blue_lines[j].b = normalize(dirs_rga); - } + sep_g_lines[j].a = pm.avg.swz<0, 2, 3>(); + vfloat4 dirs_rba = pm.dir.swz<0, 2, 3>(); + sep_g_lines[j].b = normalize_safe(dirs_rba, unit3()); - separate_alpha_lines[j].a = float3(averages[j].r, averages[j].g, averages[j].b); - float3 dirs_rgb = float3(directions_rgba[j].r, directions_rgba[j].g, directions_rgba[j].b); - if (dot(dirs_rgb, dirs_rgb) == 0.0f) - { - separate_alpha_lines[j].b = normalize(float3(1.0f, 1.0f, 1.0f)); - } - else - { - separate_alpha_lines[j].b = normalize(dirs_rgb); - } + sep_b_lines[j].a = pm.avg.swz<0, 1, 3>(); + vfloat4 dirs_rga = pm.dir.swz<0, 1, 3>(); + sep_b_lines[j].b = normalize_safe(dirs_rga, unit3()); + + sep_a_lines[j].a = pm.avg.swz<0, 1, 2>(); + vfloat4 dirs_rgb = pm.dir.swz<0, 1, 2>(); + sep_a_lines[j].b = normalize_safe(dirs_rgb, unit3()); - proc_separate_red_lines[j].amod = (separate_red_lines[j].a - separate_red_lines[j].b * dot(separate_red_lines[j].a, separate_red_lines[j].b)) * float3(inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].b, inverse_color_scalefactors[j].a); - proc_separate_red_lines[j].bs = (separate_red_lines[j].b * float3(color_scalefactors[j].g, color_scalefactors[j].b, color_scalefactors[j].a)); - proc_separate_red_lines[j].bis = (separate_red_lines[j].b * float3(inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].b, inverse_color_scalefactors[j].a)); + sep_r_plines[j].amod = (sep_r_lines[j].a - sep_r_lines[j].b * dot3(sep_r_lines[j].a, sep_r_lines[j].b)) * pm.icolor_scale.swz<1, 2, 3, 0>(); + sep_r_plines[j].bs = (sep_r_lines[j].b * pm.color_scale.swz<1, 2, 3, 0>()); + sep_r_plines[j].bis = (sep_r_lines[j].b * pm.icolor_scale.swz<1, 2, 3, 0>()); - proc_separate_green_lines[j].amod = - (separate_green_lines[j].a - separate_green_lines[j].b * dot(separate_green_lines[j].a, separate_green_lines[j].b)) * float3(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].b, inverse_color_scalefactors[j].a); - proc_separate_green_lines[j].bs = (separate_green_lines[j].b * float3(color_scalefactors[j].r, color_scalefactors[j].b, color_scalefactors[j].a)); - proc_separate_green_lines[j].bis = (separate_green_lines[j].b * 
float3(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].b, inverse_color_scalefactors[j].a)); + sep_g_plines[j].amod = (sep_g_lines[j].a - sep_g_lines[j].b * dot3(sep_g_lines[j].a, sep_g_lines[j].b)) * pm.icolor_scale.swz<0, 2, 3, 1>(); + sep_g_plines[j].bs = (sep_g_lines[j].b * pm.color_scale.swz<0, 2, 3, 1>()); + sep_g_plines[j].bis = (sep_g_lines[j].b * pm.icolor_scale.swz<0, 2, 3, 1>()); - proc_separate_blue_lines[j].amod = (separate_blue_lines[j].a - separate_blue_lines[j].b * dot(separate_blue_lines[j].a, separate_blue_lines[j].b)) * float3(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].a); - proc_separate_blue_lines[j].bs = (separate_blue_lines[j].b * float3(color_scalefactors[j].r, color_scalefactors[j].g, color_scalefactors[j].a)); - proc_separate_blue_lines[j].bis = (separate_blue_lines[j].b * float3(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].a)); + sep_b_plines[j].amod = (sep_b_lines[j].a - sep_b_lines[j].b * dot3(sep_b_lines[j].a, sep_b_lines[j].b)) * pm.icolor_scale.swz<0, 1, 3, 2>(); + sep_b_plines[j].bs = (sep_b_lines[j].b * pm.color_scale.swz<0, 1, 3, 2>()); + sep_b_plines[j].bis = (sep_b_lines[j].b * pm.icolor_scale.swz<0, 1, 3, 2>()); - proc_separate_alpha_lines[j].amod = - (separate_alpha_lines[j].a - separate_alpha_lines[j].b * dot(separate_alpha_lines[j].a, separate_alpha_lines[j].b)) * float3(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].b); - proc_separate_alpha_lines[j].bs = (separate_alpha_lines[j].b * float3(color_scalefactors[j].r, color_scalefactors[j].g, color_scalefactors[j].b)); - proc_separate_alpha_lines[j].bis = (separate_alpha_lines[j].b * float3(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].b)); + sep_a_plines[j].amod = (sep_a_lines[j].a - sep_a_lines[j].b * dot3(sep_a_lines[j].a, sep_a_lines[j].b)) * pm.icolor_scale.swz<0, 1, 2, 3>(); + sep_a_plines[j].bs = (sep_a_lines[j].b * pm.color_scale.swz<0, 1, 2, 3>()); + sep_a_plines[j].bis = (sep_a_lines[j].b * pm.icolor_scale.swz<0, 1, 2, 3>()); + } } - float uncorr_error = 0.0f; - float samechroma_error = 0.0f; - float4 separate_error = float4(0.0f); + float uncor_error = 0.0f; + float samec_error = 0.0f; + vfloat4 sep_error = vfloat4::zero(); + compute_error_squared_rgba(ptab + partition, blk, ewb, - proc_uncorr_lines, - proc_samechroma_lines, - proc_separate_red_lines, - proc_separate_green_lines, - proc_separate_blue_lines, - proc_separate_alpha_lines, - uncorr_linelengths, - samechroma_linelengths, - separate_linelengths, - &uncorr_error, - &samechroma_error, - &separate_error); - - // compute minimum & maximum alpha values in each partition - float3 rgb_range[4]; - float alpha_range[4]; - - compute_alpha_range(bsd->texel_count, ptab + partition, blk, ewb, alpha_range); - compute_rgb_range(bsd->texel_count, ptab + partition, blk, ewb, rgb_range); + uncor_plines, + samec_plines, + uncor_line_lens, + samec_line_lens, + &uncor_error, + &samec_error); /* Compute an estimate of error introduced by weight quantization imprecision. 
@@ -456,84 +292,91 @@ void find_best_partitionings( for (int j = 0; j < partition_count; j++) { - float tpp = (float)(ptab[partition].texels_per_partition[j]); - - float4 ics = inverse_color_scalefactors[j]; - float4 error_weights = error_weightings[j] * (tpp * weight_imprecision_estim_squared); - - float4 uncorr_vector = (uncorr_lines[j].b * uncorr_linelengths[j]) * ics; - float4 samechroma_vector = (samechroma_lines[j].b * samechroma_linelengths[j]) * ics; - float3 separate_red_vector = (separate_red_lines[j].b * separate_linelengths[j].r) * float3(ics.g, ics.b, ics.a); - float3 separate_green_vector = (separate_green_lines[j].b * separate_linelengths[j].g) * float3(ics.r, ics.b, ics.a); - float3 separate_blue_vector = (separate_blue_lines[j].b * separate_linelengths[j].b) * float3(ics.r, ics.g, ics.a); - float3 separate_alpha_vector = (separate_alpha_lines[j].b * separate_linelengths[j].a) * float3(ics.r, ics.g, ics.b); - - uncorr_vector = uncorr_vector * uncorr_vector; - samechroma_vector = samechroma_vector * samechroma_vector; - separate_red_vector = separate_red_vector * separate_red_vector; - separate_green_vector = separate_green_vector * separate_green_vector; - separate_blue_vector = separate_blue_vector * separate_blue_vector; - separate_alpha_vector = separate_alpha_vector * separate_alpha_vector; - - uncorr_error += dot(uncorr_vector, error_weights); - samechroma_error += dot(samechroma_vector, error_weights); - separate_error.r += dot(separate_red_vector, float3(error_weights.g, error_weights.b, error_weights.a)); - separate_error.g += dot(separate_green_vector, float3(error_weights.r, error_weights.b, error_weights.a)); - separate_error.b += dot(separate_blue_vector, float3(error_weights.r, error_weights.g, error_weights.a)); - separate_error.a += dot(separate_alpha_vector, float3(error_weights.r, error_weights.g, error_weights.b)); - - separate_error.r += rgb_range[j].r * rgb_range[j].r * error_weights.r; - separate_error.g += rgb_range[j].g * rgb_range[j].g * error_weights.g; - separate_error.b += rgb_range[j].b * rgb_range[j].b * error_weights.b; - separate_error.a += alpha_range[j] * alpha_range[j] * error_weights.a; + partition_metrics& pm = pms[j]; + float tpp = (float)(ptab[partition].partition_texel_count[j]); + + vfloat4 ics = pm.icolor_scale; + vfloat4 error_weights = pm.error_weight * (tpp * weight_imprecision_estim); + + vfloat4 uncor_vector = uncor_lines[j].b * uncor_line_lens[j] * ics; + vfloat4 samec_vector = samec_lines[j].b * samec_line_lens[j] * ics; + + uncor_vector = uncor_vector * uncor_vector; + samec_vector = samec_vector * samec_vector; + + uncor_error += dot_s(uncor_vector, error_weights); + samec_error += dot_s(samec_vector, error_weights); + + if (!skip_two_plane) + { + vfloat4 sep_r_vector = sep_r_lines[j].b * ics.swz<1, 2, 3, 0>(); + vfloat4 sep_g_vector = sep_g_lines[j].b * ics.swz<0, 2, 3, 1>(); + vfloat4 sep_b_vector = sep_b_lines[j].b * ics.swz<0, 1, 3, 2>(); + vfloat4 sep_a_vector = sep_a_lines[j].b * ics.swz<0, 1, 2, 3>(); + + sep_r_vector = sep_r_vector * sep_r_vector; + sep_g_vector = sep_g_vector * sep_g_vector; + sep_b_vector = sep_b_vector * sep_b_vector; + sep_a_vector = sep_a_vector * sep_a_vector; + + vfloat4 sep_err_inc(dot3_s(sep_r_vector, error_weights.swz<1, 2, 3, 0>()), + dot3_s(sep_g_vector, error_weights.swz<0, 2, 3, 1>()), + dot3_s(sep_b_vector, error_weights.swz<0, 1, 3, 2>()), + dot3_s(sep_a_vector, error_weights.swz<0, 1, 2, 3>())); + + sep_error = sep_error + sep_err_inc + pm.range_sq * error_weights; + } } - if 
(uncorr_error < uncorr_best_error) + if (uncor_error < uncor_best_error) { - uncorr_best_error = uncorr_error; - uncorr_best_partition = partition; + uncor_best_error = uncor_error; + uncor_best_partition = partition; } - if (samechroma_error < samechroma_best_errors[0]) + if (samec_error < samec_best_errors[0]) { - samechroma_best_errors[1] = samechroma_best_errors[0]; - samechroma_best_partitions[1] = samechroma_best_partitions[0]; + samec_best_errors[1] = samec_best_errors[0]; + samec_best_partitions[1] = samec_best_partitions[0]; - samechroma_best_errors[0] = samechroma_error; - samechroma_best_partitions[0] = partition; + samec_best_errors[0] = samec_error; + samec_best_partitions[0] = partition; } - else if (samechroma_error < samechroma_best_errors[1]) + else if (samec_error < samec_best_errors[1]) { - samechroma_best_errors[1] = samechroma_error; - samechroma_best_partitions[1] = partition; + samec_best_errors[1] = samec_error; + samec_best_partitions[1] = partition; } - if (separate_error.r < separate_best_error) + if (!skip_two_plane) { - separate_best_error = separate_error.r; - separate_best_partition = partition; - separate_best_component = 0; - } + if (sep_error.lane<0>() < sep_best_error) + { + sep_best_error = sep_error.lane<0>(); + sep_best_partition = partition; + sep_best_component = 0; + } - if (separate_error.g < separate_best_error) - { - separate_best_error = separate_error.g; - separate_best_partition = partition; - separate_best_component = 1; - } + if (sep_error.lane<1>() < sep_best_error) + { + sep_best_error = sep_error.lane<1>(); + sep_best_partition = partition; + sep_best_component = 1; + } - if (separate_error.b < separate_best_error) - { - separate_best_error = separate_error.b; - separate_best_partition = partition; - separate_best_component = 2; - } + if (sep_error.lane<2>() < sep_best_error) + { + sep_best_error = sep_error.lane<2>(); + sep_best_partition = partition; + sep_best_component = 2; + } - if (separate_error.a < separate_best_error) - { - separate_best_error = separate_error.a; - separate_best_partition = partition; - separate_best_component = 3; + if (sep_error.lane<3>() < sep_best_error) + { + sep_best_error = sep_error.lane<3>(); + sep_best_partition = partition; + sep_best_component = 3; + } } } } @@ -546,144 +389,104 @@ void find_best_partitionings( int bk_partition_count = ptab[partition].partition_count; if (bk_partition_count < partition_count) { - continue; + break; } - // compute the weighting to give to each color channel - // in each partition. 
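// The imprecision term accumulated above models weight quantization as a
// relative error on the span of each partition's colour line: the unit line
// direction times its projected length, mapped back to unscaled colour space
// via the inverse colour scale, is squared per channel and weighted by the
// partition's averaged error weight, its texel count, and the pre-squared
// imprecision constant. A scalar sketch for one partition with hypothetical
// values:
#include <cstdio>

int main()
{
    const int channels = 3;

    float line_dir[channels]     = { 0.70f, 0.57f, 0.43f }; // unit direction (scaled space)
    float line_len               = 0.9f;                    // projected extent of the partition
    float icolor_scale[channels] = { 1.1f, 0.9f, 1.3f };    // maps back to unscaled colour space
    float error_weight[channels] = { 1.0f, 1.2f, 0.8f };    // averaged per-channel weight
    float texel_count            = 16.0f;
    float imprecision_sq         = 0.03f * 0.03f;           // small-block constant, pre-squared

    float estimate = 0.0f;
    for (int c = 0; c < channels; c++)
    {
        float v = line_dir[c] * line_len * icolor_scale[c];
        estimate += v * v * error_weight[c] * texel_count * imprecision_sq;
    }

    std::printf("quantization error estimate: %g\n", estimate);
    return 0;
}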
- float4 error_weightings[4]; - float4 color_scalefactors[4]; - float4 inverse_color_scalefactors[4]; - - compute_partition_error_color_weightings(bsd, ewb, ptab + partition, error_weightings, color_scalefactors); - - for (int j = 0; j < partition_count; j++) - { - inverse_color_scalefactors[j].r = 1.0f / MAX(color_scalefactors[j].r, 1e-7f); - inverse_color_scalefactors[j].g = 1.0f / MAX(color_scalefactors[j].g, 1e-7f); - inverse_color_scalefactors[j].b = 1.0f / MAX(color_scalefactors[j].b, 1e-7f); - inverse_color_scalefactors[j].a = 1.0f / MAX(color_scalefactors[j].a, 1e-7f); - } + // Compute weighting to give to each channel in each partition + partition_metrics pms[4]; - float3 averages[4]; - float3 directions_rgb[4]; + compute_partition_error_color_weightings_and_range(*blk, *ewb, *(ptab + partition), pms); - compute_averages_and_directions_rgb(ptab + partition, blk, ewb, color_scalefactors, averages, directions_rgb); + compute_avgs_and_dirs_3_comp(ptab + partition, blk, ewb, 3, pms); - line3 uncorr_lines[4]; - line3 samechroma_lines[4]; - line2 separate_red_lines[4]; - line2 separate_green_lines[4]; - line2 separate_blue_lines[4]; + partition_lines3 plines[4]; - processed_line3 proc_uncorr_lines[4]; - processed_line3 proc_samechroma_lines[4]; + line2 sep_r_lines[4]; + line2 sep_g_lines[4]; + line2 sep_b_lines[4]; - processed_line2 proc_separate_red_lines[4]; - processed_line2 proc_separate_green_lines[4]; - processed_line2 proc_separate_blue_lines[4]; - - float uncorr_linelengths[4]; - float samechroma_linelengths[4]; - float3 separate_linelengths[4]; + processed_line2 sep_r_plines[4]; + processed_line2 sep_g_plines[4]; + processed_line2 sep_b_plines[4]; for (int j = 0; j < partition_count; j++) { - uncorr_lines[j].a = averages[j]; - if (dot(directions_rgb[j], directions_rgb[j]) == 0.0f) - { - uncorr_lines[j].b = normalize(float3(1.0f)); - } - else - { - uncorr_lines[j].b = normalize(directions_rgb[j]); - } + partition_metrics& pm = pms[j]; + partition_lines3& pl = plines[j]; - samechroma_lines[j].a = float3(0.0f); - if (dot(averages[j], averages[j]) == 0.0f) - { - samechroma_lines[j].b = normalize(float3(1.0f)); - } - else - { - samechroma_lines[j].b = normalize(averages[j]); - } + pl.uncor_line.a = pm.avg; + pl.uncor_line.b = normalize_safe(pm.dir.swz<0, 1, 2>(), unit3()); - proc_uncorr_lines[j].amod = (uncorr_lines[j].a - uncorr_lines[j].b * dot(uncorr_lines[j].a, uncorr_lines[j].b)) * float3(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].b); - proc_uncorr_lines[j].bs = (uncorr_lines[j].b * float3(color_scalefactors[j].r, color_scalefactors[j].g, color_scalefactors[j].b)); - proc_uncorr_lines[j].bis = (uncorr_lines[j].b * float3(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].b)); + pl.samec_line.a = vfloat4::zero(); + pl.samec_line.b = normalize_safe(pm.avg.swz<0, 1, 2>(), unit3()); - proc_samechroma_lines[j].amod = (samechroma_lines[j].a - samechroma_lines[j].b * dot(samechroma_lines[j].a, samechroma_lines[j].b)) * float3(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].b); - proc_samechroma_lines[j].bs = (samechroma_lines[j].b * float3(color_scalefactors[j].r, color_scalefactors[j].g, color_scalefactors[j].b)); - proc_samechroma_lines[j].bis = (samechroma_lines[j].b * float3(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].b)); + pl.uncor_pline.amod = (pl.uncor_line.a - pl.uncor_line.b * 
dot3(pl.uncor_line.a, pl.uncor_line.b)) * pm.icolor_scale.swz<0, 1, 2, 3>(); + pl.uncor_pline.bs = (pl.uncor_line.b * pm.color_scale.swz<0, 1, 2, 3>()); + pl.uncor_pline.bis = (pl.uncor_line.b * pm.icolor_scale.swz<0, 1, 2, 3>()); - separate_red_lines[j].a = float2(averages[j].g, averages[j].b); - float2 dirs_gb = float2(directions_rgb[j].g, directions_rgb[j].b); - if (dot(dirs_gb, dirs_gb) == 0.0f) - { - separate_red_lines[j].b = normalize(float2(1.0f)); - } - else - { - separate_red_lines[j].b = normalize(dirs_gb); - } - - separate_green_lines[j].a = float2(averages[j].r, averages[j].b); - float2 dirs_rb = float2(directions_rgb[j].r, directions_rgb[j].b); - if (dot(dirs_rb, dirs_rb) == 0.0f) - { - separate_green_lines[j].b = normalize(float2(1.0f)); - } - else - { - separate_green_lines[j].b = normalize(dirs_rb); - } + pl.samec_pline.amod = vfloat4::zero(); + pl.samec_pline.bs = (pl.samec_line.b * pm.color_scale.swz<0, 1, 2, 3>()); + pl.samec_pline.bis = (pl.samec_line.b * pm.icolor_scale.swz<0, 1, 2, 3>()); - separate_blue_lines[j].a = float2(averages[j].r, averages[j].g); - float2 dirs_rg = float2(directions_rgb[j].r, directions_rgb[j].g); - if (dot(dirs_rg, dirs_rg) == 0.0f) + if (!skip_two_plane) { - separate_blue_lines[j].b = normalize(float2(1.0f)); + sep_r_lines[j].a = pm.avg.swz<1, 2>(); + float2 dirs_gb = pm.dir.swz<1, 2>(); + if (dot(dirs_gb, dirs_gb) == 0.0f) + { + sep_r_lines[j].b = normalize(float2(1.0f)); + } + else + { + sep_r_lines[j].b = normalize(dirs_gb); + } + + sep_g_lines[j].a = pm.avg.swz<0, 2>(); + float2 dirs_rb = pm.dir.swz<0, 2>(); + if (dot(dirs_rb, dirs_rb) == 0.0f) + { + sep_g_lines[j].b = normalize(float2(1.0f)); + } + else + { + sep_g_lines[j].b = normalize(dirs_rb); + } + + sep_b_lines[j].a = pm.avg.swz<0, 1>(); + float2 dirs_rg = pm.dir.swz<0, 1>(); + if (dot(dirs_rg, dirs_rg) == 0.0f) + { + sep_b_lines[j].b = normalize(float2(1.0f)); + } + else + { + sep_b_lines[j].b = normalize(dirs_rg); + } + + sep_r_plines[j].amod = (sep_r_lines[j].a - sep_r_lines[j].b * dot(sep_r_lines[j].a, sep_r_lines[j].b)) * pm.icolor_scale.swz<1, 2>(); + sep_r_plines[j].bs = (sep_r_lines[j].b * pm.color_scale.swz<1, 2>()); + sep_r_plines[j].bis = (sep_r_lines[j].b * pm.icolor_scale.swz<1, 2>()); + + sep_g_plines[j].amod = (sep_g_lines[j].a - sep_g_lines[j].b * dot(sep_g_lines[j].a, sep_g_lines[j].b)) * pm.icolor_scale.swz<0, 2>(); + sep_g_plines[j].bs = (sep_g_lines[j].b * pm.color_scale.swz<0, 2>()); + sep_g_plines[j].bis = (sep_g_lines[j].b * pm.icolor_scale.swz<0, 2>()); + + sep_b_plines[j].amod = (sep_b_lines[j].a - sep_b_lines[j].b * dot(sep_b_lines[j].a, sep_b_lines[j].b)) * pm.icolor_scale.swz<0, 1>(); + sep_b_plines[j].bs = (sep_b_lines[j].b * pm.color_scale.swz<0, 1>()); + sep_b_plines[j].bis = (sep_b_lines[j].b * pm.icolor_scale.swz<0, 1>()); } - else - { - separate_blue_lines[j].b = normalize(dirs_rg); - } - - proc_separate_red_lines[j].amod = (separate_red_lines[j].a - separate_red_lines[j].b * dot(separate_red_lines[j].a, separate_red_lines[j].b)) * float2(inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].b); - proc_separate_red_lines[j].bs = (separate_red_lines[j].b * float2(color_scalefactors[j].g, color_scalefactors[j].b)); - proc_separate_red_lines[j].bis = (separate_red_lines[j].b * float2(inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].b)); - - proc_separate_green_lines[j].amod = (separate_green_lines[j].a - separate_green_lines[j].b * dot(separate_green_lines[j].a, separate_green_lines[j].b)) * 
float2(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].b); - proc_separate_green_lines[j].bs = (separate_green_lines[j].b * float2(color_scalefactors[j].r, color_scalefactors[j].b)); - proc_separate_green_lines[j].bis = (separate_green_lines[j].b * float2(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].b)); - - proc_separate_blue_lines[j].amod = (separate_blue_lines[j].a - separate_blue_lines[j].b * dot(separate_blue_lines[j].a, separate_blue_lines[j].b)) * float2(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].g); - proc_separate_blue_lines[j].bs = (separate_blue_lines[j].b * float2(color_scalefactors[j].r, color_scalefactors[j].g)); - proc_separate_blue_lines[j].bis = (separate_blue_lines[j].b * float2(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].g)); } - float uncorr_error = 0.0f; - float samechroma_error = 0.0f; - float3 separate_error = float3(0.0f); + float uncor_error = 0.0f; + float samec_error = 0.0f; + vfloat4 sep_error = vfloat4(0.0f); compute_error_squared_rgb(ptab + partition, blk, ewb, - proc_uncorr_lines, - proc_samechroma_lines, - proc_separate_red_lines, - proc_separate_green_lines, - proc_separate_blue_lines, - uncorr_linelengths, - samechroma_linelengths, - separate_linelengths, - &uncorr_error, - &samechroma_error, - &separate_error); - - float3 rgb_range[4]; - compute_rgb_range(bsd->texel_count, ptab + partition, blk, ewb, rgb_range); + plines, + uncor_error, + samec_error); /* compute an estimate of error introduced by weight imprecision. @@ -699,85 +502,102 @@ void find_best_partitionings( for (int j = 0; j < partition_count; j++) { - float tpp = (float)(ptab[partition].texels_per_partition[j]); + partition_metrics& pm = pms[j]; + partition_lines3& pl = plines[j]; + + float tpp = (float)(ptab[partition].partition_texel_count[j]); + + vfloat4 ics = pm.icolor_scale; + ics.set_lane<3>(0.0f); + + vfloat4 error_weights = pm.error_weight * (tpp * weight_imprecision_estim); + error_weights.set_lane<3>(0.0f); - float3 ics = float3(inverse_color_scalefactors[j].r, inverse_color_scalefactors[j].g, inverse_color_scalefactors[j].b); - float3 error_weights = float3(error_weightings[j].r, error_weightings[j].g, error_weightings[j].b) * (tpp * weight_imprecision_estim_squared); + vfloat4 uncor_vector = (pl.uncor_line.b * pl.uncor_line_len) * ics; + vfloat4 samec_vector = (pl.samec_line.b * pl.samec_line_len) * ics; - float3 uncorr_vector = (uncorr_lines[j].b * uncorr_linelengths[j]) * ics; - float3 samechroma_vector = (samechroma_lines[j].b * samechroma_linelengths[j]) * ics; + uncor_vector = uncor_vector * uncor_vector; + samec_vector = samec_vector * samec_vector; - float2 separate_red_vector = (separate_red_lines[j].b * separate_linelengths[j].r) * float2(ics.g, ics.b); - float2 separate_green_vector = (separate_green_lines[j].b * separate_linelengths[j].g) * float2(ics.r, ics.b); - float2 separate_blue_vector = (separate_blue_lines[j].b * separate_linelengths[j].b) * float2(ics.r, ics.g); + uncor_error += dot3_s(uncor_vector, error_weights); + samec_error += dot3_s(samec_vector, error_weights); - uncorr_vector = uncorr_vector * uncorr_vector; - samechroma_vector = samechroma_vector * samechroma_vector; - separate_red_vector = separate_red_vector * separate_red_vector; - separate_green_vector = separate_green_vector * separate_green_vector; - separate_blue_vector = separate_blue_vector * separate_blue_vector; + if (!skip_two_plane) + { + float2 sep_r_vector = sep_r_lines[j].b * ics.swz<1, 2>(); + float2 sep_g_vector 
= sep_g_lines[j].b * ics.swz<0, 2>(); + float2 sep_b_vector = sep_b_lines[j].b * ics.swz<0, 1>(); + + sep_r_vector = sep_r_vector * sep_r_vector; + sep_g_vector = sep_g_vector * sep_g_vector; + sep_b_vector = sep_b_vector * sep_b_vector; - uncorr_error += dot(uncorr_vector, error_weights); - samechroma_error += dot(samechroma_vector, error_weights); - separate_error.r += dot(separate_red_vector, float2(error_weights.g, error_weights.b)); - separate_error.g += dot(separate_green_vector, float2(error_weights.r, error_weights.b)); - separate_error.b += dot(separate_blue_vector, float2(error_weights.r, error_weights.r)); + sep_error.set_lane<0>(sep_error.lane<0>() + dot(sep_r_vector, error_weights.swz<1, 2>())); + sep_error.set_lane<1>(sep_error.lane<1>() + dot(sep_g_vector, error_weights.swz<0, 2>())); + sep_error.set_lane<2>(sep_error.lane<2>() + dot(sep_b_vector, error_weights.swz<0, 1>())); - separate_error.r += rgb_range[j].r * rgb_range[j].r * error_weights.r; - separate_error.g += rgb_range[j].g * rgb_range[j].g * error_weights.g; - separate_error.b += rgb_range[j].b * rgb_range[j].b * error_weights.b; + sep_error.set_lane<0>(sep_error.lane<0>() + pm.range_sq.lane<0>() * error_weights.lane<0>()); + sep_error.set_lane<1>(sep_error.lane<1>() + pm.range_sq.lane<1>() * error_weights.lane<1>()); + sep_error.set_lane<2>(sep_error.lane<2>() + pm.range_sq.lane<2>() * error_weights.lane<2>()); + } } - if (uncorr_error < uncorr_best_error) + if (uncor_error < uncor_best_error) { - uncorr_best_error = uncorr_error; - uncorr_best_partition = partition; + uncor_best_error = uncor_error; + uncor_best_partition = partition; } - if (samechroma_error < samechroma_best_errors[0]) + if (samec_error < samec_best_errors[0]) { - samechroma_best_errors[1] = samechroma_best_errors[0]; - samechroma_best_partitions[1] = samechroma_best_partitions[0]; + samec_best_errors[1] = samec_best_errors[0]; + samec_best_partitions[1] = samec_best_partitions[0]; - samechroma_best_errors[0] = samechroma_error; - samechroma_best_partitions[0] = partition; + samec_best_errors[0] = samec_error; + samec_best_partitions[0] = partition; } - else if (samechroma_error < samechroma_best_errors[1]) + else if (samec_error < samec_best_errors[1]) { - samechroma_best_errors[1] = samechroma_error; - samechroma_best_partitions[1] = partition; + samec_best_errors[1] = samec_error; + samec_best_partitions[1] = partition; } - if (separate_error.r < separate_best_error) + if (!skip_two_plane) { - separate_best_error = separate_error.r; - separate_best_partition = partition; - separate_best_component = 0; - } + if (sep_error.lane<0>() < sep_best_error) + { + sep_best_error = sep_error.lane<0>(); + sep_best_partition = partition; + sep_best_component = 0; + } - if (separate_error.g < separate_best_error) - { - separate_best_error = separate_error.g; - separate_best_partition = partition; - separate_best_component = 1; - } + if (sep_error.lane<1>() < sep_best_error) + { + sep_best_error = sep_error.lane<1>(); + sep_best_partition = partition; + sep_best_component = 1; + } - if (separate_error.b < separate_best_error) - { - separate_best_error = separate_error.b; - separate_best_partition = partition; - separate_best_component = 2; + if (sep_error.lane<2>() < sep_best_error) + { + sep_best_error = sep_error.lane<2>(); + sep_best_partition = partition; + sep_best_component = 2; + } } } } - *best_partition_uncorrelated = uncorr_best_partition; + *best_partition_uncor = uncor_best_partition; - int index { samechroma_best_partitions[0] != 
uncorr_best_partition ? 0 : 1 }; - *best_partition_samechroma = samechroma_best_partitions[index]; + int index = samec_best_partitions[0] != uncor_best_partition ? 0 : 1; + *best_partition_samec = samec_best_partitions[index]; - *best_partition_dualplane = (separate_best_component << PARTITION_BITS) | - (separate_best_partition); + if (best_partition_dualplane) + { + *best_partition_dualplane = (sep_best_component << PARTITION_BITS) | + (sep_best_partition); + } } #endif diff --git a/libkram/astc-encoder/astcenc_ideal_endpoints_and_weights.cpp b/libkram/astc-encoder/astcenc_ideal_endpoints_and_weights.cpp index 9390c518..6e1906cf 100644 --- a/libkram/astc-encoder/astcenc_ideal_endpoints_and_weights.cpp +++ b/libkram/astc-encoder/astcenc_ideal_endpoints_and_weights.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -34,7 +34,7 @@ #include #endif -static void compute_endpoints_and_ideal_weights_1_component( +static void compute_endpoints_and_ideal_weights_1_comp( const block_size_descriptor* bsd, const partition_info* pt, const imageblock* blk, @@ -44,16 +44,21 @@ static void compute_endpoints_and_ideal_weights_1_component( ) { int partition_count = pt->partition_count; ei->ep.partition_count = partition_count; + promise(partition_count > 0); + + int texel_count = bsd->texel_count; + promise(texel_count > 0); + + float lowvalues[4] { 1e10f, 1e10f, 1e10f, 1e10f }; + float highvalues[4] { -1e10f, -1e10f, -1e10f, -1e10f }; - float lowvalues[4], highvalues[4]; float partition_error_scale[4]; float linelengths_rcp[4]; - int texels_per_block = bsd->texel_count; - - const float *error_weights; + const float *error_weights = nullptr; const float* data_vr = nullptr; - assert(component <= 3); + + assert(component < 4); switch (component) { case 0: @@ -68,34 +73,21 @@ static void compute_endpoints_and_ideal_weights_1_component( error_weights = ewb->texel_weight_b; data_vr = blk->data_b; break; - case 3: + default: error_weights = ewb->texel_weight_a; data_vr = blk->data_a; break; } - for (int i = 0; i < partition_count; i++) - { - lowvalues[i] = 1e10f; - highvalues[i] = -1e10f; - } - - for (int i = 0; i < texels_per_block; i++) + for (int i = 0; i < texel_count; i++) { if (error_weights[i] > 1e-10f) { float value = data_vr[i]; int partition = pt->partition_of_texel[i]; - if (value < lowvalues[partition]) - { - lowvalues[partition] = value; - } - - if (value > highvalues[partition]) - { - highvalues[partition] = value; - } + lowvalues[partition] = astc::min(value, lowvalues[partition]); + highvalues[partition] = astc::max(value, highvalues[partition]); } } @@ -109,63 +101,34 @@ static void compute_endpoints_and_ideal_weights_1_component( highvalues[i] = 0.0f; } - if (diff < 1e-7f) - { - diff = 1e-7f; - } + diff = astc::max(diff, 1e-7f); partition_error_scale[i] = diff * diff; linelengths_rcp[i] = 1.0f / diff; } - for (int i = 0; i < texels_per_block; i++) + for (int i = 0; i < texel_count; i++) { float value = data_vr[i]; int partition = pt->partition_of_texel[i]; value -= lowvalues[partition]; value *= linelengths_rcp[partition]; - - if (value > 1.0f) - { - value = 1.0f; - } - else if (!(value > 0.0f)) - { - value = 0.0f; - } + value = astc::clamp1f(value); ei->weights[i] = value; 
ei->weight_error_scale[i] = partition_error_scale[partition] * error_weights[i]; assert(!astc::isnan(ei->weight_error_scale[i])); } + vmask4 sep_mask = vint4::lane_id() == vint4(component); for (int i = 0; i < partition_count; i++) { - ei->ep.endpt0[i] = float4(blk->red_min, blk->green_min, blk->blue_min, blk->alpha_min); - ei->ep.endpt1[i] = float4(blk->red_max, blk->green_max, blk->blue_max, blk->alpha_max); - switch (component) - { - case 0: // red/x - ei->ep.endpt0[i].r = lowvalues[i]; - ei->ep.endpt1[i].r = highvalues[i]; - break; - case 1: // green/y - ei->ep.endpt0[i].g = lowvalues[i]; - ei->ep.endpt1[i].g = highvalues[i]; - break; - case 2: // blue/z - ei->ep.endpt0[i].b = lowvalues[i]; - ei->ep.endpt1[i].b = highvalues[i]; - break; - case 3: // alpha/w - ei->ep.endpt0[i].a = lowvalues[i]; - ei->ep.endpt1[i].a = highvalues[i]; - break; - } + ei->ep.endpt0[i] = select(blk->data_min, vfloat4(lowvalues[i]), sep_mask); + ei->ep.endpt1[i] = select(blk->data_max, vfloat4(highvalues[i]), sep_mask); } } -static void compute_endpoints_and_ideal_weights_2_components( +static void compute_endpoints_and_ideal_weights_2_comp( const block_size_descriptor* bsd, const partition_info* pt, const imageblock* blk, @@ -176,9 +139,12 @@ static void compute_endpoints_and_ideal_weights_2_components( ) { int partition_count = pt->partition_count; ei->ep.partition_count = partition_count; + promise(partition_count > 0); + + int texel_count = bsd->texel_count; + promise(texel_count > 0); - float4 error_weightings[4]; - float4 color_scalefactors[4]; + partition_metrics pms[4]; float2 scalefactors[4]; @@ -204,48 +170,49 @@ static void compute_endpoints_and_ideal_weights_2_components( data_vg = blk->data_b; } - int texels_per_block = bsd->texel_count; - - compute_partition_error_color_weightings(bsd, ewb, pt, error_weightings, color_scalefactors); + compute_partition_error_color_weightings(*ewb, *pt, pms); for (int i = 0; i < partition_count; i++) { float s1 = 0, s2 = 0; + assert(component1 < 4); switch (component1) { case 0: - s1 = color_scalefactors[i].r; + s1 = pms[i].color_scale.lane<0>(); break; case 1: - s1 = color_scalefactors[i].g; + s1 = pms[i].color_scale.lane<1>(); break; case 2: - s1 = color_scalefactors[i].b; + s1 = pms[i].color_scale.lane<2>(); break; - case 3: - s1 = color_scalefactors[i].a; + default: + s1 = pms[i].color_scale.lane<3>(); break; } + assert(component2 < 4); switch (component2) { case 0: - s2 = color_scalefactors[i].r; + s2 = pms[i].color_scale.lane<0>(); break; case 1: - s2 = color_scalefactors[i].g; + s2 = pms[i].color_scale.lane<1>(); break; case 2: - s2 = color_scalefactors[i].b; + s2 = pms[i].color_scale.lane<2>(); break; - case 3: - s2 = color_scalefactors[i].a; + default: + s2 = pms[i].color_scale.lane<3>(); break; } scalefactors[i] = normalize(float2(s1, s2)) * 1.41421356f; } - float lowparam[4], highparam[4]; + float lowparam[4] { 1e10f, 1e10f, 1e10f, 1e10f }; + float highparam[4] { -1e10f, -1e10f, -1e10f, -1e10f }; float2 averages[4]; float2 directions[4]; @@ -254,35 +221,28 @@ static void compute_endpoints_and_ideal_weights_2_components( float scale[4]; float length_squared[4]; - for (int i = 0; i < partition_count; i++) - { - lowparam[i] = 1e10; - highparam[i] = -1e10; - } - - compute_averages_and_directions_2_components(pt, blk, ewb, scalefactors, component1, component2, averages, directions); + compute_avgs_and_dirs_2_comp(pt, blk, ewb, scalefactors, component1, component2, averages, directions); for (int i = 0; i < partition_count; i++) { - float2 egv = 
directions[i]; - if (egv.r + egv.g < 0.0f) - directions[i] = float2(0.0f) - egv; - } + float2 dir = directions[i]; + if (dir.r + dir.g < 0.0f) + { + dir = float2(0.0f) - dir; + } - for (int i = 0; i < partition_count; i++) - { lines[i].a = averages[i]; - if (dot(directions[i], directions[i]) == 0.0f) + if (dot(dir, dir) == 0.0f) { lines[i].b = normalize(float2(1.0f)); } else { - lines[i].b = normalize(directions[i]); + lines[i].b = normalize(dir); } } - for (int i = 0; i < texels_per_block; i++) + for (int i = 0; i < texel_count; i++) { if (error_weights[i] > 1e-10f) { @@ -292,15 +252,8 @@ static void compute_endpoints_and_ideal_weights_2_components( float param = dot(point - l.a, l.b); ei->weights[i] = param; - if (param < lowparam[partition]) - { - lowparam[partition] = param; - } - - if (param > highparam[partition]) - { - highparam[partition] = param; - } + lowparam[partition] = astc::min(param, lowparam[partition]); + highparam[partition] = astc::max(param, highparam[partition]); } else { @@ -323,11 +276,7 @@ static void compute_endpoints_and_ideal_weights_2_components( // it is possible for a uniform-color partition to produce length=0; this // causes NaN-production and NaN-propagation later on. Set length to // a small value to avoid this problem. - if (length < 1e-7f) - { - length = 1e-7f; - } - + length = astc::max(length, 1e-7f); length_squared[i] = length * length; scale[i] = 1.0f / length; @@ -344,67 +293,22 @@ static void compute_endpoints_and_ideal_weights_2_components( highvalues[i] = ep1; } + vmask4 comp1_mask = vint4::lane_id() == vint4(component1); + vmask4 comp2_mask = vint4::lane_id() == vint4(component2); for (int i = 0; i < partition_count; i++) { - ei->ep.endpt0[i] = float4(blk->red_min, blk->green_min, blk->blue_min, blk->alpha_min); - ei->ep.endpt1[i] = float4(blk->red_max, blk->green_max, blk->blue_max, blk->alpha_max); - - float2 ep0 = lowvalues[i]; - float2 ep1 = highvalues[i]; - - switch (component1) - { - case 0: - ei->ep.endpt0[i].r = ep0.r; - ei->ep.endpt1[i].r = ep1.r; - break; - case 1: - ei->ep.endpt0[i].g = ep0.r; - ei->ep.endpt1[i].g = ep1.r; - break; - case 2: - ei->ep.endpt0[i].b = ep0.r; - ei->ep.endpt1[i].b = ep1.r; - break; - case 3: - ei->ep.endpt0[i].a = ep0.r; - ei->ep.endpt1[i].a = ep1.r; - break; - } + vfloat4 ep0 = select(blk->data_min, vfloat4(lowvalues[i].r), comp1_mask); + vfloat4 ep1 = select(blk->data_max, vfloat4(highvalues[i].r), comp1_mask); - switch (component2) - { - case 0: - ei->ep.endpt0[i].r = ep0.g; - ei->ep.endpt1[i].r = ep1.g; - break; - case 1: - ei->ep.endpt0[i].g = ep0.g; - ei->ep.endpt1[i].g = ep1.g; - break; - case 2: - ei->ep.endpt0[i].b = ep0.g; - ei->ep.endpt1[i].b = ep1.g; - break; - case 3: - ei->ep.endpt0[i].a = ep0.g; - ei->ep.endpt1[i].a = ep1.g; - break; - } + ei->ep.endpt0[i] = select(ep0, vfloat4(lowvalues[i].g), comp2_mask); + ei->ep.endpt1[i] = select(ep1, vfloat4(highvalues[i].g), comp2_mask); } - for (int i = 0; i < texels_per_block; i++) + for (int i = 0; i < texel_count; i++) { int partition = pt->partition_of_texel[i]; float idx = (ei->weights[i] - lowparam[partition]) * scale[partition]; - if (idx > 1.0f) - { - idx = 1.0f; - } - else if (!(idx > 0.0f)) - { - idx = 0.0f; - } + idx = astc::clamp1f(idx); ei->weights[i] = idx; ei->weight_error_scale[i] = length_squared[partition] * error_weights[i]; @@ -412,43 +316,42 @@ static void compute_endpoints_and_ideal_weights_2_components( } } -static void compute_endpoints_and_ideal_weights_3_components( +static void compute_endpoints_and_ideal_weights_3_comp( 
const block_size_descriptor* bsd, const partition_info* pt, const imageblock* blk, const error_weight_block* ewb, endpoints_and_weights* ei, - int omittedComponent + int omitted_component ) { int partition_count = pt->partition_count; ei->ep.partition_count = partition_count; + promise(partition_count > 0); - float4 error_weightings[4]; - float4 color_scalefactors[4]; - - float3 scalefactors[4]; + int texel_count= bsd->texel_count; + promise(texel_count > 0); - int texels_per_block = bsd->texel_count; + partition_metrics pms[4]; const float *error_weights; const float* data_vr = nullptr; const float* data_vg = nullptr; const float* data_vb = nullptr; - if (omittedComponent == 0) + if (omitted_component == 0) { error_weights = ewb->texel_weight_gba; data_vr = blk->data_g; data_vg = blk->data_b; data_vb = blk->data_a; } - else if (omittedComponent == 1) + else if (omitted_component == 1) { error_weights = ewb->texel_weight_rba; data_vr = blk->data_r; data_vg = blk->data_b; data_vb = blk->data_a; } - else if (omittedComponent == 2) + else if (omitted_component == 2) { error_weights = ewb->texel_weight_rga; data_vr = blk->data_r; @@ -463,96 +366,79 @@ static void compute_endpoints_and_ideal_weights_3_components( data_vb = blk->data_b; } - compute_partition_error_color_weightings(bsd, ewb, pt, error_weightings, color_scalefactors); + compute_partition_error_color_weightings(*ewb, *pt, pms); for (int i = 0; i < partition_count; i++) { float s1 = 0, s2 = 0, s3 = 0; - switch (omittedComponent) + assert(omitted_component < 4); + switch (omitted_component) { case 0: - s1 = color_scalefactors[i].g; - s2 = color_scalefactors[i].b; - s3 = color_scalefactors[i].a; + s1 = pms[i].color_scale.lane<1>(); + s2 = pms[i].color_scale.lane<2>(); + s3 = pms[i].color_scale.lane<3>(); break; case 1: - s1 = color_scalefactors[i].r; - s2 = color_scalefactors[i].b; - s3 = color_scalefactors[i].a; + s1 = pms[i].color_scale.lane<0>(); + s2 = pms[i].color_scale.lane<2>(); + s3 = pms[i].color_scale.lane<3>(); break; case 2: - s1 = color_scalefactors[i].r; - s2 = color_scalefactors[i].g; - s3 = color_scalefactors[i].a; + s1 = pms[i].color_scale.lane<0>(); + s2 = pms[i].color_scale.lane<1>(); + s3 = pms[i].color_scale.lane<3>(); break; - case 3: - s1 = color_scalefactors[i].r; - s2 = color_scalefactors[i].g; - s3 = color_scalefactors[i].b; + default: + s1 = pms[i].color_scale.lane<0>(); + s2 = pms[i].color_scale.lane<1>(); + s3 = pms[i].color_scale.lane<2>(); break; } - scalefactors[i] = normalize(float3(s1, s2, s3)) * 1.73205080f; + pms[i].color_scale = normalize(vfloat4(s1, s2, s3, 0.0f)) * 1.73205080f; } - float lowparam[4], highparam[4]; - - float3 averages[4]; - float3 directions[4]; + float lowparam[4] { 1e10f, 1e10f, 1e10f, 1e10f }; + float highparam[4] { -1e10f, -1e10f, -1e10f, -1e10f }; line3 lines[4]; float scale[4]; float length_squared[4]; - for (int i = 0; i < partition_count; i++) - { - lowparam[i] = 1e10f; - highparam[i] = -1e10f; - } - - compute_averages_and_directions_3_components(pt, blk, ewb, scalefactors, omittedComponent, averages, directions); + compute_avgs_and_dirs_3_comp(pt, blk, ewb, omitted_component, pms); for (int i = 0; i < partition_count; i++) { - float3 direc = directions[i]; - if (direc.r + direc.g + direc.b < 0.0f) + vfloat4 dir = pms[i].dir; + if (hadd_rgb_s(dir) < 0.0f) { - directions[i] = float3(0.0f) - direc; + dir = vfloat4(0.0f) - dir; } - } - for (int i = 0; i < partition_count; i++) - { - lines[i].a = averages[i]; - if (dot(directions[i], directions[i]) == 0.0f) + lines[i].a = 
pms[i].avg; + if (dot3_s(dir, dir) == 0.0f) { - lines[i].b = normalize(float3(1.0f)); + lines[i].b = normalize(vfloat4(1.0f, 1.0f, 1.0f, 0.0f)); } else { - lines[i].b = normalize(directions[i]); + lines[i].b = normalize(dir); } } - for (int i = 0; i < texels_per_block; i++) + for (int i = 0; i < texel_count; i++) { if (error_weights[i] > 1e-10f) { int partition = pt->partition_of_texel[i]; - float3 point = float3(data_vr[i], data_vg[i], data_vb[i]) * scalefactors[partition]; + vfloat4 point = vfloat4(data_vr[i], data_vg[i], data_vb[i], 0.0f) * pms[partition].color_scale; line3 l = lines[partition]; - float param = dot(point - l.a, l.b); + float param = dot3_s(point - l.a, l.b); ei->weights[i] = param; - if (param < lowparam[partition]) - { - lowparam[partition] = param; - } - - if (param > highparam[partition]) - { - highparam[partition] = param; - } + lowparam[partition] = astc::min(param, lowparam[partition]); + highparam[partition] = astc::max(param, highparam[partition]); } else { @@ -560,9 +446,6 @@ static void compute_endpoints_and_ideal_weights_3_components( } } - float3 lowvalues[4]; - float3 highvalues[4]; - for (int i = 0; i < partition_count; i++) { float length = highparam[i] - lowparam[i]; @@ -575,90 +458,48 @@ static void compute_endpoints_and_ideal_weights_3_components( // it is possible for a uniform-color partition to produce length=0; this // causes NaN-production and NaN-propagation later on. Set length to // a small value to avoid this problem. - if (length < 1e-7f) - { - length = 1e-7f; - } + length = astc::max(length, 1e-7f); length_squared[i] = length * length; scale[i] = 1.0f / length; - float3 ep0 = lines[i].a + lines[i].b * lowparam[i]; - float3 ep1 = lines[i].a + lines[i].b * highparam[i]; - - ep0.r /= scalefactors[i].r; - ep0.g /= scalefactors[i].g; - ep0.b /= scalefactors[i].b; - - ep1.r /= scalefactors[i].r; - ep1.g /= scalefactors[i].g; - ep1.b /= scalefactors[i].b; - - lowvalues[i] = ep0; - highvalues[i] = ep1; - } + vfloat4 ep0 = lines[i].a + lines[i].b * lowparam[i]; + vfloat4 ep1 = lines[i].a + lines[i].b * highparam[i]; - for (int i = 0; i < partition_count; i++) - { - ei->ep.endpt0[i] = float4(blk->red_min, blk->green_min, blk->blue_min, blk->alpha_min); - ei->ep.endpt1[i] = float4(blk->red_max, blk->green_max, blk->blue_max, blk->alpha_max); + ep0 = ep0 / pms[i].color_scale; + ep1 = ep1 / pms[i].color_scale; - float3 ep0 = lowvalues[i]; - float3 ep1 = highvalues[i]; + vfloat4 bmin = blk->data_min; + vfloat4 bmax = blk->data_max; - switch (omittedComponent) + // TODO: Probably a programmatic vector permute we can do here ... 
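Regarding the TODO just above about a programmatic vector permute: one possible shape for such a helper, sketched here purely as an aside (it is not in the patch), rebuilds the 4-lane endpoint by scattering through a small array. It assumes only the storea() / vfloat4::loada() helpers already used elsewhere in this diff.

// Hypothetical helper: reinsert the omitted component's block value into a
// vfloat4 whose lanes 0..2 hold the three kept components, producing the
// same lane layout as the per-component switch below.
static inline vfloat4 insert_omitted_comp(vfloat4 packed3, float block_value, int omitted_component)
{
    alignas(16) float packed[4];
    alignas(16) float full[4];
    storea(packed3, packed);

    int src = 0;
    for (int lane = 0; lane < 4; lane++)
    {
        full[lane] = (lane == omitted_component) ? block_value : packed[src++];
    }

    return vfloat4::loada(full);
}

The call site would invoke it twice per partition, passing the omitted lane of blk->data_min for endpt0 and of blk->data_max for endpt1.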
+ assert(omitted_component < 4); + switch (omitted_component) { case 0: - ei->ep.endpt0[i].g = ep0.r; - ei->ep.endpt0[i].b = ep0.g; - ei->ep.endpt0[i].a = ep0.b; - - ei->ep.endpt1[i].g = ep1.r; - ei->ep.endpt1[i].b = ep1.g; - ei->ep.endpt1[i].a = ep1.b; + ei->ep.endpt0[i] = vfloat4(bmin.lane<0>(), ep0.lane<0>(), ep0.lane<1>(), ep0.lane<2>()); + ei->ep.endpt1[i] = vfloat4(bmax.lane<0>(), ep1.lane<0>(), ep1.lane<1>(), ep1.lane<2>()); break; case 1: - ei->ep.endpt0[i].r = ep0.r; - ei->ep.endpt0[i].b = ep0.g; - ei->ep.endpt0[i].a = ep0.b; - - ei->ep.endpt1[i].r = ep1.r; - ei->ep.endpt1[i].b = ep1.g; - ei->ep.endpt1[i].a = ep1.b; + ei->ep.endpt0[i] = vfloat4(ep0.lane<0>(), bmin.lane<1>(), ep0.lane<1>(), ep0.lane<2>()); + ei->ep.endpt1[i] = vfloat4(ep1.lane<0>(), bmax.lane<1>(), ep1.lane<1>(), ep1.lane<2>()); break; case 2: - ei->ep.endpt0[i].r = ep0.r; - ei->ep.endpt0[i].g = ep0.g; - ei->ep.endpt0[i].a = ep0.b; - - ei->ep.endpt1[i].r = ep1.r; - ei->ep.endpt1[i].g = ep1.g; - ei->ep.endpt1[i].a = ep1.b; + ei->ep.endpt0[i] = vfloat4(ep0.lane<0>(), ep0.lane<1>(), bmin.lane<2>(), ep0.lane<2>()); + ei->ep.endpt1[i] = vfloat4(ep1.lane<0>(), ep1.lane<1>(), bmax.lane<2>(), ep1.lane<2>()); break; - case 3: - ei->ep.endpt0[i].r = ep0.r; - ei->ep.endpt0[i].g = ep0.g; - ei->ep.endpt0[i].b = ep0.b; - - ei->ep.endpt1[i].r = ep1.r; - ei->ep.endpt1[i].g = ep1.g; - ei->ep.endpt1[i].b = ep1.b; + default: + ei->ep.endpt0[i] = vfloat4(ep0.lane<0>(), ep0.lane<1>(), ep0.lane<2>(), bmin.lane<3>()); + ei->ep.endpt1[i] = vfloat4(ep1.lane<0>(), ep1.lane<1>(), ep1.lane<2>(), bmax.lane<3>()); break; } } - for (int i = 0; i < texels_per_block; i++) + for (int i = 0; i < texel_count; i++) { int partition = pt->partition_of_texel[i]; float idx = (ei->weights[i] - lowparam[partition]) * scale[partition]; - if (idx > 1.0f) - { - idx = 1.0f; - } - else if (!(idx > 0.0f)) - { - idx = 0.0f; - } + idx = astc::clamp1f(idx); ei->weights[i] = idx; ei->weight_error_scale[i] = length_squared[partition] * error_weights[i]; @@ -666,7 +507,7 @@ static void compute_endpoints_and_ideal_weights_3_components( } } -static void compute_endpoints_and_ideal_weights_rgba( +static void compute_endpoints_and_ideal_weights_4_comp( const block_size_descriptor* bsd, const partition_info* pt, const imageblock* blk, @@ -676,81 +517,65 @@ static void compute_endpoints_and_ideal_weights_rgba( const float *error_weights = ewb->texel_weight; int partition_count = pt->partition_count; - float lowparam[4], highparam[4]; - for (int i = 0; i < partition_count; i++) - { - lowparam[i] = 1e10; - highparam[i] = -1e10; - } - float4 averages[4]; - float4 directions_rgba[4]; + int texel_count= bsd->texel_count; + promise(texel_count > 0); + promise(partition_count > 0); + + float lowparam[4] { 1e10, 1e10, 1e10, 1e10 }; + float highparam[4] { -1e10, -1e10, -1e10, -1e10 }; line4 lines[4]; float scale[4]; float length_squared[4]; - float4 error_weightings[4]; - float4 color_scalefactors[4]; - float4 scalefactors[4]; - - int texels_per_block = bsd->texel_count; + partition_metrics pms[4]; - compute_partition_error_color_weightings(bsd, ewb, pt, error_weightings, color_scalefactors); + compute_partition_error_color_weightings(*ewb, *pt, pms); for (int i = 0; i < partition_count; i++) { - scalefactors[i] = normalize(color_scalefactors[i]) * 2.0f; + pms[i].color_scale = normalize(pms[i].color_scale) * 2.0f; } - compute_averages_and_directions_rgba(pt, blk, ewb, scalefactors, averages, directions_rgba); + compute_avgs_and_dirs_4_comp(pt, blk, ewb, pms); // if the 
direction-vector ends up pointing from light to dark, FLIP IT! // this will make the first endpoint the darkest one. for (int i = 0; i < partition_count; i++) { - float4 direc = directions_rgba[i]; - if (direc.r + direc.g + direc.b < 0.0f) + vfloat4 dir = pms[i].dir; + if (hadd_rgb_s(dir) < 0.0f) { - directions_rgba[i] = float4(0.0f) - direc; + dir = vfloat4::zero() - dir; } - } - for (int i = 0; i < partition_count; i++) - { - lines[i].a = averages[i]; - if (dot(directions_rgba[i], directions_rgba[i]) == 0.0f) + lines[i].a = pms[i].avg; + if (dot_s(dir, dir) == 0.0f) { - lines[i].b = normalize(float4(1.0f)); + lines[i].b = normalize(vfloat4(1.0f)); } else { - lines[i].b = normalize(directions_rgba[i]); + lines[i].b = normalize(dir); } } - for (int i = 0; i < texels_per_block; i++) + for (int i = 0; i < texel_count; i++) { if (error_weights[i] > 1e-10f) { int partition = pt->partition_of_texel[i]; - float4 point = float4(blk->data_r[i], blk->data_g[i], blk->data_b[i], blk->data_a[i]) * scalefactors[partition]; + vfloat4 point = blk->texel(i) * pms[partition].color_scale; line4 l = lines[partition]; - float param = dot(point - l.a, l.b); + float param = dot_s(point - l.a, l.b); ei->weights[i] = param; - if (param < lowparam[partition]) - { - lowparam[partition] = param; - } - - if (param > highparam[partition]) - { - highparam[partition] = param; - } + lowparam[partition] = astc::min(param, lowparam[partition]); + highparam[partition] = astc::max(param, highparam[partition]); } else { @@ -770,43 +595,24 @@ static void compute_endpoints_and_ideal_weights_rgba( // it is possible for a uniform-color partition to produce length=0; this // causes NaN-production and NaN-propagation later on. Set length to // a small value to avoid this problem. - if (length < 1e-7f) - { - length = 1e-7f; - } + length = astc::max(length, 1e-7f); length_squared[i] = length * length; scale[i] = 1.0f / length; - float4 ep0 = lines[i].a + lines[i].b * lowparam[i]; - float4 ep1 = lines[i].a + lines[i].b * highparam[i]; - - ep0.r /= scalefactors[i].r; - ep0.g /= scalefactors[i].g; - ep0.b /= scalefactors[i].b; - ep0.a /= scalefactors[i].a; + vfloat4 ep0 = lines[i].a + lines[i].b * lowparam[i]; + vfloat4 ep1 = lines[i].a + lines[i].b * highparam[i]; - ep1.r /= scalefactors[i].r; - ep1.g /= scalefactors[i].g; - ep1.b /= scalefactors[i].b; - ep1.a /= scalefactors[i].a; - - ei->ep.endpt0[i] = ep0; - ei->ep.endpt1[i] = ep1; + ei->ep.endpt0[i] = ep0 / pms[i].color_scale; + ei->ep.endpt1[i] = ep1 / pms[i].color_scale; } - for (int i = 0; i < texels_per_block; i++) + for (int i = 0; i < texel_count; i++) { int partition = pt->partition_of_texel[i]; float idx = (ei->weights[i] - lowparam[partition]) * scale[partition]; - if (idx > 1.0f) - { - idx = 1.0f; - } - else if (!(idx > 0.0f)) - { - idx = 0.0f; - } + idx = astc::clamp1f(idx); + ei->weights[i] = idx; ei->weight_error_scale[i] = error_weights[i] * length_squared[partition]; assert(!astc::isnan(ei->weight_error_scale[i])); @@ -830,11 +636,11 @@ void compute_endpoints_and_ideal_weights_1_plane( int uses_alpha = imageblock_uses_alpha(blk); if (uses_alpha) { - compute_endpoints_and_ideal_weights_rgba(bsd, pt, blk, ewb, ei); + compute_endpoints_and_ideal_weights_4_comp(bsd, pt, blk, ewb, ei); } else { - compute_endpoints_and_ideal_weights_3_components(bsd, pt, blk, ewb, ei, 3); + compute_endpoints_and_ideal_weights_3_comp(bsd, pt, blk, ewb, ei, 3); } } @@ -848,48 +654,50 @@ void compute_endpoints_and_ideal_weights_2_planes( endpoints_and_weights* ei2 ) { int uses_alpha = 
imageblock_uses_alpha(blk); + + assert(separate_component < 4); switch (separate_component) { - case 0: // separate weights for red - if (uses_alpha == 1) + case 0: // separate weights for red + if (uses_alpha) { - compute_endpoints_and_ideal_weights_3_components(bsd, pt, blk, ewb, ei1, 0); + compute_endpoints_and_ideal_weights_3_comp(bsd, pt, blk, ewb, ei1, 0); } else { - compute_endpoints_and_ideal_weights_2_components(bsd, pt, blk, ewb, ei1, 1, 2); + compute_endpoints_and_ideal_weights_2_comp(bsd, pt, blk, ewb, ei1, 1, 2); } - compute_endpoints_and_ideal_weights_1_component(bsd, pt, blk, ewb, ei2, 0); + compute_endpoints_and_ideal_weights_1_comp(bsd, pt, blk, ewb, ei2, 0); break; - case 1: // separate weights for green - if (uses_alpha == 1) + case 1: // separate weights for green + if (uses_alpha) { - compute_endpoints_and_ideal_weights_3_components(bsd, pt, blk, ewb, ei1, 1); + compute_endpoints_and_ideal_weights_3_comp(bsd, pt, blk, ewb, ei1, 1); } else { - compute_endpoints_and_ideal_weights_2_components(bsd, pt, blk, ewb, ei1, 0, 2); + compute_endpoints_and_ideal_weights_2_comp(bsd, pt, blk, ewb, ei1, 0, 2); } - compute_endpoints_and_ideal_weights_1_component(bsd, pt, blk, ewb, ei2, 1); + compute_endpoints_and_ideal_weights_1_comp(bsd, pt, blk, ewb, ei2, 1); break; - case 2: // separate weights for blue - if (uses_alpha == 1) + case 2: // separate weights for blue + if (uses_alpha) { - compute_endpoints_and_ideal_weights_3_components(bsd, pt, blk, ewb, ei1, 2); + compute_endpoints_and_ideal_weights_3_comp(bsd, pt, blk, ewb, ei1, 2); } else { - compute_endpoints_and_ideal_weights_2_components(bsd, pt, blk, ewb, ei1, 0, 1); + compute_endpoints_and_ideal_weights_2_comp(bsd, pt, blk, ewb, ei1, 0, 1); } - compute_endpoints_and_ideal_weights_1_component(bsd, pt, blk, ewb, ei2, 2); + compute_endpoints_and_ideal_weights_1_comp(bsd, pt, blk, ewb, ei2, 2); break; - case 3: // separate weights for alpha - assert(uses_alpha != 0); - compute_endpoints_and_ideal_weights_3_components(bsd, pt, blk, ewb, ei1, 3); - compute_endpoints_and_ideal_weights_1_component(bsd, pt, blk, ewb, ei2, 3); + default: // separate weights for alpha + assert(uses_alpha); + compute_endpoints_and_ideal_weights_3_comp(bsd, pt, blk, ewb, ei1, 3); + compute_endpoints_and_ideal_weights_1_comp(bsd, pt, blk, ewb, ei2, 3); break; } } @@ -929,150 +737,328 @@ void compute_endpoints_and_ideal_weights_2_planes( go into a given texel. 
*/ -static float compute_value_of_texel_flt( - int texel_to_get, - const decimation_table* it, +float compute_error_of_weight_set( + const endpoints_and_weights* eai, + const decimation_table* dt, const float* weights ) { - const uint8_t *texel_weights = it->texel_weights[texel_to_get]; - const float *texel_weights_float = it->texel_weights_float[texel_to_get]; + vfloat4 error_summav = vfloat4::zero(); + float error_summa = 0.0f; + int texel_count = dt->texel_count; - return (weights[texel_weights[0]] * texel_weights_float[0] + - weights[texel_weights[1]] * texel_weights_float[1]) + - (weights[texel_weights[2]] * texel_weights_float[2] + - weights[texel_weights[3]] * texel_weights_float[3]); -} + int i = 0; -static inline float compute_error_of_texel( - const endpoints_and_weights * eai, - int texel_to_get, - const decimation_table* it, - const float *weights -) { - float current_value = compute_value_of_texel_flt(texel_to_get, it, weights); - float valuedif = current_value - eai->weights[texel_to_get]; - return valuedif * valuedif * eai->weight_error_scale[texel_to_get]; -} + // Process SIMD-width texel coordinates at at time while we can + int clipped_texel_count = round_down_to_simd_multiple_vla(texel_count); + for (/* */; i < clipped_texel_count; i += ASTCENC_SIMD_WIDTH) + { + // Load the bilinear filter texel weight indexes + vint weight_idx0 = vint(&(dt->texel_weights_4t[0][i])); + vint weight_idx1 = vint(&(dt->texel_weights_4t[1][i])); + vint weight_idx2 = vint(&(dt->texel_weights_4t[2][i])); + vint weight_idx3 = vint(&(dt->texel_weights_4t[3][i])); + + // Load the bilinear filter texel weights + vfloat weight_val0 = gatherf(weights, weight_idx0); + vfloat weight_val1 = gatherf(weights, weight_idx1); + vfloat weight_val2 = gatherf(weights, weight_idx2); + vfloat weight_val3 = gatherf(weights, weight_idx3); + + // Load the weight contributions for each texel + // TODO: Should we rename this dt->texel_weights_float field? + vfloat tex_weight_float0 = loada(&(dt->texel_weights_float_4t[0][i])); + vfloat tex_weight_float1 = loada(&(dt->texel_weights_float_4t[1][i])); + vfloat tex_weight_float2 = loada(&(dt->texel_weights_float_4t[2][i])); + vfloat tex_weight_float3 = loada(&(dt->texel_weights_float_4t[3][i])); + + // Compute the bilinear interpolation + vfloat current_values = (weight_val0 * tex_weight_float0 + + weight_val1 * tex_weight_float1) + + (weight_val2 * tex_weight_float2 + + weight_val3 * tex_weight_float3); + + // Compute the error between the computed value and the ideal weight + vfloat actual_values = loada(&(eai->weights[i])); + vfloat diff = current_values - actual_values; + vfloat significance = loada(&(eai->weight_error_scale[i])); + vfloat error = diff * diff * significance; + + haccumulate(error_summav, error); + } -float compute_error_of_weight_set( - const endpoints_and_weights* eai, - const decimation_table* it, - const float* weights -) { - int texel_count = it->num_texels; - float error_summa = 0.0; - for (int i = 0; i < texel_count; i++) + // Loop tail + // Error is buffered and accumulated in blocks of 4 to ensure that + // the partial sums added to the accumulator are invariant with the + // vector implementation, irrespective of vector size ... + alignas(16) float errorsum_tmp[4] { 0 }; + for (/* */; i < texel_count; i++) { - error_summa += compute_error_of_texel(eai, i, it, weights); + // This isn't the ideal access pattern, but the cache lines are probably + // already in the cache due to the vector loop above, so go with it ... 
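A short aside on why the scalar loop tail below stages its errors in a 4-wide buffer instead of adding straight into a float: float addition is not associative, so summing the tail one element at a time would reorder the reduction relative to the SIMD path, and the totals could then differ between vector widths (exactly the invariance the comment above is guarding). Pushing the tail through the same vfloat4 accumulator, four values at a time, keeps the summation order identical. A tiny standalone illustration of the non-associativity (values are arbitrary, not from the codec):

#include <cstdio>

int main()
{
    float big = 16777216.0f;   // 2^24: the point where adding 1.0f is lost
    float one = 1.0f;

    float left_to_right = (big + one) + one;   // each 1.0f rounds away: 16777216
    float grouped       = big + (one + one);   // 2.0f survives:         16777218

    printf("%.1f vs %.1f\n", left_to_right, grouped);
    return 0;
}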
+ float current_value = (weights[dt->texel_weights_4t[0][i]] * dt->texel_weights_float_4t[0][i] + + weights[dt->texel_weights_4t[1][i]] * dt->texel_weights_float_4t[1][i]) + + (weights[dt->texel_weights_4t[2][i]] * dt->texel_weights_float_4t[2][i] + + weights[dt->texel_weights_4t[3][i]] * dt->texel_weights_float_4t[3][i]); + + float valuedif = current_value - eai->weights[i]; + float error = valuedif * valuedif * eai->weight_error_scale[i]; + + // Accumulate error sum in the temporary array + int error_index = i & 0x3; + errorsum_tmp[error_index] = error; + +#if ASTCENC_SIMD_WIDTH == 8 + // Zero the temporary staging buffer every 4 items unless last. Note + // that this block can only trigger for 6x5 blocks, all other partials + // tails are shorter than 4 ... + if ((i & 0x7) == 0x03) + { + haccumulate(error_summav, vfloat4::loada(errorsum_tmp)); + storea(vfloat4::zero(), errorsum_tmp); + } +#endif } + + // Accumulate the loop tail using the vfloat4 swizzle + haccumulate(error_summav, vfloat4::loada(errorsum_tmp)); + + // Resolve the final scalar accumulator sum + haccumulate(error_summa, error_summav); + return error_summa; } -/* - Given a complete weight set and a decimation table, try to - compute the optimal weight set (assuming infinite precision) - given the selected decimation table. -*/ +/* See header for documentation. */ +// Note: This function is vectorized, but needs to use gathers to access the +// decimation table structures so vectorization is currently only enabled for +// AVX2. The implementation loops over decimated weights, and then texels for +// each weight. We know the backing memory is "large enough" we can can +// overshoot the weight count to always use full vectors without a loop tail. +// The inner loop operates on 8 weights, each of which may have a different +// number of texels referenced by it. We iterate over the max reference count, +// and then use lane masks to disable lanes that are no longer in scope. void compute_ideal_weights_for_decimation_table( - const endpoints_and_weights* eai, - const decimation_table* it, - float* weight_set, - float* weights + const endpoints_and_weights& eai_in, + endpoints_and_weights& eai_out, + const decimation_table& dt, + float* RESTRICT weight_set, + float* RESTRICT weights ) { - int texels_per_block = it->num_texels; - int weight_count = it->num_weights; + int texel_count = dt.texel_count; + int weight_count = dt.weight_count; - // perform a shortcut in the case of a complete decimation table - if (texels_per_block == weight_count) + promise(texel_count > 0); + promise(weight_count > 0); + + // This function includes a copy of the epw from eai_in to eai_out. We do it + // here because we want to load the data anyway, so we can avoid loading it + // from memory twice. + eai_out.ep = eai_in.ep; + + // If we have a 1:1 mapping just shortcut the computation - clone the + // weights into both the weight set and the output epw copy. 
+ if (texel_count == weight_count) { - for (int i = 0; i < it->num_texels; i++) + for (int i = 0; i < texel_count; i++) { - int texel = it->weight_texel[i][0]; - weight_set[i] = eai->weights[texel]; - weights[i] = eai->weight_error_scale[texel]; + assert(i == dt.weight_texel[0][i]); + weight_set[i] = eai_in.weights[i]; + weights[i] = eai_in.weight_error_scale[i]; + + eai_out.weights[i] = eai_in.weights[i]; + eai_out.weight_error_scale[i] = eai_in.weight_error_scale[i]; } return; } + // If we don't have a 1:1 mapping just clone the weights into the output + // epw copy and then do the full algorithm to decimate weights. + else + { + for (int i = 0; i < texel_count; i++) + { + eai_out.weights[i] = eai_in.weights[i]; + eai_out.weight_error_scale[i] = eai_in.weight_error_scale[i]; + } + } - // if the shortcut is not available, we will instead compute a simple estimate - // and perform a single iteration of refinement on that estimate. - float infilled_weights[MAX_TEXELS_PER_BLOCK]; + // Otherwise compute an estimate and perform single refinement iteration + alignas(ASTCENC_VECALIGN) float infilled_weights[MAX_TEXELS_PER_BLOCK]; - // compute an initial average for each weight. - for (int i = 0; i < weight_count; i++) + // Compute an initial average for each decimated weight +#if ASTCENC_SIMD_WIDTH >= 8 + int clipped_weight_count = round_up_to_simd_multiple_vla(weight_count); + for (int i = 0; i < clipped_weight_count; i += ASTCENC_SIMD_WIDTH) { - int texel_count = it->weight_num_texels[i]; + // Start with a small value to avoid div-by-zero later + vfloat weight_weight(1e-10f); + vfloat initial_weight = vfloat::zero(); + + // Accumulate error weighting of all the texels using this weight + vint weight_texel_count(dt.weight_texel_count + i); + int max_texel_count = hmax(weight_texel_count).lane<0>(); + promise(max_texel_count > 0); + + for (int j = 0; j < max_texel_count; j++) + { + // Not all lanes may actually use j texels, so mask out if idle + vmask active = weight_texel_count > vint(j); + + vint texel(dt.weight_texel[j] + i); + texel = select(vint::zero(), texel, active); - float weight_weight = 1e-10f; // to avoid 0/0 later on + vfloat weight = loada(dt.weights_flt[j] + i); + weight = select(vfloat::zero(), weight, active); + + vfloat contrib_weight = weight * gatherf(eai_in.weight_error_scale, texel); + + weight_weight = weight_weight + contrib_weight; + initial_weight = initial_weight + gatherf(eai_in.weights, texel) * contrib_weight; + } + + storea(weight_weight, weights + i); + storea(initial_weight / weight_weight, weight_set + i); + } +#else + for (int i = 0; i < weight_count; i++) + { + // Start with a small value to avoid div-by-zero later + float weight_weight = 1e-10f; float initial_weight = 0.0f; - for (int j = 0; j < texel_count; j++) + + // Accumulate error weighting of all the texels using this weight + int weight_texel_count = dt.weight_texel_count[i]; + promise(weight_texel_count > 0); + + for (int j = 0; j < weight_texel_count; j++) { - int texel = it->weight_texel[i][j]; - float weight = it->weights_flt[i][j]; - float contrib_weight = weight * eai->weight_error_scale[texel]; + int texel = dt.weight_texel[j][i]; + float weight = dt.weights_flt[j][i]; + float contrib_weight = weight * eai_in.weight_error_scale[texel]; weight_weight += contrib_weight; - initial_weight += eai->weights[texel] * contrib_weight; + initial_weight += eai_in.weights[texel] * contrib_weight; } weights[i] = weight_weight; - weight_set[i] = initial_weight / weight_weight; // this is the 0/0 that is to 
be avoided. + weight_set[i] = initial_weight / weight_weight; } +#endif - for (int i = 0; i < texels_per_block; i++) + // Populate the interpolated weight grid based on the initital average +#if ASTCENC_SIMD_WIDTH >= 8 + // Process SIMD-width texel coordinates at at time while we can + int clipped_texel_count = round_up_to_simd_multiple_vla(texel_count); + for (int i = 0; i < clipped_texel_count; i += ASTCENC_SIMD_WIDTH) + { + vint texel_weights_0(dt.texel_weights_4t[0] + i); + vint texel_weights_1(dt.texel_weights_4t[1] + i); + vint texel_weights_2(dt.texel_weights_4t[2] + i); + vint texel_weights_3(dt.texel_weights_4t[3] + i); + + vfloat weight_set_0 = gatherf(weight_set, texel_weights_0); + vfloat weight_set_1 = gatherf(weight_set, texel_weights_1); + vfloat weight_set_2 = gatherf(weight_set, texel_weights_2); + vfloat weight_set_3 = gatherf(weight_set, texel_weights_3); + + vfloat texel_weights_float_0 = loada(dt.texel_weights_float_4t[0] + i); + vfloat texel_weights_float_1 = loada(dt.texel_weights_float_4t[1] + i); + vfloat texel_weights_float_2 = loada(dt.texel_weights_float_4t[2] + i); + vfloat texel_weights_float_3 = loada(dt.texel_weights_float_4t[3] + i); + + vfloat weight = (weight_set_0 * texel_weights_float_0 + + weight_set_1 * texel_weights_float_1) + + (weight_set_2 * texel_weights_float_2 + + weight_set_3 * texel_weights_float_3); + + storea(weight, infilled_weights + i); + } +#else + for (int i = 0; i < texel_count; i++) { - const uint8_t *texel_weights = it->texel_weights[i]; - const float *texel_weights_float = it->texel_weights_float[i]; + const uint8_t *texel_weights = dt.texel_weights_t4[i]; + const float *texel_weights_float = dt.texel_weights_float_t4[i]; infilled_weights[i] = (weight_set[texel_weights[0]] * texel_weights_float[0] - + weight_set[texel_weights[1]] * texel_weights_float[1]) - + (weight_set[texel_weights[2]] * texel_weights_float[2] - + weight_set[texel_weights[3]] * texel_weights_float[3]); + + weight_set[texel_weights[1]] * texel_weights_float[1]) + + (weight_set[texel_weights[2]] * texel_weights_float[2] + + weight_set[texel_weights[3]] * texel_weights_float[3]); } +#endif + // Perform a single iteration of refinement constexpr float stepsize = 0.25f; - constexpr float ch0_scale = 4.0f * (stepsize * stepsize * (1.0f / (TEXEL_WEIGHT_SUM * TEXEL_WEIGHT_SUM))); - constexpr float ch1_scale = -2.0f * (stepsize * (2.0f / TEXEL_WEIGHT_SUM)); - constexpr float chd_scale = (ch1_scale / ch0_scale) * stepsize; + constexpr float chd_scale = -TEXEL_WEIGHT_SUM; - for (int i = 0; i < weight_count; i++) +#if ASTCENC_SIMD_WIDTH >= 8 + for (int i = 0; i < clipped_weight_count; i += ASTCENC_SIMD_WIDTH) { - float weight_val = weight_set[i]; + // Start with a small value to avoid div-by-zero later + vfloat weight_val = loada(weight_set + i); + + // Accumulate error weighting of all the texels using this weight + vfloat error_change0(1e-10f); + vfloat error_change1(0.0f); + + // Accumulate error weighting of all the texels using this weight + vint weight_texel_count(dt.weight_texel_count + i); + int max_texel_count = hmax(weight_texel_count).lane<0>(); + promise(max_texel_count > 0); + + for (int j = 0; j < max_texel_count; j++) + { + // Not all lanes may actually use j texels, so mask out if idle + vmask active = weight_texel_count > vint(j); - const uint8_t *weight_texel_ptr = it->weight_texel[i]; - const float *weights_ptr = it->weights_flt[i]; + vint texel(dt.weight_texel[j] + i); + texel = select(vint::zero(), texel, active); - // compute the two error changes that 
can occur from perturbing the current index. - int num_weights = it->weight_num_texels[i]; + vfloat contrib_weight = loada(dt.weights_flt[j] + i); + contrib_weight = select(vfloat::zero(), contrib_weight, active); + + vfloat scale = gatherf(eai_in.weight_error_scale, texel) * contrib_weight; + vfloat old_weight = gatherf(infilled_weights, texel); + vfloat ideal_weight = gatherf(eai_in.weights, texel); + + error_change0 = error_change0 + contrib_weight * scale; + error_change1 = error_change1 + (old_weight - ideal_weight) * scale; + } + + vfloat step = (error_change1 * chd_scale) / error_change0; + step = clamp(-stepsize, stepsize, step); + + // update the weight + storea(weight_val + step, weight_set + i); + } +#else + for (int i = 0; i < weight_count; i++) + { + float weight_val = weight_set[i]; - float error_change0 = 1e-10f; // done in order to ensure that this value isn't 0, in order to avoid a possible divide by zero later. + // Start with a small value to avoid div-by-zero later + float error_change0 = 1e-10f; float error_change1 = 0.0f; - for (int k = 0; k < num_weights; k++) + // Compute the two error changes that occur from perturbing the current index + int weight_texel_count = dt.weight_texel_count[i]; + promise(weight_texel_count > 0); + for (int k = 0; k < weight_texel_count; k++) { - uint8_t weight_texel = weight_texel_ptr[k]; - float weights2 = weights_ptr[k]; + uint8_t texel = dt.weight_texel[k][i]; + float contrib_weight = dt.weights_flt[k][i]; - float scale = eai->weight_error_scale[weight_texel] * weights2; - float old_weight = infilled_weights[weight_texel]; - float ideal_weight = eai->weights[weight_texel]; + float scale = eai_in.weight_error_scale[texel] * contrib_weight; + float old_weight = infilled_weights[texel]; + float ideal_weight = eai_in.weights[texel]; - error_change0 += weights2 * scale; + error_change0 += contrib_weight * scale; error_change1 += (old_weight - ideal_weight) * scale; } float step = (error_change1 * chd_scale) / error_change0; - // clamp the step-value. - if (step < -stepsize) - { - step = -stepsize; - } - else if (step > stepsize) - { - step = stepsize; - } + step = astc::clamp(step, -stepsize, stepsize); // update the weight weight_set[i] = weight_val + step; } +#endif } /* @@ -1086,20 +1072,20 @@ void compute_ideal_weights_for_decimation_table( Repeat until we have made a complete processing pass over all weights without triggering any perturbations *OR* we have run 4 full passes. */ -void compute_ideal_quantized_weights_for_decimation_table( - const decimation_table* it, +void compute_quantized_weights_for_decimation_table( + const decimation_table* dt, float low_bound, float high_bound, const float* weight_set_in, float* weight_set_out, uint8_t* quantized_weight_set, - int quantization_level + int quant_level ) { - int weight_count = it->num_weights; - const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[quantization_level]); + int weight_count = dt->weight_count; + const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[quant_level]); - static const int quant_levels[12] = { 2,3,4,5,6,8,10,12,16,20,24,32 }; - float quant_level_m1 = (float)(quant_levels[quantization_level] - 1); + static const int quant_levels[12] { 2,3,4,5,6,8,10,12,16,20,24,32 }; + float quant_level_m1 = (float)(quant_levels[quant_level] - 1); // Quantize the weight set using both the specified low/high bounds // and the standard 0..1 weight bounds. 
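A note on the rounding test used in both the vector and scalar paths of the next hunk: the unquantized_value_unsc table appears to be on the same 0..64 scale used elsewhere in this patch (compare the 1/64 unquantization in recompute_ideal_colors_2planes), while ix is in 0..1, so `ixl + ixh < 128.0f * ix` is simply a midpoint comparison, (ixl + ixh) / 2 < 64 * ix, i.e. round up whenever the scaled weight sits above the midpoint of the two neighbouring quantization levels. A hedged standalone restatement, with a made-up level table for illustration:

#include <cstdio>

// Illustrative only: choose the nearer of two adjacent quantization levels,
// mirroring the (ixl + ixh < 128 * ix) test in the hunk below. The 'unsc'
// values are assumed to be on the codec's 0..64 unquantized scale.
static int round_to_nearer_level(float ix, int lo_idx, const float* unsc)
{
    float ixl = unsc[lo_idx];
    float ixh = unsc[lo_idx + 1];

    // Same as (ixl + ixh) * 0.5f < ix * 64.0f: is the target above the midpoint?
    return (ixl + ixh < 128.0f * ix) ? (lo_idx + 1) : lo_idx;
}

int main()
{
    // Hypothetical 5-level table (QUANT_5-like spacing) on the 0..64 scale.
    const float unsc[5] = { 0.0f, 16.0f, 32.0f, 48.0f, 64.0f };
    const float quant_level_m1 = 4.0f;

    float ix_a = 0.30f;   // 19.2 on the 0..64 scale, below the 16/32 midpoint
    float ix_b = 0.40f;   // 25.6 on the 0..64 scale, above the 16/32 midpoint

    int a = round_to_nearer_level(ix_a, (int)(ix_a * quant_level_m1), unsc);
    int b = round_to_nearer_level(ix_b, (int)(ix_b * quant_level_m1), unsc);

    printf("%d %d\n", a, b);   // prints "1 2": 19.2 snaps to 16, 25.6 snaps to 32
    return 0;
}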
@@ -1123,48 +1109,42 @@ void compute_ideal_quantized_weights_for_decimation_table( #if ASTCENC_SIMD_WIDTH > 1 // SIMD loop; process weights in SIMD width batches while we can. - int clipped_weight_count = weight_count & ~(ASTCENC_SIMD_WIDTH-1); vfloat scalev(scale); vfloat scaled_low_boundv(scaled_low_bound); vfloat quant_level_m1v(quant_level_m1); vfloat rscalev(rscale); vfloat low_boundv(low_bound); - for (/*Vector loop */; i < clipped_weight_count; i += ASTCENC_SIMD_WIDTH) + + int clipped_weight_count = round_down_to_simd_multiple_vla(weight_count); + for (/* */; i < clipped_weight_count; i += ASTCENC_SIMD_WIDTH) { vfloat ix = loada(&weight_set_in[i]) * scalev - scaled_low_boundv; - ix = saturate(ix); // upper bound must be smaller than 1 to avoid an array overflow below. + ix = clampzo(ix); - // look up the two closest indexes and return the one that was closest. + //Llook up the two closest indexes and return the one that was closest. vfloat ix1 = ix * quant_level_m1v; - vint weight = floatToInt(ix1); - vint weight1 = weight+vint(1); + vint weight = float_to_int(ix1); + vint weight1 = weight + vint(1); vfloat ixl = gatherf(qat->unquantized_value_unsc, weight); vfloat ixh = gatherf(qat->unquantized_value_unsc, weight1); - vmask mask = ixl + ixh < vfloat(128.0f) * ix; + vmask mask = (ixl + ixh) < (vfloat(128.0f) * ix); weight = select(weight, weight1, mask); ixl = select(ixl, ixh, mask); // Invert the weight-scaling that was done initially - store(ixl * rscalev + low_boundv, &weight_set_out[i]); + storea(ixl * rscalev + low_boundv, &weight_set_out[i]); vint scm = gatheri(qat->scramble_map, weight); vint scn = pack_low_bytes(scm); store_nbytes(scn, &quantized_weight_set[i]); } #endif // #if ASTCENC_SIMD_WIDTH > 1 - // Process remaining weights in a scalar way. - for (/* Loop tail */; i < weight_count; i++) + // Loop tail + for (/* */; i < weight_count; i++) { float ix = (weight_set_in[i] * scale) - scaled_low_bound; - if (ix < 0.0f) - { - ix = 0.0f; - } - if (ix > 1.0f) // upper bound must be smaller than 1 to avoid an array overflow below. - { - ix = 1.0f; - } + ix = astc::clamp1f(ix); // look up the two closest indexes and return the one that was closest. float ix1 = ix * quant_level_m1; @@ -1184,9 +1164,9 @@ void compute_ideal_quantized_weights_for_decimation_table( } } -static inline float4 compute_rgbovec( - float4 rgba_weight_sum, - float3 weight_weight_sum, +static inline vfloat4 compute_rgbovec( + vfloat4 rgba_weight_sum, + vfloat4 weight_weight_sum, float red_sum, float green_sum, float blue_sum, @@ -1197,18 +1177,18 @@ static inline float4 compute_rgbovec( // has a regular structure, we can simplify the inverse calculation. 
This // gives us ~24 multiplications, down from 96 for a generic inverse - // mat[0] = float4(rgba_ws.x, 0.0f, 0.0f, wght_ws.x); - // mat[1] = float4( 0.0f, rgba_ws.y, 0.0f, wght_ws.y); - // mat[2] = float4( 0.0f, 0.0f, rgba_ws.z, wght_ws.z); - // mat[3] = float4(wght_ws.x, wght_ws.y, wght_ws.z, psum); + // mat[0] = vfloat4(rgba_ws.x, 0.0f, 0.0f, wght_ws.x); + // mat[1] = vfloat4( 0.0f, rgba_ws.y, 0.0f, wght_ws.y); + // mat[2] = vfloat4( 0.0f, 0.0f, rgba_ws.z, wght_ws.z); + // mat[3] = vfloat4(wght_ws.x, wght_ws.y, wght_ws.z, psum); // mat = invert(mat); - float X = rgba_weight_sum.r; - float Y = rgba_weight_sum.g; - float Z = rgba_weight_sum.b; - float P = weight_weight_sum.r; - float Q = weight_weight_sum.g; - float R = weight_weight_sum.b; + float X = rgba_weight_sum.lane<0>(); + float Y = rgba_weight_sum.lane<1>(); + float Z = rgba_weight_sum.lane<2>(); + float P = weight_weight_sum.lane<0>(); + float Q = weight_weight_sum.lane<1>(); + float R = weight_weight_sum.lane<2>(); float S = psum; float PP = P * P; @@ -1232,78 +1212,75 @@ static inline float4 compute_rgbovec( // Actually compute the adjugate matrix, not the inverse, and apply the // multiplication by 1/det to the vector separately. - float4 mat0 = float4(DT, ZQP, RYP, mZYP); - float4 mat1 = float4(ZQP, SZmRR * X - Z * PP, RQX, mZQX); - float4 mat2 = float4(RYP, RQX, (S * Y - QQ) * X - Y * PP, mRYX); - float4 mat3 = float4(mZYP, mZQX, mRYX, Z * YX); - float4 vect = float4(red_sum, green_sum, blue_sum, qsum) * rdet; + vfloat4 mat0(DT, ZQP, RYP, mZYP); + vfloat4 mat1(ZQP, SZmRR * X - Z * PP, RQX, mZQX); + vfloat4 mat2(RYP, RQX, (S * Y - QQ) * X - Y * PP, mRYX); + vfloat4 mat3(mZYP, mZQX, mRYX, Z * YX); + vfloat4 vect = vfloat4(red_sum, green_sum, blue_sum, qsum) * rdet; #ifdef DEBUG_CAPTURE_NAN fedisableexcept(FE_DIVBYZERO | FE_INVALID); #endif - return float4(dot(mat0, vect), - dot(mat1, vect), - dot(mat2, vect), - dot(mat3, vect)); + return vfloat4(dot_s(mat0, vect), + dot_s(mat1, vect), + dot_s(mat2, vect), + dot_s(mat3, vect)); } /* for a given weight set, we wish to recompute the colors so that they are optimal for a particular weight set. */ -void recompute_ideal_colors( - int weight_quantization_mode, +void recompute_ideal_colors_2planes( + int weight_quant_mode, endpoints* ep, // contains the endpoints we wish to update - float4* rgbs_vectors, // used to return RGBS-vectors for endpoint mode #6 - float4* rgbo_vectors, // used to return RGBO-vectors for endpoint mode #7 + vfloat4* rgbs_vectors, // used to return RGBS-vectors for endpoint mode #6 + vfloat4* rgbo_vectors, // used to return RGBO-vectors for endpoint mode #7 const uint8_t* weight_set8, // the current set of weight values const uint8_t* plane2_weight_set8, // nullptr if plane 2 is not actually used. int plane2_color_component, // color component for 2nd plane of weights; -1 if the 2nd plane of weights is not present - const partition_info* pi, - const decimation_table* it, - const imageblock* pb, // picture-block containing the actual data. + const partition_info* pt, + const decimation_table* dt, + const imageblock* blk, // picture-block containing the actual data. 
const error_weight_block* ewb ) { - const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quantization_mode]); + const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quant_mode]); float weight_set[MAX_WEIGHTS_PER_BLOCK]; float plane2_weight_set[MAX_WEIGHTS_PER_BLOCK]; - for (int i = 0; i < it->num_weights; i++) + for (int i = 0; i < dt->weight_count; i++) { weight_set[i] = qat->unquantized_value[weight_set8[i]] * (1.0f / 64.0f); } if (plane2_weight_set8) { - for (int i = 0; i < it->num_weights; i++) + for (int i = 0; i < dt->weight_count; i++) { plane2_weight_set[i] = qat->unquantized_value[plane2_weight_set8[i]] * (1.0f / 64.0f); } } - int partition_count = pi->partition_count; + int partition_count = pt->partition_count; for (int i = 0; i < partition_count; i++) { - float4 rgba_sum = float4(1e-17f); - float4 rgba_weight_sum = float4(1e-17f); + vfloat4 rgba_sum(1e-17f); + vfloat4 rgba_weight_sum(1e-17f); - int texelcount = pi->texels_per_partition[i]; - const uint8_t *texel_indexes = pi->texels_of_partition[i]; + int texelcount = pt->partition_texel_count[i]; + const uint8_t *texel_indexes = pt->texels_of_partition[i]; for (int j = 0; j < texelcount; j++) { int tix = texel_indexes[j]; - float4 rgba = float4(pb->data_r[tix], pb->data_g[tix], pb->data_b[tix], pb->data_a[tix]); - float4 error_weight = float4(ewb->texel_weight_r[tix], ewb->texel_weight_g[tix], ewb->texel_weight_b[tix], ewb->texel_weight_a[tix]); + vfloat4 rgba = blk->texel(tix); + vfloat4 error_weight(ewb->texel_weight_r[tix], ewb->texel_weight_g[tix], ewb->texel_weight_b[tix], ewb->texel_weight_a[tix]); rgba_sum = rgba_sum + (rgba * error_weight); rgba_weight_sum = rgba_weight_sum + error_weight; } - float3 scale_direction = normalize(float3( - rgba_sum.r * (1.0f / rgba_weight_sum.r), - rgba_sum.g * (1.0f / rgba_weight_sum.g), - rgba_sum.b * (1.0f / rgba_weight_sum.b))); + vfloat4 scale_direction = normalize((rgba_sum * (1.0f / rgba_weight_sum)).swz<0, 1, 2>()); float scale_max = 0.0f; float scale_min = 1e10f; @@ -1313,22 +1290,22 @@ void recompute_ideal_colors( float wmin2 = 1.0f; float wmax2 = 0.0f; - float4 left_sum = float4(0.0f); - float4 middle_sum = float4(0.0f); - float4 right_sum = float4(0.0f); + vfloat4 left_sum = vfloat4::zero(); + vfloat4 middle_sum = vfloat4::zero(); + vfloat4 right_sum = vfloat4::zero(); - float4 left2_sum = float4(0.0f); - float4 middle2_sum = float4(0.0f); - float4 right2_sum = float4(0.0f); + vfloat4 left2_sum = vfloat4::zero(); + vfloat4 middle2_sum = vfloat4::zero(); + vfloat4 right2_sum = vfloat4::zero(); - float3 lmrs_sum = float3(0.0f); + vfloat4 lmrs_sum = vfloat4(0.0f); - float4 color_vec_x = float4(0.0f); - float4 color_vec_y = float4(0.0f); + vfloat4 color_vec_x = vfloat4::zero(); + vfloat4 color_vec_y = vfloat4::zero(); float2 scale_vec = float2(0.0f); - float3 weight_weight_sum = float3(1e-17f); + vfloat4 weight_weight_sum = vfloat4(1e-17f); float psum = 1e-17f; // FIXME: the loop below has too many responsibilities, making it inefficient. 
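The accumulators set up above (left_sum, middle_sum, right_sum, color_vec_x, color_vec_y) are the coefficients of a per-channel 2x2 weighted least-squares system: the fit looks for endpoints c0 and c1 such that c0*(1 - t) + c1*t best matches the weighted texel values, and the hunks below solve that system per channel with Cramer's rule (color_det1, color_rdet1, ep0, ep1). A minimal scalar sketch of the same solve, using standalone illustrative names rather than the astcenc vfloat4 types:

#include <cstdio>

// Fit endpoints c0/c1 so that c0*(1 - t[j]) + c1*t[j] approximates x[j] under
// weights w[j], by solving the 2x2 normal equations with Cramer's rule. This
// mirrors how left_sum/middle_sum/right_sum and color_vec_x/color_vec_y are
// combined into ep0/ep1 in the patch; fit_endpoints is an illustrative name,
// not an astcenc function.
static void fit_endpoints(const float* x, const float* t, const float* w,
                          int n, float& c0, float& c1)
{
    float L = 0.0f, M = 0.0f, R = 0.0f; // sum of w*(1-t)^2, w*(1-t)*t, w*t^2
    float X = 0.0f, Y = 0.0f;           // sum of w*x*(1-t), w*x*t
    for (int j = 0; j < n; j++)
    {
        float om = 1.0f - t[j];
        L += w[j] * om * om;
        M += w[j] * om * t[j];
        R += w[j] * t[j] * t[j];
        X += w[j] * x[j] * om;
        Y += w[j] * x[j] * t[j];
    }

    float det = L * R - M * M;   // color_det1; a real caller must reject a tiny det
    float rdet = 1.0f / det;     // color_rdet1
    c0 = (R * X - M * Y) * rdet; // ep0
    c1 = (L * Y - M * X) * rdet; // ep1
}

int main()
{
    // Samples lying on a ramp from 0.2 to 0.8 should be recovered (up to rounding).
    float x[4] = { 0.2f, 0.4f, 0.6f, 0.8f };
    float t[4] = { 0.0f, 1.0f / 3.0f, 2.0f / 3.0f, 1.0f };
    float w[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
    float c0 = 0.0f, c1 = 0.0f;
    fit_endpoints(x, t, w, 4, c0, c1);
    printf("c0=%f c1=%f\n", c0, c1); // expect ~0.2 and ~0.8
    return 0;
}

The determinant collapses toward zero when all interpolation weights in a partition are (nearly) equal, which is why the code below guards with det_mask and takes the wmin1 >= wmax1 * 0.999f branch that falls back to the weighted-average color for both endpoints.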
@@ -1336,51 +1313,38 @@ void recompute_ideal_colors( { int tix = texel_indexes[j]; - float4 rgba = float4(pb->data_r[tix], pb->data_g[tix], pb->data_b[tix], pb->data_a[tix]); - float4 color_weight = float4(ewb->texel_weight_r[tix], ewb->texel_weight_g[tix], ewb->texel_weight_b[tix], ewb->texel_weight_a[tix]); + vfloat4 rgba = blk->texel(tix); + vfloat4 color_weight(ewb->texel_weight_r[tix], ewb->texel_weight_g[tix], ewb->texel_weight_b[tix], ewb->texel_weight_a[tix]); - float3 color_weight3 = float3(color_weight.r, color_weight.g, color_weight.b); - float3 rgb = float3(rgba.r, rgba.g, rgba.b); + vfloat4 color_weight3 = color_weight.swz<0, 1, 2>(); + vfloat4 rgb = rgba.swz<0, 1, 2>(); // FIXME: move this calculation out to the color block. - float ls_weight = (color_weight.r + color_weight.g + color_weight.b); + float ls_weight = hadd_rgb_s(color_weight); - const uint8_t *texel_weights = it->texel_weights[tix]; - const float *texel_weights_float = it->texel_weights_float[tix]; + const uint8_t *texel_weights = dt->texel_weights_t4[tix]; + const float *texel_weights_float = dt->texel_weights_float_t4[tix]; float idx0 = (weight_set[texel_weights[0]] * texel_weights_float[0] + weight_set[texel_weights[1]] * texel_weights_float[1]) + (weight_set[texel_weights[2]] * texel_weights_float[2] + weight_set[texel_weights[3]] * texel_weights_float[3]); float om_idx0 = 1.0f - idx0; - if (idx0 > wmax1) - { - wmax1 = idx0; - } + wmin1 = astc::min(idx0, wmin1); + wmax1 = astc::max(idx0, wmax1); - if (idx0 < wmin1) - { - wmin1 = idx0; - } - - float scale = dot(scale_direction, rgb); - if (scale < scale_min) - { - scale_min = scale; - } - - if (scale > scale_max) - { - scale_max = scale; - } + float scale = dot3_s(scale_direction, rgb); + scale_min = astc::min(scale, scale_min); + scale_max = astc::max(scale, scale_max); - float4 left = color_weight * (om_idx0 * om_idx0); - float4 middle = color_weight * (om_idx0 * idx0); - float4 right = color_weight * (idx0 * idx0); + vfloat4 left = color_weight * (om_idx0 * om_idx0); + vfloat4 middle = color_weight * (om_idx0 * idx0); + vfloat4 right = color_weight * (idx0 * idx0); - float3 lmrs = float3(om_idx0 * om_idx0, - om_idx0 * idx0, - idx0 * idx0) * ls_weight; + vfloat4 lmrs = vfloat4(om_idx0 * om_idx0, + om_idx0 * idx0, + idx0 * idx0, + 0.0f) * ls_weight; left_sum = left_sum + left; middle_sum = middle_sum + middle; @@ -1399,34 +1363,27 @@ void recompute_ideal_colors( + plane2_weight_set[texel_weights[3]] * texel_weights_float[3]); om_idx1 = 1.0f - idx1; - if (idx1 > wmax2) - { - wmax2 = idx1; - } - - if (idx1 < wmin2) - { - wmin2 = idx1; - } + wmin2 = astc::min(idx1, wmin2); + wmax2 = astc::max(idx1, wmax2); - float4 left2 = color_weight * (om_idx1 * om_idx1); - float4 middle2 = color_weight * (om_idx1 * idx1); - float4 right2 = color_weight * (idx1 * idx1); + vfloat4 left2 = color_weight * (om_idx1 * om_idx1); + vfloat4 middle2 = color_weight * (om_idx1 * idx1); + vfloat4 right2 = color_weight * (idx1 * idx1); left2_sum = left2_sum + left2; middle2_sum = middle2_sum + middle2; right2_sum = right2_sum + right2; } - float4 color_idx = float4((plane2_color_component == 0) ? idx1 : idx0, - (plane2_color_component == 1) ? idx1 : idx0, - (plane2_color_component == 2) ? idx1 : idx0, - (plane2_color_component == 3) ? idx1 : idx0); + vfloat4 color_idx((plane2_color_component == 0) ? idx1 : idx0, + (plane2_color_component == 1) ? idx1 : idx0, + (plane2_color_component == 2) ? idx1 : idx0, + (plane2_color_component == 3) ? 
idx1 : idx0); - float3 color_idx3 = float3(color_idx.r, color_idx.g, color_idx.b); + vfloat4 color_idx3 = color_idx.swz<0, 1, 2>(); - float4 cwprod = color_weight * rgba; - float4 cwiprod = cwprod * color_idx; + vfloat4 cwprod = color_weight * rgba; + vfloat4 cwiprod = cwprod * color_idx; color_vec_y = color_vec_y + cwiprod; color_vec_x = color_vec_x + (cwprod - cwiprod); @@ -1435,29 +1392,29 @@ void recompute_ideal_colors( weight_weight_sum = weight_weight_sum + (color_weight3 * color_idx3); - psum += dot(color_weight3 * color_idx3, color_idx3); + psum += dot3_s(color_weight3 * color_idx3, color_idx3); } // calculations specific to mode #7, the HDR RGB-scale mode. // FIXME: Can we skip this for LDR textures? - float red_sum = color_vec_x.r + color_vec_y.r; - float green_sum = color_vec_x.g + color_vec_y.g; - float blue_sum = color_vec_x.b + color_vec_y.b; - float qsum = color_vec_y.r + color_vec_y.g + color_vec_y.b; + float red_sum = color_vec_x.lane<0>() + color_vec_y.lane<0>(); + float green_sum = color_vec_x.lane<1>() + color_vec_y.lane<1>(); + float blue_sum = color_vec_x.lane<2>() + color_vec_y.lane<2>(); + float qsum = hadd_rgb_s(color_vec_y); #ifdef DEBUG_CAPTURE_NAN fedisableexcept(FE_DIVBYZERO | FE_INVALID); #endif - float4 rgbovec = compute_rgbovec(rgba_weight_sum, weight_weight_sum, - red_sum, green_sum, blue_sum, psum, qsum); + vfloat4 rgbovec = compute_rgbovec(rgba_weight_sum, weight_weight_sum, + red_sum, green_sum, blue_sum, psum, qsum); rgbo_vectors[i] = rgbovec; // We will occasionally get a failure due to the use of a singular // (non-invertible) matrix. Record whether such a failure has taken // place; if it did, compute rgbo_vectors[] with a different method // later on. - float chkval = dot(rgbovec, rgbovec); + float chkval = dot_s(rgbovec, rgbovec); int rgbo_fail = chkval != chkval; // Initialize the luminance and scale vectors with a reasonable @@ -1466,56 +1423,31 @@ void recompute_ideal_colors( fedisableexcept(FE_DIVBYZERO | FE_INVALID); #endif - float scalediv = scale_min * (1.0f / MAX(scale_max, 1e-10f)); - if (!(scalediv > 0.0f)) - { - scalediv = 0.0f; // set to zero if scalediv is negative, or NaN. - } - - if (scalediv > 1.0f) - { - scalediv = 1.0f; - } + float scalediv = scale_min * (1.0f / astc::max(scale_max, 1e-10f)); + scalediv = astc::clamp1f(scalediv); #ifdef DEBUG_CAPTURE_NAN feenableexcept(FE_DIVBYZERO | FE_INVALID); #endif - float3 sds = scale_direction * scale_max; + vfloat4 sds = scale_direction * scale_max; - rgbs_vectors[i] = float4(sds.r, sds.g, sds.b, scalediv); + rgbs_vectors[i] = vfloat4(sds.lane<0>(), sds.lane<1>(), sds.lane<2>(), scalediv); if (wmin1 >= wmax1 * 0.999f) { // if all weights in the partition were equal, then just take average // of all colors in the partition and use that as both endpoint colors. 
- float4 avg = (color_vec_x + color_vec_y) * - float4(1.0f / rgba_weight_sum.r, - 1.0f / rgba_weight_sum.g, - 1.0f / rgba_weight_sum.b, - 1.0f / rgba_weight_sum.a); - - if (plane2_color_component != 0 && avg.r == avg.r) - { - ep->endpt0[i].r = ep->endpt1[i].r = avg.r; - } - - if (plane2_color_component != 1 && avg.g == avg.g) - { - ep->endpt0[i].g = ep->endpt1[i].g = avg.g; - } + vfloat4 avg = (color_vec_x + color_vec_y) * (1.0f / rgba_weight_sum); - if (plane2_color_component != 2 && avg.b == avg.b) - { - ep->endpt0[i].b = ep->endpt1[i].b = avg.b; - } + vmask4 p1_mask = vint4::lane_id() != vint4(plane2_color_component); + vmask4 notnan_mask = avg == avg; + vmask4 full_mask = p1_mask & notnan_mask; - if (plane2_color_component != 3 && avg.a == avg.a) - { - ep->endpt0[i].a = ep->endpt1[i].a = avg.a; - } + ep->endpt0[i] = select(ep->endpt0[i], avg, full_mask); + ep->endpt1[i] = select(ep->endpt1[i], avg, full_mask); - rgbs_vectors[i] = float4(sds.r, sds.g, sds.b, 1.0f); + rgbs_vectors[i] = vfloat4(sds.lane<0>(), sds.lane<1>(), sds.lane<2>(), 1.0f); } else { @@ -1526,58 +1458,39 @@ void recompute_ideal_colors( fedisableexcept(FE_DIVBYZERO | FE_INVALID); #endif - float4 color_det1 = (left_sum * right_sum) - (middle_sum * middle_sum); - float4 color_rdet1 = float4(1.0f / color_det1.r, - 1.0f / color_det1.g, - 1.0f / color_det1.b, - 1.0f / color_det1.a ); + vfloat4 color_det1 = (left_sum * right_sum) - (middle_sum * middle_sum); + vfloat4 color_rdet1 = 1.0f / color_det1; - float ls_det1 = (lmrs_sum.r * lmrs_sum.b) - (lmrs_sum.g * lmrs_sum.g); + float ls_det1 = (lmrs_sum.lane<0>() * lmrs_sum.lane<2>()) - (lmrs_sum.lane<1>() * lmrs_sum.lane<1>()); float ls_rdet1 = 1.0f / ls_det1; - float4 color_mss1 = (left_sum * left_sum) - + (2.0f * middle_sum * middle_sum) - + (right_sum * right_sum); + vfloat4 color_mss1 = (left_sum * left_sum) + + (2.0f * middle_sum * middle_sum) + + (right_sum * right_sum); - float ls_mss1 = (lmrs_sum.r * lmrs_sum.r) - + (2.0f * lmrs_sum.g * lmrs_sum.g) - + (lmrs_sum.b * lmrs_sum.b); + float ls_mss1 = (lmrs_sum.lane<0>() * lmrs_sum.lane<0>()) + + (2.0f * lmrs_sum.lane<1>() * lmrs_sum.lane<1>()) + + (lmrs_sum.lane<2>() * lmrs_sum.lane<2>()); - float4 ep0 = (right_sum * color_vec_x - middle_sum * color_vec_y) * color_rdet1; - float4 ep1 = (left_sum * color_vec_y - middle_sum * color_vec_x) * color_rdet1; + vfloat4 ep0 = (right_sum * color_vec_x - middle_sum * color_vec_y) * color_rdet1; + vfloat4 ep1 = (left_sum * color_vec_y - middle_sum * color_vec_x) * color_rdet1; - float scale_ep0 = (lmrs_sum.b * scale_vec.r - lmrs_sum.g * scale_vec.g) * ls_rdet1; - float scale_ep1 = (lmrs_sum.r * scale_vec.g - lmrs_sum.g * scale_vec.r) * ls_rdet1; + float scale_ep0 = (lmrs_sum.lane<2>() * scale_vec.r - lmrs_sum.lane<1>() * scale_vec.g) * ls_rdet1; + float scale_ep1 = (lmrs_sum.lane<0>() * scale_vec.g - lmrs_sum.lane<1>() * scale_vec.r) * ls_rdet1; - if (plane2_color_component != 0 && fabsf(color_det1.r) > (color_mss1.r * 1e-4f) && ep0.r == ep0.r && ep1.r == ep1.r) - { - ep->endpt0[i].r = ep0.r; - ep->endpt1[i].r = ep1.r; - } + vmask4 p1_mask = vint4::lane_id() != vint4(plane2_color_component); + vmask4 det_mask = abs(color_det1) > (color_mss1 * 1e-4f); + vmask4 notnan_mask = (ep0 == ep0) & (ep1 == ep1); + vmask4 full_mask = p1_mask & det_mask & notnan_mask; - if (plane2_color_component != 1 && fabsf(color_det1.g) > (color_mss1.g * 1e-4f) && ep0.g == ep0.g && ep1.g == ep1.g) - { - ep->endpt0[i].g = ep0.g; - ep->endpt1[i].g = ep1.g; - } - - if (plane2_color_component != 2 && 
fabsf(color_det1.b) > (color_mss1.b * 1e-4f) && ep0.b == ep0.b && ep1.b == ep1.b) - { - ep->endpt0[i].b = ep0.b; - ep->endpt1[i].b = ep1.b; - } - - if (plane2_color_component != 3 && fabsf(color_det1.a) > (color_mss1.a * 1e-4f) && ep0.a == ep0.a && ep1.a == ep1.a) - { - ep->endpt0[i].a = ep0.a; - ep->endpt1[i].a = ep1.a; - } + ep->endpt0[i] = select(ep->endpt0[i], ep0, full_mask); + ep->endpt1[i] = select(ep->endpt1[i], ep1, full_mask); if (fabsf(ls_det1) > (ls_mss1 * 1e-4f) && scale_ep0 == scale_ep0 && scale_ep1 == scale_ep1 && scale_ep0 < scale_ep1) { float scalediv2 = scale_ep0 * (1.0f / scale_ep1); - float3 sdsm = scale_direction * scale_ep1; - rgbs_vectors[i] = float4(sdsm.r, sdsm.g, sdsm.b, scalediv2); + vfloat4 sdsm = scale_direction * scale_ep1; + rgbs_vectors[i] = vfloat4(sdsm.lane<0>(), sdsm.lane<1>(), sdsm.lane<2>(), scalediv2); } #ifdef DEBUG_CAPTURE_NAN @@ -1591,31 +1504,14 @@ void recompute_ideal_colors( { // if all weights in the partition were equal, then just take average // of all colors in the partition and use that as both endpoint colors. - float4 avg = (color_vec_x + color_vec_y) - * float4(1.0f / rgba_weight_sum.r, - 1.0f / rgba_weight_sum.g, - 1.0f / rgba_weight_sum.b, - 1.0f / rgba_weight_sum.a); - - if (plane2_color_component == 0 && avg.r == avg.r) - { - ep->endpt0[i].r = ep->endpt1[i].r = avg.r; - } - - if (plane2_color_component == 1 && avg.g == avg.g) - { - ep->endpt0[i].g = ep->endpt1[i].g = avg.g; - } - - if (plane2_color_component == 2 && avg.b == avg.b) - { - ep->endpt0[i].b = ep->endpt1[i].b = avg.b; - } - - if (plane2_color_component == 3 && avg.a == avg.a) - { - ep->endpt0[i].a = ep->endpt1[i].a = avg.a; - } + vfloat4 avg = (color_vec_x + color_vec_y) * (1.0f / rgba_weight_sum); + + vmask4 p2_mask = vint4::lane_id() == vint4(plane2_color_component); + vmask4 notnan_mask = avg == avg; + vmask4 full_mask = p2_mask & notnan_mask; + + ep->endpt0[i] = select(ep->endpt0[i], avg, full_mask); + ep->endpt1[i] = select(ep->endpt1[i], avg, full_mask); } else { @@ -1625,42 +1521,23 @@ void recompute_ideal_colors( // otherwise, complete the analytic calculation of ideal-endpoint-values // for the given set of texel weights and pixel colors. 
- float4 color_det2 = (left2_sum * right2_sum) - (middle2_sum * middle2_sum); - float4 color_rdet2 = float4(1.0f / color_det2.r, - 1.0f / color_det2.g, - 1.0f / color_det2.b, - 1.0f / color_det2.a); - - float4 color_mss2 = (left2_sum * left2_sum) - + (2.0f * middle2_sum * middle2_sum) - + (right2_sum * right2_sum); - - float4 ep0 = (right2_sum * color_vec_x - middle2_sum * color_vec_y) * color_rdet2; - float4 ep1 = (left2_sum * color_vec_y - middle2_sum * color_vec_x) * color_rdet2; - - if (plane2_color_component == 0 && fabsf(color_det2.r) > (color_mss2.r * 1e-4f) && ep0.r == ep0.r && ep1.r == ep1.r) - { - ep->endpt0[i].r = ep0.r; - ep->endpt1[i].r = ep1.r; - } - - if (plane2_color_component == 1 && fabsf(color_det2.g) > (color_mss2.g * 1e-4f) && ep0.g == ep0.g && ep1.g == ep1.g) - { - ep->endpt0[i].g = ep0.g; - ep->endpt1[i].g = ep1.g; - } - - if (plane2_color_component == 2 && fabsf(color_det2.b) > (color_mss2.b * 1e-4f) && ep0.b == ep0.b && ep1.b == ep1.b) - { - ep->endpt0[i].b = ep0.b; - ep->endpt1[i].b = ep1.b; - } - - if (plane2_color_component == 3 && fabsf(color_det2.a) > (color_mss2.a * 1e-4f) && ep0.a == ep0.a && ep1.a == ep1.a) - { - ep->endpt0[i].a = ep0.a; - ep->endpt1[i].a = ep1.a; - } + vfloat4 color_det2 = (left2_sum * right2_sum) - (middle2_sum * middle2_sum); + vfloat4 color_rdet2 = 1.0f / color_det2; + + vfloat4 color_mss2 = (left2_sum * left2_sum) + + (2.0f * middle2_sum * middle2_sum) + + (right2_sum * right2_sum); + + vfloat4 ep0 = (right2_sum * color_vec_x - middle2_sum * color_vec_y) * color_rdet2; + vfloat4 ep1 = (left2_sum * color_vec_y - middle2_sum * color_vec_x) * color_rdet2; + + vmask4 p2_mask = vint4::lane_id() == vint4(plane2_color_component); + vmask4 det_mask = abs(color_det2) > (color_mss2 * 1e-4f); + vmask4 notnan_mask = (ep0 == ep0) & (ep1 == ep1); + vmask4 full_mask = p2_mask & det_mask & notnan_mask; + + ep->endpt0[i] = select(ep->endpt0[i], ep0, full_mask); + ep->endpt1[i] = select(ep->endpt1[i], ep1, full_mask); #ifdef DEBUG_CAPTURE_NAN feenableexcept(FE_DIVBYZERO | FE_INVALID); @@ -1672,19 +1549,259 @@ void recompute_ideal_colors( // a somewhat-sensible value anyway if (rgbo_fail) { - float4 v0 = ep->endpt0[i]; - float4 v1 = ep->endpt1[i]; - float avgdif = ((v1.r - v0.r) + (v1.g - v0.g) + (v1.b - v0.b)) * (1.0f / 3.0f); + vfloat4 v0 = ep->endpt0[i]; + vfloat4 v1 = ep->endpt1[i]; + float avgdif = hadd_rgb_s(v1 - v0) * (1.0f / 3.0f); + avgdif = astc::max(avgdif, 0.0f); + + vfloat4 avg = (v0 + v1) * 0.5f; + vfloat4 ep0 = avg - vfloat4(avgdif) * 0.5f; + + rgbo_vectors[i] = vfloat4(ep0.lane<0>(), ep0.lane<1>(), ep0.lane<2>(), avgdif); + } + } +} + +/* for a given weight set, we wish to recompute the colors so that they are optimal for a particular weight set. */ +void recompute_ideal_colors_1plane( + int weight_quant_mode, + endpoints* ep, // contains the endpoints we wish to update + vfloat4* rgbs_vectors, // used to return RGBS-vectors for endpoint mode #6 + vfloat4* rgbo_vectors, // used to return RGBO-vectors for endpoint mode #7 + const uint8_t* weight_set8, // the current set of weight values + const partition_info* pt, + const decimation_table* dt, + const imageblock* blk, // picture-block containing the actual data. 
+ const error_weight_block* ewb +) { + int weight_count = dt->weight_count; + int partition_count = pt->partition_count; + + promise(weight_count > 0); + promise(partition_count > 0); + + const quantization_and_transfer_table *qat = &(quant_and_xfer_tables[weight_quant_mode]); + + float weight_set[MAX_WEIGHTS_PER_BLOCK]; + for (int i = 0; i < weight_count; i++) + { + weight_set[i] = qat->unquantized_value[weight_set8[i]] * (1.0f / 64.0f); + } + + for (int i = 0; i < partition_count; i++) + { + vfloat4 rgba_sum(1e-17f); + vfloat4 rgba_weight_sum(1e-17f); + + int texelcount = pt->partition_texel_count[i]; + const uint8_t *texel_indexes = pt->texels_of_partition[i]; + + promise(texelcount > 0); + for (int j = 0; j < texelcount; j++) + { + int tix = texel_indexes[j]; + + vfloat4 rgba = blk->texel(tix); + vfloat4 error_weight(ewb->texel_weight_r[tix], ewb->texel_weight_g[tix], ewb->texel_weight_b[tix], ewb->texel_weight_a[tix]); + + rgba_sum = rgba_sum + (rgba * error_weight); + rgba_weight_sum = rgba_weight_sum + error_weight; + } + + vfloat4 scale_direction = normalize((rgba_sum * (1.0f / rgba_weight_sum)).swz<0, 1, 2>()); + + float scale_max = 0.0f; + float scale_min = 1e10f; + + float wmin1 = 1.0f; + float wmax1 = 0.0f; + + vfloat4 left_sum = vfloat4::zero(); + vfloat4 middle_sum = vfloat4::zero(); + vfloat4 right_sum = vfloat4::zero(); + + vfloat4 lmrs_sum = vfloat4(0.0f); + + vfloat4 color_vec_x = vfloat4::zero(); + vfloat4 color_vec_y = vfloat4::zero(); + + float2 scale_vec = float2(0.0f); + + vfloat4 weight_weight_sum = vfloat4(1e-17f); + float psum = 1e-17f; + + // FIXME: the loop below has too many responsibilities, making it inefficient. + for (int j = 0; j < texelcount; j++) + { + int tix = texel_indexes[j]; + + vfloat4 rgba = blk->texel(tix); + vfloat4 color_weight(ewb->texel_weight_r[tix], ewb->texel_weight_g[tix], ewb->texel_weight_b[tix], ewb->texel_weight_a[tix]); + + vfloat4 color_weight3 = color_weight.swz<0, 1, 2>(); + vfloat4 rgb = rgba.swz<0, 1, 2>(); + + // FIXME: move this calculation out to the color block. 
+ float ls_weight = hadd_rgb_s(color_weight); + + const uint8_t *texel_weights = dt->texel_weights_t4[tix]; + const float *texel_weights_float = dt->texel_weights_float_t4[tix]; + float idx0 = (weight_set[texel_weights[0]] * texel_weights_float[0] + + weight_set[texel_weights[1]] * texel_weights_float[1]) + + (weight_set[texel_weights[2]] * texel_weights_float[2] + + weight_set[texel_weights[3]] * texel_weights_float[3]); + + float om_idx0 = 1.0f - idx0; + wmin1 = astc::min(idx0, wmin1); + wmax1 = astc::max(idx0, wmax1); + + float scale = dot3_s(scale_direction, rgb); + scale_min = astc::min(scale, scale_min); + scale_max = astc::max(scale, scale_max); + + vfloat4 left = color_weight * (om_idx0 * om_idx0); + vfloat4 middle = color_weight * (om_idx0 * idx0); + vfloat4 right = color_weight * (idx0 * idx0); + + vfloat4 lmrs = vfloat4(om_idx0 * om_idx0, + om_idx0 * idx0, + idx0 * idx0, + 0.0f) * ls_weight; + + left_sum = left_sum + left; + middle_sum = middle_sum + middle; + right_sum = right_sum + right; + + lmrs_sum = lmrs_sum + lmrs; + + vfloat4 color_idx(idx0); + vfloat4 color_idx3(idx0); + + vfloat4 cwprod = color_weight * rgba; + vfloat4 cwiprod = cwprod * color_idx; + + color_vec_y = color_vec_y + cwiprod; + color_vec_x = color_vec_x + (cwprod - cwiprod); + + scale_vec = scale_vec + float2(om_idx0, idx0) * (ls_weight * scale); + + weight_weight_sum = weight_weight_sum + (color_weight3 * color_idx3); + + psum += dot3_s(color_weight3 * color_idx3, color_idx3); + } + + // calculations specific to mode #7, the HDR RGB-scale mode. + // FIXME: Can we skip this for LDR textures? + float red_sum = color_vec_x.lane<0>() + color_vec_y.lane<0>(); + float green_sum = color_vec_x.lane<1>() + color_vec_y.lane<1>(); + float blue_sum = color_vec_x.lane<2>() + color_vec_y.lane<2>(); + float qsum = hadd_rgb_s(color_vec_y); + + #ifdef DEBUG_CAPTURE_NAN + fedisableexcept(FE_DIVBYZERO | FE_INVALID); + #endif + + vfloat4 rgbovec = compute_rgbovec(rgba_weight_sum, weight_weight_sum, + red_sum, green_sum, blue_sum, psum, qsum); + rgbo_vectors[i] = rgbovec; + + // We will occasionally get a failure due to the use of a singular + // (non-invertible) matrix. Record whether such a failure has taken + // place; if it did, compute rgbo_vectors[] with a different method + // later on. + float chkval = dot_s(rgbovec, rgbovec); + int rgbo_fail = chkval != chkval; + + // Initialize the luminance and scale vectors with a reasonable + // default, just in case the subsequent calculation blows up. + #ifdef DEBUG_CAPTURE_NAN + fedisableexcept(FE_DIVBYZERO | FE_INVALID); + #endif + + float scalediv = scale_min * (1.0f / astc::max(scale_max, 1e-10f)); + scalediv = astc::clamp1f(scalediv); + + #ifdef DEBUG_CAPTURE_NAN + feenableexcept(FE_DIVBYZERO | FE_INVALID); + #endif + + vfloat4 sds = scale_direction * scale_max; + + rgbs_vectors[i] = vfloat4(sds.lane<0>(), sds.lane<1>(), sds.lane<2>(), scalediv); + + if (wmin1 >= wmax1 * 0.999f) + { + // if all weights in the partition were equal, then just take average + // of all colors in the partition and use that as both endpoint colors. 
+ vfloat4 avg = (color_vec_x + color_vec_y) * (1.0f / rgba_weight_sum); + + vmask4 notnan_mask = avg == avg; + ep->endpt0[i] = select(ep->endpt0[i], avg, notnan_mask); + ep->endpt1[i] = select(ep->endpt1[i], avg, notnan_mask); + + rgbs_vectors[i] = vfloat4(sds.lane<0>(), sds.lane<1>(), sds.lane<2>(), 1.0f); + } + else + { + // otherwise, complete the analytic calculation of ideal-endpoint-values + // for the given set of texel weights and pixel colors. + + #ifdef DEBUG_CAPTURE_NAN + fedisableexcept(FE_DIVBYZERO | FE_INVALID); + #endif + + vfloat4 color_det1 = (left_sum * right_sum) - (middle_sum * middle_sum); + vfloat4 color_rdet1 = 1.0f / color_det1; + + float ls_det1 = (lmrs_sum.lane<0>() * lmrs_sum.lane<2>()) - (lmrs_sum.lane<1>() * lmrs_sum.lane<1>()); + float ls_rdet1 = 1.0f / ls_det1; + + vfloat4 color_mss1 = (left_sum * left_sum) + + (2.0f * middle_sum * middle_sum) + + (right_sum * right_sum); + + float ls_mss1 = (lmrs_sum.lane<0>() * lmrs_sum.lane<0>()) + + (2.0f * lmrs_sum.lane<1>() * lmrs_sum.lane<1>()) + + (lmrs_sum.lane<2>() * lmrs_sum.lane<2>()); + + vfloat4 ep0 = (right_sum * color_vec_x - middle_sum * color_vec_y) * color_rdet1; + vfloat4 ep1 = (left_sum * color_vec_y - middle_sum * color_vec_x) * color_rdet1; + + vmask4 det_mask = abs(color_det1) > (color_mss1 * 1e-4f); + vmask4 notnan_mask = (ep0 == ep0) & (ep1 == ep1); + vmask4 full_mask = det_mask & notnan_mask; + + ep->endpt0[i] = select(ep->endpt0[i], ep0, full_mask); + ep->endpt1[i] = select(ep->endpt1[i], ep1, full_mask); - if (avgdif <= 0.0f) + float scale_ep0 = (lmrs_sum.lane<2>() * scale_vec.r - lmrs_sum.lane<1>() * scale_vec.g) * ls_rdet1; + float scale_ep1 = (lmrs_sum.lane<0>() * scale_vec.g - lmrs_sum.lane<1>() * scale_vec.r) * ls_rdet1; + + if (fabsf(ls_det1) > (ls_mss1 * 1e-4f) && scale_ep0 == scale_ep0 && scale_ep1 == scale_ep1 && scale_ep0 < scale_ep1) { - avgdif = 0.0f; + float scalediv2 = scale_ep0 * (1.0f / scale_ep1); + vfloat4 sdsm = scale_direction * scale_ep1; + rgbs_vectors[i] = vfloat4(sdsm.lane<0>(), sdsm.lane<1>(), sdsm.lane<2>(), scalediv2); } - float4 avg = (v0 + v1) * 0.5f; - float4 ep0 = avg - float4(avgdif, avgdif, avgdif, avgdif) * 0.5f; + #ifdef DEBUG_CAPTURE_NAN + feenableexcept(FE_DIVBYZERO | FE_INVALID); + #endif + } + + // if the calculation of an RGB-offset vector failed, try to compute + // a somewhat-sensible value anyway + if (rgbo_fail) + { + vfloat4 v0 = ep->endpt0[i]; + vfloat4 v1 = ep->endpt1[i]; + float avgdif = ((v1.lane<0>() - v0.lane<0>()) + (v1.lane<1>() - v0.lane<1>()) + (v1.lane<2>() - v0.lane<2>())) * (1.0f / 3.0f); + avgdif = astc::max(avgdif, 0.0f); + + vfloat4 avg = (v0 + v1) * 0.5f; + vfloat4 ep0 = avg - vfloat4(avgdif) * 0.5f; - rgbo_vectors[i] = float4(ep0.r, ep0.g, ep0.b, avgdif); + rgbo_vectors[i] = vfloat4(ep0.lane<0>(), ep0.lane<1>(), ep0.lane<2>(), avgdif); } } } diff --git a/libkram/astc-encoder/astcenc_image.cpp b/libkram/astc-encoder/astcenc_image.cpp index 50eceffc..1d67c01b 100644 --- a/libkram/astc-encoder/astcenc_image.cpp +++ b/libkram/astc-encoder/astcenc_image.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. 
You may obtain a copy @@ -24,256 +24,108 @@ #include "astcenc_internal.h" -// hack in 2d array support for f32 on encode, and u8 on decode -#define USE_2DARRAY 1 - -// conversion functions between the LNS representation and the FP16 representation. -static float float_to_lns(float p) -{ - if (astc::isnan(p) || p <= 1.0f / 67108864.0f) - { - // underflow or NaN value, return 0. - // We count underflow if the input value is smaller than 2^-26. - return 0.0f; - } - - if (fabsf(p) >= 65536.0f) - { - // overflow, return a +INF value - return 65535.0f; - } - - int expo; - float normfrac = frexpf(p, &expo); - float p1; - if (expo < -13) - { - // input number is smaller than 2^-14. In this case, multiply by 2^25. - p1 = p * 33554432.0f; - expo = 0; - } - else - { - expo += 14; - p1 = (normfrac - 0.5f) * 4096.0f; - } - - if (p1 < 384.0f) - p1 *= 4.0f / 3.0f; - else if (p1 <= 1408.0f) - p1 += 128.0f; - else - p1 = (p1 + 512.0f) * (4.0f / 5.0f); - - p1 += ((float)expo) * 2048.0f; - return p1 + 1.0f; -} - -static uint16_t lns_to_sf16(uint16_t p) -{ - uint16_t mc = p & 0x7FF; - uint16_t ec = p >> 11; - uint16_t mt; - if (mc < 512) - mt = 3 * mc; - else if (mc < 1536) - mt = 4 * mc - 512; - else - mt = 5 * mc - 2048; - - uint16_t res = (ec << 10) | (mt >> 3); - if (res >= 0x7BFF) - res = 0x7BFF; - return res; -} - -// conversion function from 16-bit LDR value to FP16. -// note: for LDR interpolation, it is impossible to get a denormal result; -// this simplifies the conversion. -// FALSE; we can receive a very small UNORM16 through the constant-block. -uint16_t unorm16_to_sf16(uint16_t p) -{ - if (p == 0xFFFF) - return 0x3C00; // value of 1.0 . - if (p < 4) - return p << 8; - - int lz = clz32(p) - 16; - p <<= (lz + 1); - p >>= 6; - p |= (14 - lz) << 10; - return p; -} - void imageblock_initialize_deriv( - const imageblock* pb, + const imageblock* blk, int pixelcount, - float4* dptr + vfloat4* dptr ) { + // TODO: For LDR on the current codec we can skip this if no LNS and just + // early-out as we use the same LNS settings everywhere ... for (int i = 0; i < pixelcount; i++) { - // compute derivatives for RGB first - if (pb->rgb_lns[i]) + vfloat4 derv_unorm(65535.0f); + vfloat4 derv_lns = vfloat4::zero(); + + // TODO: Pack these into bits and avoid the disjoint fetch + int rgb_lns = blk->rgb_lns[i]; + int a_lns = blk->alpha_lns[i]; + + // Compute derivatives if we have any use of LNS + if (rgb_lns || a_lns) { - float3 fdata = float3(pb->data_r[i], pb->data_g[i], pb->data_b[i]); - fdata.r = sf16_to_float(lns_to_sf16((uint16_t)fdata.r)); - fdata.g = sf16_to_float(lns_to_sf16((uint16_t)fdata.g)); - fdata.b = sf16_to_float(lns_to_sf16((uint16_t)fdata.b)); - - float r = MAX(fdata.r, 6e-5f); - float g = MAX(fdata.g, 6e-5f); - float b = MAX(fdata.b, 6e-5f); - - float rderiv = (float_to_lns(r * 1.05f) - float_to_lns(r)) / (r * 0.05f); - float gderiv = (float_to_lns(g * 1.05f) - float_to_lns(g)) / (g * 0.05f); - float bderiv = (float_to_lns(b * 1.05f) - float_to_lns(b)) / (b * 0.05f); - - // the derivative may not actually take values smaller than 1/32 or larger than 2^25; - // if it does, we clamp it. 
- if (rderiv < (1.0f / 32.0f)) - { - rderiv = (1.0f / 32.0f); - } - else if (rderiv > 33554432.0f) - { - rderiv = 33554432.0f; - } + vfloat4 data = blk->texel(i); + vint4 datai = lns_to_sf16(float_to_int(data)); - if (gderiv < (1.0f / 32.0f)) - { - gderiv = (1.0f / 32.0f); - } - else if (gderiv > 33554432.0f) - { - gderiv = 33554432.0f; - } + vfloat4 dataf = float16_to_float(datai); + dataf = max(dataf, 6e-5f); - if (bderiv < (1.0f / 32.0f)) - { - bderiv = (1.0f / 32.0f); - } - else if (bderiv > 33554432.0f) - { - bderiv = 33554432.0f; - } + vfloat4 data_lns1 = dataf * 1.05f; + data_lns1 = float_to_lns(data_lns1); - dptr->r = rderiv; - dptr->g = gderiv; - dptr->b = bderiv; - } - else - { - dptr->r = 65535.0f; - dptr->g = 65535.0f; - dptr->b = 65535.0f; - } + vfloat4 data_lns2 = dataf; + data_lns2 = float_to_lns(data_lns2); - // then compute derivatives for Alpha - if (pb->alpha_lns[i]) - { - float fdata = pb->data_a[i]; - fdata = sf16_to_float(lns_to_sf16((uint16_t)fdata)); - - float a = MAX(fdata, 6e-5f); - float aderiv = (float_to_lns(a * 1.05f) - float_to_lns(a)) / (a * 0.05f); - // the derivative may not actually take values smaller than 1/32 or larger than 2^25; - // if it does, we clamp it. - if (aderiv < (1.0f / 32.0f)) - { - aderiv = (1.0f / 32.0f); - } - else if (aderiv > 33554432.0f) - { - aderiv = 33554432.0f; - } + vfloat4 divisor_lns = dataf * 0.05f; - dptr->a = aderiv; - } - else - { - dptr->a = 65535.0f; + // Clamp derivatives between 1/32 and 2^25 + float lo = 1.0f / 32.0f; + float hi = 33554432.0f; + derv_lns = clamp(lo, hi, (data_lns1 - data_lns2) / divisor_lns); } + vint4 use_lns(rgb_lns, rgb_lns, rgb_lns, a_lns); + vmask4 lns_mask = use_lns != vint4::zero(); + *dptr = select(derv_unorm, derv_lns, lns_mask); dptr++; } } // helper function to initialize the work-data from the orig-data -void imageblock_initialize_work_from_orig( - imageblock* pb, +static void imageblock_initialize_work_from_orig( + imageblock* blk, int pixelcount ) { - pb->origin_texel = float4(pb->data_r[0], pb->data_g[0], - pb->data_b[0], pb->data_a[0]); + blk->origin_texel = blk->texel(0); + + vfloat4 data_min(1e38f); + vfloat4 data_max(-1e38f); + bool grayscale = true; for (int i = 0; i < pixelcount; i++) { - float4 inc = float4(pb->data_r[i], pb->data_g[i], - pb->data_b[i], pb->data_a[i]); + vfloat4 data = blk->texel(i); + vfloat4 color_lns = vfloat4::zero(); + vfloat4 color_unorm = data * 65535.0f; - if (pb->rgb_lns[i]) - { - pb->data_r[i] = float_to_lns(inc.r); - pb->data_g[i] = float_to_lns(inc.g); - pb->data_b[i] = float_to_lns(inc.b); - } - else - { - pb->data_r[i] = inc.r * 65535.0f; - pb->data_g[i] = inc.g * 65535.0f; - pb->data_b[i] = inc.b * 65535.0f; - } + int rgb_lns = blk->rgb_lns[i]; + int a_lns = blk->alpha_lns[i]; - if (pb->alpha_lns[i]) + if (rgb_lns || a_lns) { - pb->data_a[i] = float_to_lns(inc.a); + color_lns = float_to_lns(data); } - else - { - pb->data_a[i] = inc.a * 65535.0f; - } - } -} -// helper function to initialize the orig-data from the work-data -void imageblock_initialize_orig_from_work( - imageblock* pb, - int pixelcount -) { - for (int i = 0; i < pixelcount; i++) - { - float4 inc = float4(pb->data_r[i], pb->data_g[i], - pb->data_b[i], pb->data_a[i]); + vint4 use_lns(rgb_lns, rgb_lns, rgb_lns, a_lns); + vmask4 lns_mask = use_lns != vint4::zero(); + data = select(color_unorm, color_lns, lns_mask); - if (pb->rgb_lns[i]) - { - pb->data_r[i] = sf16_to_float(lns_to_sf16((uint16_t)inc.r)); - pb->data_g[i] = sf16_to_float(lns_to_sf16((uint16_t)inc.g)); - pb->data_b[i] = 
sf16_to_float(lns_to_sf16((uint16_t)inc.b)); - } - else - { - pb->data_r[i] = sf16_to_float(unorm16_to_sf16((uint16_t)inc.r)); - pb->data_g[i] = sf16_to_float(unorm16_to_sf16((uint16_t)inc.g)); - pb->data_b[i] = sf16_to_float(unorm16_to_sf16((uint16_t)inc.b)); - } + // Compute block metadata + data_min = min(data_min, data); + data_max = max(data_max, data); - if (pb->alpha_lns[i]) + if (grayscale && (data.lane<0>() != data.lane<1>() || data.lane<0>() != data.lane<2>())) { - pb->data_a[i] = sf16_to_float(lns_to_sf16((uint16_t)inc.a)); - } - else - { - pb->data_a[i] = sf16_to_float(unorm16_to_sf16((uint16_t)inc.a)); + grayscale = false; } + + // Store block data + blk->data_r[i] = data.lane<0>(); + blk->data_g[i] = data.lane<1>(); + blk->data_b[i] = data.lane<2>(); + blk->data_a[i] = data.lane<3>(); } + + // Store block metadata + blk->data_min = data_min; + blk->data_max = data_max; + blk->grayscale = grayscale; } // fetch an imageblock from the input file. void fetch_imageblock( astcenc_profile decode_mode, const astcenc_image& img, - imageblock* pb, // picture-block to initialize with image data + imageblock* blk, // picture-block to initialize with image data const block_size_descriptor* bsd, // position in texture. int xpos, @@ -285,9 +137,9 @@ void fetch_imageblock( int ysize = img.dim_y; int zsize = img.dim_z; - pb->xpos = xpos; - pb->ypos = ypos; - pb->zpos = zpos; + blk->xpos = xpos; + blk->ypos = ypos; + blk->zpos = zpos; // True if any non-identity swizzle bool needs_swz = (swz.r != ASTCENC_SWZ_R) || (swz.g != ASTCENC_SWZ_G) || @@ -300,49 +152,24 @@ void fetch_imageblock( data[ASTCENC_SWZ_0] = 0x00; data[ASTCENC_SWZ_1] = 0xFF; -#if USE_2DARRAY - uint8_t* data8 = static_cast(img.data); -#else - uint8_t*** data8 = static_cast(img.data); -#endif for (int z = 0; z < bsd->zdim; z++) { - int zi = zpos + z; - if (zi < 0) - zi = 0; - if (zi >= zsize) - zi = zsize - 1; + int zi = astc::min(zpos + z, zsize - 1); + uint8_t* data8 = static_cast(img.data[zi]); for (int y = 0; y < bsd->ydim; y++) { - int yi = ypos + y; - if (yi < 0) - yi = 0; - if (yi >= ysize) - yi = ysize - 1; - + int yi = astc::min(ypos + y, ysize - 1); + for (int x = 0; x < bsd->xdim; x++) { - int xi = xpos + x; - if (xi < 0) - xi = 0; - if (xi >= xsize) - xi = xsize - 1; -#if USE_2DARRAY - int px = (yi * xsize + xi) * 4; - assert(zi == 0); - assert(px >= 0 && px < (xsize * ysize * 4)); - - int r = data8[px + 0]; - int g = data8[px + 1]; - int b = data8[px + 2]; - int a = data8[px + 3]; -#else - int r = data8[zi][yi][4 * xi ]; - int g = data8[zi][yi][4 * xi + 1]; - int b = data8[zi][yi][4 * xi + 2]; - int a = data8[zi][yi][4 * xi + 3]; -#endif + int xi = astc::min(xpos + x, xsize - 1); + + int r = data8[(4 * xsize * yi) + (4 * xi )]; + int g = data8[(4 * xsize * yi) + (4 * xi + 1)]; + int b = data8[(4 * xsize * yi) + (4 * xi + 2)]; + int a = data8[(4 * xsize * yi) + (4 * xi + 3)]; + if (needs_swz) { data[ASTCENC_SWZ_R] = r; @@ -356,10 +183,10 @@ void fetch_imageblock( a = data[swz.a]; } - pb->data_r[idx] = r / 255.0f; - pb->data_g[idx] = g / 255.0f; - pb->data_b[idx] = b / 255.0f; - pb->data_a[idx] = a / 255.0f; + blk->data_r[idx] = static_cast(r) / 255.0f; + blk->data_g[idx] = static_cast(g) / 255.0f; + blk->data_b[idx] = static_cast(b) / 255.0f; + blk->data_a[idx] = static_cast(a) / 255.0f; idx++; } } @@ -371,34 +198,23 @@ void fetch_imageblock( data[ASTCENC_SWZ_0] = 0x0000; data[ASTCENC_SWZ_1] = 0x3C00; - uint16_t*** data16 = static_cast(img.data); for (int z = 0; z < bsd->zdim; z++) { + int zi = astc::min(zpos + z, 
zsize - 1); + uint16_t* data16 = static_cast(img.data[zi]); + for (int y = 0; y < bsd->ydim; y++) { + int yi = astc::min(ypos + y, ysize - 1); + for (int x = 0; x < bsd->xdim; x++) { - int xi = xpos + x; - int yi = ypos + y; - int zi = zpos + z; - // clamp XY coordinates to the picture. - if (xi < 0) - xi = 0; - if (yi < 0) - yi = 0; - if (zi < 0) - zi = 0; - if (xi >= xsize) - xi = xsize - 1; - if (yi >= ysize) - yi = ysize - 1; - if (zi >= ysize) - zi = zsize - 1; - - int r = data16[zi][yi][4 * xi ]; - int g = data16[zi][yi][4 * xi + 1]; - int b = data16[zi][yi][4 * xi + 2]; - int a = data16[zi][yi][4 * xi + 3]; + int xi = astc::min(xpos + x, xsize - 1); + + int r = data16[(4 * xsize * yi) + (4 * xi )]; + int g = data16[(4 * xsize * yi) + (4 * xi + 1)]; + int b = data16[(4 * xsize * yi) + (4 * xi + 2)]; + int a = data16[(4 * xsize * yi) + (4 * xi + 3)]; if (needs_swz) { @@ -413,10 +229,11 @@ void fetch_imageblock( a = data[swz.a]; } - pb->data_r[idx] = MAX(sf16_to_float(r), 1e-8f); - pb->data_g[idx] = MAX(sf16_to_float(g), 1e-8f); - pb->data_b[idx] = MAX(sf16_to_float(b), 1e-8f); - pb->data_a[idx] = MAX(sf16_to_float(a), 1e-8f); + vfloat4 dataf = max(float16_to_float(vint4(r, g, b, a)), 1e-8f); + blk->data_r[idx] = dataf.lane<0>(); + blk->data_g[idx] = dataf.lane<1>(); + blk->data_b[idx] = dataf.lane<2>(); + blk->data_a[idx] = dataf.lane<3>(); idx++; } } @@ -430,76 +247,25 @@ void fetch_imageblock( data[ASTCENC_SWZ_0] = 0.0f; data[ASTCENC_SWZ_1] = 1.0f; -#if USE_2DARRAY - float4* data32 = static_cast(img.data); -#else - float*** data32 = static_cast(img.data); -#endif for (int z = 0; z < bsd->zdim; z++) { - int zi = zpos + z; - if (zi < 0) - zi = 0; - if (zi >= ysize) - zi = zsize - 1; - + int zi = astc::min(zpos + z, zsize - 1); + float* data32 = static_cast(img.data[zi]); + for (int y = 0; y < bsd->ydim; y++) { - int yi = ypos + y; - if (yi < 0) - yi = 0; - if (yi >= ysize) - yi = ysize - 1; - + int yi = astc::min(ypos + y, ysize - 1); + for (int x = 0; x < bsd->xdim; x++) { - // clamp XY coordinates to the picture. - int xi = xpos + x; - if (xi < 0) - xi = 0; - if (xi >= xsize) - xi = xsize - 1; - -#if USE_2DARRAY - int px = (yi * xsize + xi); // * 4; - assert(zi == 0); - assert(px >= 0 && px < (xsize * ysize)); - float4 val = data32[px]; - val = max(val, float4(1e-8f)); // why can't this 0, the U8 Path does? - - if (needs_swz) - { - // prob best as a swizzle, and then select in 0/1 elements - // instead of a 6 array lookup which isn't simd compatible. 
- float r = val.r; - float g = val.g; - float b = val.b; - float a = val.a; - - data[ASTCENC_SWZ_R] = r; - data[ASTCENC_SWZ_G] = g; - data[ASTCENC_SWZ_B] = b; - data[ASTCENC_SWZ_A] = a; - - val.r = data[swz.r]; - val.g = data[swz.g]; - val.b = data[swz.b]; - val.a = data[swz.a]; - } - - // ugh, this pulls out of simd to planar - pb->data_r[idx] = val.r; - pb->data_g[idx] = val.g; - pb->data_b[idx] = val.b; - pb->data_a[idx] = val.a; - -#else - float r = data32[zi][yi][4 * xi ]; - float g = data32[zi][yi][4 * xi + 1]; - float b = data32[zi][yi][4 * xi + 2]; - float a = data32[zi][yi][4 * xi + 3]; - - if (needs_swz) + int xi = astc::min(xpos + x, xsize - 1); + + float r = data32[(4 * xsize * yi) + (4 * xi )]; + float g = data32[(4 * xsize * yi) + (4 * xi + 1)]; + float b = data32[(4 * xsize * yi) + (4 * xi + 2)]; + float a = data32[(4 * xsize * yi) + (4 * xi + 3)]; + + if (needs_swz) { data[ASTCENC_SWZ_R] = r; data[ASTCENC_SWZ_G] = g; @@ -511,12 +277,11 @@ void fetch_imageblock( b = data[swz.b]; a = data[swz.a]; } - - pb->data_r[idx] = MAX(r, 1e-8f); - pb->data_g[idx] = MAX(g, 1e-8f); - pb->data_b[idx] = MAX(b, 1e-8f); - pb->data_a[idx] = MAX(a, 1e-8f); -#endif + + blk->data_r[idx] = astc::max(r, 1e-8f); + blk->data_g[idx] = astc::max(g, 1e-8f); + blk->data_b[idx] = astc::max(b, 1e-8f); + blk->data_a[idx] = astc::max(a, 1e-8f); idx++; } } @@ -529,18 +294,17 @@ void fetch_imageblock( // impose the choice on every pixel when encoding. for (int i = 0; i < bsd->texel_count; i++) { - pb->rgb_lns[i] = rgb_lns; - pb->alpha_lns[i] = alpha_lns; - pb->nan_texel[i] = 0; + blk->rgb_lns[i] = rgb_lns; + blk->alpha_lns[i] = alpha_lns; + blk->nan_texel[i] = 0; } - imageblock_initialize_work_from_orig(pb, bsd->texel_count); - update_imageblock_flags(pb, bsd->xdim, bsd->ydim, bsd->zdim); + imageblock_initialize_work_from_orig(blk, bsd->texel_count); } void write_imageblock( astcenc_image& img, - const imageblock* pb, // picture-block to initialize with image data. We assume that orig_data is valid. + const imageblock* blk, // picture-block to initialize with image data. We assume that orig_data is valid. 
const block_size_descriptor* bsd, // position to write the block to int xpos, @@ -548,11 +312,22 @@ void write_imageblock( int zpos, astcenc_swizzle swz ) { - const uint8_t *nptr = pb->nan_texel; + const uint8_t *nptr = blk->nan_texel; int xsize = img.dim_x; int ysize = img.dim_y; int zsize = img.dim_z; + int x_start = xpos; + int x_end = std::min(xsize, xpos + bsd->xdim); + int x_nudge = bsd->xdim - (x_end - x_start); + + int y_start = ypos; + int y_end = std::min(ysize, ypos + bsd->ydim); + int y_nudge = (bsd->ydim - (y_end - y_start)) * bsd->xdim; + + int z_start = zpos; + int z_end = std::min(zsize, zpos + bsd->zdim); + float data[7]; data[ASTCENC_SWZ_0] = 0.0f; data[ASTCENC_SWZ_1] = 1.0f; @@ -568,280 +343,174 @@ void write_imageblock( int idx = 0; if (img.data_type == ASTCENC_TYPE_U8) { -#if USE_2DARRAY - uint8_t* data8 = static_cast(img.data); -#else - uint8_t*** data8 = static_cast(img.data); -#endif - for (int z = 0; z < bsd->zdim; z++) + for (int z = z_start; z < z_end; z++) { - for (int y = 0; y < bsd->ydim; y++) + // Fetch the image plane + uint8_t* data8 = static_cast(img.data[z]); + + for (int y = y_start; y < y_end; y++) { - for (int x = 0; x < bsd->xdim; x++) + for (int x = x_start; x < x_end; x++) { - int xi = xpos + x; - int yi = ypos + y; - int zi = zpos + z; + vint4 colori = vint4::zero(); - if (xi >= 0 && yi >= 0 && zi >= 0 && xi < xsize && yi < ysize && zi < zsize) + if (*nptr) { - int ri, gi, bi, ai; + // Can't display NaN - show magenta error color + colori = vint4(0xFF, 0x00, 0xFF, 0xFF); + } + else if (needs_swz) + { + data[ASTCENC_SWZ_R] = blk->data_r[idx]; + data[ASTCENC_SWZ_G] = blk->data_g[idx]; + data[ASTCENC_SWZ_B] = blk->data_b[idx]; + data[ASTCENC_SWZ_A] = blk->data_a[idx]; - if (*nptr) - { - // NaN-pixel, but we can't display it. Display purple instead. 
- ri = 0xFF; - gi = 0x00; - bi = 0xFF; - ai = 0xFF; - } - else if (needs_swz) + if (needs_z) { - data[ASTCENC_SWZ_R] = pb->data_r[idx]; - data[ASTCENC_SWZ_G] = pb->data_g[idx]; - data[ASTCENC_SWZ_B] = pb->data_b[idx]; - data[ASTCENC_SWZ_A] = pb->data_a[idx]; - - if (needs_z) + float xcoord = (data[0] * 2.0f) - 1.0f; + float ycoord = (data[3] * 2.0f) - 1.0f; + float zcoord = 1.0f - xcoord * xcoord - ycoord * ycoord; + if (zcoord < 0.0f) { - float xcoord = (data[0] * 2.0f) - 1.0f; - float ycoord = (data[3] * 2.0f) - 1.0f; - float zcoord = 1.0f - xcoord * xcoord - ycoord * ycoord; - if (zcoord < 0.0f) - { - zcoord = 0.0f; - } - data[ASTCENC_SWZ_Z] = (astc::sqrt(zcoord) * 0.5f) + 0.5f; + zcoord = 0.0f; } - - ri = astc::flt2int_rtn(MIN(data[swz.r], 1.0f) * 255.0f); - gi = astc::flt2int_rtn(MIN(data[swz.g], 1.0f) * 255.0f); - bi = astc::flt2int_rtn(MIN(data[swz.b], 1.0f) * 255.0f); - ai = astc::flt2int_rtn(MIN(data[swz.a], 1.0f) * 255.0f); + data[ASTCENC_SWZ_Z] = (astc::sqrt(zcoord) * 0.5f) + 0.5f; } - else - { - ri = astc::flt2int_rtn(MIN(pb->data_r[idx], 1.0f) * 255.0f); - gi = astc::flt2int_rtn(MIN(pb->data_g[idx], 1.0f) * 255.0f); - bi = astc::flt2int_rtn(MIN(pb->data_b[idx], 1.0f) * 255.0f); - ai = astc::flt2int_rtn(MIN(pb->data_a[idx], 1.0f) * 255.0f); - } -#if USE_2DARRAY - int px = (yi * xsize + xi) * 4; - assert(zi == 0); - assert(px >= 0 && px < (xsize * ysize * 4)); - - data8[px + 0] = ri; - data8[px + 1] = gi; - data8[px + 2] = bi; - data8[px + 3] = ai; - -#else - data8[zi][yi][4 * xi ] = ri; - data8[zi][yi][4 * xi + 1] = gi; - data8[zi][yi][4 * xi + 2] = bi; - data8[zi][yi][4 * xi + 3] = ai; -#endif + + vfloat4 color = vfloat4(data[swz.r], data[swz.g], data[swz.b], data[swz.a]); + colori = float_to_int_rtn(min(color, 1.0f) * 255.0f); } + else + { + vfloat4 color = blk->texel(idx); + colori = float_to_int_rtn(min(color, 1.0f) * 255.0f); + } + + colori = pack_low_bytes(colori); + store_nbytes(colori, data8 + (4 * xsize * y) + (4 * x )); + idx++; nptr++; } + idx += x_nudge; + nptr += x_nudge; } + idx += y_nudge; + nptr += y_nudge; } } else if (img.data_type == ASTCENC_TYPE_F16) { - uint16_t*** data16 = static_cast(img.data); - for (int z = 0; z < bsd->zdim; z++) + for (int z = z_start; z < z_end; z++) { - for (int y = 0; y < bsd->ydim; y++) + // Fetch the image plane + uint16_t* data16 = static_cast(img.data[z]); + + for (int y = y_start; y < y_end; y++) { - for (int x = 0; x < bsd->xdim; x++) + for (int x = x_start; x < x_end; x++) { - int xi = xpos + x; - int yi = ypos + y; - int zi = zpos + z; + vint4 color; - if (xi >= 0 && yi >= 0 && zi >= 0 && xi < xsize && yi < ysize && zi < zsize) + if (*nptr) { - int ri, gi, bi, ai; + color = vint4(0xFFFF); + } + else if (needs_swz) + { + data[ASTCENC_SWZ_R] = blk->data_r[idx]; + data[ASTCENC_SWZ_G] = blk->data_g[idx]; + data[ASTCENC_SWZ_B] = blk->data_b[idx]; + data[ASTCENC_SWZ_A] = blk->data_a[idx]; - if (*nptr) - { - ri = 0xFFFF; - gi = 0xFFFF; - bi = 0xFFFF; - ai = 0xFFFF; - } - else if (needs_swz) + if (needs_z) { - data[ASTCENC_SWZ_R] = pb->data_r[idx]; - data[ASTCENC_SWZ_G] = pb->data_g[idx]; - data[ASTCENC_SWZ_B] = pb->data_b[idx]; - data[ASTCENC_SWZ_A] = pb->data_a[idx]; - - if (needs_z) + float xN = (data[0] * 2.0f) - 1.0f; + float yN = (data[3] * 2.0f) - 1.0f; + float zN = 1.0f - xN * xN - yN * yN; + if (zN < 0.0f) { - float xN = (data[0] * 2.0f) - 1.0f; - float yN = (data[3] * 2.0f) - 1.0f; - float zN = 1.0f - xN * xN - yN * yN; - if (zN < 0.0f) - { - zN = 0.0f; - } - data[ASTCENC_SWZ_Z] = (astc::sqrt(zN) * 0.5f) + 0.5f; + zN = 
0.0f; } - - ri = float_to_sf16(data[swz.r], SF_NEARESTEVEN); - gi = float_to_sf16(data[swz.g], SF_NEARESTEVEN); - bi = float_to_sf16(data[swz.b], SF_NEARESTEVEN); - ai = float_to_sf16(data[swz.a], SF_NEARESTEVEN); - } - else - { - ri = float_to_sf16(pb->data_r[idx], SF_NEARESTEVEN); - gi = float_to_sf16(pb->data_g[idx], SF_NEARESTEVEN); - bi = float_to_sf16(pb->data_b[idx], SF_NEARESTEVEN); - ai = float_to_sf16(pb->data_a[idx], SF_NEARESTEVEN); + data[ASTCENC_SWZ_Z] = (astc::sqrt(zN) * 0.5f) + 0.5f; } - data16[zi][yi][4 * xi ] = ri; - data16[zi][yi][4 * xi + 1] = gi; - data16[zi][yi][4 * xi + 2] = bi; - data16[zi][yi][4 * xi + 3] = ai; + vfloat4 colorf(data[swz.r], data[swz.g], data[swz.b], data[swz.a]); + color = float_to_float16(colorf); } + else + { + vfloat4 colorf = blk->texel(idx); + color = float_to_float16(colorf); + } + + data16[(4 * xsize * y) + (4 * x )] = (uint16_t)color.lane<0>(); + data16[(4 * xsize * y) + (4 * x + 1)] = (uint16_t)color.lane<1>(); + data16[(4 * xsize * y) + (4 * x + 2)] = (uint16_t)color.lane<2>(); + data16[(4 * xsize * y) + (4 * x + 3)] = (uint16_t)color.lane<3>(); + idx++; nptr++; } + idx += x_nudge; + nptr += x_nudge; } + idx += y_nudge; + nptr += y_nudge; } } else // if (img.data_type == ASTCENC_TYPE_F32) { assert(img.data_type == ASTCENC_TYPE_F32); - - float*** data32 = static_cast(img.data); - for (int z = 0; z < bsd->zdim; z++) + + for (int z = z_start; z < z_end; z++) { - for (int y = 0; y < bsd->ydim; y++) + // Fetch the image plane + float* data32 = static_cast(img.data[z]); + + for (int y = y_start; y < y_end; y++) { - for (int x = 0; x < bsd->xdim; x++) + for (int x = x_start; x < x_end; x++) { - int xi = xpos + x; - int yi = ypos + y; - int zi = zpos + z; + vfloat4 color = blk->texel(idx); - if (xi >= 0 && yi >= 0 && zi >= 0 && xi < xsize && yi < ysize && zi < zsize) + if (*nptr) { - float rf, gf, bf, af; + color = vfloat4(std::numeric_limits::quiet_NaN()); + } + else if (needs_swz) + { + data[ASTCENC_SWZ_R] = color.lane<0>(); + data[ASTCENC_SWZ_G] = color.lane<1>(); + data[ASTCENC_SWZ_B] = color.lane<2>(); + data[ASTCENC_SWZ_A] = color.lane<3>(); - if (*nptr) - { - rf = std::numeric_limits::quiet_NaN(); - gf = std::numeric_limits::quiet_NaN(); - bf = std::numeric_limits::quiet_NaN(); - af = std::numeric_limits::quiet_NaN(); - } - else if (needs_swz) + if (needs_z) { - data[ASTCENC_SWZ_R] = pb->data_r[idx]; - data[ASTCENC_SWZ_G] = pb->data_g[idx]; - data[ASTCENC_SWZ_B] = pb->data_b[idx]; - data[ASTCENC_SWZ_A] = pb->data_a[idx]; - - if (needs_z) + float xN = (data[0] * 2.0f) - 1.0f; + float yN = (data[3] * 2.0f) - 1.0f; + float zN = 1.0f - xN * xN - yN * yN; + if (zN < 0.0f) { - float xN = (data[0] * 2.0f) - 1.0f; - float yN = (data[3] * 2.0f) - 1.0f; - float zN = 1.0f - xN * xN - yN * yN; - if (zN < 0.0f) - { - zN = 0.0f; - } - data[ASTCENC_SWZ_Z] = (astc::sqrt(zN) * 0.5f) + 0.5f; + zN = 0.0f; } - - rf = data[swz.r]; - gf = data[swz.g]; - bf = data[swz.b]; - af = data[swz.a]; - } - else - { - rf = pb->data_r[idx]; - gf = pb->data_g[idx]; - bf = pb->data_b[idx]; - af = pb->data_a[idx]; + data[ASTCENC_SWZ_Z] = (astc::sqrt(zN) * 0.5f) + 0.5f; } - data32[zi][yi][4 * xi ] = rf; - data32[zi][yi][4 * xi + 1] = gf; - data32[zi][yi][4 * xi + 2] = bf; - data32[zi][yi][4 * xi + 3] = af; + color = vfloat4(data[swz.r], data[swz.g], data[swz.b], data[swz.a]); } + + store(color, data32 + (4 * xsize * y) + (4 * x )); + idx++; nptr++; } + idx += x_nudge; + nptr += x_nudge; } + idx += y_nudge; + nptr += y_nudge; } } } - -/* - For an imageblock, update its flags. 
-   The updating is done based on data, not orig_data.
-*/
-void update_imageblock_flags(
-    imageblock* pb,
-    int xdim,
-    int ydim,
-    int zdim
-) {
-    float red_min = 1e38f, red_max = -1e38f;
-    float green_min = 1e38f, green_max = -1e38f;
-    float blue_min = 1e38f, blue_max = -1e38f;
-    float alpha_min = 1e38f, alpha_max = -1e38f;
-
-    int texels_per_block = xdim * ydim * zdim;
-
-    int grayscale = 1;
-
-    for (int i = 0; i < texels_per_block; i++)
-    {
-        float red = pb->data_r[i];
-        float green = pb->data_g[i];
-        float blue = pb->data_b[i];
-        float alpha = pb->data_a[i];
-        if (red < red_min)
-            red_min = red;
-        if (red > red_max)
-            red_max = red;
-        if (green < green_min)
-            green_min = green;
-        if (green > green_max)
-            green_max = green;
-        if (blue < blue_min)
-            blue_min = blue;
-        if (blue > blue_max)
-            blue_max = blue;
-        if (alpha < alpha_min)
-            alpha_min = alpha;
-        if (alpha > alpha_max)
-            alpha_max = alpha;
-
-        if (grayscale == 1 && (red != green || red != blue))
-        {
-            grayscale = 0;
-        }
-    }
-
-    pb->red_min = red_min;
-    pb->red_max = red_max;
-    pb->green_min = green_min;
-    pb->green_max = green_max;
-    pb->blue_min = blue_min;
-    pb->blue_max = blue_max;
-    pb->alpha_min = alpha_min;
-    pb->alpha_max = alpha_max;
-    pb->grayscale = grayscale;
-}
diff --git a/libkram/astc-encoder/astcenc_integer_sequence.cpp b/libkram/astc-encoder/astcenc_integer_sequence.cpp
index 63df91cd..d7079446 100644
--- a/libkram/astc-encoder/astcenc_integer_sequence.cpp
+++ b/libkram/astc-encoder/astcenc_integer_sequence.cpp
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
-// Copyright 2011-2020 Arm Limited
+// Copyright 2011-2021 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
@@ -21,6 +21,8 @@
 #include "astcenc_internal.h"
 
+#include <array>
+
 // unpacked quint triplets for each packed-quint value
 static const uint8_t quints_of_integer[128][3] = {
     {0, 0, 0}, {1, 0, 0}, {2, 0, 0}, {3, 0, 0},
@@ -329,94 +331,111 @@ static const uint8_t integer_of_trits[3][3][3][3][3] = {
     }
 };
 
-static void find_number_of_bits_trits_quints(
-    int quantization_level,
-    int* bits,
-    int* trits,
-    int* quints
+/**
+ * @brief The number of bits, trits, and quints needed for a quant level.
+ */
+struct btq_count {
+    /**< The quantization level. */
+    uint8_t quant;
+
+    /**< The number of bits. */
+    uint8_t bits;
+
+    /**< The number of trits. */
+    uint8_t trits;
+
+    /**< The number of quints. */
+    uint8_t quints;
+};
+
+/**
+ * @brief The table of bits, trits, and quints needed for a quant encode.
+ */
+static const std::array<btq_count, 21> btq_counts = {{
+    { QUANT_2, 1, 0, 0 },
+    { QUANT_3, 0, 1, 0 },
+    { QUANT_4, 2, 0, 0 },
+    { QUANT_5, 0, 0, 1 },
+    { QUANT_6, 1, 1, 0 },
+    { QUANT_8, 3, 0, 0 },
+    { QUANT_10, 1, 0, 1 },
+    { QUANT_12, 2, 1, 0 },
+    { QUANT_16, 4, 0, 0 },
+    { QUANT_20, 2, 0, 1 },
+    { QUANT_24, 3, 1, 0 },
+    { QUANT_32, 5, 0, 0 },
+    { QUANT_40, 3, 0, 1 },
+    { QUANT_48, 4, 1, 0 },
+    { QUANT_64, 6, 0, 0 },
+    { QUANT_80, 4, 0, 1 },
+    { QUANT_96, 5, 1, 0 },
+    { QUANT_128, 7, 0, 0 },
+    { QUANT_160, 5, 0, 1 },
+    { QUANT_192, 6, 1, 0 },
+    { QUANT_256, 8, 0, 0 }
+}};
+
+/**
+ * @brief The sequence scale, round, and divisors needed to compute sizing.
+ *
+ * The length of a quantized sequence in bits is:
+ *     (scale * items + round) / divisor
+ */
+struct ise_size {
+    /**< The quantization level. */
+    uint8_t quant;
+
+    /**< The scaling parameter.
*/ + uint8_t scale; + + /**< The rounding parameter. */ + uint8_t round; + + /**< The divisor parameter. */ + uint8_t divisor; +}; + +/** + * @brief The table of scale, round, and divisors needed for quant sizing. + */ +static const std::array ise_sizes = {{ + { QUANT_2, 1, 0, 1 }, + { QUANT_3, 8, 4, 5 }, + { QUANT_4, 2, 0, 1 }, + { QUANT_5, 7, 2, 3 }, + { QUANT_6, 13, 4, 5 }, + { QUANT_8, 3, 0, 1 }, + { QUANT_10, 10, 2, 3 }, + { QUANT_12, 18, 4, 5 }, + { QUANT_16, 4, 0, 1 }, + { QUANT_20, 13, 2, 3 }, + { QUANT_24, 23, 4, 5 }, + { QUANT_32, 5, 0, 1 }, + { QUANT_40, 16, 2, 3 }, + { QUANT_48, 28, 4, 5 }, + { QUANT_64, 6, 0, 1 }, + { QUANT_80, 19, 2, 3 }, + { QUANT_96, 33, 4, 5 }, + { QUANT_128, 7, 0, 1 }, + { QUANT_160, 22, 2, 3 }, + { QUANT_192, 38, 4, 5 }, + { QUANT_256, 8, 0, 1 } +}}; + +/* See header for documentation. */ +int get_ise_sequence_bitcount( + int items, + quant_method quant ) { - *bits = 0; - *trits = 0; - *quints = 0; - switch (quantization_level) + // Cope with out-of bounds values - input might be invalid + if (static_cast(quant) >= ise_sizes.size()) { - case QUANT_2: - *bits = 1; - break; - case QUANT_3: - *bits = 0; - *trits = 1; - break; - case QUANT_4: - *bits = 2; - break; - case QUANT_5: - *bits = 0; - *quints = 1; - break; - case QUANT_6: - *bits = 1; - *trits = 1; - break; - case QUANT_8: - *bits = 3; - break; - case QUANT_10: - *bits = 1; - *quints = 1; - break; - case QUANT_12: - *bits = 2; - *trits = 1; - break; - case QUANT_16: - *bits = 4; - break; - case QUANT_20: - *bits = 2; - *quints = 1; - break; - case QUANT_24: - *bits = 3; - *trits = 1; - break; - case QUANT_32: - *bits = 5; - break; - case QUANT_40: - *bits = 3; - *quints = 1; - break; - case QUANT_48: - *bits = 4; - *trits = 1; - break; - case QUANT_64: - *bits = 6; - break; - case QUANT_80: - *bits = 4; - *quints = 1; - break; - case QUANT_96: - *bits = 5; - *trits = 1; - break; - case QUANT_128: - *bits = 7; - break; - case QUANT_160: - *bits = 5; - *quints = 1; - break; - case QUANT_192: - *bits = 6; - *trits = 1; - break; - case QUANT_256: - *bits = 8; - break; + // Arbitrary large number that's more than an ASTC block can hold + return 1024; } + + auto& entry = ise_sizes[quant]; + return (entry.scale * items + entry.round) / entry.divisor; } // routine to write up to 8 bits @@ -456,85 +475,163 @@ static inline int read_bits( } void encode_ise( - int quantization_level, + int quant_level, int elements, const uint8_t* input_data, uint8_t* output_data, int bit_offset ) { - uint8_t lowparts[64]; - uint8_t highparts[69]; // 64 elements + 5 elements for padding - uint8_t tq_blocks[22]; // trit-blocks or quint-blocks - - int bits, trits, quints; - find_number_of_bits_trits_quints(quantization_level, &bits, &trits, &quints); + int bits = btq_counts[quant_level].bits; + int trits = btq_counts[quant_level].trits; + int quints = btq_counts[quant_level].quints; + int mask = (1 << bits) - 1; - for (int i = 0; i < elements; i++) - { - lowparts[i] = input_data[i] & ((1 << bits) - 1); - highparts[i] = input_data[i] >> bits; - } - - for (int i = elements; i < elements + 5; i++) - { - highparts[i] = 0; // padding before we start constructing trit-blocks or quint-blocks - } - - // construct trit-blocks or quint-blocks as necessary + // Write out trits and bits if (trits) { - int trit_blocks = (elements + 4) / 5; - for (int i = 0; i < trit_blocks; i++) + int i = 0; + int full_trit_blocks = elements / 5; + + for (int j = 0; j < full_trit_blocks; j++) { - tq_blocks[i] = integer_of_trits[highparts[5 * i + 
4]][highparts[5 * i + 3]][highparts[5 * i + 2]][highparts[5 * i + 1]][highparts[5 * i]]; + int i4 = input_data[i + 4] >> bits; + int i3 = input_data[i + 3] >> bits; + int i2 = input_data[i + 2] >> bits; + int i1 = input_data[i + 1] >> bits; + int i0 = input_data[i + 0] >> bits; + + uint8_t T = integer_of_trits[i4][i3][i2][i1][i0]; + + // The max size of a trit bit count is 6, so we can always safely + // pack a single MX value with the following 1 or 2 T bits. + uint8_t pack; + + // Element 0 + T0 + T1 + pack = (input_data[i++] & mask) | (((T >> 0) & 0x3) << bits); + write_bits(pack, bits + 2, bit_offset, output_data); + bit_offset += bits + 2; + + // Element 1 + T2 + T3 + pack = (input_data[i++] & mask) | (((T >> 2) & 0x3) << bits); + write_bits(pack, bits + 2, bit_offset, output_data); + bit_offset += bits + 2; + + // Element 2 + T4 + pack = (input_data[i++] & mask) | (((T >> 4) & 0x1) << bits); + write_bits(pack, bits + 1, bit_offset, output_data); + bit_offset += bits + 1; + + // Element 3 + T5 + T6 + pack = (input_data[i++] & mask) | (((T >> 5) & 0x3) << bits); + write_bits(pack, bits + 2, bit_offset, output_data); + bit_offset += bits + 2; + + // Element 4 + T7 + pack = (input_data[i++] & mask) | (((T >> 7) & 0x1) << bits); + write_bits(pack, bits + 1, bit_offset, output_data); + bit_offset += bits + 1; } - } - if (quints) - { - int quint_blocks = (elements + 2) / 3; - for (int i = 0; i < quint_blocks; i++) + // Loop tail for a partial block + if (i != elements) { - tq_blocks[i] = integer_of_quints[highparts[3 * i + 2]][highparts[3 * i + 1]][highparts[3 * i]]; + // i4 cannot be present - we know the block is partial + // i0 must be present - we know the block isn't empty + int i4 = 0; + int i3 = i + 3 >= elements ? 0 : input_data[i + 3] >> bits; + int i2 = i + 2 >= elements ? 0 : input_data[i + 2] >> bits; + int i1 = i + 1 >= elements ? 0 : input_data[i + 1] >> bits; + int i0 = input_data[i + 0] >> bits; + + uint8_t T = integer_of_trits[i4][i3][i2][i1][i0]; + + for (int j = 0; i < elements; i++, j++) + { + // Truncated table as this iteration is always partital + static const uint8_t tbits[4] { 2, 2, 1, 2 }; + static const uint8_t tshift[4] { 0, 2, 4, 5 }; + + uint8_t pack = (input_data[i] & mask) | + (((T >> tshift[j]) & ((1 << tbits[j]) - 1)) << bits); + + write_bits(pack, bits + tbits[j], bit_offset, output_data); + bit_offset += bits + tbits[j]; + } } } - - // then, write out the actual bits. - int lcounter = 0; - int hcounter = 0; - for (int i = 0; i < elements; i++) + // Write out quints and bits + else if (quints) { - write_bits(lowparts[i], bits, bit_offset, output_data); - bit_offset += bits; + int i = 0; + int full_quint_blocks = elements / 3; - if (trits) + for (int j = 0; j < full_quint_blocks; j++) { - static const int bits_to_write[5] = { 2, 2, 1, 2, 1 }; - static const int block_shift[5] = { 0, 2, 4, 5, 7 }; - static const int next_lcounter[5] = { 1, 2, 3, 4, 0 }; - static const int hcounter_incr[5] = { 0, 0, 0, 0, 1 }; - write_bits(tq_blocks[hcounter] >> block_shift[lcounter], bits_to_write[lcounter], bit_offset, output_data); - bit_offset += bits_to_write[lcounter]; - hcounter += hcounter_incr[lcounter]; - lcounter = next_lcounter[lcounter]; + int i2 = input_data[i + 2] >> bits; + int i1 = input_data[i + 1] >> bits; + int i0 = input_data[i + 0] >> bits; + + uint8_t T = integer_of_quints[i2][i1][i0]; + + // The max size of a quint bit count is 5, so we can always safely + // pack a single M value with the following 2 or 3 T bits. 
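+    // Worked example (an illustrative sketch using the tables above, not
+    // text from the original patch): at QUANT_10 each value stores 1 low
+    // bit plus a share of one quint. For high parts (i0, i1, i2) the packed
+    // value T = integer_of_quints[i2][i1][i0] fits in 7 bits, and the three
+    // elements below emit 1+3, 1+2 and 1+2 bits, i.e. 10 bits for 3 values,
+    // matching get_ise_sequence_bitcount(3, QUANT_10) = (10 * 3 + 2) / 3 = 10.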
+ uint8_t pack; + + // Element 0 + pack = (input_data[i++] & mask) | (((T >> 0) & 0x7) << bits); + write_bits(pack, bits + 3, bit_offset, output_data); + bit_offset += bits + 3; + + // Element 1 + pack = (input_data[i++] & mask) | (((T >> 3) & 0x3) << bits); + write_bits(pack, bits + 2, bit_offset, output_data); + bit_offset += bits + 2; + + // Element 2 + pack = (input_data[i++] & mask) | (((T >> 5) & 0x3) << bits); + write_bits(pack, bits + 2, bit_offset, output_data); + bit_offset += bits + 2; } - if (quints) + // Loop tail for a partial block + if (i != elements) { - static const int bits_to_write[3] = { 3, 2, 2 }; - static const int block_shift[3] = { 0, 3, 5 }; - static const int next_lcounter[3] = { 1, 2, 0 }; - static const int hcounter_incr[3] = { 0, 0, 1 }; - write_bits(tq_blocks[hcounter] >> block_shift[lcounter], bits_to_write[lcounter], bit_offset, output_data); - bit_offset += bits_to_write[lcounter]; - hcounter += hcounter_incr[lcounter]; - lcounter = next_lcounter[lcounter]; + // i2 cannot be present - we know the block is partial + // i0 must be present - we know the block isn't empty + int i2 = 0; + int i1 = i + 1 >= elements ? 0 : input_data[i + 1] >> bits; + int i0 = input_data[i + 0] >> bits; + + uint8_t T = integer_of_quints[i2][i1][i0]; + + for (int j = 0; i < elements; i++, j++) + { + // Truncated table as this iteration is always partital + static const uint8_t tbits[2] { 3, 2 }; + static const uint8_t tshift[2] { 0, 3 }; + + uint8_t pack = (input_data[i] & mask) | + (((T >> tshift[j]) & ((1 << tbits[j]) - 1)) << bits); + + write_bits(pack, bits + tbits[j], bit_offset, output_data); + bit_offset += bits + tbits[j]; + } + } + } + // Write out just bits + else + { + promise(elements > 0); + for (int i = 0; i < elements; i++) + { + write_bits(input_data[i], bits, bit_offset, output_data); + bit_offset += bits; } } } void decode_ise( - int quantization_level, + int quant_level, int elements, const uint8_t* input_data, uint8_t* output_data, @@ -547,8 +644,9 @@ void decode_ise( uint8_t results[68]; uint8_t tq_blocks[22]; // trit-blocks or quint-blocks - int bits, trits, quints; - find_number_of_bits_trits_quints(quantization_level, &bits, &trits, &quints); + int bits = btq_counts[quant_level].bits; + int trits = btq_counts[quant_level].trits; + int quints = btq_counts[quant_level].quints; int lcounter = 0; int hcounter = 0; @@ -567,10 +665,10 @@ void decode_ise( if (trits) { - static const int bits_to_read[5] = { 2, 2, 1, 2, 1 }; - static const int block_shift[5] = { 0, 2, 4, 5, 7 }; - static const int next_lcounter[5] = { 1, 2, 3, 4, 0 }; - static const int hcounter_incr[5] = { 0, 0, 0, 0, 1 }; + static const int bits_to_read[5] { 2, 2, 1, 2, 1 }; + static const int block_shift[5] { 0, 2, 4, 5, 7 }; + static const int next_lcounter[5] { 1, 2, 3, 4, 0 }; + static const int hcounter_incr[5] { 0, 0, 0, 0, 1 }; int tdata = read_bits(bits_to_read[lcounter], bit_offset, input_data); bit_offset += bits_to_read[lcounter]; tq_blocks[hcounter] |= tdata << block_shift[lcounter]; @@ -580,10 +678,10 @@ void decode_ise( if (quints) { - static const int bits_to_read[3] = { 3, 2, 2 }; - static const int block_shift[3] = { 0, 3, 5 }; - static const int next_lcounter[3] = { 1, 2, 0 }; - static const int hcounter_incr[3] = { 0, 0, 1 }; + static const int bits_to_read[3] { 3, 2, 2 }; + static const int block_shift[3] { 0, 3, 5 }; + static const int next_lcounter[3] { 1, 2, 0 }; + static const int hcounter_incr[3] { 0, 0, 1 }; int tdata = read_bits(bits_to_read[lcounter], bit_offset, 
input_data); bit_offset += bits_to_read[lcounter]; tq_blocks[hcounter] |= tdata << block_shift[lcounter]; @@ -599,7 +697,7 @@ void decode_ise( for (int i = 0; i < trit_blocks; i++) { const uint8_t *tritptr = trits_of_integer[tq_blocks[i]]; - results[5 * i] |= tritptr[0] << bits; + results[5 * i ] |= tritptr[0] << bits; results[5 * i + 1] |= tritptr[1] << bits; results[5 * i + 2] |= tritptr[2] << bits; results[5 * i + 3] |= tritptr[3] << bits; @@ -613,7 +711,7 @@ void decode_ise( for (int i = 0; i < quint_blocks; i++) { const uint8_t *quintptr = quints_of_integer[tq_blocks[i]]; - results[3 * i] |= quintptr[0] << bits; + results[3 * i ] |= quintptr[0] << bits; results[3 * i + 1] |= quintptr[1] << bits; results[3 * i + 2] |= quintptr[2] << bits; } @@ -624,56 +722,3 @@ void decode_ise( output_data[i] = results[i]; } } - -int compute_ise_bitcount( - int items, - quantization_method quant -) { - switch (quant) - { - case QUANT_2: - return items; - case QUANT_3: - return (8 * items + 4) / 5; - case QUANT_4: - return 2 * items; - case QUANT_5: - return (7 * items + 2) / 3; - case QUANT_6: - return (13 * items + 4) / 5; - case QUANT_8: - return 3 * items; - case QUANT_10: - return (10 * items + 2) / 3; - case QUANT_12: - return (18 * items + 4) / 5; - case QUANT_16: - return items * 4; - case QUANT_20: - return (13 * items + 2) / 3; - case QUANT_24: - return (23 * items + 4) / 5; - case QUANT_32: - return 5 * items; - case QUANT_40: - return (16 * items + 2) / 3; - case QUANT_48: - return (28 * items + 4) / 5; - case QUANT_64: - return 6 * items; - case QUANT_80: - return (19 * items + 2) / 3; - case QUANT_96: - return (33 * items + 4) / 5; - case QUANT_128: - return 7 * items; - case QUANT_160: - return (22 * items + 2) / 3; - case QUANT_192: - return (38 * items + 4) / 5; - case QUANT_256: - return 8 * items; - default: - return 100000; - } -} diff --git a/libkram/astc-encoder/astcenc_internal.h b/libkram/astc-encoder/astcenc_internal.h index 42e95bd7..565bec00 100644 --- a/libkram/astc-encoder/astcenc_internal.h +++ b/libkram/astc-encoder/astcenc_internal.h @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -23,6 +23,7 @@ #define ASTCENC_INTERNAL_INCLUDED #include +#include #include #include #include @@ -30,31 +31,59 @@ #include #include #include +#include -#ifndef ASTCENC_SSE -#error ERROR: ASTCENC_SSE not defined -#endif - -#ifndef ASTCENC_POPCNT -#error ERROR: ASTCENC_POPCNT not defined -#endif +#include "astcenc.h" +#include "astcenc_mathlib.h" +#include "astcenc_vecmathlib.h" -#ifndef ASTCENC_AVX -#error ERROR: ASTCENC_AVX not defined +/** + * @brief Make a promise to the compiler's optimizer. + * + * A promise is an expression that the optimizer is can assume is true for to + * help it generate faster code. Common use cases for this are to promise that + * a for loop will iterate more than once, or that the loop iteration count is + * a multiple of a vector length, which avoids pre-loop checks and can avoid + * loop tails if loops are unrolled by the auto-vectorizer. 
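+ *
+ * Illustrative use (a sketch; the variable name is an assumption, not a
+ * call site from this patch):
+ *
+ *     promise(texel_count > 0);
+ *     for (int i = 0; i < texel_count; i++) { ... }
+ *
+ * which lets the optimizer drop the zero-iteration entry check on the loop.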
+ */ +#if defined(NDEBUG) + #if !defined(__clang__) && defined(_MSC_VER) + #define promise(cond) __assume(cond) + #elif defined(__clang__) + #if __has_builtin(__builtin_assume) + #define promise(cond) __builtin_assume(cond) + #elif __has_builtin(__builtin_unreachable) + #define promise(cond) if(!(cond)) { __builtin_unreachable(); } + #else + #define promise(cond) + #endif + #else // Assume GCC + #define promise(cond) if(!(cond)) { __builtin_unreachable(); } + #endif +#else + #define promise(cond) assert(cond); #endif -#ifndef ASTCENC_ISA_INVARIANCE -#error ERROR: ASTCENC_ISA_INVARIANCE not defined +/** + * @brief Make a promise to the compiler's optimizer parameters don't alias. + * + * This is a compiler extension to implement the equivalent of the C99 + * @c restrict keyword. Mostly expected to help on functions which are + * reading and writing to arrays via pointers of the same basic type. + */ +#if !defined(__clang__) && defined(_MSC_VER) + #define RESTRICT __restrict +#else // Assume Clang or GCC + #define RESTRICT __restrict__ #endif -#include "astcenc.h" -#include "astcenc_mathlib.h" - /* ============================================================================ Constants ============================================================================ */ #define MAX_TEXELS_PER_BLOCK 216 +#define MAX_KMEANS_TEXELS 64 #define MAX_WEIGHTS_PER_BLOCK 64 +#define PLANE2_WEIGHTS_OFFSET (MAX_WEIGHTS_PER_BLOCK/2) #define MIN_WEIGHT_BITS_PER_BLOCK 24 #define MAX_WEIGHT_BITS_PER_BLOCK 96 #define PARTITION_BITS 10 @@ -73,11 +102,11 @@ static const float ERROR_CALC_DEFAULT { 1e30f }; ============================================================================ */ // The max texel count in a block which can try the one partition fast path. // Default: enabled for 4x4 and 5x4 blocks. -static const int TUNE_MAX_TEXELS_MODE0_FASTPATH { 24 }; +static const unsigned int TUNE_MAX_TEXELS_MODE0_FASTPATH { 24 }; // The maximum number of candidate encodings returned for each encoding mode. // Default: depends on quality preset -static const int TUNE_MAX_TRIAL_CANDIDATES { 4 }; +static const unsigned int TUNE_MAX_TRIAL_CANDIDATES { 4 }; /* ============================================================================ Other configuration parameters @@ -100,7 +129,7 @@ static const int TUNE_MAX_TRIAL_CANDIDATES { 4 }; * * A condition variable so threads can wait for processing completion. * * The init stage will be executed by the first thread to arrive in the - * critical section, there is no master thread in the thread pool. + * critical section, there is no main thread in the thread pool. * * The processing stage uses dynamic dispatch to assign task tickets to threads * on an on-demand basis. Threads may each therefore executed different numbers @@ -153,36 +182,36 @@ static const int TUNE_MAX_TRIAL_CANDIDATES { 4 }; class ParallelManager { private: - /** \brief Lock used for critical section and condition synchronization. */ + /** @brief Lock used for critical section and condition synchronization. */ std::mutex m_lock; - /** \brief True if the stage init() step has been executed. */ + /** @brief True if the stage init() step has been executed. */ bool m_init_done; - /** \brief True if the stage term() step has been executed. */ + /** @brief True if the stage term() step has been executed. */ bool m_term_done; - /** \brief Contition variable for tracking stage processing completion. */ + /** @brief Contition variable for tracking stage processing completion. 
*/ std::condition_variable m_complete; - /** \brief Number of tasks started, but not necessarily finished. */ - unsigned int m_start_count; + /** @brief Number of tasks started, but not necessarily finished. */ + std::atomic m_start_count; - /** \brief Number of tasks finished. */ + /** @brief Number of tasks finished. */ unsigned int m_done_count; - /** \brief Number of tasks that need to be processed. */ + /** @brief Number of tasks that need to be processed. */ unsigned int m_task_count; public: - /** \brief Create a new ParallelManager. */ + /** @brief Create a new ParallelManager. */ ParallelManager() { reset(); } /** - * \brief Reset the tracker for a new processing batch. + * @brief Reset the tracker for a new processing batch. * * This must be called from single-threaded code before starting the * multi-threaded procesing operations. @@ -197,14 +226,14 @@ class ParallelManager } /** - * \brief Trigger the pipeline stage init step. + * @brief Trigger the pipeline stage init step. * * This can be called from multi-threaded code. The first thread to * hit this will process the initialization. Other threads will block * and wait for it to complete. * - * \param init_func Callable which executes the stage initialization. - * Must return the number of tasks in the stage. + * @param init_func Callable which executes the stage initialization. + * Must return the number of tasks in the stage. */ void init(std::function init_func) { @@ -217,13 +246,13 @@ class ParallelManager } /** - * \brief Trigger the pipeline stage init step. + * @brief Trigger the pipeline stage init step. * * This can be called from multi-threaded code. The first thread to * hit this will process the initialization. Other threads will block * and wait for it to complete. * - * \param task_count Total number of tasks needing processing. + * @param task_count Total number of tasks needing processing. */ void init(unsigned int task_count) { @@ -236,36 +265,42 @@ class ParallelManager } /** - * \brief Request a task assignment. + * @brief Request a task assignment. * - * Assign up to \c granule tasks to the caller for processing. + * Assign up to @c granule tasks to the caller for processing. * - * \param granule Maximum number of tasks that can be assigned. - * \param[out] count Actual number of tasks assigned, or zero if + * @param granule Maximum number of tasks that can be assigned. + * @param[out] count Actual number of tasks assigned, or zero if * no tasks were assigned. * - * \return Task index of the first assigned task; assigned tasks + * @return Task index of the first assigned task; assigned tasks * increment from this. */ unsigned int get_task_assignment(unsigned int granule, unsigned int& count) { - std::lock_guard lck(m_lock); - unsigned int base = m_start_count; - count = std::min(granule, m_task_count - m_start_count); - m_start_count += count; + unsigned int base = m_start_count.fetch_add(granule, std::memory_order_relaxed); + if (base >= m_task_count) + { + count = 0; + return 0; + } + + count = astc::min(m_task_count - base, granule); return base; } /** - * \brief Complete a task assignment. + * @brief Complete a task assignment. * - * Mark \c count tasks as complete. This will notify all threads blocked - * on \c wait() if this completes the processing of the stage. + * Mark @c count tasks as complete. This will notify all threads blocked + * on @c wait() if this completes the processing of the stage. * - * \param count The number of completed tasks. + * @param count The number of completed tasks. 
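	 *
	 * A minimal worker-loop sketch for this class (illustrative only; the
	 * granule size of 16 and the process() callback are assumptions, not
	 * taken from this patch):
	 *
	 *     mgr.init(task_count);
	 *     for (;;)
	 *     {
	 *         unsigned int count;
	 *         unsigned int base = mgr.get_task_assignment(16, count);
	 *         if (!count) { break; }
	 *         for (unsigned int i = base; i < base + count; i++) { process(i); }
	 *         mgr.complete_task_assignment(count);
	 *     }
	 *     mgr.wait();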
*/ void complete_task_assignment(unsigned int count) { + // Note: m_done_count cannot use an atomic without the mutex; this has + // a race between the update here and the wait() for other threads std::unique_lock lck(m_lock); this->m_done_count += count; if (m_done_count == m_task_count) @@ -276,7 +311,7 @@ class ParallelManager } /** - * \brief Wait for stage processing to complete. + * @brief Wait for stage processing to complete. */ void wait() { @@ -285,13 +320,13 @@ class ParallelManager } /** - * \brief Trigger the pipeline stage term step. + * @brief Trigger the pipeline stage term step. * * This can be called from multi-threaded code. The first thread to * hit this will process the thread termintion. Caller must have called * wait() prior to calling this function to ensure processing is complete. * - * \param term_func Callable which executes the stage termination. + * @param term_func Callable which executes the stage termination. */ void term(std::function term_func) { @@ -304,6 +339,27 @@ class ParallelManager } }; +struct partition_metrics +{ + vfloat4 range_sq; + vfloat4 error_weight; + vfloat4 icolor_scale; + vfloat4 color_scale; + vfloat4 avg; + vfloat4 dir; +}; + +struct partition_lines3 +{ + line3 uncor_line; + line3 samec_line; + + processed_line3 uncor_pline; + processed_line3 samec_pline; + + float uncor_line_len; + float samec_line_len; +}; /* Partition table representation: @@ -320,7 +376,7 @@ class ParallelManager struct partition_info { int partition_count; - uint8_t texels_per_partition[4]; + uint8_t partition_texel_count[4]; uint8_t partition_of_texel[MAX_TEXELS_PER_BLOCK]; uint8_t texels_of_partition[4][MAX_TEXELS_PER_BLOCK]; uint64_t coverage_bitmaps[4]; @@ -336,69 +392,130 @@ struct partition_info */ struct decimation_table { - int num_texels; - int num_weights; - uint8_t texel_num_weights[MAX_TEXELS_PER_BLOCK]; // number of indices that go into the calculation for a texel - uint8_t texel_weights_int[MAX_TEXELS_PER_BLOCK][4]; // the weight to assign to each weight - float texel_weights_float[MAX_TEXELS_PER_BLOCK][4]; // the weight to assign to each weight - uint8_t texel_weights[MAX_TEXELS_PER_BLOCK][4]; // the weights that go into a texel calculation - uint8_t weight_num_texels[MAX_WEIGHTS_PER_BLOCK]; // the number of texels that a given weight contributes to - uint8_t weight_texel[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; // the texels that the weight contributes to + // TODO: Make these byte values + int texel_count; + int weight_count; + int weight_x; + int weight_y; + int weight_z; + + uint8_t texel_weight_count[MAX_TEXELS_PER_BLOCK]; // number of indices that go into the calculation for a texel + + // The 4t and t4 tables are the same data, but transposed to allow optimal + // data access patterns depending on how we can unroll loops + alignas(ASTCENC_VECALIGN) float texel_weights_float_4t[4][MAX_TEXELS_PER_BLOCK]; // the weight to assign to each weight + alignas(ASTCENC_VECALIGN) uint8_t texel_weights_4t[4][MAX_TEXELS_PER_BLOCK]; // the weights that go into a texel calculation + + // TODO: Can we remove the copies? 
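+	// Layout note (an inference from the declarations above, not wording
+	// from the patch): the *_4t tables are weight-index-major, so for a
+	// fixed weight slot j the run texel_weights_float_4t[j][0..texel_count-1]
+	// is contiguous and suits aligned vector loads, while the *_t4 copies
+	// below keep the original texel-major [texel][4] layout for scalar
+	// per-texel loops.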
+ float texel_weights_float_t4[MAX_TEXELS_PER_BLOCK][4]; // the weight to assign to each weight + uint8_t texel_weights_t4[MAX_TEXELS_PER_BLOCK][4]; // the weights that go into a texel calculation + + uint8_t texel_weights_int_t4[MAX_TEXELS_PER_BLOCK][4]; // the weight to assign to each weight + + uint8_t weight_texel_count[MAX_WEIGHTS_PER_BLOCK]; // the number of texels that a given weight contributes to uint8_t weights_int[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; // the weights that the weight contributes to a texel. - float weights_flt[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK]; // the weights that the weight contributes to a texel. + + // Stored transposed to give better access patterns + uint8_t weight_texel[MAX_TEXELS_PER_BLOCK][MAX_WEIGHTS_PER_BLOCK]; // the texels that the weight contributes to + alignas(ASTCENC_VECALIGN) float weights_flt[MAX_TEXELS_PER_BLOCK][MAX_WEIGHTS_PER_BLOCK]; // the weights that the weight contributes to a texel. // folded data structures: // * texel_weights_texel[i][j] = texel_weights[weight_texel[i][j]]; - // * texel_weights_float_texel[i][j] = texel_weights_float[weight_texel[i][j] + // * texel_weights_float_texel[i][j] = texel_weights_float[weight_texel[i][j]] uint8_t texel_weights_texel[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK][4]; float texel_weights_float_texel[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK][4]; }; -/* - data structure describing information that pertains to a block size and its associated block modes. -*/ +/** + * @brief Metadata for single block mode for a specific BSD. + */ struct block_mode { int8_t decimation_mode; - int8_t quantization_mode; - int8_t is_dual_plane; + int8_t quant_mode; + uint8_t is_dual_plane : 1; + uint8_t percentile_hit : 1; + uint8_t percentile_always : 1; int16_t mode_index; - float percentile; }; +/** + * @brief Metadata for single decimation mode for a specific BSD. + */ +struct decimation_mode +{ + int8_t maxprec_1plane; + int8_t maxprec_2planes; + uint8_t percentile_hit : 1; + uint8_t percentile_always : 1; +}; + +/** + * @brief Data tables for a single block size. + * + * The decimation tables store the information to apply weight grid dimension + * reductions. We only store the decimation modes that are actually needed by + * the current context; many of the possible modes will be unused (too many + * weights for the current block size or disabled by heuristics). The actual + * number of weights stored is @c decimation_mode_count, and the + * @c decimation_modes and @c decimation_tables arrays store the active modes + * contiguously at the start of the array. These entries are not stored in any + * particuar order. + * + * The block mode tables store the unpacked block mode settings. Block modes + * are stored in the compressed block as an 11 bit field, but for any given + * block size and set of compressor heuristics, only a subset of the block + * modes will be used. The actual number of block modes stored is indicated in + * @c block_mode_count, and the @c block_modes array store the active modes + * contiguously at the start of the array. These entries are stored in + * incrementing "packed" value order, which doesn't mean much once unpacked. + * To allow decompressors to reference the packed data efficiently the + * @c block_mode_packed_index array stores the mapping between physical ID and + * the actual remapped array index. + */ struct block_size_descriptor { + /**< The block X dimension, in texels. */ int xdim; + + /**< The block Y dimension, in texels. 
*/ int ydim; + + /**< The block Z dimension, in texels. */ int zdim; + + /**< The block total texel count. */ int texel_count; + + /**< The number of stored decimation modes. */ int decimation_mode_count; - int decimation_mode_samples[MAX_DECIMATION_MODES]; - int decimation_mode_maxprec_1plane[MAX_DECIMATION_MODES]; - int decimation_mode_maxprec_2planes[MAX_DECIMATION_MODES]; - float decimation_mode_percentile[MAX_DECIMATION_MODES]; - int permit_encode[MAX_DECIMATION_MODES]; + + /**< The active decimation modes, stored in low indices. */ + decimation_mode decimation_modes[MAX_DECIMATION_MODES]; + + /**< The active decimation tables, stored in low indices. */ const decimation_table *decimation_tables[MAX_DECIMATION_MODES]; - // out of all possible 2048 weight modes, only a subset is - // actually valid for the current configuration (e.g. 6x6 - // 2D LDR has 370 valid modes); the valid ones are packed into - // block_modes_packed array. - block_mode block_modes_packed[MAX_WEIGHT_MODES]; - int block_mode_packed_count; - // get index of block mode inside the block_modes_packed array, - // or -1 if mode is not valid for the current configuration. - int16_t block_mode_to_packed[MAX_WEIGHT_MODES]; - - // for the k-means bed bitmap partitioning algorithm, we don't - // want to consider more than 64 texels; this array specifies - // which 64 texels (if that many) to consider. - int texelcount_for_bitmap_partitioning; - int texels_for_bitmap_partitioning[64]; - - // All the partitioning information for this block size - partition_info partitions[(3*PARTITION_COUNT)+1]; + + /**< The number of stored block modes. */ + int block_mode_count; + + /**< The active block modes, stored in low indices. */ + block_mode block_modes[MAX_WEIGHT_MODES]; + + /**< The block mode array index, or -1 if not valid in current config. */ + int16_t block_mode_packed_index[MAX_WEIGHT_MODES]; + + + /**< The texel count for k-means partition selection. */ + int kmeans_texel_count; + + /**< The active texels for k-means partition selection. */ + int kmeans_texels[MAX_KMEANS_TEXELS]; + + /**< The partion tables for all of the possible partitions. */ + partition_info partitions[(3 * PARTITION_COUNT) + 1]; }; // data structure representing one block of an image. @@ -410,39 +527,60 @@ struct imageblock float data_g[MAX_TEXELS_PER_BLOCK]; float data_b[MAX_TEXELS_PER_BLOCK]; float data_a[MAX_TEXELS_PER_BLOCK]; - float4 origin_texel; + + vfloat4 origin_texel; + vfloat4 data_min; + vfloat4 data_max; + bool grayscale; uint8_t rgb_lns[MAX_TEXELS_PER_BLOCK]; // 1 if RGB data are being treated as LNS uint8_t alpha_lns[MAX_TEXELS_PER_BLOCK]; // 1 if Alpha data are being treated as LNS uint8_t nan_texel[MAX_TEXELS_PER_BLOCK]; // 1 if the texel is a NaN-texel. + int xpos, ypos, zpos; - float red_min, red_max; - float green_min, green_max; - float blue_min, blue_max; - float alpha_min, alpha_max; - int grayscale; // 1 if R=G=B for every pixel, 0 otherwise + inline vfloat4 texel(int index) const + { + return vfloat4(data_r[index], + data_g[index], + data_b[index], + data_a[index]); + } - int xpos, ypos, zpos; + inline vfloat4 texel3(int index) const + { + return vfloat4(data_r[index], + data_g[index], + data_b[index], + 0.0f); + } }; -static inline int imageblock_uses_alpha(const imageblock * pb) +static inline float imageblock_default_alpha(const imageblock * blk) { - return pb->alpha_max != pb->alpha_min; + return blk->alpha_lns[0] ? 
(float)0x7800 : (float)0xFFFF; } -void update_imageblock_flags( - imageblock* pb, - int xdim, - int ydim, - int zdim); -void imageblock_initialize_orig_from_work( - imageblock * pb, - int pixelcount); +static inline int imageblock_uses_alpha(const imageblock * blk) +{ + return blk->data_min.lane<3>() != blk->data_max.lane<3>(); +} + +static inline int imageblock_is_lum(const imageblock * blk) +{ + float default_alpha = imageblock_default_alpha(blk); + bool alpha1 = (blk->data_min.lane<3>() == default_alpha) && + (blk->data_max.lane<3>() == default_alpha); + return blk->grayscale && alpha1; +} -void imageblock_initialize_work_from_orig( - imageblock * pb, - int pixelcount); +static inline int imageblock_is_lumalp(const imageblock * blk) +{ + float default_alpha = imageblock_default_alpha(blk); + bool alpha1 = (blk->data_min.lane<3>() == default_alpha) && + (blk->data_max.lane<3>() == default_alpha); + return blk->grayscale && !alpha1; +} /* Data structure representing error weighting for one block of an image. this is used as @@ -467,8 +605,10 @@ void imageblock_initialize_work_from_orig( struct error_weight_block { - float4 error_weights[MAX_TEXELS_PER_BLOCK]; + vfloat4 error_weights[MAX_TEXELS_PER_BLOCK]; + float texel_weight[MAX_TEXELS_PER_BLOCK]; + float texel_weight_gba[MAX_TEXELS_PER_BLOCK]; float texel_weight_rba[MAX_TEXELS_PER_BLOCK]; float texel_weight_rga[MAX_TEXELS_PER_BLOCK]; @@ -483,12 +623,10 @@ struct error_weight_block float texel_weight_g[MAX_TEXELS_PER_BLOCK]; float texel_weight_b[MAX_TEXELS_PER_BLOCK]; float texel_weight_a[MAX_TEXELS_PER_BLOCK]; - - int contains_zeroweight_texels; }; // enumeration of all the quantization methods we support under this format. -enum quantization_method +enum quant_method { QUANT_2 = 0, QUANT_3 = 1, @@ -513,6 +651,36 @@ enum quantization_method QUANT_256 = 20 }; +static inline int get_quant_method_levels(quant_method method) +{ + switch(method) + { + case QUANT_2: return 2; + case QUANT_3: return 3; + case QUANT_4: return 4; + case QUANT_5: return 5; + case QUANT_6: return 6; + case QUANT_8: return 8; + case QUANT_10: return 10; + case QUANT_12: return 12; + case QUANT_16: return 16; + case QUANT_20: return 20; + case QUANT_24: return 24; + case QUANT_32: return 32; + case QUANT_40: return 40; + case QUANT_48: return 48; + case QUANT_64: return 64; + case QUANT_80: return 80; + case QUANT_96: return 96; + case QUANT_128: return 128; + case QUANT_160: return 160; + case QUANT_192: return 192; + case QUANT_256: return 256; + // Unreachable - the enum is fully described + default: return 0; + } +} + /** * @brief Weight quantization transfer table. * @@ -530,12 +698,10 @@ enum quantization_method struct quantization_and_transfer_table { /** The quantization level used */ - quantization_method method; + quant_method method; /** The unscrambled unquantized value. */ - // TODO: Converted to floats to support AVX gathers float unquantized_value_unsc[33]; /** The scrambling order: value[map[i]] == value_unsc[i] */ - // TODO: Converted to u32 to support AVX gathers int32_t scramble_map[32]; /** The scrambled unquantized values. */ uint8_t unquantized_value[32]; @@ -580,12 +746,16 @@ struct symbolic_compressed_block int partition_index; // 0 to 1023 int color_formats[4]; // color format for each endpoint color pair. int color_formats_matched; // color format for all endpoint pairs are matched. - int color_values[4][12]; // quantized endpoint color pairs. 
- int color_quantization_level; - uint8_t plane1_weights[MAX_WEIGHTS_PER_BLOCK]; // quantized and decimated weights - uint8_t plane2_weights[MAX_WEIGHTS_PER_BLOCK]; + int color_quant_level; int plane2_color_component; // color component for the secondary plane of weights + + // TODO: Under what circumstances is this ever more than 8 (4 pairs) colors + int color_values[4][12]; // quantized endpoint color pairs. int constant_color[4]; // constant-color, as FP16 or UINT16. Used for constant-color blocks only. + // Quantized and decimated weights. In the case of dual plane, the second + // index plane starts at weights[PLANE2_WEIGHTS_OFFSET] + float errorval; // The error of the current encoding + uint8_t weights[MAX_WEIGHTS_PER_BLOCK]; }; struct physical_compressed_block @@ -603,15 +773,18 @@ struct physical_compressed_block * This will also initialize the partition table metadata, which is stored * as part of the BSD structure. * - * @param xdim The x axis size of the block. - * @param ydim The y axis size of the block. - * @param zdim The z axis size of the block. - * @param bsd The structure to populate. + * @param xdim The x axis size of the block. + * @param ydim The y axis size of the block. + * @param zdim The z axis size of the block. + * @param mode_cutoff The block mode percentil cutoff [0-1]. + * @param bsd The structure to populate. */ void init_block_size_descriptor( int xdim, int ydim, int zdim, + bool can_omit_modes, + float mode_cutoff, block_size_descriptor* bsd); void term_block_size_descriptor( @@ -679,29 +852,39 @@ int is_legal_3d_block_size( // functions and data pertaining to quantization and encoding // ********************************************************** -extern const uint8_t color_quantization_tables[21][256]; -extern const uint8_t color_unquantization_tables[21][256]; -extern int quantization_mode_table[17][128]; +extern const uint8_t color_quant_tables[21][256]; +extern const uint8_t color_unquant_tables[21][256]; +extern int8_t quant_mode_table[17][128]; void encode_ise( - int quantization_level, + int quant_level, int elements, const uint8_t* input_data, uint8_t* output_data, int bit_offset); void decode_ise( - int quantization_level, + int quant_level, int elements, const uint8_t* input_data, uint8_t* output_data, int bit_offset); -int compute_ise_bitcount( +/** + * @brief Return the number of bits needed to encode an ISE sequence. + * + * This implementation assumes that the @c quant level is untrusted, given it + * may come from random data being decompressed, so we return an unencodable + * size if that is the case. + * + * @param items The number of items in the sequence. + * @param quant The desired quantization level. 
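+ *
+ * Worked example (arithmetic only, from the ise_sizes table above): ten
+ * weights at QUANT_12 carry one trit plus two bits each, so the sequence
+ * needs (18 * 10 + 4) / 5 = 36 bits.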
+ */ +int get_ise_sequence_bitcount( int items, - quantization_method quant); + quant_method quant); -void build_quantization_mode_table(void); +void build_quant_mode_table(void); // ********************************************** // functions and data pertaining to partitioning @@ -709,33 +892,20 @@ void build_quantization_mode_table(void); // functions to compute color averages and dominant directions // for each partition in a block - -void compute_averages_and_directions_rgb( +void compute_avgs_and_dirs_4_comp( const partition_info* pt, const imageblock* blk, const error_weight_block* ewb, - const float4* color_scalefactors, - float3* averages, - float3* directions_rgb); + partition_metrics pm[4]); -void compute_averages_and_directions_rgba( +void compute_avgs_and_dirs_3_comp( const partition_info* pt, const imageblock* blk, const error_weight_block* ewb, - const float4* color_scalefactors, - float4* averages, - float4* directions_rgba); + int omitted_component, + partition_metrics pm[4]); -void compute_averages_and_directions_3_components( - const partition_info* pt, - const imageblock* blk, - const error_weight_block* ewb, - const float3 * color_scalefactors, - int omittedComponent, - float3* averages, - float3* directions); - -void compute_averages_and_directions_2_components( +void compute_avgs_and_dirs_2_comp( const partition_info* pt, const imageblock* blk, const error_weight_block* ewb, @@ -746,67 +916,40 @@ void compute_averages_and_directions_2_components( float2* directions); void compute_error_squared_rgba( - const partition_info* pt, // the partition that we use when computing the squared-error. + const partition_info* pt, const imageblock* blk, const error_weight_block* ewb, - const processed_line4* plines_uncorr, - const processed_line4* plines_samechroma, - const processed_line3* plines_separate_red, - const processed_line3* plines_separate_green, - const processed_line3* plines_separate_blue, - const processed_line3* plines_separate_alpha, - float* length_uncorr, - float* length_samechroma, - float4* length_separate, - float* uncorr_error, - float* samechroma_error, - float4* separate_color_error); + const processed_line4* uncor_plines, + const processed_line4* samec_plines, + float* uncor_lengths, + float* samec_lengths, + float* uncor_errors, + float* samec_errors); void compute_error_squared_rgb( - const partition_info* pt, // the partition that we use when computing the squared-error. - const imageblock* blk, - const error_weight_block* ewb, - const processed_line3* plines_uncorr, - const processed_line3* plines_samechroma, - const processed_line2* plines_separate_red, - const processed_line2* plines_separate_green, - const processed_line2* plines_separate_blue, - float* length_uncorr, - float* length_samechroma, - float3* length_separate, - float* uncorr_error, - float* samechroma_error, - float3* separate_color_error); - -// functions to compute error value across a tile for a particular line function -// for a single partition. -float compute_error_squared_rgb_single_partition( - int partition_to_test, - const block_size_descriptor* bsd, - const partition_info* pt, - const imageblock* blk, - const error_weight_block* ewb, - const processed_line3* lin // the line for the partition. -); + const partition_info *pt, + const imageblock *blk, + const error_weight_block *ewb, + partition_lines3 plines[4], + float& uncor_error, + float& samec_error); // for each partition, compute its color weightings. 
void compute_partition_error_color_weightings( - const block_size_descriptor* bsd, - const error_weight_block * ewb, - const partition_info* pi, - float4 error_weightings[4], - float4 color_scalefactors[4]); + const error_weight_block& ewb, + const partition_info& pt, + partition_metrics pm[4]); /** - * \brief Find the best set of partitions to trial for a given block. + * @brief Find the best set of partitions to trial for a given block. * - * On return \c best_partition_uncorrelated contains the best partition - * assuming the data has noncorrelated chroma, \c best_partition_samechroma + * On return @c best_partition_uncorrelated contains the best partition + * assuming the data has noncorrelated chroma, @c best_partition_samechroma * contains the best partition assuming the data has corelated chroma, and - * \c best_partition_dualplane contains the best partition assuming the data + * @c best_partition_dualplane contains the best partition assuming the data * has one uncorrelated color component. * - * \c best_partition_dualplane is stored packed; bits [9:0] contain the + * @c best_partition_dualplane is stored packed; bits [9:0] contain the * best partition, bits [11:10] contain the best color component. */ void find_best_partitionings( @@ -847,17 +990,21 @@ struct pixel_region_variance_args /** The channel swizzle pattern. */ astcenc_swizzle swz; /** Should the algorithm bother with Z axis processing? */ - int have_z; + bool have_z; /** The kernel radius for average and variance. */ int avg_var_kernel_radius; /** The kernel radius for alpha processing. */ int alpha_kernel_radius; /** The size of the working data to process. */ - int3 size; + int size_x; + int size_y; + int size_z; /** The position of first src and dst data in the data set. */ - int3 offset; + int offset_x; + int offset_y; + int offset_z; /** The working memory buffer. */ - float4 *work_memory; + vfloat4 *work_memory; }; /** @@ -868,9 +1015,12 @@ struct avg_var_args /** The arguments for the nested variance computation. */ pixel_region_variance_args arg; /** The image dimensions. */ - int3 img_size; + int img_size_x; + int img_size_y; + int img_size_z; /** The maximum working block dimensions. */ - int3 blk_size; + int blk_size_xy; + int blk_size_z; /** The working block memory size. */ int work_memory_size; }; @@ -887,7 +1037,8 @@ struct avg_var_args * @param avg_var_kernel_radius The kernel radius (in pixels) for avg and var. * @param alpha_kernel_radius The kernel radius (in pixels) for alpha mods. * @param swz Input data channel swizzle. - * @param thread_count The number of threads to use. + * @param arg The pixel region arguments for this thread. + * @param ag The average variance arguments for this thread. * * @return The number of tasks in the processing stage. */ @@ -909,7 +1060,7 @@ void compute_averages_and_variances( void fetch_imageblock( astcenc_profile decode_mode, const astcenc_image& img, - imageblock* pb, // picture-block to initialize with image data + imageblock* blk, // picture-block to initialize with image data const block_size_descriptor* bsd, // position in picture to fetch block from int xpos, @@ -921,7 +1072,7 @@ void fetch_imageblock( // the data written are taken from orig_data. void write_imageblock( astcenc_image& img, - const imageblock* pb, // picture-block to initialize with image data + const imageblock* blk, // picture-block to initialize with image data const block_size_descriptor* bsd, // position in picture to write block to. 
int xpos, @@ -929,16 +1080,11 @@ void write_imageblock( int zpos, astcenc_swizzle swz); -// helper function to check whether a given picture-block has alpha that is not -// just uniformly 1. -int imageblock_uses_alpha( - const imageblock * pb); - float compute_symbolic_block_difference( - astcenc_profile decode_mode, + const astcenc_config& config, const block_size_descriptor* bsd, const symbolic_compressed_block* scb, - const imageblock* pb, + const imageblock* blk, const error_weight_block *ewb) ; // *********************************************************** @@ -947,15 +1093,15 @@ float compute_symbolic_block_difference( struct endpoints { int partition_count; - float4 endpt0[4]; - float4 endpt1[4]; + vfloat4 endpt0[4]; + vfloat4 endpt1[4]; }; struct endpoints_and_weights { endpoints ep; - float weights[MAX_TEXELS_PER_BLOCK]; - float weight_error_scale[MAX_TEXELS_PER_BLOCK]; + alignas(ASTCENC_VECALIGN) float weights[MAX_TEXELS_PER_BLOCK]; + alignas(ASTCENC_VECALIGN) float weight_error_scale[MAX_TEXELS_PER_BLOCK]; }; void compute_endpoints_and_ideal_weights_1_plane( @@ -974,24 +1120,42 @@ void compute_endpoints_and_ideal_weights_2_planes( endpoints_and_weights* ei1, // primary plane weights endpoints_and_weights* ei2); // secondary plane weights +/** + * @brief Compute the optimal weights for a decimation table. + * + * Compute the idealized weight set, assuming infinite precision and no + * quantization. Later functions will use this as a staring points. + * + * @param eai_in The non-decimated endpoints and weights. + * @param eai_out A copy of eai_in we can modify later. + * @param dt The selected decimation table. + * @param[out] weight_set The output decimated weight set. + * @param[out] weights The output decimated weights. + */ void compute_ideal_weights_for_decimation_table( - const endpoints_and_weights* eai, - const decimation_table* it, + const endpoints_and_weights& eai_in, + endpoints_and_weights& eai_out, + const decimation_table& dt, float* weight_set, float* weights); -void compute_ideal_quantized_weights_for_decimation_table( - const decimation_table* it, +/** + * @brief Compute the best quantized weights for a decimation table. + * + * Compute the quantized weight set, for a specific quant level. + */ +void compute_quantized_weights_for_decimation_table( + const decimation_table* dt, float low_bound, float high_bound, const float* weight_set_in, float* weight_set_out, uint8_t* quantized_weight_set, - int quantization_level); + int quant_level); float compute_error_of_weight_set( const endpoints_and_weights* eai, - const decimation_table* it, + const decimation_table* dt, const float *weights); void merge_endpoints( @@ -1005,35 +1169,50 @@ void merge_endpoints( // the format used may or may not match the format specified; // the return value is the format actually used. int pack_color_endpoints( - float4 color0, - float4 color1, - float4 rgbs_color, - float4 rgbo_color, + vfloat4 color0, + vfloat4 color1, + vfloat4 rgbs_color, + vfloat4 rgbo_color, int format, int* output, - int quantization_level); + int quant_level); // unpack a pair of color endpoints from a series of integers. 
void unpack_color_endpoints( astcenc_profile decode_mode, int format, - int quantization_level, + int quant_level, const int* input, int* rgb_hdr, int* alpha_hdr, int* nan_endpoint, - uint4* output0, - uint4* output1); + vint4* output0, + vint4* output1); + +// unquantize and undecimate a weight grid +void unpack_weights( + const block_size_descriptor& bsd, + const symbolic_compressed_block& scb, + const decimation_table& dt, + bool is_dual_plane, + int weight_quant_level, + int weights_plane1[MAX_TEXELS_PER_BLOCK], + int weights_plane2[MAX_TEXELS_PER_BLOCK]); struct encoding_choice_errors { - float rgb_scale_error; // error of using LDR RGB-scale instead of complete endpoints. - float rgb_luma_error; // error of using HDR RGB-scale instead of complete endpoints. - float luminance_error; // error of using luminance instead of RGB - float alpha_drop_error; // error of discarding alpha - float rgb_drop_error; // error of discarding RGB - int can_offset_encode; - int can_blue_contract; + // Error of using LDR RGB-scale instead of complete endpoints. + float rgb_scale_error; + // Error of using HDR RGB-scale instead of complete endpoints. + float rgb_luma_error; + // Error of using luminance instead of RGB. + float luminance_error; + // Error of discarding alpha. + float alpha_drop_error; + // Validity of using offset encoding. + bool can_offset_encode; + // Validity of using blue contraction encoding. + bool can_blue_contract; }; // buffers used to store intermediate data in compress_symbolic_block_fixed_partition_*() @@ -1052,14 +1231,13 @@ struct alignas(ASTCENC_VECALIGN) compress_fixed_partition_buffers struct compress_symbolic_block_buffers { error_weight_block ewb; - symbolic_compressed_block tempblocks[TUNE_MAX_TRIAL_CANDIDATES]; compress_fixed_partition_buffers planes; }; void compute_encoding_choice_errors( const block_size_descriptor* bsd, - const imageblock* pb, - const partition_info* pi, + const imageblock* blk, + const partition_info* pt, const error_weight_block* ewb, int separate_component, // component that is separated out in 2-plane mode, -1 in 1-plane mode encoding_choice_errors* eci); @@ -1078,20 +1256,31 @@ void determine_optimal_set_of_endpoint_formats_to_use( // output data int partition_format_specifiers[4][4], int quantized_weight[4], - int quantization_level[4], - int quantization_level_mod[4]); + int quant_level[4], + int quant_level_mod[4]); -void recompute_ideal_colors( - int weight_quantization_mode, +void recompute_ideal_colors_1plane( + int weight_quant_mode, endpoints* ep, // contains the endpoints we wish to update - float4* rgbs_vectors, // used to return RGBS-vectors for endpoint mode #6 - float4* rgbo_vectors, // used to return RGBS-vectors for endpoint mode #7 + vfloat4* rgbs_vectors, // used to return RGBS-vectors for endpoint mode #6 (LDR RGB base + scale) + vfloat4* rgbo_vectors, // used to return RGBS-vectors for endpoint mode #7 (HDR RGB base + scale) + const uint8_t* weight_set8, // the current set of weight values + const partition_info* pt, + const decimation_table* dt, + const imageblock* blk, // picture-block containing the actual data. 
+ const error_weight_block* ewb); + +void recompute_ideal_colors_2planes( + int weight_quant_mode, + endpoints* ep, // contains the endpoints we wish to update + vfloat4* rgbs_vectors, // used to return RGBS-vectors for endpoint mode #6 (LDR RGB base + scale) + vfloat4* rgbo_vectors, // used to return RGBS-vectors for endpoint mode #7 (HDR RGB base + scale) const uint8_t* weight_set8, // the current set of weight values const uint8_t* plane2_weight_set8, // nullptr if plane 2 is not actually used. int plane2_color_component, // color component for 2nd plane of weights; -1 if the 2nd plane of weights is not present - const partition_info* pi, - const decimation_table* it, - const imageblock* pb, // picture-block containing the actual data. + const partition_info* pt, + const decimation_table* dt, + const imageblock* blk, // picture-block containing the actual data. const error_weight_block* ewb); void expand_deblock_weights( @@ -1101,12 +1290,12 @@ void expand_deblock_weights( void prepare_angular_tables(); void imageblock_initialize_deriv( - const imageblock* pb, + const imageblock* blk, int pixelcount, - float4* dptr); + vfloat4* dptr); void compute_angular_endpoints_1plane( - float mode_cutoff, + bool only_always, const block_size_descriptor* bsd, const float* decimated_quantized_weights, const float* decimated_weights, @@ -1114,7 +1303,7 @@ void compute_angular_endpoints_1plane( float high_value[MAX_WEIGHT_MODES]); void compute_angular_endpoints_2planes( - float mode_cutoff, + bool only_always, const block_size_descriptor * bsd, const float* decimated_quantized_weights, const float* decimated_weights, @@ -1152,8 +1341,9 @@ void physical_to_symbolic( const physical_compressed_block& pcb, symbolic_compressed_block& scb); -uint16_t unorm16_to_sf16( - uint16_t p); +#if defined(ASTCENC_DIAGNOSTICS) +class TraceLog; // See astcenc_diagnostic_trace for details. +#endif struct astcenc_context { @@ -1168,8 +1358,8 @@ struct astcenc_context // Regional average-and-variance information, initialized by // compute_averages_and_variances() only if the astc encoder // is requested to do error weighting based on averages and variances. - float4 *input_averages; - float4 *input_variances; + vfloat4 *input_averages; + vfloat4 *input_variances; float *input_alpha_averages; compress_symbolic_block_buffers* working_buffers; @@ -1183,26 +1373,38 @@ struct astcenc_context ParallelManager manage_avg_var; ParallelManager manage_compress; #endif + + ParallelManager manage_decompress; + +#if defined(ASTCENC_DIAGNOSTICS) + TraceLog* trace_log; +#endif }; /* ============================================================================ Platform-specific functions ============================================================================ */ /** - * @brief Run-time detection if the host CPU supports SSE 4.2. - * @returns Zero if not supported, positive value if it is. + * @brief Run-time detection if the host CPU supports the POPCNT extension. + * @return Zero if not supported, positive value if it is. + */ +int cpu_supports_popcnt(); + +/** + * @brief Run-time detection if the host CPU supports F16C extension. + * @return Zero if not supported, positive value if it is. */ -int cpu_supports_sse42(); +int cpu_supports_f16c(); /** - * @brief Run-time detection if the host CPU supports popcnt. - * @returns Zero if not supported, positive value if it is. + * @brief Run-time detection if the host CPU supports SSE 4.1 extension. + * @return Zero if not supported, positive value if it is. 
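+ *
+ * Illustrative use (the dispatch logic below is an assumption, not code
+ * from this patch): a caller can gate a SIMD code path at startup, e.g.
+ *
+ *     bool use_sse41 = cpu_supports_sse41() != 0;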
*/ -int cpu_supports_popcnt(); +int cpu_supports_sse41(); /** - * @brief Run-time detection if the host CPU supports avx2. - * @returns Zero if not supported, positive value if it is. + * @brief Run-time detection if the host CPU supports AVX 2 extension. + * @return Zero if not supported, positive value if it is. */ int cpu_supports_avx2(); @@ -1215,7 +1417,7 @@ int cpu_supports_avx2(); * @param size The desired buffer size. * @param align The desired buffer alignment; must be 2^N. * - * @returns The memory buffer pointer or nullptr on allocation failure. + * @return The memory buffer pointer or nullptr on allocation failure. */ template T* aligned_malloc(size_t size, size_t align) @@ -1246,9 +1448,9 @@ template void aligned_free(T* ptr) { #if defined(_WIN32) - _aligned_free(ptr); + _aligned_free((void*)ptr); #else - free(ptr); + free((void*)ptr); #endif } diff --git a/libkram/astc-encoder/astcenc_kmeans_partitioning.cpp b/libkram/astc-encoder/astcenc_kmeans_partitioning.cpp index 3d3c94de..6b837566 100644 --- a/libkram/astc-encoder/astcenc_kmeans_partitioning.cpp +++ b/libkram/astc-encoder/astcenc_kmeans_partitioning.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -37,16 +37,12 @@ // algorithm similar to XKCD #221. (http://xkcd.com/221/) // cluster the texels using the k++ means clustering initialization algorithm. -static void kpp_initialize( - int xdim, - int ydim, - int zdim, +static void kmeans_init( + int texels_per_block, int partition_count, const imageblock* blk, - float4* cluster_centers + vfloat4* cluster_centers ) { - int texels_per_block = xdim * ydim * zdim; - int cluster_center_samples[4]; // pick a random sample as first center-point. cluster_center_samples[0] = 145897 /* number from random.org */ % texels_per_block; @@ -56,20 +52,14 @@ static void kpp_initialize( // compute the distance to the first point. int sample = cluster_center_samples[0]; - float4 center_color = float4(blk->data_r[sample], - blk->data_g[sample], - blk->data_b[sample], - blk->data_a[sample]); + vfloat4 center_color = blk->texel(sample); float distance_sum = 0.0f; for (int i = 0; i < texels_per_block; i++) { - float4 color = float4(blk->data_r[i], - blk->data_g[i], - blk->data_b[i], - blk->data_a[i]); - float4 diff = color - center_color; - float distance = dot(diff, diff); + vfloat4 color = blk->texel(i); + vfloat4 diff = color - center_color; + float distance = dot_s(diff, diff); distance_sum += distance; distances[i] = distance; } @@ -110,21 +100,15 @@ static void kpp_initialize( } // update the distances with the new point. 
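	// k++ means selection rule, summarized (a paraphrase of the algorithm
	// named at the top of this function, not new behaviour): each additional
	// center is chosen with probability weighted by the squared distance to
	// the nearest center already picked; the loop below then refreshes
	// distances[i] to min(old distance, distance to the new center) so the
	// next pick uses up-to-date weights.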
- center_color = float4(blk->data_r[sample], - blk->data_g[sample], - blk->data_b[sample], - blk->data_a[sample]); + center_color = blk->texel(sample); distance_sum = 0.0f; for (int i = 0; i < texels_per_block; i++) { - float4 color = float4(blk->data_r[i], - blk->data_g[i], - blk->data_b[i], - blk->data_a[i]); - float4 diff = color - center_color; - float distance = dot(diff, diff); - distance = MIN(distance, distances[i]); + vfloat4 color = blk->texel(i); + vfloat4 diff = color - center_color; + float distance = dot_s(diff, diff); + distance = astc::min(distance, distances[i]); distance_sum += distance; distances[i] = distance; } @@ -134,66 +118,52 @@ static void kpp_initialize( for (int i = 0; i < partition_count; i++) { int center_sample = cluster_center_samples[i]; - float4 color = float4(blk->data_r[center_sample], - blk->data_g[center_sample], - blk->data_b[center_sample], - blk->data_a[center_sample]); - cluster_centers[i] = color; + cluster_centers[i] = blk->texel(center_sample); } } // basic K-means clustering: given a set of cluster centers, // assign each texel to a partition -static void basic_kmeans_assign_pass( - int xdim, - int ydim, - int zdim, +static void kmeans_assign( + int texels_per_block, int partition_count, const imageblock* blk, - const float4* cluster_centers, + const vfloat4* cluster_centers, int* partition_of_texel ) { - int texels_per_block = xdim * ydim * zdim; - float distances[MAX_TEXELS_PER_BLOCK]; - int texels_per_partition[4]; + int partition_texel_count[4]; - texels_per_partition[0] = texels_per_block; + partition_texel_count[0] = texels_per_block; for (int i = 1; i < partition_count; i++) { - texels_per_partition[i] = 0; + partition_texel_count[i] = 0; } for (int i = 0; i < texels_per_block; i++) { - float4 color = float4(blk->data_r[i], - blk->data_g[i], - blk->data_b[i], - blk->data_a[i]); - float4 diff = color - cluster_centers[0]; - float distance = dot(diff, diff); + vfloat4 color = blk->texel(i); + vfloat4 diff = color - cluster_centers[0]; + float distance = dot_s(diff, diff); distances[i] = distance; partition_of_texel[i] = 0; } for (int j = 1; j < partition_count; j++) { - float4 center_color = cluster_centers[j]; + vfloat4 center_color = cluster_centers[j]; for (int i = 0; i < texels_per_block; i++) { - float4 color = float4(blk->data_r[i], - blk->data_g[i], - blk->data_b[i], - blk->data_a[i]); - float4 diff = color - center_color; - float distance = dot(diff, diff); + vfloat4 color = blk->texel(i); + vfloat4 diff = color - center_color; + float distance = dot_s(diff, diff); if (distance < distances[i]) { distances[i] = distance; - texels_per_partition[partition_of_texel[i]]--; - texels_per_partition[j]++; + partition_texel_count[partition_of_texel[i]]--; + partition_texel_count[j]++; partition_of_texel[i] = j; } } @@ -210,10 +180,10 @@ static void basic_kmeans_assign_pass( problem_case = 0; for (int i = 0; i < partition_count; i++) { - if (texels_per_partition[i] == 0) + if (partition_texel_count[i] == 0) { - texels_per_partition[partition_of_texel[i]]--; - texels_per_partition[i]++; + partition_texel_count[partition_of_texel[i]]--; + partition_texel_count[i]++; partition_of_texel[i] = i; problem_case = 1; } @@ -224,33 +194,26 @@ static void basic_kmeans_assign_pass( // basic k-means clustering: given a set of cluster assignments // for the texels, find the center position of each cluster. 
-static void basic_kmeans_update( - int xdim, - int ydim, - int zdim, +static void kmeans_update( + int texels_per_block, int partition_count, const imageblock* blk, const int* partition_of_texel, - float4* cluster_centers + vfloat4* cluster_centers ) { - int texels_per_block = xdim * ydim * zdim; - - float4 color_sum[4]; + vfloat4 color_sum[4]; int weight_sum[4]; for (int i = 0; i < partition_count; i++) { - color_sum[i] = float4(0.0f, 0.0f, 0.0f, 0.0f); + color_sum[i] = vfloat4::zero(); weight_sum[i] = 0; } // first, find the center-of-gravity in each cluster for (int i = 0; i < texels_per_block; i++) { - float4 color = float4(blk->data_r[i], - blk->data_g[i], - blk->data_b[i], - blk->data_a[i]); + vfloat4 color = blk->texel(i); int part = partition_of_texel[i]; color_sum[part] = color_sum[part] + color; weight_sum[part]++; @@ -258,7 +221,7 @@ static void basic_kmeans_update( for (int i = 0; i < partition_count; i++) { - cluster_centers[i] = color_sum[i] * (1.0f / weight_sum[i]); + cluster_centers[i] = color_sum[i] * (1.0f / static_cast(weight_sum[i])); } } @@ -271,7 +234,7 @@ static inline int partition_mismatch2( ) { int v1 = astc::popcount(a0 ^ b0) + astc::popcount(a1 ^ b1); int v2 = astc::popcount(a0 ^ b1) + astc::popcount(a1 ^ b0); - return MIN(v1, v2); + return astc::min(v1, v2); } // compute the bit-mismatch for a partitioning in 3-partition mode @@ -297,31 +260,17 @@ static inline int partition_mismatch3( int s0 = p11 + p22; int s1 = p12 + p21; - int v0 = MIN(s0, s1) + p00; + int v0 = astc::min(s0, s1) + p00; int s2 = p10 + p22; int s3 = p12 + p20; - int v1 = MIN(s2, s3) + p01; + int v1 = astc::min(s2, s3) + p01; int s4 = p10 + p21; int s5 = p11 + p20; - int v2 = MIN(s4, s5) + p02; + int v2 = astc::min(s4, s5) + p02; - if (v1 < v0) - v0 = v1; - if (v2 < v0) - v0 = v2; - - return v0; -} - -static inline int MIN3( - int a, - int b, - int c -) { - int d = MIN(a, b); - return MIN(c, d); + return astc::min(v0, v1, v2); } // compute the bit-mismatch for a partitioning in 4-partition mode @@ -355,21 +304,19 @@ static inline int partition_mismatch4( int p32 = astc::popcount(a3 ^ b2); int p33 = astc::popcount(a3 ^ b3); - int mx23 = MIN(p22 + p33, p23 + p32); - int mx13 = MIN(p21 + p33, p23 + p31); - int mx12 = MIN(p21 + p32, p22 + p31); - int mx03 = MIN(p20 + p33, p23 + p30); - int mx02 = MIN(p20 + p32, p22 + p30); - int mx01 = MIN(p21 + p30, p20 + p31); - - int v0 = p00 + MIN3(p11 + mx23, p12 + mx13, p13 + mx12); - int v1 = p01 + MIN3(p10 + mx23, p12 + mx03, p13 + mx02); - int v2 = p02 + MIN3(p11 + mx03, p10 + mx13, p13 + mx01); - int v3 = p03 + MIN3(p11 + mx02, p12 + mx01, p10 + mx12); - - int x0 = MIN(v0, v1); - int x1 = MIN(v2, v3); - return MIN(x0, x1); + int mx23 = astc::min(p22 + p33, p23 + p32); + int mx13 = astc::min(p21 + p33, p23 + p31); + int mx12 = astc::min(p21 + p32, p22 + p31); + int mx03 = astc::min(p20 + p33, p23 + p30); + int mx02 = astc::min(p20 + p32, p22 + p30); + int mx01 = astc::min(p21 + p30, p20 + p31); + + int v0 = p00 + astc::min(p11 + mx23, p12 + mx13, p13 + mx12); + int v1 = p01 + astc::min(p10 + mx23, p12 + mx03, p13 + mx02); + int v2 = p02 + astc::min(p11 + mx03, p10 + mx13, p13 + mx01); + int v3 = p03 + astc::min(p11 + mx02, p12 + mx01, p10 + mx12); + + return astc::min(v0, v1, v2, v3); } static void count_partition_mismatch_bits( @@ -378,7 +325,7 @@ static void count_partition_mismatch_bits( const uint64_t bitmaps[4], int bitcounts[PARTITION_COUNT] ) { - const partition_info *pi = get_partition_table(bsd, partition_count); + const partition_info *pt = 
get_partition_table(bsd, partition_count); if (partition_count == 2) { @@ -386,15 +333,15 @@ static void count_partition_mismatch_bits( uint64_t bm1 = bitmaps[1]; for (int i = 0; i < PARTITION_COUNT; i++) { - if (pi->partition_count == 2) + if (pt->partition_count == 2) { - bitcounts[i] = partition_mismatch2(bm0, bm1, pi->coverage_bitmaps[0], pi->coverage_bitmaps[1]); + bitcounts[i] = partition_mismatch2(bm0, bm1, pt->coverage_bitmaps[0], pt->coverage_bitmaps[1]); } else { bitcounts[i] = 255; } - pi++; + pt++; } } else if (partition_count == 3) @@ -404,15 +351,15 @@ static void count_partition_mismatch_bits( uint64_t bm2 = bitmaps[2]; for (int i = 0; i < PARTITION_COUNT; i++) { - if (pi->partition_count == 3) + if (pt->partition_count == 3) { - bitcounts[i] = partition_mismatch3(bm0, bm1, bm2, pi->coverage_bitmaps[0], pi->coverage_bitmaps[1], pi->coverage_bitmaps[2]); + bitcounts[i] = partition_mismatch3(bm0, bm1, bm2, pt->coverage_bitmaps[0], pt->coverage_bitmaps[1], pt->coverage_bitmaps[2]); } else { bitcounts[i] = 255; } - pi++; + pt++; } } else if (partition_count == 4) @@ -423,37 +370,37 @@ static void count_partition_mismatch_bits( uint64_t bm3 = bitmaps[3]; for (int i = 0; i < PARTITION_COUNT; i++) { - if (pi->partition_count == 4) + if (pt->partition_count == 4) { - bitcounts[i] = partition_mismatch4(bm0, bm1, bm2, bm3, pi->coverage_bitmaps[0], pi->coverage_bitmaps[1], pi->coverage_bitmaps[2], pi->coverage_bitmaps[3]); + bitcounts[i] = partition_mismatch4(bm0, bm1, bm2, bm3, pt->coverage_bitmaps[0], pt->coverage_bitmaps[1], pt->coverage_bitmaps[2], pt->coverage_bitmaps[3]); } else { bitcounts[i] = 255; } - pi++; + pt++; } } } -// counting-sort on the mismatch-bits, thereby -// sorting the partitions into an ordering. +/** + * @brief Use counting sort on the mismatch array to sort partition candidates. + */ static void get_partition_ordering_by_mismatch_bits( const int mismatch_bits[PARTITION_COUNT], int partition_ordering[PARTITION_COUNT] ) { - int mscount[256]; - for (int i = 0; i < 256; i++) - { - mscount[i] = 0; - } + int mscount[256] { 0 }; + // Create the histogram of mismatch counts for (int i = 0; i < PARTITION_COUNT; i++) { mscount[mismatch_bits[i]]++; } + // Create a running sum from the histogram array + // Cells store previous values only; i.e. 
exclude self after sum int summa = 0; for (int i = 0; i < 256; i++) { @@ -462,6 +409,8 @@ static void get_partition_ordering_by_mismatch_bits( summa += cnt; } + // Use the running sum as the index, incrementing after read to allow + // sequential entries with the same count for (int i = 0; i < PARTITION_COUNT; i++) { int idx = mscount[mismatch_bits[i]]++; @@ -475,46 +424,39 @@ void kmeans_compute_partition_ordering( const imageblock* blk, int* ordering ) { - float4 cluster_centers[4]; + vfloat4 cluster_centers[4]; int partition_of_texel[MAX_TEXELS_PER_BLOCK]; - // 3 passes of plain k-means partitioning + // Use three passes of k-means clustering to partition the block data for (int i = 0; i < 3; i++) { if (i == 0) { - kpp_initialize(bsd->xdim, bsd->ydim, bsd->zdim, partition_count, blk, cluster_centers); + kmeans_init(bsd->texel_count, partition_count, blk, cluster_centers); } else { - basic_kmeans_update(bsd->xdim, bsd->ydim, bsd->zdim, partition_count, blk, partition_of_texel, cluster_centers); + kmeans_update(bsd->texel_count, partition_count, blk, partition_of_texel, cluster_centers); } - basic_kmeans_assign_pass(bsd->xdim, bsd->ydim, bsd->zdim, partition_count, blk, cluster_centers, partition_of_texel); - } - - // at this point, we have a near-ideal partitioning. - - // construct bitmaps - uint64_t bitmaps[4]; - for (int i = 0; i < 4; i++) - { - bitmaps[i] = 0ULL; + kmeans_assign(bsd->texel_count, partition_count, blk, cluster_centers, partition_of_texel); } - int texels_to_process = bsd->texelcount_for_bitmap_partitioning; + // Construct the block bitmaps of texel assignments to each partition + uint64_t bitmaps[4] { 0 }; + int texels_to_process = bsd->kmeans_texel_count; for (int i = 0; i < texels_to_process; i++) { - int idx = bsd->texels_for_bitmap_partitioning[i]; + int idx = bsd->kmeans_texels[i]; bitmaps[partition_of_texel[idx]] |= 1ULL << i; } - int bitcounts[PARTITION_COUNT]; - // for each entry in the partition table, count bits of partition-mismatch. - count_partition_mismatch_bits(bsd, partition_count, bitmaps, bitcounts); + // Count the mismatch between the block and the format's partition tables + int mismatch_counts[PARTITION_COUNT]; + count_partition_mismatch_bits(bsd, partition_count, bitmaps, mismatch_counts); - // finally, sort the partitions by bits-of-partition-mismatch - get_partition_ordering_by_mismatch_bits(bitcounts, ordering); + // Sort the partitions based on the number of mismatched bits + get_partition_ordering_by_mismatch_bits(mismatch_counts, ordering); } #endif diff --git a/libkram/astc-encoder/astcenc_mathlib.cpp b/libkram/astc-encoder/astcenc_mathlib.cpp index ffe01c40..a59cb24b 100644 --- a/libkram/astc-encoder/astcenc_mathlib.cpp +++ b/libkram/astc-encoder/astcenc_mathlib.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -17,41 +17,6 @@ #include "astcenc_mathlib.h" -/* Public function, see header file for detailed documentation */ -float astc::log2(float val) -{ - if32 p; - p.f = val; - if (p.s < 0x800000) - p.s = 0x800000; // negative, 0, denormal get clamped to non-denormal. - - // normalize mantissa to range [0.66, 1.33] and extract an exponent - // in such a way that 1.0 returns 0. 
- p.s -= 0x3f2aaaab; - int expo = p.s >> 23; - p.s &= 0x7fffff; - p.s += 0x3f2aaaab; - - float x = p.f - 1.0f; - - // taylor polynomial that, with horner's-rule style evaluation, - // gives sufficient precision for our use - // (relative error of about 1 in 10^6) - - float res = (float)expo - + x * ( 1.442695040888963f - + x * (-0.721347520444482f - + x * ( 0.480898346962988f - + x * (-0.360673760222241f - + x * ( 0.288539008177793f - + x * (-0.240449173481494f - + x * ( 0.206099291555566f - + x * (-0.180336880111120f - + x * ( 0.160299448987663f - ))))))))); - return res; -} - /** * @brief 64-bit rotate left. * diff --git a/libkram/astc-encoder/astcenc_mathlib.h b/libkram/astc-encoder/astcenc_mathlib.h index 05bd258f..63822627 100644 --- a/libkram/astc-encoder/astcenc_mathlib.h +++ b/libkram/astc-encoder/astcenc_mathlib.h @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -27,10 +27,69 @@ #include #include -// Kram uses SSE2Neon on ARM, so needs intrinsics in use but not the include -//#if /* USE_SSE && */ (ASTCENC_SSE != 0 || ASTCENC_AVX != 0) -// #include -//#endif +#ifndef ASTCENC_NEON + #if defined(__aarch64__) || defined(__arm__) + #define ASTCENC_NEON 1 + + // these aren't valid on Neon + #define ASTCENC_AVX 0 + #define ASTCENC_SSE 0 + #else + #define ASTCENC_NEON 0 + #endif +#endif + +#ifndef ASTCENC_POPCNT + #if defined(__POPCNT__) + #define ASTCENC_POPCNT 1 + #else + #define ASTCENC_POPCNT 0 + #endif +#endif + +#ifndef ASTCENC_F16C + #if defined(__F16C__) + #define ASTCENC_F16C 1 + #else + #define ASTCENC_F16C 0 + #endif +#endif + +#ifndef ASTCENC_SSE + #if defined(__SSE4_2__) + #define ASTCENC_SSE 42 + #elif defined(__SSE4_1__) + #define ASTCENC_SSE 41 + #elif defined(__SSE3__) + #define ASTCENC_SSE 30 + #elif defined(__SSE2__) + #define ASTCENC_SSE 20 + #else + #define ASTCENC_SSE 0 + #endif +#endif + +#ifndef ASTCENC_AVX + #if defined(__AVX2__) + #define ASTCENC_AVX 2 + #elif defined(__AVX__) + #define ASTCENC_AVX 1 + #else + #define ASTCENC_AVX 0 + #endif +#endif + +// 32-byte words in AVX and AVX2, but also a lot of 16-byte ops in AVX +// Neon only has 16-byte ops for now, but new ISA on the way. +#if ASTCENC_AVX + #define ASTCENC_VECALIGN 32 +#else + #define ASTCENC_VECALIGN 16 +#endif + +#if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 || ASTCENC_POPCNT != 0 + #include +#endif /* ============================================================================ Fast math library; note that many of the higher-order functions in this set @@ -43,6 +102,14 @@ to future vectorization. ============================================================================ */ +// Union for manipulation of float bit patterns +typedef union +{ + uint32_t u; + int32_t s; + float f; +} if32; + // These are namespaced to avoid colliding with C standard library functions. namespace astc { @@ -51,271 +118,281 @@ static const float PI = 3.14159265358979323846f; static const float PI_OVER_TWO = 1.57079632679489661923f; /** - * @brief Fast approximation of log2(x) + * @brief SP float absolute value. * - * This does not produce correct results for special cases such as - * zero/inf/nan/denormal/negative inputs: + * @param v The value to make absolute. 
* - * * Any negative, zero, or denormal will get clamped to smallest-normal, - * resulting in a logarithm of -126. - * * +Inf and +NaN get treated as an extension of largest-finite values, - * which should result in a logarithm value between 128 and 129. + * @return The absolute value. */ -float log2(float val); +static inline float fabs(float v) +{ + return std::fabs(v); +} /** - * @brief SP float absolute value. + * @brief Test if a float value is a nan. * - * @param val The value to make absolute. + * @param v The value test. * - * @return The absolute value. + * @return Zero is not a NaN, non-zero otherwise. */ -static inline float fabs(float val) +static inline bool isnan(float v) { - return std::fabs(val); + return v != v; } /** - * @brief SP float min. + * @brief Return the minimum of two values. * - * @param valA The first value to compare. - * @param valB The second value to compare. + * For floats, NaNs are turned into @c q. + * + * @param p The first value to compare. + * @param q The second value to compare. * * @return The smallest value. */ -static inline float fmin(float p, float q) +template +static inline T min(T p, T q) { return p < q ? p : q; } /** - * @brief SP float max. + * @brief Return the minimum of three values. * - * @param valA The first value to compare. - * @param valB The second value to compare. + * For floats, NaNs are turned into @c r. * - * @return The largest value. + * @param p The first value to compare. + * @param q The second value to compare. + * @param r The third value to compare. + * + * @return The smallest value. */ -static inline float fmax(float p, float q) +template +static inline T min(T p, T q, T r) { - return q < p ? p : q; + return min(min(p, q), r); } /** - * @brief Test if a float value is a nan. + * @brief Return the minimum of four values. * - * @param val The value test. + * For floats, NaNs are turned into @c s. * - * @return Zero is not a NaN, non-zero otherwise. + * @param p The first value to compare. + * @param q The second value to compare. + * @param r The third value to compare. + * @param s The fourth value to compare. + * + * @return The smallest value. */ -static inline int isnan(float val) +template +static inline T min(T p, T q, T r, T s) { - return val != val; + return min(min(p, q), min(r, s)); } /** - * @brief Clamp a float value between 0.0f and 1.0f. + * @brief Return the maximum of two values. * - * NaNs are turned into 0.0f. + * For floats, NaNs are turned into @c q. * - * @param val The value clamp. + * @param p The first value to compare. + * @param q The second value to compare. * - * @return The clamped value. + * @return The largest value. */ -static inline float clamp1f(float val) +template +static inline T max(T p, T q) { - // Do not reorder these, correct NaN handling relies on the fact that - // any comparison with NaN returns false so will fall-though to the 0.0f. - if (val > 1.0f) return 1.0f; - if (val > 0.0f) return val; - return 0.0f; + return p > q ? p : q; } /** - * @brief Clamp a float value between 0.0f and 255.0f. + * @brief Return the maximum of three values. * - * NaNs are turned into 0.0f. + * For floats, NaNs are turned into @c r. * - * @param val The value clamp. + * @param p The first value to compare. + * @param q The second value to compare. + * @param r The third value to compare. * - * @return The clamped value. + * @return The largest value. 
*/ -static inline float clamp255f(float val) +template +static inline T max(T p, T q, T r) { - // Do not reorder these, correct NaN handling relies on the fact that - // any comparison with NaN returns false so will fall-though to the 0.0f. - if (val > 255.0f) return 255.0f; - if (val > 0.0f) return val; - return 0.0f; + return max(max(p, q), r); } /** - * @brief Clamp a value value between mn and mx + * @brief Return the maximum of four values. * - * For floats, NaNs are turned into mn. + * For floats, NaNs are turned into @c s. * - * @param val The value clamp. - * @param mn The min value (inclusive). - * @param mx The max value (inclusive). + * @param p The first value to compare. + * @param q The second value to compare. + * @param r The third value to compare. + * @param s The fourth value to compare. * - * @return The clamped value. + * @return The largest value. */ template -inline T clamp(T val, T mn, T mx) +static inline T max(T p, T q, T r, T s) { - // Do not reorder; correct NaN handling relies on the fact that comparison - // with NaN returns false and will fall-though to the "min" value. - if (val > mx) return mx; - if (val > mn) return val; - return mn; + return max(max(p, q), max(r, s)); } /** - * @brief Clamp a float value between 0.0f and 65504.0f. + * @brief Clamp a value value between @c mn and @c mx. * - * NaNs are turned into 0.0f. + * For floats, NaNs are turned into @c mn. * - * @param val The value to clamp + * @param v The value to clamp. + * @param mn The min value (inclusive). + * @param mx The max value (inclusive). * - * @return The clamped value + * @return The clamped value. */ -static inline float clamp64Kf(float val) +template +inline T clamp(T v, T mn, T mx) { - // Do not reorder these, correct NaN handling relies on the fact that - // any comparison with NaN returns false so will fall-though to the 0.0f. - if (val > 65504.0f) return 65504.0f; - if (val > 0.0f) return val; - return 0.0f; + // Do not reorder; correct NaN handling relies on the fact that comparison + // with NaN returns false and will fall-though to the "min" value. + if (v > mx) return mx; + if (v > mn) return v; + return mn; } /** - * @brief Clamp an integer between two specified limits. + * @brief Clamp a float value between 0.0f and 1.0f. * - * @param val The value clamp. + * NaNs are turned into 0.0f. + * + * @param v The value to clamp. * * @return The clamped value. */ -static inline int clampi(int val, int low, int high) +static inline float clamp1f(float v) { - if (val < low) return low; - if (val > high) return high; - return val; + return astc::clamp(v, 0.0f, 1.0f); } /** - * @brief SP float round-to-nearest. + * @brief Clamp a float value between 0.0f and 255.0f. * - * @param val The value to round. + * NaNs are turned into 0.0f. * - * @return The rounded value. + * @param v The value to clamp. + * + * @return The clamped value. */ -static inline float flt_rte(float val) +static inline float clamp255f(float v) { - return std::floor(val + 0.5f); + return astc::clamp(v, 0.0f, 255.0f); } /** * @brief SP float round-down. * - * @param val The value to round. + * @param v The value to round. * * @return The rounded value. */ -static inline float flt_rd(float val) +static inline float flt_rd(float v) { - return std::floor(val); + return std::floor(v); } /** * @brief SP float round-to-nearest and convert to integer. * - * @param val The value to round. + * @param v The value to round. * * @return The rounded value. 
*/ -static inline int flt2int_rtn(float val) +static inline int flt2int_rtn(float v) { - return (int)(val + 0.5f); + return (int)(v + 0.5f); } /** * @brief SP float round down and convert to integer. * - * @param val The value to round. + * @param v The value to round. * * @return The rounded value. */ -static inline int flt2int_rd(float val) +static inline int flt2int_rd(float v) { - return (int)(val); + return (int)(v); } /** * @brief Population bit count. * - * @param val The value to count. + * @param v The value to population count. * * @return The number of 1 bits. */ -static inline int popcount(uint64_t p) +static inline int popcount(uint64_t v) { #if ASTCENC_POPCNT >= 1 - return (int)_mm_popcnt_u64(p); + return (int)_mm_popcnt_u64(v); #else uint64_t mask1 = 0x5555555555555555ULL; uint64_t mask2 = 0x3333333333333333ULL; uint64_t mask3 = 0x0F0F0F0F0F0F0F0FULL; - p -= (p >> 1) & mask1; - p = (p & mask2) + ((p >> 2) & mask2); - p += p >> 4; - p &= mask3; - p *= 0x0101010101010101ULL; - p >>= 56; - return (int)p; + v -= (v >> 1) & mask1; + v = (v & mask2) + ((v >> 2) & mask2); + v += v >> 4; + v &= mask3; + v *= 0x0101010101010101ULL; + v >>= 56; + return (int)v; #endif } /** * @brief Fast approximation of 1.0 / sqrt(val). * - * @param val The input value. + * @param v The input value. * * @return The approximated result. */ -static inline float rsqrt(float val) +static inline float rsqrt(float v) { - return 1.0f / std::sqrt(val); + return 1.0f / std::sqrt(v); } /** * @brief Fast approximation of sqrt(val). * - * @param val The input value. + * @param v The input value. * * @return The approximated result. */ -static inline float sqrt(float val) +static inline float sqrt(float v) { - return std::sqrt(val); + return std::sqrt(v); } /** - * @brief Log base 2, linearized from 2^-14. + * @brief Extract mantissa and exponent of a float value. * - * @param val The value to log2. + * @param v The input value. + * @param[out] expo The output exponent. * - * @return The approximated result. + * @return The mantissa. 
*/ -static inline float xlog2(float val) +static inline float frexp(float v, int* expo) { - if (val >= 0.00006103515625f) - { - return astc::log2(val); - } - - // Linearized region - return -15.44269504088896340735f + val * 23637.11554992477646609062f; + if32 p; + p.f = v; + *expo = ((p.u >> 23) & 0xFF) - 126; + p.u = (p.u & 0x807fffff) | 0x3f000000; + return p.f; } /** @@ -400,220 +477,29 @@ vtype2 operator*(vtype2 p, T q) { // Scalar by vector multiplication operator template -vtype2 operator*(T p, vtype2 q){ +vtype2 operator*(T p, vtype2 q) { return vtype2 { p * q.r, p * q.g }; } -template class vtype3 -{ -public: - // Data storage - T r, g, b; - - // Default constructor - vtype3() {} - - // Initialize from 1 scalar - vtype3(T p) : r(p), g(p), b(p) {} - - // Initialize from N scalars - vtype3(T p, T q, T s) : r(p), g(q), b(s) {} - - // Initialize from another vector - vtype3(const vtype3 & p) : r(p.r), g(p.g), b(p.b) {} - - // Assignment operator - vtype3& operator=(const vtype3 &s) { - this->r = s.r; - this->g = s.g; - this->b = s.b; - return *this; - } -}; - -// Vector by vector addition -template -vtype3 operator+(vtype3 p, vtype3 q) { - return vtype3 { p.r + q.r, p.g + q.g, p.b + q.b }; -} - -// Vector by vector subtraction -template -vtype3 operator-(vtype3 p, vtype3 q) { - return vtype3 { p.r - q.r, p.g - q.g, p.b - q.b }; -} - -// Vector by vector multiplication operator -template -vtype3 operator*(vtype3 p, vtype3 q) { - return vtype3 { p.r * q.r, p.g * q.g, p.b * q.b }; -} - -// Vector by scalar multiplication operator -template -vtype3 operator*(vtype3 p, T q) { - return vtype3 { p.r * q, p.g * q, p.b * q }; -} - -// Scalar by vector multiplication operator -template -vtype3 operator*(T p, vtype3 q){ - return vtype3 { p * q.r, p * q.g, p * q.b }; -} - -template class alignas(16) vtype4 -{ -public: - // Data storage - T r, g, b, a; - - // Default constructor - vtype4() {} - - // Initialize from 1 scalar - vtype4(T p) : r(p), g(p), b(p), a(p) {} - - // Initialize from N scalars - vtype4(T p, T q, T s, T t) : r(p), g(q), b(s), a(t) {} - - // Initialize from another vector - vtype4(const vtype4 & p) : r(p.r), g(p.g), b(p.b), a(p.a) {} - - // Assignment operator - vtype4& operator=(const vtype4 &s) { - this->r = s.r; - this->g = s.g; - this->b = s.b; - this->a = s.a; - return *this; - } -}; - -// Vector by vector addition -template -vtype4 operator+(vtype4 p, vtype4 q) { - return vtype4 { p.r + q.r, p.g + q.g, p.b + q.b, p.a + q.a }; -} - -// Vector by vector subtraction -template -vtype4 operator-(vtype4 p, vtype4 q) { - return vtype4 { p.r - q.r, p.g - q.g, p.b - q.b, p.a - q.a }; -} - -// Vector by vector multiplication operator -template -vtype4 operator*(vtype4 p, vtype4 q) { - return vtype4 { p.r * q.r, p.g * q.g, p.b * q.b, p.a * q.a }; -} - -// Vector by scalar multiplication operator -template -vtype4 operator*(vtype4 p, T q) { - return vtype4 { p.r * q, p.g * q, p.b * q, p.a * q }; -} - -// Scalar by vector multiplication operator -template -vtype4 operator*(T p, vtype4 q){ - return vtype4 { p * q.r, p * q.g, p * q.b, p * q.a }; -} - typedef vtype2 float2; -typedef vtype3 float3; -typedef vtype4 float4; -typedef vtype3 int3; -typedef vtype4 int4; -typedef vtype4 uint4; static inline float dot(float2 p, float2 q) { return p.r * q.r + p.g * q.g; } -static inline float dot(float3 p, float3 q) { return p.r * q.r + p.g * q.g + p.b * q.b; } -static inline float dot(float4 p, float4 q) { -#if (ASTCENC_SSE >= 42) && (ASTCENC_ISA_INVARIANCE == 0) - __m128 pv = 
_mm_load_ps((float*)&p); - __m128 qv = _mm_load_ps((float*)&q); - __m128 t = _mm_dp_ps(pv, qv, 0xFF); - return _mm_cvtss_f32(t); -#else - return p.r * q.r + p.g * q.g + p.b * q.b + p.a * q.a; -#endif -} static inline float2 normalize(float2 p) { return p * astc::rsqrt(dot(p, p)); } -static inline float3 normalize(float3 p) { return p * astc::rsqrt(dot(p, p)); } -static inline float4 normalize(float4 p) { return p * astc::rsqrt(dot(p, p)); } - -static inline float4 sqrt(float4 p) { - float4 r; -#if ASTCENC_SSE >= 20 - __m128 pv = _mm_load_ps((float*)&p); - __m128 t = _mm_sqrt_ps(pv); - _mm_store_ps((float*)&r, t); -#else - r.r = std::sqrt(p.r); - r.g = std::sqrt(p.g); - r.b = std::sqrt(p.b); - r.a = std::sqrt(p.a); -#endif - return r; -} - -#ifndef MIN - #define MIN(x,y) ((x)<(y)?(x):(y)) -#endif - -#ifndef MAX - #define MAX(x,y) ((x)>(y)?(x):(y)) -#endif - -// TODO: need to use _mm_min/max_ps -static inline float4 min(float4 p, float4 q) { - return float4(MIN(p.r, q.r), MIN(p.g, q.g), MIN(p.b, q.b), MIN(p.a, q.a)); -} -static inline float4 max(float4 p, float4 q) { - return float4(MAX(p.r, q.r), MAX(p.g, q.g), MAX(p.b, q.b), MAX(p.a, q.a)); -} - /* ============================================================================ Softfloat library with fp32 and fp16 conversion functionality. ============================================================================ */ -typedef union if32_ -{ - uint32_t u; - int32_t s; - float f; -} if32; - uint32_t clz32(uint32_t p); -/* sized soft-float types. These are mapped to the sized integer - types of C99, instead of C's floating-point types; this is because - the library needs to maintain exact, bit-level control on all - operations on these data types. */ -typedef uint16_t sf16; -typedef uint32_t sf32; - -/* the five rounding modes that IEEE-754r defines */ -typedef enum -{ - SF_UP = 0, /* round towards positive infinity */ - SF_DOWN = 1, /* round towards negative infinity */ - SF_TOZERO = 2, /* round towards zero */ - SF_NEARESTEVEN = 3, /* round toward nearest value; if mid-between, round to even value */ - SF_NEARESTAWAY = 4 /* round toward nearest value; if mid-between, round away from zero */ -} roundmode; - /* narrowing float->float conversions */ -sf16 sf32_to_sf16(sf32, roundmode); - -/* widening float->float conversions */ -sf32 sf16_to_sf32(sf16); - -sf16 float_to_sf16(float, roundmode); - -float sf16_to_float(sf16); +uint16_t float_to_sf16(float val); +float sf16_to_float(uint16_t val); +/********************************* + Vector library +*********************************/ +#include "astcenc_vecmathlib.h" /********************************* Declaration of line types @@ -629,14 +515,14 @@ struct line2 // parametric line, 3D struct line3 { - float3 a; - float3 b; + vfloat4 a; + vfloat4 b; }; struct line4 { - float4 a; - float4 b; + vfloat4 a; + vfloat4 b; }; @@ -649,16 +535,16 @@ struct processed_line2 struct processed_line3 { - float3 amod; - float3 bs; - float3 bis; + vfloat4 amod; + vfloat4 bs; + vfloat4 bis; }; struct processed_line4 { - float4 amod; - float4 bs; - float4 bis; + vfloat4 amod; + vfloat4 bs; + vfloat4 bis; }; #endif diff --git a/libkram/astc-encoder/astcenc_mathlib_softfloat.cpp b/libkram/astc-encoder/astcenc_mathlib_softfloat.cpp index 98ebac7d..d1381fd7 100644 --- a/libkram/astc-encoder/astcenc_mathlib_softfloat.cpp +++ b/libkram/astc-encoder/astcenc_mathlib_softfloat.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 
2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -21,6 +21,13 @@ #include "astcenc_mathlib.h" +/* sized soft-float types. These are mapped to the sized integer + types of C99, instead of C's floating-point types; this is because + the library needs to maintain exact, bit-level control on all + operations on these data types. */ +typedef uint16_t sf16; +typedef uint32_t sf32; + /****************************************** helper functions and their lookup tables ******************************************/ @@ -58,7 +65,7 @@ uint32_t clz32(uint32_t inp) { #if defined(__GNUC__) && (defined(__i386) || defined(__amd64)) uint32_t bsr; - __asm__("bsrl %1, %0": "=r"(bsr):"r"(inp | 1)); + __asm__("bsrl %1, %0": "=r"(bsr):"r"(inp | 1)); return 31 - bsr; #else #if defined(__arm__) && defined(__ARMCC_VERSION) @@ -66,7 +73,7 @@ uint32_t clz32(uint32_t inp) #else #if defined(__arm__) && defined(__GNUC__) uint32_t lz; - __asm__("clz %0, %1": "=r"(lz):"r"(inp)); + __asm__("clz %0, %1": "=r"(lz):"r"(inp)); return lz; #else /* slow default version */ @@ -87,6 +94,17 @@ uint32_t clz32(uint32_t inp) #endif } +/* the five rounding modes that IEEE-754r defines */ +typedef enum +{ + SF_UP = 0, /* round towards positive infinity */ + SF_DOWN = 1, /* round towards negative infinity */ + SF_TOZERO = 2, /* round towards zero */ + SF_NEARESTEVEN = 3, /* round toward nearest value; if mid-between, round to even value */ + SF_NEARESTAWAY = 4 /* round toward nearest value; if mid-between, round away from zero */ +} roundmode; + + static uint32_t rtne_shift32(uint32_t inp, uint32_t shamt) { uint32_t vl1 = UINT32_C(1) << shamt; @@ -116,7 +134,7 @@ static uint32_t rtup_shift32(uint32_t inp, uint32_t shamt) } /* convert from FP16 to FP32. */ -sf32 sf16_to_sf32(sf16 inp) +static sf32 sf16_to_sf32(sf16 inp) { uint32_t inpx = inp; @@ -167,7 +185,7 @@ sf32 sf16_to_sf32(sf16 inp) } /* Conversion routine that converts from FP32 to FP16. It supports denormals and all rounding modes. If a NaN is given as input, it is quietened. */ -sf16 sf32_to_sf16(sf32 inp, roundmode rmode) +static sf16 sf32_to_sf16(sf32 inp, roundmode rmode) { /* for each possible sign/exponent combination, store a case index. This gives a 512-byte table */ static const uint8_t tab[512] = { @@ -369,7 +387,7 @@ sf16 sf32_to_sf16(sf32 inp, roundmode rmode) } /* convert from soft-float to native-float */ -float sf16_to_float(sf16 p) +float sf16_to_float(uint16_t p) { if32 i; i.u = sf16_to_sf32(p); @@ -377,9 +395,9 @@ float sf16_to_float(sf16 p) } /* convert from native-float to soft-float */ -sf16 float_to_sf16(float p, roundmode rm) +uint16_t float_to_sf16(float p) { if32 i; i.f = p; - return sf32_to_sf16(i.u, rm); + return sf32_to_sf16(i.u, SF_NEARESTEVEN); } diff --git a/libkram/astc-encoder/astcenc_partition_tables.cpp b/libkram/astc-encoder/astcenc_partition_tables.cpp index 20be6a11..04f7ae23 100644 --- a/libkram/astc-encoder/astcenc_partition_tables.cpp +++ b/libkram/astc-encoder/astcenc_partition_tables.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. 
You may obtain a copy @@ -75,14 +75,14 @@ static int compare_canonicalized_partition_tables( consider and thus improves encode performance. */ static void partition_table_zap_equal_elements( int texel_count, - partition_info* pi + partition_info* pt ) { int partition_tables_zapped = 0; uint64_t *canonicalizeds = new uint64_t[PARTITION_COUNT * 7]; for (int i = 0; i < PARTITION_COUNT; i++) { - gen_canonicalized_partition_table(texel_count, pi[i].partition_of_texel, canonicalizeds + i * 7); + gen_canonicalized_partition_table(texel_count, pt[i].partition_of_texel, canonicalizeds + i * 7); } for (int i = 0; i < PARTITION_COUNT; i++) @@ -91,7 +91,7 @@ static void partition_table_zap_equal_elements( { if (compare_canonicalized_partition_tables(canonicalizeds + 7 * i, canonicalizeds + 7 * j)) { - pi[i].partition_count = 0; + pt[i].partition_count = 0; partition_tables_zapped++; break; } @@ -275,7 +275,7 @@ static void generate_one_partition_table( for (int i = 0; i < 4; i++) { - pt->texels_per_partition[i] = counts[i]; + pt->partition_texel_count[i] = counts[i]; } if (counts[0] == 0) @@ -304,10 +304,10 @@ static void generate_one_partition_table( pt->coverage_bitmaps[i] = 0ULL; } - int texels_to_process = bsd->texelcount_for_bitmap_partitioning; + int texels_to_process = bsd->kmeans_texel_count; for (int i = 0; i < texels_to_process; i++) { - int idx = bsd->texels_for_bitmap_partitioning[i]; + int idx = bsd->kmeans_texels[i]; pt->coverage_bitmaps[pt->partition_of_texel[idx]] |= 1ULL << i; } } diff --git a/libkram/astc-encoder/astcenc_percentile_tables.cpp b/libkram/astc-encoder/astcenc_percentile_tables.cpp index f84dea03..6d55a7ac 100644 --- a/libkram/astc-encoder/astcenc_percentile_tables.cpp +++ b/libkram/astc-encoder/astcenc_percentile_tables.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -1108,7 +1108,7 @@ static const packed_percentile_table *get_packed_table( case 0x0A0A: return &block_pcd_10x10; case 0x0A0C: return &block_pcd_12x10; case 0x0C0C: return &block_pcd_12x12; - }; + } // Should never hit this with a valid 2D block size return nullptr; @@ -1173,7 +1173,7 @@ int is_legal_2d_block_size( case 0x0C0A: case 0x0C0C: return 1; - }; + } return 0; } diff --git a/libkram/astc-encoder/astcenc_pick_best_endpoint_format.cpp b/libkram/astc-encoder/astcenc_pick_best_endpoint_format.cpp index 21d8cf87..9ba1685f 100644 --- a/libkram/astc-encoder/astcenc_pick_best_endpoint_format.cpp +++ b/libkram/astc-encoder/astcenc_pick_best_endpoint_format.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -32,19 +32,19 @@ // for a given partition, compute for every (integer-component-count, quantization-level) // the color error. -static void compute_color_error_for_every_integer_count_and_quantization_level( +static void compute_color_error_for_every_integer_count_and_quant_level( int encode_hdr_rgb, // 1 = perform HDR encoding, 0 = perform LDR encoding. 
int encode_hdr_alpha, int partition_index, - const partition_info* pi, - const encoding_choice_errors * eci, // pointer to the structure for the CURRENT partition. - const endpoints * ep, - float4 error_weightings[4], + const partition_info* pt, + const encoding_choice_errors* eci, // pointer to the structure for the CURRENT partition. + const endpoints* ep, + vfloat4 error_weight, // arrays to return results back through. float best_error[21][4], int format_of_choice[21][4] ) { - int partition_size = pi->texels_per_partition[partition_index]; + int partition_size = pt->partition_texel_count[partition_index]; static const float baseline_quant_error[21] = { (65536.0f * 65536.0f / 18.0f), // 2 values, 1 step @@ -70,15 +70,13 @@ static void compute_color_error_for_every_integer_count_and_quantization_level( (65536.0f * 65536.0f / 18.0f) / (255 * 255) }; - float4 ep0 = ep->endpt0[partition_index]; - float4 ep1 = ep->endpt1[partition_index]; - - float ep1_min = MIN(MIN(ep1.r, ep1.g), ep1.b); - ep1_min = MAX(ep1_min, 0.0f); + vfloat4 ep0 = ep->endpt0[partition_index]; + vfloat4 ep1 = ep->endpt1[partition_index]; - float4 error_weight = error_weightings[partition_index]; + float ep1_min = hmin_rgb_s(ep1); + ep1_min = astc::max(ep1_min, 0.0f); - float error_weight_rgbsum = error_weight.r + error_weight.g + error_weight.b; + float error_weight_rgbsum = hadd_rgb_s(error_weight); float range_upper_limit_rgb = encode_hdr_rgb ? 61440.0f : 65535.0f; float range_upper_limit_alpha = encode_hdr_alpha ? 61440.0f : 65535.0f; @@ -86,66 +84,56 @@ static void compute_color_error_for_every_integer_count_and_quantization_level( // it is possible to get endpoint colors significantly outside [0,upper-limit] // even if the input data are safely contained in [0,upper-limit]; // we need to add an error term for this situation, - float4 ep0_range_error_high; - float4 ep1_range_error_high; - float4 ep0_range_error_low; - float4 ep1_range_error_low; - - ep0_range_error_high.r = MAX(0.0f, ep0.r - range_upper_limit_rgb); - ep0_range_error_high.g = MAX(0.0f, ep0.g - range_upper_limit_rgb); - ep0_range_error_high.b = MAX(0.0f, ep0.b - range_upper_limit_rgb); - ep0_range_error_high.a = MAX(0.0f, ep0.a - range_upper_limit_alpha); - - ep1_range_error_high.r = MAX(0.0f, ep1.r - range_upper_limit_rgb); - ep1_range_error_high.g = MAX(0.0f, ep1.g - range_upper_limit_rgb); - ep1_range_error_high.b = MAX(0.0f, ep1.b - range_upper_limit_rgb); - ep1_range_error_high.a = MAX(0.0f, ep1.a - range_upper_limit_alpha); - - ep0_range_error_low.r = MIN(0.0f, ep0.r); - ep0_range_error_low.g = MIN(0.0f, ep0.g); - ep0_range_error_low.b = MIN(0.0f, ep0.b); - ep0_range_error_low.a = MIN(0.0f, ep0.a); - - ep1_range_error_low.r = MIN(0.0f, ep1.r); - ep1_range_error_low.g = MIN(0.0f, ep1.g); - ep1_range_error_low.b = MIN(0.0f, ep1.b); - ep1_range_error_low.a = MIN(0.0f, ep1.a); - - float4 sum_range_error = + vfloat4 ep0_range_error_high; + vfloat4 ep1_range_error_high; + vfloat4 ep0_range_error_low; + vfloat4 ep1_range_error_low; + + vfloat4 offset(range_upper_limit_rgb, range_upper_limit_rgb, range_upper_limit_rgb, range_upper_limit_alpha); + ep0_range_error_high = max(ep0 - offset, 0.0f); + ep1_range_error_high = max(ep1 - offset, 0.0f); + + ep0_range_error_low = min(ep0, 0.0f); + ep1_range_error_low = min(ep1, 0.0f); + + vfloat4 sum_range_error = (ep0_range_error_low * ep0_range_error_low) + (ep1_range_error_low * ep1_range_error_low) + (ep0_range_error_high * ep0_range_error_high) + (ep1_range_error_high * ep1_range_error_high); - float 
rgb_range_error = dot(float3(sum_range_error.r, sum_range_error.g, sum_range_error.b), - float3(error_weight.r, error_weight.g, error_weight.b)) * 0.5f * partition_size; - float alpha_range_error = sum_range_error.a * error_weight.a * 0.5f * partition_size; + + float rgb_range_error = dot3_s(sum_range_error.swz<0, 1, 2>(), + error_weight.swz<0, 1, 2>()) + * 0.5f * static_cast(partition_size); + float alpha_range_error = sum_range_error.lane<3>() * error_weight.lane<3>() + * 0.5f * static_cast(partition_size); if (encode_hdr_rgb) { // collect some statistics float af, cf; - if (ep1.r > ep1.g && ep1.r > ep1.b) + if (ep1.lane<0>() > ep1.lane<1>() && ep1.lane<0>() > ep1.lane<2>()) { - af = ep1.r; - cf = ep1.r - ep0.r; + af = ep1.lane<0>(); + cf = ep1.lane<0>() - ep0.lane<0>(); } - else if (ep1.g > ep1.b) + else if (ep1.lane<1>() > ep1.lane<2>()) { - af = ep1.g; - cf = ep1.g - ep0.g; + af = ep1.lane<1>(); + cf = ep1.lane<1>() - ep0.lane<1>(); } else { - af = ep1.b; - cf = ep1.b - ep0.b; + af = ep1.lane<2>(); + cf = ep1.lane<2>() - ep0.lane<2>(); } float bf = af - ep1_min; // estimate of color-component spread in high endpoint color - float3 prd = float3(ep1.r, ep1.g, ep1.b) - float3(cf, cf, cf); - float3 pdif = prd - float3(ep0.r, ep0.g, ep0.b); + vfloat4 prd = (ep1 - vfloat4(cf)).swz<0, 1, 2>(); + vfloat4 pdif = prd - ep0.swz<0, 1, 2>(); // estimate of color-component spread in low endpoint color - float df = MAX(MAX(fabsf(pdif.r), fabsf(pdif.g)), fabsf(pdif.b)); + float df = hmax_s(abs(pdif)); int b = (int)bf; int c = (int)cf; @@ -236,15 +224,15 @@ static void compute_color_error_for_every_integer_count_and_quantization_level( rgb_mode = 7; } - static const float rgbo_error_scales[6] = { 4.0f, 4.0f, 16.0f, 64.0f, 256.0f, 1024.0f }; - static const float rgb_error_scales[9] = { 64.0f, 64.0f, 16.0f, 16.0f, 4.0f, 4.0f, 1.0f, 1.0f, 384.0f }; + static const float rgbo_error_scales[6] { 4.0f, 4.0f, 16.0f, 64.0f, 256.0f, 1024.0f }; + static const float rgb_error_scales[9] { 64.0f, 64.0f, 16.0f, 16.0f, 4.0f, 4.0f, 1.0f, 1.0f, 384.0f }; float mode7mult = rgbo_error_scales[rgbo_mode] * 0.0015f; // empirically determined .... float mode11mult = rgb_error_scales[rgb_mode] * 0.010f; // empirically determined .... - float lum_high = (ep1.r + ep1.g + ep1.b) * (1.0f / 3.0f); - float lum_low = (ep0.r + ep0.g + ep0.b) * (1.0f / 3.0f); + float lum_high = hadd_rgb_s(ep1) * (1.0f / 3.0f); + float lum_low = hadd_rgb_s(ep0) * (1.0f / 3.0f); float lumdif = lum_high - lum_low; float mode23mult = lumdif < 960 ? 4.0f : lumdif < 3968 ? 16.0f : 128.0f; @@ -268,9 +256,9 @@ static void compute_color_error_for_every_integer_count_and_quantization_level( // base_quant_error should depend on the scale-factor that would be used // during actual encode of the color value. 
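// For reference, a scalar restatement of the endpoint out-of-range penalty computed
// earlier in this function: max(ep - limit, 0) and min(ep, 0) per channel, squared,
// summed over both endpoints, weighted, and scaled by half the partition size. The
// helper below and its parameter names are hypothetical, not part of this patch.
static void range_error_sketch(const float ep0[4], const float ep1[4],
                               const float error_weight[4],
                               float upper_rgb, float upper_alpha, int partition_size,
                               float* rgb_range_error, float* alpha_range_error)
{
    float sum[4];
    for (int c = 0; c < 4; c++)
    {
        float upper = (c == 3) ? upper_alpha : upper_rgb;
        // Overshoot above the channel's upper limit, and undershoot below zero
        float h0 = ep0[c] > upper ? ep0[c] - upper : 0.0f;
        float h1 = ep1[c] > upper ? ep1[c] - upper : 0.0f;
        float l0 = ep0[c] < 0.0f ? ep0[c] : 0.0f;
        float l1 = ep1[c] < 0.0f ? ep1[c] : 0.0f;
        sum[c] = h0 * h0 + h1 * h1 + l0 * l0 + l1 * l1;
    }
    float scale = 0.5f * (float)partition_size;
    *rgb_range_error = (sum[0] * error_weight[0] +
                        sum[1] * error_weight[1] +
                        sum[2] * error_weight[2]) * scale;
    *alpha_range_error = sum[3] * error_weight[3] * scale;
}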
- float base_quant_error = baseline_quant_error[i] * partition_size * 1.0f; + float base_quant_error = baseline_quant_error[i] * static_cast(partition_size); float rgb_quantization_error = error_weight_rgbsum * base_quant_error * 2.0f; - float alpha_quantization_error = error_weight.a * base_quant_error * 2.0f; + float alpha_quantization_error = error_weight.lane<3>() * base_quant_error * 2.0f; float rgba_quantization_error = rgb_quantization_error + alpha_quantization_error; // for 8 integers, we have two encodings: one with HDR alpha and another one @@ -312,52 +300,51 @@ static void compute_color_error_for_every_integer_count_and_quantization_level( format_of_choice[i][0] = FMT_LUMINANCE; } + float base_quant_error_rgb = error_weight_rgbsum * static_cast(partition_size); + float base_quant_error_a = error_weight.lane<3>() * static_cast(partition_size); + float base_quant_error_rgba = base_quant_error_rgb + base_quant_error_a; + + float error_scale_bc_rgba = eci->can_blue_contract ? 0.625f : 1.0f; + float error_scale_oe_rgba = eci->can_offset_encode ? 0.5f : 1.0f; + + float error_scale_bc_rgb = eci->can_blue_contract ? 0.5f : 1.0f; + float error_scale_oe_rgb = eci->can_offset_encode ? 0.25f : 1.0f; + // pick among the available LDR endpoint modes for (int i = 4; i < 21; i++) { - float base_quant_error = baseline_quant_error[i] * partition_size * 1.0f; - float rgb_quantization_error = error_weight_rgbsum * base_quant_error; - float alpha_quantization_error = error_weight.a * base_quant_error; - float rgba_quantization_error = rgb_quantization_error + alpha_quantization_error; - - // for 8 integers, the available encodings are: - // full LDR RGB-Alpha - float full_ldr_rgba_error = rgba_quantization_error; - - if (eci->can_blue_contract) + // Offset encoding not possible at higher quant levels + if (i == 19) { - full_ldr_rgba_error *= 0.625f; + error_scale_oe_rgba = 1.0f; + error_scale_oe_rgb = 1.0f; } - if (eci->can_offset_encode && i <= 18) - { - full_ldr_rgba_error *= 0.5f; - } + float base_quant_error = baseline_quant_error[i]; + float quant_error_rgb = base_quant_error_rgb * base_quant_error; + float quant_error_rgba = base_quant_error_rgba * base_quant_error; - full_ldr_rgba_error += rgb_range_error + alpha_range_error; + // 8 integers can encode as RGBA+RGBA + float full_ldr_rgba_error = quant_error_rgba + * error_scale_bc_rgba + * error_scale_oe_rgba + + rgb_range_error + + alpha_range_error; best_error[i][3] = full_ldr_rgba_error; format_of_choice[i][3] = FMT_RGBA; - // for 6 integers, we have: - // - an LDR-RGB encoding - // - an RGBS + Alpha encoding (LDR) - - float full_ldr_rgb_error = rgb_quantization_error; - - if (eci->can_blue_contract) - { - full_ldr_rgb_error *= 0.5f; - } - - if (eci->can_offset_encode && i <= 18) - { - full_ldr_rgb_error *= 0.25f; - } - - full_ldr_rgb_error += eci->alpha_drop_error + rgb_range_error; + // 6 integers can encode as RGB+RGB or RGBS+AA + float full_ldr_rgb_error = quant_error_rgb + * error_scale_bc_rgb + * error_scale_oe_rgb + + rgb_range_error + + eci->alpha_drop_error; - float rgbs_alpha_error = rgba_quantization_error + eci->rgb_scale_error + rgb_range_error + alpha_range_error; + float rgbs_alpha_error = quant_error_rgba + + eci->rgb_scale_error + + rgb_range_error + + alpha_range_error; if (rgbs_alpha_error < full_ldr_rgb_error) { @@ -370,10 +357,16 @@ static void compute_color_error_for_every_integer_count_and_quantization_level( format_of_choice[i][2] = FMT_RGB; } - // for 4 integers, we have a Luminance-Alpha encoding and the RGBS 
encoding - float ldr_rgbs_error = rgb_quantization_error + eci->alpha_drop_error + eci->rgb_scale_error + rgb_range_error; + // 4 integers can encode as RGBS or LA+LA + float ldr_rgbs_error = quant_error_rgb + + rgb_range_error + + eci->alpha_drop_error + + eci->rgb_scale_error; - float lum_alpha_error = rgba_quantization_error + eci->luminance_error + rgb_range_error + alpha_range_error; + float lum_alpha_error = quant_error_rgba + + rgb_range_error + + alpha_range_error + + eci->luminance_error; if (ldr_rgbs_error < lum_alpha_error) { @@ -386,8 +379,11 @@ static void compute_color_error_for_every_integer_count_and_quantization_level( format_of_choice[i][1] = FMT_LUMINANCE_ALPHA; } - // for 2 integers, we have a Luminance-encoding and an Alpha-encoding. - float luminance_error = rgb_quantization_error + eci->alpha_drop_error + eci->luminance_error + rgb_range_error; + // 2 integers can encode as L+L + float luminance_error = quant_error_rgb + + rgb_range_error + + eci->alpha_drop_error + + eci->luminance_error; best_error[i][0] = luminance_error; format_of_choice[i][0] = FMT_LUMINANCE; @@ -400,7 +396,7 @@ static void one_partition_find_best_combination_for_bitcount( float combined_best_error[21][4], int formats_of_choice[21][4], int bits_available, - int* best_quantization_level, + int* best_quant_level, int* best_formats, float* error_of_best_combination ) { @@ -409,23 +405,23 @@ static void one_partition_find_best_combination_for_bitcount( for (int i = 0; i < 4; i++) { // compute the quantization level for a given number of integers and a given number of bits. - int quantization_level = quantization_mode_table[i + 1][bits_available]; + int quant_level = quant_mode_table[i + 1][bits_available]; - if (quantization_level == -1) + if (quant_level == -1) { continue; // used to indicate the case where we don't have enough bits to represent a given endpoint format at all. 
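// The LDR error estimates above fold the blue-contract and offset-encode bonuses into
// the multiplicative factors 0.625/0.5 (RGBA) and 0.5/0.25 (RGB), and drop the
// offset-encode bonus from quant level 19 upward. A compact scalar restatement of the
// full RGBA case; the helper name and signature are hypothetical, not part of the patch.
static float full_ldr_rgba_error_sketch(float baseline_quant_error,
                                        float base_quant_error_rgba,
                                        bool can_blue_contract,
                                        bool can_offset_encode,
                                        float rgb_range_error,
                                        float alpha_range_error)
{
    float err = baseline_quant_error * base_quant_error_rgba;
    err *= can_blue_contract ? 0.625f : 1.0f; // same scale factors as above
    err *= can_offset_encode ? 0.5f : 1.0f;   // caller clears this bonus at level >= 19
    return err + rgb_range_error + alpha_range_error;
}
// The RGB-only case above has the same shape, using the 0.5/0.25 factors and the
// alpha-drop error in place of the alpha range error.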
} - if (combined_best_error[quantization_level][i] < best_integer_count_error) + if (combined_best_error[quant_level][i] < best_integer_count_error) { - best_integer_count_error = combined_best_error[quantization_level][i]; + best_integer_count_error = combined_best_error[quant_level][i]; best_integer_count = i; } } - int ql = quantization_mode_table[best_integer_count + 1][bits_available]; + int ql = quant_mode_table[best_integer_count + 1][bits_available]; - *best_quantization_level = ql; + *best_quant_level = ql; *error_of_best_combination = best_integer_count_error; if (ql >= 0) { @@ -458,15 +454,15 @@ static void two_partitions_find_best_combination_for_every_quantization_and_inte { for (int j = 0; j < 4; j++) // integer-count for second endpoint-pair { - int low2 = MIN(i, j); - int high2 = MAX(i, j); + int low2 = astc::min(i, j); + int high2 = astc::max(i, j); if ((high2 - low2) > 1) { continue; } int intcnt = i + j; - float errorterm = MIN(best_error[0][quant][i] + best_error[1][quant][j], 1e10f); + float errorterm = astc::min(best_error[0][quant][i] + best_error[1][quant][j], 1e10f); if (errorterm <= combined_best_error[quant][intcnt]) { combined_best_error[quant][intcnt] = errorterm; @@ -483,8 +479,8 @@ static void two_partitions_find_best_combination_for_bitcount( float combined_best_error[21][7], int formats_of_choice[21][7][2], int bits_available, - int* best_quantization_level, - int* best_quantization_level_mod, + int* best_quant_level, + int* best_quant_level_mod, int* best_formats, float* error_of_best_combination ) { @@ -494,14 +490,14 @@ static void two_partitions_find_best_combination_for_bitcount( for (int integer_count = 2; integer_count <= 8; integer_count++) { // compute the quantization level for a given number of integers and a given number of bits. - int quantization_level = quantization_mode_table[integer_count][bits_available]; + int quant_level = quant_mode_table[integer_count][bits_available]; - if (quantization_level == -1) + if (quant_level == -1) { break; // used to indicate the case where we don't have enough bits to represent a given endpoint format at all. 
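// In the pair loops above, i and j are per-partition integer-count indices (0..3,
// i.e. 2/4/6/8 endpoint integers), and combinations whose counts differ by more than
// one step are skipped. This appears to mirror the ASTC restriction that the color
// endpoint modes of a multi-partition block must all fall in the same class or in two
// consecutive classes; treat that reading as an assumption rather than something this
// patch states. A tiny restatement of the filter:
static inline bool endpoint_counts_compatible_sketch(int i, int j)
{
    int low  = i < j ? i : j;
    int high = i < j ? j : i;
    return (high - low) <= 1;
}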
} - float integer_count_error = combined_best_error[quantization_level][integer_count - 2]; + float integer_count_error = combined_best_error[quant_level][integer_count - 2]; if (integer_count_error < best_integer_count_error) { @@ -510,17 +506,14 @@ static void two_partitions_find_best_combination_for_bitcount( } } - int ql = quantization_mode_table[best_integer_count][bits_available]; - int ql_mod = quantization_mode_table[best_integer_count][bits_available + 2]; + int ql = quant_mode_table[best_integer_count][bits_available]; + int ql_mod = quant_mode_table[best_integer_count][bits_available + 2]; - *best_quantization_level = ql; - *best_quantization_level_mod = ql_mod; + *best_quant_level = ql; + *best_quant_level_mod = ql_mod; *error_of_best_combination = best_integer_count_error; if (ql >= 0) { - // make sure this is postive too - assert(ql_mod >= 0 && ql_mod < 21); - for (int i = 0; i < 2; i++) { best_formats[i] = formats_of_choice[ql][best_integer_count - 2][i]; @@ -556,8 +549,8 @@ static void three_partitions_find_best_combination_for_every_quantization_and_in { for (int j = 0; j < 4; j++) // integer-count for second endpoint-pair { - int low2 = MIN(i, j); - int high2 = MAX(i, j); + int low2 = astc::min(i, j); + int high2 = astc::max(i, j); if ((high2 - low2) > 1) { continue; @@ -565,15 +558,15 @@ static void three_partitions_find_best_combination_for_every_quantization_and_in for (int k = 0; k < 4; k++) // integer-count for third endpoint-pair { - int low3 = MIN(k, low2); - int high3 = MAX(k, high2); + int low3 = astc::min(k, low2); + int high3 = astc::max(k, high2); if ((high3 - low3) > 1) { continue; } int intcnt = i + j + k; - float errorterm = MIN(best_error[0][quant][i] + best_error[1][quant][j] + best_error[2][quant][k], 1e10f); + float errorterm = astc::min(best_error[0][quant][i] + best_error[1][quant][j] + best_error[2][quant][k], 1e10f); if (errorterm <= combined_best_error[quant][intcnt]) { combined_best_error[quant][intcnt] = errorterm; @@ -592,8 +585,8 @@ static void three_partitions_find_best_combination_for_bitcount( float combined_best_error[21][10], int formats_of_choice[21][10][3], int bits_available, - int* best_quantization_level, - int* best_quantization_level_mod, + int* best_quant_level, + int* best_quant_level_mod, int* best_formats, float* error_of_best_combination ) { @@ -603,14 +596,14 @@ static void three_partitions_find_best_combination_for_bitcount( for (int integer_count = 3; integer_count <= 9; integer_count++) { // compute the quantization level for a given number of integers and a given number of bits. - int quantization_level = quantization_mode_table[integer_count][bits_available]; + int quant_level = quant_mode_table[integer_count][bits_available]; - if (quantization_level == -1) + if (quant_level == -1) { break; // used to indicate the case where we don't have enough bits to represent a given endpoint format at all. 
} - float integer_count_error = combined_best_error[quantization_level][integer_count - 3]; + float integer_count_error = combined_best_error[quant_level][integer_count - 3]; if (integer_count_error < best_integer_count_error) { @@ -619,17 +612,14 @@ static void three_partitions_find_best_combination_for_bitcount( } } - int ql = quantization_mode_table[best_integer_count][bits_available]; - int ql_mod = quantization_mode_table[best_integer_count][bits_available + 5]; + int ql = quant_mode_table[best_integer_count][bits_available]; + int ql_mod = quant_mode_table[best_integer_count][bits_available + 5]; - *best_quantization_level = ql; - *best_quantization_level_mod = ql_mod; + *best_quant_level = ql; + *best_quant_level_mod = ql_mod; *error_of_best_combination = best_integer_count_error; if (ql >= 0) { - // make sure this is postive too - assert(ql_mod >= 0 && ql_mod < 21); - for (int i = 0; i < 3; i++) { best_formats[i] = formats_of_choice[ql][best_integer_count - 3][i]; @@ -665,8 +655,8 @@ static void four_partitions_find_best_combination_for_every_quantization_and_int { for (int j = 0; j < 4; j++) // integer-count for second endpoint-pair { - int low2 = MIN(i, j); - int high2 = MAX(i, j); + int low2 = astc::min(i, j); + int high2 = astc::max(i, j); if ((high2 - low2) > 1) { continue; @@ -674,8 +664,8 @@ static void four_partitions_find_best_combination_for_every_quantization_and_int for (int k = 0; k < 4; k++) // integer-count for third endpoint-pair { - int low3 = MIN(k, low2); - int high3 = MAX(k, high2); + int low3 = astc::min(k, low2); + int high3 = astc::max(k, high2); if ((high3 - low3) > 1) { continue; @@ -683,15 +673,15 @@ static void four_partitions_find_best_combination_for_every_quantization_and_int for (int l = 0; l < 4; l++) // integer-count for fourth endpoint-pair { - int low4 = MIN(l, low3); - int high4 = MAX(l, high3); + int low4 = astc::min(l, low3); + int high4 = astc::max(l, high3); if ((high4 - low4) > 1) { continue; } int intcnt = i + j + k + l; - float errorterm = MIN(best_error[0][quant][i] + best_error[1][quant][j] + best_error[2][quant][k] + best_error[3][quant][l], 1e10f); + float errorterm = astc::min(best_error[0][quant][i] + best_error[1][quant][j] + best_error[2][quant][k] + best_error[3][quant][l], 1e10f); if (errorterm <= combined_best_error[quant][intcnt]) { combined_best_error[quant][intcnt] = errorterm; @@ -712,8 +702,8 @@ static void four_partitions_find_best_combination_for_bitcount( float combined_best_error[21][13], int formats_of_choice[21][13][4], int bits_available, - int* best_quantization_level, - int* best_quantization_level_mod, + int* best_quant_level, + int* best_quant_level_mod, int* best_formats, float* error_of_best_combination ) { @@ -723,14 +713,14 @@ static void four_partitions_find_best_combination_for_bitcount( for (int integer_count = 4; integer_count <= 9; integer_count++) { // compute the quantization level for a given number of integers and a given number of bits. - int quantization_level = quantization_mode_table[integer_count][bits_available]; + int quant_level = quant_mode_table[integer_count][bits_available]; - if (quantization_level == -1) + if (quant_level == -1) { break; // used to indicate the case where we don't have enough bits to represent a given endpoint format at all. 
} - float integer_count_error = combined_best_error[quantization_level][integer_count - 4]; + float integer_count_error = combined_best_error[quant_level][integer_count - 4]; if (integer_count_error < best_integer_count_error) { @@ -739,17 +729,14 @@ static void four_partitions_find_best_combination_for_bitcount( } } - int ql = quantization_mode_table[best_integer_count][bits_available]; - int ql_mod = quantization_mode_table[best_integer_count][bits_available + 8]; + int ql = quant_mode_table[best_integer_count][bits_available]; + int ql_mod = quant_mode_table[best_integer_count][bits_available + 8]; - *best_quantization_level = ql; - *best_quantization_level_mod = ql_mod; + *best_quant_level = ql; + *best_quant_level_mod = ql_mod; *error_of_best_combination = best_integer_count_error; if (ql >= 0) { - // make sure this is postive too - assert(ql_mod >= 0 && ql_mod < 21); - for (int i = 0; i < 4; i++) { best_formats[i] = formats_of_choice[ql][best_integer_count - 4][i]; @@ -799,8 +786,8 @@ void determine_optimal_set_of_endpoint_formats_to_use( // output data int partition_format_specifiers[TUNE_MAX_TRIAL_CANDIDATES][4], int quantized_weight[TUNE_MAX_TRIAL_CANDIDATES], - int quantization_level[TUNE_MAX_TRIAL_CANDIDATES], - int quantization_level_mod[TUNE_MAX_TRIAL_CANDIDATES] + int quant_level[TUNE_MAX_TRIAL_CANDIDATES], + int quant_level_mod[TUNE_MAX_TRIAL_CANDIDATES] ) { int partition_count = pt->partition_count; @@ -814,45 +801,43 @@ void determine_optimal_set_of_endpoint_formats_to_use( compute_encoding_choice_errors(bsd, blk, pt, ewb, separate_component, eci); // for each partition, compute the error weights to apply for that partition. - float4 error_weightings[4]; - float4 dummied_color_scalefactors[4]; // only used to receive data - compute_partition_error_color_weightings(bsd, ewb, pt, error_weightings, dummied_color_scalefactors); + partition_metrics pms[4]; + + compute_partition_error_color_weightings(*ewb, *pt, pms); float best_error[4][21][4]; int format_of_choice[4][21][4]; for (int i = 0; i < partition_count; i++) { - compute_color_error_for_every_integer_count_and_quantization_level( + compute_color_error_for_every_integer_count_and_quant_level( encode_hdr_rgb, encode_hdr_alpha, i, - pt, &(eci[i]), ep, error_weightings, best_error[i], + pt, &(eci[i]), ep, pms[i].error_weight, best_error[i], format_of_choice[i]); } alignas(ASTCENC_VECALIGN) float errors_of_best_combination[MAX_WEIGHT_MODES]; - alignas(ASTCENC_VECALIGN) int best_quantization_levels[MAX_WEIGHT_MODES]; - int best_quantization_levels_mod[MAX_WEIGHT_MODES]; + alignas(ASTCENC_VECALIGN) int best_quant_levels[MAX_WEIGHT_MODES]; + int best_quant_levels_mod[MAX_WEIGHT_MODES]; int best_ep_formats[MAX_WEIGHT_MODES][4]; #if ASTCENC_SIMD_WIDTH > 1 // have to ensure that the "overstep" of the last iteration in the vectorized // loop will contain data that will never be picked as best candidate - const int packed_mode_count = bsd->block_mode_packed_count; - const int packed_mode_count_simd_up = (packed_mode_count + ASTCENC_SIMD_WIDTH - 1) / ASTCENC_SIMD_WIDTH * ASTCENC_SIMD_WIDTH; + const int packed_mode_count = bsd->block_mode_count; + const int packed_mode_count_simd_up = round_up_to_simd_multiple_vla(packed_mode_count); for (int i = packed_mode_count; i < packed_mode_count_simd_up; ++i) { errors_of_best_combination[i] = 1e30f; - best_quantization_levels[i] = 0; - best_quantization_levels_mod[i] = 0; + best_quant_levels[i] = 0; + best_quant_levels_mod[i] = 0; } #endif // #if ASTCENC_SIMD_WIDTH > 1 // code for the case 
where the block contains 1 partition if (partition_count == 1) { - int best_quantization_level; - int best_format; float error_of_best_combination; - for (int i = 0, ni = bsd->block_mode_packed_count; i < ni; ++i) + for (int i = 0; i < bsd->block_mode_count; ++i) { if (qwt_errors[i] >= 1e29f) { @@ -862,23 +847,16 @@ void determine_optimal_set_of_endpoint_formats_to_use( one_partition_find_best_combination_for_bitcount( best_error[0], format_of_choice[0], qwt_bitcounts[i], - &best_quantization_level, &best_format, &error_of_best_combination); + best_quant_levels + i, best_ep_formats[i], &error_of_best_combination); error_of_best_combination += qwt_errors[i]; errors_of_best_combination[i] = error_of_best_combination; - best_quantization_levels[i] = best_quantization_level; - best_quantization_levels_mod[i] = best_quantization_level; - best_ep_formats[i][0] = best_format; + best_quant_levels_mod[i] = best_quant_levels[i]; } } // code for the case where the block contains 2 partitions else if (partition_count == 2) { - int best_quantization_level; - int best_quantization_level_mod; - int best_formats[2]; - float error_of_best_combination; - float combined_best_error[21][7]; int formats_of_choice[21][7][2]; @@ -886,7 +864,7 @@ void determine_optimal_set_of_endpoint_formats_to_use( best_error, format_of_choice, combined_best_error, formats_of_choice); - for (int i = 0, ni = bsd->block_mode_packed_count; i < ni; ++i) + for (int i = 0; i < bsd->block_mode_count; ++i) { if (qwt_errors[i] >= 1e29f) { @@ -894,35 +872,25 @@ void determine_optimal_set_of_endpoint_formats_to_use( continue; } + float error_of_best_combination; two_partitions_find_best_combination_for_bitcount( combined_best_error, formats_of_choice, qwt_bitcounts[i], - &best_quantization_level, &best_quantization_level_mod, - best_formats, &error_of_best_combination); - - error_of_best_combination += qwt_errors[i]; + best_quant_levels + i, best_quant_levels_mod + i, + best_ep_formats[i], &error_of_best_combination); - errors_of_best_combination[i] = error_of_best_combination; - best_quantization_levels[i] = best_quantization_level; - best_quantization_levels_mod[i] = best_quantization_level_mod; - best_ep_formats[i][0] = best_formats[0]; - best_ep_formats[i][1] = best_formats[1]; + errors_of_best_combination[i] = error_of_best_combination + qwt_errors[i]; } } // code for the case where the block contains 3 partitions else if (partition_count == 3) { - int best_quantization_level; - int best_quantization_level_mod; - int best_formats[3]; - float error_of_best_combination; - float combined_best_error[21][10]; int formats_of_choice[21][10][3]; three_partitions_find_best_combination_for_every_quantization_and_integer_count( best_error, format_of_choice, combined_best_error, formats_of_choice); - for (int i = 0, ni = bsd->block_mode_packed_count; i < ni; ++i) + for (int i = 0; i < bsd->block_mode_count; ++i) { if (qwt_errors[i] >= 1e29f) { @@ -930,36 +898,25 @@ void determine_optimal_set_of_endpoint_formats_to_use( continue; } + float error_of_best_combination; three_partitions_find_best_combination_for_bitcount( combined_best_error, formats_of_choice, qwt_bitcounts[i], - &best_quantization_level, &best_quantization_level_mod, - best_formats, &error_of_best_combination); - - error_of_best_combination += qwt_errors[i]; + best_quant_levels + i, best_quant_levels_mod + i, + best_ep_formats[i], &error_of_best_combination); - errors_of_best_combination[i] = error_of_best_combination; - best_quantization_levels[i] = best_quantization_level; - 
best_quantization_levels_mod[i] = best_quantization_level_mod; - best_ep_formats[i][0] = best_formats[0]; - best_ep_formats[i][1] = best_formats[1]; - best_ep_formats[i][2] = best_formats[2]; + errors_of_best_combination[i] = error_of_best_combination + qwt_errors[i]; } } // code for the case where the block contains 4 partitions else if (partition_count == 4) { - int best_quantization_level; - int best_quantization_level_mod; - int best_formats[4]; - float error_of_best_combination; - float combined_best_error[21][13]; int formats_of_choice[21][13][4]; four_partitions_find_best_combination_for_every_quantization_and_integer_count( best_error, format_of_choice, combined_best_error, formats_of_choice); - for (int i = 0, ni = bsd->block_mode_packed_count; i < ni; ++i) + for (int i = 0; i < bsd->block_mode_count; ++i) { if (qwt_errors[i] >= 1e29f) { @@ -967,69 +924,46 @@ void determine_optimal_set_of_endpoint_formats_to_use( continue; } + float error_of_best_combination; four_partitions_find_best_combination_for_bitcount( combined_best_error, formats_of_choice, qwt_bitcounts[i], - &best_quantization_level, &best_quantization_level_mod, - best_formats, &error_of_best_combination); + best_quant_levels + i, best_quant_levels_mod + i, + best_ep_formats[i], &error_of_best_combination); - error_of_best_combination += qwt_errors[i]; - - errors_of_best_combination[i] = error_of_best_combination; - best_quantization_levels[i] = best_quantization_level; - best_quantization_levels_mod[i] = best_quantization_level_mod; - best_ep_formats[i][0] = best_formats[0]; - best_ep_formats[i][1] = best_formats[1]; - best_ep_formats[i][2] = best_formats[2]; - best_ep_formats[i][3] = best_formats[3]; + errors_of_best_combination[i] = error_of_best_combination + qwt_errors[i]; } } - // finally, go through the results and pick the best-looking modes. + // Go through the results and pick the best candidate modes int best_error_weights[TUNE_MAX_TRIAL_CANDIDATES]; + static_assert((MAX_WEIGHT_MODES % ASTCENC_SIMD_WIDTH) == 0, + "MAX_WEIGHT_MODES should be multiple of ASTCENC_SIMD_WIDTH"); for (int i = 0; i < tune_candidate_limit; i++) { -#if 0 - // reference; scalar code - float best_ep_error = 1e30f; - int best_error_index = -1; - for (int j = 0, npack = bsd->block_mode_packed_count; j < npack; ++j) - { - if (errors_of_best_combination[j] < best_ep_error && best_quantization_levels[j] >= 5) - { - best_ep_error = errors_of_best_combination[j]; - best_error_index = j; - } - } -#else - // find best mode, SIMD N-wide way - static_assert((MAX_WEIGHT_MODES % ASTCENC_SIMD_WIDTH) == 0, "MAX_WEIGHT_MODES should be multiple of ASTCENC_SIMD_WIDTH"); vint vbest_error_index(-1); vfloat vbest_ep_error(1e30f); vint lane_ids = vint::lane_id(); - for (int j = 0, npack = bsd->block_mode_packed_count; j < npack; j += ASTCENC_SIMD_WIDTH) + for (int j = 0; j < bsd->block_mode_count; j += ASTCENC_SIMD_WIDTH) { vfloat err = vfloat(&errors_of_best_combination[j]); vmask mask1 = err < vbest_ep_error; - vmask mask2 = vint(&best_quantization_levels[j]) > vint(4); + vmask mask2 = vint(&best_quant_levels[j]) > vint(4); vmask mask = mask1 & mask2; vbest_ep_error = select(vbest_ep_error, err, mask); vbest_error_index = select(vbest_error_index, lane_ids, mask); lane_ids = lane_ids + vint(ASTCENC_SIMD_WIDTH); } - // pick final best mode from the SIMD result. - // note that if multiple SIMD lanes have "best" score, - // we want to pick one with the lowest index, i.e. what - // would happen if code was purely scalar. 
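// A minimal scalar sketch of what the vectorized candidate selection computes
// (the helper name and signature below are illustrative, not part of astcenc):
// pick the lowest-index mode whose error is smallest among modes with a quant
// level above 4, then knock that entry out so the next pass finds the runner-up.
static int pick_best_candidate_scalar(float* errors, const int* quant_levels,
                                      int mode_count)
{
    float best_error = 1e30f;
    int best_index = -1;
    for (int j = 0; j < mode_count; j++)
    {
        // Strict '<' keeps the first (lowest-index) mode on ties, matching
        // the SIMD tie-break used here.
        if (quant_levels[j] > 4 && errors[j] < best_error)
        {
            best_error = errors[j];
            best_index = j;
        }
    }
    if (best_index >= 0)
    {
        errors[best_index] = 1e30f; // exclude this mode from the next pass
    }
    return best_index;
}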
- vmask lanes_with_min_error = vbest_ep_error == hmin(vbest_ep_error); - // take smallest index from the SIMD lanes that had the best score - vbest_error_index = select(vint(0x7fffffff), vbest_error_index, lanes_with_min_error); + // Pick best mode from the SIMD result. If multiple SIMD lanes have + // the best score, pick the one with the lowest index. + vmask lanes_min_error = vbest_ep_error == hmin(vbest_ep_error); + vbest_error_index = select(vint(0x7FFFFFFF), vbest_error_index, lanes_min_error); vbest_error_index = hmin(vbest_error_index); - int best_error_index = vbest_error_index.lane(0); -#endif + int best_error_index = vbest_error_index.lane<0>(); best_error_weights[i] = best_error_index; + // Max the error for this candidate so we don't pick it again if (best_error_index >= 0) { errors_of_best_combination[best_error_index] = 1e30f; @@ -1038,21 +972,15 @@ void determine_optimal_set_of_endpoint_formats_to_use( for (int i = 0; i < tune_candidate_limit; i++) { - int weight = best_error_weights[i]; - quantized_weight[i] = weight; - if (weight >= 0) + quantized_weight[i] = best_error_weights[i]; + if (quantized_weight[i] >= 0) { - int level = best_quantization_levels[weight]; - int level_mod = best_quantization_levels_mod[weight]; - - assert(level >= 0 && level < 21); - assert(level_mod >= 0 && level_mod < 21); - - quantization_level[i] = level; - quantization_level_mod[i] = level_mod; + quant_level[i] = best_quant_levels[best_error_weights[i]]; + assert(quant_level[i] >= 0 && quant_level[i] < 21); + quant_level_mod[i] = best_quant_levels_mod[best_error_weights[i]]; for (int j = 0; j < partition_count; j++) { - partition_format_specifiers[i][j] = best_ep_formats[weight][j]; + partition_format_specifiers[i][j] = best_ep_formats[best_error_weights[i]][j]; } } } diff --git a/libkram/astc-encoder/astcenc_platform_isa_detection.cpp b/libkram/astc-encoder/astcenc_platform_isa_detection.cpp index 4ed1ee2d..3766aa51 100644 --- a/libkram/astc-encoder/astcenc_platform_isa_detection.cpp +++ b/libkram/astc-encoder/astcenc_platform_isa_detection.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2020 Arm Limited +// Copyright 2020-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -15,7 +15,6 @@ // under the License. // ---------------------------------------------------------------------------- -#if (ASTCENC_SSE > 0) || (ASTCENC_AVX > 0) || (ASTCENC_POPCNT > 0) /** * @brief Platform-specific function implementations. 
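// The next hunk switches detection from SSE4.2 to SSE4.1 and adds F16C: CPUID
// leaf 1 reports SSE4.1 in ECX bit 19, POPCNT in bit 23, and F16C in bit 29.
// A rough standalone sketch of the same probe for GCC/Clang on x86-64 (the
// struct and function names here are illustrative, not part of astcenc):
#include <cpuid.h>

struct cpu_features { bool sse41; bool popcnt; bool f16c; };

static cpu_features query_cpu_features()
{
    cpu_features f {};
    unsigned int eax, ebx, ecx, edx;
    if (__get_cpuid_count(1, 0, &eax, &ebx, &ecx, &edx))
    {
        f.sse41  = (ecx >> 19) & 1;
        f.popcnt = (ecx >> 23) & 1;
        f.f16c   = (ecx >> 29) & 1;
    }
    return f;
}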
@@ -25,26 +24,18 @@ #include "astcenc_internal.h" -int cpu_supports_sse42() { - return 1; -} -int cpu_supports_popcnt() { - return 1; -} -// kram only wants avx1 for now -int cpu_supports_avx2() { - return 0; -} +#if (ASTCENC_SSE > 0) || (ASTCENC_AVX > 0) || \ + (ASTCENC_POPCNT > 0) || (ASTCENC_F16C > 0) -#if 0 -static int g_cpu_has_sse42 = -1; +static int g_cpu_has_sse41 = -1; static int g_cpu_has_avx2 = -1; static int g_cpu_has_popcnt = -1; +static int g_cpu_has_f16c = -1; /* ============================================================================ Platform code for Visual Studio ============================================================================ */ -#if defined(_MSC_VER) +#if !defined(__clang__) && defined(_MSC_VER) #include static void detect_cpu_isa() @@ -54,15 +45,18 @@ static void detect_cpu_isa() __cpuid(data, 0); int num_id = data[0]; - g_cpu_has_sse42 = 0; + g_cpu_has_sse41 = 0; g_cpu_has_popcnt = 0; + g_cpu_has_f16c = 0; if (num_id >= 1) { __cpuidex(data, 1, 0); - // SSE42 = Bank 1, ECX, bit 20 - g_cpu_has_sse42 = data[2] & (1 << 20) ? 1 : 0; + // SSE41 = Bank 1, ECX, bit 19 + g_cpu_has_sse41 = data[2] & (1 << 19) ? 1 : 0; // POPCNT = Bank 1, ECX, bit 23 g_cpu_has_popcnt = data[2] & (1 << 23) ? 1 : 0; + // F16C = Bank 1, ECX, bit 29 + g_cpu_has_f16c = data[2] & (1 << 29) ? 1 : 0; } g_cpu_has_avx2 = 0; @@ -84,14 +78,17 @@ static void detect_cpu_isa() { unsigned int data[4]; - g_cpu_has_sse42 = 0; + g_cpu_has_sse41 = 0; g_cpu_has_popcnt = 0; + g_cpu_has_f16c = 0; if (__get_cpuid_count(1, 0, &data[0], &data[1], &data[2], &data[3])) { - // SSE42 = Bank 1, ECX, bit 20 - g_cpu_has_sse42 = data[2] & (1 << 20) ? 1 : 0; + // SSE41 = Bank 1, ECX, bit 19 + g_cpu_has_sse41 = data[2] & (1 << 19) ? 1 : 0; // POPCNT = Bank 1, ECX, bit 23 g_cpu_has_popcnt = data[2] & (1 << 23) ? 1 : 0; + // F16C = Bank 1, ECX, bit 29 + g_cpu_has_f16c = data[2] & (1 << 29) ? 1 : 0; } g_cpu_has_avx2 = 0; @@ -104,14 +101,14 @@ static void detect_cpu_isa() #endif /* Public function, see header file for detailed documentation */ -int cpu_supports_sse42() +int cpu_supports_sse41() { - if (g_cpu_has_sse42 == -1) + if (g_cpu_has_sse41 == -1) { detect_cpu_isa(); } - return g_cpu_has_sse42; + return g_cpu_has_sse41; } /* Public function, see header file for detailed documentation */ @@ -125,6 +122,17 @@ int cpu_supports_popcnt() return g_cpu_has_popcnt; } +/* Public function, see header file for detailed documentation */ +int cpu_supports_f16c() +{ + if (g_cpu_has_f16c == -1) + { + detect_cpu_isa(); + } + + return g_cpu_has_f16c; +} + /* Public function, see header file for detailed documentation */ int cpu_supports_avx2() { @@ -137,4 +145,3 @@ int cpu_supports_avx2() } #endif -#endif diff --git a/libkram/astc-encoder/astcenc_quantization.cpp b/libkram/astc-encoder/astcenc_quantization.cpp index 7ce26b08..afc10160 100644 --- a/libkram/astc-encoder/astcenc_quantization.cpp +++ b/libkram/astc-encoder/astcenc_quantization.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. 
You may obtain a copy @@ -23,7 +23,7 @@ #if !defined(ASTCENC_DECOMPRESS_ONLY) -const uint8_t color_quantization_tables[21][256] = { +const uint8_t color_quant_tables[21][256] = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -406,7 +406,7 @@ const uint8_t color_quantization_tables[21][256] = { #endif -const uint8_t color_unquantization_tables[21][256] = { +const uint8_t color_unquant_tables[21][256] = { { 0, 255 }, @@ -533,19 +533,18 @@ const uint8_t color_unquantization_tables[21][256] = { } }; -// quantization_mode_table[integercount/2][bits] gives -// us the quantization level for a given integer count and number of bits that -// the integer may fit into. This is needed for color decoding, -// and for the color encoding. -int quantization_mode_table[17][128]; +// The quant_mode_table[integercount/2][bits] gives us the quantization +// level for a given integer count and number of bits that the integer may fit +// into. This is needed for color decoding, and for the color encoding. +int8_t quant_mode_table[17][128]; -void build_quantization_mode_table() +void build_quant_mode_table() { for (int i = 0; i <= 16; i++) { for (int j = 0; j < 128; j++) { - quantization_mode_table[i][j] = -1; + quant_mode_table[i][j] = -1; } } @@ -553,10 +552,10 @@ void build_quantization_mode_table() { for (int j = 1; j <= 16; j++) { - int p = compute_ise_bitcount(2 * j, (quantization_method) i); + int p = get_ise_sequence_bitcount(2 * j, (quant_method)i); if (p < 128) { - quantization_mode_table[j][p] = i; + quant_mode_table[j][p] = i; } } } @@ -566,13 +565,13 @@ void build_quantization_mode_table() int largest_value_so_far = -1; for (int j = 0; j < 128; j++) { - if (quantization_mode_table[i][j] > largest_value_so_far) + if (quant_mode_table[i][j] > largest_value_so_far) { - largest_value_so_far = quantization_mode_table[i][j]; + largest_value_so_far = quant_mode_table[i][j]; } else { - quantization_mode_table[i][j] = largest_value_so_far; + quant_mode_table[i][j] = largest_value_so_far; } } } diff --git a/libkram/astc-encoder/astcenc_symbolic_physical.cpp b/libkram/astc-encoder/astcenc_symbolic_physical.cpp index a486f884..894dc933 100644 --- a/libkram/astc-encoder/astcenc_symbolic_physical.cpp +++ b/libkram/astc-encoder/astcenc_symbolic_physical.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -78,7 +78,7 @@ void symbolic_to_physical( // This encodes separate constant-color blocks. There is currently // no attempt to coalesce them into larger void-extents. - static const uint8_t cbytes[8] = { 0xFC, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; + static const uint8_t cbytes[8] { 0xFC, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; for (int i = 0; i < 8; i++) { pcb.data[i] = cbytes[i]; @@ -99,7 +99,7 @@ void symbolic_to_physical( // This encodes separate constant-color blocks. There is currently // no attempt to coalesce them into larger void-extents. 
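// The build_quant_mode_table() change in astcenc_quantization.cpp above fills
// quant_mode_table[pair_count][bits] with the highest quant level whose ISE
// encoding of 2*pair_count values fits the bit budget, then makes each row
// monotonic in the bit count. A generic sketch of that fill pattern, with a
// caller-supplied callback standing in for get_ise_sequence_bitcount
// (illustrative only, not code from the patch):
static void build_monotone_table(int8_t table[17][128],
                                 int (*ise_bitcount)(int value_count, int level))
{
    for (int j = 0; j <= 16; j++)
        for (int b = 0; b < 128; b++)
            table[j][b] = -1;

    // Seed exact fits; later (higher) levels overwrite earlier ones if they
    // land on the same bit count.
    for (int level = 0; level < 21; level++)
    {
        for (int j = 1; j <= 16; j++)
        {
            int bits = ise_bitcount(2 * j, level);
            if (bits < 128)
                table[j][bits] = (int8_t)level;
        }
    }

    // Carry the running maximum forward so any larger bit budget is at least
    // as good as a smaller one.
    for (int j = 0; j <= 16; j++)
    {
        int8_t best = -1;
        for (int b = 0; b < 128; b++)
        {
            if (table[j][b] > best)
                best = table[j][b];
            else
                table[j][b] = best;
        }
    }
}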
- static const uint8_t cbytes[8] = { 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; + static const uint8_t cbytes[8] { 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; for (int i = 0; i < 8; i++) { pcb.data[i] = cbytes[i]; @@ -124,34 +124,34 @@ void symbolic_to_physical( weightbuf[i] = 0; } - const decimation_table *const *ixtab2 = bsd.decimation_tables; + const decimation_table *const *dts = bsd.decimation_tables; - const int packed_index = bsd.block_mode_to_packed[scb.block_mode]; - assert(packed_index >= 0 && packed_index < bsd.block_mode_packed_count); - const block_mode& bm = bsd.block_modes_packed[packed_index]; + const int packed_index = bsd.block_mode_packed_index[scb.block_mode]; + assert(packed_index >= 0 && packed_index < bsd.block_mode_count); + const block_mode& bm = bsd.block_modes[packed_index]; - int weight_count = ixtab2[bm.decimation_mode]->num_weights; - int weight_quantization_method = bm.quantization_mode; + int weight_count = dts[bm.decimation_mode]->weight_count; + int weight_quant_method = bm.quant_mode; int is_dual_plane = bm.is_dual_plane; int real_weight_count = is_dual_plane ? 2 * weight_count : weight_count; - int bits_for_weights = compute_ise_bitcount(real_weight_count, - (quantization_method) weight_quantization_method); + int bits_for_weights = get_ise_sequence_bitcount(real_weight_count, + (quant_method)weight_quant_method); if (is_dual_plane) { uint8_t weights[64]; for (int i = 0; i < weight_count; i++) { - weights[2 * i] = scb.plane1_weights[i]; - weights[2 * i + 1] = scb.plane2_weights[i]; + weights[2 * i] = scb.weights[i]; + weights[2 * i + 1] = scb.weights[i + PLANE2_WEIGHTS_OFFSET]; } - encode_ise(weight_quantization_method, real_weight_count, weights, weightbuf, 0); + encode_ise(weight_quant_method, real_weight_count, weights, weightbuf, 0); } else { - encode_ise(weight_quantization_method, weight_count, scb.plane1_weights, weightbuf, 0); + encode_ise(weight_quant_method, weight_count, scb.weights, weightbuf, 0); } for (int i = 0; i < 16; i++) @@ -184,10 +184,7 @@ void symbolic_to_physical( for (int i = 0; i < partition_count; i++) { int class_of_format = scb.color_formats[i] >> 2; - if (class_of_format < low_class) - { - low_class = class_of_format; - } + low_class = astc::min(class_of_format, low_class); } if (low_class == 3) @@ -247,7 +244,7 @@ void symbolic_to_physical( } // then, encode an ISE based on them. - encode_ise(scb.color_quantization_level, valuecount_to_encode, values_to_encode, pcb.data, (scb.partition_count == 1 ? 17 : 19 + PARTITION_BITS)); + encode_ise(scb.color_quant_level, valuecount_to_encode, values_to_encode, pcb.data, (scb.partition_count == 1 ? 17 : 19 + PARTITION_BITS)); } void physical_to_symbolic( @@ -260,7 +257,7 @@ void physical_to_symbolic( scb.error_block = 0; // get hold of the decimation tables. 
- const decimation_table *const *ixtab2 = bsd.decimation_tables; + const decimation_table *const *dts = bsd.decimation_tables; // extract header fields int block_mode = read_bits(11, 0, pcb.data); @@ -327,17 +324,17 @@ void physical_to_symbolic( return; } - const int packed_index = bsd.block_mode_to_packed[block_mode]; + const int packed_index = bsd.block_mode_packed_index[block_mode]; if (packed_index < 0) { scb.error_block = 1; return; } - assert(packed_index >= 0 && packed_index < bsd.block_mode_packed_count); - const struct block_mode& bm = bsd.block_modes_packed[packed_index]; + assert(packed_index >= 0 && packed_index < bsd.block_mode_count); + const struct block_mode& bm = bsd.block_modes[packed_index]; - int weight_count = ixtab2[bm.decimation_mode]->num_weights; - int weight_quantization_method = bm.quantization_mode; + int weight_count = dts[bm.decimation_mode]->weight_count; + int weight_quant_method = bm.quant_mode; int is_dual_plane = bm.is_dual_plane; int real_weight_count = is_dual_plane ? 2 * weight_count : weight_count; @@ -352,24 +349,24 @@ void physical_to_symbolic( bswapped[i] = bitrev8(pcb.data[15 - i]); } - int bits_for_weights = compute_ise_bitcount(real_weight_count, - (quantization_method) weight_quantization_method); + int bits_for_weights = get_ise_sequence_bitcount(real_weight_count, + (quant_method)weight_quant_method); int below_weights_pos = 128 - bits_for_weights; if (is_dual_plane) { uint8_t indices[64]; - decode_ise(weight_quantization_method, real_weight_count, bswapped, indices, 0); + decode_ise(weight_quant_method, real_weight_count, bswapped, indices, 0); for (int i = 0; i < weight_count; i++) { - scb.plane1_weights[i] = indices[2 * i]; - scb.plane2_weights[i] = indices[2 * i + 1]; + scb.weights[i] = indices[2 * i]; + scb.weights[i + PLANE2_WEIGHTS_OFFSET] = indices[2 * i + 1]; } } else { - decode_ise(weight_quantization_method, weight_count, bswapped, scb.plane1_weights, 0); + decode_ise(weight_quant_method, weight_count, bswapped, scb.weights, 0); } if (is_dual_plane && partition_count == 4) @@ -443,7 +440,7 @@ void physical_to_symbolic( } // then, determine the color endpoint format to use for these integers - static const int color_bits_arr[5] = { -1, 115 - 4, 113 - 4 - PARTITION_BITS, 113 - 4 - PARTITION_BITS, 113 - 4 - PARTITION_BITS }; + static const int color_bits_arr[5] { -1, 115 - 4, 113 - 4 - PARTITION_BITS, 113 - 4 - PARTITION_BITS, 113 - 4 - PARTITION_BITS }; int color_bits = color_bits_arr[partition_count] - bits_for_weights - encoded_type_highpart_size; if (is_dual_plane) { @@ -455,16 +452,16 @@ void physical_to_symbolic( color_bits = 0; } - int color_quantization_level = quantization_mode_table[color_integer_count >> 1][color_bits]; - scb.color_quantization_level = color_quantization_level; - if (color_quantization_level < 4) + int color_quant_level = quant_mode_table[color_integer_count >> 1][color_bits]; + scb.color_quant_level = color_quant_level; + if (color_quant_level < 4) { scb.error_block = 1; } // then unpack the integer-bits uint8_t values_to_decode[32]; - decode_ise(color_quantization_level, color_integer_count, pcb.data, values_to_decode, (partition_count == 1 ? 17 : 19 + PARTITION_BITS)); + decode_ise(color_quant_level, color_integer_count, pcb.data, values_to_decode, (partition_count == 1 ? 
17 : 19 + PARTITION_BITS)); // and distribute them over the endpoint types int valuecount_to_decode = 0; diff --git a/libkram/astc-encoder/astcenc_vecmathlib.h b/libkram/astc-encoder/astcenc_vecmathlib.h index a1b90661..aed6752c 100644 --- a/libkram/astc-encoder/astcenc_vecmathlib.h +++ b/libkram/astc-encoder/astcenc_vecmathlib.h @@ -1,6 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2019-2020 Arm Limited +// Copyright 2019-2021 Arm Limited +// Copyright 2008 Jose Fonseca // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -16,724 +17,508 @@ // ---------------------------------------------------------------------------- /* - * This module implements flexible N-wide float and integer vectors, where the - * width can be selected at compile time depending on the underlying ISA. It - * is not possible to mix different ISAs (or vector widths) in a single file - - * the ISA is statically selected when the header is first included. + * This module implements vector support for floats, ints, and vector lane + * control masks. It provides access to both explicit vector width types, and + * flexible N-wide types where N can be determined at compile time. * - * ISA support is provided for: + * The design of this module encourages use of vector length agnostic code, via + * the vint, vfloat, and vmask types. These will take on the widest SIMD vector + * with that is available at compile time. The current vector width is + * accessible for e.g. loop strides via the ASTCENC_SIMD_WIDTH constant. + * + * Explicit scalar types are acessible via the vint1, vfloat1, vmask1 types. + * These are provided primarily for prototyping and algorithm debug of VLA + * implementations. + * + * Explicit 4-wide types are accessible via the vint4, vfloat4, and vmask4 + * types. These are provided for use by VLA code, but are also expected to be + * used as a fixed-width type and will supported a reference C++ fallback for + * use on platforms without SIMD intrinsics. + * + * Explicit 8-wide types are accessible via the vint8, vfloat8, and vmask8 + * types. These are provide for use by VLA code, and are not expected to be + * used as a fixed-width type in normal code. No reference C implementation is + * provided on platforms without underlying SIMD intrinsics. + * + * With the current implementation ISA support is provided for: * * * 1-wide for scalar reference. - * * 4-wide for SSE2. - * * 4-wide for SSE4.2. - * * 8-wide for AVX2. + * * 4-wide for Armv8-A NEON. + * * 4-wide for x86-64 SSE2. + * * 4-wide for x86-64 SSE4.1. + * * 8-wide for x86-64 AVX2. 
* */ #ifndef ASTC_VECMATHLIB_H_INCLUDED #define ASTC_VECMATHLIB_H_INCLUDED -// Kram uses SSE2Neon on ARM, so needs intrinsics in use but not the include -//#if /* USE_SSE && */ (ASTCENC_SSE != 0 || ASTCENC_AVX != 0) -// #include -//#endif +#if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 + #include +#elif ASTCENC_NEON != 0 + #include +#endif -// This conflicts with simd.h library definition -#if defined(_MSC_VER) +#if !defined(__clang__) && defined(_MSC_VER) #define ASTCENC_SIMD_INLINE __forceinline #elif defined(__GNUC__) && !defined(__clang__) - #define ASTCENC_SIMD_INLINE __attribute__((unused, always_inline)) inline + #define ASTCENC_SIMD_INLINE __attribute__((always_inline)) inline #else - #define ASTCENC_SIMD_INLINE __attribute__((unused, always_inline, nodebug)) inline + #define ASTCENC_SIMD_INLINE __attribute__((always_inline, nodebug)) inline #endif #if ASTCENC_AVX >= 2 - #define ASTCENC_SIMD_ISA_AVX2 -#elif ASTCENC_SSE >= 20 - #define ASTCENC_SIMD_ISA_SSE -#else - #define ASTCENC_SIMD_ISA_SCALAR -#endif + /* If we have AVX2 expose 8-wide VLA. */ + #include "astcenc_vecmathlib_sse_4.h" + #include "astcenc_vecmathlib_common_4.h" + #include "astcenc_vecmathlib_avx2_8.h" + #define ASTCENC_SIMD_WIDTH 8 -// ---------------------------------------------------------------------------- -// AVX2 8-wide implementation + using vfloat = vfloat8; + using vint = vint8; + using vmask = vmask8; -#ifdef ASTCENC_SIMD_ISA_AVX2 + constexpr auto loada = vfloat8::loada; + constexpr auto load1 = vfloat8::load1; -#define ASTCENC_SIMD_WIDTH 8 - -// N-wide float -struct vfloat -{ - ASTCENC_SIMD_INLINE vfloat() {} - // Initialize with N floats from an unaligned memory address. - // Using loada() when address is aligned might be more optimal. - ASTCENC_SIMD_INLINE explicit vfloat(const float *p) { m = _mm256_loadu_ps(p); } - // Initialize with the same given float value in all lanes. - ASTCENC_SIMD_INLINE explicit vfloat(float v) { m = _mm256_set1_ps(v); } - - ASTCENC_SIMD_INLINE explicit vfloat(__m256 v) { m = v; } - - // Get SIMD lane #i value. - ASTCENC_SIMD_INLINE float lane(int i) const - { - #ifdef _MSC_VER - return m.m256_f32[i]; - #else - union { __m256 m; float f[ASTCENC_SIMD_WIDTH]; } cvt; - cvt.m = m; - return cvt.f[i]; - #endif - } +#elif ASTCENC_SSE >= 20 + /* If we have SSE expose 4-wide VLA, and 4-wide fixed width. */ + #include "astcenc_vecmathlib_sse_4.h" + #include "astcenc_vecmathlib_common_4.h" - // Float vector with all zero values - static ASTCENC_SIMD_INLINE vfloat zero() { return vfloat(_mm256_setzero_ps()); } + #define ASTCENC_SIMD_WIDTH 4 - // Float vector with each lane having the lane index (0, 1, 2, ...) - static ASTCENC_SIMD_INLINE vfloat lane_id() { return vfloat(_mm256_set_ps(7, 6, 5, 4, 3, 2, 1, 0)); } + using vfloat = vfloat4; + using vint = vint4; + using vmask = vmask4; - __m256 m; -}; + constexpr auto loada = vfloat4::loada; + constexpr auto load1 = vfloat4::load1; -// N-wide integer (32 bit in each lane) -struct vint -{ - ASTCENC_SIMD_INLINE vint() {} - // Initialize with N ints from an unaligned memory address. - ASTCENC_SIMD_INLINE explicit vint(const int *p) { m = _mm256_loadu_si256((const __m256i*)p); } - // Initialize with the same given integer value in all lanes. - ASTCENC_SIMD_INLINE explicit vint(int v) { m = _mm256_set1_epi32(v); } +#elif ASTCENC_NEON > 0 + /* If we have NEON expose 4-wide VLA. 
*/ + #include "astcenc_vecmathlib_neon_4.h" + #include "astcenc_vecmathlib_common_4.h" - ASTCENC_SIMD_INLINE explicit vint(__m256i v) { m = v; } + #define ASTCENC_SIMD_WIDTH 4 - // Get SIMD lane #i value - ASTCENC_SIMD_INLINE int lane(int i) const - { - #ifdef _MSC_VER - return m.m256i_i32[i]; - #else - union { __m256i m; int f[ASTCENC_SIMD_WIDTH]; } cvt; - cvt.m = m; - return cvt.f[i]; - #endif - } + using vfloat = vfloat4; + using vint = vint4; + using vmask = vmask4; - // Integer vector with each lane having the lane index (0, 1, 2, ...) - static ASTCENC_SIMD_INLINE vint lane_id() { return vint(_mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0)); } + constexpr auto loada = vfloat4::loada; + constexpr auto load1 = vfloat4::load1; - __m256i m; -}; +#else + // If we have nothing expose 4-wide VLA, and 4-wide fixed width. + + // Note: We no longer expose the 1-wide scalar fallback because it is not + // invariant with the 4-wide path due to algorithms that use horizontal + // operations that accumulate a local vector sum before accumulating into + // a running sum. + // + // For 4 items adding into an accumulator using 1-wide vectors the sum is: + // + // result = ((((sum + l0) + l1) + l2) + l3) + // + // ... whereas the accumulator for a 4-wide vector sum is: + // + // result = sum + ((l0 + l2) + (l1 + l3)) + // + // In "normal maths" this is the same, but the floating point reassociation + // differences mean that these will not produce the same result. + + #include "astcenc_vecmathlib_none_4.h" + #include "astcenc_vecmathlib_common_4.h" + + #define ASTCENC_SIMD_WIDTH 4 + + using vfloat = vfloat4; + using vint = vint4; + using vmask = vmask4; + + constexpr auto loada = vfloat4::loada; + constexpr auto load1 = vfloat4::load1; +#endif -// N-wide comparison mask. vmask is a result of comparison operators, -// and an argument for select() function below. -struct vmask -{ - ASTCENC_SIMD_INLINE explicit vmask(__m256 v) { m = v; } - ASTCENC_SIMD_INLINE explicit vmask(__m256i v) { m = _mm256_castsi256_ps(v); } - __m256 m; -}; - -// Initialize with one float in all SIMD lanes, from an aligned memory address. -ASTCENC_SIMD_INLINE vfloat load1a(const float* p) { return vfloat(_mm256_broadcast_ss(p)); } -// Initialize with N floats from an aligned memory address. 
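// The reassociation note above (about dropping the 1-wide scalar fallback) in
// concrete terms: a serial accumulation and a pairwise 4-lane horizontal sum
// associate the additions differently, so they need not produce the same float.
// This is an illustrative example only, not code from the patch.
static float sum_serial(const float v[4])
{
    float s = 0.0f; // ((((0 + v0) + v1) + v2) + v3)
    for (int i = 0; i < 4; i++)
    {
        s += v[i];
    }
    return s;
}

static float sum_pairwise(const float v[4])
{
    return (v[0] + v[2]) + (v[1] + v[3]); // the 4-wide horizontal-add order
}

// With v = { 1e8f, 1.0f, -1e8f, 1.0f }, sum_serial() returns 1.0f (the first
// 1.0f is absorbed by 1e8f) while sum_pairwise() returns 2.0f, which is why
// mixing vector widths would break bit-exact invariance.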
-ASTCENC_SIMD_INLINE vfloat loada(const float* p) { return vfloat(_mm256_load_ps(p)); } - -// Per-lane float arithmetic operations -ASTCENC_SIMD_INLINE vfloat operator+ (vfloat a, vfloat b) { a.m = _mm256_add_ps(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vfloat operator- (vfloat a, vfloat b) { a.m = _mm256_sub_ps(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vfloat operator* (vfloat a, vfloat b) { a.m = _mm256_mul_ps(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vfloat operator/ (vfloat a, vfloat b) { a.m = _mm256_div_ps(a.m, b.m); return a; } - -// Per-lane float comparison operations -ASTCENC_SIMD_INLINE vmask operator==(vfloat a, vfloat b) { return vmask(_mm256_cmp_ps(a.m, b.m, _CMP_EQ_OQ)); } -ASTCENC_SIMD_INLINE vmask operator!=(vfloat a, vfloat b) { return vmask(_mm256_cmp_ps(a.m, b.m, _CMP_NEQ_OQ)); } -ASTCENC_SIMD_INLINE vmask operator< (vfloat a, vfloat b) { return vmask(_mm256_cmp_ps(a.m, b.m, _CMP_LT_OQ)); } -ASTCENC_SIMD_INLINE vmask operator> (vfloat a, vfloat b) { return vmask(_mm256_cmp_ps(a.m, b.m, _CMP_GT_OQ)); } -ASTCENC_SIMD_INLINE vmask operator<=(vfloat a, vfloat b) { return vmask(_mm256_cmp_ps(a.m, b.m, _CMP_LE_OQ)); } -ASTCENC_SIMD_INLINE vmask operator>=(vfloat a, vfloat b) { return vmask(_mm256_cmp_ps(a.m, b.m, _CMP_GE_OQ)); } - -// Logical operations on comparison mask values -ASTCENC_SIMD_INLINE vmask operator| (vmask a, vmask b) { return vmask(_mm256_or_ps(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator& (vmask a, vmask b) { return vmask(_mm256_and_ps(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator^ (vmask a, vmask b) { return vmask(_mm256_xor_ps(a.m, b.m)); } - -// Returns a 8-bit code where bit0..bit7 map to lanes -ASTCENC_SIMD_INLINE unsigned mask(vmask v) { return _mm256_movemask_ps(v.m); } -// Whether any lane in the comparison mask is set -ASTCENC_SIMD_INLINE bool any(vmask v) { return mask(v) != 0; } -// Whether all lanes in the comparison mask are set -ASTCENC_SIMD_INLINE bool all(vmask v) { return mask(v) == 0xFF; } - -// Per-lane float min & max -ASTCENC_SIMD_INLINE vfloat min(vfloat a, vfloat b) { a.m = _mm256_min_ps(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vfloat max(vfloat a, vfloat b) { a.m = _mm256_max_ps(a.m, b.m); return a; } - -// Per-lane clamp to 0..1 range -ASTCENC_SIMD_INLINE vfloat saturate(vfloat a) +/** + * @brief Round a count down to the largest multiple of 8. + * + * @param count The unrounded value. + * + * @return The rounded value. + */ +ASTCENC_SIMD_INLINE int round_down_to_simd_multiple_8(int count) { - __m256 zero = _mm256_setzero_ps(); - __m256 one = _mm256_set1_ps(1.0f); - return vfloat(_mm256_min_ps(_mm256_max_ps(a.m, zero), one)); + return count & ~(8 - 1); } -ASTCENC_SIMD_INLINE vfloat abs(vfloat x) +/** + * @brief Round a count down to the largest multiple of 4. + * + * @param count The unrounded value. + * + * @return The rounded value. + */ +ASTCENC_SIMD_INLINE int round_down_to_simd_multiple_4(int count) { - __m256 msk = _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff)); - return vfloat(_mm256_and_ps(x.m, msk)); + return count & ~(4 - 1); } -// Round to nearest integer (nearest even for .5 cases) -ASTCENC_SIMD_INLINE vfloat round(vfloat v) +/** + * @brief Round a count down to the largest multiple of the SIMD width. + * + * Assumption that the vector width is a power of two ... + * + * @param count The unrounded value. + * + * @return The rounded value. 
+ */ +ASTCENC_SIMD_INLINE int round_down_to_simd_multiple_vla(int count) { - return vfloat(_mm256_round_ps(v.m, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); + return count & ~(ASTCENC_SIMD_WIDTH - 1); } -// Per-lane convert to integer (truncate) -ASTCENC_SIMD_INLINE vint floatToInt(vfloat v) { return vint(_mm256_cvttps_epi32(v.m)); } - -// Reinterpret-bitcast integer vector as a float vector (this is basically a no-op on the CPU) -ASTCENC_SIMD_INLINE vfloat intAsFloat(vint v) { return vfloat(_mm256_castsi256_ps(v.m)); } -// Reinterpret-bitcast float vector as an integer vector (this is basically a no-op on the CPU) -ASTCENC_SIMD_INLINE vint floatAsInt(vfloat v) { return vint(_mm256_castps_si256(v.m)); } - -ASTCENC_SIMD_INLINE vint operator~ (vint a) { return vint(_mm256_xor_si256(a.m, _mm256_set1_epi32(-1))); } -ASTCENC_SIMD_INLINE vmask operator~ (vmask a) { return vmask(_mm256_xor_si256(_mm256_castps_si256(a.m), _mm256_set1_epi32(-1))); } - -// Per-lane arithmetic integer operations -ASTCENC_SIMD_INLINE vint operator+ (vint a, vint b) { a.m = _mm256_add_epi32(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vint operator- (vint a, vint b) { a.m = _mm256_sub_epi32(a.m, b.m); return a; } - -// Per-lane logical bit operations -ASTCENC_SIMD_INLINE vint operator| (vint a, vint b) { return vint(_mm256_or_si256(a.m, b.m)); } -ASTCENC_SIMD_INLINE vint operator& (vint a, vint b) { return vint(_mm256_and_si256(a.m, b.m)); } -ASTCENC_SIMD_INLINE vint operator^ (vint a, vint b) { return vint(_mm256_xor_si256(a.m, b.m)); } - -// Per-lane integer comparison operations -ASTCENC_SIMD_INLINE vmask operator< (vint a, vint b) { return vmask(_mm256_cmpgt_epi32(b.m, a.m)); } -ASTCENC_SIMD_INLINE vmask operator> (vint a, vint b) { return vmask(_mm256_cmpgt_epi32(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator==(vint a, vint b) { return vmask(_mm256_cmpeq_epi32(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator!=(vint a, vint b) { return ~vmask(_mm256_cmpeq_epi32(a.m, b.m)); } - -// Per-lane integer min & max -ASTCENC_SIMD_INLINE vint min(vint a, vint b) { a.m = _mm256_min_epi32(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vint max(vint a, vint b) { a.m = _mm256_max_epi32(a.m, b.m); return a; } - -// Horizontal minimum - returns vector with all lanes -// set to the minimum value of the input vector. -ASTCENC_SIMD_INLINE vfloat hmin(vfloat v) +/** + * @brief Round a count up to the largest multiple of the SIMD width. + * + * Assumption that the vector width is a power of two ... + * + * @param count The unrounded value. + * + * @return The rounded value. + */ +ASTCENC_SIMD_INLINE int round_up_to_simd_multiple_vla(int count) { - __m128 vlow = _mm256_castps256_ps128(v.m); - __m128 vhigh = _mm256_extractf128_ps(v.m, 1); - vlow = _mm_min_ps(vlow, vhigh); - - // First do an horizontal reduction. // v = [ D C | B A ] - __m128 shuf = _mm_shuffle_ps(vlow, vlow, _MM_SHUFFLE(2, 3, 0, 1)); // [ C D | A B ] - __m128 mins = _mm_min_ps(vlow, shuf); // mins = [ D+C C+D | B+A A+B ] - shuf = _mm_movehl_ps(shuf, mins); // [ C D | D+C C+D ] - mins = _mm_min_ss(mins, shuf); - - - // This is the most logical implementation, but the convenience intrinsic - // is missing on older compilers (supported in g++ 9 and clang++ 9). 
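// The rounding helpers above only work because the SIMD width is a power of
// two: masking with ~(width - 1) clears the low bits. For example with an
// 8-wide vector, round_down(13) = 13 & ~7 = 8 and round_up(13) = 16. A generic
// sketch with the width passed in explicitly (illustrative, not from the patch):
static inline int round_down_pow2(int count, int width)
{
    return count & ~(width - 1);
}

static inline int round_up_pow2(int count, int width)
{
    return (count + width - 1) & ~(width - 1);
}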
- //__m256i r = _mm256_set_m128(m, m) - __m256 r = _mm256_insertf128_ps(_mm256_castps128_ps256(mins), mins, 1); - - vfloat vmin(_mm256_permute_ps(r, 0)); - return vmin; + int multiples = (count + ASTCENC_SIMD_WIDTH - 1) / ASTCENC_SIMD_WIDTH; + return multiples * ASTCENC_SIMD_WIDTH; } -ASTCENC_SIMD_INLINE vint hmin(vint v) +/** + * @brief Return @c a with lanes negated if the @c b lane is negative. + */ +ASTCENC_SIMD_INLINE vfloat change_sign(vfloat a, vfloat b) { - __m128i m = _mm_min_epi32(_mm256_extracti128_si256(v.m, 0), _mm256_extracti128_si256(v.m, 1)); - m = _mm_min_epi32(m, _mm_shuffle_epi32(m, _MM_SHUFFLE(0,0,3,2))); - m = _mm_min_epi32(m, _mm_shuffle_epi32(m, _MM_SHUFFLE(0,0,0,1))); - m = _mm_shuffle_epi32(m, _MM_SHUFFLE(0,0,0,0)); - - // This is the most logical implementation, but the convenience intrinsic - // is missing on older compilers (supported in g++ 9 and clang++ 9). - //__m256i r = _mm256_set_m128i(m, m) - __m256i r = _mm256_insertf128_si256(_mm256_castsi128_si256(m), m, 1); - vint vmin(r); - return vmin; + vint ia = float_as_int(a); + vint ib = float_as_int(b); + vint sign_mask((int)0x80000000); + vint r = ia ^ (ib & sign_mask); + return int_as_float(r); } -// Store float vector into an aligned address. -ASTCENC_SIMD_INLINE void store(vfloat v, float* ptr) { _mm256_store_ps(ptr, v.m); } -// Store integer vector into an aligned address. -ASTCENC_SIMD_INLINE void store(vint v, int* ptr) { _mm256_store_si256((__m256i*)ptr, v.m); } - -// Store lowest N (simd width) bytes of integer vector into an unaligned address. -ASTCENC_SIMD_INLINE void store_nbytes(vint v, uint8_t* ptr) +/** + * @brief Return fast, but approximate, vector atan(x). + * + * Max error of this implementaiton is 0.004883. + */ +ASTCENC_SIMD_INLINE vfloat atan(vfloat x) { - // This is the most logical implementation, but the convenience intrinsic - // is missing on older compilers (supported in g++ 9 and clang++ 9). - // _mm_storeu_si64(ptr, _mm256_extracti128_si256(v.m, 0)) - _mm_storel_epi64((__m128i*)ptr, _mm256_extracti128_si256(v.m, 0)); + vmask c = abs(x) > vfloat(1.0f); + vfloat z = change_sign(vfloat(astc::PI_OVER_TWO), x); + vfloat y = select(x, vfloat(1.0f) / x, c); + y = y / (y * y * vfloat(0.28f) + vfloat(1.0f)); + return select(y, z - y, c); } -// SIMD "gather" - load each lane with base[indices[i]] -ASTCENC_SIMD_INLINE vfloat gatherf(const float* base, vint indices) -{ - return vfloat(_mm256_i32gather_ps(base, indices.m, 4)); -} -ASTCENC_SIMD_INLINE vint gatheri(const int* base, vint indices) +/** + * @brief Return fast, but approximate, vector atan2(x, y). + */ +ASTCENC_SIMD_INLINE vfloat atan2(vfloat y, vfloat x) { - return vint(_mm256_i32gather_epi32(base, indices.m, 4)); + vfloat z = atan(abs(y / x)); + vmask xmask = vmask(float_as_int(x).m); + return change_sign(select(z, vfloat(astc::PI) - z, xmask), y); } -// Pack low 8 bits of each lane into low 64 bits of result. -ASTCENC_SIMD_INLINE vint pack_low_bytes(vint v) +/* + * @brief Factory that returns a unit length 4 component vfloat4. 
+ */ +static ASTCENC_SIMD_INLINE vfloat4 unit4() { - __m256i shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 28, 24, 20, 16, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 12, 8, 4, 0); - __m256i a = _mm256_shuffle_epi8(v.m, shuf); - __m128i a0 = _mm256_extracti128_si256(a, 0); - __m128i a1 = _mm256_extracti128_si256(a, 1); - __m128i b = _mm_unpacklo_epi32(a0, a1); - - // This is the most logical implementation, but the convenience intrinsic - // is missing on older compilers (supported in g++ 9 and clang++ 9). - //__m256i r = _mm256_set_m128i(b, b) - __m256i r = _mm256_insertf128_si256(_mm256_castsi128_si256(b), b, 1); - return vint(r); + return vfloat4(0.5f); } -// "select", i.e. highbit(cond) ? b : a -ASTCENC_SIMD_INLINE vfloat select(vfloat a, vfloat b, vmask cond) -{ - return vfloat(_mm256_blendv_ps(a.m, b.m, cond.m)); -} -ASTCENC_SIMD_INLINE vint select(vint a, vint b, vmask cond) +/** + * @brief Factory that returns a unit length 3 component vfloat4. + */ +static ASTCENC_SIMD_INLINE vfloat4 unit3() { - return vint(_mm256_blendv_epi8(a.m, b.m, _mm256_castps_si256(cond.m))); + return vfloat4(0.57735f, 0.57735f, 0.57735f, 0.0f); } -ASTCENC_SIMD_INLINE void print(vfloat a) +/** + * @brief Normalize a non-zero length vector to unit length. + */ +static ASTCENC_SIMD_INLINE vfloat4 normalize(vfloat4 a) { - alignas(ASTCENC_VECALIGN) float v[8]; - store(a, v); - KLOGD("Astcenc", "v8_f32:\n %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f\n", - (double)v[0], (double)v[1], (double)v[2], (double)v[3], - (double)v[4], (double)v[5], (double)v[6], (double)v[7]); + vfloat4 length = dot(a, a); + return a / sqrt(length); } -ASTCENC_SIMD_INLINE void print(vint a) +/** + * @brief Normalize a vector, returning @c safe if len is zero. + */ +static ASTCENC_SIMD_INLINE vfloat4 normalize_safe(vfloat4 a, vfloat4 safe) { - alignas(ASTCENC_VECALIGN) int v[8]; - store(a, v); - KLOGD("Astcenc", "v8_i32:\n %8u %8u %8u %8u %8u %8u %8u %8u\n", - v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]); -} - -#endif // #ifdef ASTCENC_SIMD_ISA_AVX2 + vfloat4 length = dot(a, a); + if (length.lane<0>() != 0.0f) + { + return a / sqrt(length); + } + return safe; +} -// ---------------------------------------------------------------------------- -// SSE 4-wide implementation -// Uses SSE2 as baseline, optionally SSE4.x instructions based on ASTCENC_SSE value -#ifdef ASTCENC_SIMD_ISA_SSE -#define ASTCENC_SIMD_WIDTH 4 +#define POLY0(x, c0) ( c0) +#define POLY1(x, c0, c1) ((POLY0(x, c1) * x) + c0) +#define POLY2(x, c0, c1, c2) ((POLY1(x, c1, c2) * x) + c0) +#define POLY3(x, c0, c1, c2, c3) ((POLY2(x, c1, c2, c3) * x) + c0) +#define POLY4(x, c0, c1, c2, c3, c4) ((POLY3(x, c1, c2, c3, c4) * x) + c0) +#define POLY5(x, c0, c1, c2, c3, c4, c5) ((POLY4(x, c1, c2, c3, c4, c5) * x) + c0) -struct vfloat +/** + * @brief Compute an approximate exp2(x) for each lane in the vector. 
+ * + * Based on 5th degree minimax polynomials, ported from this blog + * https://jrfonseca.blogspot.com/2008/09/fast-sse2-pow-tables-or-polynomials.html + */ +static ASTCENC_SIMD_INLINE vfloat4 exp2(vfloat4 x) { - ASTCENC_SIMD_INLINE vfloat() {} - ASTCENC_SIMD_INLINE explicit vfloat(const float *p) { m = _mm_loadu_ps(p); } - ASTCENC_SIMD_INLINE explicit vfloat(float v) { m = _mm_set_ps1(v); } - ASTCENC_SIMD_INLINE explicit vfloat(__m128 v) { m = v; } - ASTCENC_SIMD_INLINE float lane(int i) const - { - #ifdef _MSC_VER - return m.m128_f32[i]; - #else - union { __m128 m; float f[ASTCENC_SIMD_WIDTH]; } cvt; - cvt.m = m; - return cvt.f[i]; - #endif - } - static ASTCENC_SIMD_INLINE vfloat zero() { return vfloat(_mm_setzero_ps()); } - static ASTCENC_SIMD_INLINE vfloat lane_id() { return vfloat(_mm_set_ps(3, 2, 1, 0)); } - __m128 m; -}; + x = clamp(-126.99999f, 129.0f, x); -struct vint -{ - ASTCENC_SIMD_INLINE vint() {} - ASTCENC_SIMD_INLINE explicit vint(const int *p) { m = _mm_load_si128((const __m128i*)p); } - ASTCENC_SIMD_INLINE explicit vint(int v) { m = _mm_set1_epi32(v); } - ASTCENC_SIMD_INLINE explicit vint(__m128i v) { m = v; } - ASTCENC_SIMD_INLINE int lane(int i) const - { - #ifdef _MSC_VER - return m.m128i_i32[i]; - #else - union { __m128i m; int f[ASTCENC_SIMD_WIDTH]; } cvt; - cvt.m = m; - return cvt.f[i]; - #endif - } - static ASTCENC_SIMD_INLINE vint lane_id() { return vint(_mm_set_epi32(3, 2, 1, 0)); } - __m128i m; -}; + vint4 ipart = float_to_int(x - 0.5f); + vfloat4 fpart = x - int_to_float(ipart); -struct vmask -{ - ASTCENC_SIMD_INLINE explicit vmask(__m128 v) { m = v; } - ASTCENC_SIMD_INLINE explicit vmask(__m128i v) { m = _mm_castsi128_ps(v); } - __m128 m; -}; - - -ASTCENC_SIMD_INLINE vfloat load1a(const float* p) { return vfloat(_mm_load_ps1(p)); } -ASTCENC_SIMD_INLINE vfloat loada(const float* p) { return vfloat(_mm_load_ps(p)); } - -ASTCENC_SIMD_INLINE vfloat operator+ (vfloat a, vfloat b) { a.m = _mm_add_ps(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vfloat operator- (vfloat a, vfloat b) { a.m = _mm_sub_ps(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vfloat operator* (vfloat a, vfloat b) { a.m = _mm_mul_ps(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vfloat operator/ (vfloat a, vfloat b) { a.m = _mm_div_ps(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vmask operator==(vfloat a, vfloat b) { return vmask(_mm_cmpeq_ps(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator!=(vfloat a, vfloat b) { return vmask(_mm_cmpneq_ps(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator< (vfloat a, vfloat b) { return vmask(_mm_cmplt_ps(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator> (vfloat a, vfloat b) { return vmask(_mm_cmpgt_ps(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator<=(vfloat a, vfloat b) { return vmask(_mm_cmple_ps(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator>=(vfloat a, vfloat b) { return vmask(_mm_cmpge_ps(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator| (vmask a, vmask b) { return vmask(_mm_or_ps(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator& (vmask a, vmask b) { return vmask(_mm_and_ps(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator^ (vmask a, vmask b) { return vmask(_mm_xor_ps(a.m, b.m)); } -// Returns a 4-bit code where bit0..bit3 is X..W -ASTCENC_SIMD_INLINE unsigned mask(vmask v) { return _mm_movemask_ps(v.m); } -ASTCENC_SIMD_INLINE bool any(vmask v) { return mask(v) != 0; } -ASTCENC_SIMD_INLINE bool all(vmask v) { return mask(v) == 0xF; } - -ASTCENC_SIMD_INLINE vfloat min(vfloat a, vfloat b) { a.m = _mm_min_ps(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vfloat 
max(vfloat a, vfloat b) { a.m = _mm_max_ps(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vfloat saturate(vfloat a) -{ - __m128 zero = _mm_setzero_ps(); - __m128 one = _mm_set1_ps(1.0f); - return vfloat(_mm_min_ps(_mm_max_ps(a.m, zero), one)); -} + // Integer contrib, using 1 << ipart + vfloat4 iexp = int_as_float(lsl<23>(ipart + 127)); -ASTCENC_SIMD_INLINE vfloat abs(vfloat x) -{ - __m128 msk = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); - return vfloat(_mm_and_ps(x.m, msk)); + // Fractional contrib, using polynomial fit of 2^x in range [-0.5, 0.5) + vfloat4 fexp = POLY5(fpart, + 9.9999994e-1f, + 6.9315308e-1f, + 2.4015361e-1f, + 5.5826318e-2f, + 8.9893397e-3f, + 1.8775767e-3f); + + return iexp * fexp; } -ASTCENC_SIMD_INLINE vfloat round(vfloat v) +/** + * @brief Compute an approximate log2(x) for each lane in the vector. + * + * Based on 5th degree minimax polynomials, ported from this blog + * https://jrfonseca.blogspot.com/2008/09/fast-sse2-pow-tables-or-polynomials.html + */ +static ASTCENC_SIMD_INLINE vfloat4 log2(vfloat4 x) { -#if ASTCENC_SSE >= 41 - return vfloat(_mm_round_ps(v.m, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC)); -#else - __m128 V = v.m; - __m128 negZero = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); - __m128 noFraction = _mm_set_ps1(8388608.0f); - __m128 absMask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF)); - __m128 sign = _mm_and_ps(V, negZero); - __m128 sMagic = _mm_or_ps(noFraction, sign); - __m128 R1 = _mm_add_ps(V, sMagic); - R1 = _mm_sub_ps(R1, sMagic); - __m128 R2 = _mm_and_ps(V, absMask); - __m128 mask = _mm_cmple_ps(R2, noFraction); - R2 = _mm_andnot_ps(mask, V); - R1 = _mm_and_ps(R1, mask); - return vfloat(_mm_xor_ps(R1, R2)); -#endif -} + vint4 exp(0x7F800000); + vint4 mant(0x007FFFFF); + vint4 one(0x3F800000); -ASTCENC_SIMD_INLINE vint floatToInt(vfloat v) { return vint(_mm_cvttps_epi32(v.m)); } - -ASTCENC_SIMD_INLINE vfloat intAsFloat(vint v) { return vfloat(_mm_castsi128_ps(v.m)); } -ASTCENC_SIMD_INLINE vint floatAsInt(vfloat v) { return vint(_mm_castps_si128(v.m)); } - -ASTCENC_SIMD_INLINE vint operator~ (vint a) { return vint(_mm_xor_si128(a.m, _mm_set1_epi32(-1))); } -ASTCENC_SIMD_INLINE vmask operator~ (vmask a) { return vmask(_mm_xor_si128(_mm_castps_si128(a.m), _mm_set1_epi32(-1))); } - -ASTCENC_SIMD_INLINE vint operator+ (vint a, vint b) { a.m = _mm_add_epi32(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vint operator- (vint a, vint b) { a.m = _mm_sub_epi32(a.m, b.m); return a; } -ASTCENC_SIMD_INLINE vint operator| (vint a, vint b) { return vint(_mm_or_si128(a.m, b.m)); } -ASTCENC_SIMD_INLINE vint operator& (vint a, vint b) { return vint(_mm_and_si128(a.m, b.m)); } -ASTCENC_SIMD_INLINE vint operator^ (vint a, vint b) { return vint(_mm_xor_si128(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator< (vint a, vint b) { return vmask(_mm_cmplt_epi32(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator> (vint a, vint b) { return vmask(_mm_cmpgt_epi32(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator==(vint a, vint b) { return vmask(_mm_cmpeq_epi32(a.m, b.m)); } -ASTCENC_SIMD_INLINE vmask operator!=(vint a, vint b) { return ~vmask(_mm_cmpeq_epi32(a.m, b.m)); } -ASTCENC_SIMD_INLINE vint min(vint a, vint b) { -#if ASTCENC_SSE >= 41 - a.m = _mm_min_epi32(a.m, b.m); -#else - vmask d = a < b; - a.m = _mm_or_si128(_mm_and_si128(_mm_castps_si128(d.m), a.m), _mm_andnot_si128(_mm_castps_si128(d.m), b.m)); -#endif - return a; -} + vint4 i = float_as_int(x); -ASTCENC_SIMD_INLINE vint max(vint a, vint b) { -#if ASTCENC_SSE >= 41 - a.m = _mm_max_epi32(a.m, b.m); -#else 
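// exp2() above (and two_to_the_n() further down) build an exact power of two
// by writing a biased exponent straight into the IEEE-754 exponent field - the
// lsl<23>(ipart + 127) step. A scalar sketch of that bit trick (illustrative,
// not code from the patch):
#include <cstdint>
#include <cstring>

static float two_to_the_n_scalar(int n)
{
    // Valid for the normal exponent range, roughly -126 <= n <= 127.
    uint32_t bits = (uint32_t)(n + 127) << 23;
    float result;
    std::memcpy(&result, &bits, sizeof(result)); // bit-cast, no int->float convert
    return result;
}
// e.g. two_to_the_n_scalar(10) == 1024.0f and two_to_the_n_scalar(-3) == 0.125f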
- vmask d = a > b; - a.m = _mm_or_si128(_mm_and_si128(_mm_castps_si128(d.m), a.m), _mm_andnot_si128(_mm_castps_si128(d.m), b.m)); -#endif - return a; -} + vfloat4 e = int_to_float(lsr<23>(i & exp) - 127); -#define ASTCENC_SHUFFLE4F(V, X,Y,Z,W) vfloat(_mm_shuffle_ps((V).m, (V).m, _MM_SHUFFLE(W,Z,Y,X))) -#define ASTCENC_SHUFFLE4I(V, X,Y,Z,W) vint(_mm_shuffle_epi32((V).m, _MM_SHUFFLE(W,Z,Y,X))) + vfloat4 m = int_as_float((i & mant) | one); -ASTCENC_SIMD_INLINE vfloat hmin(vfloat v) -{ - v = min(v, ASTCENC_SHUFFLE4F(v, 2, 3, 0, 0)); - v = min(v, ASTCENC_SHUFFLE4F(v, 1, 0, 0, 0)); - return ASTCENC_SHUFFLE4F(v, 0,0,0,0); -} -ASTCENC_SIMD_INLINE vint hmin(vint v) -{ - v = min(v, ASTCENC_SHUFFLE4I(v, 2, 3, 0, 0)); - v = min(v, ASTCENC_SHUFFLE4I(v, 1, 0, 0, 0)); - return ASTCENC_SHUFFLE4I(v, 0,0,0,0); -} + // Polynomial fit of log2(x)/(x - 1), for x in range [1, 2) + vfloat4 p = POLY4(m, + 2.8882704548164776201f, + -2.52074962577807006663f, + 1.48116647521213171641f, + -0.465725644288844778798f, + 0.0596515482674574969533f); -ASTCENC_SIMD_INLINE void store(vfloat v, float* ptr) { _mm_store_ps(ptr, v.m); } -ASTCENC_SIMD_INLINE void store(vint v, int* ptr) { _mm_store_si128((__m128i*)ptr, v.m); } + // Increases the polynomial degree, but ensures that log2(1) == 0 + p = p * (m - 1.0f); -ASTCENC_SIMD_INLINE void store_nbytes(vint v, uint8_t* ptr) -{ - // This is the most logical implementation, but the convenience intrinsic - // is missing on older compilers (supported in g++ 9 and clang++ 9). - // _mm_storeu_si32(ptr, v.m); - _mm_store_ss((float*)ptr, _mm_castsi128_ps(v.m)); + return p + e; } -ASTCENC_SIMD_INLINE vfloat gatherf(const float* base, vint indices) +/** + * @brief Compute an approximate pow(x, y) for each lane in the vector. + * + * Power function based on the exp2(log2(x) * y) transform. + */ +static ASTCENC_SIMD_INLINE vfloat4 pow(vfloat4 x, vfloat4 y) { - int idx[4]; - store(indices, idx); - return vfloat(_mm_set_ps(base[idx[3]], base[idx[2]], base[idx[1]], base[idx[0]])); -} + vmask4 zero_mask = y == vfloat4(0.0f); + vfloat4 estimate = exp2(log2(x) * y); -ASTCENC_SIMD_INLINE vint gatheri(const int* base, vint indices) -{ - int idx[4]; - store(indices, idx); - return vint(_mm_set_epi32(base[idx[3]], base[idx[2]], base[idx[1]], base[idx[0]])); + // Guarantee that y == 0 returns exactly 1.0f + return select(estimate, vfloat4(1.0f), zero_mask); } -// packs low 8 bits of each lane into low 32 bits of result -ASTCENC_SIMD_INLINE vint pack_low_bytes(vint v) +/** + * @brief Count the leading zeros for each lane in @c a. + * + * Valid for all data values of @c a; will return a per-lane value [0, 32]. + */ +ASTCENC_SIMD_INLINE vint4 clz(vint4 a) { - #if ASTCENC_SSE >= 41 - __m128i shuf = _mm_set_epi8(0,0,0,0, 0,0,0,0, 0,0,0,0, 12,8,4,0); - return vint(_mm_shuffle_epi8(v.m, shuf)); - #else - __m128i va = _mm_unpacklo_epi8(v.m, _mm_shuffle_epi32(v.m, _MM_SHUFFLE(1,1,1,1))); - __m128i vb = _mm_unpackhi_epi8(v.m, _mm_shuffle_epi32(v.m, _MM_SHUFFLE(3,3,3,3))); - return vint(_mm_unpacklo_epi16(va, vb)); - #endif -} + // This function is a horrible abuse of floating point exponents to convert + // the original integer value into a 2^N encoding we can recover easily. -// "select", i.e. highbit(cond) ? 
b : a -// on SSE4.1 and up this can be done easily via "blend" instruction; -// on older SSEs we have to do some hoops, see -// https://fgiesen.wordpress.com/2016/04/03/sse-mind-the-gap/ -ASTCENC_SIMD_INLINE vfloat select(vfloat a, vfloat b, vmask cond) -{ -#if ASTCENC_SSE >= 41 - a.m = _mm_blendv_ps(a.m, b.m, cond.m); -#else - __m128 d = _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(cond.m), 31)); - a.m = _mm_or_ps(_mm_and_ps(d, b.m), _mm_andnot_ps(d, a.m)); -#endif - return a; -} + // Convert to float without risk of rounding up by keeping only top 8 bits. + // This trick is is guranteed to keep top 8 bits and clear the 9th. + a = (~lsr<8>(a)) & a; + a = float_as_int(int_to_float(a)); -ASTCENC_SIMD_INLINE vint select(vint a, vint b, vmask cond) -{ -#if ASTCENC_SSE >= 41 - return vint(_mm_blendv_epi8(a.m, b.m, _mm_castps_si128(cond.m))); -#else - __m128i d = _mm_srai_epi32(_mm_castps_si128(cond.m), 31); - return vint(_mm_or_si128(_mm_and_si128(d, b.m), _mm_andnot_si128(d, a.m))); -#endif -} + // Extract and unbias exponent + a = vint4(127 + 31) - lsr<23>(a); -ASTCENC_SIMD_INLINE void print(vfloat a) -{ - alignas(ASTCENC_VECALIGN) float v[4]; - store(a, v); - KLOGD("Astcenc", "v4_f32:\n %0.4f %0.4f %0.4f %0.4f\n", - (double)v[0], (double)v[1], (double)v[2], (double)v[3]); + // Clamp result to a valid 32-bit range + return clamp(0, 32, a); } -ASTCENC_SIMD_INLINE void print(vint a) +/** + * @brief Return lanewise 2^a for each lane in @c a. + * + * Use of signed int mean that this is only valid for values in range [0, 31]. + */ +ASTCENC_SIMD_INLINE vint4 two_to_the_n(vint4 a) { - alignas(ASTCENC_VECALIGN) int v[4]; - store(a, v); - KLOGD("Astcenc", "v4_i32:\n %8u %8u %8u %8u\n", - v[0], v[1], v[2], v[3]); -} + // 2^30 is the largest signed number than can be represented + assert(all(a < vint4(31))); + // This function is a horrible abuse of floating point to use the exponent + // and float conversion to generate a 2^N multiple. -#endif // #ifdef ASTCENC_SIMD_ISA_SSE + // Bias the exponent + vint4 exp = a + 127; + exp = lsl<23>(exp); + // Reinterpret the bits as a float, and then convert to an int + vfloat4 f = int_as_float(exp); + return float_to_int(f); +} -// ---------------------------------------------------------------------------- -// Pure scalar, 1-wide implementation +/** + * @brief Convert unorm16 [0, 65535] to float16 in range [0, 1]. 
+ */ +ASTCENC_SIMD_INLINE vint4 unorm16_to_sf16(vint4 p) +{ + vint4 fp16_one = vint4(0x3C00); + vint4 fp16_small = lsl<8>(p); -#ifdef ASTCENC_SIMD_ISA_SCALAR + vmask4 is_one = p == vint4(0xFFFF); + vmask4 is_small = p < vint4(4); -#include -#include -#include + vint4 lz = clz(p) - 16; -#define ASTCENC_SIMD_WIDTH 1 + // TODO: Could use AVX2 _mm_sllv_epi32() instead of p * 2^ + p = p * two_to_the_n(lz + 1); + p = p & vint4(0xFFFF); -struct vfloat -{ - ASTCENC_SIMD_INLINE vfloat() {} - ASTCENC_SIMD_INLINE explicit vfloat(const float *p) { m = *p; } - ASTCENC_SIMD_INLINE explicit vfloat(float v) { m = v; } - ASTCENC_SIMD_INLINE float lane(int i) const { return m; } - static ASTCENC_SIMD_INLINE vfloat zero() { return vfloat(0.0f); } - static ASTCENC_SIMD_INLINE vfloat lane_id() { return vfloat(0.0f); } - float m; -}; - -struct vint -{ - ASTCENC_SIMD_INLINE vint() {} - ASTCENC_SIMD_INLINE explicit vint(const int *p) { m = *p; } - ASTCENC_SIMD_INLINE explicit vint(int v) { m = v; } - ASTCENC_SIMD_INLINE int lane(int i) const { return m; } - static ASTCENC_SIMD_INLINE vint lane_id() { return vint(0); } - int m; -}; - -struct vmask -{ - ASTCENC_SIMD_INLINE explicit vmask(bool v) { m = v; } - bool m; -}; - - -ASTCENC_SIMD_INLINE vfloat load1a(const float* p) { return vfloat(*p); } -ASTCENC_SIMD_INLINE vfloat loada(const float* p) { return vfloat(*p); } - -ASTCENC_SIMD_INLINE vfloat operator+ (vfloat a, vfloat b) { a.m = a.m + b.m; return a; } -ASTCENC_SIMD_INLINE vfloat operator- (vfloat a, vfloat b) { a.m = a.m - b.m; return a; } -ASTCENC_SIMD_INLINE vfloat operator* (vfloat a, vfloat b) { a.m = a.m * b.m; return a; } -ASTCENC_SIMD_INLINE vfloat operator/ (vfloat a, vfloat b) { a.m = a.m / b.m; return a; } -ASTCENC_SIMD_INLINE vmask operator==(vfloat a, vfloat b) { return vmask(a.m = a.m == b.m); } -ASTCENC_SIMD_INLINE vmask operator!=(vfloat a, vfloat b) { return vmask(a.m = a.m != b.m); } -ASTCENC_SIMD_INLINE vmask operator< (vfloat a, vfloat b) { return vmask(a.m = a.m < b.m); } -ASTCENC_SIMD_INLINE vmask operator> (vfloat a, vfloat b) { return vmask(a.m = a.m > b.m); } -ASTCENC_SIMD_INLINE vmask operator<=(vfloat a, vfloat b) { return vmask(a.m = a.m <= b.m); } -ASTCENC_SIMD_INLINE vmask operator>=(vfloat a, vfloat b) { return vmask(a.m = a.m >= b.m); } -ASTCENC_SIMD_INLINE vmask operator| (vmask a, vmask b) { return vmask(a.m || b.m); } -ASTCENC_SIMD_INLINE vmask operator& (vmask a, vmask b) { return vmask(a.m && b.m); } -ASTCENC_SIMD_INLINE vmask operator^ (vmask a, vmask b) { return vmask(a.m ^ b.m); } -ASTCENC_SIMD_INLINE unsigned mask(vmask v) { return v.m; } -ASTCENC_SIMD_INLINE bool any(vmask v) { return mask(v) != 0; } -ASTCENC_SIMD_INLINE bool all(vmask v) { return mask(v) != 0; } - -ASTCENC_SIMD_INLINE vfloat min(vfloat a, vfloat b) { a.m = a.m < b.m ? a.m : b.m; return a; } -ASTCENC_SIMD_INLINE vfloat max(vfloat a, vfloat b) { a.m = a.m > b.m ? 
a.m : b.m; return a; } -ASTCENC_SIMD_INLINE vfloat saturate(vfloat a) { return vfloat(std::min(std::max(a.m,0.0f), 1.0f)); } - -ASTCENC_SIMD_INLINE vfloat abs(vfloat x) { return vfloat(std::abs(x.m)); } - -ASTCENC_SIMD_INLINE vfloat round(vfloat v) -{ - return vfloat(std::floor(v.m + 0.5f)); -} + p = lsr<6>(p); -ASTCENC_SIMD_INLINE vint floatToInt(vfloat v) { return vint(v.m); } + p = p | lsl<10>(vint4(14) - lz); -ASTCENC_SIMD_INLINE vfloat intAsFloat(vint v) { vfloat r; memcpy(&r.m, &v.m, 4); return r; } -ASTCENC_SIMD_INLINE vint floatAsInt(vfloat v) { vint r; memcpy(&r.m, &v.m, 4); return r; } + vint4 r = select(p, fp16_one, is_one); + r = select(r, fp16_small, is_small); + return r; +} -ASTCENC_SIMD_INLINE vint operator~ (vint a) { a.m = ~a.m; return a; } -ASTCENC_SIMD_INLINE vint operator+ (vint a, vint b) { a.m = a.m + b.m; return a; } -ASTCENC_SIMD_INLINE vint operator- (vint a, vint b) { a.m = a.m - b.m; return a; } -ASTCENC_SIMD_INLINE vint operator| (vint a, vint b) { return vint(a.m | b.m); } -ASTCENC_SIMD_INLINE vint operator& (vint a, vint b) { return vint(a.m & b.m); } -ASTCENC_SIMD_INLINE vint operator^ (vint a, vint b) { return vint(a.m ^ b.m); } -ASTCENC_SIMD_INLINE vmask operator< (vint a, vint b) { return vmask(a.m = a.m < b.m); } -ASTCENC_SIMD_INLINE vmask operator> (vint a, vint b) { return vmask(a.m = a.m > b.m); } -ASTCENC_SIMD_INLINE vmask operator==(vint a, vint b) { return vmask(a.m = a.m == b.m); } -ASTCENC_SIMD_INLINE vmask operator!=(vint a, vint b) { return vmask(a.m = a.m != b.m); } -ASTCENC_SIMD_INLINE vint min(vint a, vint b) { a.m = a.m < b.m ? a.m : b.m; return a; } -ASTCENC_SIMD_INLINE vint max(vint a, vint b) { a.m = a.m > b.m ? a.m : b.m; return a; } +/** + * @brief Convert 16-bit LNS to float16. + */ +ASTCENC_SIMD_INLINE vint4 lns_to_sf16(vint4 p) +{ + vint4 mc = p & 0x7FF; + vint4 ec = lsr<11>(p); -ASTCENC_SIMD_INLINE vfloat hmin(vfloat v) { return v; } -ASTCENC_SIMD_INLINE vint hmin(vint v) { return v; } + vint4 mc_512 = mc * 3; + vmask4 mask_512 = mc < vint4(512); -ASTCENC_SIMD_INLINE void store(vfloat v, float* ptr) { *ptr = v.m; } -ASTCENC_SIMD_INLINE void store(vint v, int* ptr) { *ptr = v.m; } + vint4 mc_1536 = mc * 4 - 512; + vmask4 mask_1536 = mc < vint4(1536); -ASTCENC_SIMD_INLINE void store_nbytes(vint v, uint8_t* ptr) { *ptr = (uint8_t)v.m; } + vint4 mc_else = mc * 5 - 2048; -ASTCENC_SIMD_INLINE vfloat gatherf(const float* base, vint indices) -{ - return vfloat(base[indices.m]); -} -ASTCENC_SIMD_INLINE vint gatheri(const int* base, vint indices) -{ - return vint(base[indices.m]); + vint4 mt = mc_else; + mt = select(mt, mc_1536, mask_1536); + mt = select(mt, mc_512, mask_512); + + vint4 res = lsl<10>(ec) | lsr<3>(mt); + return min(res, vint4(0x7BFF)); } -// packs low 8 bits of each lane into low 8 bits of result (a no-op in scalar code path) -ASTCENC_SIMD_INLINE vint pack_low_bytes(vint v) +/** + * @brief Extract mantissa and exponent of a float value. + * + * @param a The input value. + * @param[out] exp The output exponent. + * + * @return The mantissa. + */ +static inline vfloat4 frexp(vfloat4 a, vint4& exp) { - return v; -} + // Interpret the bits as an integer + vint4 ai = float_as_int(a); + // Extract and unbias the exponent + exp = (lsr<23>(ai) & 0xFF) - 126; -// "select", i.e. highbit(cond) ? b : a -ASTCENC_SIMD_INLINE vfloat select(vfloat a, vfloat b, vmask cond) -{ - return cond.m ? 
b : a; + // Extract and unbias the mantissa + vint4 manti = (ai & 0x807FFFFF) | 0x3F000000; + return int_as_float(manti); } -ASTCENC_SIMD_INLINE vint select(vint a, vint b, vmask cond) + +/** + * @brief Convert float to 16-bit LNS. + */ +static inline vfloat4 float_to_lns(vfloat4 a) { - return cond.m ? b : a; -} + vint4 exp; + vfloat4 mant = frexp(a, exp); + // Do these early before we start messing about ... + vmask4 mask_underflow_nan = ~(a > vfloat4(1.0f / 67108864.0f)); + vmask4 mask_infinity = a >= vfloat4(65536.0f); -#endif // #ifdef ASTCENC_SIMD_ISA_SCALAR + // If input is smaller than 2^-14, multiply by 2^25 and don't bias. + vmask4 exp_lt_m13 = exp < vint4(-13); + vfloat4 a1a = a * 33554432.0f; + vint4 expa = vint4::zero(); -// ---------------------------------------------------------------------------- + vfloat4 a1b = (mant - 0.5f) * 4096; + vint4 expb = exp + 14; -// Return x, with each lane having its sign flipped where the corresponding y lane is negative, i.e. msb(y) ? -x : x -ASTCENC_SIMD_INLINE vfloat changesign(vfloat x, vfloat y) -{ - vint ix = floatAsInt(x); - vint iy = floatAsInt(y); - vint signMask((int)0x80000000); - vint r = ix ^ (iy & signMask); - return intAsFloat(r); + a = select(a1b, a1a, exp_lt_m13); + exp = select(expb, expa, exp_lt_m13); + + vmask4 a_lt_384 = a < vfloat4(384.0f); + vmask4 a_lt_1408 = a <= vfloat4(1408.0f); + + vfloat4 a2a = a * (4.0f / 3.0f); + vfloat4 a2b = a + 128.0f; + vfloat4 a2c = (a + 512.0f) * (4.0f / 5.0f); + + a = a2c; + a = select(a, a2b, a_lt_1408); + a = select(a, a2a, a_lt_384); + + a = a + (int_to_float(exp) * 2048.0f) + 1.0f; + + a = select(a, vfloat4(65535.0f), mask_infinity); + a = select(a, vfloat4::zero(), mask_underflow_nan); + + return a; } -// Fast atan implementation, with max error of 0.004883 -ASTCENC_SIMD_INLINE vfloat atan(vfloat x) +namespace astc { - vmask c = abs(x) > vfloat(1.0f); - vfloat z = changesign(vfloat(astc::PI_OVER_TWO), x); - vfloat y = select(x, vfloat(1.0f) / x, c); - y = y / (y * y * vfloat(0.28f) + vfloat(1.0f)); - return select(y, z - y, c); -} -ASTCENC_SIMD_INLINE vfloat atan2(vfloat y, vfloat x) +static ASTCENC_SIMD_INLINE float pow(float x, float y) { - vfloat z = atan(abs(y / x)); - vmask xmask = vmask(floatAsInt(x).m); - return changesign(select(z, vfloat(astc::PI) - z, xmask), y); + return pow(vfloat4(x), vfloat4(y)).lane<0>(); +} + } #endif // #ifndef ASTC_VECMATHLIB_H_INCLUDED diff --git a/libkram/astc-encoder/astcenc_vecmathlib_avx2_8.h b/libkram/astc-encoder/astcenc_vecmathlib_avx2_8.h new file mode 100755 index 00000000..cba1db45 --- /dev/null +++ b/libkram/astc-encoder/astcenc_vecmathlib_avx2_8.h @@ -0,0 +1,943 @@ +// SPDX-License-Identifier: Apache-2.0 +// ---------------------------------------------------------------------------- +// Copyright 2019-2021 Arm Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy +// of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. +// ---------------------------------------------------------------------------- + +/** + * @brief 8x32-bit vectors, implemented using AVX2. 
+ * + * This module implements 8-wide 32-bit float, int, and mask vectors for x86 + * AVX2. + * + * There is a baseline level of functionality provided by all vector widths and + * implementations. This is implemented using identical function signatures, + * modulo data type, so we can use them as substitutable implementations in VLA + * code. + */ + +#ifndef ASTC_VECMATHLIB_AVX2_8_H_INCLUDED +#define ASTC_VECMATHLIB_AVX2_8_H_INCLUDED + +#ifndef ASTCENC_SIMD_INLINE + #error "Include astcenc_vecmathlib.h, do not include directly" +#endif + +#include + +// ============================================================================ +// vfloat8 data type +// ============================================================================ + +/** + * @brief Data type for 8-wide floats. + */ +struct vfloat8 +{ + /** + * @brief Construct from zero-initialized value. + */ + ASTCENC_SIMD_INLINE vfloat8() {} + + /** + * @brief Construct from 4 values loaded from an unaligned address. + * + * Consider using loada() which is better with vectors if data is aligned + * to vector length. + */ + ASTCENC_SIMD_INLINE explicit vfloat8(const float *p) + { + m = _mm256_loadu_ps(p); + } + + /** + * @brief Construct from 1 scalar value replicated across all lanes. + * + * Consider using zero() for constexpr zeros. + */ + ASTCENC_SIMD_INLINE explicit vfloat8(float a) + { + m = _mm256_set1_ps(a); + } + + /** + * @brief Construct from 8 scalar values. + * + * The value of @c a is stored to lane 0 (LSB) in the SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vfloat8( + float a, float b, float c, float d, + float e, float f, float g, float h) + { + m = _mm256_set_ps(h, g, f, e, d, c, b, a); + } + + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vfloat8(__m256 a) { + m = a; + } + + /** + * @brief Get the scalar value of a single lane. + */ + template ASTCENC_SIMD_INLINE float lane() const + { + #if !defined(__clang__) && defined(_MSC_VER) + return m.m256_f32[l]; + #else + union { __m256 m; float f[8]; } cvt; + cvt.m = m; + return cvt.f[l]; + #endif + } + + /** + * @brief Factory that returns a vector of zeros. + */ + static ASTCENC_SIMD_INLINE vfloat8 zero() + { + return vfloat8(_mm256_setzero_ps()); + } + + /** + * @brief Factory that returns a replicated scalar loaded from memory. + */ + static ASTCENC_SIMD_INLINE vfloat8 load1(const float* p) + { + return vfloat8(_mm256_broadcast_ss(p)); + } + + /** + * @brief Factory that returns a vector loaded from 32B aligned memory. + */ + static ASTCENC_SIMD_INLINE vfloat8 loada(const float* p) + { + return vfloat8(_mm256_load_ps(p)); + } + + /** + * @brief Factory that returns a vector containing the lane IDs. + */ + static ASTCENC_SIMD_INLINE vfloat8 lane_id() + { + return vfloat8(_mm256_set_ps(7, 6, 5, 4, 3, 2, 1, 0)); + } + + /** + * @brief The vector ... + */ + __m256 m; +}; + +// ============================================================================ +// vint8 data type +// ============================================================================ + +/** + * @brief Data type for 8-wide ints. + */ +struct vint8 +{ + /** + * @brief Construct from zero-initialized value. + */ + ASTCENC_SIMD_INLINE vint8() {} + + /** + * @brief Construct from 8 values loaded from an unaligned address. + * + * Consider using loada() which is better with vectors if data is aligned + * to vector length. 
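+	 *
+	 * Illustrative usage (the buffer name is hypothetical, not from the source):
+	 *     int buf[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
+	 *     vint8 v(buf);   // unaligned load; lane 0 holds buf[0]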
+ */ + ASTCENC_SIMD_INLINE explicit vint8(const int *p) + { + m = _mm256_loadu_si256((const __m256i*)p); + } + + /** + * @brief Construct from 8 uint8_t loaded from an unaligned address. + */ + ASTCENC_SIMD_INLINE explicit vint8(const uint8_t *p) + { + // _mm_loadu_si64 would be nicer syntax, but missing on older GCC + m = _mm256_cvtepu8_epi32(_mm_cvtsi64_si128(*(const long long*)p)); + } + + /** + * @brief Construct from 1 scalar value replicated across all lanes. + * + * Consider using vfloat4::zero() for constexpr zeros. + */ + ASTCENC_SIMD_INLINE explicit vint8(int a) + { + m = _mm256_set1_epi32(a); + } + + /** + * @brief Construct from 8 scalar values. + * + * The value of @c a is stored to lane 0 (LSB) in the SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vint8( + int a, int b, int c, int d, + int e, int f, int g, int h) + { + m = _mm256_set_epi32(h, g, f, e, d, c, b, a); + } + + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vint8(__m256i a) + { + m = a; + } + + /** + * @brief Get the scalar from a single lane. + */ + template ASTCENC_SIMD_INLINE int lane() const + { + #if !defined(__clang__) && defined(_MSC_VER) + return m.m256i_i32[l]; + #else + union { __m256i m; int f[8]; } cvt; + cvt.m = m; + return cvt.f[l]; + #endif + } + + /** + * @brief Factory that returns a vector of zeros. + */ + static ASTCENC_SIMD_INLINE vint8 zero() + { + return vint8(_mm256_setzero_si256()); + } + + /** + * @brief Factory that returns a replicated scalar loaded from memory. + */ + static ASTCENC_SIMD_INLINE vint8 load1(const int* p) + { + __m128i a = _mm_set1_epi32(*p); + return vint8(_mm256_broadcastd_epi32(a)); + } + + /** + * @brief Factory that returns a vector loaded from 32B aligned memory. + */ + static ASTCENC_SIMD_INLINE vint8 loada(const int* p) + { + return vint8(_mm256_load_si256((const __m256i*)p)); + } + + /** + * @brief Factory that returns a vector containing the lane IDs. + */ + static ASTCENC_SIMD_INLINE vint8 lane_id() + { + return vint8(_mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0)); + } + + /** + * @brief The vector ... + */ + __m256i m; +}; + +// ============================================================================ +// vmask8 data type +// ============================================================================ + +/** + * @brief Data type for 8-wide control plane masks. + */ +struct vmask8 +{ + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vmask8(__m256 a) + { + m = a; + } + + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vmask8(__m256i a) + { + m = _mm256_castsi256_ps(a); + } + + /** + * @brief The vector ... + */ + __m256 m; +}; + +// ============================================================================ +// vmask8 operators and functions +// ============================================================================ + +/** + * @brief Overload: mask union (or). + */ +ASTCENC_SIMD_INLINE vmask8 operator|(vmask8 a, vmask8 b) +{ + return vmask8(_mm256_or_ps(a.m, b.m)); +} + +/** + * @brief Overload: mask intersect (and). + */ +ASTCENC_SIMD_INLINE vmask8 operator&(vmask8 a, vmask8 b) +{ + return vmask8(_mm256_and_ps(a.m, b.m)); +} + +/** + * @brief Overload: mask difference (xor). + */ +ASTCENC_SIMD_INLINE vmask8 operator^(vmask8 a, vmask8 b) +{ + return vmask8(_mm256_xor_ps(a.m, b.m)); +} + +/** + * @brief Overload: mask invert (not). 
+ */ +ASTCENC_SIMD_INLINE vmask8 operator~(vmask8 a) +{ + return vmask8(_mm256_xor_si256(_mm256_castps_si256(a.m), _mm256_set1_epi32(-1))); +} + +/** + * @brief Return a 8-bit mask code indicating mask status. + * + * bit0 = lane 0 + */ +ASTCENC_SIMD_INLINE unsigned mask(vmask8 a) +{ + return _mm256_movemask_ps(a.m); +} + +/** + * @brief True if any lanes are enabled, false otherwise. + */ +ASTCENC_SIMD_INLINE bool any(vmask8 a) +{ + return mask(a) != 0; +} + +/** + * @brief True if any lanes are enabled, false otherwise. + */ +ASTCENC_SIMD_INLINE bool all(vmask8 a) +{ + return mask(a) == 0xFF; +} + +// ============================================================================ +// vint8 operators and functions +// ============================================================================ +/** + * @brief Overload: vector by vector addition. + */ +ASTCENC_SIMD_INLINE vint8 operator+(vint8 a, vint8 b) +{ + return vint8(_mm256_add_epi32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector subtraction. + */ +ASTCENC_SIMD_INLINE vint8 operator-(vint8 a, vint8 b) +{ + return vint8(_mm256_sub_epi32(a.m, b.m)); +} + +/** + * @brief Overload: vector bit invert. + */ +ASTCENC_SIMD_INLINE vint8 operator~(vint8 a) +{ + return vint8(_mm256_xor_si256(a.m, _mm256_set1_epi32(-1))); +} + +/** + * @brief Overload: vector by vector bitwise or. + */ +ASTCENC_SIMD_INLINE vint8 operator|(vint8 a, vint8 b) +{ + return vint8(_mm256_or_si256(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector bitwise and. + */ +ASTCENC_SIMD_INLINE vint8 operator&(vint8 a, vint8 b) +{ + return vint8(_mm256_and_si256(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector bitwise xor. + */ +ASTCENC_SIMD_INLINE vint8 operator^(vint8 a, vint8 b) +{ + return vint8(_mm256_xor_si256(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector equality. + */ +ASTCENC_SIMD_INLINE vmask8 operator==(vint8 a, vint8 b) +{ + return vmask8(_mm256_cmpeq_epi32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector inequality. + */ +ASTCENC_SIMD_INLINE vmask8 operator!=(vint8 a, vint8 b) +{ + return ~vmask8(_mm256_cmpeq_epi32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector less than. + */ +ASTCENC_SIMD_INLINE vmask8 operator<(vint8 a, vint8 b) +{ + return vmask8(_mm256_cmpgt_epi32(b.m, a.m)); +} + +/** + * @brief Overload: vector by vector greater than. + */ +ASTCENC_SIMD_INLINE vmask8 operator>(vint8 a, vint8 b) +{ + return vmask8(_mm256_cmpgt_epi32(a.m, b.m)); +} + +/** + * @brief Return the min vector of two vectors. + */ +ASTCENC_SIMD_INLINE vint8 min(vint8 a, vint8 b) +{ + return vint8(_mm256_min_epi32(a.m, b.m)); +} + +/** + * @brief Return the max vector of two vectors. + */ +ASTCENC_SIMD_INLINE vint8 max(vint8 a, vint8 b) +{ + return vint8(_mm256_max_epi32(a.m, b.m)); +} + +/** + * @brief Return the horizontal minimum of a vector. + */ +ASTCENC_SIMD_INLINE vint8 hmin(vint8 a) +{ + __m128i m = _mm_min_epi32(_mm256_extracti128_si256(a.m, 0), _mm256_extracti128_si256(a.m, 1)); + m = _mm_min_epi32(m, _mm_shuffle_epi32(m, _MM_SHUFFLE(0,0,3,2))); + m = _mm_min_epi32(m, _mm_shuffle_epi32(m, _MM_SHUFFLE(0,0,0,1))); + m = _mm_shuffle_epi32(m, _MM_SHUFFLE(0,0,0,0)); + + // This is the most logical implementation, but the convenience intrinsic + // is missing on older compilers (supported in g++ 9 and clang++ 9). 
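+	// Broadcasting the 128-bit result into both halves with
+	// _mm256_insertf128_si256 below produces the same value in every lane,
+	// so it stands in for the commented-out _mm256_set_m128i.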
+	//__m256i r = _mm256_set_m128i(m, m)
+	__m256i r = _mm256_insertf128_si256(_mm256_castsi128_si256(m), m, 1);
+	vint8 vmin(r);
+	return vmin;
+}
+
+/**
+ * @brief Return the horizontal maximum of a vector.
+ */
+ASTCENC_SIMD_INLINE vint8 hmax(vint8 a)
+{
+	__m128i m = _mm_max_epi32(_mm256_extracti128_si256(a.m, 0), _mm256_extracti128_si256(a.m, 1));
+	m = _mm_max_epi32(m, _mm_shuffle_epi32(m, _MM_SHUFFLE(0,0,3,2)));
+	m = _mm_max_epi32(m, _mm_shuffle_epi32(m, _MM_SHUFFLE(0,0,0,1)));
+	m = _mm_shuffle_epi32(m, _MM_SHUFFLE(0,0,0,0));
+
+	// This is the most logical implementation, but the convenience intrinsic
+	// is missing on older compilers (supported in g++ 9 and clang++ 9).
+	//__m256i r = _mm256_set_m128i(m, m)
+	__m256i r = _mm256_insertf128_si256(_mm256_castsi128_si256(m), m, 1);
+	vint8 vmax(r);
+	return vmax;
+}
+
+/**
+ * @brief Store a vector to a 16B aligned memory address.
+ */
+ASTCENC_SIMD_INLINE void storea(vint8 a, int* p)
+{
+	_mm256_store_si256((__m256i*)p, a.m);
+}
+
+/**
+ * @brief Store lowest N (vector width) bytes into an unaligned address.
+ */
+ASTCENC_SIMD_INLINE void store_nbytes(vint8 a, uint8_t* p)
+{
+	// This is the most logical implementation, but the convenience intrinsic
+	// is missing on older compilers (supported in g++ 9 and clang++ 9).
+	// _mm_storeu_si64(ptr, _mm256_extracti128_si256(v.m, 0))
+	_mm_storel_epi64((__m128i*)p, _mm256_extracti128_si256(a.m, 0));
+}
+
+/**
+ * @brief Gather N (vector width) indices from the array.
+ */
+ASTCENC_SIMD_INLINE vint8 gatheri(const int* base, vint8 indices)
+{
+	return vint8(_mm256_i32gather_epi32(base, indices.m, 4));
+}
+
+/**
+ * @brief Pack low 8 bits of N (vector width) lanes into bottom of vector.
+ */
+ASTCENC_SIMD_INLINE vint8 pack_low_bytes(vint8 v)
+{
+	__m256i shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
+	                               0, 0, 0, 0, 28, 24, 20, 16,
+	                               0, 0, 0, 0, 0, 0, 0, 0,
+	                               0, 0, 0, 0, 12, 8, 4, 0);
+	__m256i a = _mm256_shuffle_epi8(v.m, shuf);
+	__m128i a0 = _mm256_extracti128_si256(a, 0);
+	__m128i a1 = _mm256_extracti128_si256(a, 1);
+	__m128i b = _mm_unpacklo_epi32(a0, a1);
+
+	// This is the most logical implementation, but the convenience intrinsic
+	// is missing on older compilers (supported in g++ 9 and clang++ 9).
+	//__m256i r = _mm256_set_m128i(b, b)
+	__m256i r = _mm256_insertf128_si256(_mm256_castsi128_si256(b), b, 1);
+	return vint8(r);
+}
+
+/**
+ * @brief Return lanes from @c b if MSB of @c cond is set, else @c a.
+ */
+ASTCENC_SIMD_INLINE vint8 select(vint8 a, vint8 b, vmask8 cond)
+{
+	// Don't use _mm256_blendv_epi8 directly, as it does not give the useful
+	// select-on-float-sign-bit mask behavior. Performance is the same; these
+	// casts are free.
+	__m256 av = _mm256_castsi256_ps(a.m);
+	__m256 bv = _mm256_castsi256_ps(b.m);
+	return vint8(_mm256_castps_si256(_mm256_blendv_ps(av, bv, cond.m)));
+}
+
+/**
+ * @brief Debug function to print a vector of ints.
+ */
+ASTCENC_SIMD_INLINE void print(vint8 a)
+{
+	alignas(ASTCENC_VECALIGN) int v[8];
+	storea(a, v);
+	printf("v8_i32:\n %8d %8d %8d %8d %8d %8d %8d %8d\n",
+	       v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
+}
+
+// ============================================================================
+// vfloat8 operators and functions
+// ============================================================================
+
+/**
+ * @brief Overload: vector by vector addition.
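+ *
+ * For example (illustrative values), vfloat8(1.0f) + vfloat8(2.0f) yields
+ * 3.0f in every lane.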
+ */ +ASTCENC_SIMD_INLINE vfloat8 operator+(vfloat8 a, vfloat8 b) +{ + return vfloat8(_mm256_add_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector subtraction. + */ +ASTCENC_SIMD_INLINE vfloat8 operator-(vfloat8 a, vfloat8 b) +{ + return vfloat8(_mm256_sub_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector multiplication. + */ +ASTCENC_SIMD_INLINE vfloat8 operator*(vfloat8 a, vfloat8 b) +{ + return vfloat8(_mm256_mul_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by scalar multiplication. + */ +ASTCENC_SIMD_INLINE vfloat8 operator*(vfloat8 a, float b) +{ + return vfloat8(_mm256_mul_ps(a.m, _mm256_set1_ps(b))); +} + +/** + * @brief Overload: scalar by vector multiplication. + */ +ASTCENC_SIMD_INLINE vfloat8 operator*(float a, vfloat8 b) +{ + return vfloat8(_mm256_mul_ps(_mm256_set1_ps(a), b.m)); +} + +/** + * @brief Overload: vector by vector division. + */ +ASTCENC_SIMD_INLINE vfloat8 operator/(vfloat8 a, vfloat8 b) +{ + return vfloat8(_mm256_div_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by scalar division. + */ +ASTCENC_SIMD_INLINE vfloat8 operator/(vfloat8 a, float b) +{ + return vfloat8(_mm256_div_ps(a.m, _mm256_set1_ps(b))); +} + + +/** + * @brief Overload: scalar by vector division. + */ +ASTCENC_SIMD_INLINE vfloat8 operator/(float a, vfloat8 b) +{ + return vfloat8(_mm256_div_ps(_mm256_set1_ps(a), b.m)); +} + + +/** + * @brief Overload: vector by vector equality. + */ +ASTCENC_SIMD_INLINE vmask8 operator==(vfloat8 a, vfloat8 b) +{ + return vmask8(_mm256_cmp_ps(a.m, b.m, _CMP_EQ_OQ)); +} + +/** + * @brief Overload: vector by vector inequality. + */ +ASTCENC_SIMD_INLINE vmask8 operator!=(vfloat8 a, vfloat8 b) +{ + return vmask8(_mm256_cmp_ps(a.m, b.m, _CMP_NEQ_OQ)); +} + +/** + * @brief Overload: vector by vector less than. + */ +ASTCENC_SIMD_INLINE vmask8 operator<(vfloat8 a, vfloat8 b) +{ + return vmask8(_mm256_cmp_ps(a.m, b.m, _CMP_LT_OQ)); +} + +/** + * @brief Overload: vector by vector greater than. + */ +ASTCENC_SIMD_INLINE vmask8 operator>(vfloat8 a, vfloat8 b) +{ + return vmask8(_mm256_cmp_ps(a.m, b.m, _CMP_GT_OQ)); +} + +/** + * @brief Overload: vector by vector les than or equal. + */ +ASTCENC_SIMD_INLINE vmask8 operator<=(vfloat8 a, vfloat8 b) +{ + return vmask8(_mm256_cmp_ps(a.m, b.m, _CMP_LE_OQ)); +} + +/** + * @brief Overload: vector by vector greater than or equal. + */ +ASTCENC_SIMD_INLINE vmask8 operator>=(vfloat8 a, vfloat8 b) +{ + return vmask8(_mm256_cmp_ps(a.m, b.m, _CMP_GE_OQ)); +} + +/** + * @brief Return the min vector of two vectors. + * + * If either lane value is NaN, @c b will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat8 min(vfloat8 a, vfloat8 b) +{ + return vfloat8(_mm256_min_ps(a.m, b.m)); +} + +/** + * @brief Return the max vector of two vectors. + * + * If either lane value is NaN, @c b will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat8 max(vfloat8 a, vfloat8 b) +{ + return vfloat8(_mm256_max_ps(a.m, b.m)); +} + +/** + * @brief Return the clamped value between min and max. + * + * It is assumed that neither @c min nor @c max are NaN values. If @c a is NaN + * then @c min will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat8 clamp(float min, float max, vfloat8 a) +{ + // Do not reorder - second operand will return if either is NaN + a.m = _mm256_max_ps(a.m, _mm256_set1_ps(min)); + a.m = _mm256_min_ps(a.m, _mm256_set1_ps(max)); + return a; +} + +/** + * @brief Return a clamped value between 0.0f and max. + * + * It is assumed that @c max is not a NaN value. 
If @c a is NaN then zero will + * be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat8 clampz(float max, vfloat8 a) +{ + a.m = _mm256_max_ps(a.m, _mm256_setzero_ps()); + a.m = _mm256_min_ps(a.m, _mm256_set1_ps(max)); + return a; +} + +/** + * @brief Return a clamped value between 0.0f and 1.0f. + * + * If @c a is NaN then zero will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat8 clampzo(vfloat8 a) +{ + a.m = _mm256_max_ps(a.m, _mm256_setzero_ps()); + a.m = _mm256_min_ps(a.m, _mm256_set1_ps(1.0f)); + return a; +} + +/** + * @brief Return the absolute value of the float vector. + */ +ASTCENC_SIMD_INLINE vfloat8 abs(vfloat8 a) +{ + __m256 msk = _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff)); + return vfloat8(_mm256_and_ps(a.m, msk)); +} + +/** + * @brief Return a float rounded to the nearest integer value. + */ +ASTCENC_SIMD_INLINE vfloat8 round(vfloat8 a) +{ + constexpr int flags = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC; + return vfloat8(_mm256_round_ps(a.m, flags)); +} + +/** + * @brief Return the horizontal minimum of a vector. + */ +ASTCENC_SIMD_INLINE vfloat8 hmin(vfloat8 a) +{ + __m128 vlow = _mm256_castps256_ps128(a.m); + __m128 vhigh = _mm256_extractf128_ps(a.m, 1); + vlow = _mm_min_ps(vlow, vhigh); + + // First do an horizontal reduction. + __m128 shuf = _mm_shuffle_ps(vlow, vlow, _MM_SHUFFLE(2, 3, 0, 1)); + __m128 mins = _mm_min_ps(vlow, shuf); + shuf = _mm_movehl_ps(shuf, mins); + mins = _mm_min_ss(mins, shuf); + + // This is the most logical implementation, but the convenience intrinsic + // is missing on older compilers (supported in g++ 9 and clang++ 9). + //__m256i r = _mm256_set_m128(m, m) + __m256 r = _mm256_insertf128_ps(_mm256_castps128_ps256(mins), mins, 1); + + return vfloat8(_mm256_permute_ps(r, 0)); +} + +/** + * @brief Return the horizontal minimum of a vector. + */ +ASTCENC_SIMD_INLINE float hmin_s(vfloat8 a) +{ + return hmin(a).lane<0>(); +} + +/** + * @brief Return the horizontal maximum of a vector. + */ +ASTCENC_SIMD_INLINE vfloat8 hmax(vfloat8 a) +{ + __m128 vlow = _mm256_castps256_ps128(a.m); + __m128 vhigh = _mm256_extractf128_ps(a.m, 1); + vhigh = _mm_max_ps(vlow, vhigh); + + // First do an horizontal reduction. + __m128 shuf = _mm_shuffle_ps(vhigh, vhigh, _MM_SHUFFLE(2, 3, 0, 1)); + __m128 maxs = _mm_max_ps(vhigh, shuf); + shuf = _mm_movehl_ps(shuf,maxs); + maxs = _mm_max_ss(maxs, shuf); + + // This is the most logical implementation, but the convenience intrinsic + // is missing on older compilers (supported in g++ 9 and clang++ 9). + //__m256i r = _mm256_set_m128(m, m) + __m256 r = _mm256_insertf128_ps(_mm256_castps128_ps256(maxs), maxs, 1); + return vfloat8(_mm256_permute_ps(r, 0)); +} + +/** + * @brief Return the horizontal maximum of a vector. + */ +ASTCENC_SIMD_INLINE float hmax_s(vfloat8 a) +{ + return hmax(a).lane<0>(); +} + +/** + * @brief Return the horizontal sum of a vector. + */ +ASTCENC_SIMD_INLINE float hadd_s(vfloat8 a) +{ + // Two sequential 4-wide adds gives invariance with 4-wide code + vfloat4 lo(_mm256_extractf128_ps(a.m, 0)); + vfloat4 hi(_mm256_extractf128_ps(a.m, 1)); + return hadd_s(lo) + hadd_s(hi); +} + +/** + * @brief Accumulate the full horizontal sum of a vector. + */ +ASTCENC_SIMD_INLINE void haccumulate(float& accum, vfloat8 a) +{ + // Two sequential 4-wide accumulates gives invariance with 4-wide code. + // Note that this approach gives higher error in the sum; adding the two + // smaller numbers together first would be more accurate. 
+ vfloat4 lo(_mm256_extractf128_ps(a.m, 0)); + haccumulate(accum, lo); + + vfloat4 hi(_mm256_extractf128_ps(a.m, 1)); + haccumulate(accum, hi); +} + +/** + * @brief Accumulate lane-wise sums for a vector, folded 4-wide. + */ +ASTCENC_SIMD_INLINE void haccumulate(vfloat4& accum, vfloat8 a) +{ + // Two sequential 4-wide accumulates gives invariance with 4-wide code. + // Note that this approach gives higher error in the sum; adding the two + // smaller numbers together first would be more accurate. + vfloat4 lo(_mm256_extractf128_ps(a.m, 0)); + haccumulate(accum, lo); + + vfloat4 hi(_mm256_extractf128_ps(a.m, 1)); + haccumulate(accum, hi); +} + +/** + * @brief Return the sqrt of the lanes in the vector. + */ +ASTCENC_SIMD_INLINE vfloat8 sqrt(vfloat8 a) +{ + return vfloat8(_mm256_sqrt_ps(a.m)); +} + +/** + * @brief Return lanes from @c b if MSB of @c cond is set, else @c a. + */ +ASTCENC_SIMD_INLINE vfloat8 select(vfloat8 a, vfloat8 b, vmask8 cond) +{ + return vfloat8(_mm256_blendv_ps(a.m, b.m, cond.m)); +} + +/** + * @brief Load a vector of gathered results from an array; + */ +ASTCENC_SIMD_INLINE vfloat8 gatherf(const float* base, vint8 indices) +{ + return vfloat8(_mm256_i32gather_ps(base, indices.m, 4)); +} + +/** + * @brief Store a vector to an unaligned memory address. + */ +ASTCENC_SIMD_INLINE void store(vfloat8 a, float* p) +{ + _mm256_storeu_ps(p, a.m); +} + +/** + * @brief Store a vector to a 32B aligned memory address. + */ +ASTCENC_SIMD_INLINE void storea(vfloat8 a, float* p) +{ + _mm256_store_ps(p, a.m); +} + +/** + * @brief Return a integer value for a float vector, using truncation. + */ +ASTCENC_SIMD_INLINE vint8 float_to_int(vfloat8 a) +{ + return vint8(_mm256_cvttps_epi32(a.m)); +} + +/** + * @brief Return a float value as an integer bit pattern (i.e. no conversion). + * + * It is a common trick to convert floats into integer bit patterns, perform + * some bit hackery based on knowledge they are IEEE 754 layout, and then + * convert them back again. This is the first half of that flip. + */ +ASTCENC_SIMD_INLINE vint8 float_as_int(vfloat8 a) +{ + return vint8(_mm256_castps_si256(a.m)); +} + +/** + * @brief Return a integer value as a float bit pattern (i.e. no conversion). + * + * It is a common trick to convert floats into integer bit patterns, perform + * some bit hackery based on knowledge they are IEEE 754 layout, and then + * convert them back again. This is the second half of that flip. + */ +ASTCENC_SIMD_INLINE vfloat8 int_as_float(vint8 a) +{ + return vfloat8(_mm256_castsi256_ps(a.m)); +} + +/** + * @brief Debug function to print a vector of floats. + */ +ASTCENC_SIMD_INLINE void print(vfloat8 a) +{ + alignas(ASTCENC_VECALIGN) float v[8]; + storea(a, v); + printf("v8_f32:\n %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f %0.4f\n", + (double)v[0], (double)v[1], (double)v[2], (double)v[3], + (double)v[4], (double)v[5], (double)v[6], (double)v[7]); +} + +#endif // #ifndef ASTC_VECMATHLIB_AVX2_8_H_INCLUDED diff --git a/libkram/astc-encoder/astcenc_vecmathlib_common_4.h b/libkram/astc-encoder/astcenc_vecmathlib_common_4.h new file mode 100755 index 00000000..319537b6 --- /dev/null +++ b/libkram/astc-encoder/astcenc_vecmathlib_common_4.h @@ -0,0 +1,352 @@ +// SPDX-License-Identifier: Apache-2.0 +// ---------------------------------------------------------------------------- +// Copyright 2020-2021 Arm Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. 
You may obtain a copy +// of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. +// ---------------------------------------------------------------------------- + +/** + * @brief Generic 4x32-bit vector functions. + * + * This module implements generic 4-wide vector functions that are valid for + * all instruction sets, typically implemented using lower level 4-wide + * operations that are ISA-specific. + */ + +#ifndef ASTC_VECMATHLIB_COMMON_4_H_INCLUDED +#define ASTC_VECMATHLIB_COMMON_4_H_INCLUDED + +#ifndef ASTCENC_SIMD_INLINE + #error "Include astcenc_vecmathlib.h, do not include directly" +#endif + +#include + +// ============================================================================ +// vmask4 operators and functions +// ============================================================================ + +/** + * @brief True if any lanes are enabled, false otherwise. + */ +ASTCENC_SIMD_INLINE bool any(vmask4 a) +{ + return mask(a) != 0; +} + +/** + * @brief True if all lanes are enabled, false otherwise. + */ +ASTCENC_SIMD_INLINE bool all(vmask4 a) +{ + return mask(a) == 0xF; +} + +// ============================================================================ +// vint4 operators and functions +// ============================================================================ + +/** + * @brief Overload: vector by scalar addition. + */ +ASTCENC_SIMD_INLINE vint4 operator+(vint4 a, int b) +{ + return a + vint4(b); +} + +/** + * @brief Overload: vector by scalar subtraction. + */ +ASTCENC_SIMD_INLINE vint4 operator-(vint4 a, int b) +{ + return a - vint4(b); +} + +/** + * @brief Overload: vector by scalar multiplication. + */ +ASTCENC_SIMD_INLINE vint4 operator*(vint4 a, int b) +{ + return a * vint4(b); +} + +/** + * @brief Overload: vector by scalar bitwise or. + */ +ASTCENC_SIMD_INLINE vint4 operator|(vint4 a, int b) +{ + return a | vint4(b); +} + +/** + * @brief Overload: vector by scalar bitwise and. + */ +ASTCENC_SIMD_INLINE vint4 operator&(vint4 a, int b) +{ + return a & vint4(b); +} + +/** + * @brief Overload: vector by scalar bitwise xor. + */ +ASTCENC_SIMD_INLINE vint4 operator^(vint4 a, int b) +{ + return a ^ vint4(b); +} + +/** + * @brief Return the clamped value between min and max. + */ +ASTCENC_SIMD_INLINE vint4 clamp(int minv, int maxv, vint4 a) +{ + return min(max(a, vint4(minv)), vint4(maxv)); +} + +/** + * @brief Return the horizontal sum of RGB vector lanes as a scalar. + */ +ASTCENC_SIMD_INLINE int hadd_rgb_s(vint4 a) +{ + return a.lane<0>() + a.lane<1>() + a.lane<2>(); +} + +/** + * @brief Debug function to print a vector of ints. + */ +ASTCENC_SIMD_INLINE void print(vint4 a) +{ + alignas(16) int v[4]; + storea(a, v); + printf("v4_i32:\n %8d %8d %8d %8d\n", + v[0], v[1], v[2], v[3]); +} + +// ============================================================================ +// vfloat4 operators and functions +// ============================================================================ + +ASTCENC_SIMD_INLINE vfloat4& operator+=(vfloat4& a, const vfloat4& b) +{ + a = a + b; + return a; +} + +/** + * @brief Overload: vector by scalar addition. 
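+ *
+ * Equivalent to a + vfloat4(b); for example (illustrative values),
+ * vfloat4(1, 2, 3, 4) + 1.0f gives (2, 3, 4, 5).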
+ */ +ASTCENC_SIMD_INLINE vfloat4 operator+(vfloat4 a, float b) +{ + return a + vfloat4(b); +} + +/** + * @brief Overload: vector by scalar subtraction. + */ +ASTCENC_SIMD_INLINE vfloat4 operator-(vfloat4 a, float b) +{ + return a - vfloat4(b); +} + +/** + * @brief Overload: vector by scalar multiplication. + */ +ASTCENC_SIMD_INLINE vfloat4 operator*(vfloat4 a, float b) +{ + return a * vfloat4(b); +} + +/** + * @brief Overload: scalar by vector multiplication. + */ +ASTCENC_SIMD_INLINE vfloat4 operator*(float a, vfloat4 b) +{ + return vfloat4(a) * b; +} + +/** + * @brief Overload: vector by scalar division. + */ +ASTCENC_SIMD_INLINE vfloat4 operator/(vfloat4 a, float b) +{ + return a / vfloat4(b); +} + +/** + * @brief Overload: scalar by vector division. + */ +ASTCENC_SIMD_INLINE vfloat4 operator/(float a, vfloat4 b) +{ + return vfloat4(a) / b; +} + +/** + * @brief Return the min vector of a vector and a scalar. + * + * If either lane value is NaN, @c b will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat4 min(vfloat4 a, float b) +{ + return min(a, vfloat4(b)); +} + +/** + * @brief Return the max vector of a vector and a scalar. + * + * If either lane value is NaN, @c b will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat4 max(vfloat4 a, float b) +{ + return max(a, vfloat4(b)); +} + +/** + * @brief Return the clamped value between min and max. + * + * It is assumed that neither @c min nor @c max are NaN values. If @c a is NaN + * then @c min will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat4 clamp(float minv, float maxv, vfloat4 a) +{ + // Do not reorder - second operand will return if either is NaN + return min(max(a, minv), maxv); +} + +/** + * @brief Return the clamped value between 0.0f and max. + * + * It is assumed that @c max is not a NaN value. If @c a is NaN then zero will + * be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat4 clampz(float maxv, vfloat4 a) +{ + // Do not reorder - second operand will return if either is NaN + return min(max(a, vfloat4::zero()), maxv); +} + +/** + * @brief Return the clamped value between 0.0f and 1.0f. + * + * If @c a is NaN then zero will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat4 clampzo(vfloat4 a) +{ + // Do not reorder - second operand will return if either is NaN + return min(max(a, vfloat4::zero()), 1.0f); +} + +/** + * @brief Return the horizontal minimum of a vector. + */ +ASTCENC_SIMD_INLINE float hmin_s(vfloat4 a) +{ + return hmin(a).lane<0>(); +} + +/** + * @brief Return the horizontal min of RGB vector lanes as a scalar. + */ +ASTCENC_SIMD_INLINE float hmin_rgb_s(vfloat4 a) +{ + a.set_lane<3>(a.lane<0>()); + return hmin_s(a); +} + +/** + * @brief Return the horizontal maximum of a vector. + */ +ASTCENC_SIMD_INLINE float hmax_s(vfloat4 a) +{ + return hmax(a).lane<0>(); +} + +/** + * @brief Accumulate the full horizontal sum of a vector. + */ +ASTCENC_SIMD_INLINE void haccumulate(float& accum, vfloat4 a) +{ + accum += hadd_s(a); +} + +/** + * @brief Accumulate lane-wise sums for a vector. + */ +ASTCENC_SIMD_INLINE void haccumulate(vfloat4& accum, vfloat4 a) +{ + accum = accum + a; +} + +/** + * @brief Return the horizontal sum of RGB vector lanes as a scalar. + */ +ASTCENC_SIMD_INLINE float hadd_rgb_s(vfloat4 a) +{ + return a.lane<0>() + a.lane<1>() + a.lane<2>(); +} + +/** + * @brief Return the dot product for the full 4 lanes, returning scalar. 
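+ *
+ * For example (illustrative values), dot_s(vfloat4(1, 2, 3, 4),
+ * vfloat4(1, 1, 1, 1)) returns 10.0f.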
+ */ +ASTCENC_SIMD_INLINE float dot_s(vfloat4 a, vfloat4 b) +{ + vfloat4 m = a * b; + return hadd_s(m); +} + +/** + * @brief Return the dot product for the full 4 lanes, returning vector. + */ +ASTCENC_SIMD_INLINE vfloat4 dot(vfloat4 a, vfloat4 b) +{ + vfloat4 m = a * b; + return vfloat4(hadd_s(m)); +} + +/** + * @brief Return the dot product for the bottom 3 lanes, returning scalar. + */ +ASTCENC_SIMD_INLINE float dot3_s(vfloat4 a, vfloat4 b) +{ + vfloat4 m = a * b; + return hadd_rgb_s(m); +} + +/** + * @brief Return the dot product for the full 4 lanes, returning vector. + */ +ASTCENC_SIMD_INLINE vfloat4 dot3(vfloat4 a, vfloat4 b) +{ + vfloat4 m = a * b; + float d3 = hadd_rgb_s(m); + return vfloat4(d3, d3, d3, 0.0f); +} + +/** + * @brief Generate a reciprocal of a vector. + */ +ASTCENC_SIMD_INLINE vfloat4 recip(vfloat4 b) +{ + return 1.0f / b; +} + +/** + * @brief Debug function to print a vector of floats. + */ +ASTCENC_SIMD_INLINE void print(vfloat4 a) +{ + alignas(16) float v[4]; + storea(a, v); + printf("v4_f32:\n %0.4f %0.4f %0.4f %0.4f\n", + (double)v[0], (double)v[1], (double)v[2], (double)v[3]); +} + +#endif // #ifndef ASTC_VECMATHLIB_COMMON_4_H_INCLUDED diff --git a/libkram/astc-encoder/astcenc_vecmathlib_neon_4.h b/libkram/astc-encoder/astcenc_vecmathlib_neon_4.h new file mode 100755 index 00000000..a1163531 --- /dev/null +++ b/libkram/astc-encoder/astcenc_vecmathlib_neon_4.h @@ -0,0 +1,915 @@ +// SPDX-License-Identifier: Apache-2.0 +// ---------------------------------------------------------------------------- +// Copyright 2019-2021 Arm Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy +// of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. +// ---------------------------------------------------------------------------- + +/** + * @brief 4x32-bit vectors, implemented using Armv8-A NEON. + * + * This module implements 4-wide 32-bit float, int, and mask vectors for + * Armv8-A NEON. + * + * There is a baseline level of functionality provided by all vector widths and + * implementations. This is implemented using identical function signatures, + * modulo data type, so we can use them as substitutable implementations in VLA + * code. + * + * The 4-wide vectors are also used as a fixed-width type, and significantly + * extend the functionality above that available to VLA code. + */ + +#ifndef ASTC_VECMATHLIB_NEON_4_H_INCLUDED +#define ASTC_VECMATHLIB_NEON_4_H_INCLUDED + +#ifndef ASTCENC_SIMD_INLINE + #error "Include astcenc_vecmathlib.h, do not include directly" +#endif + +#include + +#if defined(__arm__) + #include "astcenc_vecmathlib_neon_armv7_4.h" +#endif + +// ============================================================================ +// vfloat4 data type +// ============================================================================ + +/** + * @brief Data type for 4-wide floats. + */ +struct vfloat4 +{ + /** + * @brief Construct from zero-initialized value. + */ + ASTCENC_SIMD_INLINE vfloat4() {} + + /** + * @brief Construct from 4 values loaded from an unaligned address. 
+ * + * Consider using loada() which is better with vectors if data is aligned + * to vector length. + */ + ASTCENC_SIMD_INLINE explicit vfloat4(const float *p) + { + m = vld1q_f32(p); + } + + /** + * @brief Construct from 1 scalar value replicated across all lanes. + * + * Consider using zero() for constexpr zeros. + */ + ASTCENC_SIMD_INLINE explicit vfloat4(float a) + { + m = vdupq_n_f32(a); + } + + /** + * @brief Construct from 4 scalar values. + * + * The value of @c a is stored to lane 0 (LSB) in the SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vfloat4(float a, float b, float c, float d) + { + float32x4_t v { a, b, c, d }; + m = v; + } + + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vfloat4(float32x4_t a) + { + m = a; + } + + /** + * @brief Get the scalar value of a single lane. + */ + template ASTCENC_SIMD_INLINE float lane() const + { + return vgetq_lane_f32(m, l); + } + + /** + * @brief Set the scalar value of a single lane. + */ + template ASTCENC_SIMD_INLINE void set_lane(float a) + { + m = vld1q_lane_f32(&a, m, l); + } + + /** + * @brief Factory that returns a vector of zeros. + */ + static ASTCENC_SIMD_INLINE vfloat4 zero() + { + return vfloat4(vdupq_n_f32(0.0f)); + } + + /** + * @brief Factory that returns a replicated scalar loaded from memory. + */ + static ASTCENC_SIMD_INLINE vfloat4 load1(const float* p) + { + return vfloat4(vdupq_n_f32(*p)); + } + + /** + * @brief Factory that returns a vector loaded from 16B aligned memory. + */ + static ASTCENC_SIMD_INLINE vfloat4 loada(const float* p) + { + return vfloat4(vld1q_f32(p)); + } + + /** + * @brief Factory that returns a vector containing the lane IDs. + */ + static ASTCENC_SIMD_INLINE vfloat4 lane_id() + { + alignas(16) float data[4] { 0.0f, 1.0f, 2.0f, 3.0f }; + return vfloat4(vld1q_f32(data)); + } + + /** + * @brief Return a swizzled float 2. + */ + template ASTCENC_SIMD_INLINE float2 swz() const + { + return float2(lane(), lane()); + } + + /** + * @brief Return a swizzled float 3. + * + * TODO: Implement using permutes. + */ + template ASTCENC_SIMD_INLINE vfloat4 swz() const + { + return vfloat4(lane(), lane(), lane(), 0.0f); + } + + /** + * @brief Return a swizzled float 4. + * + * TODO: Implement using permutes. + */ + template ASTCENC_SIMD_INLINE vfloat4 swz() const + { + return vfloat4(lane(), lane(), lane(), lane()); + } + + /** + * @brief The vector ... + */ + float32x4_t m; +}; + +// ============================================================================ +// vint4 data type +// ============================================================================ + +/** + * @brief Data type for 4-wide ints. + */ +struct vint4 +{ + /** + * @brief Construct from zero-initialized value. + */ + ASTCENC_SIMD_INLINE vint4() {} + + /** + * @brief Construct from 4 values loaded from an unaligned address. + * + * Consider using loada() which is better with vectors if data is aligned + * to vector length. + */ + ASTCENC_SIMD_INLINE explicit vint4(const int *p) + { + m = vld1q_s32(p); + } + + /** + * @brief Construct from 4 uint8_t loaded from an unaligned address. + */ + ASTCENC_SIMD_INLINE explicit vint4(const uint8_t *p) + { + uint32x2_t t8 {}; + // Cast is safe - NEON loads are allowed to be unaligned + t8 = vld1_lane_u32((const uint32_t*)p, t8, 0); + uint16x4_t t16 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(t8))); + m = vreinterpretq_s32_u32(vmovl_u16(t16)); + } + + /** + * @brief Construct from 1 scalar value replicated across all lanes. 
+ * + * Consider using vfloat4::zero() for constexpr zeros. + */ + ASTCENC_SIMD_INLINE explicit vint4(int a) + { + m = vdupq_n_s32(a); + } + + /** + * @brief Construct from 4 scalar values. + * + * The value of @c a is stored to lane 0 (LSB) in the SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vint4(int a, int b, int c, int d) + { + int32x4_t v { a, b, c, d }; + m = v; + } + + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vint4(int32x4_t a) + { + m = a; + } + + /** + * @brief Get the scalar from a single lane. + */ + template ASTCENC_SIMD_INLINE int lane() const + { + return vgetq_lane_s32(m, l); + } + + /** + * @brief Set the scalar value of a single lane. + */ + template ASTCENC_SIMD_INLINE void set_lane(int a) + { + m = vld1q_lane_s32(&a, m, l); + } + + /** + * @brief Factory that returns a vector of zeros. + */ + static ASTCENC_SIMD_INLINE vint4 zero() + { + return vint4(0); + } + + /** + * @brief Factory that returns a replicated scalar loaded from memory. + */ + static ASTCENC_SIMD_INLINE vint4 load1(const int* p) + { + return vint4(*p); + } + + /** + * @brief Factory that returns a vector loaded from 16B aligned memory. + */ + static ASTCENC_SIMD_INLINE vint4 loada(const int* p) + { + return vint4(*p); + } + + /** + * @brief Factory that returns a vector containing the lane IDs. + */ + static ASTCENC_SIMD_INLINE vint4 lane_id() + { + alignas(ASTCENC_VECALIGN) static const int data[4] { 0, 1, 2, 3 }; + return vint4(vld1q_s32(data)); + } + + /** + * @brief The vector ... + */ + int32x4_t m; +}; + +// ============================================================================ +// vmask4 data type +// ============================================================================ + +/** + * @brief Data type for 4-wide control plane masks. + */ +struct vmask4 +{ + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vmask4(uint32x4_t a) + { + m = a; + } + + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vmask4(int32x4_t a) + { + m = vreinterpretq_u32_s32(a); + } + + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vmask4(bool a, bool b, bool c, bool d) + { + int32x4_t v { + a == true ? -1 : 0, + b == true ? -1 : 0, + c == true ? -1 : 0, + d == true ? -1 : 0 + }; + + m = vreinterpretq_u32_s32(v); + } + + + /** + * @brief The vector ... + */ + uint32x4_t m; +}; + +// ============================================================================ +// vmask4 operators and functions +// ============================================================================ + +/** + * @brief Overload: mask union (or). + */ +ASTCENC_SIMD_INLINE vmask4 operator|(vmask4 a, vmask4 b) +{ + return vmask4(vorrq_u32(a.m, b.m)); +} + +/** + * @brief Overload: mask intersect (and). + */ +ASTCENC_SIMD_INLINE vmask4 operator&(vmask4 a, vmask4 b) +{ + return vmask4(vandq_u32(a.m, b.m)); +} + +/** + * @brief Overload: mask difference (xor). + */ +ASTCENC_SIMD_INLINE vmask4 operator^(vmask4 a, vmask4 b) +{ + return vmask4(veorq_u32(a.m, b.m)); +} + +/** + * @brief Overload: mask invert (not). + */ +ASTCENC_SIMD_INLINE vmask4 operator~(vmask4 a) +{ + return vmask4(vmvnq_u32(a.m)); +} + +/** + * @brief Return a 4-bit mask code indicating mask status. 
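+ *
+ * For example (illustrative), a mask with only lanes 0 and 2 enabled
+ * yields the code 0x5.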
+ * + * bit0 = lane 0 + */ +ASTCENC_SIMD_INLINE unsigned int mask(vmask4 a) +{ + static const int32x4_t shift { 0, 1, 2, 3 }; + uint32x4_t tmp = vshrq_n_u32(a.m, 31); + return vaddvq_u32(vshlq_u32(tmp, shift)); +} + +// ============================================================================ +// vint4 operators and functions +// ============================================================================ + +/** + * @brief Overload: vector by vector addition. + */ +ASTCENC_SIMD_INLINE vint4 operator+(vint4 a, vint4 b) +{ + return vint4(vaddq_s32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector subtraction. + */ +ASTCENC_SIMD_INLINE vint4 operator-(vint4 a, vint4 b) +{ + return vint4(vsubq_s32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector multiplication. + */ +ASTCENC_SIMD_INLINE vint4 operator*(vint4 a, vint4 b) +{ + return vint4(vmulq_s32(a.m, b.m)); +} + +/** + * @brief Overload: vector bit invert. + */ +ASTCENC_SIMD_INLINE vint4 operator~(vint4 a) +{ + return vint4(vmvnq_s32(a.m)); +} + +/** + * @brief Overload: vector by vector bitwise or. + */ +ASTCENC_SIMD_INLINE vint4 operator|(vint4 a, vint4 b) +{ + return vint4(vorrq_s32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector bitwise and. + */ +ASTCENC_SIMD_INLINE vint4 operator&(vint4 a, vint4 b) +{ + return vint4(vandq_s32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector bitwise xor. + */ +ASTCENC_SIMD_INLINE vint4 operator^(vint4 a, vint4 b) +{ + return vint4(veorq_s32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector equality. + */ +ASTCENC_SIMD_INLINE vmask4 operator==(vint4 a, vint4 b) +{ + return vmask4(vceqq_s32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector inequality. + */ +ASTCENC_SIMD_INLINE vmask4 operator!=(vint4 a, vint4 b) +{ + return ~vmask4(vceqq_s32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector less than. + */ +ASTCENC_SIMD_INLINE vmask4 operator<(vint4 a, vint4 b) +{ + return vmask4(vcltq_s32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector greater than. + */ +ASTCENC_SIMD_INLINE vmask4 operator>(vint4 a, vint4 b) +{ + return vmask4(vcgtq_s32(a.m, b.m)); +} + +/** + * @brief Logical shift left. + */ +template ASTCENC_SIMD_INLINE vint4 lsl(vint4 a) +{ + return vint4(vshlq_s32(a.m, vdupq_n_s32(s))); +} + +/** + * @brief Logical shift right. + */ +template ASTCENC_SIMD_INLINE vint4 lsr(vint4 a) +{ + uint32x4_t ua = vreinterpretq_u32_s32(a.m); + ua = vshlq_u32(ua, vdupq_n_s32(-s)); + return vint4(vreinterpretq_s32_u32(ua)); +} + +/** + * @brief Arithmetic shift right. + */ +template ASTCENC_SIMD_INLINE vint4 asr(vint4 a) +{ + return vint4(vshlq_s32(a.m, vdupq_n_s32(-s))); +} + +/** + * @brief Return the min vector of two vectors. + */ +ASTCENC_SIMD_INLINE vint4 min(vint4 a, vint4 b) +{ + return vint4(vminq_s32(a.m, b.m)); +} + +/** + * @brief Return the max vector of two vectors. + */ +ASTCENC_SIMD_INLINE vint4 max(vint4 a, vint4 b) +{ + return vint4(vmaxq_s32(a.m, b.m)); +} + +/** + * @brief Return the horizontal minimum of a vector. + */ +ASTCENC_SIMD_INLINE vint4 hmin(vint4 a) +{ + return vint4(vminvq_s32(a.m)); +} + +/** + * @brief Return the horizontal maximum of a vector. + */ +ASTCENC_SIMD_INLINE vint4 hmax(vint4 a) +{ + return vint4(vmaxvq_s32(a.m)); +} + +/** + * @brief Return the horizontal sum of a vector. 
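+ *
+ * For example (illustrative values), hadd_s(vint4(1, 2, 3, 4)) returns 10.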
+ */ +ASTCENC_SIMD_INLINE int hadd_s(vint4 a) +{ + int32x2_t t = vadd_s32(vget_high_s32(a.m), vget_low_s32(a.m)); + return vget_lane_s32(vpadd_s32(t, t), 0); +} + +/** + * @brief Store a vector to a 16B aligned memory address. + */ +ASTCENC_SIMD_INLINE void storea(vint4 a, int* p) +{ + vst1q_s32(p, a.m); +} + +/** + * @brief Store a vector to an unaligned memory address. + */ +ASTCENC_SIMD_INLINE void store(vint4 a, int* p) +{ + vst1q_s32(p, a.m); +} + +/** + * @brief Store lowest N (vector width) bytes into an unaligned address. + */ +ASTCENC_SIMD_INLINE void store_nbytes(vint4 a, uint8_t* p) +{ + vst1q_lane_s32((int32_t*)p, a.m, 0); +} + +/** + * @brief Gather N (vector width) indices from the array. + */ +ASTCENC_SIMD_INLINE vint4 gatheri(const int* base, vint4 indices) +{ + alignas(16) int idx[4]; + storea(indices, idx); + alignas(16) int vals[4]; + vals[0] = base[idx[0]]; + vals[1] = base[idx[1]]; + vals[2] = base[idx[2]]; + vals[3] = base[idx[3]]; + return vint4(vals); +} + +/** + * @brief Pack low 8 bits of N (vector width) lanes into bottom of vector. + */ +ASTCENC_SIMD_INLINE vint4 pack_low_bytes(vint4 a) +{ + alignas(16) uint8_t shuf[16] = { + 0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + uint8x16_t idx = vld1q_u8(shuf); + int8x16_t av = vreinterpretq_s8_s32(a.m); + return vint4(vreinterpretq_s32_s8(vqtbl1q_s8(av, idx))); +} + +/** + * @brief Return lanes from @c b if MSB of @c cond is set, else @c a. + */ +ASTCENC_SIMD_INLINE vint4 select(vint4 a, vint4 b, vmask4 cond) +{ + static const uint32x4_t msb = vdupq_n_u32(0x80000000u); + uint32x4_t mask = vcgeq_u32(cond.m, msb); + return vint4(vbslq_s32(mask, b.m, a.m)); +} + +// ============================================================================ +// vfloat4 operators and functions +// ============================================================================ + +/** + * @brief Overload: vector by vector addition. + */ +ASTCENC_SIMD_INLINE vfloat4 operator+(vfloat4 a, vfloat4 b) +{ + return vfloat4(vaddq_f32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector subtraction. + */ +ASTCENC_SIMD_INLINE vfloat4 operator-(vfloat4 a, vfloat4 b) +{ + return vfloat4(vsubq_f32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector multiplication. + */ +ASTCENC_SIMD_INLINE vfloat4 operator*(vfloat4 a, vfloat4 b) +{ + return vfloat4(vmulq_f32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector division. + */ +ASTCENC_SIMD_INLINE vfloat4 operator/(vfloat4 a, vfloat4 b) +{ + return vfloat4(vdivq_f32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector equality. + */ +ASTCENC_SIMD_INLINE vmask4 operator==(vfloat4 a, vfloat4 b) +{ + return vmask4(vceqq_f32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector inequality. + */ +ASTCENC_SIMD_INLINE vmask4 operator!=(vfloat4 a, vfloat4 b) +{ + return vmask4(vmvnq_u32(vceqq_f32(a.m, b.m))); +} + +/** + * @brief Overload: vector by vector less than. + */ +ASTCENC_SIMD_INLINE vmask4 operator<(vfloat4 a, vfloat4 b) +{ + return vmask4(vcltq_f32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector greater than. + */ +ASTCENC_SIMD_INLINE vmask4 operator>(vfloat4 a, vfloat4 b) +{ + return vmask4(vcgtq_f32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector less than or equal. + */ +ASTCENC_SIMD_INLINE vmask4 operator<=(vfloat4 a, vfloat4 b) +{ + return vmask4(vcleq_f32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector greater than or equal. 
+ */ +ASTCENC_SIMD_INLINE vmask4 operator>=(vfloat4 a, vfloat4 b) +{ + return vmask4(vcgeq_f32(a.m, b.m)); +} + +/** + * @brief Return the min vector of two vectors. + * + * If either lane value is NaN, @c b will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat4 min(vfloat4 a, vfloat4 b) +{ + // Do not reorder - second operand will return if either is NaN + return vfloat4(vminnmq_f32(a.m, b.m)); +} + +/** + * @brief Return the max vector of two vectors. + * + * If either lane value is NaN, @c b will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat4 max(vfloat4 a, vfloat4 b) +{ + // Do not reorder - second operand will return if either is NaN + return vfloat4(vmaxnmq_f32(a.m, b.m)); +} + +/** + * @brief Return the absolute value of the float vector. + */ +ASTCENC_SIMD_INLINE vfloat4 abs(vfloat4 a) +{ + float32x4_t zero = vdupq_n_f32(0.0f); + float32x4_t inv = vsubq_f32(zero, a.m); + return vfloat4(vmaxq_f32(a.m, inv)); +} + +/** + * @brief Return a float rounded to the nearest integer value. + */ +ASTCENC_SIMD_INLINE vfloat4 round(vfloat4 a) +{ + return vfloat4(vrndnq_f32(a.m)); +} + +/** + * @brief Return the horizontal minimum of a vector. + */ +ASTCENC_SIMD_INLINE vfloat4 hmin(vfloat4 a) +{ + return vfloat4(vminvq_f32(a.m)); +} + +/** + * @brief Return the horizontal maximum of a vector. + */ +ASTCENC_SIMD_INLINE vfloat4 hmax(vfloat4 a) +{ + return vfloat4(vmaxvq_f32(a.m)); +} + +/** + * @brief Return the horizontal sum of a vector. + */ +ASTCENC_SIMD_INLINE float hadd_s(vfloat4 a) +{ + // Perform halving add to ensure invariance; we cannot use vaddqv as this + // does (0 + 1 + 2 + 3) which is not invariant with x86 (0 + 2) + (1 + 3). + float32x2_t t = vadd_f32(vget_high_f32(a.m), vget_low_f32(a.m)); + return vget_lane_f32(vpadd_f32(t, t), 0); +} + +/** + * @brief Return the sqrt of the lanes in the vector. + */ +ASTCENC_SIMD_INLINE vfloat4 sqrt(vfloat4 a) +{ + return vfloat4(vsqrtq_f32(a.m)); +} + +/** + * @brief Return lanes from @c b if MSB of @c cond is set, else @c a. + */ +ASTCENC_SIMD_INLINE vfloat4 select(vfloat4 a, vfloat4 b, vmask4 cond) +{ + static const uint32x4_t msb = vdupq_n_u32(0x80000000u); + uint32x4_t mask = vcgeq_u32(cond.m, msb); + return vfloat4(vbslq_f32(mask, b.m, a.m)); +} + +/** + * @brief Load a vector of gathered results from an array; + */ +ASTCENC_SIMD_INLINE vfloat4 gatherf(const float* base, vint4 indices) +{ + alignas(16) int idx[4]; + storea(indices, idx); + alignas(16) float vals[4]; + vals[0] = base[idx[0]]; + vals[1] = base[idx[1]]; + vals[2] = base[idx[2]]; + vals[3] = base[idx[3]]; + return vfloat4(vals); +} + +/** + * @brief Store a vector to an unaligned memory address. + */ +ASTCENC_SIMD_INLINE void store(vfloat4 a, float* p) +{ + vst1q_f32(p, a.m); +} + +/** + * @brief Store a vector to a 16B aligned memory address. + */ +ASTCENC_SIMD_INLINE void storea(vfloat4 a, float* p) +{ + vst1q_f32(p, a.m); +} + +/** + * @brief Return a integer value for a float vector, using truncation. + */ +ASTCENC_SIMD_INLINE vint4 float_to_int(vfloat4 a) +{ + return vint4(vcvtq_s32_f32(a.m)); +} + +/** + * @brief Return a integer value for a float vector, using round-to-nearest. + */ +ASTCENC_SIMD_INLINE vint4 float_to_int_rtn(vfloat4 a) +{ + a = round(a); + return vint4(vcvtq_s32_f32(a.m)); +} + +/** + * @brief Return a float value for an integer vector. + */ +ASTCENC_SIMD_INLINE vfloat4 int_to_float(vint4 a) +{ + return vfloat4(vcvtq_f32_s32(a.m)); +} + +/** + * @brief Return a float16 value for a float vector, using round-to-nearest. 
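+ *
+ * For example, float_to_float16(vfloat4(1.0f)) returns 0x3C00 in every
+ * lane, the IEEE 754 binary16 bit pattern for 1.0.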
+ */ +ASTCENC_SIMD_INLINE vint4 float_to_float16(vfloat4 a) +{ + // Generate float16 value + float16x4_t f16 = vcvt_f16_f32(a.m); + + // Convert each 16-bit float pattern to a 32-bit pattern + uint16x4_t u16 = vreinterpret_u16_f16(f16); + uint32x4_t u32 = vmovl_u16(u16); + return vint4(vreinterpretq_s32_u32(u32)); +} + +/** + * @brief Return a float16 value for a float scalar, using round-to-nearest. + */ +static inline uint16_t float_to_float16(float a) +{ + vfloat4 av(a); + return float_to_float16(av).lane<0>(); +} + +/** + * @brief Return a float value for a float16 vector. + */ +ASTCENC_SIMD_INLINE vfloat4 float16_to_float(vint4 a) +{ + // Convert each 32-bit float pattern to a 16-bit pattern + uint32x4_t u32 = vreinterpretq_u32_s32(a.m); + uint16x4_t u16 = vmovn_u32(u32); + float16x4_t f16 = vreinterpret_f16_u16(u16); + + // Generate float16 value + return vfloat4(vcvt_f32_f16(f16)); +} + +/** + * @brief Return a float value for a float16 scalar. + */ +ASTCENC_SIMD_INLINE float float16_to_float(uint16_t a) +{ + vint4 av(a); + return float16_to_float(av).lane<0>(); +} + +/** + * @brief Return a float value as an integer bit pattern (i.e. no conversion). + * + * It is a common trick to convert floats into integer bit patterns, perform + * some bit hackery based on knowledge they are IEEE 754 layout, and then + * convert them back again. This is the first half of that flip. + */ +ASTCENC_SIMD_INLINE vint4 float_as_int(vfloat4 a) +{ + return vint4(vreinterpretq_s32_f32(a.m)); +} + +/** + * @brief Return a integer value as a float bit pattern (i.e. no conversion). + * + * It is a common trick to convert floats into integer bit patterns, perform + * some bit hackery based on knowledge they are IEEE 754 layout, and then + * convert them back again. This is the second half of that flip. + */ +ASTCENC_SIMD_INLINE vfloat4 int_as_float(vint4 v) +{ + return vfloat4(vreinterpretq_f32_s32(v.m)); +} + +#endif // #ifndef ASTC_VECMATHLIB_NEON_4_H_INCLUDED diff --git a/libkram/astc-encoder/astcenc_vecmathlib_neon_armv7_4.h b/libkram/astc-encoder/astcenc_vecmathlib_neon_armv7_4.h new file mode 100644 index 00000000..7d33dc15 --- /dev/null +++ b/libkram/astc-encoder/astcenc_vecmathlib_neon_armv7_4.h @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: Apache-2.0 +// ---------------------------------------------------------------------------- +// Copyright 2021 Arm Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy +// of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. +// ---------------------------------------------------------------------------- + +/** + * @brief Intrinsics for Armv7 NEON. + * + * This module implements a few Armv7-compatible intrinsics indentical to Armv8 + * ones. Thus, astcenc can be compiled using Armv7 architecture. 
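+ *
+ * The emulations provided below cover vmaxnmq_f32, vminnmq_f32, vrndnq_f32
+ * (those three only when compiling with clang), vmaxvq_f32, vminvq_f32,
+ * vmaxvq_s32, vminvq_s32, vsqrtq_f32, vdivq_f32, vqtbl1q_s8 and vaddvq_u32,
+ * each built from Armv7-safe NEON operations or plain scalar code.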
+ */ + +#ifndef ASTC_VECMATHLIB_NEON_ARMV7_4_H_INCLUDED +#define ASTC_VECMATHLIB_NEON_ARMV7_4_H_INCLUDED + +#ifndef ASTCENC_SIMD_INLINE + #error "Include astcenc_vecmathlib.h, do not include directly" +#endif + +#include +#include + + +// arm-linux-gnueabi-gcc contains the following functions by using +// #pragma GCC target ("fpu=neon-fp-armv8"), while clang does not. +#if defined(__clang__) + +/** + * @brief Return the max vector of two vectors. + * + * If one vector element is numeric and the other is a quiet NaN, + * the result placed in the vector is the numerical value. + */ +ASTCENC_SIMD_INLINE float32x4_t vmaxnmq_f32(float32x4_t a, float32x4_t b) +{ + uint32x4_t amask = vceqq_f32(a, a); + uint32x4_t bmask = vceqq_f32(b, b); + a = vbslq_f32(amask, a, b); + b = vbslq_f32(bmask, b, a); + return vmaxq_f32(a, b); +} + +/** + * @brief Return the min vector of two vectors. + * + * If one vector element is numeric and the other is a quiet NaN, + * the result placed in the vector is the numerical value. + */ +ASTCENC_SIMD_INLINE float32x4_t vminnmq_f32(float32x4_t a, float32x4_t b) +{ + uint32x4_t amask = vceqq_f32(a, a); + uint32x4_t bmask = vceqq_f32(b, b); + a = vbslq_f32(amask, a, b); + b = vbslq_f32(bmask, b, a); + return vminq_f32(a, b); +} + +/** + * @brief Return a float rounded to the nearest integer value. + */ +ASTCENC_SIMD_INLINE float32x4_t vrndnq_f32(float32x4_t a) +{ + assert(std::fegetround() == FE_TONEAREST); + float a0 = std::nearbyintf(vgetq_lane_f32(a, 0)); + float a1 = std::nearbyintf(vgetq_lane_f32(a, 1)); + float a2 = std::nearbyintf(vgetq_lane_f32(a, 2)); + float a3 = std::nearbyintf(vgetq_lane_f32(a, 3)); + float32x4_t c { a0, a1, a2, a3 }; + return c; +} + +#endif + +/** + * @brief Return the horizontal maximum of a vector. + */ +ASTCENC_SIMD_INLINE float vmaxvq_f32(float32x4_t a) +{ + float a0 = vgetq_lane_f32(a, 0); + float a1 = vgetq_lane_f32(a, 1); + float a2 = vgetq_lane_f32(a, 2); + float a3 = vgetq_lane_f32(a, 3); + return std::max(std::max(a0, a1), std::max(a2, a3)); +} + +/** + * @brief Return the horizontal maximum of a vector. + */ +ASTCENC_SIMD_INLINE float vminvq_f32(float32x4_t a) +{ + float a0 = vgetq_lane_f32(a, 0); + float a1 = vgetq_lane_f32(a, 1); + float a2 = vgetq_lane_f32(a, 2); + float a3 = vgetq_lane_f32(a, 3); + return std::min(std::min(a0, a1), std::min(a2, a3)); +} + +/** + * @brief Return the horizontal maximum of a vector. + */ +ASTCENC_SIMD_INLINE int32_t vmaxvq_s32(int32x4_t a) +{ + int32_t a0 = vgetq_lane_s32(a, 0); + int32_t a1 = vgetq_lane_s32(a, 1); + int32_t a2 = vgetq_lane_s32(a, 2); + int32_t a3 = vgetq_lane_s32(a, 3); + return std::max(std::max(a0, a1), std::max(a2, a3)); +} + +/** + * @brief Return the horizontal maximum of a vector. + */ +ASTCENC_SIMD_INLINE int32_t vminvq_s32(int32x4_t a) +{ + int32_t a0 = vgetq_lane_s32(a, 0); + int32_t a1 = vgetq_lane_s32(a, 1); + int32_t a2 = vgetq_lane_s32(a, 2); + int32_t a3 = vgetq_lane_s32(a, 3); + return std::min(std::min(a0, a1), std::min(a2, a3)); +} + +/** + * @brief Return the sqrt of the lanes in the vector. + */ +ASTCENC_SIMD_INLINE float32x4_t vsqrtq_f32(float32x4_t a) +{ + float a0 = std::sqrt(vgetq_lane_f32(a, 0)); + float a1 = std::sqrt(vgetq_lane_f32(a, 1)); + float a2 = std::sqrt(vgetq_lane_f32(a, 2)); + float a3 = std::sqrt(vgetq_lane_f32(a, 3)); + float32x4_t c { a0, a1, a2, a3 }; + return c; +} + +/** + * @brief Vector by vector division. 
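+ *
+ * A short lane-wise example, using the same brace initialization style as
+ * the implementations in this header:
+ *
+ * @code
+ *   float32x4_t n { 1.0f, 4.0f, 9.0f, 16.0f };
+ *   float32x4_t d { 1.0f, 2.0f, 3.0f, 4.0f };
+ *   float32x4_t q = vdivq_f32(n, d);   // { 1.0f, 2.0f, 3.0f, 4.0f }
+ * @endcode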
+ */ +ASTCENC_SIMD_INLINE float32x4_t vdivq_f32(float32x4_t a, float32x4_t b) +{ + float a0 = vgetq_lane_f32(a, 0), b0 = vgetq_lane_f32(b, 0); + float a1 = vgetq_lane_f32(a, 1), b1 = vgetq_lane_f32(b, 1); + float a2 = vgetq_lane_f32(a, 2), b2 = vgetq_lane_f32(b, 2); + float a3 = vgetq_lane_f32(a, 3), b3 = vgetq_lane_f32(b, 3); + float32x4_t c { a0 / b0, a1 / b1, a2 / b2, a3 / b3 }; + return c; +} + +/** + * @brief Table vector lookup. + */ +ASTCENC_SIMD_INLINE int8x16_t vqtbl1q_s8(int8x16_t t, uint8x16_t idx) +{ + int8x8x2_t tab; + tab.val[0] = vget_low_s8(t); + tab.val[1] = vget_high_s8(t); + int8x16_t id = vreinterpretq_s8_u8(idx); + return vcombine_s8( + vtbl2_s8(tab, vget_low_s8(id)), + vtbl2_s8(tab, vget_high_s8(id))); +} + +/** + * @brief Horizontal integer addition. + */ +ASTCENC_SIMD_INLINE uint32_t vaddvq_u32(uint32x4_t a) +{ + uint32_t a0 = vgetq_lane_u32(a, 0); + uint32_t a1 = vgetq_lane_u32(a, 1); + uint32_t a2 = vgetq_lane_u32(a, 2); + uint32_t a3 = vgetq_lane_u32(a, 3); + return a0 + a1 + a2 + a3; +} + +#endif diff --git a/libkram/astc-encoder/astcenc_vecmathlib_none_4.h b/libkram/astc-encoder/astcenc_vecmathlib_none_4.h new file mode 100644 index 00000000..716d6982 --- /dev/null +++ b/libkram/astc-encoder/astcenc_vecmathlib_none_4.h @@ -0,0 +1,1025 @@ +// SPDX-License-Identifier: Apache-2.0 +// ---------------------------------------------------------------------------- +// Copyright 2019-2021 Arm Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy +// of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. +// ---------------------------------------------------------------------------- + +/** + * @brief 4x32-bit vectors, implemented using plain C++. + * + * This module implements 4-wide 32-bit float, int, and mask vectors. This + * module provides a scalar fallback for VLA code, primarily useful for + * debugging VLA algorithms without the complexity of handling SIMD. Only the + * baseline level of functionality needed to support VLA is provided. + * + * Note that the vector conditional operators implemented by this module are + * designed to behave like SIMD conditional operators that generate lane masks. + * Rather than returning 0/1 booleans like normal C++ code they will return + * 0/-1 to give a full lane-width bitmask. + * + * Note that the documentation for this module still talks about "vectors" to + * help developers think about the implied VLA behavior when writing optimized + * paths. + */ + +#ifndef ASTC_VECMATHLIB_NONE_4_H_INCLUDED +#define ASTC_VECMATHLIB_NONE_4_H_INCLUDED + +#ifndef ASTCENC_SIMD_INLINE + #error "Include astcenc_vecmathlib.h, do not include directly" +#endif + +#include +#include +#include +#include + +// ============================================================================ +// vfloat4 data type +// ============================================================================ + +/** + * @brief Data type for 4-wide floats. + */ +struct vfloat4 +{ + /** + * @brief Construct from zero-initialized value. 
+ */ + ASTCENC_SIMD_INLINE vfloat4() {} + + /** + * @brief Construct from 4 values loaded from an unaligned address. + * + * Consider using loada() which is better with wider VLA vectors if data is + * aligned to vector length. + */ + ASTCENC_SIMD_INLINE explicit vfloat4(const float* p) + { + m[0] = p[0]; + m[1] = p[1]; + m[2] = p[2]; + m[3] = p[3]; + } + + /** + * @brief Construct from 4 scalar values replicated across all lanes. + * + * Consider using zero() for constexpr zeros. + */ + ASTCENC_SIMD_INLINE explicit vfloat4(float a) + { + m[0] = a; + m[1] = a; + m[2] = a; + m[3] = a; + } + + /** + * @brief Construct from 4 scalar values. + * + * The value of @c a is stored to lane 0 (LSB) in the SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vfloat4(float a, float b, float c, float d) + { + m[0] = a; + m[1] = b; + m[2] = c; + m[3] = d; + } + + /** + * @brief Get the scalar value of a single lane. + */ + template ASTCENC_SIMD_INLINE float lane() const + { + return m[l]; + } + + /** + * @brief Set the scalar value of a single lane. + */ + template ASTCENC_SIMD_INLINE void set_lane(float a) + { + m[l] = a; + } + + /** + * @brief Factory that returns a vector of zeros. + */ + static ASTCENC_SIMD_INLINE vfloat4 zero() + { + return vfloat4(0.0f); + } + + /** + * @brief Factory that returns a replicated scalar loaded from memory. + */ + static ASTCENC_SIMD_INLINE vfloat4 load1(const float* p) + { + return vfloat4(*p); + } + + /** + * @brief Factory that returns a vector loaded from aligned memory. + */ + static ASTCENC_SIMD_INLINE vfloat4 loada(const float* p) + { + return vfloat4(p); + } + + /** + * @brief Factory that returns a vector containing the lane IDs. + */ + static ASTCENC_SIMD_INLINE vfloat4 lane_id() + { + return vfloat4(0.0f, 1.0f, 2.0f, 3.0f); + } + + /** + * @brief Return a swizzled float 2. + */ + template ASTCENC_SIMD_INLINE float2 swz() const + { + return float2(lane(), lane()); + } + + /** + * @brief Return a swizzled float 3. + */ + template ASTCENC_SIMD_INLINE vfloat4 swz() const + { + return vfloat4(lane(), lane(), lane(), 0.0f); + } + + /** + * @brief Return a swizzled float 4. + */ + template ASTCENC_SIMD_INLINE vfloat4 swz() const + { + return vfloat4(lane(), lane(), lane(), lane()); + } + + /** + * @brief The vector ... + */ + float m[4]; +}; + +// ============================================================================ +// vint4 data type +// ============================================================================ + +/** + * @brief Data type for 4-wide ints. + */ +struct vint4 +{ + /** + * @brief Construct from zero-initialized value. + */ + ASTCENC_SIMD_INLINE vint4() {} + + /** + * @brief Construct from 4 values loaded from an unaligned address. + * + * Consider using vint4::loada() which is better with wider VLA vectors + * if data is aligned. + */ + ASTCENC_SIMD_INLINE explicit vint4(const int* p) + { + m[0] = p[0]; + m[1] = p[1]; + m[2] = p[2]; + m[3] = p[3]; + } + + /** + * @brief Construct from 4 uint8_t loaded from an unaligned address. + */ + ASTCENC_SIMD_INLINE explicit vint4(const uint8_t *p) + { + m[0] = p[0]; + m[1] = p[1]; + m[2] = p[2]; + m[3] = p[3]; + } + + /** + * @brief Construct from 4 scalar values. + * + * The value of @c a is stored to lane 0 (LSB) in the SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vint4(int a, int b, int c, int d) + { + m[0] = a; + m[1] = b; + m[2] = c; + m[3] = d; + } + + + /** + * @brief Construct from 4 scalar values replicated across all lanes. + * + * Consider using vint4::zero() for constexpr zeros. 
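+	 *
+	 * A minimal usage sketch:
+	 *
+	 * @code
+	 *   vint4 four(4);                          // lanes { 4, 4, 4, 4 }
+	 *   vint4 ramp = vint4::lane_id() + four;   // lanes { 4, 5, 6, 7 }
+	 * @endcode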
+ */ + ASTCENC_SIMD_INLINE explicit vint4(int a) + { + m[0] = a; + m[1] = a; + m[2] = a; + m[3] = a; + } + + /** + * @brief Get the scalar value of a single lane. + */ + template ASTCENC_SIMD_INLINE int lane() const + { + return m[l]; + } + + /** + * @brief Set the scalar value of a single lane. + */ + template ASTCENC_SIMD_INLINE void set_lane(int a) + { + m[l] = a; + } + + /** + * @brief Factory that returns a vector of zeros. + */ + static ASTCENC_SIMD_INLINE vint4 zero() + { + return vint4(0); + } + + /** + * @brief Factory that returns a replicated scalar loaded from memory. + */ + static ASTCENC_SIMD_INLINE vint4 load1(const int* p) + { + return vint4(*p); + } + + /** + * @brief Factory that returns a vector loaded from 16B aligned memory. + */ + static ASTCENC_SIMD_INLINE vint4 loada(const int* p) + { + return vint4(p); + } + + /** + * @brief Factory that returns a vector containing the lane IDs. + */ + static ASTCENC_SIMD_INLINE vint4 lane_id() + { + return vint4(0, 1, 2, 3); + } + + /** + * @brief The vector ... + */ + int m[4]; +}; + +// ============================================================================ +// vmask4 data type +// ============================================================================ + +/** + * @brief Data type for 4-wide control plane masks. + */ +struct vmask4 +{ + /** + * @brief Construct from an existing mask value. + */ + ASTCENC_SIMD_INLINE explicit vmask4(int* p) + { + m[0] = p[0]; + m[1] = p[1]; + m[2] = p[2]; + m[3] = p[3]; + } + + /** + * @brief Construct from 4 scalar values. + * + * The value of @c a is stored to lane 0 (LSB) in the SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vmask4(bool a, bool b, bool c, bool d) + { + m[0] = a == false ? 0 : -1; + m[1] = b == false ? 0 : -1; + m[2] = c == false ? 0 : -1; + m[3] = d == false ? 0 : -1; + } + + /** + * @brief The vector ... + */ + int m[4]; +}; + +// ============================================================================ +// vmask4 operators and functions +// ============================================================================ + +/** + * @brief Overload: mask union (or). + */ +ASTCENC_SIMD_INLINE vmask4 operator|(vmask4 a, vmask4 b) +{ + return vmask4(a.m[0] | b.m[0], + a.m[1] | b.m[1], + a.m[2] | b.m[2], + a.m[3] | b.m[3]); +} + +/** + * @brief Overload: mask intersect (and). + */ +ASTCENC_SIMD_INLINE vmask4 operator&(vmask4 a, vmask4 b) +{ + return vmask4(a.m[0] & b.m[0], + a.m[1] & b.m[1], + a.m[2] & b.m[2], + a.m[3] & b.m[3]); +} + +/** + * @brief Overload: mask difference (xor). + */ +ASTCENC_SIMD_INLINE vmask4 operator^(vmask4 a, vmask4 b) +{ + return vmask4(a.m[0] ^ b.m[0], + a.m[1] ^ b.m[1], + a.m[2] ^ b.m[2], + a.m[3] ^ b.m[3]); +} + +/** + * @brief Overload: mask invert (not). + */ +ASTCENC_SIMD_INLINE vmask4 operator~(vmask4 a) +{ + return vmask4(~a.m[0], + ~a.m[1], + ~a.m[2], + ~a.m[3]); +} + +/** + * @brief Return a 1-bit mask code indicating mask status. + * + * bit0 = lane 0 + */ +ASTCENC_SIMD_INLINE unsigned int mask(vmask4 a) +{ + return ((a.m[0] >> 31) & 0x1) | + ((a.m[1] >> 30) & 0x2) | + ((a.m[2] >> 29) & 0x4) | + ((a.m[3] >> 28) & 0x8); +} + +// ============================================================================ +// vint4 operators and functions +// ============================================================================ + +/** + * @brief Overload: vector by vector addition. 
+ */ +ASTCENC_SIMD_INLINE vint4 operator+(vint4 a, vint4 b) +{ + return vint4(a.m[0] + b.m[0], + a.m[1] + b.m[1], + a.m[2] + b.m[2], + a.m[3] + b.m[3]); +} + +/** + * @brief Overload: vector by vector subtraction. + */ +ASTCENC_SIMD_INLINE vint4 operator-(vint4 a, vint4 b) +{ + return vint4(a.m[0] - b.m[0], + a.m[1] - b.m[1], + a.m[2] - b.m[2], + a.m[3] - b.m[3]); +} + +/** + * @brief Overload: vector by vector multiplication. + */ +ASTCENC_SIMD_INLINE vint4 operator*(vint4 a, vint4 b) +{ + return vint4(a.m[0] * b.m[0], + a.m[1] * b.m[1], + a.m[2] * b.m[2], + a.m[3] * b.m[3]); +} + +/** + * @brief Overload: vector bit invert. + */ +ASTCENC_SIMD_INLINE vint4 operator~(vint4 a) +{ + return vint4(~a.m[0], + ~a.m[1], + ~a.m[2], + ~a.m[3]); +} + +/** + * @brief Overload: vector by vector bitwise or. + */ +ASTCENC_SIMD_INLINE vint4 operator|(vint4 a, vint4 b) +{ + return vint4(a.m[0] | b.m[0], + a.m[1] | b.m[1], + a.m[2] | b.m[2], + a.m[3] | b.m[3]); +} + +/** + * @brief Overload: vector by vector bitwise and. + */ +ASTCENC_SIMD_INLINE vint4 operator&(vint4 a, vint4 b) +{ + return vint4(a.m[0] & b.m[0], + a.m[1] & b.m[1], + a.m[2] & b.m[2], + a.m[3] & b.m[3]); +} + +/** + * @brief Overload: vector by vector bitwise xor. + */ +ASTCENC_SIMD_INLINE vint4 operator^(vint4 a, vint4 b) +{ + return vint4(a.m[0] ^ b.m[0], + a.m[1] ^ b.m[1], + a.m[2] ^ b.m[2], + a.m[3] ^ b.m[3]); +} + +/** + * @brief Overload: vector by vector equality. + */ +ASTCENC_SIMD_INLINE vmask4 operator==(vint4 a, vint4 b) +{ + return vmask4(a.m[0] == b.m[0], + a.m[1] == b.m[1], + a.m[2] == b.m[2], + a.m[3] == b.m[3]); +} + +/** + * @brief Overload: vector by vector inequality. + */ +ASTCENC_SIMD_INLINE vmask4 operator!=(vint4 a, vint4 b) +{ + return vmask4(a.m[0] != b.m[0], + a.m[1] != b.m[1], + a.m[2] != b.m[2], + a.m[3] != b.m[3]); +} + +/** + * @brief Overload: vector by vector less than. + */ +ASTCENC_SIMD_INLINE vmask4 operator<(vint4 a, vint4 b) +{ + return vmask4(a.m[0] < b.m[0], + a.m[1] < b.m[1], + a.m[2] < b.m[2], + a.m[3] < b.m[3]); +} + +/** + * @brief Overload: vector by vector greater than. + */ +ASTCENC_SIMD_INLINE vmask4 operator>(vint4 a, vint4 b) +{ + return vmask4(a.m[0] > b.m[0], + a.m[1] > b.m[1], + a.m[2] > b.m[2], + a.m[3] > b.m[3]); +} + +/** + * @brief Logical shift left. + */ +template ASTCENC_SIMD_INLINE vint4 lsl(vint4 a) +{ + return vint4(a.m[0] << s, + a.m[1] << s, + a.m[2] << s, + a.m[3] << s); +} + +/** + * @brief Logical shift right. + */ +template ASTCENC_SIMD_INLINE vint4 lsr(vint4 a) +{ + return vint4((int)(((unsigned int)a.m[0]) >> s), + (int)(((unsigned int)a.m[1]) >> s), + (int)(((unsigned int)a.m[2]) >> s), + (int)(((unsigned int)a.m[3]) >> s)); +} + +/** + * @brief Arithmetic shift right. + */ +template ASTCENC_SIMD_INLINE vint4 asr(vint4 a) +{ + return vint4(a.m[0] >> s, + a.m[1] >> s, + a.m[2] >> s, + a.m[3] >> s); +} + +/** + * @brief Return the min vector of two vectors. + */ +ASTCENC_SIMD_INLINE vint4 min(vint4 a, vint4 b) +{ + return vint4(a.m[0] < b.m[0] ? a.m[0] : b.m[0], + a.m[1] < b.m[1] ? a.m[1] : b.m[1], + a.m[2] < b.m[2] ? a.m[2] : b.m[2], + a.m[3] < b.m[3] ? a.m[3] : b.m[3]); +} + +/** + * @brief Return the min vector of two vectors. + */ +ASTCENC_SIMD_INLINE vint4 max(vint4 a, vint4 b) +{ + return vint4(a.m[0] > b.m[0] ? a.m[0] : b.m[0], + a.m[1] > b.m[1] ? a.m[1] : b.m[1], + a.m[2] > b.m[2] ? a.m[2] : b.m[2], + a.m[3] > b.m[3] ? a.m[3] : b.m[3]); +} + +/** + * @brief Return the horizontal minimum of a single vector. 
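+ *
+ * A minimal usage sketch:
+ *
+ * @code
+ *   vint4 v(3, 1, 4, 1);
+ *   int lo = hmin(v).lane<0>();   // 1, and every lane of hmin(v) holds 1
+ * @endcode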
+ */ +ASTCENC_SIMD_INLINE vint4 hmin(vint4 a) +{ + int b = std::min(a.m[0], a.m[1]); + int c = std::min(a.m[2], a.m[3]); + return vint4(std::min(b, c)); +} + +/** + * @brief Return the horizontal maximum of a single vector. + */ +ASTCENC_SIMD_INLINE vint4 hmax(vint4 a) +{ + int b = std::max(a.m[0], a.m[1]); + int c = std::max(a.m[2], a.m[3]); + return vint4(std::max(b, c)); +} + +/** + * @brief Return the horizontal sum of vector lanes as a scalar. + */ +ASTCENC_SIMD_INLINE int hadd_s(vint4 a) +{ + return a.m[0] + a.m[1] + a.m[2] + a.m[3]; +} + +/** + * @brief Store a vector to an aligned memory address. + */ +ASTCENC_SIMD_INLINE void storea(vint4 a, int* p) +{ + p[0] = a.m[0]; + p[1] = a.m[1]; + p[2] = a.m[2]; + p[3] = a.m[3]; +} + +/** + * @brief Store a vector to an unaligned memory address. + */ +ASTCENC_SIMD_INLINE void store(vint4 a, int* p) +{ + p[0] = a.m[0]; + p[1] = a.m[1]; + p[2] = a.m[2]; + p[3] = a.m[3]; +} + +/** + * @brief Store lowest N (vector width) bytes into an unaligned address. + */ +ASTCENC_SIMD_INLINE void store_nbytes(vint4 a, uint8_t* p) +{ + int* pi = (int*)p; + *pi = a.m[0]; +} + +/** + * @brief Gather N (vector width) indices from the array. + */ +ASTCENC_SIMD_INLINE vint4 gatheri(const int* base, vint4 indices) +{ + return vint4(base[indices.m[0]], + base[indices.m[1]], + base[indices.m[2]], + base[indices.m[3]]); +} + +/** + * @brief Pack low 8 bits of N (vector width) lanes into bottom of vector. + */ +ASTCENC_SIMD_INLINE vint4 pack_low_bytes(vint4 a) +{ + int b0 = a.m[0] & 0xFF; + int b1 = a.m[1] & 0xFF; + int b2 = a.m[2] & 0xFF; + int b3 = a.m[3] & 0xFF; + + int b = b0 | (b1 << 8) | (b2 << 16) | (b3 << 24); + return vint4(b, 0, 0, 0); +} + +/** + * @brief Return lanes from @c b if MSB of @c cond is set, else @c a. + */ +ASTCENC_SIMD_INLINE vint4 select(vint4 a, vint4 b, vmask4 cond) +{ + return vint4((cond.m[0] & 0x80000000) ? b.m[0] : a.m[0], + (cond.m[1] & 0x80000000) ? b.m[1] : a.m[1], + (cond.m[2] & 0x80000000) ? b.m[2] : a.m[2], + (cond.m[3] & 0x80000000) ? b.m[3] : a.m[3]); +} + +// ============================================================================ +// vfloat4 operators and functions +// ============================================================================ + +/** + * @brief Overload: vector by vector addition. + */ +ASTCENC_SIMD_INLINE vfloat4 operator+(vfloat4 a, vfloat4 b) +{ + return vfloat4(a.m[0] + b.m[0], + a.m[1] + b.m[1], + a.m[2] + b.m[2], + a.m[3] + b.m[3]); +} + +/** + * @brief Overload: vector by vector subtraction. + */ +ASTCENC_SIMD_INLINE vfloat4 operator-(vfloat4 a, vfloat4 b) +{ + return vfloat4(a.m[0] - b.m[0], + a.m[1] - b.m[1], + a.m[2] - b.m[2], + a.m[3] - b.m[3]); +} + +/** + * @brief Overload: vector by vector multiplication. + */ +ASTCENC_SIMD_INLINE vfloat4 operator*(vfloat4 a, vfloat4 b) +{ + return vfloat4(a.m[0] * b.m[0], + a.m[1] * b.m[1], + a.m[2] * b.m[2], + a.m[3] * b.m[3]); +} + +/** + * @brief Overload: vector by vector division. + */ +ASTCENC_SIMD_INLINE vfloat4 operator/(vfloat4 a, vfloat4 b) +{ + return vfloat4(a.m[0] / b.m[0], + a.m[1] / b.m[1], + a.m[2] / b.m[2], + a.m[3] / b.m[3]); +} + +/** + * @brief Overload: vector by vector equality. + */ +ASTCENC_SIMD_INLINE vmask4 operator==(vfloat4 a, vfloat4 b) +{ + return vmask4(a.m[0] == b.m[0], + a.m[1] == b.m[1], + a.m[2] == b.m[2], + a.m[3] == b.m[3]); +} + +/** + * @brief Overload: vector by vector inequality. 
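+ *
+ * A short example; as noted in the file header, comparisons produce 0/-1
+ * lane masks rather than booleans:
+ *
+ * @code
+ *   vfloat4 a(1.0f, 2.0f, 3.0f, 4.0f);
+ *   vfloat4 b(1.0f, 0.0f, 3.0f, 0.0f);
+ *   vmask4 ne = a != b;            // lanes { 0, -1, 0, -1 }
+ *   unsigned int bits = mask(ne);  // 0xA: bits 1 and 3 set
+ * @endcode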
+ */ +ASTCENC_SIMD_INLINE vmask4 operator!=(vfloat4 a, vfloat4 b) +{ + return vmask4(a.m[0] != b.m[0], + a.m[1] != b.m[1], + a.m[2] != b.m[2], + a.m[3] != b.m[3]); +} + +/** + * @brief Overload: vector by vector less than. + */ +ASTCENC_SIMD_INLINE vmask4 operator<(vfloat4 a, vfloat4 b) +{ + return vmask4(a.m[0] < b.m[0], + a.m[1] < b.m[1], + a.m[2] < b.m[2], + a.m[3] < b.m[3]); +} + +/** + * @brief Overload: vector by vector greater than. + */ +ASTCENC_SIMD_INLINE vmask4 operator>(vfloat4 a, vfloat4 b) +{ + return vmask4(a.m[0] > b.m[0], + a.m[1] > b.m[1], + a.m[2] > b.m[2], + a.m[3] > b.m[3]); +} + +/** + * @brief Overload: vector by vector less than or equal. + */ +ASTCENC_SIMD_INLINE vmask4 operator<=(vfloat4 a, vfloat4 b) +{ + return vmask4(a.m[0] <= b.m[0], + a.m[1] <= b.m[1], + a.m[2] <= b.m[2], + a.m[3] <= b.m[3]); +} + +/** + * @brief Overload: vector by vector greater than or equal. + */ +ASTCENC_SIMD_INLINE vmask4 operator>=(vfloat4 a, vfloat4 b) +{ + return vmask4(a.m[0] >= b.m[0], + a.m[1] >= b.m[1], + a.m[2] >= b.m[2], + a.m[3] >= b.m[3]); +} + +/** + * @brief Return the min vector of two vectors. + * + * If either lane value is NaN, @c b will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat4 min(vfloat4 a, vfloat4 b) +{ + return vfloat4(a.m[0] < b.m[0] ? a.m[0] : b.m[0], + a.m[1] < b.m[1] ? a.m[1] : b.m[1], + a.m[2] < b.m[2] ? a.m[2] : b.m[2], + a.m[3] < b.m[3] ? a.m[3] : b.m[3]); +} + +/** + * @brief Return the max vector of two vectors. + * + * If either lane value is NaN, @c b will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat4 max(vfloat4 a, vfloat4 b) +{ + return vfloat4(a.m[0] > b.m[0] ? a.m[0] : b.m[0], + a.m[1] > b.m[1] ? a.m[1] : b.m[1], + a.m[2] > b.m[2] ? a.m[2] : b.m[2], + a.m[3] > b.m[3] ? a.m[3] : b.m[3]); +} + +/** + * @brief Return the absolute value of the float vector. + */ +ASTCENC_SIMD_INLINE vfloat4 abs(vfloat4 a) +{ + return vfloat4(std::abs(a.m[0]), + std::abs(a.m[1]), + std::abs(a.m[2]), + std::abs(a.m[3])); +} + +/** + * @brief Return a float rounded to the nearest integer value. + */ +ASTCENC_SIMD_INLINE vfloat4 round(vfloat4 a) +{ + assert(std::fegetround() == FE_TONEAREST); + return vfloat4(std::nearbyint(a.m[0]), + std::nearbyint(a.m[1]), + std::nearbyint(a.m[2]), + std::nearbyint(a.m[3])); +} + +/** + * @brief Return the horizontal minimum of a vector. + */ +ASTCENC_SIMD_INLINE vfloat4 hmin(vfloat4 a) +{ + float tmp1 = std::min(a.m[0], a.m[1]); + float tmp2 = std::min(a.m[2], a.m[3]); + return vfloat4(std::min(tmp1, tmp2)); +} + +/** + * @brief Return the horizontal maximum of a vector. + */ +ASTCENC_SIMD_INLINE vfloat4 hmax(vfloat4 a) +{ + float tmp1 = std::max(a.m[0], a.m[1]); + float tmp2 = std::max(a.m[2], a.m[3]); + return vfloat4(std::max(tmp1, tmp2)); +} + +/** + * @brief Return the horizontal sum of a vector. + */ +ASTCENC_SIMD_INLINE float hadd_s(vfloat4 a) +{ + // Use halving add, gives invariance with SIMD versions + return (a.m[0] + a.m[2]) + (a.m[1] + a.m[3]); +} + +/** + * @brief Return the sqrt of the lanes in the vector. + */ +ASTCENC_SIMD_INLINE vfloat4 sqrt(vfloat4 a) +{ + return vfloat4(std::sqrt(a.m[0]), + std::sqrt(a.m[1]), + std::sqrt(a.m[2]), + std::sqrt(a.m[3])); +} + +/** + * @brief Return lanes from @c b if MSB of @c cond is set, else @c a. + */ +ASTCENC_SIMD_INLINE vfloat4 select(vfloat4 a, vfloat4 b, vmask4 cond) +{ + return vfloat4((cond.m[0] & 0x80000000) ? b.m[0] : a.m[0], + (cond.m[1] & 0x80000000) ? b.m[1] : a.m[1], + (cond.m[2] & 0x80000000) ? 
b.m[2] : a.m[2], + (cond.m[3] & 0x80000000) ? b.m[3] : a.m[3]); +} + +/** + * @brief Load a vector of gathered results from an array; + */ +ASTCENC_SIMD_INLINE vfloat4 gatherf(const float* base, vint4 indices) +{ + return vfloat4(base[indices.m[0]], + base[indices.m[1]], + base[indices.m[2]], + base[indices.m[3]]); +} + +/** + * @brief Store a vector to an unaligned memory address. + */ +ASTCENC_SIMD_INLINE void store(vfloat4 a, float* ptr) +{ + ptr[0] = a.m[0]; + ptr[1] = a.m[1]; + ptr[2] = a.m[2]; + ptr[3] = a.m[3]; +} + +/** + * @brief Store a vector to an aligned memory address. + */ +ASTCENC_SIMD_INLINE void storea(vfloat4 a, float* ptr) +{ + ptr[0] = a.m[0]; + ptr[1] = a.m[1]; + ptr[2] = a.m[2]; + ptr[3] = a.m[3]; +} + +/** + * @brief Return a integer value for a float vector, using truncation. + */ +ASTCENC_SIMD_INLINE vint4 float_to_int(vfloat4 a) +{ + // Casting to unsigned buys us an extra bit of precision in cases where + // we can use the integer as nasty bit hacks. + return vint4((unsigned int)a.m[0], + (unsigned int)a.m[1], + (unsigned int)a.m[2], + (unsigned int)a.m[3]); +} + +/**f + * @brief Return a integer value for a float vector, using round-to-nearest. + */ +ASTCENC_SIMD_INLINE vint4 float_to_int_rtn(vfloat4 a) +{ + return vint4((int)(a.m[0] + 0.5f), + (int)(a.m[1] + 0.5f), + (int)(a.m[2] + 0.5f), + (int)(a.m[3] + 0.5f)); +} + +/** + * @brief Return a float value for a integer vector. + */ +ASTCENC_SIMD_INLINE vfloat4 int_to_float(vint4 a) +{ + return vfloat4((float)a.m[0], + (float)a.m[1], + (float)a.m[2], + (float)a.m[3]); +} + +/** + * @brief Return a float16 value for a float vector, using round-to-nearest. + */ +ASTCENC_SIMD_INLINE vint4 float_to_float16(vfloat4 a) +{ + return vint4( + float_to_sf16(a.lane<0>()), + float_to_sf16(a.lane<1>()), + float_to_sf16(a.lane<2>()), + float_to_sf16(a.lane<3>())); +} + +/** + * @brief Return a float16 value for a float scalar, using round-to-nearest. + */ +static inline uint16_t float_to_float16(float a) +{ + return float_to_sf16(a); +} + +/** + * @brief Return a float value for a float16 vector. + */ +ASTCENC_SIMD_INLINE vfloat4 float16_to_float(vint4 a) +{ + return vfloat4( + sf16_to_float(a.lane<0>()), + sf16_to_float(a.lane<1>()), + sf16_to_float(a.lane<2>()), + sf16_to_float(a.lane<3>())); +} + +/** + * @brief Return a float value for a float16 scalar. + */ +ASTCENC_SIMD_INLINE float float16_to_float(uint16_t a) +{ + return sf16_to_float(a); +} + +/** + * @brief Return a float value as an integer bit pattern (i.e. no conversion). + * + * It is a common trick to convert floats into integer bit patterns, perform + * some bit hackery based on knowledge they are IEEE 754 layout, and then + * convert them back again. This is the first half of that flip. + */ +ASTCENC_SIMD_INLINE vint4 float_as_int(vfloat4 a) +{ + vint4 r; + memcpy(r.m, a.m, 4 * 4); + return r; +} + +/** + * @brief Return a integer value as a float bit pattern (i.e. no conversion). + * + * It is a common trick to convert floats into integer bit patterns, perform + * some bit hackery based on knowledge they are IEEE 754 layout, and then + * convert them back again. This is the second half of that flip. 
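+ *
+ * A minimal round-trip sketch:
+ *
+ * @code
+ *   vfloat4 v(1.0f, -2.0f, 0.5f, 4.0f);
+ *   vint4 bits = float_as_int(v);      // raw IEEE 754 bit patterns
+ *   vfloat4 back = int_as_float(bits); // equal to v, lane for lane
+ * @endcode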
+ */ +ASTCENC_SIMD_INLINE vfloat4 int_as_float(vint4 a) +{ + vfloat4 r; + memcpy(r.m, a.m, 4 * 4); + return r; +} + +#endif // #ifndef ASTC_VECMATHLIB_NONE_4_H_INCLUDED diff --git a/libkram/astc-encoder/astcenc_vecmathlib_sse_4.h b/libkram/astc-encoder/astcenc_vecmathlib_sse_4.h new file mode 100755 index 00000000..4bb8ea96 --- /dev/null +++ b/libkram/astc-encoder/astcenc_vecmathlib_sse_4.h @@ -0,0 +1,1008 @@ +// SPDX-License-Identifier: Apache-2.0 +// ---------------------------------------------------------------------------- +// Copyright 2019-2021 Arm Limited +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not +// use this file except in compliance with the License. You may obtain a copy +// of the License at: +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +// License for the specific language governing permissions and limitations +// under the License. +// ---------------------------------------------------------------------------- + +/** + * @brief 4x32-bit vectors, implemented using SSE. + * + * This module implements 4-wide 32-bit float, int, and mask vectors for x86 + * SSE. The implementation requires at least SSE2, but higher levels of SSE can + * be selected at compile time to improve performance. + * + * There is a baseline level of functionality provided by all vector widths and + * implementations. This is implemented using identical function signatures, + * modulo data type, so we can use them as substitutable implementations in VLA + * code. + * + * The 4-wide vectors are also used as a fixed-width type, and significantly + * extend the functionality above that available to VLA code. + */ + +#ifndef ASTC_VECMATHLIB_SSE_4_H_INCLUDED +#define ASTC_VECMATHLIB_SSE_4_H_INCLUDED + +#ifndef ASTCENC_SIMD_INLINE + #error "Include astcenc_vecmathlib.h, do not include directly" +#endif + +#include + +// ============================================================================ +// vfloat4 data type +// ============================================================================ + +/** + * @brief Data type for 4-wide floats. + */ +struct vfloat4 +{ + /** + * @brief Construct from zero-initialized value. + */ + ASTCENC_SIMD_INLINE vfloat4() {} + + /** + * @brief Construct from 4 values loaded from an unaligned address. + * + * Consider using loada() which is better with vectors if data is aligned + * to vector length. + */ + ASTCENC_SIMD_INLINE explicit vfloat4(const float *p) + { + m = _mm_loadu_ps(p); + } + + /** + * @brief Construct from 1 scalar value replicated across all lanes. + * + * Consider using zero() for constexpr zeros. + */ + ASTCENC_SIMD_INLINE explicit vfloat4(float a) + { + m = _mm_set1_ps(a); + } + + /** + * @brief Construct from 4 scalar values. + * + * The value of @c a is stored to lane 0 (LSB) in the SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vfloat4(float a, float b, float c, float d) + { + m = _mm_set_ps(d, c, b, a); + } + + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vfloat4(__m128 a) + { + m = a; + } + + /** + * @brief Get the scalar value of a single lane. + */ + template ASTCENC_SIMD_INLINE float lane() const + { + return _mm_cvtss_f32(_mm_shuffle_ps(m, m, l)); + } + + /** + * @brief Set the scalar value of a single lane. 
+ */ + template ASTCENC_SIMD_INLINE void set_lane(float a) + { +#if ASTCENC_SSE >= 41 + __m128 v = _mm_set1_ps(a); + m = _mm_insert_ps(m, v, l << 6 | l << 4); +#else + alignas(16) float idx[4]; + _mm_store_ps(idx, m); + idx[l] = a; + m = _mm_load_ps(idx); +#endif + } + + /** + * @brief Factory that returns a vector of zeros. + */ + static ASTCENC_SIMD_INLINE vfloat4 zero() + { + return vfloat4(_mm_setzero_ps()); + } + + /** + * @brief Factory that returns a replicated scalar loaded from memory. + */ + static ASTCENC_SIMD_INLINE vfloat4 load1(const float* p) + { + return vfloat4(_mm_load_ps1(p)); + } + + /** + * @brief Factory that returns a vector loaded from 16B aligned memory. + */ + static ASTCENC_SIMD_INLINE vfloat4 loada(const float* p) + { + return vfloat4(_mm_load_ps(p)); + } + + /** + * @brief Factory that returns a vector containing the lane IDs. + */ + static ASTCENC_SIMD_INLINE vfloat4 lane_id() + { + return vfloat4(_mm_set_ps(3, 2, 1, 0)); + } + + /** + * @brief Return a swizzled float 2. + */ + template ASTCENC_SIMD_INLINE float2 swz() const + { + return float2(lane(), lane()); + } + + /** + * @brief Return a swizzled float 3. + */ + template ASTCENC_SIMD_INLINE vfloat4 swz() const + { + vfloat4 result(_mm_shuffle_ps(m, m, l0 | l1 << 2 | l2 << 4)); + result.set_lane<3>(0.0f); + return result; + } + + /** + * @brief Return a swizzled float 4. + */ + template ASTCENC_SIMD_INLINE vfloat4 swz() const + { + return vfloat4(_mm_shuffle_ps(m, m, l0 | l1 << 2 | l2 << 4 | l3 << 6)); + } + + /** + * @brief The vector ... + */ + __m128 m; +}; + +// ============================================================================ +// vint4 data type +// ============================================================================ + +/** + * @brief Data type for 4-wide ints. + */ +struct vint4 +{ + /** + * @brief Construct from zero-initialized value. + */ + ASTCENC_SIMD_INLINE vint4() {} + + /** + * @brief Construct from 4 values loaded from an unaligned address. + * + * Consider using loada() which is better with vectors if data is aligned + * to vector length. + */ + ASTCENC_SIMD_INLINE explicit vint4(const int *p) + { + m = _mm_loadu_si128((const __m128i*)p); + } + + /** + * @brief Construct from 4 uint8_t loaded from an unaligned address. + */ + ASTCENC_SIMD_INLINE explicit vint4(const uint8_t *p) + { + // _mm_loadu_si32 would be nicer syntax, but missing on older GCC + __m128i t = _mm_cvtsi32_si128(*(const int*)p); + +#if ASTCENC_SSE >= 41 + m = _mm_cvtepu8_epi32(t); +#else + t = _mm_unpacklo_epi8(t, _mm_setzero_si128()); + m = _mm_unpacklo_epi16(t, _mm_setzero_si128()); +#endif + } + + /** + * @brief Construct from 1 scalar value replicated across all lanes. + * + * Consider using vfloat4::zero() for constexpr zeros. + */ + ASTCENC_SIMD_INLINE explicit vint4(int a) + { + m = _mm_set1_epi32(a); + } + + /** + * @brief Construct from 4 scalar values. + * + * The value of @c a is stored to lane 0 (LSB) in the SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vint4(int a, int b, int c, int d) + { + m = _mm_set_epi32(d, c, b, a); + } + + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vint4(__m128i a) + { + m = a; + } + + /** + * @brief Get the scalar from a single lane. + */ + template ASTCENC_SIMD_INLINE int lane() const + { + return _mm_cvtsi128_si32(_mm_shuffle_epi32(m, l)); + } + + /** + * @brief Set the scalar value of a single lane. 
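+	 *
+	 * A minimal usage sketch:
+	 *
+	 * @code
+	 *   vint4 v = vint4::zero();
+	 *   v.set_lane<0>(255);        // lanes { 255, 0, 0, 0 }
+	 * @endcode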
+ */ + template ASTCENC_SIMD_INLINE void set_lane(int a) + { +#if ASTCENC_SSE >= 41 + m = _mm_insert_epi32(m, a, l); +#else + alignas(16) int idx[4]; + _mm_store_si128((__m128i*)idx, m); + idx[l] = a; + m = _mm_load_si128((const __m128i*)idx); +#endif + } + + /** + * @brief Factory that returns a vector of zeros. + */ + static ASTCENC_SIMD_INLINE vint4 zero() + { + return vint4(_mm_setzero_si128()); + } + + /** + * @brief Factory that returns a replicated scalar loaded from memory. + */ + static ASTCENC_SIMD_INLINE vint4 load1(const int* p) + { + return vint4(*p); + } + + /** + * @brief Factory that returns a vector loaded from 16B aligned memory. + */ + static ASTCENC_SIMD_INLINE vint4 loada(const int* p) + { + return vint4(_mm_load_si128((const __m128i*)p)); + } + + /** + * @brief Factory that returns a vector containing the lane IDs. + */ + static ASTCENC_SIMD_INLINE vint4 lane_id() + { + return vint4(_mm_set_epi32(3, 2, 1, 0)); + } + + /** + * @brief The vector ... + */ + __m128i m; +}; + +// ============================================================================ +// vmask4 data type +// ============================================================================ + +/** + * @brief Data type for 4-wide control plane masks. + */ +struct vmask4 +{ + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vmask4(__m128 a) + { + m = a; + } + + /** + * @brief Construct from an existing SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vmask4(__m128i a) + { + m = _mm_castsi128_ps(a); + } + + /** + * @brief Construct from 4 scalar values. + * + * The value of @c a is stored to lane 0 (LSB) in the SIMD register. + */ + ASTCENC_SIMD_INLINE explicit vmask4(bool a, bool b, bool c, bool d) + { + vint4 mask(a == false ? 0 : -1, + b == false ? 0 : -1, + c == false ? 0 : -1, + d == false ? 0 : -1); + + m = _mm_castsi128_ps(mask.m); + } + + /** + * @brief The vector ... + */ + __m128 m; +}; + +// ============================================================================ +// vmask4 operators and functions +// ============================================================================ + +/** + * @brief Overload: mask union (or). + */ +ASTCENC_SIMD_INLINE vmask4 operator|(vmask4 a, vmask4 b) +{ + return vmask4(_mm_or_ps(a.m, b.m)); +} + +/** + * @brief Overload: mask intersect (and). + */ +ASTCENC_SIMD_INLINE vmask4 operator&(vmask4 a, vmask4 b) +{ + return vmask4(_mm_and_ps(a.m, b.m)); +} + +/** + * @brief Overload: mask difference (xor). + */ +ASTCENC_SIMD_INLINE vmask4 operator^(vmask4 a, vmask4 b) +{ + return vmask4(_mm_xor_ps(a.m, b.m)); +} + +/** + * @brief Overload: mask invert (not). + */ +ASTCENC_SIMD_INLINE vmask4 operator~(vmask4 a) +{ + return vmask4(_mm_xor_si128(_mm_castps_si128(a.m), _mm_set1_epi32(-1))); +} + +/** + * @brief Return a 4-bit mask code indicating mask status. + * + * bit0 = lane 0 + */ +ASTCENC_SIMD_INLINE unsigned int mask(vmask4 a) +{ + return _mm_movemask_ps(a.m); +} + +// ============================================================================ +// vint4 operators and functions +// ============================================================================ + +/** + * @brief Overload: vector by vector addition. + */ +ASTCENC_SIMD_INLINE vint4 operator+(vint4 a, vint4 b) +{ + return vint4(_mm_add_epi32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector subtraction. 
+ */ +ASTCENC_SIMD_INLINE vint4 operator-(vint4 a, vint4 b) +{ + return vint4(_mm_sub_epi32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector multiplication. + */ +ASTCENC_SIMD_INLINE vint4 operator*(vint4 a, vint4 b) +{ +#if ASTCENC_SSE >= 41 + return vint4(_mm_mullo_epi32 (a.m, b.m)); +#else + __m128i t1 = _mm_mul_epu32(a.m, b.m); + __m128i t2 = _mm_mul_epu32( + _mm_srli_si128(a.m, 4), + _mm_srli_si128(b.m, 4)); + __m128i r = _mm_unpacklo_epi32( + _mm_shuffle_epi32(t1, _MM_SHUFFLE (0, 0, 2, 0)), + _mm_shuffle_epi32(t2, _MM_SHUFFLE (0, 0, 2, 0))); + return vint4(r); +#endif +} + +/** + * @brief Overload: vector bit invert. + */ +ASTCENC_SIMD_INLINE vint4 operator~(vint4 a) +{ + return vint4(_mm_xor_si128(a.m, _mm_set1_epi32(-1))); +} + +/** + * @brief Overload: vector by vector bitwise or. + */ +ASTCENC_SIMD_INLINE vint4 operator|(vint4 a, vint4 b) +{ + return vint4(_mm_or_si128(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector bitwise and. + */ +ASTCENC_SIMD_INLINE vint4 operator&(vint4 a, vint4 b) +{ + return vint4(_mm_and_si128(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector bitwise xor. + */ +ASTCENC_SIMD_INLINE vint4 operator^(vint4 a, vint4 b) +{ + return vint4(_mm_xor_si128(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector equality. + */ +ASTCENC_SIMD_INLINE vmask4 operator==(vint4 a, vint4 b) +{ + return vmask4(_mm_cmpeq_epi32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector inequality. + */ +ASTCENC_SIMD_INLINE vmask4 operator!=(vint4 a, vint4 b) +{ + return ~vmask4(_mm_cmpeq_epi32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector less than. + */ +ASTCENC_SIMD_INLINE vmask4 operator<(vint4 a, vint4 b) +{ + return vmask4(_mm_cmplt_epi32(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector greater than. + */ +ASTCENC_SIMD_INLINE vmask4 operator>(vint4 a, vint4 b) +{ + return vmask4(_mm_cmpgt_epi32(a.m, b.m)); +} + +/** + * @brief Logical shift left. + */ +template ASTCENC_SIMD_INLINE vint4 lsl(vint4 a) +{ + return vint4(_mm_slli_epi32(a.m, s)); +} + +/** + * @brief Logical shift right. + */ +template ASTCENC_SIMD_INLINE vint4 lsr(vint4 a) +{ + return vint4(_mm_srli_epi32(a.m, s)); +} + +/** + * @brief Arithmetic shift right. + */ +template ASTCENC_SIMD_INLINE vint4 asr(vint4 a) +{ + return vint4(_mm_srai_epi32(a.m, s)); +} + +/** + * @brief Return the min vector of two vectors. + */ +ASTCENC_SIMD_INLINE vint4 min(vint4 a, vint4 b) +{ +#if ASTCENC_SSE >= 41 + return vint4(_mm_min_epi32(a.m, b.m)); +#else + vmask4 d = a < b; + __m128i ap = _mm_and_si128(_mm_castps_si128(d.m), a.m); + __m128i bp = _mm_andnot_si128(_mm_castps_si128(d.m), b.m); + return vint4(_mm_or_si128(ap,bp)); +#endif +} + +/** + * @brief Return the max vector of two vectors. + */ +ASTCENC_SIMD_INLINE vint4 max(vint4 a, vint4 b) +{ +#if ASTCENC_SSE >= 41 + return vint4(_mm_max_epi32(a.m, b.m)); +#else + vmask4 d = a > b; + __m128i ap = _mm_and_si128(_mm_castps_si128(d.m), a.m); + __m128i bp = _mm_andnot_si128(_mm_castps_si128(d.m), b.m); + return vint4(_mm_or_si128(ap,bp)); +#endif +} + +/** + * @brief Return the horizontal minimum of a vector. + */ +ASTCENC_SIMD_INLINE vint4 hmin(vint4 a) +{ + a = min(a, vint4(_mm_shuffle_epi32(a.m, _MM_SHUFFLE(0, 0, 3, 2)))); + a = min(a, vint4(_mm_shuffle_epi32(a.m, _MM_SHUFFLE(0, 0, 0, 1)))); + return vint4(_mm_shuffle_epi32(a.m, _MM_SHUFFLE(0, 0, 0, 0))); +} + +/* + * @brief Return the horizontal maximum of a vector. 
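+ *
+ * A minimal usage sketch:
+ *
+ * @code
+ *   vint4 v(3, 9, 2, 7);
+ *   int top = hmax(v).lane<0>();   // 9; every lane of hmax(v) is 9
+ * @endcode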
+ */ +ASTCENC_SIMD_INLINE vint4 hmax(vint4 a) +{ + a = max(a, vint4(_mm_shuffle_epi32(a.m, _MM_SHUFFLE(0, 0, 3, 2)))); + a = max(a, vint4(_mm_shuffle_epi32(a.m, _MM_SHUFFLE(0, 0, 0, 1)))); + return vint4(_mm_shuffle_epi32(a.m, _MM_SHUFFLE(0, 0, 0, 0))); +} + +/** + * @brief Return the horizontal sum of a vector as a scalar. + */ +ASTCENC_SIMD_INLINE int hadd_s(vint4 a) +{ + // Add top and bottom halves, lane 1/0 + __m128i fold = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(a.m), + _mm_castsi128_ps(a.m))); + __m128i t = _mm_add_epi32(a.m, fold); + + // Add top and bottom halves, lane 0 (_mm_hadd_ps exists but slow) + t = _mm_add_epi32(t, _mm_shuffle_epi32(t, 0x55)); + + return _mm_cvtsi128_si32(t); +} + +/** + * @brief Store a vector to a 16B aligned memory address. + */ +ASTCENC_SIMD_INLINE void storea(vint4 a, int* p) +{ + _mm_store_si128((__m128i*)p, a.m); +} + +/** + * @brief Store a vector to an unaligned memory address. + */ +ASTCENC_SIMD_INLINE void store(vint4 a, int* p) +{ + // Cast due to missing intrinsics + _mm_storeu_ps((float*)p, _mm_castsi128_ps(a.m)); +} + +/** + * @brief Store lowest N (vector width) bytes into an unaligned address. + */ +ASTCENC_SIMD_INLINE void store_nbytes(vint4 a, uint8_t* p) +{ + // Cast due to missing intrinsics + _mm_store_ss((float*)p, _mm_castsi128_ps(a.m)); +} + +/** + * @brief Gather N (vector width) indices from the array. + */ +ASTCENC_SIMD_INLINE vint4 gatheri(const int* base, vint4 indices) +{ +#if ASTCENC_AVX >= 2 + return vint4(_mm_i32gather_epi32(base, indices.m, 4)); +#else + alignas(16) int idx[4]; + storea(indices, idx); + return vint4(base[idx[0]], base[idx[1]], base[idx[2]], base[idx[3]]); +#endif +} + +/** + * @brief Pack low 8 bits of N (vector width) lanes into bottom of vector. + */ +ASTCENC_SIMD_INLINE vint4 pack_low_bytes(vint4 a) +{ +#if ASTCENC_SSE >= 41 + __m128i shuf = _mm_set_epi8(0,0,0,0, 0,0,0,0, 0,0,0,0, 12,8,4,0); + return vint4(_mm_shuffle_epi8(a.m, shuf)); +#else + __m128i va = _mm_unpacklo_epi8(a.m, _mm_shuffle_epi32(a.m, _MM_SHUFFLE(1,1,1,1))); + __m128i vb = _mm_unpackhi_epi8(a.m, _mm_shuffle_epi32(a.m, _MM_SHUFFLE(3,3,3,3))); + return vint4(_mm_unpacklo_epi16(va, vb)); +#endif +} + +/** + * @brief Return lanes from @c b if MSB of @c cond is set, else @c a. + */ +ASTCENC_SIMD_INLINE vint4 select(vint4 a, vint4 b, vmask4 cond) +{ +#if ASTCENC_SSE >= 41 + // Don't use _mm_blendv_epi8 directly, as it doesn't give the select on + // float sign-bit in the mask behavior which is useful. Performance is the + // same, these casts are free. + __m128 av = _mm_castsi128_ps(a.m); + __m128 bv = _mm_castsi128_ps(b.m); + return vint4(_mm_castps_si128(_mm_blendv_ps(av, bv, cond.m))); +#else + __m128i d = _mm_srai_epi32(_mm_castps_si128(cond.m), 31); + return vint4(_mm_or_si128(_mm_and_si128(d, b.m), _mm_andnot_si128(d, a.m))); +#endif +} + +// ============================================================================ +// vfloat4 operators and functions +// ============================================================================ + +/** + * @brief Overload: vector by vector addition. + */ +ASTCENC_SIMD_INLINE vfloat4 operator+(vfloat4 a, vfloat4 b) +{ + return vfloat4(_mm_add_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector subtraction. + */ +ASTCENC_SIMD_INLINE vfloat4 operator-(vfloat4 a, vfloat4 b) +{ + return vfloat4(_mm_sub_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector multiplication. 
+ */ +ASTCENC_SIMD_INLINE vfloat4 operator*(vfloat4 a, vfloat4 b) +{ + return vfloat4(_mm_mul_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector division. + */ +ASTCENC_SIMD_INLINE vfloat4 operator/(vfloat4 a, vfloat4 b) +{ + return vfloat4(_mm_div_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector equality. + */ +ASTCENC_SIMD_INLINE vmask4 operator==(vfloat4 a, vfloat4 b) +{ + return vmask4(_mm_cmpeq_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector inequality. + */ +ASTCENC_SIMD_INLINE vmask4 operator!=(vfloat4 a, vfloat4 b) +{ + return vmask4(_mm_cmpneq_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector less than. + */ +ASTCENC_SIMD_INLINE vmask4 operator<(vfloat4 a, vfloat4 b) +{ + return vmask4(_mm_cmplt_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector greater than. + */ +ASTCENC_SIMD_INLINE vmask4 operator>(vfloat4 a, vfloat4 b) +{ + return vmask4(_mm_cmpgt_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector less than or equal. + */ +ASTCENC_SIMD_INLINE vmask4 operator<=(vfloat4 a, vfloat4 b) +{ + return vmask4(_mm_cmple_ps(a.m, b.m)); +} + +/** + * @brief Overload: vector by vector greater than or equal. + */ +ASTCENC_SIMD_INLINE vmask4 operator>=(vfloat4 a, vfloat4 b) +{ + return vmask4(_mm_cmpge_ps(a.m, b.m)); +} + +/** + * @brief Return the min vector of two vectors. + * + * If either lane value is NaN, @c b will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat4 min(vfloat4 a, vfloat4 b) +{ + // Do not reorder - second operand will return if either is NaN + return vfloat4(_mm_min_ps(a.m, b.m)); +} + +/** + * @brief Return the max vector of two vectors. + * + * If either lane value is NaN, @c b will be returned for that lane. + */ +ASTCENC_SIMD_INLINE vfloat4 max(vfloat4 a, vfloat4 b) +{ + // Do not reorder - second operand will return if either is NaN + return vfloat4(_mm_max_ps(a.m, b.m)); +} + +/** + * @brief Return the absolute value of the float vector. + */ +ASTCENC_SIMD_INLINE vfloat4 abs(vfloat4 a) +{ + return vfloat4(_mm_max_ps(_mm_sub_ps(_mm_setzero_ps(), a.m), a.m)); +} + +/** + * @brief Return a float rounded to the nearest integer value. + */ +ASTCENC_SIMD_INLINE vfloat4 round(vfloat4 a) +{ +#if ASTCENC_SSE >= 41 + constexpr int flags = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC; + return vfloat4(_mm_round_ps(a.m, flags)); +#else + __m128 v = a.m; + __m128 neg_zero = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); + __m128 no_fraction = _mm_set1_ps(8388608.0f); + __m128 abs_mask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF)); + __m128 sign = _mm_and_ps(v, neg_zero); + __m128 s_magic = _mm_or_ps(no_fraction, sign); + __m128 r1 = _mm_add_ps(v, s_magic); + r1 = _mm_sub_ps(r1, s_magic); + __m128 r2 = _mm_and_ps(v, abs_mask); + __m128 mask = _mm_cmple_ps(r2, no_fraction); + r2 = _mm_andnot_ps(mask, v); + r1 = _mm_and_ps(r1, mask); + return vfloat4(_mm_xor_ps(r1, r2)); +#endif +} + +/** + * @brief Return the horizontal minimum of a vector. + */ +ASTCENC_SIMD_INLINE vfloat4 hmin(vfloat4 a) +{ + a = min(a, vfloat4(_mm_shuffle_ps(a.m, a.m, _MM_SHUFFLE(0, 0, 3, 2)))); + a = min(a, vfloat4(_mm_shuffle_ps(a.m, a.m, _MM_SHUFFLE(0, 0, 0, 1)))); + return vfloat4(_mm_shuffle_ps(a.m, a.m, _MM_SHUFFLE(0, 0, 0, 0))); +} + +/** + * @brief Return the horizontal maximum of a vector. 
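+ *
+ * A short example, combining hmin() above with hmax() to get a value range:
+ *
+ * @code
+ *   vfloat4 v(0.25f, 4.0f, -1.0f, 2.0f);
+ *   float lo = hmin(v).lane<0>();   // -1.0f
+ *   float hi = hmax(v).lane<0>();   //  4.0f
+ * @endcode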
+ */ +ASTCENC_SIMD_INLINE vfloat4 hmax(vfloat4 a) +{ + a = max(a, vfloat4(_mm_shuffle_ps(a.m, a.m, _MM_SHUFFLE(0, 0, 3, 2)))); + a = max(a, vfloat4(_mm_shuffle_ps(a.m, a.m, _MM_SHUFFLE(0, 0, 0, 1)))); + return vfloat4(_mm_shuffle_ps(a.m, a.m, _MM_SHUFFLE(0, 0, 0, 0))); +} + +/** + * @brief Return the horizontal sum of a vector as a scalar. + */ +ASTCENC_SIMD_INLINE float hadd_s(vfloat4 a) +{ + // Add top and bottom halves, lane 1/0 + __m128 t = _mm_add_ps(a.m, _mm_movehl_ps(a.m, a.m)); + + // Add top and bottom halves, lane 0 (_mm_hadd_ps exists but slow) + t = _mm_add_ss(t, _mm_shuffle_ps(t, t, 0x55)); + + return _mm_cvtss_f32(t); +} + +/** + * @brief Return the sqrt of the lanes in the vector. + */ +ASTCENC_SIMD_INLINE vfloat4 sqrt(vfloat4 a) +{ + return vfloat4(_mm_sqrt_ps(a.m)); +} + +/** + * @brief Return lanes from @c b if MSB of @c cond is set, else @c a. + */ +ASTCENC_SIMD_INLINE vfloat4 select(vfloat4 a, vfloat4 b, vmask4 cond) +{ +#if ASTCENC_SSE >= 41 + return vfloat4(_mm_blendv_ps(a.m, b.m, cond.m)); +#else + __m128 d = _mm_castsi128_ps(_mm_srai_epi32(_mm_castps_si128(cond.m), 31)); + return vfloat4(_mm_or_ps(_mm_and_ps(d, b.m), _mm_andnot_ps(d, a.m))); +#endif +} + +/** + * @brief Load a vector of gathered results from an array; + */ +ASTCENC_SIMD_INLINE vfloat4 gatherf(const float* base, vint4 indices) +{ +#if ASTCENC_AVX >= 2 + return vfloat4(_mm_i32gather_ps(base, indices.m, 4)); +#else + alignas(16) int idx[4]; + storea(indices, idx); + return vfloat4(base[idx[0]], base[idx[1]], base[idx[2]], base[idx[3]]); +#endif +} + +/** + * @brief Store a vector to an unaligned memory address. + */ +ASTCENC_SIMD_INLINE void store(vfloat4 a, float* p) +{ + _mm_storeu_ps(p, a.m); +} + +/** + * @brief Store a vector to a 16B aligned memory address. + */ +ASTCENC_SIMD_INLINE void storea(vfloat4 a, float* p) +{ + _mm_store_ps(p, a.m); +} + +/** + * @brief Return a integer value for a float vector, using truncation. + */ +ASTCENC_SIMD_INLINE vint4 float_to_int(vfloat4 a) +{ + return vint4(_mm_cvttps_epi32(a.m)); +} + +/** + * @brief Return a integer value for a float vector, using round-to-nearest. + */ +ASTCENC_SIMD_INLINE vint4 float_to_int_rtn(vfloat4 a) +{ + a = round(a); + return vint4(_mm_cvttps_epi32(a.m)); +} + +/** + * @brief Return a float value for an integer vector. + */ +ASTCENC_SIMD_INLINE vfloat4 int_to_float(vint4 a) +{ + return vfloat4(_mm_cvtepi32_ps(a.m)); +} + +/** + * @brief Return a float16 value for a float vector, using round-to-nearest. + */ +ASTCENC_SIMD_INLINE vint4 float_to_float16(vfloat4 a) +{ +#if ASTCENC_F16C >= 1 + __m128i packedf16 = _mm_cvtps_ph(a.m, 0); + __m128i f16 = _mm_cvtepu16_epi32(packedf16); + return vint4(f16); +#else + return vint4( + float_to_sf16(a.lane<0>()), + float_to_sf16(a.lane<1>()), + float_to_sf16(a.lane<2>()), + float_to_sf16(a.lane<3>())); +#endif +} + +/** + * @brief Return a float16 value for a float scalar, using round-to-nearest. + */ +static inline uint16_t float_to_float16(float a) +{ +#if ASTCENC_F16C >= 1 + __m128i f16 = _mm_cvtps_ph(_mm_set1_ps(a), 0); + return (uint16_t)_mm_cvtsi128_si32(f16); +#else + return float_to_sf16(a); +#endif +} + +/** + * @brief Return a float value for a float16 vector. 
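+ *
+ * A minimal round-trip sketch; all of these inputs are exactly
+ * representable as float16, so the conversion back is lossless:
+ *
+ * @code
+ *   vfloat4 v(0.25f, 1.0f, 2.0f, 65504.0f);
+ *   vint4 h = float_to_float16(v);       // 16-bit patterns, one per lane
+ *   vfloat4 back = float16_to_float(h);  // equal to v, lane for lane
+ * @endcode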
+ */ +ASTCENC_SIMD_INLINE vfloat4 float16_to_float(vint4 a) +{ +#if ASTCENC_F16C >= 1 + __m128i packed = _mm_packs_epi32(a.m, a.m); + __m128 f32 = _mm_cvtph_ps(packed); + return vfloat4(f32); +#else + return vfloat4( + sf16_to_float(a.lane<0>()), + sf16_to_float(a.lane<1>()), + sf16_to_float(a.lane<2>()), + sf16_to_float(a.lane<3>())); +#endif +} + +/** + * @brief Return a float value for a float16 scalar. + */ +ASTCENC_SIMD_INLINE float float16_to_float(uint16_t a) +{ +#if ASTCENC_F16C >= 1 + __m128i packed = _mm_set1_epi16(a); + __m128 f32 = _mm_cvtph_ps(packed); + return _mm_cvtss_f32(f32); +#else + return sf16_to_float(a); +#endif +} + +/** + * @brief Return a float value as an integer bit pattern (i.e. no conversion). + * + * It is a common trick to convert floats into integer bit patterns, perform + * some bit hackery based on knowledge they are IEEE 754 layout, and then + * convert them back again. This is the first half of that flip. + */ +ASTCENC_SIMD_INLINE vint4 float_as_int(vfloat4 a) +{ + return vint4(_mm_castps_si128(a.m)); +} + +/** + * @brief Return a integer value as a float bit pattern (i.e. no conversion). + * + * It is a common trick to convert floats into integer bit patterns, perform + * some bit hackery based on knowledge they are IEEE 754 layout, and then + * convert them back again. This is the second half of that flip. + */ +ASTCENC_SIMD_INLINE vfloat4 int_as_float(vint4 v) +{ + return vfloat4(_mm_castsi128_ps(v.m)); +} + +#endif // #ifndef ASTC_VECMATHLIB_SSE_4_H_INCLUDED diff --git a/libkram/astc-encoder/astcenc_weight_align.cpp b/libkram/astc-encoder/astcenc_weight_align.cpp index e329b13d..97da89d1 100644 --- a/libkram/astc-encoder/astcenc_weight_align.cpp +++ b/libkram/astc-encoder/astcenc_weight_align.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy @@ -51,46 +51,19 @@ #include #include -#if ASTCENC_SIMD_WIDTH <= 4 - #define ANGULAR_STEPS 44 -#elif ASTCENC_SIMD_WIDTH == 8 - // AVX code path loops over these tables 8 elements at a time, - // so make sure to have their size a multiple of 8. - #define ANGULAR_STEPS 48 -#else - #error Unknown SIMD width -#endif -static_assert((ANGULAR_STEPS % ASTCENC_SIMD_WIDTH) == 0, "ANGULAR_STEPS should be multiple of ASTCENC_SIMD_WIDTH"); - -alignas(ASTCENC_VECALIGN) static const float angular_steppings[ANGULAR_STEPS] = { - 1.0f, 1.25f, 1.5f, 1.75f, - - 2.0f, 2.5f, 3.0f, 3.5f, - 4.0f, 4.5f, 5.0f, 5.5f, - 6.0f, 6.5f, 7.0f, 7.5f, - - 8.0f, 9.0f, 10.0f, 11.0f, - 12.0f, 13.0f, 14.0f, 15.0f, - 16.0f, 17.0f, 18.0f, 19.0f, - 20.0f, 21.0f, 22.0f, 23.0f, - 24.0f, 25.0f, 26.0f, 27.0f, - 28.0f, 29.0f, 30.0f, 31.0f, - 32.0f, 33.0f, 34.0f, 35.0f, -#if ANGULAR_STEPS >= 48 - // This is "redundant" and only used in more-than-4-wide - // SIMD code paths, to make the steps table size - // be a multiple of SIMD width. Values are replicated - // from last entry so that AVX2 and SSE code paths - // return the same results. 
- 35.0f, 35.0f, 35.0f, 35.0f, -#endif -}; - -alignas(ASTCENC_VECALIGN) static float stepsizes[ANGULAR_STEPS]; -alignas(ASTCENC_VECALIGN) static float stepsizes_sqr[ANGULAR_STEPS]; +#define ANGULAR_STEPS 40 +static_assert((ANGULAR_STEPS % ASTCENC_SIMD_WIDTH) == 0, + "ANGULAR_STEPS must be multiple of ASTCENC_SIMD_WIDTH"); static int max_angular_steps_needed_for_quant_level[13]; +// Yes, the next-to-last entry is supposed to have the value 33. This because +// the 32-weight mode leaves a double-sized hole in the middle of the weight +// space, so we are better off matching 33 weights than 32. +static const int quantization_steps_for_level[13] = { + 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 33, 36 +}; + // Store a reduced sin/cos table for 64 possible weight values; this causes // slight quality loss compared to using sin() and cos() directly. Must be 2^N. #define SINCOS_STEPS 64 @@ -100,30 +73,23 @@ alignas(ASTCENC_VECALIGN) static float cos_table[SINCOS_STEPS][ANGULAR_STEPS]; void prepare_angular_tables() { - int max_angular_steps_needed_for_quant_steps[40]; + int max_angular_steps_needed_for_quant_steps[ANGULAR_STEPS + 1]; for (int i = 0; i < ANGULAR_STEPS; i++) { - stepsizes[i] = 1.0f / angular_steppings[i]; - stepsizes_sqr[i] = stepsizes[i] * stepsizes[i]; + float angle_step = (float)(i + 1); for (int j = 0; j < SINCOS_STEPS; j++) { - sin_table[j][i] = static_cast(sinf((2.0f * astc::PI / (SINCOS_STEPS - 1.0f)) * angular_steppings[i] * j)); - cos_table[j][i] = static_cast(cosf((2.0f * astc::PI / (SINCOS_STEPS - 1.0f)) * angular_steppings[i] * j)); + sin_table[j][i] = static_cast(sinf((2.0f * astc::PI / (SINCOS_STEPS - 1.0f)) * angle_step * static_cast(j))); + cos_table[j][i] = static_cast(cosf((2.0f * astc::PI / (SINCOS_STEPS - 1.0f)) * angle_step * static_cast(j))); } - int p = astc::flt2int_rd(angular_steppings[i]) + 1; - max_angular_steps_needed_for_quant_steps[p] = MIN(i + 1, ANGULAR_STEPS - 1); + max_angular_steps_needed_for_quant_steps[i + 1] = astc::min(i + 1, ANGULAR_STEPS - 1); } - // yes, the next-to-last entry is supposed to have the value 33. This because under - // ASTC, the 32-weight mode leaves a double-sized hole in the middle of the - // weight space, so we are better off matching 33 weights than 32. - static const int steps_of_level[] = { 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 33, 36 }; - for (int i = 0; i < 13; i++) { - max_angular_steps_needed_for_quant_level[i] = max_angular_steps_needed_for_quant_steps[steps_of_level[i]]; + max_angular_steps_needed_for_quant_level[i] = max_angular_steps_needed_for_quant_steps[quantization_steps_for_level[i]]; } } @@ -137,10 +103,11 @@ static void compute_angular_offsets( int max_angular_steps, float* offsets ) { - alignas(ASTCENC_VECALIGN) float anglesum_x[ANGULAR_STEPS]; - alignas(ASTCENC_VECALIGN) float anglesum_y[ANGULAR_STEPS]; - std::memset(anglesum_x, 0, max_angular_steps*sizeof(anglesum_x[0])); - std::memset(anglesum_y, 0, max_angular_steps*sizeof(anglesum_y[0])); + promise(samplecount > 0); + promise(max_angular_steps > 0); + + alignas(ASTCENC_VECALIGN) float anglesum_x[ANGULAR_STEPS] { 0 }; + alignas(ASTCENC_VECALIGN) float anglesum_y[ANGULAR_STEPS] { 0 }; // compute the angle-sums. 
for (int i = 0; i < samplecount; i++) @@ -155,31 +122,35 @@ static void compute_angular_offsets( const float *cosptr = cos_table[isample]; vfloat sample_weightv(sample_weight); - for (int j = 0; j < max_angular_steps; j += ASTCENC_SIMD_WIDTH) // arrays are multiple of SIMD width (ANGULAR_STEPS), safe to overshoot max + // Arrays are multiple of SIMD width (ANGULAR_STEPS), safe to overshoot max + for (int j = 0; j < max_angular_steps; j += ASTCENC_SIMD_WIDTH) { vfloat cp = loada(&cosptr[j]); vfloat sp = loada(&sinptr[j]); vfloat ax = loada(&anglesum_x[j]) + cp * sample_weightv; vfloat ay = loada(&anglesum_y[j]) + sp * sample_weightv; - store(ax, &anglesum_x[j]); - store(ay, &anglesum_y[j]); + storea(ax, &anglesum_x[j]); + storea(ay, &anglesum_y[j]); } } // post-process the angle-sums vfloat mult = vfloat(1.0f / (2.0f * astc::PI)); - for (int i = 0; i < max_angular_steps; i += ASTCENC_SIMD_WIDTH) // arrays are multiple of SIMD width (ANGULAR_STEPS), safe to overshoot max + vfloat rcp_stepsize = vfloat::lane_id() + vfloat(1.0f); + // Arrays are multiple of SIMD width (ANGULAR_STEPS), safe to overshoot max + for (int i = 0; i < max_angular_steps; i += ASTCENC_SIMD_WIDTH) { + vfloat ssize = 1.0f / rcp_stepsize; + rcp_stepsize = rcp_stepsize + vfloat(ASTCENC_SIMD_WIDTH); vfloat angle = atan2(loada(&anglesum_y[i]), loada(&anglesum_x[i])); - vfloat ofs = angle * (loada(&stepsizes[i]) * mult); - store(ofs, &offsets[i]); + vfloat ofs = angle * ssize * mult; + storea(ofs, &offsets[i]); } } // for a given step-size and a given offset, compute the // lowest and highest weight that results from quantizing using the stepsize & offset. // also, compute the resulting error. - static void compute_lowest_and_highest_weight( int samplecount, const float *samples, @@ -193,7 +164,12 @@ static void compute_lowest_and_highest_weight( float *cut_low_weight_error, float *cut_high_weight_error ) { - // Arrays are always multiple of SIMD width (ANGULAR_STEPS), so this is safe even if overshoot max + promise(samplecount > 0); + promise(max_angular_steps > 0); + + vfloat rcp_stepsize = vfloat::lane_id() + vfloat(1.0f); + + // Arrays are ANGULAR_STEPS long, so always safe to run full vectors for (int sp = 0; sp < max_angular_steps; sp += ASTCENC_SIMD_WIDTH) { vint minidx(128); @@ -201,36 +177,34 @@ static void compute_lowest_and_highest_weight( vfloat errval = vfloat::zero(); vfloat cut_low_weight_err = vfloat::zero(); vfloat cut_high_weight_err = vfloat::zero(); - vfloat rcp_stepsize = loada(&angular_steppings[sp]); vfloat offset = loada(&offsets[sp]); vfloat scaled_offset = rcp_stepsize * offset; for (int j = 0; j < samplecount; ++j) { - vfloat wt = load1a(&sample_weights[j]); - vfloat sval = load1a(&samples[j]) * rcp_stepsize - scaled_offset; + vfloat wt = load1(&sample_weights[j]); + vfloat sval = load1(&samples[j]) * rcp_stepsize - scaled_offset; vfloat svalrte = round(sval); - vint idxv = floatToInt(svalrte); + vint idxv = float_to_int(svalrte); vfloat dif = sval - svalrte; vfloat dwt = dif * wt; errval = errval + dwt * dif; - // Reset tracker on min hit. + // Reset tracker on min hit vmask mask = idxv < minidx; minidx = select(minidx, idxv, mask); cut_low_weight_err = select(cut_low_weight_err, vfloat::zero(), mask); - // Accumulate on min hit. + // Accumulate on min hit mask = idxv == minidx; - minidx = select(minidx, idxv, mask); vfloat accum = cut_low_weight_err + wt - vfloat(2.0f) * dwt; cut_low_weight_err = select(cut_low_weight_err, accum, mask); - // Reset tracker on max hit. 
+ // Reset tracker on max hit mask = idxv > maxidx; maxidx = select(maxidx, idxv, mask); cut_high_weight_err = select(cut_high_weight_err, vfloat::zero(), mask); - // Accumulate on max hit. + // Accumulate on max hit mask = idxv == maxidx; accum = cut_high_weight_err + wt + vfloat(2.0f) * dwt; cut_high_weight_err = select(cut_high_weight_err, accum, mask); @@ -240,34 +214,35 @@ static void compute_lowest_and_highest_weight( vint span = maxidx - minidx + vint(1); span = min(span, vint(max_quantization_steps + 3)); span = max(span, vint(2)); - store(minidx, &lowest_weight[sp]); - store(span, &weight_span[sp]); + storea(minidx, &lowest_weight[sp]); + storea(span, &weight_span[sp]); // The cut_(lowest/highest)_weight_error indicate the error that // results from forcing samples that should have had the weight value // one step (up/down). - vfloat errscale = loada(&stepsizes_sqr[sp]); - store(errval * errscale, &error[sp]); - store(cut_low_weight_err * errscale, &cut_low_weight_error[sp]); - store(cut_high_weight_err * errscale, &cut_high_weight_error[sp]); + vfloat ssize = 1.0f / rcp_stepsize; + vfloat errscale = ssize * ssize; + storea(errval * errscale, &error[sp]); + storea(cut_low_weight_err * errscale, &cut_low_weight_error[sp]); + storea(cut_high_weight_err * errscale, &cut_high_weight_error[sp]); + + rcp_stepsize = rcp_stepsize + vfloat(ASTCENC_SIMD_WIDTH); } } // main function for running the angular algorithm. -static void compute_angular_endpoints_for_quantization_levels( +static void compute_angular_endpoints_for_quant_levels( int samplecount, const float* samples, const float* sample_weights, - int max_quantization_level, + int max_quant_level, float low_value[12], float high_value[12] ) { - static const int quantization_steps_for_level[13] = { 2, 3, 4, 5, 6, 8, 10, 12, 16, 20, 24, 33, 36 }; - - int max_quantization_steps = quantization_steps_for_level[max_quantization_level + 1]; + int max_quantization_steps = quantization_steps_for_level[max_quant_level + 1]; alignas(ASTCENC_VECALIGN) float angular_offsets[ANGULAR_STEPS]; - int max_angular_steps = max_angular_steps_needed_for_quant_level[max_quantization_level]; + int max_angular_steps = max_angular_steps_needed_for_quant_level[max_quant_level]; compute_angular_offsets(samplecount, samples, sample_weights, max_angular_steps, angular_offsets); alignas(ASTCENC_VECALIGN) int32_t lowest_weight[ANGULAR_STEPS]; @@ -292,6 +267,7 @@ static void compute_angular_endpoints_for_quantization_levels( cut_low_weight[i] = 0; } + promise(max_angular_steps > 0); for (int i = 0; i < max_angular_steps; i++) { int idx_span = weight_span[i]; @@ -327,7 +303,6 @@ static void compute_angular_endpoints_for_quantization_levels( best_scale[idx_span - 2] = i; cut_low_weight[idx_span - 2] = 1; } - } // if we got a better error-value for a low sample count than for a high one, @@ -342,7 +317,7 @@ static void compute_angular_endpoints_for_quantization_levels( } } - for (int i = 0; i <= max_quantization_level; i++) + for (int i = 0; i <= max_quant_level; i++) { int q = quantization_steps_for_level[i]; int bsi = best_scale[q]; @@ -350,26 +325,30 @@ static void compute_angular_endpoints_for_quantization_levels( // Did we find anything? // TODO: Can we do better than bsi = 0 here. We should at least // propagate an error (and move the printf into the CLI). 
+#if defined(NDEBUG) if (bsi < 0) { - KLOGW("Astcenc", "Unable to find encoding within specified error limit\n"); + printf("WARNING: Unable to find encoding within specified error limit\n"); bsi = 0; } +else + bsi = astc::max(0, bsi); +#endif - float stepsize = stepsizes[bsi]; + float stepsize = 1.0f / (1.0f + (float)bsi); int lwi = lowest_weight[bsi] + cut_low_weight[q]; int hwi = lwi + q - 1; float offset = angular_offsets[bsi]; - low_value[i] = offset + lwi * stepsize; - high_value[i] = offset + hwi * stepsize; + low_value[i] = offset + static_cast(lwi) * stepsize; + high_value[i] = offset + static_cast(hwi) * stepsize; } } // helper functions that will compute ideal angular-endpoints // for a given set of weights and a given block size descriptors void compute_angular_endpoints_1plane( - float mode_cutoff, + bool only_always, const block_size_descriptor* bsd, const float* decimated_quantized_weights, const float* decimated_weights, @@ -379,32 +358,29 @@ void compute_angular_endpoints_1plane( float low_values[MAX_DECIMATION_MODES][12]; float high_values[MAX_DECIMATION_MODES][12]; - for (int i = 0; i < MAX_DECIMATION_MODES; i++) + for (int i = 0; i < bsd->decimation_mode_count; i++) { - // TODO: Do this at build time and cache the result - int samplecount = bsd->decimation_mode_samples[i]; - int quant_mode = bsd->decimation_mode_maxprec_1plane[i]; - float percentile = bsd->decimation_mode_percentile[i]; - int permit_encode = bsd->permit_encode[i]; - if (permit_encode == 0 || samplecount < 1 || quant_mode < 0 || percentile > mode_cutoff) + const decimation_mode& dm = bsd->decimation_modes[i]; + if (dm.maxprec_1plane < 0 || (only_always && !dm.percentile_always) || !dm.percentile_hit) { continue; } - compute_angular_endpoints_for_quantization_levels(samplecount, + int samplecount = bsd->decimation_tables[i]->weight_count; + compute_angular_endpoints_for_quant_levels(samplecount, decimated_quantized_weights + i * MAX_WEIGHTS_PER_BLOCK, - decimated_weights + i * MAX_WEIGHTS_PER_BLOCK, quant_mode, low_values[i], high_values[i]); + decimated_weights + i * MAX_WEIGHTS_PER_BLOCK, dm.maxprec_1plane, low_values[i], high_values[i]); } - for (int i = 0, ni = bsd->block_mode_packed_count; i < ni; ++i) + for (int i = 0; i < bsd->block_mode_count; ++i) { - const block_mode& bm = bsd->block_modes_packed[i]; - if (bm.is_dual_plane != 0 || bm.percentile > mode_cutoff) + const block_mode& bm = bsd->block_modes[i]; + if (bm.is_dual_plane || (only_always && !bm.percentile_always) || !bm.percentile_hit) { continue; } - int quant_mode = bm.quantization_mode; + int quant_mode = bm.quant_mode; int decim_mode = bm.decimation_mode; low_value[i] = low_values[decim_mode][quant_mode]; @@ -413,7 +389,7 @@ void compute_angular_endpoints_1plane( } void compute_angular_endpoints_2planes( - float mode_cutoff, + bool only_always, const block_size_descriptor* bsd, const float* decimated_quantized_weights, const float* decimated_weights, @@ -427,37 +403,34 @@ void compute_angular_endpoints_2planes( float low_values2[MAX_DECIMATION_MODES][12]; float high_values2[MAX_DECIMATION_MODES][12]; - for (int i = 0; i < MAX_DECIMATION_MODES; i++) + for (int i = 0; i < bsd->decimation_mode_count; i++) { - // TODO: Do this at build time and cache the result - int samplecount = bsd->decimation_mode_samples[i]; - int quant_mode = bsd->decimation_mode_maxprec_2planes[i]; - float percentile = bsd->decimation_mode_percentile[i]; - int permit_encode = bsd->permit_encode[i]; - - if (permit_encode == 0 || samplecount < 1 || quant_mode < 0 || 
percentile > mode_cutoff) + const decimation_mode& dm = bsd->decimation_modes[i]; + if (dm.maxprec_2planes < 0 || (only_always && !dm.percentile_always) || !dm.percentile_hit) { continue; } - compute_angular_endpoints_for_quantization_levels(samplecount, - decimated_quantized_weights + 2 * i * MAX_WEIGHTS_PER_BLOCK, - decimated_weights + 2 * i * MAX_WEIGHTS_PER_BLOCK, quant_mode, low_values1[i], high_values1[i]); + int samplecount = bsd->decimation_tables[i]->weight_count; + + compute_angular_endpoints_for_quant_levels(samplecount, + decimated_quantized_weights + 2 * i * MAX_WEIGHTS_PER_BLOCK, + decimated_weights + 2 * i * MAX_WEIGHTS_PER_BLOCK, dm.maxprec_2planes, low_values1[i], high_values1[i]); - compute_angular_endpoints_for_quantization_levels(samplecount, - decimated_quantized_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK, - decimated_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK, quant_mode, low_values2[i], high_values2[i]); + compute_angular_endpoints_for_quant_levels(samplecount, + decimated_quantized_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK, + decimated_weights + (2 * i + 1) * MAX_WEIGHTS_PER_BLOCK, dm.maxprec_2planes, low_values2[i], high_values2[i]); } - for (int i = 0, ni = bsd->block_mode_packed_count; i < ni; ++i) + for (int i = 0; i < bsd->block_mode_count; ++i) { - const block_mode& bm = bsd->block_modes_packed[i]; - if (bm.is_dual_plane != 1 || bm.percentile > mode_cutoff) + const block_mode& bm = bsd->block_modes[i]; + if ((!bm.is_dual_plane) || (only_always && !bm.percentile_always) || !bm.percentile_hit) { continue; } - int quant_mode = bm.quantization_mode; + int quant_mode = bm.quant_mode; int decim_mode = bm.decimation_mode; low_value1[i] = low_values1[decim_mode][quant_mode]; diff --git a/libkram/astc-encoder/astcenc_weight_quant_xfer_tables.cpp b/libkram/astc-encoder/astcenc_weight_quant_xfer_tables.cpp index d87bb5c2..d2191496 100644 --- a/libkram/astc-encoder/astcenc_weight_quant_xfer_tables.cpp +++ b/libkram/astc-encoder/astcenc_weight_quant_xfer_tables.cpp @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // ---------------------------------------------------------------------------- -// Copyright 2011-2020 Arm Limited +// Copyright 2011-2021 Arm Limited // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 8a82e448..10ce6f7a 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -75,11 +75,15 @@ bool LoadPng(const uint8_t* data, size_t dataSize, Image& sourceImage) case LCT_GREY_ALPHA: hasColor = false; break; + case LCT_MAX_OCTET_VALUE: case LCT_RGB: case LCT_RGBA: case LCT_PALETTE: // ? hasColor = true; break; + + hasColor = false; + break;; } switch (state.info_png.color.colortype) { @@ -87,6 +91,7 @@ bool LoadPng(const uint8_t* data, size_t dataSize, Image& sourceImage) case LCT_RGB: hasAlpha = false; break; + case LCT_MAX_OCTET_VALUE: case LCT_RGBA: case LCT_GREY_ALPHA: case LCT_PALETTE: // ? @@ -1254,6 +1259,7 @@ string kramInfoPNGToString(const string& srcFilename, const uint8_t* data, uint6 case LCT_GREY_ALPHA: hasColor = false; break; + case LCT_MAX_OCTET_VALUE: case LCT_RGB: case LCT_RGBA: case LCT_PALETTE: // ? @@ -1266,6 +1272,7 @@ string kramInfoPNGToString(const string& srcFilename, const uint8_t* data, uint6 case LCT_RGB: hasAlpha = false; break; + case LCT_MAX_OCTET_VALUE: case LCT_RGBA: case LCT_GREY_ALPHA: case LCT_PALETTE: // ? 
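
The colortype switches above (in LoadPng and again in kramInfoPNGToString) reduce to a small mapping from lodepng's colortype onto the hasColor/hasAlpha flags. A minimal sketch of that mapping, assuming lodepng's LodePNGColorType enum (LCT_GREY, LCT_GREY_ALPHA, LCT_RGB, LCT_RGBA, LCT_PALETTE, LCT_MAX_OCTET_VALUE); the helper name is illustrative, and LCT_PALETTE plus the LCT_MAX_OCTET_VALUE sentinel are treated pessimistically as having both color and alpha, the same way the switches do:

    static void colorFlagsForPNG(LodePNGColorType colortype, bool& hasColor, bool& hasAlpha)
    {
        // grey variants carry no color; grey and rgb carry no alpha
        hasColor = !(colortype == LCT_GREY || colortype == LCT_GREY_ALPHA);
        hasAlpha = !(colortype == LCT_GREY || colortype == LCT_RGB);
    }
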
diff --git a/libkram/kram/KramConfig.h b/libkram/kram/KramConfig.h index cbf10b64..af16acd2 100644 --- a/libkram/kram/KramConfig.h +++ b/libkram/kram/KramConfig.h @@ -311,4 +311,11 @@ inline half4 toHalf4(const float4& vv) } #endif +// this just strips args +#define macroUnusedArg(x) + +// this just strips args +#define macroUnusedVar(x) (void)x + + } // namespace simd diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 6e71f5ed..92c915c6 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -777,7 +777,11 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma dstImageASTC.dim_z = 1; // Not using 3D blocks, not supported on iOS //dstImageASTC.dim_pad = 0; dstImageASTC.data_type = ASTCENC_TYPE_U8; - dstImageASTC.data = outputTexture.data(); + + + // encode/encode still setup on array of 2d slices, so need address of data + uint8_t* outData = outputTexture.data(); + dstImageASTC.data = (void**)&outData; int32_t srcDataLength = (int32_t)srcMipLevel.length; Int2 blockDims = srcImage.blockDims(); @@ -790,20 +794,20 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma astcenc_config config; astcenc_error error = astcenc_config_init( - profile, blockDims.x, blockDims.y, 1, ASTCENC_PRE_FAST, ASTCENC_FLG_DECOMPRESS_ONLY, config); + profile, blockDims.x, blockDims.y, 1, ASTCENC_PRE_FAST, ASTCENC_FLG_DECOMPRESS_ONLY, &config); if (error != ASTCENC_SUCCESS) { return false; } astcenc_context* codec_context = nullptr; - error = astcenc_context_alloc(config, 1, &codec_context); + error = astcenc_context_alloc(&config, 1, &codec_context); if (error != ASTCENC_SUCCESS) { return false; } // no swizzle astcenc_swizzle swizzleDecode = {ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A}; - error = astcenc_decompress_image(codec_context, srcData, srcDataLength, dstImageASTC, swizzleDecode); + error = astcenc_decompress_image(codec_context, srcData, srcDataLength, &dstImageASTC, swizzleDecode, 0); astcenc_context_free(codec_context); @@ -908,26 +912,41 @@ bool Image::resizeImage(int32_t wResize, int32_t hResize, bool resizePow2, Image return true; } +// functional ctor +inline float4 float4m(float x, float y, float z, float w) +{ + return { x, y, z, w }; +} + // TODO: to hook this up, read 16u png into pixelsFlat, then gen an 8-bit normal xy // from that. This is more like SDF where a single height is used. 
-void Image::heightToNormals(float scale) +void Image::heightToNormals(float scale, bool isWrap) { int32_t w = _width; int32_t h = _height; - // TODO: hook these up, but needs src != dst or copy - bool isWrapY = false; - bool isWrapX = false; + bool isWrapY = isWrap; + bool isWrapX = isWrap; // 2.0 is distance betwen +1 and -1 - float scaleX = scale / 2.0; - float scaleY = scale / 2.0; + // don't scale by this, want caller to be able to pass 1.0 as default scale not 2.0 + float scaleX = scale; // / 2.0; + float scaleY = scale; // / 2.0; // src/dst the same here // may need to copy a row/column of pixels for wrap const float4* srcPixels = _pixelsFloat.data(); float4* dstPixels = (float4*)_pixelsFloat.data(); + const Color* srcPixels8 = (const Color*)_pixels.data(); + Color* dstPixels8 = (Color*)_pixels.data(); + bool isFloat = _pixels.empty(); + + if (!isFloat) { + scaleX /= 255.0f; + scaleY /= 255.0f; + } + for (int32_t y = 0; y < h; ++y) { int32_t y0 = y; int32_t ym = y - 1; @@ -962,27 +981,58 @@ void Image::heightToNormals(float scale) if (xp > (w - 1)) xp = w - 1; } - // cross pattern - // height channel is in x - float cN = srcPixels[ym + x0].x; - float cS = srcPixels[yp + x0].x; - float cE = srcPixels[y0 + xp].x; - float cW = srcPixels[y0 + xm].x; - - // up is N, so this is rhcs - float dx = (cE - cW) * scaleX; - float dy = (cN - cS) * scaleY; - - float len = sqrtf(dx * dx + dy * dy + 1.0f); - - dx /= len; - dy /= len; - - // write out the result - float4& dstPixel = dstPixels[y0 + x]; + + if (isFloat) { + + // cross pattern + // height channel is in x + float cN = srcPixels[ym + x0].x; + float cS = srcPixels[yp + x0].x; + float cE = srcPixels[y0 + xp].x; + float cW = srcPixels[y0 + xm].x; + + // up is N, so this is rhcs + float dx = (cE - cW) * scaleX; + float dy = (cN - cS) * scaleY; + + float4 normal = float4m(dx, dy, 1.0f, 0.0f); + normal = normalize(normal); + + // write out the result + float4& dstPixel = dstPixels[y0 + x]; + + dstPixel.x = normal.x; + dstPixel.y = normal.y; + dstPixel.z = normal.z; // can reconstruct + + // store height in alpha + dstPixel.w = srcPixels[y0 + x0].x; + } + else { + // cross pattern + // height channel is in x + uint8_t cN = srcPixels8[4 * (ym + x0)].r; // assumes first elem (.r) is height channel + uint8_t cS = srcPixels8[4 * (yp + x0)].r; + uint8_t cE = srcPixels8[4 * (y0 + xp)].r; + uint8_t cW = srcPixels8[4 * (y0 + xm)].r; + + float dx = (cE - cW) * scaleX; + float dy = (cN - cS) * scaleY; + + float4 normal = float4m(dx, dy, 1.0f, 0.0f); + normal = normalize(normal); + normal *= 127.5f; + + Color& dstPixel8 = dstPixels8[y0 + x]; - dstPixel.x = dx; - dstPixel.y = dy; + dstPixel8.r = normal.x; + dstPixel8.g = normal.y; + dstPixel8.b = normal.z; // can reconstruct + + // store height in alpha + dstPixel8.a = srcPixels8[y0 + x0].r; + } + } } } @@ -2031,23 +2081,25 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, // flags |= ASTCENC_FLG_USE_ALPHA_WEIGHT; // convert quality to present - astcenc_preset preset = ASTCENC_PRE_FAST; - if (info.quality <= 10) { - preset = ASTCENC_PRE_FAST; - } - else if (info.quality <= 50) { - preset = ASTCENC_PRE_MEDIUM; - } - else if (info.quality < 90) { - preset = ASTCENC_PRE_THOROUGH; - } - else { - preset = ASTCENC_PRE_EXHAUSTIVE; - } + float quality = info.quality; + +// ASTCENC_PRE_FAST; +// if (info.quality <= 10) { +// preset = ASTCENC_PRE_FAST; +// } +// else if (info.quality <= 50) { +// preset = ASTCENC_PRE_MEDIUM; +// } +// else if (info.quality < 90) { +// preset = 
ASTCENC_PRE_THOROUGH; +// } +// else { +// preset = ASTCENC_PRE_EXHAUSTIVE; +// } astcenc_config config; astcenc_error error = astcenc_config_init( - profile, blockDims.x, blockDims.y, 1, preset, flags, config); + profile, blockDims.x, blockDims.y, 1, quality, flags, &config); if (error != ASTCENC_SUCCESS) { return false; } @@ -2099,11 +2151,11 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, // have 2d image // hacked the src pixel handling to only do slices, not a 3D texture if (info.isHDR) { - srcImage.data = (void*)srcPixelDataFloat4; + srcImage.data = (void**)&srcPixelDataFloat4; srcImage.data_type = ASTCENC_TYPE_F32; } else { - srcImage.data = (void*)srcPixelData; + srcImage.data = (void**)&srcPixelData; srcImage.data_type = ASTCENC_TYPE_U8; } @@ -2113,7 +2165,7 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, // could this be built once, and reused across all mips astcenc_context* codec_context = nullptr; - error = astcenc_context_alloc(config, 1, &codec_context); + error = astcenc_context_alloc(&config, 1, &codec_context); if (error != ASTCENC_SUCCESS) { return false; } @@ -2144,7 +2196,7 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, } error = astcenc_compress_image( - codec_context, srcImage, swizzleEncode, + codec_context, &srcImage, swizzleEncode, outputTexture.data.data(), mipStorageSize, 0); // threadIndex @@ -2153,7 +2205,7 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, } #else error = astcenc_compress_image( - codec_context, srcImage, swizzleEncode, + codec_context, &srcImage, swizzleEncode, outputTexture.data.data(), mipStorageSize, 0); // threadIndex #endif diff --git a/libkram/kram/KramImage.h b/libkram/kram/KramImage.h index 535a74b2..c69683e9 100644 --- a/libkram/kram/KramImage.h +++ b/libkram/kram/KramImage.h @@ -86,7 +86,7 @@ class Image { vector& tmpImage) const; // convert x field to normals - void heightToNormals(float scale); + void heightToNormals(float scale, bool isWrap = false); private: // pixel size of image diff --git a/plugin/kps/KPS.cpp b/plugin/kps/KPS.cpp index 5239a7b8..e3524f85 100755 --- a/plugin/kps/KPS.cpp +++ b/plugin/kps/KPS.cpp @@ -83,8 +83,6 @@ extern DialogFormat FormatToDialog(DDS_Format fmt); extern DDS_Format DialogToFormat(DialogFormat fmt); extern MyMTLPixelFormat FormatToPixelFormat(DDS_Format fmt); -// this just strips args -#define macroUnusedArg(x) // global needed by a bunch of Photoshop SDK routines SPBasicSuite *sSPBasic = NULL; From 58de0f6945cb0e216430e69f79cdc918a2ccd1cf Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 27 Mar 2021 20:47:42 -0700 Subject: [PATCH 021/901] kramv - more hud data on block and mip pixel cleanup float4 creation. 
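
For the new block/mip readout: mip dimensions come from halving the image
bounds once per level with round-up ((d + 1) >> 1), the cursor uv is scaled by
those dims to get the mip pixel, and block formats then divide by the block
footprint. A worked example with illustrative numbers: a 100x60 image at
mipLOD 2 has mip dims 25x15, so uv (0.5, 0.5) reads mpx 12 7, and with 4x4
blocks that is bpx 3 1. The numbers are still approximate when the source was
resized (see the TODO in updateEyedropper).
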
early out if ptrs same on heightToNormals add block and mipDims to hud, and a few more letters in hud --- kramv/KramRenderer.mm | 8 +-- kramv/KramViewerMain.mm | 73 ++++++++++++++++----- libkram/bc7enc/bc7enc.cpp | 4 +- libkram/kram/KramConfig.h | 122 +++++++++++++++++++++++++++++------- libkram/kram/KramImage.cpp | 38 ++++++++--- libkram/kram/KramMipper.cpp | 4 +- libkram/kram/KramMipper.h | 4 +- libkram/kram/float4a.h | 2 +- libkram/squish/maths.h | 2 +- 9 files changed, 195 insertions(+), 62 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index d7471d55..3b3b74e7 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -585,7 +585,7 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex // have one of these for each texture added to the viewer float scaleX = MAX(1, texture.width); float scaleY = MAX(1, texture.height); - _modelMatrix = float4x4(simd_make_float4(scaleX, scaleY, 1.0f, 1.0f)); + _modelMatrix = float4x4(float4m(scaleX, scaleY, 1.0f, 1.0f)); _modelMatrix = _modelMatrix * matrix4x4_translation(0.0f, 0.0f, -1.0); return YES; @@ -596,7 +596,7 @@ - (float4x4)computeImageTransform:(float)panX panY:(float)panY zoom:(float)zoom float4x4 panTransform = matrix4x4_translation(-panX, panY, 0.0); // scale - float4x4 viewMatrix = float4x4(simd_make_float4(zoom, zoom, 1.0f, 1.0f)); + float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); viewMatrix = panTransform * viewMatrix; return _projectionMatrix * viewMatrix * _modelMatrix; @@ -656,7 +656,7 @@ - (void)_updateGameState float4x4 panTransform = matrix4x4_translation(-_showSettings->panX, _showSettings->panY, 0.0); // scale - _viewMatrix = float4x4(simd_make_float4(_showSettings->zoom, _showSettings->zoom, 1.0f, 1.0f)); + _viewMatrix = float4x4(float4m(_showSettings->zoom, _showSettings->zoom, 1.0f, 1.0f)); _viewMatrix = panTransform * _viewMatrix; // viewMatrix should typically be the inverse @@ -864,7 +864,7 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie UniformsLevel uniformsLevel; - uniformsLevel.drawOffset = simd_make_float2(0.0f); + uniformsLevel.drawOffset = float2m(0.0f); if (_showSettings->isPreview) { // upload this on each face drawn, since want to be able to draw all mips/levels at once diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 08b78d75..af562df3 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -423,7 +423,7 @@ - (void)doZoomMath:(float)newZoom newPan:(float2&)newPan { float4x4 mInv = simd_inverse(projectionViewModelMatrix); mInv.columns[3].w = 1.0f; // fixes inverse, calls always leaves m[3][3] = 0.999 - float4 pixel = mInv * simd_make_float4(clipPoint.x, clipPoint.y, 1.0f, 1.0f); + float4 pixel = mInv * float4m(clipPoint.x, clipPoint.y, 1.0f, 1.0f); //pixel /= pixel.w; // in case perspective used // allow pan to extend to show all @@ -489,8 +489,8 @@ - (void)handleGesture:(NSGestureRecognizer *)gestureRecognizer // https://stackoverflow.com/questions/30002361/image-zoom-centered-on-mouse-position // find the cursor location with respect to the image - float4 bottomLeftCorner = simd_make_float4(-0.5, -0.5f, 0.0f, 1.0f); - float4 topRightCorner = simd_make_float4(0.5, 0.5f, 0.0f, 1.0f); + float4 bottomLeftCorner = float4m(-0.5, -0.5f, 0.0f, 1.0f); + float4 topRightCorner = float4m(0.5, 0.5f, 0.0f, 1.0f); Renderer* renderer = (Renderer*)self.delegate; float4x4 newMatrix = [renderer computeImageTransform:_showSettings->panX panY:_showSettings->panY zoom:zoom]; @@ -625,7 +625,7 @@ - 
(void)updateEyedropper { float4x4 mInv = simd_inverse(projectionViewModelMatrix); mInv.columns[3].w = 1.0f; // fixes inverse, calls always leaves m[3][3] = 0.999 - float4 pixel = mInv * simd_make_float4(clipPoint.x, clipPoint.y, 1.0f, 1.0f); + float4 pixel = mInv * float4m(clipPoint.x, clipPoint.y, 1.0f, 1.0f); //pixel /= pixel.w; // in case perspective used // that's in model space (+/0.5f, +/0.5f), so convert to texture space @@ -637,10 +637,13 @@ - (void)updateEyedropper { pixel.x *= 0.999f; pixel.y *= 0.999f; + float uvX = pixel.x; + float uvY = pixel.y; + // pixels are 0 based pixel.x *= _showSettings->imageBoundsX; pixel.y *= _showSettings->imageBoundsY; - + // TODO: finish this logic, need to account for gaps too, and then isolate to a given level and mip to sample // if (_showSettings->isShowingAllLevelsAndMips) { // pixel.x *= _showSettings->totalLevels(); @@ -700,46 +703,82 @@ - (void)updateEyedropper { int32_t x = _showSettings->textureResultX; int32_t y = _showSettings->textureResultY; + // pixel at top-level mip sprintf(text, "px:%d %d\n", x, y); + // show block num + int mipLOD = _showSettings->mipLOD; + + // TODO:: these block numbers are not accurate on Toof at 4x4 + // there is resizing going on to the dimensions + + int mipX = _showSettings->imageBoundsX; + int mipY = _showSettings->imageBoundsY; + + for (int i = 0; i < mipLOD; ++i) { + mipX = (mipX+1) >> 1; + mipY = (mipY+1) >> 1; + } + mipX = std::max(1, mipX); + mipY = std::max(1, mipY); + + mipX = (int32_t)(uvX * mipX); + mipY = (int32_t)(uvY * mipY); + + // TODO: may want to return mip in pixel readback + // don't have it right now, so don't display if preview is enabled + if (_showSettings->isPreview) + mipLOD = 0; + + auto blockDims = blockDimsOfFormat(format); + if (blockDims.x > 1) + append_sprintf(text, "bpx: %d %d\n", mipX / blockDims.x, mipY / blockDims.y); + + // TODO: on astc if we have original blocks can run analysis from astc-encoder + // about each block. + + // show the mip pixel (only if not preview and mip changed) + if (mipLOD > 0 && !_showSettings->isPreview) + append_sprintf(text, "mpx: %d %d\n", mipX, mipY); + // TODO: more criteria here, can have 2 channel PBR metal-roughness // also have 4 channel normals where zw store other data. 
bool isNormal = _showSettings->isNormal; bool isFloat = isHdr; if (isNormal) { - float x = c.x; - float y = c.y; + float nx = c.x; + float ny = c.y; // unorm -> snorm if (!isSigned) { - x = x * 2.0f - 1.0f; - y = y * 2.0f - 1.0f; + nx = nx * 2.0f - 1.0f; + ny = ny * 2.0f - 1.0f; } // this is always postive on tan-space normals // assuming we're not viewing world normals - float z = sqrt(1.0f - std::min(x * x + y * y, 1.0f)); + float nz = sqrt(1.0f - std::min(nx * nx + ny * ny, 1.0f)); // print the underlying color (some nmaps are xy in 4 channels) string tmp; - printChannels(tmp, "c: ", c, numChannels, isFloat, isSigned); + printChannels(tmp, "ln: ", c, numChannels, isFloat, isSigned); text += tmp; // print direction - float4 d = simd_make_float4(x,y,z,0.0f); + float4 d = float4m(nx,ny,nz,0.0f); isFloat = true; - printChannels(tmp, "d: ", d, 3, isFloat, isSigned); + printChannels(tmp, "dr: ", d, 3, isFloat, isSigned); text += tmp; } else { // DONE: write some print helpers based on float4 and length string tmp; - printChannels(tmp, "l: ", c, numChannels, isFloat, isSigned); + printChannels(tmp, "ln: ", c, numChannels, isFloat, isSigned); text += tmp; if (isSrgb) { - printChannels(tmp, "s: ", s, numChannels, isFloat, isSigned); + printChannels(tmp, "sr: ", s, numChannels, isFloat, isSigned); text += tmp; } } @@ -820,8 +859,8 @@ - (void)scrollWheel:(NSEvent *)event // what if zoom moves it outside? - float4 pt0 = projectionViewModelMatrix * simd_make_float4(-0.5, -0.5f, 0.0f, 1.0f); - float4 pt1 = projectionViewModelMatrix * simd_make_float4(0.5, 0.5f, 0.0f, 1.0f); + float4 pt0 = projectionViewModelMatrix * float4m(-0.5, -0.5f, 0.0f, 1.0f); + float4 pt1 = projectionViewModelMatrix * float4m(0.5, 0.5f, 0.0f, 1.0f); // for perspective //pt0 /= pt0.w; diff --git a/libkram/bc7enc/bc7enc.cpp b/libkram/bc7enc/bc7enc.cpp index a8e32f15..61b4abe6 100644 --- a/libkram/bc7enc/bc7enc.cpp +++ b/libkram/bc7enc/bc7enc.cpp @@ -48,11 +48,11 @@ using namespace simd; using vec4F = float4; static inline vec4F *vec4F_set_scalar(vec4F *pV, float x) { *pV = vec4F(x); return pV; } -static inline vec4F *vec4F_set(vec4F *pV, float x, float y, float z, float w) { *pV = simd_make_float4(x,y,z,w); return pV; } +static inline vec4F *vec4F_set(vec4F *pV, float x, float y, float z, float w) { *pV = float4m(x,y,z,w); return pV; } static inline vec4F *vec4F_saturate_in_place(vec4F *pV) { *pV = saturate(*pV); return pV; } static inline vec4F vec4F_saturate(const vec4F *pV) { vec4F res = saturate(*pV); return res; } -static inline vec4F vec4F_from_color(const color_quad_u8 *pC) { vec4F res = simd_make_float4((float)pC->r, (float)pC->g, (float)pC->b, (float)pC->a); return res; } +static inline vec4F vec4F_from_color(const color_quad_u8 *pC) { vec4F res = float4m((float)pC->r, (float)pC->g, (float)pC->b, (float)pC->a); return res; } static inline vec4F vec4F_add(const vec4F *pLHS, const vec4F *pRHS) { vec4F res = *pLHS + *pRHS; return res; } static inline vec4F vec4F_sub(const vec4F *pLHS, const vec4F *pRHS) { vec4F res = *pLHS - *pRHS; return res; } static inline float vec4F_dot(const vec4F *pLHS, const vec4F *pRHS) { return dot(*pLHS, *pRHS); } diff --git a/libkram/kram/KramConfig.h b/libkram/kram/KramConfig.h index af16acd2..e36ed0d1 100644 --- a/libkram/kram/KramConfig.h +++ b/libkram/kram/KramConfig.h @@ -107,6 +107,7 @@ #define USE_NEON 0 #endif +// not using simd/simd.h on Win or Linux, but clang would support #ifndef USE_SIMDLIB #if KRAM_MAC || KRAM_IOS #define USE_SIMDLIB 1 @@ -115,10 +116,6 @@ #endif #endif -#if 
!USE_SIMDLIB -#define simd_make_float4(x, y, z, w) float4(x, y, z, w) -#endif - // use _Float16/_fp16 vs. other #if KRAM_MAC || KRAM_IOS #define USE_FLOAT16 1 @@ -219,32 +216,84 @@ class half4 { #include "float4a.h" #endif -// D3D hobbled non-pow2 mips by only supporting round down, not round up -// So then OpenGL followed that. And then Metal followd OpenGL. -// Round up adds an extra mip level to the chain, but results in much better filtering. -// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_non_power_of_two.txt -// http://download.nvidia.com/developer/Papers/2005/NP2_Mipmapping/NP2_Mipmap_Creation.pdf -#define ROUNDMIPSDOWN 1 +namespace simd +{ -inline void mipDown(int32_t& w, int32_t& h) +#if !USE_SIMDLIB + + +// don't have float2/float3 type yet +//// use instead of simd_make_float +//inline float2 float2m(float x) +//{ +// return float2(x); +//} +// +//inline float3 float3m(float x) +//{ +// return float3(x); +//} +//inline float3 float3m(float x, float y, float z) +//{ +// return float3(x, y, z); +//} + +inline float4 float4m(float x) { -#if ROUNDMIPSDOWN - w = w / 2; - h = h / 2; + return float4(x); +} + +inline float4 float4m(float x, float y, float z, float w) +{ + return float4(x, y, z, w); +} +//inline float4 float4m(const float3& v float w) +//{ +// return float4(v, w); +//} - if (w < 1) w = 1; - if (h < 1) h = 1; #else - w = (w + 1) / 2; - h = (h + 1) / 2; -#endif + +// functional ctor +inline float4 float4m(float3 v, float w) +{ + return vector4(v, w); } -namespace simd { +inline float2 float2m(float x, float y) +{ + return { x, y }; +} +inline float3 float3m(float x, float y, float z) +{ + return { x, y, z }; +} +inline float4 float4m(float x, float y, float z, float w) +{ + return { x, y, z, w }; +} + +inline float2 float2m(float x) +{ + return float2m(x,x); +} + +inline float3 float3m(float x) +{ + return float3m(x,x,x); +} + +inline float4 float4m(float x) +{ + return float4m(x,x,x,x); +} + +#endif + inline float4 saturate(const float4& v) { - const float4 kZero = simd_make_float4(0.0f, 0.0f, 0.0f, 0.0f); - const float4 kOne = simd_make_float4(1.0f, 1.0f, 1.0f, 1.0f); + const float4 kZero = float4m(0.0f, 0.0f, 0.0f, 0.0f); + const float4 kOne = float4m(1.0f, 1.0f, 1.0f, 1.0f); return min(max(v, kZero), kOne); } @@ -255,7 +304,7 @@ inline float4 toFloat4(const half4& vv) // https://patchwork.ozlabs.org/project/gcc/patch/559BC75A.1080606@arm.com/ // https://gcc.gnu.org/onlinedocs/gcc-7.5.0/gcc/Half-Precision.html // https://developer.arm.com/documentation/dui0491/i/Using-NEON-Support/Converting-vectors - return simd_make_float4((float)vv.x, (float)vv.y, (float)vv.z, (float)vv.w); + return float4m((float)vv.x, (float)vv.y, (float)vv.z, (float)vv.w); } inline half4 toHalf4(const float4& vv) { @@ -311,11 +360,36 @@ inline half4 toHalf4(const float4& vv) } #endif +} // namespace simd + +//--------------------------------------- + // this just strips args #define macroUnusedArg(x) // this just strips args #define macroUnusedVar(x) (void)x +// GL/D3D hobbled non-pow2 mips by only supporting round down, not round up +// And then Metal followd OpenGL since it's the same hw and drivers. +// Round up adds an extra mip level to the chain, but results in much better filtering. 
+// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_non_power_of_two.txt +// http://download.nvidia.com/developer/Papers/2005/NP2_Mipmapping/NP2_Mipmap_Creation.pdf +#define ROUNDMIPSDOWN 1 + +inline void mipDown(int32_t& w, int32_t& h) +{ +#if ROUNDMIPSDOWN + w = w / 2; + h = h / 2; + + if (w < 1) w = 1; + if (h < 1) h = 1; +#else + w = (w + 1) / 2; + h = (h + 1) / 2; +#endif +} + + -} // namespace simd diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 92c915c6..d7906f29 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -912,16 +912,13 @@ bool Image::resizeImage(int32_t wResize, int32_t hResize, bool resizePow2, Image return true; } -// functional ctor -inline float4 float4m(float x, float y, float z, float w) -{ - return { x, y, z, w }; -} - // TODO: to hook this up, read 16u png into pixelsFlat, then gen an 8-bit normal xy // from that. This is more like SDF where a single height is used. void Image::heightToNormals(float scale, bool isWrap) { + // see here + // https://developer.download.nvidia.com/CgTutorial/cg_tutorial_chapter08.html + int32_t w = _width; int32_t h = _height; @@ -946,6 +943,28 @@ void Image::heightToNormals(float scale, bool isWrap) scaleX /= 255.0f; scaleY /= 255.0f; } + + bool isSame = srcPixels8 == dstPixels8; + if (isFloat) + isSame = srcPixels == dstPixels; + + // TODO: doing this at image level doesn't support chunk conversion + // so this would only work for 2D images, and not atlas strips to a 2D array. + + // TODO: to copy 3 rows in cyclic buffer, if src == dst, and handle clamp/wrap + // by copying the first/last row. For now disallow this. + // TODO: copy two rows here, then one row in the loop this would fundamentally + // change the algorithm, since all of these lookups assume the full srcImage + // really only need prev row, and previous height in row. Larger kernel support + // to 3x3, 5x5, 7x7, 9x9. This pattern is a 3x3 area with a cross + // where only 4 cardinal samples are used. This bigger areas have bleed + // especially if this is run on a chart. This is more for like terrain height maps. 
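    // A worked example of the cross filter described above (illustrative
    // numbers, not from any test image): with float heights cN=0.55, cS=0.45,
    // cE=0.6, cW=0.4 and scale=1, dx = (cE - cW) = 0.2 and dy = (cN - cS) = 0.1,
    // so the normal is normalize(0.2, 0.1, 1.0) ~= (0.195, 0.098, 0.976),
    // a slight tilt toward +x/+y. A larger scale multiplies dx/dy and steepens
    // that tilt.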
+ + // this recomends generating a few maps, and blending between them + // https://vrayschool.com/normal-map/ + + if (isSame) + return; for (int32_t y = 0; y < h; ++y) { int32_t y0 = y; @@ -1003,8 +1022,9 @@ void Image::heightToNormals(float scale, bool isWrap) dstPixel.x = normal.x; dstPixel.y = normal.y; - dstPixel.z = normal.z; // can reconstruct + dstPixel.z = normal.z; // can reconstruct from xy + // TODO: consider storing in z, easier to see data channel, not premul // store height in alpha dstPixel.w = srcPixels[y0 + x0].x; } @@ -1027,12 +1047,12 @@ void Image::heightToNormals(float scale, bool isWrap) dstPixel8.r = normal.x; dstPixel8.g = normal.y; - dstPixel8.b = normal.z; // can reconstruct + dstPixel8.b = normal.z; // can reconstruct from xy + // TODO: consider storing in z, easier to see data channel, not premul // store height in alpha dstPixel8.a = srcPixels8[y0 + x0].r; } - } } } diff --git a/libkram/kram/KramMipper.cpp b/libkram/kram/KramMipper.cpp index f2f4d300..6f0b6701 100644 --- a/libkram/kram/KramMipper.cpp +++ b/libkram/kram/KramMipper.cpp @@ -76,7 +76,7 @@ inline float srgbToLinearFunc(float s) float4 linearToSRGB(float4 lin) { lin = saturate(lin); - return simd_make_float4( + return float4m( linearToSRGBFunc(lin.x), linearToSRGBFunc(lin.y), linearToSRGBFunc(lin.z), @@ -168,7 +168,7 @@ void Mipper::initPixelsHalfIfNeeded(ImageData& srcImage, bool doPremultiply, boo vector& halfImage) const { Color zeroColor = { 0, 0, 0, 0 }; - float4 zeroColorf = simd_make_float4(0.0, 0.0f, 0.0f, 0.f); // need a constant for this + float4 zeroColorf = float4m(0.0, 0.0f, 0.0f, 0.f); // need a constant for this half4 zeroColorh = toHalf4(zeroColorf); int32_t w = srcImage.width; diff --git a/libkram/kram/KramMipper.h b/libkram/kram/KramMipper.h index 0751d440..e5439f25 100644 --- a/libkram/kram/KramMipper.h +++ b/libkram/kram/KramMipper.h @@ -26,13 +26,13 @@ struct Color { inline float4 ColorToUnormFloat4(const Color &value) { // simd lib can't ctor these even in C++, so will make abstracting harder - float4 c = simd_make_float4((float)value.r, (float)value.g, (float)value.b, (float)value.a); + float4 c = float4m((float)value.r, (float)value.g, (float)value.b, (float)value.a); return c / 255.0f; } inline float4 ColorToSnormFloat4(const Color &value) { - float4 c = simd_make_float4((float)value.r, (float)value.g, (float)value.b, (float)value.a); + float4 c = float4m((float)value.r, (float)value.g, (float)value.b, (float)value.a); return (c - float4(128.0f)) / 255.0f; } diff --git a/libkram/kram/float4a.h b/libkram/kram/float4a.h index 5b78fe0c..ca8111e7 100644 --- a/libkram/kram/float4a.h +++ b/libkram/kram/float4a.h @@ -174,7 +174,7 @@ class float4 { float4() {} // TODO: problem is that Apple's simd::float4(val) is val,000, simd::float4(val, 0, 0, 0) is 0 (last element?) - // have to go through simd_make_float4(val, val, val, val) to get 4 values + // have to go through float4m(val, val, val, val) to get 4 values // This behavior doesn't match HLSL/GLSL and is an artifact of the comma operator messing things up. 
explicit float4(float val) { reg = _mm_set1_ps(val); } // xyzw = val explicit float4(tType val) { reg = val; } diff --git a/libkram/squish/maths.h b/libkram/squish/maths.h index a7367508..726129c7 100644 --- a/libkram/squish/maths.h +++ b/libkram/squish/maths.h @@ -361,7 +361,7 @@ using namespace simd; using Vec4 = float4; // default ctor for float4(1) sets 1,0,0,0 in simd, but impls like Vec4 expect float4(repeating: x) #define VEC4_CONST(x) Vec4(makeVec4(x,x,x,x)) -#define makeVec4(x,y,z,w) simd_make_float4(x,y,z,w) +#define makeVec4(x,y,z,w) float4m(x,y,z,w) inline bool CompareAnyLessThan(Vec4 x, Vec4 y) { return any(x < y); } inline Vec4 Min(Vec4 x, Vec4 y) { return min(x, y); } From 6250e043aeef4c564d172ee181cfd89c10e93e2f Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 28 Mar 2021 01:14:30 -0700 Subject: [PATCH 022/901] Kram - add -height, -heightScale, -wrap options Can convert any height map to a normal with these settings. Uses standard cross layout. heightScale can be negative to generate recessed vs. protruding normal map. Added in heights test case. --- libkram/kram/Kram.cpp | 39 +++++++ libkram/kram/KramConfig.h | 7 ++ libkram/kram/KramImage.cpp | 144 ------------------------- libkram/kram/KramImage.h | 4 +- libkram/kram/KramImageInfo.cpp | 174 +++++++++++++++++++++++++++++++ libkram/kram/KramImageInfo.h | 19 +++- scripts/kramTextures.py | 26 ++++- tests/src/collectorbarrelh-h.png | 3 + 8 files changed, 262 insertions(+), 154 deletions(-) create mode 100644 tests/src/collectorbarrelh-h.png diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 10ce6f7a..b3a73281 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -999,6 +999,15 @@ void kramEncodeUsage(bool showVersion = true) "\tOnly output mips <= size px\n" "\n" + // tex to normal + "\t-height" + "\tConvert height.x to normal.xy\n" + "\t-heightScale scale" + "\tScale heights up down to adjust normal map\n" + "\t-wrap" + "\tWrap texture at edges (height only for now)\n" + "\n" + "\t-srgb" "\tsRGB for rgb/rgba formats\n" "\t-signed" @@ -1678,6 +1687,36 @@ static int32_t kramAppEncode(vector& args) // continue; // } + else if (isStringEqual(word, "-heightScale")) { + ++i; + if (i >= argc) { + KLOGE("Kram", "heightScale arg invalid"); + error = true; + break; + } + + infoArgs.isHeight = true; + infoArgs.heightScale = atof(args[i]); + + // Note: caller can negate scale, but don't allow scale 0. 
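            // e.g. scripts/kramTextures.py appends "-height -heightScale 2 -wrap" to its
            // normal-map format for *-h height sources; a negative heightScale flips the
            // result from protruding to recessed.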
+ if (infoArgs.heightScale == 0.0f) { + KLOGE("Kram", "heightScale arg cannot be 0"); + error = true; + } + continue; + } + else if (isStringEqual(word, "-height")) { + // converted to a normal map + infoArgs.isHeight = true; + continue; + } + else if (isStringEqual(word, "-wrap")) { + // whether texture is clamp or wrap + infoArgs.isWrap = true; + continue; + } + + else if (isStringEqual(word, "-e") || isStringEqual(word, "-encoder")) { ++i; diff --git a/libkram/kram/KramConfig.h b/libkram/kram/KramConfig.h index e36ed0d1..257a7737 100644 --- a/libkram/kram/KramConfig.h +++ b/libkram/kram/KramConfig.h @@ -391,5 +391,12 @@ inline void mipDown(int32_t& w, int32_t& h) #endif } +// Use this on vectors +template +inline size_t vsizeof(const std::vector& v) +{ + return sizeof(T) * v.size(); +} + diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index d7906f29..b3758174 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -912,150 +912,6 @@ bool Image::resizeImage(int32_t wResize, int32_t hResize, bool resizePow2, Image return true; } -// TODO: to hook this up, read 16u png into pixelsFlat, then gen an 8-bit normal xy -// from that. This is more like SDF where a single height is used. -void Image::heightToNormals(float scale, bool isWrap) -{ - // see here - // https://developer.download.nvidia.com/CgTutorial/cg_tutorial_chapter08.html - - int32_t w = _width; - int32_t h = _height; - - bool isWrapY = isWrap; - bool isWrapX = isWrap; - - // 2.0 is distance betwen +1 and -1 - // don't scale by this, want caller to be able to pass 1.0 as default scale not 2.0 - float scaleX = scale; // / 2.0; - float scaleY = scale; // / 2.0; - - // src/dst the same here - // may need to copy a row/column of pixels for wrap - const float4* srcPixels = _pixelsFloat.data(); - float4* dstPixels = (float4*)_pixelsFloat.data(); - - const Color* srcPixels8 = (const Color*)_pixels.data(); - Color* dstPixels8 = (Color*)_pixels.data(); - bool isFloat = _pixels.empty(); - - if (!isFloat) { - scaleX /= 255.0f; - scaleY /= 255.0f; - } - - bool isSame = srcPixels8 == dstPixels8; - if (isFloat) - isSame = srcPixels == dstPixels; - - // TODO: doing this at image level doesn't support chunk conversion - // so this would only work for 2D images, and not atlas strips to a 2D array. - - // TODO: to copy 3 rows in cyclic buffer, if src == dst, and handle clamp/wrap - // by copying the first/last row. For now disallow this. - // TODO: copy two rows here, then one row in the loop this would fundamentally - // change the algorithm, since all of these lookups assume the full srcImage - // really only need prev row, and previous height in row. Larger kernel support - // to 3x3, 5x5, 7x7, 9x9. This pattern is a 3x3 area with a cross - // where only 4 cardinal samples are used. This bigger areas have bleed - // especially if this is run on a chart. This is more for like terrain height maps. 
- - // this recomends generating a few maps, and blending between them - // https://vrayschool.com/normal-map/ - - if (isSame) - return; - - for (int32_t y = 0; y < h; ++y) { - int32_t y0 = y; - int32_t ym = y - 1; - int32_t yp = y + 1; - - if (isWrapY) { - ym = (ym + h) % h; - yp = (yp) % h; - } - else { - // clamp - if (ym < 0) ym = 0; - if (yp > (h - 1)) yp = h - 1; - } - - y0 *= w; - ym *= w; - yp *= w; - - for (int32_t x = 0; x < w; ++x) { - int32_t x0 = x; - int32_t xm = x - 1; - int32_t xp = x + 1; - - if (isWrapX) { - xm = (xm + w) % w; - xp = (xp) % w; - } - else { - // clamp - if (xm < 0) xm = 0; - if (xp > (w - 1)) xp = w - 1; - } - - - if (isFloat) { - - // cross pattern - // height channel is in x - float cN = srcPixels[ym + x0].x; - float cS = srcPixels[yp + x0].x; - float cE = srcPixels[y0 + xp].x; - float cW = srcPixels[y0 + xm].x; - - // up is N, so this is rhcs - float dx = (cE - cW) * scaleX; - float dy = (cN - cS) * scaleY; - - float4 normal = float4m(dx, dy, 1.0f, 0.0f); - normal = normalize(normal); - - // write out the result - float4& dstPixel = dstPixels[y0 + x]; - - dstPixel.x = normal.x; - dstPixel.y = normal.y; - dstPixel.z = normal.z; // can reconstruct from xy - - // TODO: consider storing in z, easier to see data channel, not premul - // store height in alpha - dstPixel.w = srcPixels[y0 + x0].x; - } - else { - // cross pattern - // height channel is in x - uint8_t cN = srcPixels8[4 * (ym + x0)].r; // assumes first elem (.r) is height channel - uint8_t cS = srcPixels8[4 * (yp + x0)].r; - uint8_t cE = srcPixels8[4 * (y0 + xp)].r; - uint8_t cW = srcPixels8[4 * (y0 + xm)].r; - - float dx = (cE - cW) * scaleX; - float dy = (cN - cS) * scaleY; - - float4 normal = float4m(dx, dy, 1.0f, 0.0f); - normal = normalize(normal); - normal *= 127.5f; - - Color& dstPixel8 = dstPixels8[y0 + x]; - - dstPixel8.r = normal.x; - dstPixel8.g = normal.y; - dstPixel8.b = normal.z; // can reconstruct from xy - - // TODO: consider storing in z, easier to see data channel, not premul - // store height in alpha - dstPixel8.a = srcPixels8[y0 + x0].r; - } - } - } -} bool Image::encode(ImageInfo& info, KTXImage& dstImage) const { diff --git a/libkram/kram/KramImage.h b/libkram/kram/KramImage.h index c69683e9..3fc97346 100644 --- a/libkram/kram/KramImage.h +++ b/libkram/kram/KramImage.h @@ -85,9 +85,7 @@ class Image { const KTXImage& image, ImageData& srcImage, vector& tmpImage) const; - // convert x field to normals - void heightToNormals(float scale, bool isWrap = false); - + private: // pixel size of image int32_t _width = 0; diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 2652c0c3..71b370c9 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -1017,6 +1017,13 @@ void ImageInfo::initWithArgs(const ImageInfoArgs& args) quality = args.quality; + // this is for height to normal, will convert .r to normal xy + isHeight = args.isHeight; + isWrap = args.isWrap; + heightScale = args.heightScale; + if (isHeight) + isNormal = true; + // Note: difference between input srgb and output srgb, but it's mingled // here a bit @@ -1099,6 +1106,11 @@ void ImageInfo::initWithSourceImage(Image& sourceImage) hasColor = false; } + // this will only work on 2d textures, since this is all pre-chunk + if (isHeight) { + heightToNormals(w, h, srcPixelsFloat, srcPixels, heightScale, isWrap); + } + // this updates hasColor/hasAlpha if (!swizzleText.empty()) { // set any channels that are constant @@ -1182,6 +1194,168 @@ void 
ImageInfo::initWithSourceImage(Image& sourceImage) } } + + +// TODO: tread 16u png into pixelsFlat, then gen an 8-bit normal xy +// from that. This is more like SDF where a single height is used. + +void ImageInfo::heightToNormals(int32_t w, int32_t h, + float4* srcPixels, + Color* srcPixels8, + float scale, bool isWrap) +{ + // see here + // https://developer.download.nvidia.com/CgTutorial/cg_tutorial_chapter08.html + + // src/dst the same here + // may need to copy a row/column of pixels for wrap + float4* dstPixels = srcPixels; + Color* dstPixels8 = srcPixels8; + + bool isFloat = srcPixels; + + // copy the texture, or there are too many edge cases in the code below + vector srcDataCopy8; + vector srcDataCopy; + if (isFloat) { + srcDataCopy.resize(w*h); + memcpy(srcDataCopy.data(), srcPixels, vsizeof(srcDataCopy)); + srcPixels = srcDataCopy.data(); + } + else { + srcDataCopy8.resize(w*h); + memcpy(srcDataCopy8.data(), srcPixels8, vsizeof(srcDataCopy8)); + srcPixels8 = srcDataCopy8.data(); + } + + //----------------------- + + bool isWrapX = isWrap; + bool isWrapY = isWrap; + + // 2.0 is distance betwen +1 and -1 + // don't scale by this, want caller to be able to pass 1.0 as default scale not 2.0 + float scaleX = scale; // / 2.0; + float scaleY = scale; // / 2.0; + + if (!isFloat) { + scaleX /= 255.0f; + scaleY /= 255.0f; + } + + // TODO: doing this at image level doesn't support chunk conversion + // so this would only work for 2D images, and not atlas strips to a 2D array. + + // TODO: Larger kernel support to 2x2, 3x3, 5x5, 7x7, 9x9 + // This pattern is a 3x3 cross here only 4 cardinal samples are used. + // Bigger areas have bleed especially if this is run on a chart. + + // this recommends generating a few maps, and blending between them + // https://vrayschool.com/normal-map/ + + for (int32_t y = 0; y < h; ++y) { + int32_t y0 = y; + int32_t ym = y - 1; + int32_t yp = y + 1; + + if (isWrapY) { + ym = (ym + h) % h; + yp = (yp) % h; + } + else { + // clamp + if (ym < 0) ym = 0; + if (yp > (h - 1)) yp = h - 1; + } + + y0 *= w; + ym *= w; + yp *= w; + + for (int32_t x = 0; x < w; ++x) { + //int32_t x0 = x; + int32_t xm = x - 1; + int32_t xp = x + 1; + + if (isWrapX) { + xm = (xm + w) % w; + xp = (xp) % w; + } + else { + // clamp + if (xm < 0) xm = 0; + if (xp > (w - 1)) xp = w - 1; + } + + + if (isFloat) { + + // cross pattern + // height channel is in x + float cN = srcPixels[ym + x].x; + float cS = srcPixels[yp + x].x; + float cE = srcPixels[y0 + xp].x; + float cW = srcPixels[y0 + xm].x; + + // up is N, so this is rhcs + float dx = (cE - cW) * scaleX; + float dy = (cN - cS) * scaleY; + + float4 normal = float4m(dx, dy, 1.0f, 0.0f); + normal = normalize(normal); + + // convert to unorm + normal = normal * 0.5 + 0.5f; + + // write out the result + float4& dstPixel = dstPixels[y0 + x]; + + dstPixel.x = normal.x; + dstPixel.y = normal.y; + + // TODO: consider storing in z, easier to see data channel, not premul + // store height in alpha. 
Let caller provide the swizzle xyzh01 + //dstPixel.z = normal.z; // can reconstruct from xy + //dstPixel.w = srcPixels[y0 + x0].x; + + dstPixel.z = srcPixels[y0 + x].z; + dstPixel.w = srcPixels[y0 + x].w; + } + else { + // cross pattern + // height channel is in x + uint8_t cN = srcPixels8[ym + x].r; // assumes first elem (.r) is height channel + uint8_t cS = srcPixels8[yp + x].r; + uint8_t cE = srcPixels8[y0 + xp].r; + uint8_t cW = srcPixels8[y0 + xm].r; + + float dx = (cE - cW) * scaleX; + float dy = (cN - cS) * scaleY; + + float4 normal = float4m(dx, dy, 1.0f, 0.0f); + normal = normalize(normal); + + // convert to unorm + normal = normal * 127 + 128.0f; + + Color& dstPixel8 = dstPixels8[y0 + x]; + + dstPixel8.r = normal.x; + dstPixel8.g = normal.y; + + // TODO: consider storing height in z, easier to see data channel, not premul + // store height in alpha. Let caller provide the swizzle xyzh01 + //dstPixel8.b = normal.z; // can reconstruct from xy + //dstPixel8.a = srcPixels8[y0 + x0].r; + + dstPixel8.b = srcPixels8[y0 + x].b; + dstPixel8.a = srcPixels8[y0 + x].a; + } + } + } +} + + const char* encoderName(TexEncoder encoder) { switch(encoder) { diff --git a/libkram/kram/KramImageInfo.h b/libkram/kram/KramImageInfo.h index 0d9b2ae5..d4c9f862 100644 --- a/libkram/kram/KramImageInfo.h +++ b/libkram/kram/KramImageInfo.h @@ -58,7 +58,7 @@ class ImageInfoArgs { bool isPremultiplied = false; bool isPrezero = false; bool isNormal = false; // signed, but may be stored unorm and swizzled (f.e. astc/bc3nm gggr or rrrg) - + // can pick a smaller format if alpha = 1 (only for bc and etc) bool optimizeFormatForOpaque = false; @@ -68,6 +68,11 @@ class ImageInfoArgs { bool isSRGB = false; bool isHDR = false; + // for now these are only usable with normal to height + bool isHeight = false; + bool isWrap = false; + float heightScale = 1.0f; + string swizzleText; string averageChannels; @@ -91,7 +96,12 @@ class ImageInfo { const char* swizzleText); static void swizzleTextureLDR(int32_t w, int32_t h, Color* srcPixels_, const char* swizzleText); - + + // convert x field to normals + static void heightToNormals(int32_t w, int32_t h, + float4* srcPixelsFloat_, + Color* srcPixels_, + float scale, bool isWrap = false); private: // this walks pixels for hasColor and hasAlpha if not already set to false void updateImageTraitsHDR(int32_t w, int32_t h, @@ -145,6 +155,11 @@ class ImageInfo { bool useEtcenc = false; bool useExplicit = false; + // for now these are only usable with normal to height + bool isHeight = false; + bool isWrap = false; + float heightScale = 1.0f; + int32_t quality = 49; int32_t mipMinSize = 1; diff --git a/scripts/kramTextures.py b/scripts/kramTextures.py index c92e60c6..e4a52c55 100755 --- a/scripts/kramTextures.py +++ b/scripts/kramTextures.py @@ -31,6 +31,7 @@ class TextureContent(Enum): SDF = 3 MetalRoughness = 4 Mask = 5 + Height = 6 class TextureType(Enum): Unknown = 0 @@ -103,6 +104,8 @@ def textureContent(self, name): content = TextureContent.Albedo elif name.endswith("-n") or name.endswith("-normal"): content = TextureContent.Normal + elif name.endswith("-h") or name.endswith("-height"): + content = TextureContent.Height return content @@ -167,7 +170,14 @@ def processTextureKram(self, srcPath, dstDir, srcModstamp): # this only exports to ktx, post process will convert to ktx2 ext = ".ktx" - dstName = srcFilename + ext + dstName = srcFilename + + # replace -h with -n, since it will be converted to a normal + if dstName.endswith("-h"): + dstName = dstName.replace("-h", "-n") + + 
dstName += ext + dstFile = dstDir + dstName # check the modstamp of src vs. dst output, form same name at dstPath, and check os.stat() on that @@ -423,7 +433,8 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, fmtSDF = "" fmtMetalRoughness = "" fmtMask = "" - + fmtHeight = "" + # note 1/2/2nm in astc need swizzles to store more efficiently # and at 4x4 aren't any smaller than explicit values # prefer etc since it's smaller and higher bit depth (11-bits) @@ -438,6 +449,7 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, fmtMetalRoughness = " -f etc2rg" fmtMask = " -f etc2r" fmtSDF = " -f etc2r -signed -sdf" + fmtHeight = fmtNormal + " -height -heightScale 2 -wrap" elif platform == "android": fmtAlbedo = " -f etc2rgba -srgb -premul -optopaque" # or astc @@ -445,7 +457,8 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, fmtMetalRoughness = " -f etc2rg" fmtMask = " -f etc2r" fmtSDF = " -f etc2r -signed -sdf" - + fmtHeight = fmtNormal + " -height -heightScale 2 -wrap" + elif platform == "mac": # bc1 on Toof has purple, green, yellow artifacts with bc7enc, and has more banding # and a lot of weird blocky artifacts, look into bc1 encoder. @@ -455,7 +468,8 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, fmtMetalRoughness = " -f bc5" fmtMask = " -f bc4" fmtSDF = " -f bc4 -signed -sdf" - + fmtHeight = fmtNormal + " -height -heightScale 2 -wrap" + elif platform == "win": # bc1 on Toof has purple, green, yellow artifacts with bc7enc, and has more banding # and a lot of weird blocky artifacts, look into bc1 encoder @@ -464,6 +478,7 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, fmtMetalRoughness = " -f bc5" fmtMask = " -f bc4" fmtSDF = " -f bc4 -signed -sdf" + fmtHeight = fmtNormal + " -height -heightScale 2 -wrap" elif platform == "any": # output to s/rgba8u, then run through ktxsc to go to BasisLZ @@ -473,6 +488,7 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, fmtMetalRoughness = " -f rgba8 -swizzle r001" fmtMask = " -f rgba8 -swizzle r001" fmtSDF = " -f rgba8 -swizzle r001 -sdf" + fmtHeight = fmtNormal + " -height -heightScale 2 -wrap" else: return 1 @@ -492,7 +508,7 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, if verbose: moreArgs += " -v" - formats = [fmtUnknown, fmtAlbedo, fmtNormal, fmtSDF, fmtMetalRoughness, fmtMask] + formats = [fmtUnknown, fmtAlbedo, fmtNormal, fmtSDF, fmtMetalRoughness, fmtMask, fmtHeight] formats = [fmt + moreArgs for fmt in formats] diff --git a/tests/src/collectorbarrelh-h.png b/tests/src/collectorbarrelh-h.png new file mode 100644 index 00000000..fdaabf09 --- /dev/null +++ b/tests/src/collectorbarrelh-h.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b71f2ac9ab675fa807f7757e39019b780b1d67638bb53c27980d29d9e5c6da76 +size 35466 From 36bce4d2ab1009462d7d12df6cc4d4ff356d5404 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 28 Mar 2021 01:50:34 -0700 Subject: [PATCH 023/901] kramv - fix display of signed data (f.e. normals) that have been decoded to unorm. 
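
Signed data (normal maps and other snorm content) gets decoded to an unorm
texture for display, so the eyedropper's unorm -> snorm remap has to follow the
decoded format's signedness rather than the source's. The renderer now records
decodedFormat alongside originalFormat, and updateEyedropper keys the remap off
isSignedFormat(decodedFormat), roughly:

    bool isDecodeSigned = isSignedFormat(_showSettings->decodedFormat);
    if (!isDecodeSigned) {
        nx = nx * 2.0f - 1.0f;  // unorm -> snorm before reconstructing z
        ny = ny * 2.0f - 1.0f;
    }
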
--- kramv/KramRenderer.mm | 1 + kramv/KramViewerBase.h | 1 + kramv/KramViewerMain.mm | 13 ++++++++----- libkram/kram/KramImageInfo.cpp | 2 +- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 3b3b74e7..c4632b03 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -462,6 +462,7 @@ - (BOOL)loadTexture:(nonnull NSURL *)url _showSettings->imageInfo = kramInfoToString(fullFilename, isVerbose); _showSettings->originalFormat = (MyMTLPixelFormat)originalFormatMTL; + _showSettings->decodedFormat = (MyMTLPixelFormat)texture.pixelFormat; _showSettings->lastFilename = fullFilename; _showSettings->lastTimestamp = timestamp; diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 62e9612d..d1766924 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -142,6 +142,7 @@ class ShowSettings { // format before any transcode to supported formats MyMTLPixelFormat originalFormat; + MyMTLPixelFormat decodedFormat; void advanceDebugMode(bool isShiftKeyDown); diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index af562df3..ead6f91b 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -746,12 +746,14 @@ - (void)updateEyedropper { bool isNormal = _showSettings->isNormal; bool isFloat = isHdr; + bool isDecodeSigned = isSignedFormat(_showSettings->decodedFormat); + if (isNormal) { float nx = c.x; float ny = c.y; // unorm -> snorm - if (!isSigned) { + if (!isDecodeSigned) { nx = nx * 2.0f - 1.0f; ny = ny * 2.0f - 1.0f; } @@ -762,23 +764,24 @@ - (void)updateEyedropper { // print the underlying color (some nmaps are xy in 4 channels) string tmp; - printChannels(tmp, "ln: ", c, numChannels, isFloat, isSigned); + printChannels(tmp, "ln: ", c, numChannels, isFloat, isDecodeSigned); text += tmp; // print direction float4 d = float4m(nx,ny,nz,0.0f); isFloat = true; - printChannels(tmp, "dr: ", d, 3, isFloat, isSigned); + isDecodeSigned = true; + printChannels(tmp, "dr: ", d, 3, isFloat, isDecodeSigned); text += tmp; } else { // DONE: write some print helpers based on float4 and length string tmp; - printChannels(tmp, "ln: ", c, numChannels, isFloat, isSigned); + printChannels(tmp, "ln: ", c, numChannels, isFloat, isDecodeSigned); text += tmp; if (isSrgb) { - printChannels(tmp, "sr: ", s, numChannels, isFloat, isSigned); + printChannels(tmp, "sr: ", s, numChannels, isFloat, isDecodeSigned); text += tmp; } } diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 71b370c9..f0c87451 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -1336,7 +1336,7 @@ void ImageInfo::heightToNormals(int32_t w, int32_t h, normal = normalize(normal); // convert to unorm - normal = normal * 127 + 128.0f; + normal = normal * 127.0f + 128.0f; Color& dstPixel8 = dstPixels8[y0 + x]; From dc1f924e7042b9ce1bceab6f2fe2fd4f4b6cef23 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 28 Mar 2021 10:34:22 -0700 Subject: [PATCH 024/901] Kramv - fix signed eyedropper conversion, convert drag-drop url to file path for reload --- kramv/KramViewerMain.mm | 41 ++++++++++++++++++++++++++-------- libkram/kram/KramConfig.h | 18 ++++++++++----- libkram/kram/KramImageInfo.cpp | 3 +++ scripts/kramTextures.py | 16 ++++++++----- 4 files changed, 58 insertions(+), 20 deletions(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index ead6f91b..075e24ed 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -697,8 +697,6 @@ - (void)updateEyedropper { // 
this will always be a linear color float4 c = _showSettings->textureResult; - // this saturates the value, so don't use for extended srgb - float4 s = linearToSRGB(c); int32_t x = _showSettings->textureResultX; int32_t y = _showSettings->textureResultY; @@ -747,13 +745,19 @@ - (void)updateEyedropper { bool isFloat = isHdr; bool isDecodeSigned = isSignedFormat(_showSettings->decodedFormat); + if (isSigned && !isDecodeSigned) { + c.x = c.x * 2.0f - 1.0f; + c.y = c.y * 2.0f - 1.0f; + c.z = c.y * 2.0f - 1.0f; + c.w = c.y * 2.0f - 1.0f; + } if (isNormal) { float nx = c.x; float ny = c.y; // unorm -> snorm - if (!isDecodeSigned) { + if (!isSigned) { nx = nx * 2.0f - 1.0f; ny = ny * 2.0f - 1.0f; } @@ -764,24 +768,27 @@ - (void)updateEyedropper { // print the underlying color (some nmaps are xy in 4 channels) string tmp; - printChannels(tmp, "ln: ", c, numChannels, isFloat, isDecodeSigned); + printChannels(tmp, "ln: ", c, numChannels, isFloat, isSigned); text += tmp; // print direction float4 d = float4m(nx,ny,nz,0.0f); isFloat = true; - isDecodeSigned = true; - printChannels(tmp, "dr: ", d, 3, isFloat, isDecodeSigned); + isSigned = true; + printChannels(tmp, "dr: ", d, 3, isFloat, isSigned); text += tmp; } else { // DONE: write some print helpers based on float4 and length string tmp; - printChannels(tmp, "ln: ", c, numChannels, isFloat, isDecodeSigned); + printChannels(tmp, "ln: ", c, numChannels, isFloat, isSigned); text += tmp; if (isSrgb) { - printChannels(tmp, "sr: ", s, numChannels, isFloat, isDecodeSigned); + // this saturates the value, so don't use for extended srgb + float4 s = linearToSRGB(c); + + printChannels(tmp, "sr: ", s, numChannels, isFloat, isSigned); text += tmp; } } @@ -1265,6 +1272,13 @@ - (BOOL)performDragOperation:(id)sender { // this turns it into a real path (supposedly works even with sandbox) NSURL * url = [NSURL URLWithString:urlString]; + // convert the original path and then back to a url, otherwise reload fails + // when this file is replaced. + const char* filename = url.fileSystemRepresentation; + NSString* filenameString = [NSString stringWithUTF8String:filename]; + + url = [NSURL fileURLWithPath:filenameString]; + if ([self loadTextureFromURL:url]) { return YES; } @@ -1368,7 +1382,16 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { //NSLog(@"LoadTexture"); const char* filename = url.fileSystemRepresentation; - + + // Getting a url that returns nil on reload, probably some security thing + // consider storing a path instead of a url. Probably when file is replaced + // the saved image url no longer points to a valid filename. + if (filename == nullptr) + { + KLOGE("kramv", "Fix this url returning nil issue"); + return NO; + } + if (endsWithExtension(filename, ".zip")) { if (!self.imageURL || ![self.imageURL isEqualTo:url]) { BOOL isArchiveLoaded = [self loadArchive:filename]; diff --git a/libkram/kram/KramConfig.h b/libkram/kram/KramConfig.h index 257a7737..3c344385 100644 --- a/libkram/kram/KramConfig.h +++ b/libkram/kram/KramConfig.h @@ -370,28 +370,36 @@ inline half4 toHalf4(const float4& vv) // this just strips args #define macroUnusedVar(x) (void)x -// GL/D3D hobbled non-pow2 mips by only supporting round down, not round up -// And then Metal followd OpenGL since it's the same hw and drivers. -// Round up adds an extra mip level to the chain, but results in much better filtering. 
-// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_non_power_of_two.txt -// http://download.nvidia.com/developer/Papers/2005/NP2_Mipmapping/NP2_Mipmap_Creation.pdf + +//--------------------------------------- + #define ROUNDMIPSDOWN 1 inline void mipDown(int32_t& w, int32_t& h) { + // GL/D3D hobbled non-pow2 mips by only supporting round down, not round up + // And then Metal followd OpenGL since it's the same hw and drivers. + // Round up adds an extra mip level to the chain, but results in much better filtering. + // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_non_power_of_two.txt + // http://download.nvidia.com/developer/Papers/2005/NP2_Mipmapping/NP2_Mipmap_Creation.pdf + #if ROUNDMIPSDOWN + // round-down w = w / 2; h = h / 2; if (w < 1) w = 1; if (h < 1) h = 1; #else + // round-up w = (w + 1) / 2; h = (h + 1) / 2; #endif } // Use this on vectors +#include + template inline size_t vsizeof(const std::vector& v) { diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index f0c87451..2cab1300 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -1332,6 +1332,9 @@ void ImageInfo::heightToNormals(int32_t w, int32_t h, float dx = (cE - cW) * scaleX; float dy = (cN - cS) * scaleY; + dx = -dx; + dy = -dy; + float4 normal = float4m(dx, dy, 1.0f, 0.0f); normal = normalize(normal); diff --git a/scripts/kramTextures.py b/scripts/kramTextures.py index e4a52c55..183644c0 100755 --- a/scripts/kramTextures.py +++ b/scripts/kramTextures.py @@ -440,7 +440,11 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, # prefer etc since it's smaller and higher bit depth (11-bits) # note sdf and signed data will look odd in Preview. It's not - # really setup for signed data. + # really setup for signed data. + + # heightScale and wrap is really a per texture setting, but don't have + # support for that yet. 
+ fmtHeightArgs = " -height -heightScale 4 -wrap" if platform == "ios": # use astc since has more quality settings @@ -449,7 +453,7 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, fmtMetalRoughness = " -f etc2rg" fmtMask = " -f etc2r" fmtSDF = " -f etc2r -signed -sdf" - fmtHeight = fmtNormal + " -height -heightScale 2 -wrap" + fmtHeight = fmtNormal + fmtHeightArgs elif platform == "android": fmtAlbedo = " -f etc2rgba -srgb -premul -optopaque" # or astc @@ -457,7 +461,7 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, fmtMetalRoughness = " -f etc2rg" fmtMask = " -f etc2r" fmtSDF = " -f etc2r -signed -sdf" - fmtHeight = fmtNormal + " -height -heightScale 2 -wrap" + fmtHeight = fmtNormal + fmtHeightArgs elif platform == "mac": # bc1 on Toof has purple, green, yellow artifacts with bc7enc, and has more banding @@ -468,7 +472,7 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, fmtMetalRoughness = " -f bc5" fmtMask = " -f bc4" fmtSDF = " -f bc4 -signed -sdf" - fmtHeight = fmtNormal + " -height -heightScale 2 -wrap" + fmtHeight = fmtNormal + fmtHeightArgs elif platform == "win": # bc1 on Toof has purple, green, yellow artifacts with bc7enc, and has more banding @@ -478,7 +482,7 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, fmtMetalRoughness = " -f bc5" fmtMask = " -f bc4" fmtSDF = " -f bc4 -signed -sdf" - fmtHeight = fmtNormal + " -height -heightScale 2 -wrap" + fmtHeight = fmtNormal + fmtHeightArgs elif platform == "any": # output to s/rgba8u, then run through ktxsc to go to BasisLZ @@ -488,7 +492,7 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, fmtMetalRoughness = " -f rgba8 -swizzle r001" fmtMask = " -f rgba8 -swizzle r001" fmtSDF = " -f rgba8 -swizzle r001 -sdf" - fmtHeight = fmtNormal + " -height -heightScale 2 -wrap" + fmtHeight = fmtNormal + fmtHeightArgs else: return 1 From 9a2577c9b3f6141b97ec78e45b8f0ff4ab17ce0e Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 28 Mar 2021 16:53:16 -0700 Subject: [PATCH 025/901] kramv - fix the "Open" and "OpenRecent" menu to work, allow ktx2 support The UTI's needed to point to an NSDocument for "Open" to not be disabled. Define a document, and then re-route the load to the view or else the call crashes on readFromData unimplemented in the default NSDocument. Also define an Export Type Identifier or KTX2 was not highlighted in the NSOpenPanel file listings. "New" is still now enabled, since I set the app up as an Editor for "KTX2" type. If they're all viewers, then no "New" option is provided. Just want that to provide empty document windows. 
--- kramv/Info.plist | 32 +++++++++++- kramv/KramViewerMain.mm | 105 ++++++++++++++++++++++++++++++++++------ 2 files changed, 122 insertions(+), 15 deletions(-) diff --git a/kramv/Info.plist b/kramv/Info.plist index 11f0adb3..c3a1e263 100644 --- a/kramv/Info.plist +++ b/kramv/Info.plist @@ -19,6 +19,8 @@ org.khronos.ktx + NSDocumentClass + KramDocument CFBundleTypeIconSystemGenerated @@ -33,6 +35,8 @@ public.png + NSDocumentClass + KramDocument CFBundleTypeIconSystemGenerated @@ -40,13 +44,15 @@ CFBundleTypeName KTX2 CFBundleTypeRole - Viewer + Editor LSHandlerRank Default LSItemContentTypes public.ktx2 + NSDocumentClass + KramDocument CFBundleTypeIconSystemGenerated @@ -61,6 +67,8 @@ public.zip-archive + NSDocumentClass + KramDocument CFBundleExecutable @@ -85,5 +93,27 @@ Main NSPrincipalClass NSApplication + UTExportedTypeDeclarations + + + UTTypeConformsTo + + public.image + + UTTypeDescription + KTX2 + UTTypeIcons + + UTTypeIdentifier + public.ktx2 + UTTypeTagSpecification + + public.filename-extension + + ktx2 + + + + diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 075e24ed..03d128cd 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -48,6 +48,64 @@ - (BOOL)loadTextureFromURL:(NSURL*)url; @end +//------------- + +@interface KramDocument : NSDocument + +@end + + +@interface KramDocument () + +@end + +@implementation KramDocument + +- (instancetype)init { + self = [super init]; + if (self) { + // Add your subclass-specific initialization here. + } + return self; +} + ++ (BOOL)autosavesInPlace { + return NO; // YES; +} + +// call when "new" called +- (void)makeWindowControllers { + // Override to return the Storyboard file name of the document. + //NSStoryboard* storyboard = [NSStoryboard storyboardWithName:@"Main" bundle:nil]; + //NSWindowController* controller = [storyboard instantiateControllerWithIdentifier:@"Document Window Controller"]; + //[self addWindowController:controller]; +} + + +- (NSData *)dataOfType:(NSString *)typeName error:(NSError **)outError { + // Insert code here to write your document to data of the specified type. If outError != NULL, ensure that you create and set an appropriate error if you return nil. + // Alternatively, you could remove this method and override -fileWrapperOfType:error:, -writeToURL:ofType:error:, or -writeToURL:ofType:forSaveOperation:originalContentsURL:error: instead. 
+ [NSException raise:@"UnimplementedMethod" format:@"%@ is unimplemented", NSStringFromSelector(_cmd)]; + return nil; +} + + +- (BOOL)readFromURL:(NSURL *)url ofType:(NSString *)typeName error:(NSError **)outError { + +#if 0 + MyMTKView* view = self.windowControllers.firstObject.window.contentView; + return [view loadTextureFromURL:url]; +#else + NSApplication* app = [NSApplication sharedApplication]; + MyMTKView* view = app.mainWindow.contentView; + return [view loadTextureFromURL:url]; +#endif +} + + +@end + + //------------- @@ -79,6 +137,7 @@ - (BOOL)applicationShouldTerminateAfterLastWindowClosed:(NSApplication *)sender return YES; } +#if 1 - (void)application:(NSApplication *)sender openURLs:(nonnull NSArray *)urls { // see if this is called @@ -90,7 +149,19 @@ - (void)application:(NSApplication *)sender openURLs:(nonnull NSArray * NSURL *url = urls.firstObject; [view loadTextureFromURL:url]; } - +#else +- (BOOL)application:(NSApplication *)sender openFile:(nonnull NSString*)filename +{ + // see if this is called + //NSLog(@"OpenURLs"); + + // this is called from "Open In...", and also from OpenRecent documents menu + MyMTKView* view = sender.mainWindow.contentView; + + NSURL *url = [NSURL URLWithString:filename]; + return [view loadTextureFromURL:url]; +} +#endif - (IBAction)showAboutDialog:(id)sender { // calls openDocumentWithContentsOfURL above @@ -1226,9 +1297,7 @@ - (void)keyDown:(NSEvent *)theEvent } } -- (BOOL)acceptsFirstResponder { - return YES; -} + // Note: docs state that drag&drop should be handled automatically by UTI setup via openURLs // but I find these calls are needed, or it doesn't work. Maybe need to register for NSRUL @@ -1275,6 +1344,12 @@ - (BOOL)performDragOperation:(id)sender { // convert the original path and then back to a url, otherwise reload fails // when this file is replaced. const char* filename = url.fileSystemRepresentation; + if (filename == nullptr) + { + KLOGE("kramv", "Fix this drop url returning nil issue"); + return NO; + } + NSString* filenameString = [NSString stringWithUTF8String:filename]; url = [NSURL fileURLWithPath:filenameString]; @@ -1382,13 +1457,11 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { //NSLog(@"LoadTexture"); const char* filename = url.fileSystemRepresentation; - - // Getting a url that returns nil on reload, probably some security thing - // consider storing a path instead of a url. Probably when file is replaced - // the saved image url no longer points to a valid filename. if (filename == nullptr) { - KLOGE("kramv", "Fix this url returning nil issue"); + // Fixed by converting dropped urls into paths then back to a url. + // When file replaced the drop url is no longer valid. + KLOGE("kramv", "Fix this load url returning nil issue"); return NO; } @@ -1456,10 +1529,10 @@ - (void)concludeDragOperation:(id)sender { -// this doesn't seem to enable New/Open File menu items, but it should +// this doesn't seem to enable New. Was able to get "Open" to highlight by setting NSDocument as class for doc types. 
// https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/EventOverview/EventArchitecture/EventArchitecture.html #if 0 - +/* // "New"" calls this - (__kindof NSDocument *)openUntitledDocumentAndDisplay:(BOOL)displayDocument error:(NSError * _Nullable *)outError @@ -1484,8 +1557,15 @@ - (IBAction)openDocument { - (IBAction)newDocument { // calls openUntitledDocumentAndDisplay above } +*/ #endif + +- (BOOL)acceptsFirstResponder { + return YES; +} + + @end //------------- @@ -1535,9 +1615,6 @@ - (void)viewDidLoad } - - - @end From 2cb3a9dd1a7a570cf65c6dd566e976c8013d7079 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Wed, 7 Apr 2021 14:44:17 -0700 Subject: [PATCH 026/901] kram - fix main to return 1 on failures, KPS CFBundlePackageType set to 8BIF KPS plugin shows up in PS but "fails to find file" on open/save. --- kram/KramMain.cpp | 9 ++++++++- plugin/kps/mac/Info.plist | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/kram/KramMain.cpp b/kram/KramMain.cpp index dc4ef8bb..9ddbdcde 100644 --- a/kram/KramMain.cpp +++ b/kram/KramMain.cpp @@ -2,5 +2,12 @@ int main(int argc, char* argv[]) { - return kram::kramAppMain(argc, argv); + int errorCode = kram::kramAppMain(argc, argv); + + // returning -1 from main results in exit code of 255, so fix this to return 1 on failure. + if (errorCode != 0) { + exit(1); + } + + return 0; } diff --git a/plugin/kps/mac/Info.plist b/plugin/kps/mac/Info.plist index 0537f0ac..f39d6d81 100755 --- a/plugin/kps/mac/Info.plist +++ b/plugin/kps/mac/Info.plist @@ -13,7 +13,7 @@ CFBundleName $(PRODUCT_NAME) CFBundlePackageType - $(PLUGIN_TYPE) + 8BIF CFBundleSignature 8BIM From e840b55d6a10842bd235196b0431d939f444e634 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 8 May 2021 13:14:19 -0700 Subject: [PATCH 027/901] Kramv - switch to 16f, RGBA16f backbuffer avoids the srgb curve, and stores the linear color and normal values. Otherwise linear values like normals were getting munged writing them. The display pipeline will still convert these to srgb, but the compute shader can sample from these and get unsnapped linear values. Display the premul values in eyedropper if alpha is present. This might be too many values, but keep for now. Fix signed conversion. Add helpers to shader, and handle 8-bit unorm to snorm conversions. Better normal reconstruct that avoids non-unit vecs, and n.z = 0 case. Will add debug mode to flag these pixels. n.xy = 00, 10, 01, 11, etc are all invalid, and the sat isn't enough. 
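
For the 8-bit path, a snorm value stored in a unorm byte puts zero at 128/255, so a plain 2*c - 1 lands slightly off; the shader diff below adds (255/127)*c - 128/127 style helpers for that, plus the premultiplied display value. A CPU-side sketch of the same two conversions (names are illustrative, not kram's API):

```cpp
#include <cstdio>

// Mirrors the helpers the shader diff below introduces: an 8-bit aware
// unorm -> snorm expansion (zero sits at 128/255), and the premultiplied
// value shown in the eyedropper (rgb scaled by alpha, alpha kept).
inline float toSnorm8(float c) { return (255.0f / 127.0f) * c - (128.0f / 127.0f); }

struct Float4 { float x, y, z, w; };

inline Float4 toPremul(Float4 c)
{
    c.x *= c.w;
    c.y *= c.w;
    c.z *= c.w;
    return c;
}

int main()
{
    printf("snorm of 128/255 = %f\n", toSnorm8(128.0f / 255.0f));  // exactly 0

    Float4 c = { 1.0f, 0.5f, 0.25f, 0.5f };
    Float4 p = toPremul(c);
    printf("premul: %f %f %f %f\n", p.x, p.y, p.z, p.w);  // 0.5 0.25 0.125 0.5
    return 0;
}
```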
--- kramv/KramRenderer.mm | 6 +- kramv/KramShaders.metal | 106 +++++++++++++++++++++++++++++++----- kramv/KramViewerMain.mm | 46 ++++++++++------ libkram/kram/KramMipper.cpp | 2 +- 4 files changed, 128 insertions(+), 32 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index c4632b03..c311a6e2 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -144,7 +144,11 @@ - (void)_loadMetalWithView:(nonnull MTKView *)view /// Load Metal state objects and initialize renderer dependent view properties view.depthStencilPixelFormat = MTLPixelFormatDepth32Float_Stencil8; - view.colorPixelFormat = MTLPixelFormatBGRA8Unorm_sRGB; // TODO: adjust this to draw srgb or not, prefer RGBA + //view.colorPixelFormat = MTLPixelFormatBGRA8Unorm_sRGB; // TODO: adjust this to draw srgb or not, prefer RGBA + + // have a mix of linear color and normals, don't want srgb conversion until displayed + view.colorPixelFormat = MTLPixelFormatRGBA16Float; + view.sampleCount = 1; _mtlVertexDescriptor = [[MTLVertexDescriptor alloc] init]; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 97e76179..0b128133 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -129,6 +129,86 @@ vertex ColorInOut DrawVolumeVS( return out; } +float toUnorm8(float c) +{ + return (127.0 / 255.0) * c + (128 / 255.0); +} +float2 toUnorm8(float2 c) +{ + return (127.0 / 255.0) * c + float2(128 / 255.0); +} +float3 toUnorm8(float3 c) +{ + return (127.0 / 255.0) * c + float3(128 / 255.0); +} + +float toUnorm(float c) +{ + return 0.5 * c + 0.5; +} +float2 toUnorm(float2 c) +{ + return 0.5 * c + 0.5; +} +float3 toUnorm(float3 c) +{ + return 0.5 * c + 0.5; +} + +float toSnorm8(float c) +{ + return (255.0 / 127.0) * c - (128 / 127.0); +} + +float2 toSnorm8(float2 c) +{ + return (255.0 / 127.0) * c - float2(128 / 127.0); +} + +float3 toSnorm8(float3 c) +{ + return (255.0 / 127.0) * c - float3(128 / 127.0); +} + +float recip(float c) +{ + return 1.0 / c; +} +float2 recip(float2 c) +{ + return 1.0 / c; +} +float3 recip(float3 c) +{ + return 1.0 / c; +} +float4 recip(float4 c) +{ + return 1.0 / c; +} + + +// scale and reconstruct normal +float3 toNormal(float3 n) +{ + // make sure the normal doesn't exceed the unit circle + // many reconstructs skip and get a non-unit or z=0 normal + // might make optional or flag pixel with a debug mode that exeed + float len = length_squared(n.xy); + if (len > 0.99 * 0.99) + { + len *= 1.001; // so we have a non-zero z component below + n.xy *= rsqrt(len); + } + + // make sure always have non-zero z, or get Nan after it knocks out N of TBN + // since that's often pointing purely in 001 direction. + len = min(0.999, len); + n.z = sqrt(1 - len); + return n; +} + + float4 DrawPixels( ColorInOut in [[stage_in]], constant Uniforms& uniforms, @@ -142,7 +222,7 @@ float4 DrawPixels( if (uniforms.isSDF) { if (!uniforms.isSigned) { // convert to signed normal to compute z - c.r = 2.0 * c.r - 256.0 / 255.0; // 0 = 128 on unorm data on 8u + c.r = toSnorm8(c.r); // 0 = 128 on unorm data on 8u } // 0.0 is the boundary of visible vs. 
non-visible and not a true alpha @@ -157,7 +237,7 @@ float4 DrawPixels( float dist = c.r; // size of one pixel line - float onePixel = 1.0 / max(0.0001, length(float2(dfdx(dist), dfdy(dist)))); + float onePixel = recip(max(0.0001, length(float2(dfdx(dist), dfdy(dist))))); // distance to edge in pixels (scalar) float pixelDist = dist * onePixel; @@ -178,10 +258,10 @@ float4 DrawPixels( // to signed if (!uniforms.isSigned) { // convert to signed normal to compute z - c.rg = 2.0 * c.rg - float2(256.0 / 255.0); // 0 = 128 on unorm data on 8u + c.rg = toSnorm8(c.rg); } - c.z = sqrt(1 - saturate(dot(c.xy, c.xy))); // z always positive + c.rgb = toNormal(c.rgb); float3 lightDir = normalize(float3(1,1,1)); float3 lightColor = float3(1,1,1); @@ -213,7 +293,7 @@ float4 DrawPixels( else { // to unorm if (uniforms.isSigned) { - c.xyz = c.xyz * 0.5 + 0.5; + c.xyz = toUnorm(c.xyz); } // to premul, but also need to see without premul @@ -227,7 +307,7 @@ float4 DrawPixels( if (uniforms.numChannels == 1) { // toUnorm if (uniforms.isSigned) { - c.x = c.x * 0.5 + 0.5; + c.x = toUnorm(c.x); } } else if (uniforms.isNormal) { @@ -239,13 +319,13 @@ float4 DrawPixels( // to signed if (!uniforms.isSigned) { // convert to signed normal to compute z - c.rg = 2.0 * c.rg - float2(256.0 / 255.0); // 0 = 128 on unorm data + c.rg = toSnorm8(c.rg); } - c.z = sqrt(1 - saturate(dot(c.xy, c.xy))); // z always positive + c.rgb = toNormal(c.rgb); // from signed, to match other editors that don't display signed data - c.xyz = c.xyz * 0.5 + 0.5; // can sample from this + c.xyz = toUnorm(c.xyz); // can sample from this // view data as abs magnitude //c.xyz = abs(c.xyz); // bright on extrema, but no indicator of sign (use r,g viz) @@ -258,7 +338,7 @@ float4 DrawPixels( // signed 1/2 channel formats return sr,0,0, and sr,sg,0 for rgb? // May want to display those as 0 not 0.5. if (uniforms.isSigned) { - c.xyz = c.xyz * 0.5 + 0.5; + c.xyz = toUnorm(c.xyz); } // to premul, but also need to see without premul @@ -307,7 +387,7 @@ float4 DrawPixels( - if (uniforms.debugMode != ShDebugModeNone && c.a != 0.0f) { + if (uniforms.debugMode != ShDebugModeNone && c.a != 0.0) { bool isHighlighted = false; if (uniforms.debugMode == ShDebugModeTransparent) { @@ -357,7 +437,7 @@ float4 DrawPixels( // TODO: is it best to highlight the interest pixels in red // or the negation of that to see which ones aren't. if (isHighlighted) { - float3 highlightColor = float3(1.0f, 0.0f, 1.0f); + float3 highlightColor = float3(1, 0, 1); c.rgb = highlightColor; } @@ -375,7 +455,7 @@ float4 DrawPixels( // DONE: don't draw grid if too small // fwidth = abs(ddx(p)) + abs(ddy(p)) - float2 lineWidth = 1.0 / fwidth(pixels); + float2 lineWidth = recip(fwidth(pixels)); // only show grid when pixels are 8px or bigger if (max(lineWidth.x, lineWidth.y) >= 8.0) { diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 03d128cd..59951065 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -77,7 +77,7 @@ + (BOOL)autosavesInPlace { - (void)makeWindowControllers { // Override to return the Storyboard file name of the document. 
//NSStoryboard* storyboard = [NSStoryboard storyboardWithName:@"Main" bundle:nil]; - //NSWindowController* controller = [storyboard instantiateControllerWithIdentifier:@"Document Window Controller"]; + //NSWindowController* controller = [storyboard instantiateControllerWithIdentifier:@"NameNeeded]; //[self addWindowController:controller]; } @@ -137,7 +137,6 @@ - (BOOL)applicationShouldTerminateAfterLastWindowClosed:(NSApplication *)sender return YES; } -#if 1 - (void)application:(NSApplication *)sender openURLs:(nonnull NSArray *)urls { // see if this is called @@ -149,19 +148,6 @@ - (void)application:(NSApplication *)sender openURLs:(nonnull NSArray * NSURL *url = urls.firstObject; [view loadTextureFromURL:url]; } -#else -- (BOOL)application:(NSApplication *)sender openFile:(nonnull NSString*)filename -{ - // see if this is called - //NSLog(@"OpenURLs"); - - // this is called from "Open In...", and also from OpenRecent documents menu - MyMTKView* view = sender.mainWindow.contentView; - - NSURL *url = [NSURL URLWithString:filename]; - return [view loadTextureFromURL:url]; -} -#endif - (IBAction)showAboutDialog:(id)sender { // calls openDocumentWithContentsOfURL above @@ -663,6 +649,16 @@ - (void)mouseMoved:(NSEvent*)event [self updateEyedropper]; } +inline float4 toPremul(const float4& c) +{ + // premul with a + float4 cpremul = c; + float a = c.a; + cpremul.w = 1.0f; + cpremul *= a; + return cpremul; +} + - (void)updateEyedropper { if ((!_showSettings->isHudShown)) { return; @@ -819,8 +815,8 @@ - (void)updateEyedropper { if (isSigned && !isDecodeSigned) { c.x = c.x * 2.0f - 1.0f; c.y = c.y * 2.0f - 1.0f; - c.z = c.y * 2.0f - 1.0f; - c.w = c.y * 2.0f - 1.0f; + c.z = c.z * 2.0f - 1.0f; + c.w = c.w * 2.0f - 1.0f; } if (isNormal) { @@ -862,6 +858,22 @@ - (void)updateEyedropper { printChannels(tmp, "sr: ", s, numChannels, isFloat, isSigned); text += tmp; } + + // display the premul values too + if (c.a < 1.0f) + { + printChannels(tmp, "lnp: ", toPremul(c), numChannels, isFloat, isSigned); + text += tmp; + + // TODO: do we need the premul srgb color too? + if (isSrgb) { + // this saturates the value, so don't use for extended srgb + float4 s = linearToSRGB(c); + + printChannels(tmp, "srp: ", toPremul(s), numChannels, isFloat, isSigned); + text += tmp; + } + } } [self setEyedropperText:text.c_str()]; diff --git a/libkram/kram/KramMipper.cpp b/libkram/kram/KramMipper.cpp index 6f0b6701..50b5715c 100644 --- a/libkram/kram/KramMipper.cpp +++ b/libkram/kram/KramMipper.cpp @@ -51,7 +51,7 @@ inline Color Unormfloat4ToColor(float4 value) inline Color Snormfloat4ToColor(float4 value) { Color c; - value = round(127.0f * value) + float4(127.0f); // or is it 128? TODO: validatate last ctor sets all values + value = round(127.0f * value) + float4(128.0f); c.r = (uint8_t)value.x; c.g = (uint8_t)value.y; c.b = (uint8_t)value.z; From 0d1dc780604d066f14948e48200102dc64fe31a0 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 8 May 2021 15:22:38 -0700 Subject: [PATCH 028/901] kramv - debug mode for len(n.xy) > 0.99 pixels This is a fairly common issue on normal maps, so flag it to show why protections in reconstruct are important. 
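
The same check, pulled out of the shader so it can run over a texture on the CPU: flag texels whose xy, expanded to snorm, already reach the unit circle, since sqrt(1 - dot(n.xy, n.xy)) then reconstructs a zero or garbage z. The cutoff matches this commit's 0.99; a later commit raises it to 0.999. Names are illustrative, not kram's API.

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Sketch of the debug-mode test: count normal-map texels whose xy part is
// already at or past the unit circle after unorm -> snorm expansion.
struct Rgba8 { uint8_t r, g, b, a; };

inline float toSnorm8(uint8_t v)
{
    return (255.0f / 127.0f) * (v / 255.0f) - (128.0f / 127.0f);
}

size_t countBadNormalTexels(const std::vector<Rgba8>& pixels, float cutoff = 0.99f)
{
    size_t count = 0;
    for (const Rgba8& p : pixels) {
        float x = toSnorm8(p.r);
        float y = toSnorm8(p.g);
        if (x * x + y * y > cutoff * cutoff)
            ++count;
    }
    return count;
}

int main()
{
    std::vector<Rgba8> pixels = {
        { 128, 128, 255, 255 },  // flat normal, fine
        { 255, 128, 255, 255 },  // x = 1, on the circle, flagged
    };
    printf("flagged %zu of %zu texels\n", countBadNormalTexels(pixels), pixels.size());
    return 0;
}
```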
--- kramv/KramShaders.h | 1 + kramv/KramShaders.metal | 45 +++++++++++++++++++++++++++++----------- kramv/KramViewerBase.cpp | 3 +++ kramv/KramViewerBase.h | 1 + kramv/KramViewerMain.mm | 1 + 5 files changed, 39 insertions(+), 12 deletions(-) diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index fa8ea0fc..2799faa4 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -80,6 +80,7 @@ typedef NS_ENUM(int32_t, ShaderDebugMode) ShDebugModePosX = 5, ShDebugModePosY = 6, + ShDebugModeCircleXY = 7, ShDebugModeCount }; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 0b128133..4e64dc2a 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -135,11 +135,11 @@ float toUnorm8(float c) } float2 toUnorm8(float2 c) { - return (127.0 / 255.0) * c + float2(128 / 255.0); + return (127.0 / 255.0) * c + (128 / 255.0); } float3 toUnorm8(float3 c) { - return (127.0 / 255.0) * c + float3(128 / 255.0); + return (127.0 / 255.0) * c + (128 / 255.0); } float toUnorm(float c) @@ -162,12 +162,17 @@ float toSnorm8(float c) float2 toSnorm8(float2 c) { - return (255.0 / 127.0) * c - float2(128 / 127.0); + return (255.0 / 127.0) * c - (128 / 127.0); } float3 toSnorm8(float3 c) { - return (255.0 / 127.0) * c - float3(128 / 127.0); + return (255.0 / 127.0) * c - (128 / 127.0); +} + +float2 toSnorm(float2 c) +{ + return 2 * c - 1.0; } float recip(float c) @@ -194,20 +199,29 @@ float3 toNormal(float3 n) // make sure the normal doesn't exceed the unit circle // many reconstructs skip and get a non-unit or z=0 normal // might make optional or flag pixel with a debug mode that exeed - float len = length_squared(n.xy); - if (len > 0.99 * 0.99) + float len2 = length_squared(n.xy); + const float maxLen2 = 0.99 * 0.99; + + if (len2 > maxLen2) { - len *= 1.001; // so we have a non-zero z component below - n.xy *= rsqrt(len); + len2 *= 1.001; // so we have a non-zero z component below + n.xy *= rsqrt(len2); + len2 = maxLen2; } - + //len2 = min(0.99, len2); + // make sure always have non-zero z, or get Nan after it knocks out N of TBN // since that's often pointing purely in 001 direction. - len = min(0.999, len); - n.z = sqrt(1 - len); + n.z = sqrt(1 - len2); return n; } +// TODO: do more test shapes, but that affects eyedropper +// use mikktspace, gen bitan in frag shader with sign, don't normalize vb/vt +// see http://www.mikktspace.com/ + +// TODO: eliminate the toUnorm() calls below, rendering to rgba16f +// but also need to remove conversion code on cpu side expecting unorm in eyedropper float4 DrawPixels( ColorInOut in [[stage_in]], @@ -433,7 +447,14 @@ float4 DrawPixels( isHighlighted = true; } } - + else if (uniforms.debugMode == ShDebugModeCircleXY) { + // flag pixels that would throw off normal reconstruct sqrt(1-dot(n.xy,n.xy)) + // see code above in shader that helps keep that from z = 0 + float len2 = length_squared(toSnorm(c.rg)); + if (len2 > (0.99 * 0.99)) { + isHighlighted = true; + } + } // TODO: is it best to highlight the interest pixels in red // or the negation of that to see which ones aren't. 
if (isHighlighted) { diff --git a/kramv/KramViewerBase.cpp b/kramv/KramViewerBase.cpp index e1edac33..5fb3d12c 100644 --- a/kramv/KramViewerBase.cpp +++ b/kramv/KramViewerBase.cpp @@ -53,6 +53,9 @@ void ShowSettings::advanceDebugMode(bool isShiftKeyDown) { if (debugMode == DebugModePosY && !(isNormal)) { advanceDebugMode(isShiftKeyDown); } + if (debugMode == DebugModeCircleXY && !(isNormal)) { + advanceDebugMode(isShiftKeyDown); + } // TODO: have a clipping mode against a variable range too, only show pixels within that range // to help isolate problem pixels. Useful for depth, and have auto-range scaling for it and hdr. diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index d1766924..3f5a883e 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -40,6 +40,7 @@ enum DebugMode DebugModePosX = 5, DebugModePosY = 6, + DebugModeCircleXY = 7, DebugModeCount }; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 59951065..682ba97d 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1072,6 +1072,7 @@ - (void)keyDown:(NSEvent *)theEvent case DebugModeHDR: text = "Debug HDR"; break; case DebugModePosX: text = "Debug +X"; break; case DebugModePosY: text = "Debug +Y"; break; + case DebugModeCircleXY: text = "Debug XY>=1"; break; default: break; } isChanged = true; From 4cad98c995757b3ece422d86ddc0d37b3549708b Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 8 May 2021 16:36:56 -0700 Subject: [PATCH 029/901] kramv - clear hud text on texture load since debugMode is reset, increase nmap cutoff Change from 0.99 to 0.999. nmap.z min is 0.14 to 0.04. Open Recent document menu is only working the first time an item is picked. Need to hook some other callback since these are added into document menu list, and readFromURL isn't called second time. --- kramv/KramRenderer.mm | 2 ++ kramv/KramShaders.metal | 10 ++++-- kramv/KramViewerMain.mm | 78 ++++++++++++++++++++++++++++++++++------- 3 files changed, 74 insertions(+), 16 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index c311a6e2..7014ff63 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -585,6 +585,8 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex _showSettings->zoom = _showSettings->zoomFit; + // wish could keep existing setting, but new texture might not + // be supported debugMode for new texture _showSettings->debugMode = DebugMode::DebugModeNone; // have one of these for each texture added to the viewer diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 4e64dc2a..de4066d8 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -169,6 +169,10 @@ float3 toSnorm8(float3 c) { return (255.0 / 127.0) * c - (128 / 127.0); } +float4 toSnorm8(float4 c) +{ + return (255.0 / 127.0) * c - (128 / 127.0); +} float2 toSnorm(float2 c) { @@ -200,7 +204,7 @@ float3 toNormal(float3 n) // many reconstructs skip and get a non-unit or z=0 normal // might make optional or flag pixel with a debug mode that exeed float len2 = length_squared(n.xy); - const float maxLen2 = 0.99 * 0.99; + const float maxLen2 = 0.999 * 0.999; if (len2 > maxLen2) { @@ -208,7 +212,7 @@ float3 toNormal(float3 n) n.xy *= rsqrt(len2); len2 = maxLen2; } - //len2 = min(0.99, len2); + //len2 = min(0.999, len2); // make sure always have non-zero z, or get Nan after it knocks out N of TBN // since that's often pointing purely in 001 direction. 
@@ -451,7 +455,7 @@ float4 DrawPixels( // flag pixels that would throw off normal reconstruct sqrt(1-dot(n.xy,n.xy)) // see code above in shader that helps keep that from z = 0 float len2 = length_squared(toSnorm(c.rg)); - if (len2 > (0.99 * 0.99)) { + if (len2 > (0.999 * 0.999)) { isHighlighted = true; } } diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 682ba97d..483d68a0 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -46,6 +46,8 @@ @interface MyMTKView : MTKView - (BOOL)loadTextureFromURL:(NSURL*)url; +- (void)setHudText:(const char*)text; + @end //------------- @@ -92,13 +94,37 @@ - (NSData *)dataOfType:(NSString *)typeName error:(NSError **)outError { - (BOOL)readFromURL:(NSURL *)url ofType:(NSString *)typeName error:(NSError **)outError { + // TODO: this recent menu only seems to work the first time + // and not in subsequent calls to the same entry. readFromUrl isn't even called. + // So don't get a chance to switch back to a recent texture. + // Maybe there's some list of documents created and so it doesn't + // think the file needs to be reloaded. + // + // Note: if I return NO from this call then a dialog pops up that image + // couldn't be loaded, but then the readFromURL is called everytime a new + // image is picked from the list. + + // called from OpenRecent documents menu + #if 0 MyMTKView* view = self.windowControllers.firstObject.window.contentView; return [view loadTextureFromURL:url]; #else NSApplication* app = [NSApplication sharedApplication]; MyMTKView* view = app.mainWindow.contentView; - return [view loadTextureFromURL:url]; + BOOL success = [view loadTextureFromURL:url]; + if (success) + { + [view setHudText:""]; + } + + // Let's see the document list +// NSDocumentController* dc = [NSDocumentController sharedDocumentController]; +// NSDocument* currentDoc = dc.currentDocument; +// +// KLOGW("kramv", "This is document count %d", (int)dc.documents.count, ); + + return success; #endif } @@ -139,10 +165,7 @@ - (BOOL)applicationShouldTerminateAfterLastWindowClosed:(NSApplication *)sender - (void)application:(NSApplication *)sender openURLs:(nonnull NSArray *)urls { - // see if this is called - //NSLog(@"OpenURLs"); - - // this is called from "Open In...", and also from OpenRecent documents menu + // this is called from "Open In..." 
MyMTKView* view = sender.mainWindow.contentView; NSURL *url = urls.firstObject; @@ -659,6 +682,26 @@ inline float4 toPremul(const float4& c) return cpremul; } +float toSnorm8(float c) +{ + return (255.0 / 127.0) * c - (128 / 127.0); +} + +float2 toSnorm8(float2 c) +{ + return (255.0 / 127.0) * c - (128 / 127.0); +} + +float3 toSnorm8(float3 c) +{ + return (255.0 / 127.0) * c - (128 / 127.0); +} +float4 toSnorm8(float4 c) +{ + return (255.0 / 127.0) * c - (128 / 127.0); +} + + - (void)updateEyedropper { if ((!_showSettings->isHudShown)) { return; @@ -813,10 +856,7 @@ - (void)updateEyedropper { bool isDecodeSigned = isSignedFormat(_showSettings->decodedFormat); if (isSigned && !isDecodeSigned) { - c.x = c.x * 2.0f - 1.0f; - c.y = c.y * 2.0f - 1.0f; - c.z = c.z * 2.0f - 1.0f; - c.w = c.w * 2.0f - 1.0f; + c = toSnorm8(c.x); } if (isNormal) { @@ -825,13 +865,20 @@ - (void)updateEyedropper { // unorm -> snorm if (!isSigned) { - nx = nx * 2.0f - 1.0f; - ny = ny * 2.0f - 1.0f; + nx = toSnorm8(nx); + ny = toSnorm8(ny); } + // Note: not clamping nx,ny to < 1 like in shader + // this is always postive on tan-space normals // assuming we're not viewing world normals - float nz = sqrt(1.0f - std::min(nx * nx + ny * ny, 1.0f)); + const float maxLen2 = 0.999 * 0.999; + float len2 = nx * nx + ny * ny; + if (len2 > maxLen2) + len2 = maxLen2; + + float nz = sqrt(1.0f - len2); // print the underlying color (some nmaps are xy in 4 channels) string tmp; @@ -1368,6 +1415,8 @@ - (BOOL)performDragOperation:(id)sender { url = [NSURL fileURLWithPath:filenameString]; if ([self loadTextureFromURL:url]) { + [self setHudText:""]; + return YES; } } @@ -1525,8 +1574,11 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { self.window.title = [NSString stringWithUTF8String: title.c_str()]; + // topmost entry will be the recently opened document + // some entries may go stale if directories change, not sure who validates the list + // add to recent document menu - NSDocumentController *dc = [NSDocumentController sharedDocumentController]; + NSDocumentController* dc = [NSDocumentController sharedDocumentController]; [dc noteNewRecentDocumentURL:url]; self.imageURL = url; From d81d87fa2f9843141a226c86288654c638127a9c Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 8 May 2021 17:14:36 -0700 Subject: [PATCH 030/901] kramv - add a hack to keep "Open Recent" menu calling readFromURL readFromURL stops getting called once NSDocumentController has an NSDocument associate with a URL. So keep wiping out the NSDocument list on every load. The doc controller comments are near impossible to decipher what caller is supposed to do when document changes, and this works. --- kramv/KramViewerMain.mm | 46 ++++++++++++++++++++++++++--------------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 483d68a0..ede98cbb 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -94,36 +94,46 @@ - (NSData *)dataOfType:(NSString *)typeName error:(NSError **)outError { - (BOOL)readFromURL:(NSURL *)url ofType:(NSString *)typeName error:(NSError **)outError { - // TODO: this recent menu only seems to work the first time - // and not in subsequent calls to the same entry. readFromUrl isn't even called. - // So don't get a chance to switch back to a recent texture. - // Maybe there's some list of documents created and so it doesn't - // think the file needs to be reloaded. 
- // - // Note: if I return NO from this call then a dialog pops up that image - // couldn't be loaded, but then the readFromURL is called everytime a new - // image is picked from the list. // called from OpenRecent documents menu #if 0 - MyMTKView* view = self.windowControllers.firstObject.window.contentView; - return [view loadTextureFromURL:url]; + //MyMTKView* view = self.windowControllers.firstObject.window.contentView; + //return [view loadTextureFromURL:url]; #else + NSApplication* app = [NSApplication sharedApplication]; MyMTKView* view = app.mainWindow.contentView; BOOL success = [view loadTextureFromURL:url]; if (success) { [view setHudText:""]; + + // DONE: this recent menu only seems to work the first time + // and not in subsequent calls to the same entry. readFromUrl isn't even called. + // So don't get a chance to switch back to a recent texture. + // Maybe there's some list of documents created and so it doesn't + // think the file needs to be reloaded. + // + // Note: if I return NO from this call then a dialog pops up that image + // couldn't be loaded, but then the readFromURL is called everytime a new + // image is picked from the list. + + // Clear the document list so readFromURL keeps getting called + // Can't remove currentDoc, so have to skip that + NSDocumentController* dc = [NSDocumentController sharedDocumentController]; + NSDocument* currentDoc = dc.currentDocument; + NSMutableArray* docsToRemove = [[NSMutableArray alloc] init]; + for (NSDocument* doc in dc.documents) { + if (doc != currentDoc) + [docsToRemove addObject: doc]; + } + + for (NSDocument* doc in docsToRemove) { + [dc removeDocument: doc]; + } } - // Let's see the document list -// NSDocumentController* dc = [NSDocumentController sharedDocumentController]; -// NSDocument* currentDoc = dc.currentDocument; -// -// KLOGW("kramv", "This is document count %d", (int)dc.documents.count, ); - return success; #endif } @@ -682,6 +692,8 @@ inline float4 toPremul(const float4& c) return cpremul; } +// Writing out to rgba32 for sampling, but unorm formats like ASTC and RGBA8 +// are still off and need to use the following. float toSnorm8(float c) { return (255.0 / 127.0) * c - (128 / 127.0); From 247ac8baf729982f8ad38b60c71de46c8c922a15 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 8 May 2021 19:34:57 -0700 Subject: [PATCH 031/901] kramv - turn assert in KTXImage into returning false to fail the load Several of libktx's test images were hitting this assert. Need to find out why. Could be reversed mips on ktx2 vs. ktx. --- kramv/KramViewerMain.mm | 5 +++++ libkram/kram/KTXImage.cpp | 10 +++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index ede98cbb..78ffc877 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -178,6 +178,11 @@ - (void)application:(NSApplication *)sender openURLs:(nonnull NSArray * // this is called from "Open In..." MyMTKView* view = sender.mainWindow.contentView; + // TODO: if more than one url dropped, and they are albedo/nmap, then display them + // together with the single uv set. Need controls to show one or all together. + + // TODO: also do an overlapping diff if two files are dropped with same dimensions. 
+ NSURL *url = urls.firstObject; [view loadTextureFromURL:url]; } diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 754a0172..99b27e8f 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1363,7 +1363,15 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) // the offsets are reversed in ktx2 file level1.offset = level2.offset; - assert(level1.length == level2.length); + + if (level1.length != level2.length) + { + // This is likely due to the reversal of mips + // but many of the test images from libkx are hitting this, fix this issue. + + KLOGE("kram", "mip sizes aren't equal"); + return false; + } } } else { From c71f613889a7cc1aa17734e1310b3e70bf124e39 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 8 May 2021 20:20:49 -0700 Subject: [PATCH 032/901] kram - fail early if the pixel format is unupported in the loader. --- libkram/kram/KTXImage.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 99b27e8f..7decca81 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -503,7 +503,9 @@ const KTXFormatInfo& formatInfo(MyMTLPixelFormat format) initFormatsIfNeeded(); const auto& it = gFormatTable->find(format); - assert(it != gFormatTable->end()); + if (it == gFormatTable->end()) { + return gFormatTable->find(MyMTLPixelFormatInvalid)->second; + } return it->second; } @@ -911,6 +913,11 @@ bool KTXImage::open(const uint8_t* imageData, size_t imageDataLength) pixelFormat = header.metalFormat(); } + if (pixelFormat == MyMTLPixelFormatInvalid) { + KLOGE("kram", "unsupported texture format glType 0x%0X", header.glFormat); + return false; + } + return initMipLevels(true, sizeof(KTXHeader) + header.bytesOfKeyValueData); } @@ -1326,6 +1333,13 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) // convert format to MyMTLPixelFormat pixelFormat = vulkanToMetalFormat(header2.vkFormat); + // kram can only load a subset of format + if (pixelFormat == MyMTLPixelFormatInvalid) + { + KLOGE("kram", "unsupported texture format VK_FORMAT %u", header2.vkFormat); + return false; + } + // Note: KTX2 also doesn't have the length field embedded the mipData // so need to be able to set skipLength to unify the mipgen if aliasing the mip data // Only reading this format, never writing it out. From 38d072559fb6f15b2a5ff4a218ffa7ad0d4f6b1d Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 8 May 2021 20:57:27 -0700 Subject: [PATCH 033/901] kram - print the glBase/Internal format. For some reason, on several libktx images glFormat is 0. 
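
These loader changes share one pattern: unknown or unsupported formats map to an invalid sentinel and fail the open with a logged diagnostic instead of asserting, so a bad file can't take down the viewer. A reduced sketch of that shape (enum values and names are illustrative, only the GL constants are real):

```cpp
#include <cstdint>
#include <cstdio>

// Unknown formats resolve to an Invalid sentinel; open() logs the raw GL
// fields (useful when glFormat comes in as 0) and returns false rather than
// asserting. This is a sketch, not kram's actual format table.
enum class PixelFormat { Invalid, RGBA8 };

PixelFormat formatFromGL(uint32_t glInternalFormat)
{
    switch (glInternalFormat) {
        case 0x8058: return PixelFormat::RGBA8;   // GL_RGBA8
        default:     return PixelFormat::Invalid; // includes glInternalFormat == 0
    }
}

bool openImage(uint32_t glBaseInternalFormat, uint32_t glInternalFormat)
{
    PixelFormat fmt = formatFromGL(glInternalFormat);
    if (fmt == PixelFormat::Invalid) {
        fprintf(stderr, "unsupported texture format glBase/glInternalFormat 0x%04X 0x%04X\n",
                glBaseInternalFormat, glInternalFormat);
        return false;  // fail the load, don't assert
    }
    return true;
}

int main()
{
    openImage(0x1908 /* GL_RGBA */, 0x8058);  // accepted
    openImage(0, 0);                          // logged and rejected
    return 0;
}
```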
--- libkram/kram/KTXImage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 7decca81..f69c75b6 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -914,7 +914,7 @@ bool KTXImage::open(const uint8_t* imageData, size_t imageDataLength) } if (pixelFormat == MyMTLPixelFormatInvalid) { - KLOGE("kram", "unsupported texture format glType 0x%0X", header.glFormat); + KLOGE("kram", "unsupported texture format glBase/glInternalFormat 0x%04X 0x%04X", header.glBaseInternalFormat, header.glInternalFormat); return false; } From 8a6ec896fd0f8b0121d10882c1f64a76948b4eb1 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 8 May 2021 23:30:21 -0700 Subject: [PATCH 034/901] kram/v - add SUPPORT_RGB flag, fix RGBA32F format RGB isn't supported by Metal, but we try our best to convert one mip level so that kramv can display the image. kram converts it to RGBA, but info will still report the RGB format. Row Alignment isn't always conducive to opening these, but will fix that later. Also not handling srgb properly. This has to first go to KramImage to load as RGBA and then back to KTXImage. Fix RGBA32F format in the table. --- libkram/kram/KTXImage.cpp | 28 ++++++++++++++++++++++++++-- libkram/kram/KTXImage.h | 14 ++++++++++++-- libkram/kram/KramConfig.h | 5 +++++ libkram/kram/KramImage.cpp | 16 ++++++++++++++-- 4 files changed, 57 insertions(+), 6 deletions(-) diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index f69c75b6..868c77af 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -159,6 +159,13 @@ enum GLFormat { GL_RG32F = 0x8230, GL_RGBA32F = 0x8814, +#if SUPPORT_RGB + GL_RGB8 = 0x8051, + GL_SRGB8 = 0x8C41, + GL_RGB16F = 0x881B, + GL_RGB32F = 0x8815 +#endif + /* These are all of the variants of ASTC, ugh. Only way to identify them is to walk blocks and it's unclear how to convert from 3D to 2D blocks or whether hw supports sliced 3D. 
@@ -299,6 +306,15 @@ enum VKFormat { // VK_FORMAT_ASTC_12x10_SRGB_BLOCK = 182, // VK_FORMAT_ASTC_12x12_UNORM_BLOCK = 183, // VK_FORMAT_ASTC_12x12_SRGB_BLOCK = 184, + +#if SUPPORT_RGB + // import only + VK_FORMAT_R8G8B8_UNORM = 23, + VK_FORMAT_R8G8B8_SRGB = 29, + VK_FORMAT_R16G16B16_SFLOAT = 90, + VK_FORMAT_R32G32B32_SFLOAT = 106, + +#endif }; // DONE: setup a format table, so can switch on it @@ -489,8 +505,16 @@ static bool initFormatsIfNeeded() KTX_FORMAT(EXPr32f, MyMTLPixelFormatR32Float, VK_FORMAT_R32_SFLOAT, GL_R32F, GL_RED, 1, 1, 4, 1, FLAG_32F) KTX_FORMAT(EXPrg32f, MyMTLPixelFormatRG32Float, VK_FORMAT_R32G32_SFLOAT, GL_RG32F, GL_RG, 1, 1, 8, 2, FLAG_32F) - KTX_FORMAT(EXPrg32f, MyMTLPixelFormatRGBA32Float, VK_FORMAT_R32G32B32A32_SFLOAT, GL_RGBA32F, GL_RGBA, 1, 1, 16, 4, FLAG_32F) - + KTX_FORMAT(EXPrgba32f, MyMTLPixelFormatRGBA32Float, VK_FORMAT_R32G32B32A32_SFLOAT, GL_RGBA32F, GL_RGBA, 1, 1, 16, 4, FLAG_32F) + +#if SUPPORT_RGB + // these are import only formats + KTX_FORMAT(EXPrgb8, MyMTLPixelFormatRGB8Unorm_internal, VK_FORMAT_R8G8B8_UNORM, GL_RGB8, GL_RGB, 1, 1, 3, 3, 0) + KTX_FORMAT(EXPsrgb8, MyMTLPixelFormatRGB8Unorm_sRGB_internal, VK_FORMAT_R8G8B8_SRGB, GL_SRGB8, GL_SRGB, 1, 1, 3, 3, FLAG_SRGB) + KTX_FORMAT(EXPrgb16f, MyMTLPixelFormatRGB16Float_internal, VK_FORMAT_R16G16B16_SFLOAT, GL_RGB16F, GL_RGB, 1, 1, 6, 3, FLAG_16F) + KTX_FORMAT(EXPrgb32f, MyMTLPixelFormatRGB32Float_internal, VK_FORMAT_R32G32B32_SFLOAT, GL_RGB32F, GL_RGB, 1, 1, 12, 3, FLAG_32F) +#endif + return true; } diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index 02387293..bf377602 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -102,11 +102,21 @@ enum MyMTLPixelFormat { // TODO: also need rgb9e5 for fallback if ASTC HDR/6H not supported // That is Unity's fallback if alpha not needed, otherwise RGBA16F. + +#if SUPPORT_RGB + // Can import files from KTX/KTX2 with RGB data, but convert right away to RGBA. + // These are not export formats. Watch alignment on these too. These + // have no MTLPixelFormat. 
+ MyMTLPixelFormatRGB8Unorm_internal = 200, + MyMTLPixelFormatRGB8Unorm_sRGB_internal = 201, + MyMTLPixelFormatRGB16Float_internal = 202, + MyMTLPixelFormatRGB32Float_internal = 203, +#endif }; enum MyMTLTextureType { - // MyMTLTextureType1D = 0, - MyMTLTextureType1DArray = 1, + // MyMTLTextureType1D = 0, // not twiddled or compressed, more like a buffer but with texture limits + MyMTLTextureType1DArray = 1, // not twiddled or compressed, more like a buffer but with texture limits MyMTLTextureType2D = 2, MyMTLTextureType2DArray = 3, // MyMTLTextureType2DMultisample = 4, diff --git a/libkram/kram/KramConfig.h b/libkram/kram/KramConfig.h index 3c344385..cdf2c205 100644 --- a/libkram/kram/KramConfig.h +++ b/libkram/kram/KramConfig.h @@ -146,6 +146,11 @@ #define COMPILE_ASTCENC 0 #endif +// rgb8/16f/32f formats only supported for import, Metal doesn't expose these formats +#ifndef SUPPORT_RGB +#define SUPPORT_RGB 1 +#endif + // includes that are usable across all files #include "KramLog.h" diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index b3758174..87915903 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -130,7 +130,13 @@ bool Image::loadImageFromKTX(const KTXImage& image) switch (image.pixelFormat) { case MyMTLPixelFormatR8Unorm: case MyMTLPixelFormatRG8Unorm: - case MyMTLPixelFormatRGBA8Unorm: { +#if SUPPORT_RGB + case MyMTLPixelFormatRGB8Unorm_sRGB_internal: // TODO: not handling srgba yet + case MyMTLPixelFormatRGB8Unorm_internal: +#endif + case MyMTLPixelFormatRGBA8Unorm_sRGB: // TODO: not handling srgba yet + case MyMTLPixelFormatRGBA8Unorm: + { const uint8_t* srcPixels = image.fileData + image.mipLevels[0].offset; @@ -171,6 +177,9 @@ bool Image::loadImageFromKTX(const KTXImage& image) case MyMTLPixelFormatR16Float: case MyMTLPixelFormatRG16Float: +#if SUPPORT_RGB + case MyMTLPixelFormatRGB16Float_internal: +#endif case MyMTLPixelFormatRGBA16Float: { int32_t numSrcChannels = blockSize / 2; // 2 = sizeof(_float16) int32_t numDstChannels = 4; @@ -220,7 +229,10 @@ bool Image::loadImageFromKTX(const KTXImage& image) case MyMTLPixelFormatR32Float: case MyMTLPixelFormatRG32Float: - case MyMTLPixelFormatRGBA32Float: { +#if SUPPORT_RGB + case MyMTLPixelFormatRGB32Float_internal: +#endif + case MyMTLPixelFormatRGBA32Float: { const float* srcPixels = (const float*)(image.fileData + image.mipLevels[0].offset); From 3dbaf8635ab27ae52aaf705dc0707a9c6ce0774e Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 9 May 2021 00:07:52 -0700 Subject: [PATCH 035/901] kramv - add atlas grid This cycles through 32, 64, 128, 256 entries, since these are common atlasing sizes. Now the d key and shift-d advance through none, pixel, block and the 4 atlas grid sizes. 
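
The keyboard handling in the diff below cycles a single index through all of the grid states. Restated as a standalone sketch (the real code keeps the index as a static in keyDown and uses a small macro):

```cpp
#include <cstdint>
#include <cstdio>

// off -> 1x1 pixel grid -> block grid -> 32/64/128/256 atlas grids, with
// shift stepping backwards. The block-grid entry is skipped when the format's
// block is 1x1 (explicit formats), since there is no block grid to show.
const int32_t kNumGrids = 7;
const uint32_t kGridSizes[kNumGrids] = { 0, 1, 2, 32, 64, 128, 256 };

int32_t advanceGrid(int32_t grid, bool decrement, int32_t blockX)
{
    grid = (grid + kNumGrids + (decrement ? -1 : 1)) % kNumGrids;

    if (grid == 2 && blockX == 1)  // no block grid for 1x1 blocks
        grid = (grid + kNumGrids + (decrement ? -1 : 1)) % kNumGrids;

    return grid;
}

int main()
{
    int32_t grid = 0;
    for (int32_t i = 0; i < 8; ++i) {
        grid = advanceGrid(grid, false, /* blockX */ 1);
        printf("grid state %d -> gridSizes %u\n", grid, kGridSizes[grid]);
    }
    return 0;
}
```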
--- kramv/KramRenderer.mm | 5 +++- kramv/KramViewerBase.h | 6 +++- kramv/KramViewerMain.mm | 61 ++++++++++++++++++++++++++++---------- libkram/kram/KramImage.cpp | 7 +++-- 4 files changed, 60 insertions(+), 19 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 7014ff63..a98896b0 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -648,12 +648,15 @@ - (void)_updateGameState uniforms.gridY = 1; } else if (_showSettings->isBlockGridShown) { - if (_showSettings->blockX > 1) { uniforms.gridX = _showSettings->blockX; uniforms.gridY = _showSettings->blockY; } } + else if (_showSettings->isAtlasGridShown) { + uniforms.gridX = _showSettings->gridSize; + uniforms.gridY = _showSettings->gridSize; + } // no debug mode when preview kicks on, make it possible to toggle back and forth more easily uniforms.debugMode = _showSettings->isPreview ? ShaderDebugMode::ShDebugModeNone : (ShaderDebugMode)_showSettings->debugMode; diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 3f5a883e..4c82328b 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -77,6 +77,7 @@ class ShowSettings { // draw a 1x1 or blockSize grid, note ASTC has non-square grid sizes bool isPixelGridShown = false; bool isBlockGridShown = false; + bool isAtlasGridShown = false; // show all mips, faces, arrays all at once bool isShowingAllLevelsAndMips = false; @@ -107,10 +108,13 @@ class ShowSettings { int32_t imageBoundsX = 0; // px int32_t imageBoundsY = 0; // px - // size of the block, uses in block grid drawing + // size of the block, used in block grid drawing int32_t blockX = 1; int32_t blockY = 1; + // set when isGridShow is true + int32_t gridSize = 1; + // for eyedropper, lookup this pixel value, and return it to CPU int32_t textureLookupX = 0; int32_t textureLookupY = 0; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 78ffc877..2ccf6801 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1232,28 +1232,59 @@ - (void)keyDown:(NSEvent *)theEvent break; // toggle pixel grid when magnified above 1 pixel, can happen from mipmap changes too - case Key::D: + case Key::D: { + static int grid = 0; + static const int kNumGrids = 7; + + #define advanceGrid(g, dec) \ + grid = (grid + kNumGrids + (dec ? -1 : 1)) % kNumGrids + // TODO: display how many blocks there are - if (isShiftKeyDown && _showSettings->blockX > 1) { - // if block size is 1, then this shouldn't toggle - _showSettings->isBlockGridShown = !_showSettings->isBlockGridShown; - _showSettings->isPixelGridShown = false; - sprintf(text, "Block Grid %dx%d %s", - _showSettings->blockX, _showSettings->blockY, - _showSettings->isBlockGridShown ? 
"On" : "Off"); + + // if block size is 1, then this shouldn't toggle + _showSettings->isBlockGridShown = false; + _showSettings->isAtlasGridShown = false; + _showSettings->isPixelGridShown = false; + + advanceGrid(grid, isShiftKeyDown); + + if (grid == 2 && _showSettings->blockX == 1) { + // skip it + advanceGrid(grid, isShiftKeyDown); + } + + static const uint32_t gridSizes[kNumGrids] = { + 0, 1, 2, + 32, 64, 128, 256 // atlas sizes + }; + + if (grid == 0) { + sprintf(text, "Grid Off"); + } + else if (grid == 1) { + _showSettings->isPixelGridShown = true; + + sprintf(text, "Pixel Grid 1x1 On"); + } + else if (grid == 2) { + _showSettings->isBlockGridShown = true; + + sprintf(text, "Block Grid %dx%d On", + _showSettings->blockX, _showSettings->blockY); } else { - - _showSettings->isPixelGridShown = !_showSettings->isPixelGridShown; - _showSettings->isBlockGridShown = false; - text = "Pixel Grid "; - text += _showSettings->isPixelGridShown ? "On" : "Off"; + _showSettings->isAtlasGridShown = true; + + _showSettings->gridSize = gridSizes[grid]; + + sprintf(text, "Atlas Grid %dx%d On", + _showSettings->gridSize, _showSettings->gridSize); } - + isChanged = true; break; - + } case Key::S: // TODO: have drawAllMips, drawAllLevels, drawAllLevelsAndMips _showSettings->isShowingAllLevelsAndMips = !_showSettings->isShowingAllLevelsAndMips; diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 87915903..b1f4ade0 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -131,10 +131,10 @@ bool Image::loadImageFromKTX(const KTXImage& image) case MyMTLPixelFormatR8Unorm: case MyMTLPixelFormatRG8Unorm: #if SUPPORT_RGB - case MyMTLPixelFormatRGB8Unorm_sRGB_internal: // TODO: not handling srgba yet + case MyMTLPixelFormatRGB8Unorm_sRGB_internal: case MyMTLPixelFormatRGB8Unorm_internal: #endif - case MyMTLPixelFormatRGBA8Unorm_sRGB: // TODO: not handling srgba yet + case MyMTLPixelFormatRGBA8Unorm_sRGB: case MyMTLPixelFormatRGBA8Unorm: { const uint8_t* srcPixels = @@ -144,6 +144,7 @@ bool Image::loadImageFromKTX(const KTXImage& image) int32_t numDstChannels = 4; // Note: clearing unspecified channels to 0000, not 0001 + // can set swizzleText when encoding _pixels.resize(4 * _width * _height); if (numSrcChannels != 4) { memset(_pixels.data(), 0, _pixels.size()); @@ -185,6 +186,7 @@ bool Image::loadImageFromKTX(const KTXImage& image) int32_t numDstChannels = 4; // Note: clearing unspecified channels to 0000, not 0001 + // can set swizzleText when encoding _pixelsFloat.resize(_width * _height); if (numSrcChannels != 4) { memset(_pixelsFloat.data(), 0, @@ -240,6 +242,7 @@ bool Image::loadImageFromKTX(const KTXImage& image) int32_t numDstChannels = 4; // Note: clearing unspecified channels to 0000, not 0001 + // can set swizzleText when encoding _pixelsFloat.resize(_width * _height); if (numSrcChannels != 4) { memset(_pixelsFloat.data(), 0, From c18deb1ac1f1d652afed6082cfed8e6c1204c75f Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 9 May 2021 00:15:59 -0700 Subject: [PATCH 036/901] Update README.md --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 29a3d94d..7e932538 100644 --- a/README.md +++ b/README.md @@ -31,15 +31,15 @@ Compute shaders are used to display a single pixel sample from the gpu texture. In non-preview mode, point sampling in a pixel shader is used to show exact pixel values of a single mip, array, and face. Debug modes provide pixel analysis. 
KramLoader shows synchronous cpu upload to a private Metal texture, but does not yet supply the underlying KTXImage. Pinch zoom and panning tries to keep the image from onscreen, and zoom is to the cursor so navigating feels intuitive. ``` -Formats - R/RG/RGBA 8/16F/32F, BC/ETC2/ASTC +Formats - R/RG/RGBA 8/16F/32F, BC/ETC2/ASTC, RGB has limited import support Container Types - KTX, KTX2, PNG -Content Types - Albedo, Normal, SDF -Debug modes - transparent, color, gray, +x, +y +Content Types - Albedo, Normal, SDF, Height +Debug modes - transparent, color, gray, +x, +y, xy >= 1 Texture Types - 1darray (no mips), 2d, 2darray, 3d (no mips), cube, cube array / - show keyboard shortcuts O - toggle preview, disables debug mode, shows lit normals, and mips and filtering are enabled -⇧D - toggle pixel grid, must be zoomed-in to see it (block grid with ⇧) +⇧D - toggle through none, pixel grid, block grid, atlas grid (32, 64, 128, 256), must be zoomed-in to see pixel grid ⇧E - advance debug mode, this is texture content specific (reverse dir with ⇧) H - toggle hud I - show texture info in overlay From d43d8c676d7a7a99d4c0c1c88cb2387a02d09061 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 9 May 2021 00:33:01 -0700 Subject: [PATCH 037/901] kramv - finish normal handling shader --- kramv/KramShaders.metal | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index de4066d8..d1eb084b 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -201,22 +201,22 @@ float4 recip(float4 c) float3 toNormal(float3 n) { // make sure the normal doesn't exceed the unit circle - // many reconstructs skip and get a non-unit or z=0 normal - // might make optional or flag pixel with a debug mode that exeed + // many reconstructs skip and get a non-unit or n.z=0 + // this can all be done with half math too + float len2 = length_squared(n.xy); const float maxLen2 = 0.999 * 0.999; - if (len2 > maxLen2) - { - len2 *= 1.001; // so we have a non-zero z component below + if (len2 <= maxLen2) { + // textures should be corrected to always take this path + n.z = sqrt(1 - len2); + } + else { + len2 *= 1.001*1.001; // need n.xy = approx 0.999 length n.xy *= rsqrt(len2); - len2 = maxLen2; + n.z = 0.0447108; // sqrt(1-maxLen2) } - //len2 = min(0.999, len2); - // make sure always have non-zero z, or get Nan after it knocks out N of TBN - // since that's often pointing purely in 001 direction. 
- n.z = sqrt(1 - len2); return n; } From 8f676de49b8303cb657554d7404042e0217cce9f Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 9 May 2021 00:42:32 -0700 Subject: [PATCH 038/901] Update README.md --- README.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 7e932538..91e4ab4e 100644 --- a/README.md +++ b/README.md @@ -37,10 +37,12 @@ Content Types - Albedo, Normal, SDF, Height Debug modes - transparent, color, gray, +x, +y, xy >= 1 Texture Types - 1darray (no mips), 2d, 2darray, 3d (no mips), cube, cube array +⇧ decrement any advance listed below + / - show keyboard shortcuts O - toggle preview, disables debug mode, shows lit normals, and mips and filtering are enabled ⇧D - toggle through none, pixel grid, block grid, atlas grid (32, 64, 128, 256), must be zoomed-in to see pixel grid -⇧E - advance debug mode, this is texture content specific (reverse dir with ⇧) +⇧E - advance debug mode, this is texture content specific H - toggle hud I - show texture info in overlay W - toggle repeat filter, scales uv from [0,1] to [0,2] @@ -53,7 +55,7 @@ N - toggle signed/unsigned ⇧0 - refit the current mip image to 1x, or fit view. (at 1x with ⇧). ⇧L - reload from disk if changed, zoom to fit (at 1x with ⇧) -⇧Y advance array (reverse dir with ⇧) +⇧Y advance array ⇧F advance face ⇧M advance mip @@ -68,10 +70,10 @@ Texture processing is complex and there be dragons. Just be aware of some of th GPU - none of the encoders use the GPU, so cpu threading and multi-process is used Rescale Filtering - 1x1 point filter -Mip filtering - 2x2 box filter that's reasonable for pow2, but not ideal for non-pow2 mips, +Mip filtering - 2x2 box filter that's reasonable for pow2, and a non-linear filters for non-pow2 so there is no pixel shift done in linear space using half4 storage, in-place to save mem -1D array - no mip support due to hardware +1D array - no mip support due to hardware, no encoding 3D textures - no mip support, uses ASTC 2d slice encode used by Metal/Android, not exotic ASTC 3d format BC/ETC2/ASTC - supposedly WebGL requires pow2, and some implementation need top multiple of 4 for BC/ETC2 From 45677c0875be3ed6dcdb7e78559b74afd6cda54f Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 9 May 2021 11:25:03 -0700 Subject: [PATCH 039/901] kramv - improve shader, start moving towards more general shapes Add basis transform calls, but not yet generating them on model. Add half version of toNormal(), and simplify so it works with half. 
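For reference, the normal reconstruction this commit reworks amounts to clamping the stored xy components inside the unit circle and deriving z from them. A minimal CPU-side sketch of the same math (plain C++, not taken from the patch; the struct and function names are illustrative):

```cpp
#include <cmath>

struct float3 { float x, y, z; };

// Rebuild a unit normal from its stored xy, mirroring the shader's toNormal().
// Keeping z away from 0 avoids knocking out the N column of the TBN basis.
inline float3 reconstructNormal(float nx, float ny)
{
    float len2 = nx * nx + ny * ny;
    const float maxLen2 = 0.999f * 0.999f;

    if (len2 <= maxLen2) {
        // well-formed textures always take this path
        return { nx, ny, std::sqrt(1.0f - len2) };
    }

    // pull xy back to ~0.999 length, z becomes sqrt(1 - maxLen2)
    float scale = 0.999f / std::sqrt(len2);
    return { nx * scale, ny * scale, 0.0447108f };
}
```

Because every input and output stays within [-1, 1], the same math holds up in half precision, which is what the half3 overload in the diff below relies on.
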
--- kramv/KramShaders.metal | 380 ++++++++++++++++++++++++++++++---------- 1 file changed, 285 insertions(+), 95 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index d1eb084b..5c3a1116 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -6,6 +6,283 @@ using namespace metal; +//--------------------------------- +// helpers + +float toUnorm8(float c) +{ + return (127.0 / 255.0) * c + (128.0 / 255.0); +} +float2 toUnorm8(float2 c) +{ + return (127.0 / 255.0) * c + (128.0 / 255.0); +} +float3 toUnorm8(float3 c) +{ + return (127.0 / 255.0) * c + (128.0 / 255.0); +} +float4 toUnorm8(float4 c) +{ + return (127.0 / 255.0) * c + (128.0 / 255.0); +} + +float toUnorm(float c) +{ + return 0.5 * c + 0.5; +} +float2 toUnorm(float2 c) +{ + return 0.5 * c + 0.5; +} +float3 toUnorm(float3 c) +{ + return 0.5 * c + 0.5; +} +float4 toUnorm(float4 c) +{ + return 0.5 * c + 0.5; +} + +float toSnorm8(float c) +{ + return (255.0 / 127.0) * c - (128.0 / 127.0); +} +float2 toSnorm8(float2 c) +{ + return (255.0 / 127.0) * c - (128.0 / 127.0); +} +float3 toSnorm8(float3 c) +{ + return (255.0 / 127.0) * c - (128.0 / 127.0); +} +float4 toSnorm8(float4 c) +{ + return (255.0 / 127.0) * c - (128.0 / 127.0); +} + +half2 toSnorm8(half2 c) +{ + return (255.0h / 127.0h) * c - (128.0h / 127.0h); +} + +float toSnorm(float c) +{ + return 2.0 * c - 1.0; +} +float2 toSnorm(float2 c) +{ + return 2.0 * c - 1.0; +} +float3 toSnorm(float3 c) +{ + return 2.0 * c - 1.0; +} +float4 toSnorm(float4 c) +{ + return 2.0 * c - 1.0; +} + +float recip(float c) +{ + return 1.0 / c; +} +float2 recip(float2 c) +{ + return 1.0 / c; +} +float3 recip(float3 c) +{ + return 1.0 / c; +} +float4 recip(float4 c) +{ + return 1.0 / c; +} + +half toHalf(float c) +{ + return half(c); +} +half2 toHalf(float2 c) +{ + return half2(c); +} +half3 toHalf(float3 c) +{ + return half3(c); +} +half4 toHalf(float4 c) +{ + return half4(c); +} + +float toFloat(half c) +{ + return float(c); +} +float2 toFloat(half2 c) +{ + return float2(c); +} +float3 toFloat(half3 c) +{ + return float3(c); +} +float4 toFloat(half4 c) +{ + return float4(c); +} + +//------------------------------------------- +// functions + +// reconstruct normal from xy, n.z ignored +float3 toNormal(float3 n) +{ + // make sure the normal doesn't exceed the unit circle + // many reconstructs skip and get a non-unit or n.z=0 + // this can all be done with half math too + + float len2 = length_squared(n.xy); + const float maxLen2 = 0.999 * 0.999; + + if (len2 <= maxLen2) { + // textures should be corrected to always take this path + n.z = sqrt(1.0 - len2); + } + else { + n.xy *= 0.999 * rsqrt(len2); + n.z = 0.0447108; // sqrt(1-maxLen2) + } + + return n; +} + +// reconstruct normal from xy, n.z ignored +half3 toNormal(half3 n) +{ + // make sure the normal doesn't exceed the unit circle + // many reconstructs skip and get a non-unit or n.z=0 + // this can all be done with half math too + + half len2 = length_squared(n.xy); + const half maxLen2 = 0.999h * 0.999h; + + if (len2 <= maxLen2) { + // textures should be corrected to always take this path + n.z = sqrt(1.0h - len2); + } + else { + n.xy *= 0.999h * rsqrt(len2); + n.z = 0.0447108h; // sqrt(1-maxLen2) + } + + return n; +} + +// use mikktspace, gen bitan in frag shader with sign, don't normalize vb/vt +// see http://www.mikktspace.com/ +half3 transformNormal(half4 tangent, half3 vertexNormal, + texture2d texture, sampler s, float2 uv, bool isSigned = true) +{ + // Normalize tangent/vertexNormal in vertex shader 
+ // but don't renormalize interpolated tangent, vertexNormal in fragment shader + // Reconstruct bitan in frag shader + // https://bgolus.medium.com/generating-perfect-normal-maps-for-unity-f929e673fc57 + + half4 nmap = texture.sample(s, uv); + if (!isSigned) { + nmap.xy = toSnorm8(nmap.xy); + } + half3 normal = toNormal(nmap.xyz); + + // now transform by basis and normalize from any shearing, and since interpolated basis vectors + // are not normalized + half3x3 tbn = half3x3(tangent.xyz, tangent.w * cross(vertexNormal, tangent.xyz), vertexNormal); + normal = tbn * normal; + return normalize(normal); +} + +// TODO: have more bones, or read from texture instead of uniforms +// can then do instanced skining, but vfetch lookup slower +#define maxBones 128 + +// this is for vertex shader +void skinPosAndBasis(thread float4& position, thread float3& tangent, thread float3& normal, + uint4 indices, float4 weights, float3x4 bones[maxBones]) +{ + // TODO: might do this as up to 12x vtex lookup, fetch from buffer texture + // but uniforms after setup would be faster if many bones + float3x4 bindPoseToBoneTransform = bones[indices.x]; + + if (weights[0] != 1.0) + { + // weight the bone transforms + bindPoseToBoneTransform *= weights[0]; + + // with RGB10A2U have 2 bits in weights.w to store the boneCount + // or could count non-zero weights, make sure to set w > 0 if 4 bones + // the latter is more compatible with more conent + + //int numBones = 1 + int(weights.w * 3.0); + + int numBones = int(dot(float4(weights > 0.0), float4(1))); + + // reconstruct so can store weights in RGB10A2U + if (numBones == 4) + weights.w = 1 - saturate(dot(weights.xyz, float3(1.0))); + + for (int i = 1; i < numBones; ++i) + { + bindPoseToBoneTransform += bones[indices[i]] * weights[i]; + } + } + + // 3x4 is a transpose of 4x4 transform + position.xyz = position * bindPoseToBoneTransform; + + // not dealing with non-uniform scale correction + // see scale2 handling in transformBasis, a little different with transpose of 3x4 + + tangent = (float4(tangent, 0.0) * bindPoseToBoneTransform); + normal = (float4(normal, 0.0) * bindPoseToBoneTransform); +} + +// this is for vertex shader +void transformBasis(thread float3& tangent, thread float3& normal, + float4x4 modelToWorldTfm, bool isScaled = false) +{ + tangent = (modelToWorldTfm * float4(tangent, 0.0)).xyz; + normal = (modelToWorldTfm * float4(normal, 0.0)).xyz; + + // have to apply invSquare of scale here to approximate invT + // also make sure to identify inversion off determinant before instancing so that backfacing is correct + // this is only needed if non-uniform scale present in modelToWorldTfm, could precompute + if (isScaled) + { + // compute scale squared from rows + float3 scale2 = float3( + length_squared(modelToWorldTfm[0].xyz), + length_squared(modelToWorldTfm[1].xyz), + length_squared(modelToWorldTfm[2].xyz)); + + // do a max(1e4), but really don't have scale be super small + scale2 = max(0.0001 * 0.0001, scale2); + + // apply inverse + tangent /= scale2; + normal /= scale2; + } + + // vertex shader normalize, but the fragment shader should not + tangent = normalize(tangent); + normal = normalize(normal); + + // make sure to preserve bitan sign in tangent.w +} + +//------------------------------------------- + struct Vertex { float4 position [[attribute(VertexAttributePosition)]]; @@ -41,7 +318,9 @@ ColorInOut DrawImageFunc( // this is a 2d coord always which is 0 to 1, or 0 to 2 out.texCoord.xy = in.texCoord; if (uniforms.isWrap) { - out.texCoord.xy *= 
2.0; // can make this a repeat value uniform + // can make this a repeat value uniform + float wrapAmount = 2.0; + out.texCoord.xy *= wrapAmount; } // potentially 3d coord, and may be -1 to 1 @@ -71,7 +350,7 @@ vertex ColorInOut DrawCubeVS( // convert to -1 to 1 float3 uvw = out.texCoordXYZ; - uvw.xy = uvw.xy * 2.0 - 1.0; + uvw.xy = toSnorm(uvw.xy); uvw.z = 1.0; // switch to the face @@ -129,100 +408,9 @@ vertex ColorInOut DrawVolumeVS( return out; } -float toUnorm8(float c) -{ - return (127.0 / 255.0) * c + (128 / 255.0); -} -float2 toUnorm8(float2 c) -{ - return (127.0 / 255.0) * c + (128 / 255.0); -} -float3 toUnorm8(float3 c) -{ - return (127.0 / 255.0) * c + (128 / 255.0); -} - -float toUnorm(float c) -{ - return 0.5 * c + 0.5; -} -float2 toUnorm(float2 c) -{ - return 0.5 * c + 0.5; -} -float3 toUnorm(float3 c) -{ - return 0.5 * c + 0.5; -} - -float toSnorm8(float c) -{ - return (255.0 / 127.0) * c - (128 / 127.0); -} - -float2 toSnorm8(float2 c) -{ - return (255.0 / 127.0) * c - (128 / 127.0); -} - -float3 toSnorm8(float3 c) -{ - return (255.0 / 127.0) * c - (128 / 127.0); -} -float4 toSnorm8(float4 c) -{ - return (255.0 / 127.0) * c - (128 / 127.0); -} - -float2 toSnorm(float2 c) -{ - return 2 * c - 1.0; -} - -float recip(float c) -{ - return 1.0 / c; -} -float2 recip(float2 c) -{ - return 1.0 / c; -} -float3 recip(float3 c) -{ - return 1.0 / c; -} -float4 recip(float4 c) -{ - return 1.0 / c; -} - - -// scale and reconstruct normal -float3 toNormal(float3 n) -{ - // make sure the normal doesn't exceed the unit circle - // many reconstructs skip and get a non-unit or n.z=0 - // this can all be done with half math too - - float len2 = length_squared(n.xy); - const float maxLen2 = 0.999 * 0.999; - - if (len2 <= maxLen2) { - // textures should be corrected to always take this path - n.z = sqrt(1 - len2); - } - else { - len2 *= 1.001*1.001; // need n.xy = approx 0.999 length - n.xy *= rsqrt(len2); - n.z = 0.0447108; // sqrt(1-maxLen2) - } - - return n; -} // TODO: do more test shapes, but that affects eyedropper -// use mikktspace, gen bitan in frag shader with sign, don't normalize vb/vt -// see http://www.mikktspace.com/ +// generate and pass down tangents + bitanSign in the geometry // TODO: eliminate the toUnorm() calls below, rendering to rgba16f // but also need to remove conversion code on cpu side expecting unorm in eyedropper @@ -455,7 +643,9 @@ float4 DrawPixels( // flag pixels that would throw off normal reconstruct sqrt(1-dot(n.xy,n.xy)) // see code above in shader that helps keep that from z = 0 float len2 = length_squared(toSnorm(c.rg)); - if (len2 > (0.999 * 0.999)) { + const float maxLen2 = 0.999 * 0.999; + + if (len2 > maxLen2) { isHighlighted = true; } } From 647af3d2500d1638bdf9c6fa34b1f6b66e7fa8ad Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Wed, 12 May 2021 22:54:06 -0700 Subject: [PATCH 040/901] kram - new blit loader, start of ktx2 save, srgb test also fix formatSources script for clang_format. blit encoder started to stage textures and upload those to gpu side. Needed to land this for rgb support. ktx2 needs ztd encode path, and dfd structures to write out, but fleshed out the basics in KramImage. It's commented out. also still need to honor .ktx2 as an output extension. 
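Since the commit calls out that the KTX2 path still needs a zstd encode step, here is a rough sketch of what per-mip-level supercompression could look like with zstd's one-shot API (hypothetical helper, not part of this patch; compression level 3 is an arbitrary choice):

```cpp
#include <cstdint>
#include <vector>
#include "zstd.h"

// Compress one fully-built mip level (all chunks appended) with zstd.
// The returned size is what KTX2ImageLevel::lengthCompressed would record.
bool compressMipLevel(const std::vector<uint8_t>& src,
                      std::vector<uint8_t>& dst, int level = 3)
{
    dst.resize(ZSTD_compressBound(src.size()));
    size_t written = ZSTD_compress(dst.data(), dst.size(),
                                   src.data(), src.size(), level);
    if (ZSTD_isError(written)) {
        return false;
    }
    dst.resize(written);   // actual compressed length
    return true;
}
```

One stream per level keeps the KTX2 level table simple: offsets and compressed sizes can be filled in after all levels are compressed, which is exactly what the commented-out block in KramImage.cpp below anticipates.
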
--- kramv/KramLoader.h | 10 ++ kramv/KramLoader.mm | 298 ++++++++++++++++++++++++++++++++++++- kramv/KramRenderer.mm | 4 +- kramv/KramShaders.metal | 44 ++++-- kramv/KramViewerBase.h | 5 +- kramv/KramViewerMain.mm | 9 +- libkram/kram/KTXImage.cpp | 8 - libkram/kram/KTXImage.h | 27 +++- libkram/kram/Kram.cpp | 68 +++++++++ libkram/kram/KramImage.cpp | 138 +++++++++++++++++ scripts/formatSources.sh | 2 +- 11 files changed, 577 insertions(+), 36 deletions(-) diff --git a/kramv/KramLoader.h b/kramv/KramLoader.h index 9ed5da22..2b14b16f 100644 --- a/kramv/KramLoader.h +++ b/kramv/KramLoader.h @@ -40,6 +40,16 @@ //------------------------------------- +// This loads KTX and PNG data synchronously. Will likely move to only loading KTX files, with a png -> ktx conversion. +// The underlying KTXImage is not yet returned to the caller, but would be useful for prop queries. +@interface KramBlitLoader : NSObject + +@property (retain, nonatomic, readwrite, nonnull) id device; + +@end + +//------------------------------------- + // for toLower #include diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 3ef9c0cb..97cac1d5 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -14,6 +14,7 @@ #include #include // for max +#include #include "Kram.h" #include "KramLog.h" @@ -75,15 +76,86 @@ - (BOOL)decodeImageIfNeeded:(KTXImage&)image imageDecoded:(KTXImage&)imageDecode return YES; } - + +#if SUPPORT_RGB +inline bool isInternalRGBFormat(MyMTLPixelFormat format) { + bool isInternal = false; + switch(format) { + case MyMTLPixelFormatRGB8Unorm_internal: + case MyMTLPixelFormatRGB8Unorm_sRGB_internal: + case MyMTLPixelFormatRGB16Float_internal: + case MyMTLPixelFormatRGB32Float_internal: + isInternal = true; + break; + default: + break; + } + return isInternal; +} + +inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { + MyMTLPixelFormat remapFormat = MyMTLPixelFormatInvalid; + switch(format) { + case MyMTLPixelFormatRGB8Unorm_internal: + remapFormat = MyMTLPixelFormatRGBA8Unorm; + break; + case MyMTLPixelFormatRGB8Unorm_sRGB_internal: + remapFormat = MyMTLPixelFormatRGBA8Unorm_sRGB; + break; + case MyMTLPixelFormatRGB16Float_internal: + remapFormat = MyMTLPixelFormatRGBA32Float; + break; + case MyMTLPixelFormatRGB32Float_internal: + remapFormat = MyMTLPixelFormatRGBA32Float; + break; + default: + break; + } + return remapFormat; +} + +#endif - (nullable id)loadTextureFromData:(nonnull const uint8_t *)imageData imageDataLength:(int32_t)imageDataLength originalFormat:(nullable MTLPixelFormat*)originalFormat { KTXImage image; + if (!image.open(imageData, imageDataLength)) { return nil; } +#if SUPPORT_RGB + if (isInternalRGBFormat(image.pixelFormat)) { + // loads and converts image to RGBA version + Image rbgaImage; + if (!rbgaImage.loadImageFromKTX(image)) + return nil; + + // re-encode it as a KTXImage, even though this is just a copy + KTXImage rbgaImage2; + + ImageInfoArgs dstImageInfoArgs; + dstImageInfoArgs.pixelFormat = remapInternalRGBFormat(image.pixelFormat); + dstImageInfoArgs.doMipmaps = false; + dstImageInfoArgs.textureEncoder = kTexEncoderExplicit; + dstImageInfoArgs.swizzleText = "rgb1"; + + ImageInfo dstImageInfo; + dstImageInfo.initWithArgs(dstImageInfoArgs); + + if (!rbgaImage.encode(dstImageInfo, rbgaImage2)) { + return nil; + } + + if (originalFormat != nullptr) { + *originalFormat = (MTLPixelFormat)rbgaImage2.pixelFormat; + } + + return [self loadTextureFromImage:rbgaImage2]; + } +#endif + + if (originalFormat != nullptr) { *originalFormat = 
(MTLPixelFormat)image.pixelFormat; } @@ -236,7 +308,7 @@ - (void)setMipgenNeeded:(BOOL)enabled { // and only get box filtering in API-level filters. But would cut storage. textureDescriptor.mipmapLevelCount = MAX(1, image.header.numberOfMipmapLevels); - // only do this for viewer, this disables lossless compression + // only do this for viewer // but allows encoded textures to enable/disable their sRGB state. // Since the view isn't accurate, will probably pull this out. // Keep usageRead set by default. @@ -405,6 +477,228 @@ - (void)setMipgenNeeded:(BOOL)enabled { @end +//-------------------------- + + + + +@implementation KramBlitLoader { + // this must be created in render, and then do blits into this + id _blitEncoder; + id _buffer; + uint8_t* data; + size_t dataSize; +} + +- (nonnull instancetype)init { + self = [super init]; + + // must be aligned to pagesize() or can't use with newBufferWithBytesNoCopy + dataSize = 16*1024*1024; + posix_memalign((void**)&data, getpagesize(), dataSize); + + // allocate memory for circular staging buffer, only need to memcpy to this + // but need a rolling buffer atop to track current begin/end. + + _buffer = [_device newBufferWithBytesNoCopy:data + length:dataSize + options:MTLResourceStorageModeShared + deallocator: ^(void *macroUnusedArg(pointer), NSUInteger macroUnusedArg(length)) { + delete data; + } + ]; + return self; +} + +- (nullable id)createTexture:(KTXImage&)image { + MTLTextureDescriptor *textureDescriptor = [[MTLTextureDescriptor alloc] init]; + + // Indicate that each pixel has a blue, green, red, and alpha channel, where each channel is + // an 8-bit unsigned normalized value (i.e. 0 maps to 0.0 and 255 maps to 1.0) + textureDescriptor.textureType = (MTLTextureType)image.textureType; + textureDescriptor.pixelFormat = (MTLPixelFormat)image.pixelFormat; + + // Set the pixel dimensions of the texture + textureDescriptor.width = image.width; + textureDescriptor.height = MAX(1, image.height); + textureDescriptor.depth = MAX(1, image.depth); + + textureDescriptor.arrayLength = MAX(1, image.header.numberOfArrayElements); + + // ignoring 0 (auto mip), but might need to support for explicit formats + // must have hw filtering support for format, and 32f filtering only first appeared on A14/M1 + // and only get box filtering in API-level filters. But would cut storage. + textureDescriptor.mipmapLevelCount = MAX(1, image.header.numberOfMipmapLevels); + + // needed for blit, + textureDescriptor.storageMode = MTLStorageModePrivate; + + // only do this for viewer + // but allows encoded textures to enable/disable their sRGB state. + // Since the view isn't accurate, will probably pull this out. + // Keep usageRead set by default. + //textureDescriptor.usage = MTLTextureUsageShaderRead; + + // this was so that could toggle srgb on/off, but mips are built linear and encoded as lin or srgb + // in the encoded formats so this wouldn't accurately reflect with/without srgb. 
+ //textureDescriptor.usage |= MTLTextureUsagePixelFormatView; + + // Create the texture from the device by using the descriptor + id texture = [self.device newTextureWithDescriptor:textureDescriptor]; + if (!texture) { + KLOGE("kramv", "could not allocate texture"); + return nil; + } + + return texture; +} + +//for (int mipLevelNumber = 0; mipLevelNumber < numMips; ++mipLevelNumber) { +// +// // zstd decompress entire mip level to the staging buffer +// zstd +//} +// +//// so first memcpy and entire level(s) into the buffer +////memcpy(...); + + +// Has a synchronous upload via replaceRegion that only works for shared/managed (f.e. ktx), +// and another path for private that uses a blitEncoder and must have block aligned data (f.e. ktxa, ktx2). +// Could repack ktx data into ktxa before writing to temporary file, or when copying NSData into MTLBuffer. +- (nullable id)blitTextureFromImage:(KTXImage &)image +{ + id texture = [self createTexture:image]; + + //-------------------------------- + // upload mip levels + + // TODO: about aligning to 4k for base + length + // http://metalkit.org/2017/05/26/working-with-memory-in-metal-part-2.html + + int32_t w = image.width; + int32_t h = image.height; + + int32_t numMips = MAX(1, image.header.numberOfMipmapLevels); + int32_t numArrays = MAX(1, image.header.numberOfArrayElements); + int32_t numFaces = MAX(1, image.header.numberOfFaces); + int32_t numSlices = MAX(1, image.depth); + + Int2 blockDims = image.blockDims(); + + for (int mipLevelNumber = 0; mipLevelNumber < numMips; ++mipLevelNumber) { + // there's a 4 byte levelSize for each mipLevel + // the mipLevel.offset is immediately after this + + // this is offset to a given level + const KTXImageLevel& mipLevel = image.mipLevels[mipLevelNumber]; + + // only have face, face+array, or slice but this handles all cases + for (int array = 0; array < numArrays; ++array) { + for (int face = 0; face < numFaces; ++face) { + for (int slice = 0; slice < numSlices; ++slice) { + + int32_t bytesPerRow = 0; + + // 1D/1DArray textures set bytesPerRow to 0 + if ((MTLTextureType)image.textureType != MTLTextureType1D && + (MTLTextureType)image.textureType != MTLTextureType1DArray) + { + // for compressed, bytesPerRow needs to be multiple of block size + // so divide by the number of blocks making up the height + //int xBlocks = ((w + blockDims.x - 1) / blockDims.x); + int32_t yBlocks = ((h + blockDims.y - 1) / blockDims.y); + + // Calculate the number of bytes per row in the image. + // for compressed images this is xBlocks * blockSize + bytesPerRow = (int32_t)mipLevel.length / yBlocks; + } + + int32_t sliceOrArrayOrFace; + + if (image.header.numberOfArrayElements > 0) { + // can be 1d, 2d, or cube array + sliceOrArrayOrFace = array; + if (numFaces > 1) { + sliceOrArrayOrFace = 6 * sliceOrArrayOrFace + face; + } + } + else { + // can be 1d, 2d, or 3d + sliceOrArrayOrFace = slice; + if (numFaces > 1) { + sliceOrArrayOrFace = face; + } + } + + // this is size of one face/slice/texture, not the levels size + int32_t mipStorageSize = (int32_t)mipLevel.length; + + int32_t mipOffset = (int32_t)mipLevel.offset + sliceOrArrayOrFace * mipStorageSize; + + int32_t bufferBaseOffset = 0; // TODO: pos offset into the staging buffer + mipOffset += bufferBaseOffset; + + // using buffer to store + // offset into the level + //const uint8_t *srcBytes = image.fileData + mipOffset; + + // had blitEncoder support here + + { + // Note: this only works for managed/shared textures. 
+ // For private upload to buffer and then use blitEncoder to copy to texture. + //bool isCubemap = image.textureType == MyMTLTextureTypeCube || + // image.textureType == MyMTLTextureTypeCubeArray; + bool is3D = image.textureType == MyMTLTextureType3D; + //bool is2DArray = image.textureType == MyMTLTextureType2DArray; + //bool is1DArray = image.textureType == MyMTLTextureType1DArray; + + // cpu copy the bytes from the data object into the texture + MTLRegion region = { + { 0, 0, 0 }, // MTLOrigin + { (NSUInteger)w, (NSUInteger)h, 1 } // MTLSize + }; + + // TODO: revist how loading is done to load entire levels + // otherwise too many replaceRegion calls. Data is already packed by mip. + + if (is3D) { + region.origin.z = sliceOrArrayOrFace; + sliceOrArrayOrFace = 0; + } + + // TODO: no call on MTLBlitEncoder to copy entire level of mips like glTexImage3D + + [_blitEncoder copyFromBuffer:_buffer + sourceOffset:mipOffset + sourceBytesPerRow:bytesPerRow + sourceBytesPerImage:mipStorageSize + sourceSize:region.size + + toTexture:texture + destinationSlice:sliceOrArrayOrFace + destinationLevel:mipLevelNumber + destinationOrigin:region.origin + options:MTLBlitOptionNone + ]; + } + } + } + } + + mipDown(w, h); + } + + // this only affect managed textures + [_blitEncoder optimizeContentsForGPUAccess:texture]; + + return texture; +} + + +@end + diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index a98896b0..1f44314e 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -654,8 +654,8 @@ - (void)_updateGameState } } else if (_showSettings->isAtlasGridShown) { - uniforms.gridX = _showSettings->gridSize; - uniforms.gridY = _showSettings->gridSize; + uniforms.gridX = _showSettings->gridSizeX; + uniforms.gridY = _showSettings->gridSizeY; } // no debug mode when preview kicks on, make it possible to toggle back and forth more easily diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 5c3a1116..54832b83 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -133,6 +133,9 @@ float4 toFloat(half4 c) return float4(c); } +// TODO: note that Metal must pass the same half3 from vertex to fragment shader +// so can't mix a float vs with half fs. 
+ //------------------------------------------- // functions @@ -213,6 +216,9 @@ void skinPosAndBasis(thread float4& position, thread float3& tangent, thread flo { // TODO: might do this as up to 12x vtex lookup, fetch from buffer texture // but uniforms after setup would be faster if many bones + // instances use same bones, but different indices/weights already + // but could draw skinned variants with vtex lookup and not have so much upload prep + float3x4 bindPoseToBoneTransform = bones[indices.x]; if (weights[0] != 1.0) @@ -226,11 +232,11 @@ void skinPosAndBasis(thread float4& position, thread float3& tangent, thread flo //int numBones = 1 + int(weights.w * 3.0); - int numBones = int(dot(float4(weights > 0.0), float4(1))); + int numBones = int(dot(float4(weights > 0.0), float4(1.0))); // reconstruct so can store weights in RGB10A2U if (numBones == 4) - weights.w = 1 - saturate(dot(weights.xyz, float3(1.0))); + weights.w = 1.0 - saturate(dot(weights.xyz, float3(1.0))); for (int i = 1; i < numBones; ++i) { @@ -248,30 +254,46 @@ void skinPosAndBasis(thread float4& position, thread float3& tangent, thread flo normal = (float4(normal, 0.0) * bindPoseToBoneTransform); } +float3x3 toFloat3x3(float4x4 m) +{ + return float3x3(m[0].xyz, m[1].xyz, m[2].xyz); +} + // this is for vertex shader void transformBasis(thread float3& tangent, thread float3& normal, float4x4 modelToWorldTfm, bool isScaled = false) { - tangent = (modelToWorldTfm * float4(tangent, 0.0)).xyz; - normal = (modelToWorldTfm * float4(normal, 0.0)).xyz; + float3x3 m = toFloat3x3(modelToWorldTfm); + + // question here of whether tangent is transformed by m or mInvT + // most apps assume m, but after averaging it can be just as off the surface as the normal + bool useInverseOnTangent = true; + if (useInverseOnTangent) + tangent = tangent * m; + else + tangent = m * tangent; + + // note this is n * R = Rt * n, for simple affine transforms Rinv = Rt, invScale then handled below + normal = normal * m; + // have to apply invSquare of scale here to approximate invT // also make sure to identify inversion off determinant before instancing so that backfacing is correct - // this is only needed if non-uniform scale present in modelToWorldTfm, could precompute + // this is only needed if non-uniform scale present in modelToWorldTfm, could precompute scale2 if (isScaled) { // compute scale squared from rows float3 scale2 = float3( - length_squared(modelToWorldTfm[0].xyz), - length_squared(modelToWorldTfm[1].xyz), - length_squared(modelToWorldTfm[2].xyz)); + length_squared(m[0].xyz), + length_squared(m[1].xyz), + length_squared(m[2].xyz)); // do a max(1e4), but really don't have scale be super small - scale2 = max(0.0001 * 0.0001, scale2); + scale2 = recip(max(0.0001 * 0.0001, scale2)); // apply inverse - tangent /= scale2; - normal /= scale2; + tangent *= scale2; + normal *= scale2; } // vertex shader normalize, but the fragment shader should not diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 4c82328b..3f706a06 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -112,8 +112,9 @@ class ShowSettings { int32_t blockX = 1; int32_t blockY = 1; - // set when isGridShow is true - int32_t gridSize = 1; + // set when isGridShown is true + int32_t gridSizeX = 1; + int32_t gridSizeY = 1; // for eyedropper, lookup this pixel value, and return it to CPU int32_t textureLookupX = 0; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 2ccf6801..9f4fcdb6 100644 --- a/kramv/KramViewerMain.mm +++ 
b/kramv/KramViewerMain.mm @@ -1275,10 +1275,13 @@ - (void)keyDown:(NSEvent *)theEvent else { _showSettings->isAtlasGridShown = true; - _showSettings->gridSize = gridSizes[grid]; - + // want to be able to show altases tht have long entries derived from props + // but right now just a square grid atlas + _showSettings->gridSizeX = + _showSettings->gridSizeY = gridSizes[grid]; + sprintf(text, "Atlas Grid %dx%d On", - _showSettings->gridSize, _showSettings->gridSize); + _showSettings->gridSizeX, _showSettings->gridSizeY); } isChanged = true; diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 868c77af..c9f37333 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1240,14 +1240,6 @@ const char* textureTypeName(MyMTLTextureType textureType) } -// This is one entire level of mipLevels. -class KTX2ImageLevel { -public: - uint64_t offset; // numChunks * length - uint64_t lengthCompressed; // can only be read in, can't compute this, but can compute upper bound from zstd - uint64_t length; // size of a single mip -}; - //// Data Format Descriptor diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index bf377602..a5787056 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -179,19 +179,26 @@ class KTXHeader { MyMTLPixelFormat metalFormat() const; }; +// This is one entire level of mipLevels. +// In KTX, the image levels are assumed from format and size since no compression applied. +class KTXImageLevel { +public: + uint64_t offset; // numChunks * length + uint64_t length; // size of a single mip +}; + //--------------------------------------------- // Mips are reversed from KTX1 (mips are smallest first for streaming), // and this stores an array of supercompressed levels, and has dfds. class KTX2Header { public: - - uint8_t identifier[kKTXIdentifierSize] = { // same is kKTX2Identifier + uint8_t identifier[kKTXIdentifierSize] = { // same is kKTX2Identifier 0xAB, 0x4B, 0x54, 0x58, 0x20, 0x32, 0x30, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A // '«', 'K', 'T', 'X', ' ', '2', '0', '»', '\r', '\n', '\x1A', '\n' }; - uint32_t vkFormat = 0; // invalid + uint32_t vkFormat = 0; // invalid format uint32_t typeSize = 1; uint32_t pixelWidth = 1; @@ -218,18 +225,24 @@ class KTX2Header { uint64_t sgdByteLength = 0; // chunks hold levelCount of all mips of the same size - // KTX2ImageChunk* chunks; // [levelCount] + // KTX2ImageLevel* chunks; // [levelCount] }; -//--------------------------------------------- - +// Unlike KTX, KTX2 writes an array of level sizes since compression may e involved. +// These correspond to an entire compressed array of chunks. +// So often an entire level mus be decompressed before a chunk can be accessed. // This is one entire level of mipLevels. -class KTXImageLevel { +class KTX2ImageLevel { public: uint64_t offset; // numChunks * length + uint64_t lengthCompressed; // can only be read in, can't compute this, but can compute upper bound from zstd uint64_t length; // size of a single mip }; +//--------------------------------------------- + + + // Since can't add anything to KTXHeader without throwing off KTXHeader size, // this holds any mutable data for reading/writing KTX images. 
class KTXImage { diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index b3a73281..5cf1d8a2 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -2303,8 +2303,76 @@ CommandType parseCommandType(const char* command) return commandType; } +void PSTest() { + static bool doTest = false; + if (doTest) { + return; + } + + // So it looks like Photoshop is doing srgb * alpha right away on PNG import. This results in dimmer colors + // when they are read on the GPU, since then the gpu does srgb to linear conversion. values2 + // is that case below. Also note that the Photoshop color picker shows only srgb intensities, not the linear. + // color value. This lets it line up with screen color pickers like Apple DCM. Apple Preview also shows + // images with the same dim colors, so it's replicating what Photoshop does. + // + // Gimp and kramv do what is in values3 resulting in brighter intensities. One question with formats like + // astc that interpolate the endpoints in srgb space off the selectors is how to encode colors. + // Almost makes sense to drop srgb when premul alpha is involved and store linear color instead. + // Figma follows that convention. + + // Here's kramv's srgb flow: + // PNG unmul alpha -> srbToLinear(rgb) * alpha -> build mips in linear -> linearToSrgb(lin.rgb) + // -> encode endpoints/colors -> BC/ASTC/ETC2 + // + // Here's Photoshop I think: + // PNG unmul alpha -> srgbToLinear(rgb * alpha) -> linarToSrgb( c ) -> toUnmul( c/alpha ) -> Png + + + Mipper mipper; + + // 1. srgb 8-bit values + uint8_t alpha = 200; + float alphaF = mipper.toAlphaFloat(alpha); + + uint8_t values1[256]; + uint8_t values2[256]; + uint8_t values3[256]; + + for (int32_t i = 0; i < 256; ++i) { + // premul and then snap back to store + values1[i] = ((uint32_t)i * (uint32_t)alpha) / 255; + } + + // now convert those values to linear color (float) + for (int32_t i = 0; i < 256; ++i) { + float value = mipper.toLinear(values1[i]); + + values2[i] = uint8_t(value * 255.1); + + //KLOGI("srgb", "[%d] = %g\n", i, value); + } + + // convert srgb to linear and then do premul + for (int32_t i = 0; i < 256; ++i) { + float value = mipper.toLinear(i); + value *= alphaF; + + values3[i] = uint8_t(value * 255.1); + } + + // log them side-by-side for comparison + KLOGI("srgb", "premul by %0.3f", 200.0/255.0); + for (int32_t i = 0; i < 256; ++i) { + KLOGI("srgb", "[%d] = %u, %u, %u", + i, values1[i], values2[i], values3[i]); + } +} + + int32_t kramAppCommand(vector& args) { + PSTest(); + // make sure next arg is a valid command type CommandType commandType = kCommandTypeUnknown; if (args.size() >= 1) { diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index b1f4ade0..c8ce1b0b 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -1223,6 +1223,144 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const Mipper mipper; SDFMipper sdfMipper; +#if 0 + // TODO: can go out to KTX2 here instead + // It has two different blocks, supercompression for BasisLZ + // and a DFD block which details the block content. + // And mips are reversed. 
+ bool doWriteKTX2 = false; + if (doWriteKTX2 && dstFile) // in memory version will always be KTX1 format for nwo + { + KTX2Header header2; + + header2.vkFormat = vulkanType(info.pixelFormat); + // header2.typeSize = 1; // skip + + header2.pixelWidth = header.pixelWidth; + header2.pixelHeight = header.pixelHeight; + header2.pixelDepth = header.pixelDepth; + + if (dstImage.textureType == MyMTLTextureType1DArray) { + header2.pixelHeight = 0; + header2.pixelDepth = 0; + } + + header2.layerCount = header.numberOfArrayElements; + header2.faceCount = header.numberOfFaces; + header2.levelCount = numDstMipLevels; // header.numberOfMipmapLevels; + + // compute size of dfd + vector dfdData; + + // compute offsets and lengts of data blocks + header2.dfdByteOffset = sizeof(header2); + header2.kvdByteOffset = header2.dfdByteOffset + dfdData.size(); + header2.sgdByteOffset = header2.kvdByteOffset + propsData.size(); + + header2.dfdByteLength = dfdData.size(); + header2.kvdByteLength = propsData.size(); + header2.sgdByteLength = 0; + + // TODO: figure out dfd here + + // write the header + if (!writeDataAtOffset((const uint8_t*)&header2, sizeof(header2), 0, dstFile, dstImage)) { + return false; + } + + // write the dfd + if (!writeDataAtOffset(dfdData.data(), dfdData.size(), header2.dfdByteOffset, dstFile, dstImage)) { + return false; + } + + // write the props + if (!writeDataAtOffset(propsData.data(), propsData.size(), header2.kvdByteOffset, dstFile, dstImage)) { + return false; + } + + // skip supercompression block + + // TODO: this either writes to file or to dstImage (in-memory KTX file) + + // TODO: also need to support a few compressions + // zstd and zlib, does dfd contain the offsets of each chunk + // and the compressed sizes of mips. Know format and sizes uncompressed. + // but need to fill out the compressed size field. + + vector levels; + levels.resize(numDstMipLevels); + + size_t levelListStartOffset = header2.sgdByteOffset + header2.sgdByteLength; + size_t levelStartOffset = levelListStartOffset + levels.size() * sizeof(KTX2ImageLevel); + + size_t lastLevelOffset = levelStartOffset; + for (int32_t i = 0; i < numDstMipLevels; ++i) { + levels[i].length = numChunks * numDstMipLevels; + levels[i].lengthCompressed = levels[i].length; + levels[i].offset = lastLevelOffset + levels[i].lengthCompressed; + lastLevelOffset = levels[i].offset; + } + + // TODO: compress to a seperate zstd stream for each level + // then can continue to do mips in place, and just append the bytes to that level + // after compression. If not compressed, then code from KTX1 can be used. + bool isCompressed = false; + + if (!isCompressed) { + if (!writeDataAtOffset(levels.data(), levels.size(), levelListStartOffset, dstFile, dstImage)) { + return false; + } + } + + // TODO: here allocate a zstd encoder for each level + vector< vector > compressedLevels; + if (isCompressed) { + compressedLevels.resize(numDstMipLevels); + } + + // write the chunks of mips see code below, seeks are important since + // it's building mips on the fly. 
+ for (int32_t chunk = 0; chunk < numChunks; ++chunk) { + // TODO: actually build the mip (reuse code below for KTX) + + if (!isCompressed) + continue; + + // handle zstd compression here, and add to end of existing encoder for level + zstd_compress(level); + + // append the compressed bytes to each strea + levels[mipLevel].append(data); + } + + if (isCompressed) { + + // update the offsets and compressed sizes + lastLevelOffset = levelStartOffset; + for (int32_t i = 0; i < numDstMipLevels; ++i) { + levels[i].lengthCompressed = compressedLevels[i].size(); + levels[i].offset = lastLevelOffset + levels[i].lengthCompressed; + lastLevelOffset = levels[i].offset; + } + + // write out sizes + if (!writeDataAtOffset(levels.data(), levels.size(), levelListStartOffset, dstFile, dstImage)) { + return false; + } + + // and now seek and write out each compressed level + for (int32_t i = 0; i < numDstMipLevels; ++i) { + if (!writeDataAtOffset(compressedLevels[i].data(), compressedLevels[i].size(), levels[i].offset, dstFile, dstImage)) { + return false; + } + } + } + + return true; + } +#endif + + // ---------------------------------------------------- // write the header out KTXHeader headerCopy = header; diff --git a/scripts/formatSources.sh b/scripts/formatSources.sh index 5578718d..45547dd0 100755 --- a/scripts/formatSources.sh +++ b/scripts/formatSources.sh @@ -10,7 +10,7 @@ clang-format -style=file -i KTX*.cpp clang-format -style=file -i KTX*.h popd -pushd ../viewer +pushd ../kramv clang-format -style=file -i Kram*.cpp clang-format -style=file -i Kram*.h clang-format -style=file -i Kram*.mm From 76393eeb9100e8ef6353faf2fe7a9d866f8439d7 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Wed, 12 May 2021 22:58:17 -0700 Subject: [PATCH 041/901] kram - fix Mipper with newer calls Just to bury more non-public calls/data. --- libkram/kram/KramMipper.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/libkram/kram/KramMipper.h b/libkram/kram/KramMipper.h index e5439f25..c6f60391 100644 --- a/libkram/kram/KramMipper.h +++ b/libkram/kram/KramMipper.h @@ -57,21 +57,25 @@ class ImageData { }; class Mipper { -public: +private: float srgbToLinear[256]; float alphaToFloat[256]; +public: Mipper(); - void initTables(); - // drop by 1 mip level by box filter void mipmap(const ImageData &srcImage, ImageData &dstImage) const; void initPixelsHalfIfNeeded(ImageData &srcImage, bool doPremultiply, bool doPrezero, vector &halfImage) const; + float toLinear(uint8_t srgb) const { return srgbToLinear[srgb]; } + float toAlphaFloat(uint8_t alpha) const { return alphaToFloat[alpha]; } + private: + void initTables(); + void mipmapLevel(const ImageData &srcImage, ImageData &dstImage) const; void mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) const; From 2c425f53f415e647d3bdb3de48adfe3e5b64eba3 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Thu, 13 May 2021 14:24:15 -0700 Subject: [PATCH 042/901] Kram - add premulrgb option to match Photoshop's and Apple Previews non-srgb compliant handling of premul srgb files. This does premul directly to the raw srgb data in the png file in 8-bits. Mips are not done in linear space either. Turn off the srgb test for this. 
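The mismatch being emulated is only about where the multiply by alpha happens relative to the sRGB transfer function. A small standalone comparison (plain C++, not from the patch; uses the standard sRGB decode and the 200/255 alpha that PSTest samples):

```cpp
#include <cmath>
#include <cstdint>
#include <cstdio>

// standard sRGB EOTF (decode to linear)
static float srgbToLinear(float s)
{
    return (s <= 0.04045f) ? s / 12.92f
                           : std::pow((s + 0.055f) / 1.055f, 2.4f);
}

int main()
{
    uint8_t srgb = 180, alpha = 200;   // arbitrary sample pixel
    float a = alpha / 255.0f;

    // -premulrgb / Photoshop-style: multiply the raw sRGB value, then decode
    float premulThenDecode = srgbToLinear((srgb / 255.0f) * a);

    // kram's default flow: decode to linear first, then multiply by alpha
    float decodeThenPremul = srgbToLinear(srgb / 255.0f) * a;

    // the first value is always the smaller (darker) of the two for a < 1
    printf("%f vs %f\n", premulThenDecode, decodeThenPremul);
    return 0;
}
```

Because the decode is convex with f(0) = 0, premultiplying before the decode always lands at or below premultiplying after it, which is why images processed that way read back dimmer on the GPU.
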
--- libkram/kram/Kram.cpp | 45 +++++++++++++++++++++++++++++++++------ libkram/kram/Kram.h | 2 +- libkram/kram/KramMipper.h | 2 ++ 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 5cf1d8a2..a3c6db07 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -47,7 +47,14 @@ bool LoadKtx(const uint8_t* data, size_t dataSize, Image& sourceImage) return sourceImage.loadImageFromKTX(image); } -bool LoadPng(const uint8_t* data, size_t dataSize, Image& sourceImage) +inline Color toPremul(Color c) { + c.r = ((uint32_t)c.r * (uint32_t)c.a) / 255; + c.g = ((uint32_t)c.g * (uint32_t)c.a) / 255; + c.b = ((uint32_t)c.b * (uint32_t)c.a) / 255; + return c; +} + +bool LoadPng(const uint8_t* data, size_t dataSize, bool isPremulRgb, Image& sourceImage) { uint32_t width = 0; uint32_t height = 0; @@ -107,6 +114,18 @@ bool LoadPng(const uint8_t* data, size_t dataSize, Image& sourceImage) return false; } + // apply premul srgb right away, don't use with -premul or alpha is applied twice + // this may throw off the props. Note this ignores srgb conversion. + // This is hack to look like Photoshop and Apple Preview, where they process srgb wrong + // on premul PNG data on load, and colors look much darker. + + if (hasAlpha && isPremulRgb) { + Color* colors = (Color*)pixels.data(); + for (int32_t i = 0, iEnd = width*height; i < iEnd; ++i) { + colors[i] = toPremul(colors[i]); + } + } + return sourceImage.loadImageFromPixels(pixels, width, height, hasColor, hasAlpha); } @@ -117,7 +136,7 @@ bool SetupTmpFile(FileHelper& tmpFileHelper, const char* suffix) bool SetupSourceImage(MmapHelper& mmapHelper, FileHelper& fileHelper, vector& fileBuffer, - const string& srcFilename, Image& sourceImage) + const string& srcFilename, Image& sourceImage, bool isPremulSrgb = false) { bool isKTX = endsWith(srcFilename, ".ktx") || endsWith(srcFilename, ".ktx2"); bool isPNG = endsWith(srcFilename, ".png"); @@ -143,7 +162,7 @@ bool SetupSourceImage(MmapHelper& mmapHelper, FileHelper& fileHelper, } } else if (isPNG) { - if (!LoadPng(mmapHelper.data(), mmapHelper.dataLength(), + if (!LoadPng(mmapHelper.data(), mmapHelper.dataLength(), isPremulSrgb, sourceImage)) { return false; // error } @@ -171,7 +190,7 @@ bool SetupSourceImage(MmapHelper& mmapHelper, FileHelper& fileHelper, } } else if (isPNG) { - if (!LoadPng(fileBuffer.data(), fileHelper.size(), + if (!LoadPng(fileBuffer.data(), fileHelper.size(), isPremulSrgb, sourceImage)) { return false; // error } @@ -1029,6 +1048,10 @@ void kramEncodeUsage(bool showVersion = true) "\tPremultiplied alpha to src pixels before output but only where a=0\n" "\n" + "\t-premulrgb" + "\tPremultiplied alpha to src pixels at load to emulate Photoshop, don't use with -premul\n" + "\n" + "\t-optopaque" "\tChange format from bc7/3 to bc1, or etc2rgba to rgba if opaque\n" "\n" @@ -1632,6 +1655,7 @@ static int32_t kramAppEncode(vector& args) ImageInfoArgs infoArgs; + bool isPremulRgb = false; bool error = false; for (int32_t i = 0; i < argc; ++i) { @@ -1874,6 +1898,9 @@ static int32_t kramAppEncode(vector& args) continue; } + // This means to post-multiply alpha after loading, not that incoming data in already premul + // png has the limitation that it's unmul, but tiff/exr can store premul. With 8-bit images + // really would prefer to premul them when building the texture. 
else if (isStringEqual(word, "-premul")) { infoArgs.isPremultiplied = true; continue; @@ -1882,6 +1909,12 @@ static int32_t kramAppEncode(vector& args) infoArgs.isPrezero = true; continue; } + // this means premul the data at read from srgb, this it to match photoshop + else if (isStringEqual(word, "-premulrgb")) { + isPremulRgb = true; + continue; + } + else if (isStringEqual(word, "-v") || isStringEqual(word, "-verbose")) { infoArgs.isVerbose = true; @@ -1977,7 +2010,7 @@ static int32_t kramAppEncode(vector& args) vector srcFileBuffer; bool success = SetupSourceImage(srcMmapHelper, srcFileHelper, srcFileBuffer, - srcFilename, srcImage); + srcFilename, srcImage, isPremulRgb); if (success) { success = SetupTmpFile(tmpFileHelper, ".ktx"); @@ -2305,7 +2338,7 @@ CommandType parseCommandType(const char* command) void PSTest() { static bool doTest = false; - if (doTest) { + if (!doTest) { return; } diff --git a/libkram/kram/Kram.h b/libkram/kram/Kram.h index 13273035..9a52a695 100644 --- a/libkram/kram/Kram.h +++ b/libkram/kram/Kram.h @@ -14,7 +14,7 @@ class KTXImage; // helpers to source from a png or single level of a ktx bool LoadKtx(const uint8_t* data, size_t dataSize, Image& sourceImage); -bool LoadPng(const uint8_t* data, size_t dataSize, Image& sourceImage); +bool LoadPng(const uint8_t* data, size_t dataSize, bool isPremulSrgb, Image& sourceImage); // can call these with data instead of needing a file string kramInfoPNGToString(const string& srcFilename, const uint8_t* data, uint64_t dataSize, bool isVerbose); diff --git a/libkram/kram/KramMipper.h b/libkram/kram/KramMipper.h index c6f60391..65013751 100644 --- a/libkram/kram/KramMipper.h +++ b/libkram/kram/KramMipper.h @@ -73,6 +73,8 @@ class Mipper { float toLinear(uint8_t srgb) const { return srgbToLinear[srgb]; } float toAlphaFloat(uint8_t alpha) const { return alphaToFloat[alpha]; } + uint8_t toPremul(uint8_t channelIntensity, uint8_t alpha) const { return ((uint32_t)channelIntensity * (uint32_t)alpha) / 255; } + private: void initTables(); From 4be8f27613f8058aa3bcaae43765a9be433542ee Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Thu, 13 May 2021 14:40:43 -0700 Subject: [PATCH 043/901] kram - fix build break in loader pass false for isPremulRgb to LoadPng. 
--- kramv/KramLoader.mm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 97cac1d5..11421ae5 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -186,7 +186,7 @@ static int32_t numberOfMipmapLevels(const Image& image) { { // can only load 8u and 16u from png, no hdr formats, no premul either, no props Image sourceImage; - bool isLoaded = LoadPng(data, dataSize, sourceImage); + bool isLoaded = LoadPng(data, dataSize, false, sourceImage); if (!isLoaded) { return nil; } From f0f81cbc44b956fddbfe7699b311e067256235cc Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Thu, 13 May 2021 15:24:52 -0700 Subject: [PATCH 044/901] kram - fix bug in length() call, and warnings in lodepng --- libkram/kram/float4a.h | 2 +- libkram/lodepng/lodepng.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libkram/kram/float4a.h b/libkram/kram/float4a.h index ca8111e7..80ba20a4 100644 --- a/libkram/kram/float4a.h +++ b/libkram/kram/float4a.h @@ -368,7 +368,7 @@ inline float length_squared(const float4& vv) } inline float length(const float4& vv) { - return sqrtf(length(vv)); + return sqrtf(length_squared(vv)); } // sse4.1 ops diff --git a/libkram/lodepng/lodepng.cpp b/libkram/lodepng/lodepng.cpp index b08b0858..58c61022 100644 --- a/libkram/lodepng/lodepng.cpp +++ b/libkram/lodepng/lodepng.cpp @@ -715,7 +715,7 @@ static unsigned HuffmanTree_makeTable(HuffmanTree* tree) { size = headsize; for(i = 0; i < headsize; ++i) { unsigned l = maxlens[i]; - if(l > FIRSTBITS) size += (1u << (l - FIRSTBITS)); + if(l > FIRSTBITS) size += (unsigned)(1u << (l - FIRSTBITS)); } tree->table_len = (unsigned char*)lodepng_malloc(size * sizeof(*tree->table_len)); tree->table_value = (unsigned short*)lodepng_malloc(size * sizeof(*tree->table_value)); @@ -734,7 +734,7 @@ static unsigned HuffmanTree_makeTable(HuffmanTree* tree) { if(l <= FIRSTBITS) continue; tree->table_len[i] = l; tree->table_value[i] = pointer; - pointer += (1u << (l - FIRSTBITS)); + pointer += (unsigned)(1u << (l - FIRSTBITS)); } lodepng_free(maxlens); @@ -5447,7 +5447,7 @@ static size_t ilog2i(size_t i) { l = ilog2(i); /* approximate i*log2(i): l is integer logarithm, ((i - (1u << l)) << 1u) linearly approximates the missing fractional part multiplied by i */ - return i * l + ((i - (1u << l)) << 1u); + return i * l + ((i - ((size_t)1 << l)) << (size_t)1); } static unsigned filter(unsigned char* out, const unsigned char* in, unsigned w, unsigned h, From ce3a6d8e56e2d0a37df0b1d62ddc2bf61a8ebbf5 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 16 May 2021 16:14:12 -0700 Subject: [PATCH 045/901] kram - update zstd and add encoder, move miniz into libkram from kramv, small fix to bc7enc --- libkram/CMakeLists.txt | 18 +- libkram/bc7enc/bc7enc.cpp | 5 +- {kramv => libkram/miniz}/miniz.cpp | 1 + {kramv => libkram/miniz}/miniz.h | 0 libkram/zstd/zstd.cpp | 40929 +++++++++++++++++++++++++++ libkram/zstd/zstd.h | 2532 ++ libkram/zstd/zstddeclib.cpp | 13875 +++++---- 7 files changed, 52085 insertions(+), 5275 deletions(-) rename {kramv => libkram/miniz}/miniz.cpp (99%) rename {kramv => libkram/miniz}/miniz.h (100%) create mode 100644 libkram/zstd/zstd.cpp create mode 100644 libkram/zstd/zstd.h diff --git a/libkram/CMakeLists.txt b/libkram/CMakeLists.txt index 0b501ee3..e5fd590b 100644 --- a/libkram/CMakeLists.txt +++ b/libkram/CMakeLists.txt @@ -86,7 +86,19 @@ file(GLOB_RECURSE libSources CONFIGURE_DEPENDS "${SOURCE_DIR}/tmpfileplus/*.cpp" "${SOURCE_DIR}/tmpfileplus/*.h" - 
"${SOURCE_DIR}/zstd/zstddeclib.cpp" + # decoder unity file + # cd zstd/build/single_file_libs + # ./combine.sh -r ../../lib -o zstddeclib.c zstddeclib-in.c + # "${SOURCE_DIR}/zstd/zstddeclib.cpp" + + # full unity file + # cd zstd/build/single_file_libs + # ./combine.sh -r ../../lib -o zstd.c zstd-in.c + "${SOURCE_DIR}/zstd/zstd.h" + "${SOURCE_DIR}/zstd/zstd.cpp" + + "${SOURCE_DIR}/miniz/miniz.h" + "${SOURCE_DIR}/miniz/miniz.cpp" ) # no objc on win or linux @@ -128,6 +140,10 @@ target_include_directories(${myTargetLib} PRIVATE "${SOURCE_DIR}/zstd/" ) +target_include_directories(${myTargetLib} PUBLIC + "${SOURCE_DIR}/miniz/" + ) + # only add sources to the library target_sources(${myTargetLib} PRIVATE ${libSources}) diff --git a/libkram/bc7enc/bc7enc.cpp b/libkram/bc7enc/bc7enc.cpp index 61b4abe6..b2403b84 100644 --- a/libkram/bc7enc/bc7enc.cpp +++ b/libkram/bc7enc/bc7enc.cpp @@ -1944,8 +1944,9 @@ static void handle_alpha_block(void *pBlock, const color_quad_u8 *pPixels, const pParams->m_pSelector_weightsx = (const vec4F *)g_bc7_weights4x; pParams->m_num_selector_weights = 16; pParams->m_comp_bits = 7; - pParams->m_has_pbits = BC7ENC_TRUE; - pParams->m_has_alpha = BC7ENC_TRUE; + pParams->m_has_pbits = BC7ENC_TRUE; + pParams->m_endpoints_share_pbit = BC7ENC_FALSE; + pParams->m_has_alpha = BC7ENC_TRUE; pParams->m_perceptual = pComp_params->m_perceptual; pParams->m_num_pixels = 16; pParams->m_pPixels = pPixels; diff --git a/kramv/miniz.cpp b/libkram/miniz/miniz.cpp similarity index 99% rename from kramv/miniz.cpp rename to libkram/miniz/miniz.cpp index e3deec32..62ea05c4 100644 --- a/kramv/miniz.cpp +++ b/libkram/miniz/miniz.cpp @@ -3229,6 +3229,7 @@ struct mz_zip_internal_state_tag #if defined(DEBUG) || defined(_DEBUG) || defined(NDEBUG) static MZ_FORCEINLINE mz_uint mz_zip_array_range_check(const mz_zip_array *pArray, mz_uint index) { + (void)pArray; MZ_ASSERT(index < pArray->m_size); return index; } diff --git a/kramv/miniz.h b/libkram/miniz/miniz.h similarity index 100% rename from kramv/miniz.h rename to libkram/miniz/miniz.h diff --git a/libkram/zstd/zstd.cpp b/libkram/zstd/zstd.cpp new file mode 100644 index 00000000..45a4c83e --- /dev/null +++ b/libkram/zstd/zstd.cpp @@ -0,0 +1,40929 @@ +/** + * \file zstd.c + * Single-file Zstandard library. + * + * Generate using: + * \code + * combine.sh -r ../../lib -o zstd.c zstd-in.c + * \endcode + */ +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ +/* + * Settings to bake for the single library file. + * + * Note: It's important that none of these affects 'zstd.h' (only the + * implementation files we're amalgamating). + * + * Note: MEM_MODULE stops xxhash redefining BYTE, U16, etc., which are also + * defined in mem.h (breaking C99 compatibility). + * + * Note: the undefs for xxHash allow Zstd's implementation to coinside with with + * standalone xxHash usage (with global defines). + * + * Note: multithreading is enabled for all platforms apart from Emscripten. 
+ */ +#define DEBUGLEVEL 0 +#define MEM_MODULE +#undef XXH_NAMESPACE +#define XXH_NAMESPACE ZSTD_ +#undef XXH_PRIVATE_API +#define XXH_PRIVATE_API +#undef XXH_INLINE_ALL +#define XXH_INLINE_ALL +#define ZSTD_LEGACY_SUPPORT 0 +#ifndef __EMSCRIPTEN__ +#define ZSTD_MULTITHREAD +#endif +#define ZSTD_TRACE 0 + +/* Include zstd_deps.h first with all the options we need enabled. */ +#define ZSTD_DEPS_NEED_MALLOC +#define ZSTD_DEPS_NEED_MATH64 +/**** start inlining common/zstd_deps.h ****/ +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* This file provides common libc dependencies that zstd requires. + * The purpose is to allow replacing this file with a custom implementation + * to compile zstd without libc support. + */ + +/* Need: + * NULL + * INT_MAX + * UINT_MAX + * ZSTD_memcpy() + * ZSTD_memset() + * ZSTD_memmove() + */ +#ifndef ZSTD_DEPS_COMMON +#define ZSTD_DEPS_COMMON + +#include +#include +#include + +#if defined(__GNUC__) && __GNUC__ >= 4 +# define ZSTD_memcpy(d,s,l) __builtin_memcpy((d),(s),(l)) +# define ZSTD_memmove(d,s,l) __builtin_memmove((d),(s),(l)) +# define ZSTD_memset(p,v,l) __builtin_memset((p),(v),(l)) +#else +# define ZSTD_memcpy(d,s,l) memcpy((d),(s),(l)) +# define ZSTD_memmove(d,s,l) memmove((d),(s),(l)) +# define ZSTD_memset(p,v,l) memset((p),(v),(l)) +#endif + +#endif /* ZSTD_DEPS_COMMON */ + +/* Need: + * ZSTD_malloc() + * ZSTD_free() + * ZSTD_calloc() + */ +#ifdef ZSTD_DEPS_NEED_MALLOC +#ifndef ZSTD_DEPS_MALLOC +#define ZSTD_DEPS_MALLOC + +#include + +#define ZSTD_malloc(s) malloc(s) +#define ZSTD_calloc(n,s) calloc((n), (s)) +#define ZSTD_free(p) free((p)) + +#endif /* ZSTD_DEPS_MALLOC */ +#endif /* ZSTD_DEPS_NEED_MALLOC */ + +/* + * Provides 64-bit math support. + * Need: + * U64 ZSTD_div64(U64 dividend, U32 divisor) + */ +#ifdef ZSTD_DEPS_NEED_MATH64 +#ifndef ZSTD_DEPS_MATH64 +#define ZSTD_DEPS_MATH64 + +#define ZSTD_div64(dividend, divisor) ((dividend) / (divisor)) + +#endif /* ZSTD_DEPS_MATH64 */ +#endif /* ZSTD_DEPS_NEED_MATH64 */ + +/* Need: + * assert() + */ +#ifdef ZSTD_DEPS_NEED_ASSERT +#ifndef ZSTD_DEPS_ASSERT +#define ZSTD_DEPS_ASSERT + +#include + +#endif /* ZSTD_DEPS_ASSERT */ +#endif /* ZSTD_DEPS_NEED_ASSERT */ + +/* Need: + * ZSTD_DEBUG_PRINT() + */ +#ifdef ZSTD_DEPS_NEED_IO +#ifndef ZSTD_DEPS_IO +#define ZSTD_DEPS_IO + +#include +#define ZSTD_DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__) + +#endif /* ZSTD_DEPS_IO */ +#endif /* ZSTD_DEPS_NEED_IO */ + +/* Only requested when is known to be present. + * Need: + * intptr_t + */ +#ifdef ZSTD_DEPS_NEED_STDINT +#ifndef ZSTD_DEPS_STDINT +#define ZSTD_DEPS_STDINT + +#include + +#endif /* ZSTD_DEPS_STDINT */ +#endif /* ZSTD_DEPS_NEED_STDINT */ +/**** ended inlining common/zstd_deps.h ****/ + +/**** start inlining common/debug.c ****/ +/* ****************************************************************** + * debug + * Part of FSE library + * Copyright (c) Yann Collet, Facebook, Inc. 
+ * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + + +/* + * This module only hosts one global variable + * which can be used to dynamically influence the verbosity of traces, + * such as DEBUGLOG and RAWLOG + */ + +/**** start inlining debug.h ****/ +/* ****************************************************************** + * debug + * Part of FSE library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + + +/* + * The purpose of this header is to enable debug functions. + * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time, + * and DEBUG_STATIC_ASSERT() for compile-time. + * + * By default, DEBUGLEVEL==0, which means run-time debug is disabled. + * + * Level 1 enables assert() only. + * Starting level 2, traces can be generated and pushed to stderr. + * The higher the level, the more verbose the traces. + * + * It's possible to dynamically adjust level using variable g_debug_level, + * which is only declared if DEBUGLEVEL>=2, + * and is a global variable, not multi-thread protected (use with care) + */ + +#ifndef DEBUG_H_12987983217 +#define DEBUG_H_12987983217 + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* static assert is triggered at compile time, leaving no runtime artefact. + * static assert only works with compile-time constants. + * Also, this variant can only be used inside a function. */ +#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1]) + + +/* DEBUGLEVEL is expected to be defined externally, + * typically through compiler command line. + * Value must be a number. */ +#ifndef DEBUGLEVEL +# define DEBUGLEVEL 0 +#endif + + +/* recommended values for DEBUGLEVEL : + * 0 : release mode, no debug, all run-time checks disabled + * 1 : enables assert() only, no display + * 2 : reserved, for currently active debug path + * 3 : events once per object lifetime (CCtx, CDict, etc.) + * 4 : events once per frame + * 5 : events once per block + * 6 : events once per sequence (verbose) + * 7+: events at every position (*very* verbose) + * + * It's generally inconvenient to output traces > 5. + * In which case, it's possible to selectively trigger high verbosity levels + * by modifying g_debug_level. 
+ */ + +#if (DEBUGLEVEL>=1) +# define ZSTD_DEPS_NEED_ASSERT +/**** skipping file: zstd_deps.h ****/ +#else +# ifndef assert /* assert may be already defined, due to prior #include */ +# define assert(condition) ((void)0) /* disable assert (default) */ +# endif +#endif + +#if (DEBUGLEVEL>=2) +# define ZSTD_DEPS_NEED_IO +/**** skipping file: zstd_deps.h ****/ +extern int g_debuglevel; /* the variable is only declared, + it actually lives in debug.c, + and is shared by the whole process. + It's not thread-safe. + It's useful when enabling very verbose levels + on selective conditions (such as position in src) */ + +# define RAWLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + ZSTD_DEBUG_PRINT(__VA_ARGS__); \ + } } +# define DEBUGLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \ + ZSTD_DEBUG_PRINT(" \n"); \ + } } +#else +# define RAWLOG(l, ...) {} /* disabled */ +# define DEBUGLOG(l, ...) {} /* disabled */ +#endif + + +#if defined (__cplusplus) +} +#endif + +#endif /* DEBUG_H_12987983217 */ +/**** ended inlining debug.h ****/ + +int g_debuglevel = DEBUGLEVEL; +/**** ended inlining common/debug.c ****/ +/**** start inlining common/entropy_common.c ****/ +/* ****************************************************************** + * Common functions of New Generation Entropy library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* ************************************* +* Dependencies +***************************************/ +/**** start inlining mem.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef MEM_H_MODULE +#define MEM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-**************************************** +* Dependencies +******************************************/ +#include /* size_t, ptrdiff_t */ +/**** start inlining compiler.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_COMPILER_H +#define ZSTD_COMPILER_H + +/*-******************************************************* +* Compiler specifics +*********************************************************/ +/* force inlining */ + +#if !defined(ZSTD_NO_INLINE) +#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# define INLINE_KEYWORD inline +#else +# define INLINE_KEYWORD +#endif + +#if defined(__GNUC__) || defined(__ICCARM__) +# define FORCE_INLINE_ATTR __attribute__((always_inline)) +#elif defined(_MSC_VER) +# define FORCE_INLINE_ATTR __forceinline +#else +# define FORCE_INLINE_ATTR +#endif + +#else + +#define INLINE_KEYWORD +#define FORCE_INLINE_ATTR + +#endif + +/** + On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC). + This explictly marks such functions as __cdecl so that the code will still compile + if a CC other than __cdecl has been made the default. +*/ +#if defined(_MSC_VER) +# define WIN_CDECL __cdecl +#else +# define WIN_CDECL +#endif + +/** + * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant + * parameters. They must be inlined for the compiler to eliminate the constant + * branches. + */ +#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR +/** + * HINT_INLINE is used to help the compiler generate better code. It is *not* + * used for "templates", so it can be tweaked based on the compilers + * performance. + * + * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the + * always_inline attribute. + * + * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline + * attribute. + */ +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 +# define HINT_INLINE static INLINE_KEYWORD +#else +# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR +#endif + +/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ +#if defined(__GNUC__) +# define UNUSED_ATTR __attribute__((unused)) +#else +# define UNUSED_ATTR +#endif + +/* force no inlining */ +#ifdef _MSC_VER +# define FORCE_NOINLINE static __declspec(noinline) +#else +# if defined(__GNUC__) || defined(__ICCARM__) +# define FORCE_NOINLINE static __attribute__((__noinline__)) +# else +# define FORCE_NOINLINE static +# endif +#endif + + +/* target attribute */ +#ifndef __has_attribute + #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ +#endif +#if defined(__GNUC__) || defined(__ICCARM__) +# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) +#else +# define TARGET_ATTRIBUTE(target) +#endif + +/* Enable runtime BMI2 dispatch based on the CPU. + * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. 
+ */ +#ifndef DYNAMIC_BMI2 + #if ((defined(__clang__) && __has_attribute(__target__)) \ + || (defined(__GNUC__) \ + && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ + && (defined(__x86_64__) || defined(_M_X86)) \ + && !defined(__BMI2__) + # define DYNAMIC_BMI2 1 + #else + # define DYNAMIC_BMI2 0 + #endif +#endif + +/* prefetch + * can be disabled, by declaring NO_PREFETCH build macro */ +#if defined(NO_PREFETCH) +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ +#else +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ +# include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ +# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) +# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) +# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) +# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) +# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) +# elif defined(__aarch64__) +# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))) +# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))) +# else +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ +# endif +#endif /* NO_PREFETCH */ + +#define CACHELINE_SIZE 64 + +#define PREFETCH_AREA(p, s) { \ + const char* const _ptr = (const char*)(p); \ + size_t const _size = (size_t)(s); \ + size_t _pos; \ + for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ + PREFETCH_L2(_ptr + _pos); \ + } \ +} + +/* vectorization + * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */ +#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) +# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5) +# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) +# else +# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")") +# endif +#else +# define DONT_VECTORIZE +#endif + +/* Tell the compiler that a branch is likely or unlikely. + * Only use these macros if it causes the compiler to generate better code. + * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc + * and clang, please do. + */ +#if defined(__GNUC__) +#define LIKELY(x) (__builtin_expect((x), 1)) +#define UNLIKELY(x) (__builtin_expect((x), 0)) +#else +#define LIKELY(x) (x) +#define UNLIKELY(x) (x) +#endif + +/* disable warnings */ +#ifdef _MSC_VER /* Visual Studio */ +# include /* For Visual 2005 */ +# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ +# pragma warning(disable : 4324) /* disable: C4324: padded structure */ +#endif + +/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/ +#ifndef STATIC_BMI2 +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) +# ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2 +# define STATIC_BMI2 1 +# endif +# endif +#endif + +#ifndef STATIC_BMI2 + #define STATIC_BMI2 0 +#endif + +/* compat. 
with non-clang compilers */ +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +/* compat. with non-clang compilers */ +#ifndef __has_feature +# define __has_feature(x) 0 +#endif + +/* detects whether we are being compiled under msan */ +#ifndef ZSTD_MEMORY_SANITIZER +# if __has_feature(memory_sanitizer) +# define ZSTD_MEMORY_SANITIZER 1 +# else +# define ZSTD_MEMORY_SANITIZER 0 +# endif +#endif + +#if ZSTD_MEMORY_SANITIZER +/* Not all platforms that support msan provide sanitizers/msan_interface.h. + * We therefore declare the functions we need ourselves, rather than trying to + * include the header file... */ +#include /* size_t */ +#define ZSTD_DEPS_NEED_STDINT +/**** skipping file: zstd_deps.h ****/ + +/* Make memory region fully initialized (without changing its contents). */ +void __msan_unpoison(const volatile void *a, size_t size); + +/* Make memory region fully uninitialized (without changing its contents). + This is a legacy interface that does not update origin information. Use + __msan_allocated_memory() instead. */ +void __msan_poison(const volatile void *a, size_t size); + +/* Returns the offset of the first (at least partially) poisoned byte in the + memory range, or -1 if the whole range is good. */ +intptr_t __msan_test_shadow(const volatile void *x, size_t size); +#endif + +/* detects whether we are being compiled under asan */ +#ifndef ZSTD_ADDRESS_SANITIZER +# if __has_feature(address_sanitizer) +# define ZSTD_ADDRESS_SANITIZER 1 +# elif defined(__SANITIZE_ADDRESS__) +# define ZSTD_ADDRESS_SANITIZER 1 +# else +# define ZSTD_ADDRESS_SANITIZER 0 +# endif +#endif + +#if ZSTD_ADDRESS_SANITIZER +/* Not all platforms that support asan provide sanitizers/asan_interface.h. + * We therefore declare the functions we need ourselves, rather than trying to + * include the header file... */ +#include /* size_t */ + +/** + * Marks a memory region ([addr, addr+size)) as unaddressable. + * + * This memory must be previously allocated by your program. Instrumented + * code is forbidden from accessing addresses in this region until it is + * unpoisoned. This function is not guaranteed to poison the entire region - + * it could poison only a subregion of [addr, addr+size) due to ASan + * alignment restrictions. + * + * \note This function is not thread-safe because no two threads can poison or + * unpoison memory in the same memory region simultaneously. + * + * \param addr Start of memory region. + * \param size Size of memory region. */ +void __asan_poison_memory_region(void const volatile *addr, size_t size); + +/** + * Marks a memory region ([addr, addr+size)) as addressable. + * + * This memory must be previously allocated by your program. Accessing + * addresses in this region is allowed until this region is poisoned again. + * This function could unpoison a super-region of [addr, addr+size) due + * to ASan alignment restrictions. + * + * \note This function is not thread-safe because no two threads can + * poison or unpoison memory in the same memory region simultaneously. + * + * \param addr Start of memory region. + * \param size Size of memory region. 
 */
+void __asan_unpoison_memory_region(void const volatile *addr, size_t size);
+#endif
+
+#endif /* ZSTD_COMPILER_H */
+/**** ended inlining compiler.h ****/
+/**** skipping file: debug.h ****/
+/**** skipping file: zstd_deps.h ****/
+
+
+/*-****************************************
+* Compiler specifics
+******************************************/
+#if defined(_MSC_VER) /* Visual Studio */
+# include <stdlib.h> /* _byteswap_ulong */
+# include <intrin.h> /* _byteswap_* */
+#endif
+#if defined(__GNUC__)
+# define MEM_STATIC static __inline __attribute__((unused))
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+# define MEM_STATIC static __inline
+#else
+# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
+
+/*-**************************************************************
+* Basic Types
+*****************************************************************/
+#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# if defined(_AIX)
+# include <inttypes.h>
+# else
+# include <stdint.h> /* intptr_t */
+# endif
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef int16_t S16;
+ typedef uint32_t U32;
+ typedef int32_t S32;
+ typedef uint64_t U64;
+ typedef int64_t S64;
+#else
+# include <limits.h>
+#if CHAR_BIT != 8
+# error "this implementation requires char to be exactly 8-bit type"
+#endif
+ typedef unsigned char BYTE;
+#if USHRT_MAX != 65535
+# error "this implementation requires short to be exactly 16-bit type"
+#endif
+ typedef unsigned short U16;
+ typedef signed short S16;
+#if UINT_MAX != 4294967295
+# error "this implementation requires int to be exactly 32-bit type"
+#endif
+ typedef unsigned int U32;
+ typedef signed int S32;
+/* note : there are no limits defined for long long type in C90.
+ * limits exist in C99, however, in such case, is preferred */ + typedef unsigned long long U64; + typedef signed long long S64; +#endif + + +/*-************************************************************** +* Memory I/O API +*****************************************************************/ +/*=== Static platform detection ===*/ +MEM_STATIC unsigned MEM_32bits(void); +MEM_STATIC unsigned MEM_64bits(void); +MEM_STATIC unsigned MEM_isLittleEndian(void); + +/*=== Native unaligned read/write ===*/ +MEM_STATIC U16 MEM_read16(const void* memPtr); +MEM_STATIC U32 MEM_read32(const void* memPtr); +MEM_STATIC U64 MEM_read64(const void* memPtr); +MEM_STATIC size_t MEM_readST(const void* memPtr); + +MEM_STATIC void MEM_write16(void* memPtr, U16 value); +MEM_STATIC void MEM_write32(void* memPtr, U32 value); +MEM_STATIC void MEM_write64(void* memPtr, U64 value); + +/*=== Little endian unaligned read/write ===*/ +MEM_STATIC U16 MEM_readLE16(const void* memPtr); +MEM_STATIC U32 MEM_readLE24(const void* memPtr); +MEM_STATIC U32 MEM_readLE32(const void* memPtr); +MEM_STATIC U64 MEM_readLE64(const void* memPtr); +MEM_STATIC size_t MEM_readLEST(const void* memPtr); + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val); +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val); +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32); +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64); +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val); + +/*=== Big endian unaligned read/write ===*/ +MEM_STATIC U32 MEM_readBE32(const void* memPtr); +MEM_STATIC U64 MEM_readBE64(const void* memPtr); +MEM_STATIC size_t MEM_readBEST(const void* memPtr); + +MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32); +MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64); +MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val); + +/*=== Byteswap ===*/ +MEM_STATIC U32 MEM_swap32(U32 in); +MEM_STATIC U64 MEM_swap64(U64 in); +MEM_STATIC size_t MEM_swapST(size_t in); + + +/*-************************************************************** +* Memory I/O Implementation +*****************************************************************/ +/* MEM_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method is portable but violate C standard. + * It can generate buggy code on targets depending on alignment. + * In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6) + * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details. 
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# define MEM_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; } +MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; } + +MEM_STATIC unsigned MEM_isLittleEndian(void) +{ + const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ + return one.c[0]; +} + +#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2) + +/* violates C standard, by lying on structure alignment. +Only use if no other choice to achieve best performance on target platform */ +MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; } +MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; } +MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; } +MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; } + +#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +#if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32)) + __pragma( pack(push, 1) ) + typedef struct { U16 v; } unalign16; + typedef struct { U32 v; } unalign32; + typedef struct { U64 v; } unalign64; + typedef struct { size_t v; } unalignArch; + __pragma( pack(pop) ) +#else + typedef struct { U16 v; } __attribute__((packed)) unalign16; + typedef struct { U32 v; } __attribute__((packed)) unalign32; + typedef struct { U64 v; } __attribute__((packed)) unalign64; + typedef struct { size_t v; } __attribute__((packed)) unalignArch; +#endif + +MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; } +MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; } +MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; } +MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; } + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; } +MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; } +MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; } + +#else + +/* default method, safe and standard. 
+ can sometimes prove slower */ + +MEM_STATIC U16 MEM_read16(const void* memPtr) +{ + U16 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U32 MEM_read32(const void* memPtr) +{ + U32 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC U64 MEM_read64(const void* memPtr) +{ + U64 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC size_t MEM_readST(const void* memPtr) +{ + size_t val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; +} + +MEM_STATIC void MEM_write16(void* memPtr, U16 value) +{ + ZSTD_memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write32(void* memPtr, U32 value) +{ + ZSTD_memcpy(memPtr, &value, sizeof(value)); +} + +MEM_STATIC void MEM_write64(void* memPtr, U64 value) +{ + ZSTD_memcpy(memPtr, &value, sizeof(value)); +} + +#endif /* MEM_FORCE_MEMORY_ACCESS */ + +MEM_STATIC U32 MEM_swap32(U32 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_ulong(in); +#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ + || (defined(__clang__) && __has_builtin(__builtin_bswap32)) + return __builtin_bswap32(in); +#else + return ((in << 24) & 0xff000000 ) | + ((in << 8) & 0x00ff0000 ) | + ((in >> 8) & 0x0000ff00 ) | + ((in >> 24) & 0x000000ff ); +#endif +} + +MEM_STATIC U64 MEM_swap64(U64 in) +{ +#if defined(_MSC_VER) /* Visual Studio */ + return _byteswap_uint64(in); +#elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \ + || (defined(__clang__) && __has_builtin(__builtin_bswap64)) + return __builtin_bswap64(in); +#else + return ((in << 56) & 0xff00000000000000ULL) | + ((in << 40) & 0x00ff000000000000ULL) | + ((in << 24) & 0x0000ff0000000000ULL) | + ((in << 8) & 0x000000ff00000000ULL) | + ((in >> 8) & 0x00000000ff000000ULL) | + ((in >> 24) & 0x0000000000ff0000ULL) | + ((in >> 40) & 0x000000000000ff00ULL) | + ((in >> 56) & 0x00000000000000ffULL); +#endif +} + +MEM_STATIC size_t MEM_swapST(size_t in) +{ + if (MEM_32bits()) + return (size_t)MEM_swap32((U32)in); + else + return (size_t)MEM_swap64((U64)in); +} + +/*=== Little endian r/w ===*/ + +MEM_STATIC U16 MEM_readLE16(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read16(memPtr); + else { + const BYTE* p = (const BYTE*)memPtr; + return (U16)(p[0] + (p[1]<<8)); + } +} + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) +{ + if (MEM_isLittleEndian()) { + MEM_write16(memPtr, val); + } else { + BYTE* p = (BYTE*)memPtr; + p[0] = (BYTE)val; + p[1] = (BYTE)(val>>8); + } +} + +MEM_STATIC U32 MEM_readLE24(const void* memPtr) +{ + return (U32)MEM_readLE16(memPtr) + ((U32)(((const BYTE*)memPtr)[2]) << 16); +} + +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val) +{ + MEM_writeLE16(memPtr, (U16)val); + ((BYTE*)memPtr)[2] = (BYTE)(val>>16); +} + +MEM_STATIC U32 MEM_readLE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read32(memPtr); + else + return MEM_swap32(MEM_read32(memPtr)); +} + +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + MEM_write32(memPtr, val32); + else + MEM_write32(memPtr, MEM_swap32(val32)); +} + +MEM_STATIC U64 MEM_readLE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_read64(memPtr); + else + return MEM_swap64(MEM_read64(memPtr)); +} + +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + MEM_write64(memPtr, val64); + else + MEM_write64(memPtr, MEM_swap64(val64)); +} + +MEM_STATIC size_t MEM_readLEST(const void* memPtr) +{ + if (MEM_32bits()) + return 
(size_t)MEM_readLE32(memPtr); + else + return (size_t)MEM_readLE64(memPtr); +} + +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeLE32(memPtr, (U32)val); + else + MEM_writeLE64(memPtr, (U64)val); +} + +/*=== Big endian r/w ===*/ + +MEM_STATIC U32 MEM_readBE32(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_swap32(MEM_read32(memPtr)); + else + return MEM_read32(memPtr); +} + +MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32) +{ + if (MEM_isLittleEndian()) + MEM_write32(memPtr, MEM_swap32(val32)); + else + MEM_write32(memPtr, val32); +} + +MEM_STATIC U64 MEM_readBE64(const void* memPtr) +{ + if (MEM_isLittleEndian()) + return MEM_swap64(MEM_read64(memPtr)); + else + return MEM_read64(memPtr); +} + +MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64) +{ + if (MEM_isLittleEndian()) + MEM_write64(memPtr, MEM_swap64(val64)); + else + MEM_write64(memPtr, val64); +} + +MEM_STATIC size_t MEM_readBEST(const void* memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readBE32(memPtr); + else + return (size_t)MEM_readBE64(memPtr); +} + +MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeBE32(memPtr, (U32)val); + else + MEM_writeBE64(memPtr, (U64)val); +} + +/* code only tested on 32 and 64 bits systems */ +MEM_STATIC void MEM_check(void) { DEBUG_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } + + +#if defined (__cplusplus) +} +#endif + +#endif /* MEM_H_MODULE */ +/**** ended inlining mem.h ****/ +/**** start inlining error_private.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* Note : this module is expected to remain private, do not expose it */ + +#ifndef ERROR_H_MODULE +#define ERROR_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + +/* **************************************** +* Dependencies +******************************************/ +/**** start inlining ../zstd_errors.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_ERRORS_H_398273423 +#define ZSTD_ERRORS_H_398273423 + +#if defined (__cplusplus) +extern "C" { +#endif + +/*===== dependency =====*/ +#include /* size_t */ + + +/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ +#ifndef ZSTDERRORLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDERRORLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZSTDERRORLIB_VISIBILITY +# endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDERRORLIB_API __declspec(dllexport) ZSTDERRORLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZSTDERRORLIB_API __declspec(dllimport) ZSTDERRORLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY +#endif + +/*-********************************************* + * Error codes list + *-********************************************* + * Error codes _values_ are pinned down since v1.3.1 only. + * Therefore, don't rely on values if you may link to any version < v1.3.1. + * + * Only values < 100 are considered stable. + * + * note 1 : this API shall be used with static linking only. + * dynamic linking is not yet officially supported. + * note 2 : Prefer relying on the enum than on its value whenever possible + * This is the only supported way to use the error list < v1.3.1 + * note 3 : ZSTD_isError() is always correct, whatever the library version. + **********************************************/ +typedef enum { + ZSTD_error_no_error = 0, + ZSTD_error_GENERIC = 1, + ZSTD_error_prefix_unknown = 10, + ZSTD_error_version_unsupported = 12, + ZSTD_error_frameParameter_unsupported = 14, + ZSTD_error_frameParameter_windowTooLarge = 16, + ZSTD_error_corruption_detected = 20, + ZSTD_error_checksum_wrong = 22, + ZSTD_error_dictionary_corrupted = 30, + ZSTD_error_dictionary_wrong = 32, + ZSTD_error_dictionaryCreation_failed = 34, + ZSTD_error_parameter_unsupported = 40, + ZSTD_error_parameter_outOfBound = 42, + ZSTD_error_tableLog_tooLarge = 44, + ZSTD_error_maxSymbolValue_tooLarge = 46, + ZSTD_error_maxSymbolValue_tooSmall = 48, + ZSTD_error_stage_wrong = 60, + ZSTD_error_init_missing = 62, + ZSTD_error_memory_allocation = 64, + ZSTD_error_workSpace_tooSmall= 66, + ZSTD_error_dstSize_tooSmall = 70, + ZSTD_error_srcSize_wrong = 72, + ZSTD_error_dstBuffer_null = 74, + /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ + ZSTD_error_frameIndex_tooLarge = 100, + ZSTD_error_seekableIO = 102, + ZSTD_error_dstBuffer_wrong = 104, + ZSTD_error_srcBuffer_wrong = 105, + ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ +} ZSTD_ErrorCode; + +/*! 
ZSTD_getErrorCode() : + convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, + which can be used to compare with enum list published above */ +ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); +ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_ERRORS_H_398273423 */ +/**** ended inlining ../zstd_errors.h ****/ +/**** skipping file: zstd_deps.h ****/ + + +/* **************************************** +* Compiler-specific +******************************************/ +#if defined(__GNUC__) +# define ERR_STATIC static __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define ERR_STATIC static inline +#elif defined(_MSC_VER) +# define ERR_STATIC static __inline +#else +# define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/*-**************************************** +* Customization (error_public.h) +******************************************/ +typedef ZSTD_ErrorCode ERR_enum; +#define PREFIX(name) ZSTD_error_##name + + +/*-**************************************** +* Error codes handling +******************************************/ +#undef ERROR /* already defined on Visual Studio */ +#define ERROR(name) ZSTD_ERROR(name) +#define ZSTD_ERROR(name) ((size_t)-PREFIX(name)) + +ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } + +ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } + +/* check and forward error code */ +#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e +#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } + + +/*-**************************************** +* Error Strings +******************************************/ + +const char* ERR_getErrorString(ERR_enum code); /* error_private.c */ + +ERR_STATIC const char* ERR_getErrorName(size_t code) +{ + return ERR_getErrorString(ERR_getErrorCode(code)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* ERROR_H_MODULE */ +/**** ended inlining error_private.h ****/ +#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ +/**** start inlining fse.h ****/ +/* ****************************************************************** + * FSE : Finite State Entropy codec + * Public Prototypes declaration + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef FSE_H +#define FSE_H + + +/*-***************************************** +* Dependencies +******************************************/ +/**** skipping file: zstd_deps.h ****/ + + +/*-***************************************** +* FSE_PUBLIC_API : control library symbols visibility +******************************************/ +#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) +# define FSE_PUBLIC_API __attribute__ ((visibility ("default"))) +#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ +# define FSE_PUBLIC_API __declspec(dllexport) +#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) +# define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define FSE_PUBLIC_API +#endif + +/*------ Version ------*/ +#define FSE_VERSION_MAJOR 0 +#define FSE_VERSION_MINOR 9 +#define FSE_VERSION_RELEASE 0 + +#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE +#define FSE_QUOTE(str) #str +#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str) +#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION) + +#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE) +FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */ + + +/*-**************************************** +* FSE simple functions +******************************************/ +/*! FSE_compress() : + Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. + 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize). + @return : size of compressed data (<= dstCapacity). + Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. + if FSE_isError(return), compression failed (more details using FSE_getErrorName()) +*/ +FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/*! FSE_decompress(): + Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', + into already allocated destination buffer 'dst', of size 'dstCapacity'. + @return : size of regenerated data (<= maxDstSize), + or an error code, which can be tested using FSE_isError() . + + ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!! + Why ? : making this distinction requires a header. + Header management is intentionally delegated to the user layer, which can better manage special cases. +*/ +FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize); + + +/*-***************************************** +* Tool functions +******************************************/ +FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */ + +/* Error Management */ +FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ +FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ + + +/*-***************************************** +* FSE advanced functions +******************************************/ +/*! 
FSE_compress2() : + Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' + Both parameters can be defined as '0' to mean : use default value + @return : size of compressed data + Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. + if FSE_isError(return), it's an error code. +*/ +FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); + + +/*-***************************************** +* FSE detailed API +******************************************/ +/*! +FSE_compress() does the following: +1. count symbol occurrence from source[] into table count[] (see hist.h) +2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) +3. save normalized counters to memory buffer using writeNCount() +4. build encoding table 'CTable' from normalized counters +5. encode the data stream using encoding table 'CTable' + +FSE_decompress() does the following: +1. read normalized counters with readNCount() +2. build decoding table 'DTable' from normalized counters +3. decode the data stream using decoding table 'DTable' + +The following API allows targeting specific sub-functions for advanced tasks. +For example, it's possible to compress several blocks using the same 'CTable', +or to save and provide normalized distribution using external method. +*/ + +/* *** COMPRESSION *** */ + +/*! FSE_optimalTableLog(): + dynamically downsize 'tableLog' when conditions are met. + It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. + @return : recommended tableLog (necessarily <= 'maxTableLog') */ +FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); + +/*! FSE_normalizeCount(): + normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) + 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). + useLowProbCount is a boolean parameter which trades off compressed size for + faster header decoding. When it is set to 1, the compressed data will be slightly + smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be + faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0 + is a good default, since header deserialization makes a big speed difference. + Otherwise, useLowProbCount=1 is a good default, since the speed difference is small. + @return : tableLog, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount); + +/*! FSE_NCountWriteBound(): + Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. + Typically useful for allocation purpose. */ +FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_writeNCount(): + Compactly save 'normalizedCounter' into 'buffer'. + @return : size of the compressed table, + or an errorCode, which can be tested using FSE_isError(). */ +FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, + const short* normalizedCounter, + unsigned maxSymbolValue, unsigned tableLog); + +/*! Constructor and Destructor of FSE_CTable. 
+ Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ +typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ +FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog); +FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct); + +/*! FSE_buildCTable(): + Builds `ct`, which must be already allocated, using FSE_createCTable(). + @return : 0, or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_compress_usingCTable(): + Compress `src` using `ct` into `dst` which must be already allocated. + @return : size of compressed data (<= `dstCapacity`), + or 0 if compressed data could not fit into `dst`, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); + +/*! +Tutorial : +---------- +The first step is to count all symbols. FSE_count() does this job very fast. +Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. +'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] +maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) +FSE_count() will return the number of occurrence of the most frequent symbol. +This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). + +The next step is to normalize the frequencies. +FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. +It also guarantees a minimum of 1 to any Symbol with frequency >= 1. +You can use 'tableLog'==0 to mean "use default tableLog value". +If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), +which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). + +The result of FSE_normalizeCount() will be saved into a table, +called 'normalizedCounter', which is a table of signed short. +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. +The return value is tableLog if everything proceeded as expected. +It is 0 if there is a single symbol within distribution. +If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). + +'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). +'buffer' must be already allocated. +For guaranteed success, buffer size must be at least FSE_headerBound(). +The result of the function is the number of bytes written into 'buffer'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). + +'normalizedCounter' can then be used to create the compression table 'CTable'. +The space required by 'CTable' must be already allocated, using FSE_createCTable(). +You can then use FSE_buildCTable() to fill 'CTable'. +If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). 
+ +'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). +Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' +The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. +If it returns '0', compressed data could not fit into 'dst'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). +*/ + + +/* *** DECOMPRESSION *** */ + +/*! FSE_readNCount(): + Read compactly saved 'normalizedCounter' from 'rBuffer'. + @return : size read from 'rBuffer', + or an errorCode, which can be tested using FSE_isError(). + maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ +FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize); + +/*! FSE_readNCount_bmi2(): + * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise. + */ +FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize, int bmi2); + +/*! Constructor and Destructor of FSE_DTable. + Note that its size depends on 'tableLog' */ +typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ +FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog); +FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt); + +/*! FSE_buildDTable(): + Builds 'dt', which must be already allocated, using FSE_createDTable(). + return : 0, or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_decompress_usingDTable(): + Decompress compressed source `cSrc` of size `cSrcSize` using `dt` + into `dst` which must be already allocated. + @return : size of regenerated data (necessarily <= `dstCapacity`), + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); + +/*! +Tutorial : +---------- +(Note : these functions only decompress FSE-compressed blocks. + If block is uncompressed, use memcpy() instead + If block is a single repeated byte, use memset() instead ) + +The first step is to obtain the normalized frequencies of symbols. +This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. +In practice, that means it's necessary to know 'maxSymbolValue' beforehand, +or size the table to handle worst case situations (typically 256). +FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. +The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. +Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. +If there is an error, the function will return an error code, which can be tested using FSE_isError(). + +The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. +This is performed by the function FSE_buildDTable(). +The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). 
+If there is an error, the function will return an error code, which can be tested using FSE_isError(). + +`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). +`cSrcSize` must be strictly correct, otherwise decompression will fail. +FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small) +*/ + +#endif /* FSE_H */ + +#if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY) +#define FSE_H_FSE_STATIC_LINKING_ONLY + +/* *** Dependency *** */ +/**** start inlining bitstream.h ****/ +/* ****************************************************************** + * bitstream + * Part of FSE library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ +#ifndef BITSTREAM_H_MODULE +#define BITSTREAM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif +/* +* This API consists of small unitary functions, which must be inlined for best performance. +* Since link-time-optimization is not available for all compilers, +* these functions are defined into a .h to be included. +*/ + +/*-**************************************** +* Dependencies +******************************************/ +/**** skipping file: mem.h ****/ +/**** skipping file: compiler.h ****/ +/**** skipping file: debug.h ****/ +/**** skipping file: error_private.h ****/ + + +/*========================================= +* Target specific +=========================================*/ +#ifndef ZSTD_NO_INTRINSICS +# if defined(__BMI__) && defined(__GNUC__) +# include /* support for bextr (experimental) */ +# elif defined(__ICCARM__) +# include +# endif +#endif + +#define STREAM_ACCUMULATOR_MIN_32 25 +#define STREAM_ACCUMULATOR_MIN_64 57 +#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) + + +/*-****************************************** +* bitStream encoding API (write forward) +********************************************/ +/* bitStream can mix input from multiple sources. + * A critical property of these streams is that they encode and decode in **reverse** direction. + * So the first bit sequence you add will be the last to be read, like a LIFO stack. + */ +typedef struct { + size_t bitContainer; + unsigned bitPos; + char* startPtr; + char* ptr; + char* endPtr; +} BIT_CStream_t; + +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); + +/* Start with initCStream, providing the size of buffer to write into. +* bitStream will never write outside of this buffer. +* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. +* +* bits are first added to a local register. 
+* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. +* Writing data into memory is an explicit operation, performed by the flushBits function. +* Hence keep track how many bits are potentially stored into local register to avoid register overflow. +* After a flushBits, a maximum of 7 bits might still be stored into local register. +* +* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. +* +* Last operation is to close the bitStream. +* The function returns the final size of CStream in bytes. +* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) +*/ + + +/*-******************************************** +* bitStream decoding API (read backward) +**********************************************/ +typedef struct { + size_t bitContainer; + unsigned bitsConsumed; + const char* ptr; + const char* start; + const char* limitPtr; +} BIT_DStream_t; + +typedef enum { BIT_DStream_unfinished = 0, + BIT_DStream_endOfBuffer = 1, + BIT_DStream_completed = 2, + BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ + /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ + +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); + + +/* Start by invoking BIT_initDStream(). +* A chunk of the bitStream is then stored into a local register. +* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +* You can then retrieve bitFields stored into the local register, **in reverse order**. +* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. +* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. +* Otherwise, it can be less than that, so proceed accordingly. +* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). +*/ + + +/*-**************************************** +* unsafe API +******************************************/ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ + +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); +/* unsafe version; does not check buffer overflow */ + +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); +/* faster, but works only if nbBits >= 1 */ + + + +/*-************************************************************** +* Internal functions +****************************************************************/ +MEM_STATIC unsigned BIT_highbit32 (U32 val) +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ +# if STATIC_BMI2 == 1 + return _lzcnt_u32(val) ^ 31; +# else + unsigned long r = 0; + return _BitScanReverse(&r, val) ? 
(unsigned)r : 0; +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ + return __builtin_clz (val) ^ 31; +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return 31 - __CLZ(val); +# else /* Software version */ + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, + 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, + 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; +# endif + } +} + +/*===== Local Constants =====*/ +static const unsigned BIT_mask[] = { + 0, 1, 3, 7, 0xF, 0x1F, + 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, + 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, + 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, + 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, + 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */ +#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0])) + +/*-************************************************************** +* bitStream encoding +****************************************************************/ +/*! BIT_initCStream() : + * `dstCapacity` must be > sizeof(size_t) + * @return : 0 if success, + * otherwise an error code (can be tested using ERR_isError()) */ +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, + void* startPtr, size_t dstCapacity) +{ + bitC->bitContainer = 0; + bitC->bitPos = 0; + bitC->startPtr = (char*)startPtr; + bitC->ptr = bitC->startPtr; + bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); + if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); + return 0; +} + +/*! BIT_addBits() : + * can add up to 31 bits into `bitC`. + * Note : does not check for register overflow ! */ +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32); + assert(nbBits < BIT_MASK_SIZE); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_addBitsFast() : + * works only if `value` is _clean_, + * meaning all high bits above nbBits are 0 */ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + assert((value>>nbBits) == 0); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= value << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_flushBitsFast() : + * assumption : bitContainer has not overflowed + * unsafe version; does not check buffer overflow */ +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +/*! BIT_flushBits() : + * assumption : bitContainer has not overflowed + * safe version; check for buffer overflow, and prevents it. + * note : does not signal buffer overflow. 
+ * overflow will be revealed later on using BIT_closeCStream() */ +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +/*! BIT_closeCStream() : + * @return : size of CStream, in bytes, + * or 0 if it could not fit into dstBuffer */ +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) +{ + BIT_addBitsFast(bitC, 1, 1); /* endMark */ + BIT_flushBits(bitC); + if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ + return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); +} + + +/*-******************************************************** +* bitStream decoding +**********************************************************/ +/*! BIT_initDStream() : + * Initialize a BIT_DStream_t. + * `bitD` : a pointer to an already allocated BIT_DStream_t structure. + * `srcSize` must be the *exact* size of the bitStream, in bytes. + * @return : size of stream (== srcSize), or an errorCode if a problem is detected + */ +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } + + bitD->start = (const char*)srcBuffer; + bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); + + if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); + bitD->bitContainer = MEM_readLEST(bitD->ptr); + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } + } else { + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); + /* fall-through */ + + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); + /* fall-through */ + + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); + /* fall-through */ + + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; + /* fall-through */ + + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; + /* fall-through */ + + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; + /* fall-through */ + + default: break; + } + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 
8 - BIT_highbit32(lastByte) : 0; + if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ + } + bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; + } + + return srcSize; +} + +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start) +{ + return bitContainer >> start; +} + +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) +{ + U32 const regMask = sizeof(bitContainer)*8 - 1; + /* if start > regMask, bitstream is corrupted, and result is undefined */ + assert(nbBits < BIT_MASK_SIZE); + return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; +} + +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) +{ +#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 + return _bzhi_u64(bitContainer, nbBits); +#else + assert(nbBits < BIT_MASK_SIZE); + return bitContainer & BIT_mask[nbBits]; +#endif +} + +/*! BIT_lookBits() : + * Provides next n bits from local register. + * local register is not modified. + * On 32-bits, maxNbBits==24. + * On 64-bits, maxNbBits==56. + * @return : value extracted */ +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) +{ + /* arbitrate between double-shift and shift+mask */ +#if 1 + /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8, + * bitstream is likely corrupted, and result is undefined */ + return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); +#else + /* this code path is slower on my os-x laptop */ + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); +#endif +} + +/*! BIT_lookBitsFast() : + * unsafe version; only works if nbBits >= 1 */ +MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) +{ + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + assert(nbBits >= 1); + return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); +} + +MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; +} + +/*! BIT_readBits() : + * Read (consume) next n bits from local register and update. + * Pay attention to not read more than nbBits contained into local register. + * @return : extracted value. */ +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) +{ + size_t const value = BIT_lookBits(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_readBitsFast() : + * unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) +{ + size_t const value = BIT_lookBitsFast(bitD, nbBits); + assert(nbBits >= 1); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_reloadDStreamFast() : + * Similar to BIT_reloadDStream(), but with two differences: + * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold! + * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this + * point you must use BIT_reloadDStream() to reload. 
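+ *
+ *  Illustrative sketch only (not part of the API contract): consumeField() is
+ *  a hypothetical caller routine, and nbBits is assumed to stay within the
+ *  BIT_lookBits() limits stated above.
+ *
+ *      while (BIT_reloadDStreamFast(&bitD) == BIT_DStream_unfinished) {
+ *          consumeField(BIT_readBits(&bitD, nbBits));
+ *      }
+ *
+ *  Once BIT_DStream_overflow is returned, continue with the safe
+ *  BIT_reloadDStream(), and check BIT_endOfDStream(&bitD) after the last read.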
+ */ +MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) +{ + if (UNLIKELY(bitD->ptr < bitD->limitPtr)) + return BIT_DStream_overflow; + assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; +} + +/*! BIT_reloadDStream() : + * Refill `bitD` from buffer previously set in BIT_initDStream() . + * This function is safe, it guarantees it will not read beyond src buffer. + * @return : status of `BIT_DStream_t` internal register. + * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) +{ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ + return BIT_DStream_overflow; + + if (bitD->ptr >= bitD->limitPtr) { + return BIT_reloadDStreamFast(bitD); + } + if (bitD->ptr == bitD->start) { + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; + return BIT_DStream_completed; + } + /* start < ptr < limitPtr */ + { U32 nbBytes = bitD->bitsConsumed >> 3; + BIT_DStream_status result = BIT_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) { + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ + result = BIT_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes*8; + bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ + return result; + } +} + +/*! BIT_endOfDStream() : + * @return : 1 if DStream has _exactly_ reached its end (all bits consumed). + */ +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) +{ + return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); +} + +#if defined (__cplusplus) +} +#endif + +#endif /* BITSTREAM_H_MODULE */ +/**** ended inlining bitstream.h ****/ + + +/* ***************************************** +* Static allocation +*******************************************/ +/* FSE buffer bounds */ +#define FSE_NCOUNTBOUND 512 +#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) +#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ +#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2)) +#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog))) + +/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */ +#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable)) +#define FSE_DTABLE_SIZE(maxTableLog) (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable)) + + +/* ***************************************** + * FSE advanced API + ***************************************** */ + +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); +/**< same as FSE_optimalTableLog(), which used `minus==2` */ + +/* FSE_compress_wksp() : + * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). 
+ * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable. + */ +#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) ) +size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); + +size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); +/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */ + +size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); +/**< build a fake FSE_CTable, designed to compress always the same symbolValue */ + +/* FSE_buildCTable_wksp() : + * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). + * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`. + */ +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2))) +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)) +size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); + +#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8) +#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned)) +FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); +/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */ + +size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits); +/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */ + +size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); +/**< build a fake FSE_DTable, designed to always generate the same symbolValue */ + +#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1) +#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned)) +size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize); +/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */ + +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2); +/**< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. 
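+ * Note (informative): within this translation unit, HUF_readStats_wksp()
+ * decodes compact Huffman weight tables through this entry point, calling it
+ * with maxLog == 6 and forwarding its own bmi2 flag; external callers are
+ * assumed to supply bmi2 from their own CPU-capability check.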
*/ + +typedef enum { + FSE_repeat_none, /**< Cannot use the previous table */ + FSE_repeat_check, /**< Can use the previous table but it must be checked */ + FSE_repeat_valid /**< Can use the previous table and it is assumed to be valid */ + } FSE_repeat; + +/* ***************************************** +* FSE symbol compression API +*******************************************/ +/*! + This API consists of small unitary functions, which highly benefit from being inlined. + Hence their body are included in next section. +*/ +typedef struct { + ptrdiff_t value; + const void* stateTable; + const void* symbolTT; + unsigned stateLog; +} FSE_CState_t; + +static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct); + +static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol); + +static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr); + +/**< +These functions are inner components of FSE_compress_usingCTable(). +They allow the creation of custom streams, mixing multiple tables and bit sources. + +A key property to keep in mind is that encoding and decoding are done **in reverse direction**. +So the first symbol you will encode is the last you will decode, like a LIFO stack. + +You will need a few variables to track your CStream. They are : + +FSE_CTable ct; // Provided by FSE_buildCTable() +BIT_CStream_t bitStream; // bitStream tracking structure +FSE_CState_t state; // State tracking structure (can have several) + + +The first thing to do is to init bitStream and state. + size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize); + FSE_initCState(&state, ct); + +Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError(); +You can then encode your input data, byte after byte. +FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time. +Remember decoding will be done in reverse direction. + FSE_encodeByte(&bitStream, &state, symbol); + +At any time, you can also add any bit sequence. +Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders + BIT_addBits(&bitStream, bitField, nbBits); + +The above methods don't commit data to memory, they just store it into local register, for speed. +Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +Writing data to memory is a manual operation, performed by the flushBits function. + BIT_flushBits(&bitStream); + +Your last FSE encoding operation shall be to flush your last state value(s). + FSE_flushState(&bitStream, &state); + +Finally, you must close the bitStream. +The function returns the size of CStream in bytes. +If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible) +If there is an error, it returns an errorCode (which can be tested using FSE_isError()). + size_t size = BIT_closeCStream(&bitStream); +*/ + + +/* ***************************************** +* FSE symbol decompression API +*******************************************/ +typedef struct { + size_t state; + const void* table; /* precise table may vary, depending on U16 */ +} FSE_DState_t; + + +static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt); + +static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); + +static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr); + +/**< +Let's now decompose FSE_decompress_usingDTable() into its unitary components. 
+You will decode FSE-encoded symbols from the bitStream, +and also any other bitFields you put in, **in reverse order**. + +You will need a few variables to track your bitStream. They are : + +BIT_DStream_t DStream; // Stream context +FSE_DState_t DState; // State context. Multiple ones are possible +FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable() + +The first thing to do is to init the bitStream. + errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize); + +You should then retrieve your initial state(s) +(in reverse flushing order if you have several ones) : + errorCode = FSE_initDState(&DState, &DStream, DTablePtr); + +You can then decode your data, symbol after symbol. +For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'. +Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out). + unsigned char symbol = FSE_decodeSymbol(&DState, &DStream); + +You can retrieve any bitfield you eventually stored into the bitStream (in reverse order) +Note : maximum allowed nbBits is 25, for 32-bits compatibility + size_t bitField = BIT_readBits(&DStream, nbBits); + +All above operations only read from local register (which size depends on size_t). +Refueling the register from memory is manually performed by the reload method. + endSignal = FSE_reloadDStream(&DStream); + +BIT_reloadDStream() result tells if there is still some more data to read from DStream. +BIT_DStream_unfinished : there is still some data left into the DStream. +BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled. +BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed. +BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted. + +When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop, +to properly detect the exact end of stream. +After each decoded symbol, check if DStream is fully consumed using this simple test : + BIT_reloadDStream(&DStream) >= BIT_DStream_completed + +When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. +Checking if DStream has reached its end is performed by : + BIT_endOfDStream(&DStream); +Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. + FSE_endOfDState(&DState); +*/ + + +/* ***************************************** +* FSE unsafe API +*******************************************/ +static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); +/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ + + +/* ***************************************** +* Implementation of inlined functions +*******************************************/ +typedef struct { + int deltaFindState; + U32 deltaNbBits; +} FSE_symbolCompressionTransform; /* total 8 bytes */ + +MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct) +{ + const void* ptr = ct; + const U16* u16ptr = (const U16*) ptr; + const U32 tableLog = MEM_read16(ptr); + statePtr->value = (ptrdiff_t)1<stateTable = u16ptr+2; + statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1); + statePtr->stateLog = tableLog; +} + + +/*! 
FSE_initCState2() : +* Same as FSE_initCState(), but the first symbol to include (which will be the last to be read) +* uses the smallest state value possible, saving the cost of this symbol */ +MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol) +{ + FSE_initCState(statePtr, ct); + { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* stateTable = (const U16*)(statePtr->stateTable); + U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16); + statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; + statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; + } +} + +MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol) +{ + FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* const stateTable = (const U16*)(statePtr->stateTable); + U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); + BIT_addBits(bitC, statePtr->value, nbBitsOut); + statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; +} + +MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) +{ + BIT_addBits(bitC, statePtr->value, statePtr->stateLog); + BIT_flushBits(bitC); +} + + +/* FSE_getMaxNbBits() : + * Approximate maximum cost of a symbol, in bits. + * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16; +} + +/* FSE_bitCost() : + * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16; + U32 const threshold = (minNbBits+1) << 16; + assert(tableLog < 16); + assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */ + { U32 const tableSize = 1 << tableLog; + U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize); + U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */ + U32 const bitMultiplier = 1 << accuracyLog; + assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold); + assert(normalizedDeltaFromThreshold <= bitMultiplier); + return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold; + } +} + + +/* ====== Decompression ====== */ + +typedef struct { + U16 tableLog; + U16 fastMode; +} FSE_DTableHeader; /* sizeof U32 */ + +typedef struct +{ + unsigned short newState; + unsigned char symbol; + unsigned char nbBits; +} FSE_decode_t; /* size == U32 */ + +MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, 
BIT_DStream_t* bitD, const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + return DInfo.symbol; +} + +MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.newState + lowBits; +} + +MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBits(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +/*! FSE_decodeSymbolFast() : + unsafe, only works if no symbol has a probability > 50% */ +MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) +{ + return DStatePtr->state == 0; +} + + + +#ifndef FSE_COMMONDEFS_ONLY + +/* ************************************************************** +* Tuning parameters +****************************************************************/ +/*!MEMORY_USAGE : +* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) +* Increasing memory usage improves compression ratio +* Reduced memory usage can improve speed, due to cache effect +* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ +#ifndef FSE_MAX_MEMORY_USAGE +# define FSE_MAX_MEMORY_USAGE 14 +#endif +#ifndef FSE_DEFAULT_MEMORY_USAGE +# define FSE_DEFAULT_MEMORY_USAGE 13 +#endif +#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE) +# error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE" +#endif + +/*!FSE_MAX_SYMBOL_VALUE : +* Maximum symbol value authorized. 
+* Required for proper stack allocation */ +#ifndef FSE_MAX_SYMBOL_VALUE +# define FSE_MAX_SYMBOL_VALUE 255 +#endif + +/* ************************************************************** +* template functions type & suffix +****************************************************************/ +#define FSE_FUNCTION_TYPE BYTE +#define FSE_FUNCTION_EXTENSION +#define FSE_DECODE_TYPE FSE_decode_t + + +#endif /* !FSE_COMMONDEFS_ONLY */ + + +/* *************************************************************** +* Constants +*****************************************************************/ +#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) +#define FSE_MAX_TABLESIZE (1U< FSE_TABLELOG_ABSOLUTE_MAX +# error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" +#endif + +#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3) + + +#endif /* FSE_STATIC_LINKING_ONLY */ + + +#if defined (__cplusplus) +} +#endif +/**** ended inlining fse.h ****/ +#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */ +/**** start inlining huf.h ****/ +/* ****************************************************************** + * huff0 huffman codec, + * part of Finite State Entropy library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef HUF_H_298734234 +#define HUF_H_298734234 + +/* *** Dependencies *** */ +/**** skipping file: zstd_deps.h ****/ + + +/* *** library symbols visibility *** */ +/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual, + * HUF symbols remain "private" (internal symbols for library only). + * Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */ +#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) +# define HUF_PUBLIC_API __attribute__ ((visibility ("default"))) +#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ +# define HUF_PUBLIC_API __declspec(dllexport) +#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) +# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */ +#else +# define HUF_PUBLIC_API +#endif + + +/* ========================== */ +/* *** simple functions *** */ +/* ========================== */ + +/** HUF_compress() : + * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. + * 'dst' buffer must be already allocated. + * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). + * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. + * @return : size of compressed data (<= `dstCapacity`). + * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! 
+ * if HUF_isError(return), compression failed (more details using HUF_getErrorName()) + */ +HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/** HUF_decompress() : + * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', + * into already allocated buffer 'dst', of minimum size 'dstSize'. + * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. + * Note : in contrast with FSE, HUF_decompress can regenerate + * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, + * because it knows size to regenerate (originalSize). + * @return : size of regenerated data (== originalSize), + * or an error code, which can be tested using HUF_isError() + */ +HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize); + + +/* *** Tool functions *** */ +#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ +HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ + +/* Error Management */ +HUF_PUBLIC_API unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ +HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ + + +/* *** Advanced function *** */ + +/** HUF_compress2() : + * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`. + * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX . + * `tableLog` must be `<= HUF_TABLELOG_MAX` . */ +HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog); + +/** HUF_compress4X_wksp() : + * Same as HUF_compress2(), but uses externally allocated `workSpace`. + * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */ +#define HUF_WORKSPACE_SIZE ((6 << 10) + 256) +#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) +HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize); + +#endif /* HUF_H_298734234 */ + +/* ****************************************************************** + * WARNING !! + * The following section contains advanced and experimental definitions + * which shall never be used in the context of a dynamic library, + * because they are not guaranteed to remain stable in the future. + * Only consider them in association with static linking. + * *****************************************************************/ +#if defined(HUF_STATIC_LINKING_ONLY) && !defined(HUF_H_HUF_STATIC_LINKING_ONLY) +#define HUF_H_HUF_STATIC_LINKING_ONLY + +/* *** Dependencies *** */ +/**** skipping file: mem.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: fse.h ****/ + + +/* *** Constants *** */ +#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ +#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */ +#define HUF_SYMBOLVALUE_MAX 255 + +#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ +#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) +# error "HUF_TABLELOG_MAX is too large !" 
+#endif + + +/* **************************************** +* Static allocation +******************************************/ +/* HUF buffer bounds */ +#define HUF_CTABLEBOUND 129 +#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true when incompressible is pre-filtered with fast heuristic */ +#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* static allocation of HUF's Compression Table */ +/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */ +struct HUF_CElt_s { + U16 val; + BYTE nbBits; +}; /* typedef'd to HUF_CElt */ +typedef struct HUF_CElt_s HUF_CElt; /* consider it an incomplete type */ +#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */ +#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32)) +#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ + HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */ + +/* static allocation of HUF's DTable */ +typedef U32 HUF_DTable; +#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog))) +#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) } +#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) } + + +/* **************************************** +* Advanced decompression functions +******************************************/ +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +#endif + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */ +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */ +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */ +size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ +#endif + + +/* **************************************** + * HUF detailed API + * ****************************************/ + +/*! HUF_compress() does the following: + * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h") + * 2. (optional) refine tableLog using HUF_optimalTableLog() + * 3. build Huffman table from count using HUF_buildCTable() + * 4. 
save Huffman table to memory buffer using HUF_writeCTable() + * 5. encode the data stream using HUF_compress4X_usingCTable() + * + * The following API allows targeting specific sub-functions for advanced tasks. + * For example, it's possible to compress several blocks using the same 'CTable', + * or to save and regenerate 'CTable' using external methods. + */ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); +size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); +size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize); +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); +int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); + +typedef enum { + HUF_repeat_none, /**< Cannot use the previous table */ + HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ + HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */ + } HUF_repeat; +/** HUF_compress4X_repeat() : + * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. + * If preferRepeat then the old table will always be used if valid. */ +size_t HUF_compress4X_repeat(void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); + +/** HUF_buildCTable_wksp() : + * Same as HUF_buildCTable(), but using externally allocated scratch buffer. + * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. + */ +#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) +#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_buildCTable_wksp (HUF_CElt* tree, + const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, + void* workSpace, size_t wkspSize); + +/*! HUF_readStats() : + * Read compact Huffman tree, saved by HUF_writeCTable(). + * `huffWeight` is destination buffer. + * @return : size read from `src` , or an error Code . + * Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize); + +/*! HUF_readStats_wksp() : + * Same as HUF_readStats() but takes an external workspace which must be + * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. 
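+ * Informative note: HUF_readStats() above is the convenience wrapper around
+ * this function; its definition below supplies a stack workspace of
+ * HUF_READ_STATS_WORKSPACE_SIZE_U32 unsigned values and passes bmi2 == 0.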
+ */ +#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1) +#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workspace, size_t wkspSize, + int bmi2); + +/** HUF_readCTable() : + * Loading a CTable saved with HUF_writeCTable() */ +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); + +/** HUF_getNbBits() : + * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX + * Note 1 : is not inlined, as HUF_CElt definition is private + * Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */ +U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue); + +/* + * HUF_decompress() does the following: + * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics + * 2. build Huffman table from save, using HUF_readDTableX?() + * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable() + */ + +/** HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); + +/** + * The minimum workspace size for the `workSpace` used in + * HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp(). + * + * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when + * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15. + * Buffer overflow errors may potentially occur if code modifications result in + * a required workspace size greater than that specified in the following + * macro. 
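+ *
+ * Illustrative sketch only: dctx is assumed to be a HUF_DTable prepared with
+ * HUF_CREATE_STATIC_DTABLEX2(), and dst / cSrc are caller-owned buffers.
+ *
+ *     U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
+ *     size_t const r = HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize,
+ *                                                    cSrc, cSrcSize,
+ *                                                    workSpace, sizeof(workSpace));
+ *
+ * (test r with HUF_isError(r) before using dst)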
+ */ +#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9)) +#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +#endif + +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif + + +/* ====================== */ +/* single stream variants */ +/* ====================== */ + +size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +/** HUF_compress1X_repeat() : + * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. + * If preferRepeat then the old table will always be used if valid. 
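+ *
+ *  Illustrative call pattern only; workSpace / wkspSize are as for
+ *  HUF_compress1X_wksp(), the remaining buffers are caller-provided, and the
+ *  last two arguments are preferRepeat and bmi2:
+ *
+ *      HUF_repeat repeat = HUF_repeat_none;
+ *      HUF_CREATE_STATIC_CTABLE(hufTable, HUF_SYMBOLVALUE_MAX);
+ *      size_t const r = HUF_compress1X_repeat(dst, dstSize, src, srcSize,
+ *                           maxSymbolValue, tableLog, workSpace, wkspSize,
+ *                           hufTable, &repeat, 0, 0);
+ *
+ *  Reusing the same hufTable and repeat on subsequent blocks lets the previous
+ *  table be reused whenever *repeat != HUF_repeat_none.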
*/ +size_t HUF_compress1X_repeat(void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); + +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ +#endif + +size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); +size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ +#endif + +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif + +/* BMI2 variants. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. 
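+ * The flag is typically derived once from a CPU-capability query at the call
+ * site and then threaded through unchanged, which is how the DYNAMIC_BMI2
+ * dispatch wrappers later in this file use it.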
+ */ +size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +#endif +size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); +size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); +#endif + +#endif /* HUF_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif +/**** ended inlining huf.h ****/ + + +/*=== Version ===*/ +unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } + + +/*=== Error Management ===*/ +unsigned FSE_isError(size_t code) { return ERR_isError(code); } +const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } + +unsigned HUF_isError(size_t code) { return ERR_isError(code); } +const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } + + +/*-************************************************************** +* FSE NCount encoding-decoding +****************************************************************/ +static U32 FSE_ctz(U32 val) +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + return _BitScanForward(&r, val) ? (unsigned)r : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ + return __builtin_ctz(val); +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return __CTZ(val); +# else /* Software version */ + U32 count = 0; + while ((val & 1) == 0) { + val >>= 1; + ++count; + } + return count; +# endif + } +} + +FORCE_INLINE_TEMPLATE +size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + const BYTE* const istart = (const BYTE*) headerBuffer; + const BYTE* const iend = istart + hbSize; + const BYTE* ip = istart; + int nbBits; + int remaining; + int threshold; + U32 bitStream; + int bitCount; + unsigned charnum = 0; + unsigned const maxSV1 = *maxSVPtr + 1; + int previous0 = 0; + + if (hbSize < 8) { + /* This function only works when hbSize >= 8 */ + char buffer[8] = {0}; + ZSTD_memcpy(buffer, headerBuffer, hbSize); + { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, + buffer, sizeof(buffer)); + if (FSE_isError(countSize)) return countSize; + if (countSize > hbSize) return ERROR(corruption_detected); + return countSize; + } } + assert(hbSize >= 8); + + /* init */ + ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ + bitStream = MEM_readLE32(ip); + nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ + if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); + bitStream >>= 4; + bitCount = 4; + *tableLogPtr = nbBits; + remaining = (1<> 1; + while (repeats >= 12) { + charnum += 3 * 12; + if (LIKELY(ip <= iend-7)) { + ip += 3; + } else { + bitCount -= (int)(8 * (iend - 7 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + repeats = FSE_ctz(~bitStream 
| 0x80000000) >> 1; + } + charnum += 3 * repeats; + bitStream >>= 2 * repeats; + bitCount += 2 * repeats; + + /* Add the final repeat which isn't 0b11. */ + assert((bitStream & 3) < 3); + charnum += bitStream & 3; + bitCount += 2; + + /* This is an error, but break and return an error + * at the end, because returning out of a loop makes + * it harder for the compiler to optimize. + */ + if (charnum >= maxSV1) break; + + /* We don't need to set the normalized count to 0 + * because we already memset the whole buffer to 0. + */ + + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + assert((bitCount >> 3) <= 3); /* For first condition to work */ + ip += bitCount>>3; + bitCount &= 7; + } else { + bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + } + { + int const max = (2*threshold-1) - remaining; + int count; + + if ((bitStream & (threshold-1)) < (U32)max) { + count = bitStream & (threshold-1); + bitCount += nbBits-1; + } else { + count = bitStream & (2*threshold-1); + if (count >= threshold) count -= max; + bitCount += nbBits; + } + + count--; /* extra accuracy */ + /* When it matters (small blocks), this is a + * predictable branch, because we don't use -1. + */ + if (count >= 0) { + remaining -= count; + } else { + assert(count == -1); + remaining += count; + } + normalizedCounter[charnum++] = (short)count; + previous0 = !count; + + assert(threshold > 1); + if (remaining < threshold) { + /* This branch can be folded into the + * threshold update condition because we + * know that threshold > 1. + */ + if (remaining <= 1) break; + nbBits = BIT_highbit32(remaining) + 1; + threshold = 1 << (nbBits - 1); + } + if (charnum >= maxSV1) break; + + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + ip += bitCount>>3; + bitCount &= 7; + } else { + bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + } } + if (remaining != 1) return ERROR(corruption_detected); + /* Only possible when there are too many zeros. */ + if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall); + if (bitCount > 32) return ERROR(corruption_detected); + *maxSVPtr = charnum-1; + + ip += (bitCount+7)>>3; + return ip-istart; +} + +/* Avoids the FORCE_INLINE of the _body() function. 
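+ * The FORCE_INLINE_TEMPLATE body is instantiated here as a plain function for
+ * the default path, and again inside the TARGET_ATTRIBUTE("bmi2") wrapper
+ * below when DYNAMIC_BMI2 is enabled.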
*/ +static size_t FSE_readNCount_body_default( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} +#endif + +size_t FSE_readNCount_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); + } +#endif + (void)bmi2; + return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +size_t FSE_readNCount( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0); +} + + +/*! HUF_readStats() : + Read compact Huffman tree, saved by HUF_writeCTable(). + `huffWeight` is destination buffer. + `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. + @return : size read from `src` , or an error Code . + Note : Needed by HUF_readCTable() and HUF_readDTableX?() . +*/ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize) +{ + U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0); +} + +FORCE_INLINE_TEMPLATE size_t +HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, + int bmi2) +{ + U32 weightTotal; + const BYTE* ip = (const BYTE*) src; + size_t iSize; + size_t oSize; + + if (!srcSize) return ERROR(srcSize_wrong); + iSize = ip[0]; + /* ZSTD_memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... 
*/ + + if (iSize >= 128) { /* special header */ + oSize = iSize - 127; + iSize = ((oSize+1)/2); + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + if (oSize >= hwSize) return ERROR(corruption_detected); + ip += 1; + { U32 n; + for (n=0; n> 4; + huffWeight[n+1] = ip[n/2] & 15; + } } } + else { /* header compressed with FSE (normal case) */ + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + /* max (hwSize-1) values decoded, as last one is implied */ + oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2); + if (FSE_isError(oSize)) return oSize; + } + + /* collect weight stats */ + ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); + weightTotal = 0; + { U32 n; for (n=0; n= HUF_TABLELOG_MAX) return ERROR(corruption_detected); + rankStats[huffWeight[n]]++; + weightTotal += (1 << huffWeight[n]) >> 1; + } } + if (weightTotal == 0) return ERROR(corruption_detected); + + /* get last non-null symbol weight (implied, total must be 2^n) */ + { U32 const tableLog = BIT_highbit32(weightTotal) + 1; + if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); + *tableLogPtr = tableLog; + /* determine last weight */ + { U32 const total = 1 << tableLog; + U32 const rest = total - weightTotal; + U32 const verif = 1 << BIT_highbit32(rest); + U32 const lastWeight = BIT_highbit32(rest) + 1; + if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ + huffWeight[oSize] = (BYTE)lastWeight; + rankStats[lastWeight]++; + } } + + /* check tree construction validity */ + if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ + + /* results */ + *nbSymbolsPtr = (U32)(oSize+1); + return iSize+1; +} + +/* Avoids the FORCE_INLINE of the _body() function. */ +static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0); +} + +#if DYNAMIC_BMI2 +static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1); +} +#endif + +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, + int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); + } +#endif + (void)bmi2; + return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); +} +/**** ended inlining common/entropy_common.c ****/ +/**** start inlining common/error_private.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
+ * You may select, at your option, one of the above-listed licenses. + */ + +/* The purpose of this file is to have a single list of error strings embedded in binary */ + +/**** skipping file: error_private.h ****/ + +const char* ERR_getErrorString(ERR_enum code) +{ +#ifdef ZSTD_STRIP_ERROR_STRINGS + (void)code; + return "Error strings stripped"; +#else + static const char* const notErrorCode = "Unspecified error code"; + switch( code ) + { + case PREFIX(no_error): return "No error detected"; + case PREFIX(GENERIC): return "Error (generic)"; + case PREFIX(prefix_unknown): return "Unknown frame descriptor"; + case PREFIX(version_unsupported): return "Version not supported"; + case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; + case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; + case PREFIX(corruption_detected): return "Corrupted block detected"; + case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; + case PREFIX(parameter_unsupported): return "Unsupported parameter"; + case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; + case PREFIX(init_missing): return "Context should be init first"; + case PREFIX(memory_allocation): return "Allocation error : not enough memory"; + case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough"; + case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; + case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; + case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; + case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; + case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; + case PREFIX(dictionary_wrong): return "Dictionary mismatch"; + case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; + case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; + case PREFIX(srcSize_wrong): return "Src size is incorrect"; + case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer"; + /* following error codes are not stable and may be removed or changed in a future version */ + case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; + case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; + case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; + case PREFIX(srcBuffer_wrong): return "Source buffer is wrong"; + case PREFIX(maxCode): + default: return notErrorCode; + } +#endif +} +/**** ended inlining common/error_private.c ****/ +/**** start inlining common/fse_decompress.c ****/ +/* ****************************************************************** + * FSE : Finite State Entropy decoder + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + + +/* ************************************************************** +* Includes +****************************************************************/ +/**** skipping file: debug.h ****/ +/**** skipping file: bitstream.h ****/ +/**** skipping file: compiler.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: fse.h ****/ +/**** skipping file: error_private.h ****/ +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define FSE_isError ERR_isError +#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ + + +/* ************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + +/* Function templates */ +FSE_DTable* FSE_createDTable (unsigned tableLog) +{ + if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; + return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); +} + +void FSE_freeDTable (FSE_DTable* dt) +{ + ZSTD_free(dt); +} + +static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) +{ + void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ + FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); + U16* symbolNext = (U16*)workSpace; + BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1); + + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + U32 highThreshold = tableSize-1; + + /* Sanity Checks */ + if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge); + if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + + /* Init, lay down lowprob symbols */ + { FSE_DTableHeader DTableH; + DTableH.tableLog = (U16)tableLog; + DTableH.fastMode = 1; + { S16 const largeLimit= (S16)(1 << (tableLog-1)); + U32 s; + for (s=0; s= largeLimit) DTableH.fastMode=0; + symbolNext[s] = normalizedCounter[s]; + } } } + ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. 
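 *
 * (Editorial sketch, not part of the upstream zstd sources: the first stage is
 * roughly the loop below, using the locals of FSE_buildDTable_internal above.
 * `sv` holds the current symbol repeated in all 8 bytes, so a single 64-bit
 * store fills 8 table cells at once; writing past `pos + n` is harmless because
 * the workspace reserves 8 spare bytes at the end.)
 *
 *     U64 sv = 0;
 *     size_t pos = 0;
 *     for (U32 s = 0; s < maxSV1; ++s, sv += 0x0101010101010101ULL) {
 *         int const n = normalizedCounter[s];   // cells owned by symbol s
 *         MEM_write64(spread + pos, sv);        // unconditional 8-byte store
 *         for (int i = 8; i < n; i += 8)        // rare: counts above 8
 *             MEM_write64(spread + pos + i, sv);
 *         pos += n;
 *     }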
+ */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } } + if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } + + /* Build Decoding table */ + { U32 u; + for (u=0; utableLog = 0; + DTableH->fastMode = 0; + + cell->newState = 0; + cell->symbol = symbolValue; + cell->nbBits = 0; + + return 0; +} + + +size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) +{ + void* ptr = dt; + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + void* dPtr = dt + 1; + FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr; + const unsigned tableSize = 1 << nbBits; + const unsigned tableMask = tableSize - 1; + const unsigned maxSV1 = tableMask+1; + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + + /* Build Decoding Table */ + DTableH->tableLog = (U16)nbBits; + DTableH->fastMode = 1; + for (s=0; s sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[1] = FSE_GETSYMBOL(&state2); + + if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } } + + op[2] = FSE_GETSYMBOL(&state1); + + if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[3] = FSE_GETSYMBOL(&state2); + } + + /* tail */ + /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ + while (1) { + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state1); + if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state2); + break; + } + + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state2); + if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state1); + break; + } } + + return op-ostart; +} + + +size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); + return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); +} + + +size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0); +} + +typedef struct { + short ncount[FSE_MAX_SYMBOL_VALUE + 1]; + FSE_DTable dtable[1]; /* Dynamically sized */ +} FSE_DecompressWksp; + + +FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body( + void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize, + unsigned maxLog, void* workSpace, size_t wkspSize, + int bmi2) +{ + const BYTE* const istart = (const BYTE*)cSrc; + const BYTE* ip = istart; + unsigned tableLog; + unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace; + + DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0); + if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC); + + /* normal FSE decoding mode */ + { + size_t const 
NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2); + if (FSE_isError(NCountLength)) return NCountLength; + if (tableLog > maxLog) return ERROR(tableLog_tooLarge); + assert(NCountLength <= cSrcSize); + ip += NCountLength; + cSrcSize -= NCountLength; + } + + if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge); + workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog); + wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); + + CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) ); + + { + const void* ptr = wksp->dtable; + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1); + return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0); + } +} + +/* Avoids the FORCE_INLINE of the _body() function. */ +static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0); +} + +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1); +} +#endif + +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); + } +#endif + (void)bmi2; + return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); +} + + +typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) { + U32 wksp[FSE_BUILD_DTABLE_WKSP_SIZE_U32(FSE_TABLELOG_ABSOLUTE_MAX, FSE_MAX_SYMBOL_VALUE)]; + return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp)); +} + +size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize) +{ + /* Static analyzer seems unable to understand this table will be properly initialized later */ + U32 wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; + return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp)); +} +#endif + + +#endif /* FSE_COMMONDEFS_ONLY */ +/**** ended inlining common/fse_decompress.c ****/ +/**** start inlining common/threading.c ****/ +/** + * Copyright (c) 2016 Tino Reichardt + * All rights reserved. + * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/** + * This file will hold wrapper for systems, which do not support pthreads + */ + +/**** start inlining threading.h ****/ +/** + * Copyright (c) 2016 Tino Reichardt + * All rights reserved. + * + * You can contact the author at: + * - zstdmt source repository: https://github.com/mcmilk/zstdmt + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef THREADING_H_938743 +#define THREADING_H_938743 + +/**** skipping file: debug.h ****/ + +#if defined (__cplusplus) +extern "C" { +#endif + +#if defined(ZSTD_MULTITHREAD) && defined(_WIN32) + +/** + * Windows minimalist Pthread Wrapper, based on : + * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html + */ +#ifdef WINVER +# undef WINVER +#endif +#define WINVER 0x0600 + +#ifdef _WIN32_WINNT +# undef _WIN32_WINNT +#endif +#define _WIN32_WINNT 0x0600 + +#ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +#endif + +#undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */ +#include +#undef ERROR +#define ERROR(name) ZSTD_ERROR(name) + + +/* mutex */ +#define ZSTD_pthread_mutex_t CRITICAL_SECTION +#define ZSTD_pthread_mutex_init(a, b) ((void)(b), InitializeCriticalSection((a)), 0) +#define ZSTD_pthread_mutex_destroy(a) DeleteCriticalSection((a)) +#define ZSTD_pthread_mutex_lock(a) EnterCriticalSection((a)) +#define ZSTD_pthread_mutex_unlock(a) LeaveCriticalSection((a)) + +/* condition variable */ +#define ZSTD_pthread_cond_t CONDITION_VARIABLE +#define ZSTD_pthread_cond_init(a, b) ((void)(b), InitializeConditionVariable((a)), 0) +#define ZSTD_pthread_cond_destroy(a) ((void)(a)) +#define ZSTD_pthread_cond_wait(a, b) SleepConditionVariableCS((a), (b), INFINITE) +#define ZSTD_pthread_cond_signal(a) WakeConditionVariable((a)) +#define ZSTD_pthread_cond_broadcast(a) WakeAllConditionVariable((a)) + +/* ZSTD_pthread_create() and ZSTD_pthread_join() */ +typedef struct { + HANDLE handle; + void* (*start_routine)(void*); + void* arg; +} ZSTD_pthread_t; + +int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, + void* (*start_routine) (void*), void* arg); + +int ZSTD_pthread_join(ZSTD_pthread_t thread, void** value_ptr); + +/** + * add here more wrappers as required + */ + + +#elif defined(ZSTD_MULTITHREAD) /* posix assumed ; need a better detection method */ +/* === POSIX Systems === */ +# include + +#if DEBUGLEVEL < 1 + +#define ZSTD_pthread_mutex_t pthread_mutex_t +#define ZSTD_pthread_mutex_init(a, b) pthread_mutex_init((a), (b)) +#define ZSTD_pthread_mutex_destroy(a) pthread_mutex_destroy((a)) +#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock((a)) +#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock((a)) + +#define ZSTD_pthread_cond_t pthread_cond_t +#define ZSTD_pthread_cond_init(a, b) pthread_cond_init((a), (b)) +#define ZSTD_pthread_cond_destroy(a) pthread_cond_destroy((a)) +#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait((a), (b)) +#define ZSTD_pthread_cond_signal(a) pthread_cond_signal((a)) +#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast((a)) + +#define ZSTD_pthread_t pthread_t +#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) +#define ZSTD_pthread_join(a, b) pthread_join((a),(b)) + +#else /* DEBUGLEVEL >= 1 */ + +/* Debug implementation of threading. 
+ * In this implementation we use pointers for mutexes and condition variables. + * This way, if we forget to init/destroy them the program will crash or ASAN + * will report leaks. + */ + +#define ZSTD_pthread_mutex_t pthread_mutex_t* +int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr); +int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex); +#define ZSTD_pthread_mutex_lock(a) pthread_mutex_lock(*(a)) +#define ZSTD_pthread_mutex_unlock(a) pthread_mutex_unlock(*(a)) + +#define ZSTD_pthread_cond_t pthread_cond_t* +int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr); +int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond); +#define ZSTD_pthread_cond_wait(a, b) pthread_cond_wait(*(a), *(b)) +#define ZSTD_pthread_cond_signal(a) pthread_cond_signal(*(a)) +#define ZSTD_pthread_cond_broadcast(a) pthread_cond_broadcast(*(a)) + +#define ZSTD_pthread_t pthread_t +#define ZSTD_pthread_create(a, b, c, d) pthread_create((a), (b), (c), (d)) +#define ZSTD_pthread_join(a, b) pthread_join((a),(b)) + +#endif + +#else /* ZSTD_MULTITHREAD not defined */ +/* No multithreading support */ + +typedef int ZSTD_pthread_mutex_t; +#define ZSTD_pthread_mutex_init(a, b) ((void)(a), (void)(b), 0) +#define ZSTD_pthread_mutex_destroy(a) ((void)(a)) +#define ZSTD_pthread_mutex_lock(a) ((void)(a)) +#define ZSTD_pthread_mutex_unlock(a) ((void)(a)) + +typedef int ZSTD_pthread_cond_t; +#define ZSTD_pthread_cond_init(a, b) ((void)(a), (void)(b), 0) +#define ZSTD_pthread_cond_destroy(a) ((void)(a)) +#define ZSTD_pthread_cond_wait(a, b) ((void)(a), (void)(b)) +#define ZSTD_pthread_cond_signal(a) ((void)(a)) +#define ZSTD_pthread_cond_broadcast(a) ((void)(a)) + +/* do not use ZSTD_pthread_t */ + +#endif /* ZSTD_MULTITHREAD */ + +#if defined (__cplusplus) +} +#endif + +#endif /* THREADING_H_938743 */ +/**** ended inlining threading.h ****/ + +/* create fake symbol to avoid empty translation unit warning */ +int g_ZSTD_threading_useless_symbol; + +#if defined(ZSTD_MULTITHREAD) && defined(_WIN32) + +/** + * Windows minimalist Pthread Wrapper, based on : + * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html + */ + + +/* === Dependencies === */ +#include +#include + + +/* === Implementation === */ + +static unsigned __stdcall worker(void *arg) +{ + ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg; + thread->arg = thread->start_routine(thread->arg); + return 0; +} + +int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused, + void* (*start_routine) (void*), void* arg) +{ + (void)unused; + thread->arg = arg; + thread->start_routine = start_routine; + thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL); + + if (!thread->handle) + return errno; + else + return 0; +} + +int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr) +{ + DWORD result; + + if (!thread.handle) return 0; + + result = WaitForSingleObject(thread.handle, INFINITE); + switch (result) { + case WAIT_OBJECT_0: + if (value_ptr) *value_ptr = thread.arg; + return 0; + case WAIT_ABANDONED: + return EINVAL; + default: + return GetLastError(); + } +} + +#endif /* ZSTD_MULTITHREAD */ + +#if defined(ZSTD_MULTITHREAD) && DEBUGLEVEL >= 1 && !defined(_WIN32) + +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ + +int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr) +{ + *mutex = (pthread_mutex_t*)ZSTD_malloc(sizeof(pthread_mutex_t)); + if (!*mutex) + return 1; + return pthread_mutex_init(*mutex, attr); +} + 
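/* Editorial example (not part of the upstream sources): whichever of the three
 * branches above is selected (Win32 CRITICAL_SECTION, real pthreads, or the
 * single-threaded no-ops), call sites use the same ZSTD_pthread_* spelling.
 * A minimal sketch with hypothetical names, kept in #if 0 so it never compiles:
 */
#if 0
static ZSTD_pthread_mutex_t g_example_mutex;   /* illustration only */
static int g_example_counter = 0;

static int example_init(void)
{
    return ZSTD_pthread_mutex_init(&g_example_mutex, NULL);   /* 0 on success in every configuration */
}

static void example_locked_increment(void)
{
    ZSTD_pthread_mutex_lock(&g_example_mutex);
    g_example_counter++;
    ZSTD_pthread_mutex_unlock(&g_example_mutex);
}

static void example_teardown(void)
{
    ZSTD_pthread_mutex_destroy(&g_example_mutex);   /* the DEBUGLEVEL>=1 build frees the heap-allocated mutex here */
}
#endif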
+int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex) +{ + if (!*mutex) + return 0; + { + int const ret = pthread_mutex_destroy(*mutex); + ZSTD_free(*mutex); + return ret; + } +} + +int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr) +{ + *cond = (pthread_cond_t*)ZSTD_malloc(sizeof(pthread_cond_t)); + if (!*cond) + return 1; + return pthread_cond_init(*cond, attr); +} + +int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond) +{ + if (!*cond) + return 0; + { + int const ret = pthread_cond_destroy(*cond); + ZSTD_free(*cond); + return ret; + } +} + +#endif +/**** ended inlining common/threading.c ****/ +/**** start inlining common/pool.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* ====== Dependencies ======= */ +/**** skipping file: zstd_deps.h ****/ +/**** skipping file: debug.h ****/ +/**** start inlining zstd_internal.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_CCOMMON_H_MODULE +#define ZSTD_CCOMMON_H_MODULE + +/* this module contains definitions which must be identical + * across compression, decompression and dictBuilder. + * It also contains a few functions useful to at least 2 of them + * and which benefit from being inlined */ + +/*-************************************* +* Dependencies +***************************************/ +#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON) +#include +#endif +/**** skipping file: compiler.h ****/ +/**** skipping file: mem.h ****/ +/**** skipping file: debug.h ****/ +/**** skipping file: error_private.h ****/ +#define ZSTD_STATIC_LINKING_ONLY +/**** start inlining ../zstd.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef ZSTD_H_235446 +#define ZSTD_H_235446 + +/* ====== Dependency ======*/ +#include /* INT_MAX */ +#include /* size_t */ + + +/* ===== ZSTDLIB_API : control library symbols visibility ===== */ +#ifndef ZSTDLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZSTDLIB_VISIBILITY +# endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZSTDLIB_API ZSTDLIB_VISIBILITY +#endif + + +/******************************************************************************* + Introduction + + zstd, short for Zstandard, is a fast lossless compression algorithm, targeting + real-time compression scenarios at zlib-level and better compression ratios. + The zstd compression library provides in-memory compression and decompression + functions. + + The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), + which is currently 22. Levels >= 20, labeled `--ultra`, should be used with + caution, as they require more memory. The library also offers negative + compression levels, which extend the range of speed vs. ratio preferences. + The lower the level, the faster the speed (at the cost of compression). + + Compression can be done in: + - a single step (described as Simple API) + - a single step, reusing a context (described as Explicit context) + - unbounded multiple steps (described as Streaming compression) + + The compression ratio achievable on small data can be highly improved using + a dictionary. Dictionary compression can be performed in: + - a single step (described as Simple dictionary API) + - a single step, reusing a dictionary (described as Bulk-processing + dictionary API) + + Advanced experimental functions can be accessed using + `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. + + Advanced experimental APIs should never be used with a dynamically-linked + library. They are not "stable"; their definitions or signatures may change in + the future. Only static linking is allowed. +*******************************************************************************/ + +/*------ Version ------*/ +#define ZSTD_VERSION_MAJOR 1 +#define ZSTD_VERSION_MINOR 5 +#define ZSTD_VERSION_RELEASE 0 +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) + +/*! ZSTD_versionNumber() : + * Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */ +ZSTDLIB_API unsigned ZSTD_versionNumber(void); + +#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE +#define ZSTD_QUOTE(str) #str +#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) +#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) + +/*! ZSTD_versionString() : + * Return runtime library version, like "1.4.5". Requires v1.3.0+. 
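 *
 * Editorial example (not part of the upstream header), assuming <stdio.h>:
 * comparing the version these declarations were built against with the
 * library actually linked at run time:
 *
 *     if (ZSTD_versionNumber() != ZSTD_VERSION_NUMBER) {
 *         fprintf(stderr, "zstd mismatch: compiled %u, running %u (%s)\n",
 *                 (unsigned)ZSTD_VERSION_NUMBER,
 *                 ZSTD_versionNumber(), ZSTD_versionString());
 *     }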
*/ +ZSTDLIB_API const char* ZSTD_versionString(void); + +/* ************************************* + * Default constant + ***************************************/ +#ifndef ZSTD_CLEVEL_DEFAULT +# define ZSTD_CLEVEL_DEFAULT 3 +#endif + +/* ************************************* + * Constants + ***************************************/ + +/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ +#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ +#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ +#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ +#define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 + +#define ZSTD_BLOCKSIZELOG_MAX 17 +#define ZSTD_BLOCKSIZE_MAX (1<= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*! ZSTD_decompress() : + * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. + * `dstCapacity` is an upper bound of originalSize to regenerate. + * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. + * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/*! ZSTD_getFrameContentSize() : requires v1.3.0+ + * `src` should point to the start of a ZSTD encoded frame. + * `srcSize` must be at least as large as the frame header. + * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. + * @return : - decompressed size of `src` frame content, if known + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) + * note 1 : a 0 return value means the frame is valid but "empty". + * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * Optionally, application can rely on some implicit limit, + * as ZSTD_decompress() only needs an upper bound of decompressed size. + * (For example, data could be necessarily cut into blocks <= 16 KB). + * note 3 : decompressed size is always present when compression is completed using single-pass functions, + * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). + * note 4 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure return value fits within application's authorized limits. + * Each application can set its own limits. 
+ * note 6 : This function replaces ZSTD_getDecompressedSize() */ +#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) +ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); + +/*! ZSTD_getDecompressedSize() : + * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). + * Both functions work the same way, but ZSTD_getDecompressedSize() blends + * "empty", "unknown" and "error" results to the same return value (0), + * while ZSTD_getFrameContentSize() gives them separate return values. + * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ +ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_findFrameCompressedSize() : Requires v1.4.0+ + * `src` should point to the start of a ZSTD frame or skippable frame. + * `srcSize` must be >= first frame size + * @return : the compressed size of the first frame starting at `src`, + * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, + * or an error code if input is invalid */ +ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); + + +/*====== Helper functions ======*/ +#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ +ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ +ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ +ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */ +ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ +ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */ + + +/*************************************** +* Explicit context +***************************************/ +/*= Compression context + * When compressing many times, + * it is recommended to allocate a context just once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Note : re-using context is just a speed / resource optimization. + * It doesn't change the compression ratio, which remains identical. + * Note 2 : In multi-threaded environments, + * use one different context per thread for parallel execution. + */ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); +ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer */ + +/*! ZSTD_compressCCtx() : + * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. + * Important : in order to behave similarly to `ZSTD_compress()`, + * this function compresses at requested compression level, + * __ignoring any other parameter__ . + * If any advanced parameter was set using the advanced API, + * they will all be reset. Only `compressionLevel` remains. 
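 *
 * Editorial sketch (not upstream): one-shot compression with a reused context.
 * `src`, `srcSize`, the allocation checks, and <stdlib.h>/<stdio.h> are assumed
 * to be provided by the caller:
 *
 *     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
 *     size_t const bound = ZSTD_compressBound(srcSize);
 *     void* const dst = malloc(bound);
 *     size_t const csize = ZSTD_compressCCtx(cctx, dst, bound,
 *                                            src, srcSize, ZSTD_CLEVEL_DEFAULT);
 *     if (ZSTD_isError(csize)) fprintf(stderr, "%s\n", ZSTD_getErrorName(csize));
 *     ZSTD_freeCCtx(cctx);   // or keep it around for the next buffer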
+ */ +ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*= Decompression context + * When decompressing many times, + * it is recommended to allocate a context only once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Use one context per thread for parallel execution. */ +typedef struct ZSTD_DCtx_s ZSTD_DCtx; +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); +ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /* accept NULL pointer */ + +/*! ZSTD_decompressDCtx() : + * Same as ZSTD_decompress(), + * requires an allocated ZSTD_DCtx. + * Compatible with sticky parameters. + */ +ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/********************************************* +* Advanced compression API (Requires v1.4.0+) +**********************************************/ + +/* API design : + * Parameters are pushed one by one into an existing context, + * using ZSTD_CCtx_set*() functions. + * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. + * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! + * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . + * + * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). + * + * This API supercedes all other "advanced" API entry points in the experimental section. + * In the future, we expect to remove from experimental API entry points which are redundant with this API. + */ + + +/* Compression strategies, listed from fastest to strongest */ +typedef enum { ZSTD_fast=1, + ZSTD_dfast=2, + ZSTD_greedy=3, + ZSTD_lazy=4, + ZSTD_lazy2=5, + ZSTD_btlazy2=6, + ZSTD_btopt=7, + ZSTD_btultra=8, + ZSTD_btultra2=9 + /* note : new strategies _might_ be added in the future. + Only the order (from fast to strong) is guaranteed */ +} ZSTD_strategy; + +typedef enum { + + /* compression parameters + * Note: When compressing with a ZSTD_CDict these parameters are superseded + * by the parameters used to construct the ZSTD_CDict. + * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ + ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table. + * Note that exact compression parameters are dynamically determined, + * depending on both compression level and srcSize (when known). + * Default level is ZSTD_CLEVEL_DEFAULT==3. + * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. + * Note 1 : it's possible to pass a negative compression level. + * Note 2 : setting a level does not automatically set all other compression parameters + * to default. Setting this will however eventually dynamically impact the compression + * parameters which have not been manually set. The manually set + * ones will 'stick'. */ + /* Advanced compression parameters : + * It's possible to pin down compression parameters to some specific values. + * In which case, these values are no longer dynamically selected by the compressor */ + ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. + * This will set a memory budget for streaming decompression, + * with larger values requiring more memory + * and typically compressing more. + * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. 
+ * Special: value 0 means "use default windowLog". + * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT + * requires explicitly allowing such size at streaming decompression stage. */ + ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. + * Resulting memory usage is (1 << (hashLog+2)). + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. + * Larger tables improve compression ratio of strategies <= dFast, + * and improve speed of strategies > dFast. + * Special: value 0 means "use default hashLog". */ + ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2. + * Resulting memory usage is (1 << (chainLog+2)). + * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. + * Larger tables result in better and slower compression. + * This parameter is useless for "fast" strategy. + * It's still useful when using "dfast" strategy, + * in which case it defines a secondary probe table. + * Special: value 0 means "use default chainLog". */ + ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2. + * More attempts result in better and slower compression. + * This parameter is useless for "fast" and "dFast" strategies. + * Special: value 0 means "use default searchLog". */ + ZSTD_c_minMatch=105, /* Minimum size of searched matches. + * Note that Zstandard can still find matches of smaller size, + * it just tweaks its search algorithm to look for this size and larger. + * Larger values increase compression and decompression speed, but decrease ratio. + * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX. + * Note that currently, for all strategies < btopt, effective minimum is 4. + * , for all strategies > fast, effective maximum is 6. + * Special: value 0 means "use default minMatchLength". */ + ZSTD_c_targetLength=106, /* Impact of this field depends on strategy. + * For strategies btopt, btultra & btultra2: + * Length of Match considered "good enough" to stop search. + * Larger values make compression stronger, and slower. + * For strategy fast: + * Distance between match sampling. + * Larger values make compression faster, and weaker. + * Special: value 0 means "use default targetLength". */ + ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition. + * The higher the value of selected strategy, the more complex it is, + * resulting in stronger and slower compression. + * Special: value 0 means "use default strategy". */ + /* LDM mode parameters */ + ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. + * This parameter is designed to improve compression ratio + * for large inputs, by finding large matches at long distance. + * It increases memory usage and window size. + * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB + * except when expressly set to a different value. + * Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and + * compression strategy >= ZSTD_btopt (== compression level 16+) */ + ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2. + * Larger values increase memory usage and compression ratio, + * but decrease compression speed. + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX + * default: windowlog - 7. + * Special: value 0 means "automatically determine hashlog". */ + ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher. + * Larger/too small values usually decrease compression ratio. 
+ * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. + * Special: value 0 means "use default value" (default: 64). */ + ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution. + * Larger values improve collision resolution but decrease compression speed. + * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX. + * Special: value 0 means "use default value" (default: 3). */ + ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table. + * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). + * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage. + * Larger values improve compression speed. + * Deviating far from default value will likely result in a compression ratio decrease. + * Special: value 0 means "automatically determine hashRateLog". */ + + /* frame parameters */ + ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) + * Content size must be known at the beginning of compression. + * This is automatically the case when using ZSTD_compress2(), + * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ + ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ + ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ + + /* multi-threading parameters */ + /* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). + * Otherwise, trying to set any other value than default (0) will be a no-op and return an error. + * In a situation where it's unknown if the linked library supports multi-threading or not, + * setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property. + */ + ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. + * When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() : + * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, + * while compression is performed in parallel, within worker thread(s). + * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : + * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). + * More workers improve speed, but also increase memory usage. + * Default value is `0`, aka "single-threaded mode" : no worker is spawned, + * compression is performed inside Caller's thread, and all invocations are blocking */ + ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. + * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. + * 0 means default, which is dynamically determined based on compression parameters. + * Job size must be a minimum of overlap size, or ZSTDMT_JOBSIZE_MIN (= 512 KB), whichever is largest. + * The minimum size is automatically and transparently enforced. */ + ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. + * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. + * It helps preserve compression ratio, while each job is compressed in parallel. 
+ * This value is enforced only when nbWorkers >= 1. + * Larger values increase compression ratio, but decrease speed. + * Possible values range from 0 to 9 : + * - 0 means "default" : value will be determined by the library, depending on strategy + * - 1 means "no overlap" + * - 9 means "full overlap", using a full window size. + * Each intermediate rank increases/decreases load size by a factor 2 : + * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default + * default value varies between 6 and 9, depending on strategy */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_c_rsyncable + * ZSTD_c_format + * ZSTD_c_forceMaxWindow + * ZSTD_c_forceAttachDict + * ZSTD_c_literalCompressionMode + * ZSTD_c_targetCBlockSize + * ZSTD_c_srcSizeHint + * ZSTD_c_enableDedicatedDictSearch + * ZSTD_c_stableInBuffer + * ZSTD_c_stableOutBuffer + * ZSTD_c_blockDelimiters + * ZSTD_c_validateSequences + * ZSTD_c_splitBlocks + * ZSTD_c_useRowMatchFinder + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly; + * also, the enums values themselves are unstable and can still change. + */ + ZSTD_c_experimentalParam1=500, + ZSTD_c_experimentalParam2=10, + ZSTD_c_experimentalParam3=1000, + ZSTD_c_experimentalParam4=1001, + ZSTD_c_experimentalParam5=1002, + ZSTD_c_experimentalParam6=1003, + ZSTD_c_experimentalParam7=1004, + ZSTD_c_experimentalParam8=1005, + ZSTD_c_experimentalParam9=1006, + ZSTD_c_experimentalParam10=1007, + ZSTD_c_experimentalParam11=1008, + ZSTD_c_experimentalParam12=1009, + ZSTD_c_experimentalParam13=1010, + ZSTD_c_experimentalParam14=1011, + ZSTD_c_experimentalParam15=1012 +} ZSTD_cParameter; + +typedef struct { + size_t error; + int lowerBound; + int upperBound; +} ZSTD_bounds; + +/*! ZSTD_cParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - lower and upper bounds, both inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam); + +/*! ZSTD_CCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_cParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is generally only possible during frame initialization (before starting compression). + * Exception : when using multi-threading mode (nbWorkers >= 1), + * the following parameters can be updated _during_ compression (within same frame): + * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. + * new parameters will be active for next job only (after a flush()). + * @return : an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtx_setPledgedSrcSize() : + * Total input data size to be compressed as a single frame. + * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag. 
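 *
 * Editorial sketch (not upstream) of the typical call sequence, with the
 * pledged size and other sticky parameters pushed before compressing
 * (`cctx`, `src`, `srcSize`, `dst`, `dstCapacity` assumed):
 *
 *     ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
 *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
 *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
 *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 4);   // returns an error if built without ZSTD_MULTITHREAD
 *     ZSTD_CCtx_setPledgedSrcSize(cctx, (unsigned long long)srcSize);
 *     size_t const csize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);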
+ * This value will also be controlled at end of frame, and trigger an error if not respected. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame. + * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. + * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame. + * Note 2 : pledgedSrcSize is only valid once, for the next frame. + * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN. + * Note 3 : Whenever all input data is provided and consumed in a single round, + * for example with ZSTD_compress2(), + * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), + * this value is automatically overridden by srcSize instead. + */ +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); + +typedef enum { + ZSTD_reset_session_only = 1, + ZSTD_reset_parameters = 2, + ZSTD_reset_session_and_parameters = 3 +} ZSTD_ResetDirective; + +/*! ZSTD_CCtx_reset() : + * There are 2 different things that can be reset, independently or jointly : + * - The session : will stop compressing current frame, and make CCtx ready to start a new one. + * Useful after an error, or to interrupt any ongoing compression. + * Any internal data not yet flushed is cancelled. + * Compression parameters and dictionary remain unchanged. + * They will be used to compress next frame. + * Resetting session never fails. + * - The parameters : changes all parameters back to "default". + * This removes any reference to any dictionary too. + * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) + * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) + * - Both : similar to resetting the session, followed by resetting parameters. + */ +ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); + +/*! ZSTD_compress2() : + * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. + * ZSTD_compress2() always starts a new frame. + * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - The function is always blocking, returns when compression is completed. + * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/*********************************************** +* Advanced decompression API (Requires v1.4.0+) +************************************************/ + +/* The advanced API pushes parameters one by one into an existing DCtx context. + * Parameters are sticky, and remain valid for all following frames + * using the same DCtx context. + * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). + * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). + * Therefore, no new decompression function is necessary. 
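 *
 * Editorial sketch (not upstream): capping the window size a DCtx will accept,
 * using the sticky parameter declared just below (`dst`/`src` buffers assumed):
 *
 *     ZSTD_DCtx* const dctx = ZSTD_createDCtx();
 *     ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 27);    // refuse frames needing >128 MB windows
 *     size_t const rsize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
 *     // ZSTD_isError(rsize) would report e.g. frameParameter_windowTooLarge here
 *     ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters); // back to defaults before reuse
 *     ZSTD_freeDCtx(dctx);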
+ */ + +typedef enum { + + ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which + * the streaming API will refuse to allocate memory buffer + * in order to protect the host from unreasonable memory requirements. + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). + * Special: value 0 means "use default maximum windowLog". */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_d_format + * ZSTD_d_stableOutBuffer + * ZSTD_d_forceIgnoreChecksum + * ZSTD_d_refMultipleDDicts + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly + */ + ZSTD_d_experimentalParam1=1000, + ZSTD_d_experimentalParam2=1001, + ZSTD_d_experimentalParam3=1002, + ZSTD_d_experimentalParam4=1003 + +} ZSTD_dParameter; + +/*! ZSTD_dParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - both lower and upper bounds, inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); + +/*! ZSTD_DCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_dParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is only possible during frame initialization (before starting decompression). + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); + +/*! ZSTD_DCtx_reset() : + * Return a DCtx to clean state. + * Session and parameters can be reset jointly or separately. + * Parameters can only be reset when no active frame is being decompressed. + * @return : 0, or an error code, which can be tested with ZSTD_isError() + */ +ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); + + +/**************************** +* Streaming +****************************/ + +typedef struct ZSTD_inBuffer_s { + const void* src; /**< start of input buffer */ + size_t size; /**< size of input buffer */ + size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_inBuffer; + +typedef struct ZSTD_outBuffer_s { + void* dst; /**< start of output buffer */ + size_t size; /**< size of output buffer */ + size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_outBuffer; + + + +/*-*********************************************************************** +* Streaming compression - HowTo +* +* A ZSTD_CStream object is required to track streaming operation. +* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. +* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. +* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. 
+* +* For parallel execution, use one separate ZSTD_CStream per thread. +* +* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. +* +* Parameters are sticky : when starting a new compression on the same context, +* it will re-use the same sticky parameters as previous compression session. +* When in doubt, it's recommended to fully initialize the context before usage. +* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), +* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to +* set more specific parameters, the pledged source size, or load a dictionary. +* +* Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to +* consume input stream. The function will automatically update both `pos` +* fields within `input` and `output`. +* Note that the function may not consume the entire input, for example, because +* the output buffer is already full, in which case `input.pos < input.size`. +* The caller must check if input has been entirely consumed. +* If not, the caller must make some room to receive more compressed data, +* and then present again remaining input data. +* note: ZSTD_e_continue is guaranteed to make some forward progress when called, +* but doesn't guarantee maximal forward progress. This is especially relevant +* when compressing with multiple threads. The call won't block if it can +* consume some input, but if it can't it will wait for some, but not all, +* output to be flushed. +* @return : provides a minimum amount of data remaining to be flushed from internal buffers +* or an error code, which can be tested using ZSTD_isError(). +* +* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, +* using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. +* Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). +* In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the +* operation. +* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if internal buffers are entirely flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). +* +* Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* The epilogue is required for decoders to consider a frame completed. +* flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to +* start a new frame. +* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if frame fully completed and fully flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). 
+* +* *******************************************************************/ + +typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */ + /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ +/*===== ZSTD_CStream management functions =====*/ +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); +ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); /* accept NULL pointer */ + +/*===== Streaming compression functions =====*/ +typedef enum { + ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ + ZSTD_e_flush=1, /* flush any data provided so far, + * it creates (at least) one new block, that can be decoded immediately on reception; + * frame will continue: any future data can still reference previously compressed data, improving compression. + * note : multithreaded compression will block to flush as much output as possible. */ + ZSTD_e_end=2 /* flush any remaining data _and_ close current frame. + * note that frame is only closed after compressed data is fully flushed (return value == 0). + * After that point, any additional data starts a new frame. + * note : each frame is independent (does not reference any content from previous frame). + : note : multithreaded compression will block to flush as much output as possible. */ +} ZSTD_EndDirective; + +/*! ZSTD_compressStream2() : Requires v1.4.0+ + * Behaves about the same as ZSTD_compressStream, with additional control on end directive. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) + * - output->pos must be <= dstCapacity, input->pos must be <= srcSize + * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. + * - endOp must be a valid directive + * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. + * - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flush to output whatever is available, + * and then immediately returns, just indicating that there is some data remaining to be flushed. + * The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte. + * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. + * - @return provides a minimum amount of data remaining to be flushed from internal buffers + * or an error code, which can be tested using ZSTD_isError(). + * if @return != 0, flush is not fully completed, there is still some data left within internal buffers. + * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers. + * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed. + * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), + * only ZSTD_e_end or ZSTD_e_flush operations are allowed. + * Before starting a new compression job, or changing compression parameters, + * it is required to fully flush internal buffers. 
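+ *
+ * For example, finishing a frame can be written as a small drain loop (sketch only;
+ * outBuf/outCap and the handling of the produced bytes are placeholders) :
+ *
+ *     size_t remaining;
+ *     do {
+ *         ZSTD_outBuffer output = { outBuf, outCap, 0 };
+ *         remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
+ *         if (ZSTD_isError(remaining)) break;
+ *         writeOutput(outBuf, output.pos);
+ *     } while (remaining != 0);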
+ */ +ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); + + +/* These buffer sizes are softly recommended. + * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output. + * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(), + * reducing the amount of memory shuffling and buffering, resulting in minor performance savings. + * + * However, note that these recommendations are from the perspective of a C caller program. + * If the streaming interface is invoked from some other language, + * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo, + * a major performance rule is to reduce crossing such interface to an absolute minimum. + * It's not rare that performance ends being spent more into the interface, rather than compression itself. + * In which cases, prefer using large buffers, as large as practical, + * for both input and output, to reduce the nb of roundtrips. + */ +ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */ + + +/* ***************************************************************************** + * This following is a legacy streaming API, available since v1.0+ . + * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). + * It is redundant, but remains fully supported. + * Streaming in combination with advanced parameters and dictionary compression + * can only be used through the new API. + ******************************************************************************/ + +/*! + * Equivalent to: + * + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + */ +ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); +/*! + * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). + * NOTE: The return value is different. ZSTD_compressStream() returns a hint for + * the next read size (if non-zero and not an error). ZSTD_compressStream2() + * returns the minimum nb of bytes left to flush (if non-zero and not an error). + */ +ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ +ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ +ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); + + +/*-*************************************************************************** +* Streaming decompression - HowTo +* +* A ZSTD_DStream object is required to track streaming operations. +* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. +* ZSTD_DStream objects can be re-used multiple times. +* +* Use ZSTD_initDStream() to start a new decompression operation. +* @return : recommended first input size +* Alternatively, use advanced API to set specific properties. +* +* Use ZSTD_decompressStream() repetitively to consume your input. +* The function will update both `pos` fields. 
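+*
+* For example, a minimal decompression loop could look like this sketch
+* (readInput()/writeOutput(), inBuf/outBuf and error handling are placeholders) :
+*
+*     ZSTD_DStream* const dstream = ZSTD_createDStream();
+*     ZSTD_initDStream(dstream);
+*     size_t const inCap  = ZSTD_DStreamInSize();
+*     size_t const outCap = ZSTD_DStreamOutSize();
+*     size_t readSize;
+*     while ((readSize = readInput(inBuf, inCap)) != 0) {
+*         ZSTD_inBuffer input = { inBuf, readSize, 0 };
+*         while (input.pos < input.size) {
+*             ZSTD_outBuffer output = { outBuf, outCap, 0 };
+*             size_t const ret = ZSTD_decompressStream(dstream, &output, &input);
+*             if (ZSTD_isError(ret)) break;
+*             writeOutput(outBuf, output.pos);
+*         }
+*     }
+*     ZSTD_freeDStream(dstream);
+*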
+* If `input.pos < input.size`, some input has not been consumed. +* It's up to the caller to present again remaining data. +* The function tries to flush all data decoded immediately, respecting output buffer size. +* If `output.pos < output.size`, decoder has flushed everything it could. +* But if `output.pos == output.size`, there might be some data left within internal buffers., +* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. +* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. +* @return : 0 when a frame is completely decoded and fully flushed, +* or an error code, which can be tested using ZSTD_isError(), +* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : +* the return value is a suggested next input size (just a hint for better latency) +* that will never request more than the remaining frame size. +* *******************************************************************************/ + +typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ + /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ +/*===== ZSTD_DStream management functions =====*/ +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); +ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); /* accept NULL pointer */ + +/*===== Streaming decompression functions =====*/ + +/* This function is redundant with the advanced API and equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, NULL); + */ +ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); + +ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); + +ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ + + +/************************** +* Simple dictionary API +***************************/ +/*! ZSTD_compress_usingDict() : + * Compression at an explicit compression level using a Dictionary. + * A dictionary can be any arbitrary data segment (also called a prefix), + * or a buffer with specified information (see zdict.h). + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + int compressionLevel); + +/*! ZSTD_decompress_usingDict() : + * Decompression using a known Dictionary. + * Dictionary must be identical to the one used during compression. + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*********************************** + * Bulk processing dictionary API + **********************************/ +typedef struct ZSTD_CDict_s ZSTD_CDict; + +/*! 
ZSTD_createCDict() : + * When compressing multiple messages or blocks using the same dictionary, + * it's recommended to digest the dictionary only once, since it's a costly operation. + * ZSTD_createCDict() will create a state from digesting a dictionary. + * The resulting state can be used for future compression operations with very limited startup cost. + * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. + * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. + * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. + * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, + * in which case the only thing that it transports is the @compressionLevel. + * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, + * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, + int compressionLevel); + +/*! ZSTD_freeCDict() : + * Function frees memory allocated by ZSTD_createCDict(). + * If a NULL pointer is passed, no operation is performed. */ +ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); + +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. + * Note : compression level is _decided at dictionary creation time_, + * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict); + + +typedef struct ZSTD_DDict_s ZSTD_DDict; + +/*! ZSTD_createDDict() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_freeDDict() : + * Function frees memory allocated with ZSTD_createDDict() + * If a NULL pointer is passed, no operation is performed. */ +ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); + +/*! ZSTD_decompress_usingDDict() : + * Decompression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict); + + +/******************************** + * Dictionary helper functions + *******************************/ + +/*! ZSTD_getDictID_fromDict() : Requires v1.4.0+ + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); + +/*! ZSTD_getDictID_fromCDict() : Requires v1.5.0+ + * Provides the dictID of the dictionary loaded into `cdict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict); + +/*! 
ZSTD_getDictID_fromDDict() : Requires v1.4.0+ + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); + +/*! ZSTD_getDictID_fromFrame() : Requires v1.4.0+ + * Provides the dictID required to decompressed the frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This could for one of the following reasons : + * - The frame does not require a dictionary to be decoded (most common case). + * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). + * - This is not a Zstandard frame. + * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); + + +/******************************************************************************* + * Advanced dictionary and prefix API (Requires v1.4.0+) + * + * This API allows dictionaries to be used with ZSTD_compress2(), + * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and + * only reset with the context is reset with ZSTD_reset_parameters or + * ZSTD_reset_session_and_parameters. Prefixes are single-use. + ******************************************************************************/ + + +/*! ZSTD_CCtx_loadDictionary() : Requires v1.4.0+ + * Create an internal CDict from `dict` buffer. + * Decompression will have to use same dictionary. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. + * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). + * Note 2 : Loading a dictionary involves building tables. + * It's also a CPU consuming operation, with non-negligible impact on latency. + * Tables are dependent on compression parameters, and for this reason, + * compression parameters can no longer be changed after loading a dictionary. + * Note 3 :`dict` content will be copied internally. + * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. + * In such a case, dictionary buffer must outlive its users. + * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() + * to precisely select how dictionary content must be interpreted. */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_refCDict() : Requires v1.4.0+ + * Reference a prepared dictionary, to be used for all next compressed frames. + * Note that compression parameters are enforced from within CDict, + * and supersede any compression parameter previously set within CCtx. + * The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. + * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. 
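+ *
+ * For illustration, a typical create-once / compress-many pattern might be the following sketch
+ * (dictBuf, dictSize, dst, dstCapacity, src, srcSize are assumed to exist; error checks omitted) :
+ *
+ *     ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuf, dictSize, 3);
+ *     ZSTD_CCtx*  const cctx  = ZSTD_createCCtx();
+ *     ZSTD_CCtx_refCDict(cctx, cdict);
+ *     size_t const cSize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
+ *     ... more frames with the same cctx/cdict, then ZSTD_freeCCtx() and ZSTD_freeCDict() ...
+ *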
+ * The dictionary will remain valid for future compressed frames using same CCtx. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Referencing a NULL CDict means "return to no-dictionary mode". + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */ +ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); + +/*! ZSTD_CCtx_refPrefix() : Requires v1.4.0+ + * Reference a prefix (single-usage dictionary) for next compressed frame. + * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). + * Decompression will need same prefix to properly regenerate data. + * Compressing with a prefix is similar in outcome as performing a diff and compressing it, + * but performs much faster, especially during decompression (compression speed is tunable with compression level). + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary + * Note 1 : Prefix buffer is referenced. It **must** outlive compression. + * Its content must remain unmodified during compression. + * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, + * ensure that the window size is large enough to contain the entire source. + * See ZSTD_c_windowLog. + * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. + * It's a CPU consuming operation, with non-negligible impact on latency. + * If there is a need to use the same prefix multiple times, consider loadDictionary instead. + * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent). + * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, + const void* prefix, size_t prefixSize); + +/*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+ + * Create an internal DDict from dict buffer, + * to be used to decompress next frames. + * The dictionary remains valid for all future frames, until explicitly invalidated. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Loading a dictionary involves building tables, + * which has a non-negligible impact on CPU usage and latency. + * It's recommended to "load once, use many times", to amortize the cost + * Note 2 :`dict` content will be copied internally, so `dict` can be released after loading. + * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead. + * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of + * how dictionary content is loaded and interpreted. + */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_refDDict() : Requires v1.4.0+ + * Reference a prepared dictionary, to be used to decompress next frames. + * The dictionary remains active for decompression of future frames using same DCtx. 
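+ * The decompression side mirrors this; a sketch (dictBuf/dictSize, the compressed input
+ * and the destination buffer are placeholders; error checks omitted) :
+ *
+ *     ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuf, dictSize);
+ *     ZSTD_DCtx*  const dctx  = ZSTD_createDCtx();
+ *     ZSTD_DCtx_refDDict(dctx, ddict);
+ *     size_t const dSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, cSize);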
+ * + * If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function + * will store the DDict references in a table, and the DDict used for decompression + * will be determined at decompression time, as per the dict ID in the frame. + * The memory for the table is allocated on the first call to refDDict, and can be + * freed with ZSTD_freeDCtx(). + * + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Special: referencing a NULL DDict means "return to no-dictionary mode". + * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +/*! ZSTD_DCtx_refPrefix() : Requires v1.4.0+ + * Reference a prefix (single-usage dictionary) to decompress next frame. + * This is the reverse operation of ZSTD_CCtx_refPrefix(), + * and must use the same prefix as the one used during compression. + * Prefix is **only used once**. Reference is discarded at end of frame. + * End of frame is reached when ZSTD_decompressStream() returns 0. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary + * Note 2 : Prefix buffer is referenced. It **must** outlive decompression. + * Prefix buffer must remain unmodified up to the end of frame, + * reached when ZSTD_decompressStream() returns 0. + * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent). + * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section) + * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. + * A full dictionary is more costly, as it requires building tables. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, + const void* prefix, size_t prefixSize); + +/* === Memory management === */ + +/*! ZSTD_sizeof_*() : Requires v1.4.0+ + * These functions give the _current_ memory usage of selected object. + * Note that object memory usage can evolve (increase or decrease) over time. */ +ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); +ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); +ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); +ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); + +#endif /* ZSTD_H_235446 */ + + +/* ************************************************************************************** + * ADVANCED AND EXPERIMENTAL FUNCTIONS + **************************************************************************************** + * The definitions in the following section are considered experimental. + * They are provided for advanced scenarios. + * They should never be used with a dynamic library, as prototypes may change in the future. + * Use them only in association with static linking. 
+ * ***************************************************************************************/
+
+#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
+#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
+
+/* Deprecation warnings :
+ * Should these warnings be a problem, it is generally possible to disable them,
+ * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual.
+ * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS.
+ */
+#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS
+# define ZSTD_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */
+#else
+# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
+# define ZSTD_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API
+# elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) || defined(__clang__)
+# define ZSTD_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message)))
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+# define ZSTD_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated))
+# elif defined(_MSC_VER)
+# define ZSTD_DEPRECATED(message) ZSTDLIB_API __declspec(deprecated(message))
+# else
+# pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler")
+# define ZSTD_DEPRECATED(message) ZSTDLIB_API
+# endif
+#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */
+
+/****************************************************************************************
+ * experimental API (static linking only)
+ ****************************************************************************************
+ * The following symbols and constants
+ * are not planned to join "stable API" status in the near future.
+ * They can still change in future versions.
+ * Some of them are planned to remain in the static_only section indefinitely.
+ * Some of them might be removed in the future (especially when redundant with existing stable functions)
+ * ***************************************************************************************/
+
+#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */
+#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2)
+#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */
+#define ZSTD_SKIPPABLEHEADERSIZE 8
+
+/* compression parameter bounds */
+#define ZSTD_WINDOWLOG_MAX_32 30
+#define ZSTD_WINDOWLOG_MAX_64 31
+#define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
+#define ZSTD_WINDOWLOG_MIN 10
+#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30)
+#define ZSTD_HASHLOG_MIN 6
+#define ZSTD_CHAINLOG_MAX_32 29
+#define ZSTD_CHAINLOG_MAX_64 30
+#define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? 
ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64))
+#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN
+#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1)
+#define ZSTD_SEARCHLOG_MIN 1
+#define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */
+#define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */
+#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX
+#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */
+#define ZSTD_STRATEGY_MIN ZSTD_fast
+#define ZSTD_STRATEGY_MAX ZSTD_btultra2
+
+
+#define ZSTD_OVERLAPLOG_MIN 0
+#define ZSTD_OVERLAPLOG_MAX 9
+
+#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame
+ * requiring larger than (1<<ZSTD_WINDOWLOG_LIMIT_DEFAULT) window size,
+ * to preserve host's memory from unreasonable requirements.
+ * This limit can be overridden using ZSTD_DCtx_setParameter(,ZSTD_d_windowLogMax,).
+ * The limit does not apply for one-pass decoders (such as ZSTD_decompress()), since no additional memory is allocated */
+
+/* LDM parameter bounds */
+#define ZSTD_LDM_HASHLOG_MIN ZSTD_HASHLOG_MIN
+#define ZSTD_LDM_HASHLOG_MAX ZSTD_HASHLOG_MAX
+#define ZSTD_LDM_MINMATCH_MIN 4
+#define ZSTD_LDM_MINMATCH_MAX 4096
+#define ZSTD_LDM_BUCKETSIZELOG_MIN 1
+#define ZSTD_LDM_BUCKETSIZELOG_MAX 8
+#define ZSTD_LDM_HASHRATELOG_MIN 0
+#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
+
+/* Advanced parameter bounds */
+#define ZSTD_TARGETCBLOCKSIZE_MIN 64
+#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
+#define ZSTD_SRCSIZEHINT_MIN 0
+#define ZSTD_SRCSIZEHINT_MAX INT_MAX
+
+/* internal */
+#define ZSTD_HASHLOG3_MAX 17
+
+
+/* --- Advanced types --- */
+
+typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
+
+typedef struct {
+ unsigned int offset; /* The offset of the match. (NOT the same as the offset code)
+ * If offset == 0 and matchLength == 0, this sequence represents the last
+ * literals in the block of litLength size.
+ */
+
+ unsigned int litLength; /* Literal length of the sequence. */
+ unsigned int matchLength; /* Match length of the sequence. */
+
+ unsigned int rep; /* Represents which repeat offset is represented by the field 'offset'.
+ * Ranges from [0, 3].
+ *
+ * Repeat offsets are essentially previous offsets from previous sequences sorted in
+ * recency order. For more detail, see doc/zstd_compression_format.md
+ *
+ * If rep == 0, then 'offset' does not contain a repeat offset.
+ * If rep > 0:
+ * If litLength != 0:
+ * rep == 1 --> offset == repeat_offset_1
+ * rep == 2 --> offset == repeat_offset_2
+ * rep == 3 --> offset == repeat_offset_3
+ * If litLength == 0:
+ * rep == 1 --> offset == repeat_offset_2
+ * rep == 2 --> offset == repeat_offset_3
+ * rep == 3 --> offset == repeat_offset_1 - 1
+ *
+ * Note: This field is optional. ZSTD_generateSequences() will calculate the value of
+ * 'rep', but repeat offsets do not necessarily need to be calculated from an external
+ * sequence provider's perspective. For example, ZSTD_compressSequences() does not
+ * use this 'rep' field at all (as of now).
+ */
+} ZSTD_Sequence;
+
+typedef struct {
+ unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */
+ unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
+ unsigned hashLog; /**< dispatch table : larger == faster, more memory */
+ unsigned searchLog; /**< nb of searches : larger == more compression, slower */
+ unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */
+ unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */
+ ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */
+} ZSTD_compressionParameters;
+
+typedef struct {
+ int contentSizeFlag; /**< 1: content size will be in frame header (when known) */
+ int checksumFlag; /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */
+ int noDictIDFlag; /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */
+} ZSTD_frameParameters;
+
+typedef struct {
+ ZSTD_compressionParameters cParams;
+ ZSTD_frameParameters fParams;
+} ZSTD_parameters;
+
+typedef enum {
+ ZSTD_dct_auto = 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */
+ ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */
+ ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */
+} ZSTD_dictContentType_e;
+
+typedef enum {
+ ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */
+ ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
+} ZSTD_dictLoadMethod_e;
+
+typedef enum {
+ ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */
+ ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number. 
+ * Useful to save 4 bytes per generated frame. + * Decoder cannot recognise automatically this format, requiring this instruction. */ +} ZSTD_format_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_forceIgnoreChecksum */ + ZSTD_d_validateChecksum = 0, + ZSTD_d_ignoreChecksum = 1 +} ZSTD_forceIgnoreChecksum_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_refMultipleDDicts */ + ZSTD_rmd_refSingleDDict = 0, + ZSTD_rmd_refMultipleDDicts = 1 +} ZSTD_refMultipleDDicts_e; + +typedef enum { + /* Note: this enum and the behavior it controls are effectively internal + * implementation details of the compressor. They are expected to continue + * to evolve and should be considered only in the context of extremely + * advanced performance tuning. + * + * Zstd currently supports the use of a CDict in three ways: + * + * - The contents of the CDict can be copied into the working context. This + * means that the compression can search both the dictionary and input + * while operating on a single set of internal tables. This makes + * the compression faster per-byte of input. However, the initial copy of + * the CDict's tables incurs a fixed cost at the beginning of the + * compression. For small compressions (< 8 KB), that copy can dominate + * the cost of the compression. + * + * - The CDict's tables can be used in-place. In this model, compression is + * slower per input byte, because the compressor has to search two sets of + * tables. However, this model incurs no start-up cost (as long as the + * working context's tables can be reused). For small inputs, this can be + * faster than copying the CDict's tables. + * + * - The CDict's tables are not used at all, and instead we use the working + * context alone to reload the dictionary and use params based on the source + * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). + * This method is effective when the dictionary sizes are very small relative + * to the input size, and the input size is fairly large to begin with. + * + * Zstd has a simple internal heuristic that selects which strategy to use + * at the beginning of a compression. However, if experimentation shows that + * Zstd is making poor choices, it is possible to override that choice with + * this enum. + */ + ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ + ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ + ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ + ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ +} ZSTD_dictAttachPref_e; + +typedef enum { + ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level. + * Negative compression levels will be uncompressed, and positive compression + * levels will be compressed. */ + ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be + * emitted if Huffman compression is not profitable. */ + ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */ +} ZSTD_literalCompressionMode_e; + +typedef enum { + ZSTD_urm_auto = 0, /* Automatically determine whether or not we use row matchfinder */ + ZSTD_urm_disableRowMatchFinder = 1, /* Never use row matchfinder */ + ZSTD_urm_enableRowMatchFinder = 2 /* Always use row matchfinder when applicable */ +} ZSTD_useRowMatchFinderMode_e; + +/*************************************** +* Frame size functions +***************************************/ + +/*! 
ZSTD_findDecompressedSize() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. there should be a frame boundary at `src + srcSize`) + * @return : - decompressed size of all data in all successive frames + * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * note 2 : decompressed size is always present when compression is done with ZSTD_compress() + * note 3 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure result fits within application's authorized limits. + * Each application can set its own limits. + * note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to + * read each contained frame header. This is fast as most of the data is skipped, + * however it does mean that all frame data must be present and valid. */ +ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_decompressBound() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. there should be a frame boundary at `src + srcSize`) + * @return : - upper-bound for the decompressed size of all data in all successive frames + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. + * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`. + * in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value. + * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by: + * upper-bound = # blocks * min(128 KB, Window_Size) + */ +ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); + +/*! ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ +ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); + +typedef enum { + ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */ + ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */ +} ZSTD_sequenceFormat_e; + +/*! ZSTD_generateSequences() : + * Generate sequences using ZSTD_compress2, given a source buffer. + * + * Each block will end with a dummy sequence + * with offset == 0, matchLength == 0, and litLength == length of last literals. + * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0) + * simply acts as a block delimiter. + * + * zc can be used to insert custom compression params. 
+ * This function invokes ZSTD_compress2 + * + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters + * @return : number of sequences generated + */ + +ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize); + +/*! ZSTD_mergeBlockDelimiters() : + * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals + * by merging them into into the literals of the next sequence. + * + * As such, the final generated result has no explicit representation of block boundaries, + * and the final last literals segment is not represented in the sequences. + * + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters + * @return : number of sequences left after merging + */ +ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); + +/*! ZSTD_compressSequences() : + * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst. + * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.) + * The entire source is compressed into a single frame. + * + * The compression behavior changes based on cctx params. In particular: + * If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on + * the block size derived from the cctx, and sequences may be split. This is the default setting. + * + * If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. + * + * If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined + * behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) then the function will bail out and return an error. + * + * In addition to the two adjustable experimental params, there are other important cctx params. + * - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN. + * - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression. + * - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset + * is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md + * + * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused. + * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly, + * and cannot emit an RLE block that disagrees with the repcode history + * @return : final compressed size or a ZSTD error. 
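+ *
+ * A possible end-to-end sketch (outSeqs is a caller-allocated ZSTD_Sequence array;
+ * sizes and error checks are omitted) :
+ *
+ *     size_t nbSeqs = ZSTD_generateSequences(cctx, outSeqs, outSeqsCapacity, src, srcSize);
+ *     nbSeqs = ZSTD_mergeBlockDelimiters(outSeqs, nbSeqs);
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_noBlockDelimiters);
+ *     size_t const cSize = ZSTD_compressSequences(cctx, dst, dstCapacity, outSeqs, nbSeqs, src, srcSize);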
+ */ +ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize); + + +/*! ZSTD_writeSkippableFrame() : + * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer. + * + * Skippable frames begin with a a 4-byte magic number. There are 16 possible choices of magic number, + * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15. + * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so + * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. + * + * Returns an error if destination buffer is not large enough, if the source size is not representable + * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid). + * + * @return : number of bytes written or a ZSTD error. + */ +ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, unsigned magicVariant); + + +/*************************************** +* Memory management +***************************************/ + +/*! ZSTD_estimate*() : + * These functions make it possible to estimate memory usage + * of a future {D,C}Ctx, before its creation. + * + * ZSTD_estimateCCtxSize() will provide a memory budget large enough + * for any compression level up to selected one. + * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate + * does not include space for a window buffer. + * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. + * The estimate will assume the input may be arbitrarily large, + * which is the worst case. + * + * When srcSize can be bound by a known and rather "small" value, + * this fact can be used to provide a tighter estimation + * because the CCtx compression context will need less memory. + * This tighter estimation can be provided by more advanced functions + * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), + * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). + * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. + * + * Note 2 : only single-threaded compression is supported. + * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. + */ +ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); + +/*! ZSTD_estimateCStreamSize() : + * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. + * It will also consider src size to be arbitrarily "large", which is worst case. + * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. + * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. + * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. 
+ * Note : CStream size estimation is only correct for single-threaded compression. + * ZSTD_DStream memory budget depends on window Size. + * This information can be passed manually, using ZSTD_estimateDStreamSize, + * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); + * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), + * an internal ?Dict will be created, which additional size is not estimated here. + * In this case, get total size by adding ZSTD_estimate?DictSize */ +ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); + +/*! ZSTD_estimate?DictSize() : + * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict(). + * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced(). + * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller. + */ +ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); +ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); + +/*! ZSTD_initStatic*() : + * Initialize an object using a pre-allocated fixed-size buffer. + * workspace: The memory area to emplace the object into. + * Provided pointer *must be 8-bytes aligned*. + * Buffer must outlive object. + * workspaceSize: Use ZSTD_estimate*Size() to determine + * how large workspace must be to support target scenario. + * @return : pointer to object (same address as workspace, just different type), + * or NULL if error (size too small, incorrect alignment, etc.) + * Note : zstd will never resize nor malloc() when using a static buffer. + * If the object requires more memory than available, + * zstd will just error out (typically ZSTD_error_memory_allocation). + * Note 2 : there is no corresponding "free" function. + * Since workspace is allocated externally, it must be freed externally too. + * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level + * into its associated cParams. + * Limitation 1 : currently not compatible with internal dictionary creation, triggered by + * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict(). + * Limitation 2 : static cctx currently not compatible with multi-threading. + * Limitation 3 : static dctx is incompatible with legacy support. 
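+ *
+ * A minimal static-allocation sketch (compression level 3 chosen arbitrarily; error checks omitted) :
+ *
+ *     size_t const wkspSize = ZSTD_estimateCCtxSize(3);
+ *     void*  const wksp     = malloc(wkspSize);
+ *     ZSTD_CCtx* const cctx = ZSTD_initStaticCCtx(wksp, wkspSize);
+ *     size_t const cSize = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, 3);
+ *     free(wksp);
+ *
+ * There is no ZSTD_freeCCtx() counterpart in this mode : releasing the workspace releases the context.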
+ */ +ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */ + +ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */ + +ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams); + +ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType); + + +/*! Custom memory allocation : + * These prototypes make it possible to pass your own allocation/free functions. + * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. + * All allocation/free operations will be completed using these custom variants instead of regular ones. + */ +typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); +typedef void (*ZSTD_freeFunction) (void* opaque, void* address); +typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; +static +#ifdef __GNUC__ +__attribute__((__unused__)) +#endif +ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */ + +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); + +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, + ZSTD_customMem customMem); + +/* ! Thread pool : + * These prototypes make it possible to share a thread pool among multiple compression contexts. + * This can limit resources for applications with multiple threads where each one uses + * a threaded compression mode (via ZSTD_c_nbWorkers parameter). + * ZSTD_createThreadPool creates a new thread pool with a given number of threads. + * Note that the lifetime of such pool must exist while being used. + * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value + * to use an internal thread pool). + * ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer. + */ +typedef struct POOL_ctx_s ZSTD_threadPool; +ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads); +ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); /* accept NULL pointer */ +ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool); + + +/* + * This API is temporary and is expected to change or disappear in the future! 
+ */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CCtx_params* cctxParams, + ZSTD_customMem customMem); + +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem); + + +/*************************************** +* Advanced compression functions +***************************************/ + +/*! ZSTD_createCDict_byReference() : + * Create a digested dictionary for compression + * Dictionary content is just referenced, not duplicated. + * As a consequence, `dictBuffer` **must** outlive CDict, + * and its content must remain unmodified throughout the lifetime of CDict. + * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); + +/*! ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. + * `estimatedSrcSize` value is optional, select 0 if not known */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_getParams() : + * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. + * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ +ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_checkCParams() : + * Ensure param values remain within authorized range. + * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); + +/*! ZSTD_adjustCParams() : + * optimize params for a given `srcSize` and `dictSize`. + * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. + * `dictSize` must be `0` when there is no dictionary. + * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. + * This function never fails (wide contract) */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); + +/*! ZSTD_compress_advanced() : + * Note : this function is now DEPRECATED. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. + * This prototype will generate compilation warnings. */ +ZSTD_DEPRECATED("use ZSTD_compress2") +size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); + +/*! ZSTD_compress_usingCDict_advanced() : + * Note : this function is now DEPRECATED. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. + * This prototype will generate compilation warnings. */ +ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary") +size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams); + + +/*! 
ZSTD_CCtx_loadDictionary_byReference() : + * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. + * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_loadDictionary_advanced() : + * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_CCtx_refPrefix_advanced() : + * Same as ZSTD_CCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/* === experimental parameters === */ +/* these parameters can be used with ZSTD_setParameter() + * they are not guaranteed to remain supported in the future */ + + /* Enables rsyncable mode, + * which makes compressed files more rsync friendly + * by adding periodic synchronization points to the compressed data. + * The target average block size is ZSTD_c_jobSize / 2. + * It's possible to modify the job size to increase or decrease + * the granularity of the synchronization point. + * Once the jobSize is smaller than the window size, + * it will result in compression ratio degradation. + * NOTE 1: rsyncable mode only works when multithreading is enabled. + * NOTE 2: rsyncable performs poorly in combination with long range mode, + * since it will decrease the effectiveness of synchronization points, + * though mileage may vary. + * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s. + * If the selected compression level is already running significantly slower, + * the overall speed won't be significantly impacted. + */ + #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1 + +/* Select a compression format. + * The value must be of type ZSTD_format_e. + * See ZSTD_format_e enum definition for details */ +#define ZSTD_c_format ZSTD_c_experimentalParam2 + +/* Force back-reference distances to remain < windowSize, + * even when referencing into Dictionary content (default:0) */ +#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3 + +/* Controls whether the contents of a CDict + * are used in place, or copied into the working context. + * Accepts values from the ZSTD_dictAttachPref_e enum. + * See the comments on that enum for an explanation of the feature. */ +#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 + +/* Controls how the literals are compressed (default is auto). + * The value must be of type ZSTD_literalCompressionMode_e. + * See ZSTD_literalCompressionMode_e enum definition for details. + */ +#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 + +/* Tries to fit compressed block size to be around targetCBlockSize. + * No target when targetCBlockSize == 0. + * There is no guarantee on compressed block size (default:0) */ +#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 + +/* User's best guess of source size. + * Hint is not valid when srcSizeHint == 0. 
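+ * For example (sketch, expectedSrcSize being the caller's own estimate) :
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_srcSizeHint, (int)expectedSrcSize);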
+ * There is no guarantee that hint is close to actual source size, + * but compression ratio may regress significantly if guess considerably underestimates */ +#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 + +/* Controls whether the new and experimental "dedicated dictionary search + * structure" can be used. This feature is still rough around the edges, be + * prepared for surprising behavior! + * + * How to use it: + * + * When using a CDict, whether to use this feature or not is controlled at + * CDict creation, and it must be set in a CCtxParams set passed into that + * construction (via ZSTD_createCDict_advanced2()). A compression will then + * use the feature or not based on how the CDict was constructed; the value of + * this param, set in the CCtx, will have no effect. + * + * However, when a dictionary buffer is passed into a CCtx, such as via + * ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control + * whether the CDict that is created internally can use the feature or not. + * + * What it does: + * + * Normally, the internal data structures of the CDict are analogous to what + * would be stored in a CCtx after compressing the contents of a dictionary. + * To an approximation, a compression using a dictionary can then use those + * data structures to simply continue what is effectively a streaming + * compression where the simulated compression of the dictionary left off. + * Which is to say, the search structures in the CDict are normally the same + * format as in the CCtx. + * + * It is possible to do better, since the CDict is not like a CCtx: the search + * structures are written once during CDict creation, and then are only read + * after that, while the search structures in the CCtx are both read and + * written as the compression goes along. This means we can choose a search + * structure for the dictionary that is read-optimized. + * + * This feature enables the use of that different structure. + * + * Note that some of the members of the ZSTD_compressionParameters struct have + * different semantics and constraints in the dedicated search structure. It is + * highly recommended that you simply set a compression level in the CCtxParams + * you pass into the CDict creation call, and avoid messing with the cParams + * directly. + * + * Effects: + * + * This will only have any effect when the selected ZSTD_strategy + * implementation supports this feature. Currently, that's limited to + * ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2. + * + * Note that this means that the CDict tables can no longer be copied into the + * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be + * useable. The dictionary can only be attached or reloaded. + * + * In general, you should expect compression to be faster--sometimes very much + * so--and CDict creation to be slightly slower. Eventually, we will probably + * make this mode the default. + */ +#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8 + +/* ZSTD_c_stableInBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the compressor, and + * compression will fail if it ever changes. This means the only flush + * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end + * is not used. 
The data in the ZSTD_inBuffer in the range [src, src + pos)
+ * MUST not be modified during compression or you will get data corruption.
+ *
+ * When this flag is enabled zstd won't allocate an input window buffer,
+ * because the user guarantees it can reference the ZSTD_inBuffer until
+ * the frame is complete. But, it will still allocate an output buffer
+ * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also
+ * avoid the memcpy() from the input buffer to the input window buffer.
+ *
+ * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used.
+ * That means this flag cannot be used with ZSTD_compressStream().
+ *
+ * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using
+ * this flag is ALWAYS memory safe, and will never access out-of-bounds
+ * memory. However, compression WILL fail if you violate the preconditions.
+ *
+ * WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST
+ * not be modified during compression or you will get data corruption. This
+ * is because zstd needs to reference data in the ZSTD_inBuffer to find
+ * matches. Normally zstd maintains its own window buffer for this purpose,
+ * but passing this flag tells zstd to use the user provided buffer.
+ */
+#define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9
+
+/* ZSTD_c_stableOutBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the compressor that the ZSTD_outBuffer will not be resized between
+ * calls. Specifically: (out.size - out.pos) will never grow. This gives the
+ * compressor the freedom to say: If the compressed data doesn't fit in the
+ * output buffer then return ZSTD_error_dstSizeTooSmall. This allows us to
+ * always compress directly into the output buffer, instead of compressing
+ * into an internal buffer and copying to the output buffer.
+ *
+ * When this flag is enabled zstd won't allocate an output buffer, because
+ * it can write directly to the ZSTD_outBuffer. It will still allocate the
+ * input window buffer (see ZSTD_c_stableInBuffer).
+ *
+ * Zstd will check that (out.size - out.pos) never grows and return an error
+ * if it does. While not strictly necessary, this should prevent surprises.
+ */
+#define ZSTD_c_stableOutBuffer ZSTD_c_experimentalParam10
+
+/* ZSTD_c_blockDelimiters
+ * Default is 0 == ZSTD_sf_noBlockDelimiters.
+ *
+ * For use with sequence compression API: ZSTD_compressSequences().
+ *
+ * Designates whether or not the given array of ZSTD_Sequence contains block delimiters
+ * and last literals, which are defined as sequences with offset == 0 and matchLength == 0.
+ * See the definition of ZSTD_Sequence for more specifics.
+ */
+#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11
+
+/* ZSTD_c_validateSequences
+ * Default is 0 == disabled. Set to 1 to enable sequence validation.
+ *
+ * For use with sequence compression API: ZSTD_compressSequences().
+ * Designates whether or not we validate sequences provided to ZSTD_compressSequences()
+ * during function execution.
+ *
+ * Without validation, providing a sequence that does not conform to the zstd spec will cause
+ * undefined behavior, and may produce a corrupted block.
+ *
+ * With validation enabled, if a sequence is invalid (see doc/zstd_compression_format.md for
+ * specifics regarding offset/matchlength requirements) then the function will bail out and
+ * return an error.
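+ *
+ * Illustrative sketch only (argument names here are assumptions; see the
+ * ZSTD_compressSequences() prototype elsewhere in this header for the
+ * authoritative signature, and note that error handling is omitted) :
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
+ *     size_t const res = ZSTD_compressSequences(cctx, dst, dstCapacity,
+ *                                               seqs, nbSeqs, src, srcSize);
+ *     if (ZSTD_isError(res)) { the invalid sequence was rejected safely, not UB }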
+ * + */ +#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12 + +/* ZSTD_c_splitBlocks + * Default is 0 == disabled. Set to 1 to enable block splitting. + * + * Will attempt to split blocks in order to improve compression ratio at the cost of speed. + */ +#define ZSTD_c_splitBlocks ZSTD_c_experimentalParam13 + +/* ZSTD_c_useRowMatchFinder + * Default is ZSTD_urm_auto. + * Controlled with ZSTD_useRowMatchFinderMode_e enum. + * + * By default, in ZSTD_urm_auto, when finalizing the compression parameters, the library + * will decide at runtime whether to use the row-based matchfinder based on support for SIMD + * instructions as well as the windowLog. + * + * Set to ZSTD_urm_disableRowMatchFinder to never use row-based matchfinder. + * Set to ZSTD_urm_enableRowMatchFinder to force usage of row-based matchfinder. + */ +#define ZSTD_c_useRowMatchFinder ZSTD_c_experimentalParam14 + +/* ZSTD_c_deterministicRefPrefix + * Default is 0 == disabled. Set to 1 to enable. + * + * Zstd produces different results for prefix compression when the prefix is + * directly adjacent to the data about to be compressed vs. when it isn't. + * This is because zstd detects that the two buffers are contiguous and it can + * use a more efficient match finding algorithm. However, this produces different + * results than when the two buffers are non-contiguous. This flag forces zstd + * to always load the prefix in non-contiguous mode, even if it happens to be + * adjacent to the data, to guarantee determinism. + * + * If you really care about determinism when using a dictionary or prefix, + * like when doing delta compression, you should select this option. It comes + * at a speed penalty of about ~2.5% if the dictionary and data happened to be + * contiguous, and is free if they weren't contiguous. We don't expect that + * intentionally making the dictionary and data contiguous will be worth the + * cost to memcpy() the data. + */ +#define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15 + +/*! ZSTD_CCtx_getParameter() : + * Get the requested compression parameter value, selected by enum ZSTD_cParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); + + +/*! ZSTD_CCtx_params : + * Quick howto : + * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure + * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into + * an existing ZSTD_CCtx_params structure. + * This is similar to + * ZSTD_CCtx_setParameter(). + * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to + * an existing CCtx. + * These parameters will be applied to + * all subsequent frames. + * - ZSTD_compressStream2() : Do compression using the CCtx. + * - ZSTD_freeCCtxParams() : Free the memory, accept NULL pointer. + * + * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() + * for static allocation of CCtx for single-threaded compression. + */ +ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); +ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); /* accept NULL pointer */ + +/*! ZSTD_CCtxParams_reset() : + * Reset params to default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); + +/*! ZSTD_CCtxParams_init() : + * Initializes the compression parameters of cctxParams according to + * compression level. All other parameters are reset to their default values. 
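+ *
+ * A minimal usage sketch of the parameter-object workflow outlined above
+ * (error checks omitted; `cctx`, `output` and `input` are assumed to already exist) :
+ *     ZSTD_CCtx_params* const params = ZSTD_createCCtxParams();
+ *     ZSTD_CCtxParams_init(params, 3);                  // start from level 3 defaults
+ *     ZSTD_CCtxParams_setParameter(params, ZSTD_c_checksumFlag, 1);
+ *     ZSTD_CCtx_setParametersUsingCCtxParams(cctx, params);
+ *     ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
+ *     ZSTD_freeCCtxParams(params);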
+ */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); + +/*! ZSTD_CCtxParams_init_advanced() : + * Initializes the compression and frame parameters of cctxParams according to + * params. All other parameters are reset to their default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); + +/*! ZSTD_CCtxParams_setParameter() : Requires v1.4.0+ + * Similar to ZSTD_CCtx_setParameter. + * Set one compression parameter, selected by enum ZSTD_cParameter. + * Parameters must be applied to a ZSTD_CCtx using + * ZSTD_CCtx_setParametersUsingCCtxParams(). + * @result : a code representing success or failure (which can be tested with + * ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtxParams_getParameter() : + * Similar to ZSTD_CCtx_getParameter. + * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); + +/*! ZSTD_CCtx_setParametersUsingCCtxParams() : + * Apply a set of ZSTD_CCtx_params to the compression context. + * This can be done even after compression is started, + * if nbWorkers==0, this will have no impact until a new compression is started. + * if nbWorkers>=1, new parameters will be picked up at next job, + * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); + +/*! ZSTD_compressStream2_simpleArgs() : + * Same as ZSTD_compressStream2(), + * but using only integral types as arguments. + * This variant might be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp); + + +/*************************************** +* Advanced decompression functions +***************************************/ + +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. */ +ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * Dictionary content is referenced, and therefore stays in dictBuffer. + * It is important that dictBuffer outlives DDict, + * it must remain read accessible throughout the lifetime of DDict */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_byReference() : + * Same as ZSTD_DCtx_loadDictionary(), + * but references `dict` content instead of copying it into `dctx`. 
+ * This saves memory if `dict` remains around., + * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_advanced() : + * Same as ZSTD_DCtx_loadDictionary(), + * but gives direct control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?). */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_refPrefix_advanced() : + * Same as ZSTD_DCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_setMaxWindowSize() : + * Refuses allocating internal buffers for frames requiring a window size larger than provided limit. + * This protects a decoder context from reserving too much memory for itself (potential attack scenario). + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); + +/*! ZSTD_DCtx_getParameter() : + * Get the requested decompression parameter value, selected by enum ZSTD_dParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value); + +/* ZSTD_d_format + * experimental parameter, + * allowing selection between ZSTD_format_e input compression formats + */ +#define ZSTD_d_format ZSTD_d_experimentalParam1 +/* ZSTD_d_stableOutBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the decompressor, and + * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer + * MUST be large enough to fit the entire decompressed frame. This will be + * checked when the frame content size is known. The data in the ZSTD_outBuffer + * in the range [dst, dst + pos) MUST not be modified during decompression + * or you will get data corruption. + * + * When this flags is enabled zstd won't allocate an output buffer, because + * it can write directly to the ZSTD_outBuffer, but it will still allocate + * an input buffer large enough to fit any compressed block. This will also + * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer. + * If you need to avoid the input buffer allocation use the buffer-less + * streaming API. + * + * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using + * this flag is ALWAYS memory safe, and will never access out-of-bounds + * memory. However, decompression WILL fail if you violate the preconditions. 
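+ *
+ * A possible usage sketch (assumes the frame content size is known up front so
+ * that `dstBuf`/`dstCapacity` can hold the whole frame; names are illustrative
+ * and error checks are omitted) :
+ *     ZSTD_DCtx_setParameter(dctx, ZSTD_d_stableOutBuffer, 1);
+ *     ZSTD_outBuffer out = { dstBuf, dstCapacity, 0 };
+ *     ZSTD_inBuffer  in  = { srcBuf, srcSize, 0 };
+ *     size_t ret;
+ *     do { ret = ZSTD_decompressStream(dctx, &out, &in); }
+ *     while (ret != 0 && !ZSTD_isError(ret));           // reuse the same `out` each call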
+ * + * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST + * not be modified during decompression or you will get data corruption. This + * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate + * matches. Normally zstd maintains its own buffer for this purpose, but passing + * this flag tells zstd to use the user provided buffer. + */ +#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2 + +/* ZSTD_d_forceIgnoreChecksum + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * Tells the decompressor to skip checksum validation during decompression, regardless + * of whether checksumming was specified during compression. This offers some + * slight performance benefits, and may be useful for debugging. + * Param has values of type ZSTD_forceIgnoreChecksum_e + */ +#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3 + +/* ZSTD_d_refMultipleDDicts + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * If enabled and dctx is allocated on the heap, then additional memory will be allocated + * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_refDDict() + * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead + * store all references. At decompression time, the appropriate dictID is selected + * from the set of DDicts based on the dictID in the frame. + * + * Usage is simply calling ZSTD_refDDict() on multiple dict buffers. + * + * Param has values of byte ZSTD_refMultipleDDicts_e + * + * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory + * allocation for the hash table. ZSTD_freeDCtx() also frees this memory. + * Memory is allocated as per ZSTD_DCtx::customMem. + * + * Although this function allocates memory for the table, the user is still responsible for + * memory management of the underlying ZSTD_DDict* themselves. + */ +#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4 + + +/*! ZSTD_DCtx_setFormat() : + * This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter(). + * Instruct the decoder context about what kind of data to decode next. + * This instruction is mandatory to decode data without a fully-formed header, + * such ZSTD_f_zstd1_magicless for example. + * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ +ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead") +size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); + +/*! ZSTD_decompressStream_simpleArgs() : + * Same as ZSTD_decompressStream(), + * but using only integral types as arguments. + * This can be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos); + + +/******************************************************************** +* Advanced streaming functions +* Warning : most of these functions are now redundant with the Advanced API. +* Once Advanced API reaches "stable" status, +* redundant functions will be deprecated, and then at some point removed. +********************************************************************/ + +/*===== Advanced Streaming compression functions =====*/ + +/*! 
ZSTD_initCStream_srcSize() : + * This function is DEPRECATED, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * pledgedSrcSize must be correct. If it is not known at init time, use + * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, + * "0" also disables frame content size field. It may be enabled in the future. + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, + int compressionLevel, + unsigned long long pledgedSrcSize); + +/*! ZSTD_initCStream_usingDict() : + * This function is DEPRECATED, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * Creates of an internal CDict (incompatible with static CCtx), except if + * dict == NULL or dictSize < 8, in which case no dict is used. + * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if + * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + int compressionLevel); + +/*! ZSTD_initCStream_advanced() : + * This function is DEPRECATED, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd parameter and leave the rest as-is. + * for ((param, value) : params) { + * ZSTD_CCtx_setParameter(zcs, param, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. + * pledgedSrcSize must be correct. + * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, + unsigned long long pledgedSrcSize); + +/*! ZSTD_initCStream_usingCDict() : + * This function is DEPRECATED, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * note : cdict will just be referenced, and must outlive compression session + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); + +/*! ZSTD_initCStream_usingCDict_advanced() : + * This function is DEPRECATED, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. 
+ * for ((fParam, value) : fParams) { + * ZSTD_CCtx_setParameter(zcs, fParam, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. + * pledgedSrcSize must be correct. If srcSize is not known at init time, use + * value ZSTD_CONTENTSIZE_UNKNOWN. + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize); + +/*! ZSTD_resetCStream() : + * This function is DEPRECATED, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * Note: ZSTD_resetCStream() interprets pledgedSrcSize == 0 as ZSTD_CONTENTSIZE_UNKNOWN, but + * ZSTD_CCtx_setPledgedSrcSize() does not do the same, so ZSTD_CONTENTSIZE_UNKNOWN must be + * explicitly specified. + * + * start a new frame, using same parameters from previous frame. + * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. + * Note that zcs must be init at least once before using ZSTD_resetCStream(). + * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. + * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. + * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, + * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. + * @return : 0, or an error code (which can be tested using ZSTD_isError()) + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); + + +typedef struct { + unsigned long long ingested; /* nb input bytes read and buffered */ + unsigned long long consumed; /* nb input bytes actually compressed */ + unsigned long long produced; /* nb of compressed bytes generated and buffered */ + unsigned long long flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */ + unsigned currentJobID; /* MT only : latest started job nb */ + unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */ +} ZSTD_frameProgression; + +/* ZSTD_getFrameProgression() : + * tells how much data has been ingested (read from input) + * consumed (input actually compressed) and produced (output) for current frame. + * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed. + * Aggregates progression inside active worker threads. + */ +ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); + +/*! ZSTD_toFlushNow() : + * Tell how many bytes are ready to be flushed immediately. + * Useful for multithreading scenarios (nbWorkers >= 1). + * Probe the oldest active job, defined as oldest job not yet entirely flushed, + * and check its output buffer. + * @return : amount of data stored in oldest job and ready to be flushed immediately. 
+ * if @return == 0, it means either : + * + there is no active job (could be checked with ZSTD_frameProgression()), or + * + oldest job is still actively compressing data, + * but everything it has produced has also been flushed so far, + * therefore flush speed is limited by production speed of oldest job + * irrespective of the speed of concurrent (and newer) jobs. + */ +ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); + + +/*===== Advanced Streaming decompression functions =====*/ + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); + * + * note: no dictionary will be used if dict == NULL or dictSize < 8 + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, ddict); + * + * note : ddict is referenced, it must outlive decompression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * + * re-use decompression parameters from previous init; saves dictionary loading + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); + + +/********************************************************************* +* Buffer-less and synchronous inner streaming functions +* +* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. +* But it's also a complex one, with several restrictions, documented below. +* Prefer normal streaming API for an easier experience. +********************************************************************* */ + +/** + Buffer-less streaming compression (synchronous mode) + + A ZSTD_CCtx object is required to track streaming operations. + Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. + ZSTD_CCtx object can be re-used multiple times within successive compression operations. + + Start by initializing a context. + Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression. + It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() + + Then, consume your input using ZSTD_compressContinue(). + There are some important considerations to keep in mind when using this advanced function : + - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only. + - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks. + - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. + Worst case evaluation is provided by ZSTD_compressBound(). + ZSTD_compressContinue() doesn't guarantee recover after a failed compression. + - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog). 
+ It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks) + - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps. + In which case, it will "discard" the relevant memory section from its history. + + Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. + It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. + Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. + + `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. +*/ + +/*===== Buffer-less streaming compression functions =====*/ +ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */ +ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */ +ZSTD_DEPRECATED("use advanced API to access custom parameters") +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTD_DEPRECATED("use advanced API to access custom parameters") +size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ +/** + Buffer-less streaming decompression (synchronous mode) + + A ZSTD_DCtx object is required to track streaming operations. + Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. + A ZSTD_DCtx object can be re-used multiple times. + + First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). + Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. + Data fragment must be large enough to ensure successful decoding. + `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. + @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. + >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. + errorCode, which can be tested using ZSTD_isError(). + + It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, + such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). + Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. 
+ As a consequence, check that values remain within valid application range. + For example, do not allocate memory blindly, check that `windowSize` is within expectation. + Each application can set its own limits, depending on local restrictions. + For extended interoperability, it is recommended to support `windowSize` of at least 8 MB. + + ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes. + ZSTD_decompressContinue() is very sensitive to contiguity, + if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, + or that previous contiguous segment is large enough to properly handle maximum back-reference distance. + There are multiple ways to guarantee this condition. + + The most memory efficient way is to use a round buffer of sufficient size. + Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), + which can @return an error code if required value is too large for current system (in 32-bits mode). + In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, + up to the moment there is not enough room left in the buffer to guarantee decoding another full block, + which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. + At which point, decoding can resume from the beginning of the buffer. + Note that already decoded data stored in the buffer should be flushed before being overwritten. + + There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory. + + Finally, if you control the compression process, you can also ignore all buffer size rules, + as long as the encoder and decoder progress in "lock-step", + aka use exactly the same buffer sizes, break contiguity at the same place, etc. + + Once buffers are setup, start decompression, with ZSTD_decompressBegin(). + If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict(). + + Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. + ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. + + @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). + It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. + It can also be an error code, which can be tested with ZSTD_isError(). + + A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. + Context can then be reset to start a new decompression. + + Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). + This information is not required to properly decode a frame. + + == Special case : skippable frames == + + Skippable frames allow integration of user-defined data into a flow of concatenated frames. + Skippable frames will be ignored (skipped) by decompressor. + The format of skippable frames is as follows : + a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F + b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits + c) Frame Content - any content (User Data) of length equal to Frame Size + For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame. 
+ For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content. +*/ + +/*===== Buffer-less streaming decompression functions =====*/ +typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; +typedef struct { + unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ + unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ + unsigned blockSizeMax; + ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ + unsigned headerSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTD_frameHeader; + +/*! ZSTD_getFrameHeader() : + * decode Frame Header, or requires larger `srcSize`. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */ +/*! ZSTD_getFrameHeader_advanced() : + * same as ZSTD_getFrameHeader(), + * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ +ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); +ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* misc */ +ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); +typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; +ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); + + + + +/* ============================ */ +/** Block level API */ +/* ============================ */ + +/*! + Block functions produce and decode raw zstd blocks, without frame metadata. + Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). + But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. + + A few rules to respect : + - Compressing and decompressing require a context structure + + Use ZSTD_createCCtx() and ZSTD_createDCtx() + - It is necessary to init context before starting + + compression : any ZSTD_compressBegin*() variant, including with dictionary + + decompression : any ZSTD_decompressBegin*() variant, including with dictionary + + copyCCtx() and copyDCtx() can be used too + - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB + + If input is larger than a block size, it's necessary to split input data into multiple blocks + + For inputs larger than a single block, consider using regular ZSTD_compress() instead. 
+ Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. + - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! + ===> In which case, nothing is produced into `dst` ! + + User __must__ test for such outcome and deal directly with uncompressed data + + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. + Doing so would mess up with statistics history, leading to potential data corruption. + + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! + + In case of multiple successive blocks, should some of them be uncompressed, + decoder must be informed of their existence in order to follow proper history. + Use ZSTD_insertBlock() for such a case. +*/ + +/*===== Raw zstd block functions =====*/ +ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ + + +#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif +/**** ended inlining ../zstd.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: huf.h ****/ +#ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ +#endif +/**** start inlining xxhash.h ****/ +/* + * xxHash - Extremely Fast Hash algorithm + * Header File + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - xxHash source repository : https://github.com/Cyan4973/xxHash + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +*/ + +/* Notice extracted from xxHash homepage : + +xxHash is an extremely fast Hash algorithm, running at RAM speed limits. +It also successfully passes all tests from the SMHasher suite. + +Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) + +Name Speed Q.Score Author +xxHash 5.4 GB/s 10 +CrapWow 3.2 GB/s 2 Andrew +MumurHash 3a 2.7 GB/s 10 Austin Appleby +SpookyHash 2.0 GB/s 10 Bob Jenkins +SBox 1.4 GB/s 9 Bret Mulvey +Lookup3 1.2 GB/s 9 Bob Jenkins +SuperFastHash 1.2 GB/s 1 Paul Hsieh +CityHash64 1.05 GB/s 10 Pike & Alakuijala +FNV 0.55 GB/s 5 Fowler, Noll, Vo +CRC32 0.43 GB/s 9 +MD5-32 0.33 GB/s 10 Ronald L. Rivest +SHA1-32 0.28 GB/s 10 + +Q.Score is a measure of quality of the hash function. +It depends on successfully passing SMHasher test set. +10 is a perfect score. + +A 64-bits version, named XXH64, is available since r35. +It offers much better speed, but for 64-bits applications only. 
+Name Speed on 64 bits Speed on 32 bits +XXH64 13.8 GB/s 1.9 GB/s +XXH32 6.8 GB/s 6.0 GB/s +*/ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 + + +/* **************************** +* Definitions +******************************/ +/**** skipping file: zstd_deps.h ****/ +typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; + + +/* **************************** +* API modifier +******************************/ +/** XXH_PRIVATE_API +* This is useful if you want to include xxhash functions in `static` mode +* in order to inline them, and remove their symbol from the public list. +* Methodology : +* #define XXH_PRIVATE_API +* #include "xxhash.h" +* `xxhash.c` is automatically included. +* It's not useful to compile and link it as a separate module anymore. +*/ +#ifdef XXH_PRIVATE_API +# ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +# endif +# if defined(__GNUC__) +# define XXH_PUBLIC_API static __inline __attribute__((unused)) +# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define XXH_PUBLIC_API static inline +# elif defined(_MSC_VER) +# define XXH_PUBLIC_API static __inline +# else +# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ +# endif +#else +# define XXH_PUBLIC_API /* do nothing */ +#endif /* XXH_PRIVATE_API */ + +/*!XXH_NAMESPACE, aka Namespace Emulation : + +If you want to include _and expose_ xxHash functions from within your own library, +but also want to avoid symbol collisions with another library which also includes xxHash, + +you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library +with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). + +Note that no change is required within the calling program as long as it includes `xxhash.h` : +regular symbol name will be automatically translated by this header. 
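+
+For example (a sketch; the `MYLIB_` prefix is an arbitrary placeholder) :
+    #define XXH_NAMESPACE MYLIB_
+    #include "xxhash.h"
+After this, a call such as XXH64(data, size, 0) still compiles unchanged,
+but the exported symbol becomes MYLIB_XXH64.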
+*/ +#ifdef XXH_NAMESPACE +# define XXH_CAT(A,B) A##B +# define XXH_NAME2(A,B) XXH_CAT(A,B) +# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) +# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) +# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) +# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) +# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) +# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) +# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) +# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) +# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) +# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) +# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) +# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) +# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) +# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) +# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) +# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) +# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) +# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) +# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) +#endif + + +/* ************************************* +* Version +***************************************/ +#define XXH_VERSION_MAJOR 0 +#define XXH_VERSION_MINOR 6 +#define XXH_VERSION_RELEASE 2 +#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) +XXH_PUBLIC_API unsigned XXH_versionNumber (void); + + +/* **************************** +* Simple Hash Functions +******************************/ +typedef unsigned int XXH32_hash_t; +typedef unsigned long long XXH64_hash_t; + +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); + +/*! +XXH32() : + Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". + The memory between input & input+length must be valid (allocated and read-accessible). + "seed" can be used to alter the result predictably. + Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s +XXH64() : + Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". + "seed" can be used to alter the result predictably. + This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark). +*/ + + +/* **************************** +* Streaming Hash Functions +******************************/ +typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ + +/*! 
State allocation, compatible with dynamic libraries */ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); + + +/* hash streaming */ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); + +/* +These functions generate the xxHash of an input provided in multiple segments. +Note that, for small input, they are slower than single-call functions, due to state management. +For small input, prefer `XXH32()` and `XXH64()` . + +XXH state must first be allocated, using XXH*_createState() . + +Start a new hash by initializing state with a seed, using XXH*_reset(). + +Then, feed the hash state by calling XXH*_update() as many times as necessary. +Obviously, input must be allocated and read accessible. +The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. + +Finally, a hash value can be produced anytime, by using XXH*_digest(). +This function returns the nn-bits hash as an int or long long. + +It's still possible to continue inserting input into the hash state after a digest, +and generate some new hashes later on, by calling again XXH*_digest(). + +When done, free XXH state space if it was allocated dynamically. +*/ + + +/* ************************** +* Utils +****************************/ +#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */ +# define restrict /* disable restrict */ +#endif + +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state); +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state); + + +/* ************************** +* Canonical representation +****************************/ +/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. +* The canonical representation uses human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. +*/ +typedef struct { unsigned char digest[4]; } XXH32_canonical_t; +typedef struct { unsigned char digest[8]; } XXH64_canonical_t; + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); + +#endif /* XXHASH_H_5627135585666179 */ + + + +/* ================================================================================================ + This section contains definitions which are not guaranteed to remain stable. 
+ They may change in future versions, becoming incompatible with a different version of the library. + They shall only be used with static linking. + Never use these definitions in association with dynamic linking ! +=================================================================================================== */ +#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345) +#define XXH_STATIC_H_3543687687345 + +/* These definitions are only meant to allow allocation of XXH state + statically, on stack, or in a struct for example. + Do not use members directly. */ + + struct XXH32_state_s { + unsigned total_len_32; + unsigned large_len; + unsigned v1; + unsigned v2; + unsigned v3; + unsigned v4; + unsigned mem32[4]; /* buffer defined as U32 for alignment */ + unsigned memsize; + unsigned reserved; /* never read nor write, will be removed in a future version */ + }; /* typedef'd to XXH32_state_t */ + + struct XXH64_state_s { + unsigned long long total_len; + unsigned long long v1; + unsigned long long v2; + unsigned long long v3; + unsigned long long v4; + unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ + unsigned memsize; + unsigned reserved[2]; /* never read nor write, will be removed in a future version */ + }; /* typedef'd to XXH64_state_t */ + + +# ifdef XXH_PRIVATE_API +/**** start inlining xxhash.c ****/ +/* + * xxHash - Fast Hash algorithm + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - xxHash homepage: http://www.xxhash.com + * - xxHash source repository : https://github.com/Cyan4973/xxHash + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +*/ + + +/* ************************************* +* Tuning parameters +***************************************/ +/*!XXH_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. + * It can generate buggy code on targets which do not support unaligned memory accesses. + * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://stackoverflow.com/a/32095106/646947 for details. 
+ * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \ + defined(__ICCARM__) +# define XXH_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/*!XXH_ACCEPT_NULL_INPUT_POINTER : + * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. + * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. + * By default, this option is disabled. To enable it, uncomment below define : + */ +/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ + +/*!XXH_FORCE_NATIVE_FORMAT : + * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. + * Results are therefore identical for little-endian and big-endian CPU. + * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. + * Should endian-independence be of no importance for your application, you may set the #define below to 1, + * to improve speed for Big-endian CPU. + * This option has no impact on Little_Endian CPU. + */ +#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ +# define XXH_FORCE_NATIVE_FORMAT 0 +#endif + +/*!XXH_FORCE_ALIGN_CHECK : + * This is a minor performance trick, only useful with lots of very small keys. + * It means : check for aligned/unaligned input. + * The check costs one initial branch per hash; set to 0 when the input data + * is guaranteed to be aligned. + */ +#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ +# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_FORCE_ALIGN_CHECK 0 +# else +# define XXH_FORCE_ALIGN_CHECK 1 +# endif +#endif + + +/* ************************************* +* Includes & Memory related functions +***************************************/ +/* Modify the local functions below should you wish to use some other memory routines */ +/* for ZSTD_malloc(), ZSTD_free() */ +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ +static void* XXH_malloc(size_t s) { return ZSTD_malloc(s); } +static void XXH_free (void* p) { ZSTD_free(p); } +static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_memcpy(dest,src,size); } + +#ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +#endif +/**** skipping file: xxhash.h ****/ + + +/* ************************************* +* Compiler Specific Options +***************************************/ +/**** skipping file: compiler.h ****/ + + +/* ************************************* +* Basic Types +***************************************/ +/**** skipping file: mem.h ****/ + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. 
Only works on CPU which support unaligned memory access in hardware */ +static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } +static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign; + +static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + +#else + +/* portable and safe solution. Generally efficient. + * see : http://stackoverflow.com/a/32095106/646947 + */ + +static U32 XXH_read32(const void* memPtr) +{ + U32 val; + ZSTD_memcpy(&val, memPtr, sizeof(val)); + return val; +} + +static U64 XXH_read64(const void* memPtr) +{ + U64 val; + ZSTD_memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + + +/* **************************************** +* Compiler-specific Functions and Macros +******************************************/ +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ +#if defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) +#else +#if defined(__ICCARM__) +# include +# define XXH_rotl32(x,r) __ROR(x,(32 - r)) +#else +# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +#endif +# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) +#endif + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap32 _byteswap_ulong +# define XXH_swap64 _byteswap_uint64 +#elif GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +# define XXH_swap64 __builtin_bswap64 +#else +static U32 XXH_swap32 (U32 x) +{ + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +static U64 XXH_swap64 (U64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + + +/* ************************************* +* Architecture Macros +***************************************/ +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; + +/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ +#ifndef XXH_CPU_LITTLE_ENDIAN + static const int g_one = 1; +# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one)) +#endif + + +/* *************************** +* Memory reads +*****************************/ +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); + else + return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); +} + +FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE32_align(ptr, endian, XXH_unaligned); +} + +static U32 XXH_readBE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? 
XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); +} + +FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); + else + return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); +} + +FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE64_align(ptr, endian, XXH_unaligned); +} + +static U64 XXH_readBE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); +} + + +/* ************************************* +* Macros +***************************************/ +#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/* ************************************* +* Constants +***************************************/ +static const U32 PRIME32_1 = 2654435761U; +static const U32 PRIME32_2 = 2246822519U; +static const U32 PRIME32_3 = 3266489917U; +static const U32 PRIME32_4 = 668265263U; +static const U32 PRIME32_5 = 374761393U; + +static const U64 PRIME64_1 = 11400714785074694791ULL; +static const U64 PRIME64_2 = 14029467366897019727ULL; +static const U64 PRIME64_3 = 1609587929392839161ULL; +static const U64 PRIME64_4 = 9650029242287828579ULL; +static const U64 PRIME64_5 = 2870177450012600261ULL; + +XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } + + +/* ************************** +* Utils +****************************/ +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState) +{ + ZSTD_memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState) +{ + ZSTD_memcpy(dstState, srcState, sizeof(*dstState)); +} + + +/* *************************** +* Simple Hash Functions +*****************************/ + +static U32 XXH32_round(U32 seed, U32 input) +{ + seed += input * PRIME32_2; + seed = XXH_rotl32(seed, 13); + seed *= PRIME32_1; + return seed; +} + +FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U32 h32; +#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)16; + } +#endif + + if (len>=16) { + const BYTE* const limit = bEnd - 16; + U32 v1 = seed + PRIME32_1 + PRIME32_2; + U32 v2 = seed + PRIME32_2; + U32 v3 = seed + 0; + U32 v4 = seed - PRIME32_1; + + do { + v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; + v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; + v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; + v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; + } while (p<=limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } else { + h32 = seed + PRIME32_5; + } + + h32 += (U32) len; + + while (p+4<=bEnd) { + h32 += XXH_get32bits(p) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + p+=4; + } + + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small 
inputs */ + XXH32_CREATESTATE_STATIC(state); + XXH32_reset(state, seed); + XXH32_update(state, input, len); + return XXH32_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + +static U64 XXH64_round(U64 acc, U64 input) +{ + acc += input * PRIME64_2; + acc = XXH_rotl64(acc, 31); + acc *= PRIME64_1; + return acc; +} + +static U64 XXH64_mergeRound(U64 acc, U64 val) +{ + val = XXH64_round(0, val); + acc ^= val; + acc = acc * PRIME64_1 + PRIME64_4; + return acc; +} + +FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + U64 h64; +#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)32; + } +#endif + + if (len>=32) { + const BYTE* const limit = bEnd - 32; + U64 v1 = seed + PRIME64_1 + PRIME64_2; + U64 v2 = seed + PRIME64_2; + U64 v3 = seed + 0; + U64 v4 = seed - PRIME64_1; + + do { + v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; + v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; + v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; + v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; + } while (p<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + + } else { + h64 = seed + PRIME64_5; + } + + h64 += (U64) len; + + while (p+8<=bEnd) { + U64 const k1 = XXH64_round(0, XXH_get64bits(p)); + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) { + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH64_CREATESTATE_STATIC(state); + XXH64_reset(state, seed); + XXH64_update(state, input, len); + return XXH64_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return 
XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + +/* ************************************************** +* Advanced Hash Functions +****************************************************/ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) +{ + return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) +{ + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + + +/*** Hash feed ***/ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) +{ + XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + ZSTD_memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */ + state.v1 = seed + PRIME32_1 + PRIME32_2; + state.v2 = seed + PRIME32_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME32_1; + ZSTD_memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; +} + + +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) +{ + XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + ZSTD_memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */ + state.v1 = seed + PRIME64_1 + PRIME64_2; + state.v2 = seed + PRIME64_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME64_1; + ZSTD_memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; +} + + +FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len_32 += (unsigned)len; + state->large_len |= (len>=16) | (state->total_len_32>=16); + + if (state->memsize + len < 16) { /* fill in tmp buffer */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); + state->memsize += (unsigned)len; + return XXH_OK; + } + + if (state->memsize) { /* some data left from previous update */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); + { const U32* p32 = state->mem32; + state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; + state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; + state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; + state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++; + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) { + const BYTE* const limit = bEnd - 16; + U32 v1 = state->v1; + U32 v2 = state->v2; + U32 v3 = state->v3; + U32 v4 = state->v4; + + do { + v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; + v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; + v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; + v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess 
endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH32_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem32; + const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; + U32 h32; + + if (state->large_len) { + h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); + } else { + h32 = state->v3 /* == seed */ + PRIME32_5; + } + + h32 += state->total_len_32; + + while (p+4<=bEnd) { + h32 += XXH_readLE32(p, endian) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4; + p+=4; + } + + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_digest_endian(state_in, XXH_littleEndian); + else + return XXH32_digest_endian(state_in, XXH_bigEndian); +} + + + +/* **** XXH64 **** */ + +FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len += len; + + if (state->memsize + len < 32) { /* fill in tmp buffer */ + if (input != NULL) { + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); + } + state->memsize += (U32)len; + return XXH_OK; + } + + if (state->memsize) { /* tmp buffer is full */ + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); + state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); + state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); + state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); + state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); + p += 32-state->memsize; + state->memsize = 0; + } + + if (p+32 <= bEnd) { + const BYTE* const limit = bEnd - 32; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; + + do { + v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; + v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; + v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; + v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH64_update_endian(state_in, input, len, XXH_bigEndian); +} + + + +FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem64; + 
const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; + U64 h64; + + if (state->total_len >= 32) { + U64 const v1 = state->v1; + U64 const v2 = state->v2; + U64 const v3 = state->v3; + U64 const v4 = state->v4; + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + } else { + h64 = state->v3 + PRIME64_5; + } + + h64 += (U64) state->total_len; + + while (p+8<=bEnd) { + U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian)); + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) { + h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_digest_endian(state_in, XXH_littleEndian); + else + return XXH64_digest_endian(state_in, XXH_bigEndian); +} + + +/* ************************** +* Canonical representation +****************************/ + +/*! Default XXH result types are basic unsigned 32 and 64 bits. +* The canonical representation follows human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs. +*/ + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); + ZSTD_memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); + ZSTD_memcpy(dst, &hash, sizeof(*dst)); +} + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) +{ + return XXH_readBE32(src); +} + +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) +{ + return XXH_readBE64(src); +} +/**** ended inlining xxhash.c ****/ +# endif + +#endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */ + + +#if defined (__cplusplus) +} +#endif +/**** ended inlining xxhash.h ****/ +#ifndef ZSTD_NO_TRACE +/**** start inlining zstd_trace.h ****/ +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_TRACE_H +#define ZSTD_TRACE_H + +#if defined (__cplusplus) +extern "C" { +#endif + +#include + +/* weak symbol support */ +#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && defined(__GNUC__) && \ + !defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \ + !defined(__CYGWIN__) +# define ZSTD_HAVE_WEAK_SYMBOLS 1 +#else +# define ZSTD_HAVE_WEAK_SYMBOLS 0 +#endif +#if ZSTD_HAVE_WEAK_SYMBOLS +# define ZSTD_WEAK_ATTR __attribute__((__weak__)) +#else +# define ZSTD_WEAK_ATTR +#endif + +/* Only enable tracing when weak symbols are available. */ +#ifndef ZSTD_TRACE +# define ZSTD_TRACE ZSTD_HAVE_WEAK_SYMBOLS +#endif + +#if ZSTD_TRACE + +struct ZSTD_CCtx_s; +struct ZSTD_DCtx_s; +struct ZSTD_CCtx_params_s; + +typedef struct { + /** + * ZSTD_VERSION_NUMBER + * + * This is guaranteed to be the first member of ZSTD_trace. + * Otherwise, this struct is not stable between versions. If + * the version number does not match your expectation, you + * should not interpret the rest of the struct. + */ + unsigned version; + /** + * Non-zero if streaming (de)compression is used. + */ + unsigned streaming; + /** + * The dictionary ID. + */ + unsigned dictionaryID; + /** + * Is the dictionary cold? + * Only set on decompression. + */ + unsigned dictionaryIsCold; + /** + * The dictionary size or zero if no dictionary. + */ + size_t dictionarySize; + /** + * The uncompressed size of the data. + */ + size_t uncompressedSize; + /** + * The compressed size of the data. + */ + size_t compressedSize; + /** + * The fully resolved CCtx parameters (NULL on decompression). + */ + struct ZSTD_CCtx_params_s const* params; + /** + * The ZSTD_CCtx pointer (NULL on decompression). + */ + struct ZSTD_CCtx_s const* cctx; + /** + * The ZSTD_DCtx pointer (NULL on compression). + */ + struct ZSTD_DCtx_s const* dctx; +} ZSTD_Trace; + +/** + * A tracing context. It must be 0 when tracing is disabled. + * Otherwise, any non-zero value returned by a tracing begin() + * function is presented to any subsequent calls to end(). + * + * Any non-zero value is treated as tracing is enabled and not + * interpreted by the library. + * + * Two possible uses are: + * * A timestamp for when the begin() function was called. + * * A unique key identifying the (de)compression, like the + * address of the [dc]ctx pointer if you need to track + * more information than just a timestamp. + */ +typedef unsigned long long ZSTD_TraceCtx; + +/** + * Trace the beginning of a compression call. + * @param cctx The dctx pointer for the compression. + * It can be used as a key to map begin() to end(). + * @returns Non-zero if tracing is enabled. The return value is + * passed to ZSTD_trace_compress_end(). + */ +ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_compress_begin( + struct ZSTD_CCtx_s const* cctx); + +/** + * Trace the end of a compression call. + * @param ctx The return value of ZSTD_trace_compress_begin(). + * @param trace The zstd tracing info. + */ +ZSTD_WEAK_ATTR void ZSTD_trace_compress_end( + ZSTD_TraceCtx ctx, + ZSTD_Trace const* trace); + +/** + * Trace the beginning of a decompression call. + * @param dctx The dctx pointer for the decompression. + * It can be used as a key to map begin() to end(). + * @returns Non-zero if tracing is enabled. The return value is + * passed to ZSTD_trace_compress_end(). + */ +ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_decompress_begin( + struct ZSTD_DCtx_s const* dctx); + +/** + * Trace the end of a decompression call. + * @param ctx The return value of ZSTD_trace_decompress_begin(). 
+ * @param trace The zstd tracing info. + */ +ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end( + ZSTD_TraceCtx ctx, + ZSTD_Trace const* trace); + +#endif /* ZSTD_TRACE */ + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_TRACE_H */ +/**** ended inlining zstd_trace.h ****/ +#else +# define ZSTD_TRACE 0 +#endif + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ---- static assert (debug) --- */ +#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) +#define ZSTD_isError ERR_isError /* for inlining */ +#define FSE_isError ERR_isError +#define HUF_isError ERR_isError + + +/*-************************************* +* shared macros +***************************************/ +#undef MIN +#undef MAX +#define MIN(a,b) ((a)<(b) ? (a) : (b)) +#define MAX(a,b) ((a)>(b) ? (a) : (b)) + +/** + * Ignore: this is an internal helper. + * + * This is a helper function to help force C99-correctness during compilation. + * Under strict compilation modes, variadic macro arguments can't be empty. + * However, variadic function arguments can be. Using a function therefore lets + * us statically check that at least one (string) argument was passed, + * independent of the compilation flags. + */ +static INLINE_KEYWORD UNUSED_ATTR +void _force_has_format_string(const char *format, ...) { + (void)format; +} + +/** + * Ignore: this is an internal helper. + * + * We want to force this function invocation to be syntactically correct, but + * we don't want to force runtime evaluation of its arguments. + */ +#define _FORCE_HAS_FORMAT_STRING(...) \ + if (0) { \ + _force_has_format_string(__VA_ARGS__); \ + } + +/** + * Return the specified error if the condition evaluates to true. + * + * In debug modes, prints additional information. + * In order to do that (particularly, printing the conditional that failed), + * this can't just wrap RETURN_ERROR(). + */ +#define RETURN_ERROR_IF(cond, err, ...) \ + if (cond) { \ + RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } + +/** + * Unconditionally return the specified error. + * + * In debug modes, prints additional information. + */ +#define RETURN_ERROR(err, ...) \ + do { \ + RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } while(0); + +/** + * If the provided expression evaluates to an error code, returns that error code. + * + * In debug modes, prints additional information. + */ +#define FORWARD_IF_ERROR(err, ...) 
\ + do { \ + size_t const err_code = (err); \ + if (ERR_isError(err_code)) { \ + RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return err_code; \ + } \ + } while(0); + + +/*-************************************* +* Common constants +***************************************/ +#define ZSTD_OPT_NUM (1<<12) + +#define ZSTD_REP_NUM 3 /* number of repcodes */ +#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) +static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define BIT7 128 +#define BIT6 64 +#define BIT5 32 +#define BIT4 16 +#define BIT1 2 +#define BIT0 1 + +#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 +static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; +static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; + +#define ZSTD_FRAMEIDSIZE 4 /* magic number size */ + +#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ +static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; +typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; + +#define ZSTD_FRAMECHECKSUMSIZE 4 + +#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ +#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ + +#define HufLog 12 +typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; + +#define LONGNBSEQ 0x7F00 + +#define MINMATCH 3 + +#define Litbits 8 +#define MaxLit ((1<= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); + + if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { + /* Handle short offset copies. */ + do { + COPY8(op, ip) + } while (op < oend); + } else { + assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); + /* Separate out the first COPY16() call because the copy length is + * almost certain to be short, so the branches have different + * probabilities. Since it is almost certain to be short, only do + * one COPY16() in the first call. Then, do two calls per loop since + * at that point it is more likely to have a high trip count. + */ +#ifdef __aarch64__ + do { + COPY16(op, ip); + } + while (op < oend); +#else + ZSTD_copy16(op, ip); + if (16 >= length) return; + op += 16; + ip += 16; + do { + COPY16(op, ip); + COPY16(op, ip); + } + while (op < oend); +#endif + } +} + +MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const length = MIN(dstCapacity, srcSize); + if (length > 0) { + ZSTD_memcpy(dst, src, length); + } + return length; +} + +/* define "workspace is too large" as this number of times larger than needed */ +#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 + +/* when workspace is continuously too large + * during at least this number of times, + * context's memory usage is considered wasteful, + * because it's sized to handle a worst case scenario which rarely happens. + * In which case, resize it down to free some memory */ +#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 + +/* Controls whether the input/output buffer is buffered or stable. 
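+ * In the stable mode the caller guarantees that the memory referenced by the
+ * ZSTD_inBuffer/ZSTD_outBuffer remains valid and unmodified between calls, so
+ * the context may read and write it directly instead of copying the data
+ * through its internal buffers.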
*/ +typedef enum { + ZSTD_bm_buffered = 0, /* Buffer the input/output */ + ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */ +} ZSTD_bufferMode_e; + + +/*-******************************************* +* Private declarations +*********************************************/ +typedef struct seqDef_s { + U32 offset; /* offset == rawOffset + ZSTD_REP_NUM, or equivalently, offCode + 1 */ + U16 litLength; + U16 matchLength; +} seqDef; + +/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */ +typedef enum { + ZSTD_llt_none = 0, /* no longLengthType */ + ZSTD_llt_literalLength = 1, /* represents a long literal */ + ZSTD_llt_matchLength = 2 /* represents a long match */ +} ZSTD_longLengthType_e; + +typedef struct { + seqDef* sequencesStart; + seqDef* sequences; /* ptr to end of sequences */ + BYTE* litStart; + BYTE* lit; /* ptr to end of literals */ + BYTE* llCode; + BYTE* mlCode; + BYTE* ofCode; + size_t maxNbSeq; + size_t maxNbLit; + + /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength + * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment + * the existing value of the litLength or matchLength by 0x10000. + */ + ZSTD_longLengthType_e longLengthType; + U32 longLengthPos; /* Index of the sequence to apply long length modification to */ +} seqStore_t; + +typedef struct { + U32 litLength; + U32 matchLength; +} ZSTD_sequenceLength; + +/** + * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences + * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength. + */ +MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) +{ + ZSTD_sequenceLength seqLen; + seqLen.litLength = seq->litLength; + seqLen.matchLength = seq->matchLength + MINMATCH; + if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { + if (seqStore->longLengthType == ZSTD_llt_literalLength) { + seqLen.litLength += 0xFFFF; + } + if (seqStore->longLengthType == ZSTD_llt_matchLength) { + seqLen.matchLength += 0xFFFF; + } + } + return seqLen; +} + +/** + * Contains the compressed frame size and an upper-bound for the decompressed frame size. + * Note: before using `compressedSize`, check for errors using ZSTD_isError(). + * similarly, before using `decompressedBound`, check for errors using: + * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` + */ +typedef struct { + size_t compressedSize; + unsigned long long decompressedBound; +} ZSTD_frameSizeInfo; /* decompress & legacy */ + +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ + +/* custom memory allocation functions */ +void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem); +void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem); +void ZSTD_customFree(void* ptr, ZSTD_customMem customMem); + + +MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ +# if STATIC_BMI2 == 1 + return _lzcnt_u32(val)^31; +# else + unsigned long r=0; + return _BitScanReverse(&r, val) ? 
(unsigned)r : 0; +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ + return __builtin_clz (val) ^ 31; +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return 31 - __CLZ(val); +# else /* Software version */ + static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; +# endif + } +} + + +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! */ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */ + + +typedef struct { + blockType_e blockType; + U32 lastBlock; + U32 origSize; +} blockProperties_t; /* declared here for decompress and fullbench */ + +/*! ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr); + +/*! ZSTD_decodeSeqHeaders() : + * decode sequence header from src */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_CCOMMON_H_MODULE */ +/**** ended inlining zstd_internal.h ****/ +/**** start inlining pool.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef POOL_H +#define POOL_H + +#if defined (__cplusplus) +extern "C" { +#endif + + +/**** skipping file: zstd_deps.h ****/ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_customMem */ +/**** skipping file: ../zstd.h ****/ + +typedef struct POOL_ctx_s POOL_ctx; + +/*! POOL_create() : + * Create a thread pool with at most `numThreads` threads. + * `numThreads` must be at least 1. + * The maximum number of queued jobs before blocking is `queueSize`. + * @return : POOL_ctx pointer on success, else NULL. +*/ +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize); + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, + ZSTD_customMem customMem); + +/*! POOL_free() : + * Free a thread pool returned by POOL_create(). + */ +void POOL_free(POOL_ctx* ctx); + +/*! POOL_resize() : + * Expands or shrinks pool's number of threads. + * This is more efficient than releasing + creating a new context, + * since it tries to preserve and re-use existing threads. + * `numThreads` must be at least 1. + * @return : 0 when resize was successful, + * !0 (typically 1) if there is an error. + * note : only numThreads can be resized, queueSize remains unchanged. + */ +int POOL_resize(POOL_ctx* ctx, size_t numThreads); + +/*! POOL_sizeof() : + * @return threadpool memory usage + * note : compatible with NULL (returns 0 in this case) + */ +size_t POOL_sizeof(POOL_ctx* ctx); + +/*! POOL_function : + * The function type that can be added to a thread pool. 
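+ * As an illustrative caller-side sketch (hypothetical names, not part of this
+ * header), a job with this signature is created and submitted as follows:
+ *     static void incrementJob(void* opaque) { *(int*)opaque += 1; }
+ *
+ *     int counter = 0;
+ *     POOL_ctx* const pool = POOL_create(2, 4);     // 2 threads, queue of 4 jobs
+ *     if (pool != NULL) {
+ *         POOL_add(pool, incrementJob, &counter);   // may block until queue has room
+ *         POOL_free(pool);                          // drains queued jobs, joins threads
+ *     }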
+ */ +typedef void (*POOL_function)(void*); + +/*! POOL_add() : + * Add the job `function(opaque)` to the thread pool. `ctx` must be valid. + * Possibly blocks until there is room in the queue. + * Note : The function may be executed asynchronously, + * therefore, `opaque` must live until function has been completed. + */ +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque); + + +/*! POOL_tryAdd() : + * Add the job `function(opaque)` to thread pool _if_ a worker is available. + * Returns immediately even if not (does not block). + * @return : 1 if successful, 0 if not. + */ +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque); + + +#if defined (__cplusplus) +} +#endif + +#endif +/**** ended inlining pool.h ****/ + +/* ====== Compiler specifics ====== */ +#if defined(_MSC_VER) +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +#endif + + +#ifdef ZSTD_MULTITHREAD + +/**** skipping file: threading.h ****/ + +/* A job is a function and an opaque argument */ +typedef struct POOL_job_s { + POOL_function function; + void *opaque; +} POOL_job; + +struct POOL_ctx_s { + ZSTD_customMem customMem; + /* Keep track of the threads */ + ZSTD_pthread_t* threads; + size_t threadCapacity; + size_t threadLimit; + + /* The queue is a circular buffer */ + POOL_job *queue; + size_t queueHead; + size_t queueTail; + size_t queueSize; + + /* The number of threads working on jobs */ + size_t numThreadsBusy; + /* Indicates if the queue is empty */ + int queueEmpty; + + /* The mutex protects the queue */ + ZSTD_pthread_mutex_t queueMutex; + /* Condition variable for pushers to wait on when the queue is full */ + ZSTD_pthread_cond_t queuePushCond; + /* Condition variables for poppers to wait on when the queue is empty */ + ZSTD_pthread_cond_t queuePopCond; + /* Indicates if the queue is shutting down */ + int shutdown; +}; + +/* POOL_thread() : + * Work thread for the thread pool. + * Waits for jobs and executes them. + * @returns : NULL on failure else non-null. 
+ */ +static void* POOL_thread(void* opaque) { + POOL_ctx* const ctx = (POOL_ctx*)opaque; + if (!ctx) { return NULL; } + for (;;) { + /* Lock the mutex and wait for a non-empty queue or until shutdown */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + + while ( ctx->queueEmpty + || (ctx->numThreadsBusy >= ctx->threadLimit) ) { + if (ctx->shutdown) { + /* even if !queueEmpty, (possible if numThreadsBusy >= threadLimit), + * a few threads will be shutdown while !queueEmpty, + * but enough threads will remain active to finish the queue */ + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return opaque; + } + ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex); + } + /* Pop a job off the queue */ + { POOL_job const job = ctx->queue[ctx->queueHead]; + ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize; + ctx->numThreadsBusy++; + ctx->queueEmpty = ctx->queueHead == ctx->queueTail; + /* Unlock the mutex, signal a pusher, and run the job */ + ZSTD_pthread_cond_signal(&ctx->queuePushCond); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + + job.function(job.opaque); + + /* If the intended queue size was 0, signal after finishing job */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + ctx->numThreadsBusy--; + if (ctx->queueSize == 1) { + ZSTD_pthread_cond_signal(&ctx->queuePushCond); + } + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + } + } /* for (;;) */ + assert(0); /* Unreachable */ +} + +POOL_ctx* ZSTD_createThreadPool(size_t numThreads) { + return POOL_create (numThreads, 0); +} + +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { + return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); +} + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, + ZSTD_customMem customMem) { + POOL_ctx* ctx; + /* Check parameters */ + if (!numThreads) { return NULL; } + /* Allocate the context and zero initialize */ + ctx = (POOL_ctx*)ZSTD_customCalloc(sizeof(POOL_ctx), customMem); + if (!ctx) { return NULL; } + /* Initialize the job queue. + * It needs one extra space since one space is wasted to differentiate + * empty and full queues. + */ + ctx->queueSize = queueSize + 1; + ctx->queue = (POOL_job*)ZSTD_customMalloc(ctx->queueSize * sizeof(POOL_job), customMem); + ctx->queueHead = 0; + ctx->queueTail = 0; + ctx->numThreadsBusy = 0; + ctx->queueEmpty = 1; + { + int error = 0; + error |= ZSTD_pthread_mutex_init(&ctx->queueMutex, NULL); + error |= ZSTD_pthread_cond_init(&ctx->queuePushCond, NULL); + error |= ZSTD_pthread_cond_init(&ctx->queuePopCond, NULL); + if (error) { POOL_free(ctx); return NULL; } + } + ctx->shutdown = 0; + /* Allocate space for the thread handles */ + ctx->threads = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), customMem); + ctx->threadCapacity = 0; + ctx->customMem = customMem; + /* Check for errors */ + if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; } + /* Initialize the threads */ + { size_t i; + for (i = 0; i < numThreads; ++i) { + if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) { + ctx->threadCapacity = i; + POOL_free(ctx); + return NULL; + } } + ctx->threadCapacity = numThreads; + ctx->threadLimit = numThreads; + } + return ctx; +} + +/*! POOL_join() : + Shutdown the queue, wake any sleeping threads, and join all of the threads. 
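+ Note that jobs still sitting in the queue at shutdown are picked up and
+ completed by the remaining workers before the threads are joined (see the
+ shutdown handling in POOL_thread() above), so no queued work is dropped.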
+*/ +static void POOL_join(POOL_ctx* ctx) { + /* Shut down the queue */ + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + ctx->shutdown = 1; + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + /* Wake up sleeping threads */ + ZSTD_pthread_cond_broadcast(&ctx->queuePushCond); + ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); + /* Join all of the threads */ + { size_t i; + for (i = 0; i < ctx->threadCapacity; ++i) { + ZSTD_pthread_join(ctx->threads[i], NULL); /* note : could fail */ + } } +} + +void POOL_free(POOL_ctx *ctx) { + if (!ctx) { return; } + POOL_join(ctx); + ZSTD_pthread_mutex_destroy(&ctx->queueMutex); + ZSTD_pthread_cond_destroy(&ctx->queuePushCond); + ZSTD_pthread_cond_destroy(&ctx->queuePopCond); + ZSTD_customFree(ctx->queue, ctx->customMem); + ZSTD_customFree(ctx->threads, ctx->customMem); + ZSTD_customFree(ctx, ctx->customMem); +} + +void ZSTD_freeThreadPool (ZSTD_threadPool* pool) { + POOL_free (pool); +} + +size_t POOL_sizeof(POOL_ctx *ctx) { + if (ctx==NULL) return 0; /* supports sizeof NULL */ + return sizeof(*ctx) + + ctx->queueSize * sizeof(POOL_job) + + ctx->threadCapacity * sizeof(ZSTD_pthread_t); +} + + +/* @return : 0 on success, 1 on error */ +static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads) +{ + if (numThreads <= ctx->threadCapacity) { + if (!numThreads) return 1; + ctx->threadLimit = numThreads; + return 0; + } + /* numThreads > threadCapacity */ + { ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customMalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem); + if (!threadPool) return 1; + /* replace existing thread pool */ + ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool)); + ZSTD_customFree(ctx->threads, ctx->customMem); + ctx->threads = threadPool; + /* Initialize additional threads */ + { size_t threadId; + for (threadId = ctx->threadCapacity; threadId < numThreads; ++threadId) { + if (ZSTD_pthread_create(&threadPool[threadId], NULL, &POOL_thread, ctx)) { + ctx->threadCapacity = threadId; + return 1; + } } + } } + /* successfully expanded */ + ctx->threadCapacity = numThreads; + ctx->threadLimit = numThreads; + return 0; +} + +/* @return : 0 on success, 1 on error */ +int POOL_resize(POOL_ctx* ctx, size_t numThreads) +{ + int result; + if (ctx==NULL) return 1; + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + result = POOL_resize_internal(ctx, numThreads); + ZSTD_pthread_cond_broadcast(&ctx->queuePopCond); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return result; +} + +/** + * Returns 1 if the queue is full and 0 otherwise. + * + * When queueSize is 1 (pool was created with an intended queueSize of 0), + * then a queue is empty if there is a thread free _and_ no job is waiting. 
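+ * In that degenerate case the one-slot queue cannot distinguish "full" from
+ * "has a pending job", so fullness is derived from numThreadsBusy and
+ * queueEmpty instead of from the head/tail indices.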
+ */ +static int isQueueFull(POOL_ctx const* ctx) { + if (ctx->queueSize > 1) { + return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize); + } else { + return (ctx->numThreadsBusy == ctx->threadLimit) || + !ctx->queueEmpty; + } +} + + +static void POOL_add_internal(POOL_ctx* ctx, POOL_function function, void *opaque) +{ + POOL_job const job = {function, opaque}; + assert(ctx != NULL); + if (ctx->shutdown) return; + + ctx->queueEmpty = 0; + ctx->queue[ctx->queueTail] = job; + ctx->queueTail = (ctx->queueTail + 1) % ctx->queueSize; + ZSTD_pthread_cond_signal(&ctx->queuePopCond); +} + +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) +{ + assert(ctx != NULL); + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + /* Wait until there is space in the queue for the new job */ + while (isQueueFull(ctx) && (!ctx->shutdown)) { + ZSTD_pthread_cond_wait(&ctx->queuePushCond, &ctx->queueMutex); + } + POOL_add_internal(ctx, function, opaque); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); +} + + +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) +{ + assert(ctx != NULL); + ZSTD_pthread_mutex_lock(&ctx->queueMutex); + if (isQueueFull(ctx)) { + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return 0; + } + POOL_add_internal(ctx, function, opaque); + ZSTD_pthread_mutex_unlock(&ctx->queueMutex); + return 1; +} + + +#else /* ZSTD_MULTITHREAD not defined */ + +/* ========================== */ +/* No multi-threading support */ +/* ========================== */ + + +/* We don't need any data, but if it is empty, malloc() might return NULL. */ +struct POOL_ctx_s { + int dummy; +}; +static POOL_ctx g_poolCtx; + +POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) { + return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem); +} + +POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) { + (void)numThreads; + (void)queueSize; + (void)customMem; + return &g_poolCtx; +} + +void POOL_free(POOL_ctx* ctx) { + assert(!ctx || ctx == &g_poolCtx); + (void)ctx; +} + +int POOL_resize(POOL_ctx* ctx, size_t numThreads) { + (void)ctx; (void)numThreads; + return 0; +} + +void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) { + (void)ctx; + function(opaque); +} + +int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque) { + (void)ctx; + function(opaque); + return 1; +} + +size_t POOL_sizeof(POOL_ctx* ctx) { + if (ctx==NULL) return 0; /* supports sizeof NULL */ + assert(ctx == &g_poolCtx); + return sizeof(*ctx); +} + +#endif /* ZSTD_MULTITHREAD */ +/**** ended inlining common/pool.c ****/ +/**** start inlining common/zstd_common.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + + +/*-************************************* +* Dependencies +***************************************/ +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ +/**** skipping file: error_private.h ****/ +/**** skipping file: zstd_internal.h ****/ + + +/*-**************************************** +* Version +******************************************/ +unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; } + +const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } + + +/*-**************************************** +* ZSTD Error Management +******************************************/ +#undef ZSTD_isError /* defined within zstd_internal.h */ +/*! ZSTD_isError() : + * tells if a return value is an error code + * symbol is required for external callers */ +unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } + +/*! ZSTD_getErrorName() : + * provides error code string from function result (useful for debugging) */ +const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } + +/*! ZSTD_getError() : + * convert a `size_t` function result into a proper ZSTD_errorCode enum */ +ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } + +/*! ZSTD_getErrorString() : + * provides error code string from enum */ +const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } + + + +/*=************************************************************** +* Custom allocator +****************************************************************/ +void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) + return customMem.customAlloc(customMem.opaque, size); + return ZSTD_malloc(size); +} + +void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) { + /* calloc implemented as malloc+memset; + * not as efficient as calloc, but next best guess for custom malloc */ + void* const ptr = customMem.customAlloc(customMem.opaque, size); + ZSTD_memset(ptr, 0, size); + return ptr; + } + return ZSTD_calloc(1, size); +} + +void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) +{ + if (ptr!=NULL) { + if (customMem.customFree) + customMem.customFree(customMem.opaque, ptr); + else + ZSTD_free(ptr); + } +} +/**** ended inlining common/zstd_common.c ****/ + +/**** start inlining compress/fse_compress.c ****/ +/* ****************************************************************** + * FSE : Finite State Entropy encoder + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + +/* ************************************************************** +* Includes +****************************************************************/ +/**** skipping file: ../common/compiler.h ****/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/debug.h ****/ +/**** start inlining hist.h ****/ +/* ****************************************************************** + * hist : Histogram functions + * part of Finite State Entropy project + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* --- dependencies --- */ +/**** skipping file: ../common/zstd_deps.h ****/ + + +/* --- simple histogram functions --- */ + +/*! HIST_count(): + * Provides the precise count of each byte within a table 'count'. + * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). + * Updates *maxSymbolValuePtr with actual largest symbol value detected. + * @return : count of the most frequent symbol (which isn't identified). + * or an error code, which can be tested using HIST_isError(). + * note : if return == srcSize, there is only one symbol. + */ +size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); + +unsigned HIST_isError(size_t code); /**< tells if a return value is an error code */ + + +/* --- advanced histogram functions --- */ + +#define HIST_WKSP_SIZE_U32 1024 +#define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned)) +/** HIST_count_wksp() : + * Same as HIST_count(), but using an externally provided scratch buffer. + * Benefit is this function will use very little stack space. + * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize, + void* workSpace, size_t workSpaceSize); + +/** HIST_countFast() : + * same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr. + * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` + */ +size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); + +/** HIST_countFast_wksp() : + * Same as HIST_countFast(), but using an externally provided scratch buffer. + * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize, + void* workSpace, size_t workSpaceSize); + +/*! HIST_count_simple() : + * Same as HIST_countFast(), this function is unsafe, + * and will segfault if any value within `src` is `> *maxSymbolValuePtr`. + * It is also a bit slower for large inputs. + * However, it does not need any additional memory (not even on stack). + * @return : count of the most frequent symbol. 
+ * Note this function doesn't produce any error (i.e. it must succeed). + */ +unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); +/**** ended inlining hist.h ****/ +/**** skipping file: ../common/bitstream.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: ../common/fse.h ****/ +/**** skipping file: ../common/error_private.h ****/ +#define ZSTD_DEPS_NEED_MALLOC +#define ZSTD_DEPS_NEED_MATH64 +/**** skipping file: ../common/zstd_deps.h ****/ + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define FSE_isError ERR_isError + + +/* ************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + +/* Function templates */ + +/* FSE_buildCTable_wksp() : + * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). + * wkspSize should be sized to handle worst case situation, which is `1<>1 : 1) ; + FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + U32 const step = FSE_TABLESTEP(tableSize); + + U32* cumul = (U32*)workSpace; + FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2)); + + U32 highThreshold = tableSize-1; + + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */ + if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge); + /* CTable header */ + tableU16[-2] = (U16) tableLog; + tableU16[-1] = (U16) maxSymbolValue; + assert(tableLog < 16); /* required for threshold strategy to work */ + + /* For explanations on how to distribute symbol values over the table : + * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ + + #ifdef __clang_analyzer__ + ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ + #endif + + /* symbol start positions */ + { U32 u; + cumul[0] = 0; + for (u=1; u <= maxSymbolValue+1; u++) { + if (normalizedCounter[u-1]==-1) { /* Low proba symbol */ + cumul[u] = cumul[u-1] + 1; + tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); + } else { + cumul[u] = cumul[u-1] + normalizedCounter[u-1]; + } } + cumul[maxSymbolValue+1] = tableSize+1; + } + + /* Spread symbols */ + { U32 position = 0; + U32 symbol; + for (symbol=0; symbol<=maxSymbolValue; symbol++) { + int nbOccurrences; + int const freq = normalizedCounter[symbol]; + for (nbOccurrences=0; nbOccurrences highThreshold) + position = (position + step) & tableMask; /* Low proba area */ + } } + + assert(position==0); /* Must have initialized all positions */ + } + + /* Build table */ + { U32 u; for (u=0; u> 3) + 3; + return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? 
use default */ +} + +static size_t +FSE_writeNCount_generic (void* header, size_t headerBufferSize, + const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, + unsigned writeIsSafe) +{ + BYTE* const ostart = (BYTE*) header; + BYTE* out = ostart; + BYTE* const oend = ostart + headerBufferSize; + int nbBits; + const int tableSize = 1 << tableLog; + int remaining; + int threshold; + U32 bitStream = 0; + int bitCount = 0; + unsigned symbol = 0; + unsigned const alphabetSize = maxSymbolValue + 1; + int previousIs0 = 0; + + /* Table Size */ + bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount; + bitCount += 4; + + /* Init */ + remaining = tableSize+1; /* +1 for extra accuracy */ + threshold = tableSize; + nbBits = tableLog+1; + + while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */ + if (previousIs0) { + unsigned start = symbol; + while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++; + if (symbol == alphabetSize) break; /* incorrect distribution */ + while (symbol >= start+24) { + start+=24; + bitStream += 0xFFFFU << bitCount; + if ((!writeIsSafe) && (out > oend-2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE) bitStream; + out[1] = (BYTE)(bitStream>>8); + out+=2; + bitStream>>=16; + } + while (symbol >= start+3) { + start+=3; + bitStream += 3 << bitCount; + bitCount += 2; + } + bitStream += (symbol-start) << bitCount; + bitCount += 2; + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + { int count = normalizedCounter[symbol++]; + int const max = (2*threshold-1) - remaining; + remaining -= count < 0 ? -count : count; + count++; /* +1 for extra accuracy */ + if (count>=threshold) + count += max; /* [0..max[ [max..threshold[ (...) 
[threshold+max 2*threshold[ */ + bitStream += count << bitCount; + bitCount += nbBits; + bitCount -= (count>=1; } + } + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + + if (remaining != 1) + return ERROR(GENERIC); /* incorrect normalized distribution */ + assert(symbol <= alphabetSize); + + /* flush remaining bitStream */ + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out+= (bitCount+7) /8; + + return (out-ostart); +} + + +size_t FSE_writeNCount (void* buffer, size_t bufferSize, + const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */ + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */ + + if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); + + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */); +} + + +/*-************************************************************** +* FSE Compression Code +****************************************************************/ + +FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) +{ + size_t size; + if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; + size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); + return (FSE_CTable*)ZSTD_malloc(size); +} + +void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); } + +/* provides the minimum logSize to safely represent a distribution */ +static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) +{ + U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1; + U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; + U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; + assert(srcSize > 1); /* Not supported, RLE should be used instead */ + return minBits; +} + +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) +{ + U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; + U32 tableLog = maxTableLog; + U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); + assert(srcSize > 1); /* Not supported, RLE should be used instead */ + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */ + if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */ + if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG; + if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG; + return tableLog; +} + +unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); +} + +/* Secondary normalization method. + To be used when primary method fails. 
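+   For orientation, callers reach this fallback through FSE_normalizeCount();
+   a typical table-description sequence looks like the following illustrative
+   sketch (error checks omitted, buffer names are the caller's):
+     unsigned tableLog = FSE_optimalTableLog(0, srcSize, maxSymbolValue);
+     short norm[FSE_MAX_SYMBOL_VALUE+1];
+     FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, srcSize >= 2048);
+     size_t hSize = FSE_writeNCount(dst, dstCapacity, norm, maxSymbolValue, tableLog);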
*/ + +static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount) +{ + short const NOT_YET_ASSIGNED = -2; + U32 s; + U32 distributed = 0; + U32 ToDistribute; + + /* Init */ + U32 const lowThreshold = (U32)(total >> tableLog); + U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); + + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == 0) { + norm[s]=0; + continue; + } + if (count[s] <= lowThreshold) { + norm[s] = lowProbCount; + distributed++; + total -= count[s]; + continue; + } + if (count[s] <= lowOne) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } + + norm[s]=NOT_YET_ASSIGNED; + } + ToDistribute = (1 << tableLog) - distributed; + + if (ToDistribute == 0) + return 0; + + if ((total / ToDistribute) > lowOne) { + /* risk of rounding to zero */ + lowOne = (U32)((total * 3) / (ToDistribute * 2)); + for (s=0; s<=maxSymbolValue; s++) { + if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } } + ToDistribute = (1 << tableLog) - distributed; + } + + if (distributed == maxSymbolValue+1) { + /* all values are pretty poor; + probably incompressible data (should have already been detected); + find max, then give all remaining points to max */ + U32 maxV = 0, maxC = 0; + for (s=0; s<=maxSymbolValue; s++) + if (count[s] > maxC) { maxV=s; maxC=count[s]; } + norm[maxV] += (short)ToDistribute; + return 0; + } + + if (total == 0) { + /* all of the symbols were low enough for the lowOne or lowThreshold */ + for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1)) + if (norm[s] > 0) { ToDistribute--; norm[s]++; } + return 0; + } + + { U64 const vStepLog = 62 - tableLog; + U64 const mid = (1ULL << (vStepLog-1)) - 1; + U64 const rStep = ZSTD_div64((((U64)1<> vStepLog); + U32 const sEnd = (U32)(end >> vStepLog); + U32 const weight = sEnd - sStart; + if (weight < 1) + return ERROR(GENERIC); + norm[s] = (short)weight; + tmpTotal = end; + } } } + + return 0; +} + +size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t total, + unsigned maxSymbolValue, unsigned useLowProbCount) +{ + /* Sanity checks */ + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ + if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ + + { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; + short const lowProbCount = useLowProbCount ? -1 : 1; + U64 const scale = 62 - tableLog; + U64 const step = ZSTD_div64((U64)1<<62, (U32)total); /* <== here, one division ! 
*/ + U64 const vStep = 1ULL<<(scale-20); + int stillToDistribute = 1<> tableLog); + + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == total) return 0; /* rle special case */ + if (count[s] == 0) { normalizedCounter[s]=0; continue; } + if (count[s] <= lowThreshold) { + normalizedCounter[s] = lowProbCount; + stillToDistribute--; + } else { + short proba = (short)((count[s]*step) >> scale); + if (proba<8) { + U64 restToBeat = vStep * rtbTable[proba]; + proba += (count[s]*step) - ((U64)proba< restToBeat; + } + if (proba > largestP) { largestP=proba; largest=s; } + normalizedCounter[s] = proba; + stillToDistribute -= proba; + } } + if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { + /* corner case, need another normalization method */ + size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount); + if (FSE_isError(errorCode)) return errorCode; + } + else normalizedCounter[largest] += (short)stillToDistribute; + } + +#if 0 + { /* Print Table (debug) */ + U32 s; + U32 nTotal = 0; + for (s=0; s<=maxSymbolValue; s++) + RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]); + for (s=0; s<=maxSymbolValue; s++) + nTotal += abs(normalizedCounter[s]); + if (nTotal != (1U<>1); /* assumption : tableLog >= 1 */ + FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + + /* header */ + tableU16[-2] = (U16) nbBits; + tableU16[-1] = (U16) maxSymbolValue; + + /* Build table */ + for (s=0; s FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + FSE_FLUSHBITS(&bitC); + } + + /* 2 or 4 encoding per loop */ + while ( ip>istart ) { + + FSE_encodeSymbol(&bitC, &CState2, *--ip); + + if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */ + FSE_FLUSHBITS(&bitC); + + FSE_encodeSymbol(&bitC, &CState1, *--ip); + + if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + } + + FSE_FLUSHBITS(&bitC); + } + + FSE_flushCState(&bitC, &CState2); + FSE_flushCState(&bitC, &CState1); + return BIT_closeCStream(&bitC); +} + +size_t FSE_compress_usingCTable (void* dst, size_t dstSize, + const void* src, size_t srcSize, + const FSE_CTable* ct) +{ + unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); + + if (fast) + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); + else + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); +} + + +size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +/* FSE_compress_wksp() : + * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). 
+ * `wkspSize` size must be `(1< not compressible */ + if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ + } + + tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue); + CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue, /* useLowProbCount */ srcSize >= 2048) ); + + /* Write table description header */ + { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); + op += nc_err; + } + + /* Compress */ + CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) ); + { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) ); + if (cSize == 0) return 0; /* not enough space for compressed data */ + op += cSize; + } + + /* check compressibility */ + if ( (size_t)(op-ostart) >= srcSize-1 ) return 0; + + return op-ostart; +} + +typedef struct { + FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; + union { + U32 hist_wksp[HIST_WKSP_SIZE_U32]; + BYTE scratchBuffer[1 << FSE_MAX_TABLELOG]; + } workspace; +} fseWkspMax_t; + +size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) +{ + fseWkspMax_t scratchBuffer; + DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_COMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer)); +} + +size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG); +} +#endif + +#endif /* FSE_COMMONDEFS_ONLY */ +/**** ended inlining compress/fse_compress.c ****/ +/**** start inlining compress/hist.c ****/ +/* ****************************************************************** + * hist : Histogram functions + * part of Finite State Entropy project + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + +/* --- dependencies --- */ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/debug.h ****/ +/**** skipping file: ../common/error_private.h ****/ +/**** skipping file: hist.h ****/ + + +/* --- Error management --- */ +unsigned HIST_isError(size_t code) { return ERR_isError(code); } + +/*-************************************************************** + * Histogram functions + ****************************************************************/ +unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + const BYTE* const end = ip + srcSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned largestCount=0; + + ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count)); + if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } + + while (ip largestCount) largestCount = count[s]; + } + + return largestCount; +} + +typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e; + +/* HIST_count_parallel_wksp() : + * store histogram into 4 intermediate tables, recombined at the end. + * this design makes better use of OoO cpus, + * and is noticeably faster when some values are heavily repeated. + * But it needs some additional workspace for intermediate tables. + * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32. + * @return : largest histogram frequency, + * or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */ +static size_t HIST_count_parallel_wksp( + unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + HIST_checkInput_e check, + U32* const workSpace) +{ + const BYTE* ip = (const BYTE*)source; + const BYTE* const iend = ip+sourceSize; + size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count); + unsigned max=0; + U32* const Counting1 = workSpace; + U32* const Counting2 = Counting1 + 256; + U32* const Counting3 = Counting2 + 256; + U32* const Counting4 = Counting3 + 256; + + /* safety checks */ + assert(*maxSymbolValuePtr <= 255); + if (!sourceSize) { + ZSTD_memset(count, 0, countSize); + *maxSymbolValuePtr = 0; + return 0; + } + ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned)); + + /* by stripes of 16 bytes */ + { U32 cached = MEM_read32(ip); ip += 4; + while (ip < iend-15) { + U32 c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + } + ip-=4; + } + + /* finish last symbols */ + while (ip max) max = Counting1[s]; + } } + + { unsigned maxSymbolValue = 255; + while (!Counting1[maxSymbolValue]) maxSymbolValue--; + if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall); + *maxSymbolValuePtr = maxSymbolValue; + ZSTD_memmove(count, Counting1, countSize); /* in case count & Counting1 are overlapping */ + } + return (size_t)max; +} + +/* HIST_countFast_wksp() : + * Same as HIST_countFast(), but using an 
externally provided scratch buffer. + * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + void* workSpace, size_t workSpaceSize) +{ + if (sourceSize < 1500) /* heuristic threshold */ + return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize); + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); + return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace); +} + +/* HIST_count_wksp() : + * Same as HIST_count(), but using an externally provided scratch buffer. + * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ +size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + void* workSpace, size_t workSpaceSize) +{ + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); + if (*maxSymbolValuePtr < 255) + return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace); + *maxSymbolValuePtr = 255; + return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize); +} + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +/* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ +size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize) +{ + unsigned tmpCounters[HIST_WKSP_SIZE_U32]; + return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters, sizeof(tmpCounters)); +} + +size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize) +{ + unsigned tmpCounters[HIST_WKSP_SIZE_U32]; + return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters, sizeof(tmpCounters)); +} +#endif +/**** ended inlining compress/hist.c ****/ +/**** start inlining compress/huf_compress.c ****/ +/* ****************************************************************** + * Huffman encoder, part of New Generation Entropy library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + +/* ************************************************************** +* Compiler specifics +****************************************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif + + +/* ************************************************************** +* Includes +****************************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/compiler.h ****/ +/**** skipping file: ../common/bitstream.h ****/ +/**** skipping file: hist.h ****/ +#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/error_private.h ****/ + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_isError ERR_isError +#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ + + +/* ************************************************************** +* Utils +****************************************************************/ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); +} + + +/* ******************************************************* +* HUF : Huffman block compression +*********************************************************/ +/* HUF_compressWeights() : + * Same as FSE_compress(), but dedicated to huff0's weights compression. + * The use case needs much less stack memory. + * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. 
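+ * As a concrete illustration of those weights (this is how HUF_writeCTable_wksp()
+ * below derives them, not an additional API): a symbol coded on nbBits bits is
+ * stored as weight = huffLog + 1 - nbBits, and an unused symbol gets weight 0.
+ * With huffLog = 11, a 4-bit symbol therefore becomes weight 11 + 1 - 4 = 8,
+ * which fits the <= HUF_TABLELOG_MAX bound stated above.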
+ */ +#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 + +typedef struct { + FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; + U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)]; + unsigned count[HUF_TABLELOG_MAX+1]; + S16 norm[HUF_TABLELOG_MAX+1]; +} HUF_CompressWeightsWksp; + +static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize) +{ + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const oend = ostart + dstSize; + + unsigned maxSymbolValue = HUF_TABLELOG_MAX; + U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; + HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace; + + if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC); + + /* init conditions */ + if (wtSize <= 1) return 0; /* Not compressible */ + + /* Scan input and build symbol stats */ + { unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize); /* never fails */ + if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */ + if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ + } + + tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); + CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) ); + + /* Write table description header */ + { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) ); + op += hSize; + } + + /* Compress */ + CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) ); + { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) ); + if (cSize == 0) return 0; /* not enough space for compressed data */ + op += cSize; + } + + return (size_t)(op-ostart); +} + + +typedef struct { + HUF_CompressWeightsWksp wksp; + BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ + BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; +} HUF_WriteCTableWksp; + +size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, + const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, + void* workspace, size_t workspaceSize) +{ + BYTE* op = (BYTE*)dst; + U32 n; + HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace; + + /* check conditions */ + if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC); + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); + + /* convert to weight */ + wksp->bitsToWeight[0] = 0; + for (n=1; nbitsToWeight[n] = (BYTE)(huffLog + 1 - n); + for (n=0; nhuffWeight[n] = wksp->bitsToWeight[CTable[n].nbBits]; + + /* attempt weights compression by FSE */ + { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) ); + if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */ + op[0] = (BYTE)hSize; + return hSize+1; + } } + + /* write raw values as 4-bits (max : 15) */ + if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ + if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ + op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1)); + wksp->huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't 
cause msan issue in final combination */ + for (n=0; nhuffWeight[n] << 4) + wksp->huffWeight[n+1]); + return ((maxSymbolValue+1)/2) + 1; +} + +/*! HUF_writeCTable() : + `CTable` : Huffman tree to save, using huf representation. + @return : size of saved CTable */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, + const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog) +{ + HUF_WriteCTableWksp wksp; + return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp)); +} + + +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights) +{ + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */ + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ + U32 tableLog = 0; + U32 nbSymbols = 0; + + /* get symbol weights */ + CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize)); + *hasZeroWeights = (rankVal[0] > 0); + + /* check result */ + if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); + + /* Prepare base value per rank */ + { U32 n, nextRankStart = 0; + for (n=1; n<=tableLog; n++) { + U32 curr = nextRankStart; + nextRankStart += (rankVal[n] << (n-1)); + rankVal[n] = curr; + } } + + /* fill nbBits */ + { U32 n; for (n=0; nn=tableLog+1 */ + U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; + { U32 n; for (n=0; n0; n--) { /* start at n=tablelog <-> w=1 */ + valPerRank[n] = min; /* get starting value within each rank */ + min += nbPerRank[n]; + min >>= 1; + } } + /* assign value within rank, symbol order */ + { U32 n; for (n=0; n maxNbBits to be maxNbBits. Then it adjusts + * the tree to so that it is a valid canonical Huffman tree. + * + * @pre The sum of the ranks of each symbol == 2^largestBits, + * where largestBits == huffNode[lastNonNull].nbBits. + * @post The sum of the ranks of each symbol == 2^largestBits, + * where largestBits is the return value <= maxNbBits. + * + * @param huffNode The Huffman tree modified in place to enforce maxNbBits. + * @param lastNonNull The symbol with the lowest count in the Huffman tree. + * @param maxNbBits The maximum allowed number of bits, which the Huffman tree + * may not respect. After this function the Huffman tree will + * respect maxNbBits. + * @return The maximum number of bits of the Huffman tree after adjustment, + * necessarily no more than maxNbBits. + */ +static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) +{ + const U32 largestBits = huffNode[lastNonNull].nbBits; + /* early exit : no elt > maxNbBits, so the tree is already valid. */ + if (largestBits <= maxNbBits) return largestBits; + + /* there are several too large elements (at least >= 2) */ + { int totalCost = 0; + const U32 baseCost = 1 << (largestBits - maxNbBits); + int n = (int)lastNonNull; + + /* Adjust any ranks > maxNbBits to maxNbBits. + * Compute totalCost, which is how far the sum of the ranks is + * we are over 2^largestBits after adjust the offending ranks. 
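+ * Illustrative numbers (not taken from any particular input): with
+ * largestBits = 13 and maxNbBits = 11, baseCost = 1 << (13 - 11) = 4; a node
+ * currently using 12 bits then adds 4 - (1 << (13 - 12)) = 2 to totalCost,
+ * and a node using 13 bits adds 4 - 1 = 3.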
+ */ + while (huffNode[n].nbBits > maxNbBits) { + totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); + huffNode[n].nbBits = (BYTE)maxNbBits; + n--; + } + /* n stops at huffNode[n].nbBits <= maxNbBits */ + assert(huffNode[n].nbBits <= maxNbBits); + /* n end at index of smallest symbol using < maxNbBits */ + while (huffNode[n].nbBits == maxNbBits) --n; + + /* renorm totalCost from 2^largestBits to 2^maxNbBits + * note : totalCost is necessarily a multiple of baseCost */ + assert((totalCost & (baseCost - 1)) == 0); + totalCost >>= (largestBits - maxNbBits); + assert(totalCost > 0); + + /* repay normalized cost */ + { U32 const noSymbol = 0xF0F0F0F0; + U32 rankLast[HUF_TABLELOG_MAX+2]; + + /* Get pos of last (smallest = lowest cum. count) symbol per rank */ + ZSTD_memset(rankLast, 0xF0, sizeof(rankLast)); + { U32 currentNbBits = maxNbBits; + int pos; + for (pos=n ; pos >= 0; pos--) { + if (huffNode[pos].nbBits >= currentNbBits) continue; + currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ + rankLast[maxNbBits-currentNbBits] = (U32)pos; + } } + + while (totalCost > 0) { + /* Try to reduce the next power of 2 above totalCost because we + * gain back half the rank. + */ + U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1; + for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { + U32 const highPos = rankLast[nBitsToDecrease]; + U32 const lowPos = rankLast[nBitsToDecrease-1]; + if (highPos == noSymbol) continue; + /* Decrease highPos if no symbols of lowPos or if it is + * not cheaper to remove 2 lowPos than highPos. + */ + if (lowPos == noSymbol) break; + { U32 const highTotal = huffNode[highPos].count; + U32 const lowTotal = 2 * huffNode[lowPos].count; + if (highTotal <= lowTotal) break; + } } + /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */ + assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1); + /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ + while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) + nBitsToDecrease++; + assert(rankLast[nBitsToDecrease] != noSymbol); + /* Increase the number of bits to gain back half the rank cost. */ + totalCost -= 1 << (nBitsToDecrease-1); + huffNode[rankLast[nBitsToDecrease]].nbBits++; + + /* Fix up the new rank. + * If the new rank was empty, this symbol is now its smallest. + * Otherwise, this symbol will be the largest in the new rank so no adjustment. + */ + if (rankLast[nBitsToDecrease-1] == noSymbol) + rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; + /* Fix up the old rank. + * If the symbol was at position 0, meaning it was the highest weight symbol in the tree, + * it must be the only symbol in its rank, so the old rank now has no symbols. + * Otherwise, since the Huffman nodes are sorted by count, the previous position is now + * the smallest node in the rank. If the previous position belongs to a different rank, + * then the rank is now empty. + */ + if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ + rankLast[nBitsToDecrease] = noSymbol; + else { + rankLast[nBitsToDecrease]--; + if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) + rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ + } + } /* while (totalCost > 0) */ + + /* If we've removed too much weight, then we have to add it back. + * To avoid overshooting again, we only adjust the smallest rank. 
+ * We take the largest nodes from the lowest rank 0 and move them + * to rank 1. There's guaranteed to be enough rank 0 symbols because + * TODO. + */ + while (totalCost < 0) { /* Sometimes, cost correction overshoot */ + /* special case : no rank 1 symbol (using maxNbBits-1); + * let's create one from largest rank 0 (using maxNbBits). + */ + if (rankLast[1] == noSymbol) { + while (huffNode[n].nbBits == maxNbBits) n--; + huffNode[n+1].nbBits--; + assert(n >= 0); + rankLast[1] = (U32)(n+1); + totalCost++; + continue; + } + huffNode[ rankLast[1] + 1 ].nbBits--; + rankLast[1]++; + totalCost ++; + } + } /* repay normalized cost */ + } /* there are several too large elements (at least >= 2) */ + + return maxNbBits; +} + +typedef struct { + U32 base; + U32 curr; +} rankPos; + +typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; + +#define RANK_POSITION_TABLE_SIZE 32 + +typedef struct { + huffNodeTable huffNodeTbl; + rankPos rankPosition[RANK_POSITION_TABLE_SIZE]; +} HUF_buildCTable_wksp_tables; + +/** + * HUF_sort(): + * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order. + * + * @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled. + * Must have (maxSymbolValue + 1) entries. + * @param[in] count Histogram of the symbols. + * @param[in] maxSymbolValue Maximum symbol value. + * @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries. + */ +static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition) +{ + int n; + int const maxSymbolValue1 = (int)maxSymbolValue + 1; + + /* Compute base and set curr to base. + * For symbol s let lowerRank = BIT_highbit32(count[n]+1) and rank = lowerRank + 1. + * Then 2^lowerRank <= count[n]+1 <= 2^rank. + * We attribute each symbol to lowerRank's base value, because we want to know where + * each rank begins in the output, so for rank R we want to count ranks R+1 and above. + */ + ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE); + for (n = 0; n < maxSymbolValue1; ++n) { + U32 lowerRank = BIT_highbit32(count[n] + 1); + rankPosition[lowerRank].base++; + } + assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0); + for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) { + rankPosition[n-1].base += rankPosition[n].base; + rankPosition[n-1].curr = rankPosition[n-1].base; + } + /* Sort */ + for (n = 0; n < maxSymbolValue1; ++n) { + U32 const c = count[n]; + U32 const r = BIT_highbit32(c+1) + 1; + U32 pos = rankPosition[r].curr++; + /* Insert into the correct position in the rank. + * We have at most 256 symbols, so this insertion should be fine. + */ + while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) { + huffNode[pos] = huffNode[pos-1]; + pos--; + } + huffNode[pos].count = c; + huffNode[pos].byte = (BYTE)n; + } +} + + +/** HUF_buildCTable_wksp() : + * Same as HUF_buildCTable(), but using externally allocated scratch buffer. + * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables). + */ +#define STARTNODE (HUF_SYMBOLVALUE_MAX+1) + +/* HUF_buildTree(): + * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree. + * + * @param huffNode The array sorted by HUF_sort(). Builds the Huffman tree in this array. + * @param maxSymbolValue The maximum symbol value. + * @return The smallest node in the Huffman tree (by count). 
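+ *
+ * For orientation, HUF_buildCTable_wksp() below composes these helpers roughly
+ * as follows (variable names are the ones local to that function):
+ *   HUF_sort(huffNode, count, maxSymbolValue, rankPosition);
+ *   nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
+ *   maxNbBits   = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
+ *   HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits);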
+ */ +static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue) +{ + nodeElt* const huffNode0 = huffNode - 1; + int nonNullRank; + int lowS, lowN; + int nodeNb = STARTNODE; + int n, nodeRoot; + /* init for parents */ + nonNullRank = (int)maxSymbolValue; + while(huffNode[nonNullRank].count == 0) nonNullRank--; + lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; + huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; + huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb; + nodeNb++; lowS-=2; + for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); + huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */ + + /* create parents */ + while (nodeNb <= nodeRoot) { + int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; + huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb; + nodeNb++; + } + + /* distribute weights (unlimited tree height) */ + huffNode[nodeRoot].nbBits = 0; + for (n=nodeRoot-1; n>=STARTNODE; n--) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + for (n=0; n<=nonNullRank; n++) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + + return nonNullRank; +} + +/** + * HUF_buildCTableFromTree(): + * Build the CTable given the Huffman tree in huffNode. + * + * @param[out] CTable The output Huffman CTable. + * @param huffNode The Huffman tree. + * @param nonNullRank The last and smallest node in the Huffman tree. + * @param maxSymbolValue The maximum symbol value. + * @param maxNbBits The exact maximum number of bits used in the Huffman tree. + */ +static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits) +{ + /* fill result into ctable (val, nbBits) */ + int n; + U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; + U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; + int const alphabetSize = (int)(maxSymbolValue + 1); + for (n=0; n<=nonNullRank; n++) + nbPerRank[huffNode[n].nbBits]++; + /* determine starting value per rank */ + { U16 min = 0; + for (n=(int)maxNbBits; n>0; n--) { + valPerRank[n] = min; /* get starting value within each rank */ + min += nbPerRank[n]; + min >>= 1; + } } + for (n=0; nhuffNodeTbl; + nodeElt* const huffNode = huffNode0+1; + int nonNullRank; + + /* safety checks */ + if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) + return ERROR(workSpace_tooSmall); + if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) + return ERROR(maxSymbolValue_tooLarge); + ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable)); + + /* sort, decreasing order */ + HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition); + + /* build tree */ + nonNullRank = HUF_buildTree(huffNode, maxSymbolValue); + + /* enforce maxTableLog */ + maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits); + if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ + + HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits); + + return maxNbBits; +} + +size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) +{ + size_t nbBits = 0; + int s; + for (s = 0; s <= (int)maxSymbolValue; ++s) { + nbBits += 
CTable[s].nbBits * count[s]; + } + return nbBits >> 3; +} + +int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { + int bad = 0; + int s; + for (s = 0; s <= (int)maxSymbolValue; ++s) { + bad |= (count[s] != 0) & (CTable[s].nbBits == 0); + } + return !bad; +} + +size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } + +FORCE_INLINE_TEMPLATE void +HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) +{ + BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); +} + +#define HUF_FLUSHBITS(s) BIT_flushBits(s) + +#define HUF_FLUSHBITS_1(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream) + +#define HUF_FLUSHBITS_2(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) + +FORCE_INLINE_TEMPLATE size_t +HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + const BYTE* ip = (const BYTE*) src; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + size_t n; + BIT_CStream_t bitC; + + /* init */ + if (dstSize < 8) return 0; /* not enough space to compress */ + { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op)); + if (HUF_isError(initErr)) return 0; } + + n = srcSize & ~3; /* join to mod 4 */ + switch (srcSize & 3) + { + case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); + HUF_FLUSHBITS_2(&bitC); + /* fall-through */ + case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); + HUF_FLUSHBITS_1(&bitC); + /* fall-through */ + case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); + HUF_FLUSHBITS(&bitC); + /* fall-through */ + case 0 : /* fall-through */ + default: break; + } + + for (; n>0; n-=4) { /* note : n&3==0 at this stage */ + HUF_encodeSymbol(&bitC, ip[n- 1], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 2], CTable); + HUF_FLUSHBITS_2(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 3], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 4], CTable); + HUF_FLUSHBITS(&bitC); + } + + return BIT_closeCStream(&bitC); +} + +#if DYNAMIC_BMI2 + +static TARGET_ATTRIBUTE("bmi2") size_t +HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +static size_t +HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +static size_t +HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, const int bmi2) +{ + if (bmi2) { + return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); + } + return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); +} + +#else + +static size_t +HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, const int bmi2) +{ + (void)bmi2; + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +#endif + +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, 
CTable, /* bmi2 */ 0); +} + + +static size_t +HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, int bmi2) +{ + size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ + const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + + if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ + if (srcSize < 12) return 0; /* no saving possible : too small input */ + op += 6; /* jumpTable */ + + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart+2, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart+4, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + assert(op <= oend); + assert(ip <= iend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) ); + if (cSize==0) return 0; + op += cSize; + } + + return (size_t)(op-ostart); +} + +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); +} + +typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; + +static size_t HUF_compressCTable_internal( + BYTE* const ostart, BYTE* op, BYTE* const oend, + const void* src, size_t srcSize, + HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) +{ + size_t const cSize = (nbStreams==HUF_singleStream) ? 
+ HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) : + HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2); + if (HUF_isError(cSize)) { return cSize; } + if (cSize==0) { return 0; } /* uncompressible */ + op += cSize; + /* check compressibility */ + assert(op >= ostart); + if ((size_t)(op-ostart) >= srcSize-1) { return 0; } + return (size_t)(op-ostart); +} + +typedef struct { + unsigned count[HUF_SYMBOLVALUE_MAX + 1]; + HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; + union { + HUF_buildCTable_wksp_tables buildCTable_wksp; + HUF_WriteCTableWksp writeCTable_wksp; + } wksps; +} HUF_compress_tables_t; + +/* HUF_compress_internal() : + * `workSpace_align4` must be aligned on 4-bytes boundaries, + * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */ +static size_t +HUF_compress_internal (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + HUF_nbStreams_e nbStreams, + void* workSpace_align4, size_t wkspSize, + HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, + const int bmi2) +{ + HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace_align4; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + + HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE); + assert(((size_t)workSpace_align4 & 3) == 0); /* must be aligned on 4-bytes boundaries */ + + /* checks & inits */ + if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall); + if (!srcSize) return 0; /* Uncompressed */ + if (!dstSize) return 0; /* cannot fit anything within dst budget */ + if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ + if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); + if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; + if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; + + /* Heuristic : If old table is valid, use it for small inputs */ + if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); + } + + /* Scan input and build symbol stats */ + { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace_align4, wkspSize) ); + if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ + if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ + } + + /* Check validity of previous table */ + if ( repeat + && *repeat == HUF_repeat_check + && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { + *repeat = HUF_repeat_none; + } + /* Heuristic : use existing table for small inputs */ + if (preferRepeat && repeat && *repeat != HUF_repeat_none) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); + } + + /* Build Huffman Tree */ + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, + maxSymbolValue, huffLog, + &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp)); + CHECK_F(maxBits); + huffLog = (U32)maxBits; + /* Zero unused symbols in CTable, so we can check it for validity */ + ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0, + sizeof(table->CTable) - 
((maxSymbolValue + 1) * sizeof(HUF_CElt))); + } + + /* Write table description header */ + { CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog, + &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) ); + /* Check if using previous huffman table is beneficial */ + if (repeat && *repeat != HUF_repeat_none) { + size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); + size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue); + if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); + } } + + /* Use the new huffman table */ + if (hSize + 12ul >= srcSize) { return 0; } + op += hSize; + if (repeat) { *repeat = HUF_repeat_none; } + if (oldHufTable) + ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */ + } + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, table->CTable, bmi2); +} + + +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_singleStream, + workSpace, wkspSize, + NULL, NULL, 0, 0 /*bmi2*/); +} + +size_t HUF_compress1X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_singleStream, + workSpace, wkspSize, hufTable, + repeat, preferRepeat, bmi2); +} + +/* HUF_compress4X_repeat(): + * compress input using 4 streams. + * provide workspace to generate compression tables */ +size_t HUF_compress4X_wksp (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_fourStreams, + workSpace, wkspSize, + NULL, NULL, 0, 0 /*bmi2*/); +} + +/* HUF_compress4X_repeat(): + * compress input using 4 streams. 
+ * re-use an existing huffman compression table */ +size_t HUF_compress4X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_fourStreams, + workSpace, wkspSize, + hufTable, repeat, preferRepeat, bmi2); +} + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +/** HUF_buildCTable() : + * @return : maxNbBits + * Note : count is used before tree is written, so they can safely overlap + */ +size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits) +{ + HUF_buildCTable_wksp_tables workspace; + return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace)); +} + +size_t HUF_compress1X (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) +{ + unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; + return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); +} + +size_t HUF_compress2 (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog) +{ + unsigned workSpace[HUF_WORKSPACE_SIZE_U32]; + return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); +} + +size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) +{ + return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT); +} +#endif +/**** ended inlining compress/huf_compress.c ****/ +/**** start inlining compress/zstd_compress_literals.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +/**** start inlining zstd_compress_literals.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMPRESS_LITERALS_H +#define ZSTD_COMPRESS_LITERALS_H + +/**** start inlining zstd_compress_internal.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* This header contains definitions + * that shall **only** be used by modules within lib/compress. 
+ */ + +#ifndef ZSTD_COMPRESS_H +#define ZSTD_COMPRESS_H + +/*-************************************* +* Dependencies +***************************************/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** start inlining zstd_cwksp.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_CWKSP_H +#define ZSTD_CWKSP_H + +/*-************************************* +* Dependencies +***************************************/ +/**** skipping file: ../common/zstd_internal.h ****/ + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Constants +***************************************/ + +/* Since the workspace is effectively its own little malloc implementation / + * arena, when we run under ASAN, we should similarly insert redzones between + * each internal element of the workspace, so ASAN will catch overruns that + * reach outside an object but that stay inside the workspace. + * + * This defines the size of that redzone. + */ +#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE +#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128 +#endif + + +/* Set our tables and aligneds to align by 64 bytes */ +#define ZSTD_CWKSP_ALIGNMENT_BYTES 64 + +/*-************************************* +* Structures +***************************************/ +typedef enum { + ZSTD_cwksp_alloc_objects, + ZSTD_cwksp_alloc_buffers, + ZSTD_cwksp_alloc_aligned +} ZSTD_cwksp_alloc_phase_e; + +/** + * Used to describe whether the workspace is statically allocated (and will not + * necessarily ever be freed), or if it's dynamically allocated and we can + * expect a well-formed caller to free this. + */ +typedef enum { + ZSTD_cwksp_dynamic_alloc, + ZSTD_cwksp_static_alloc +} ZSTD_cwksp_static_alloc_e; + +/** + * Zstd fits all its internal datastructures into a single continuous buffer, + * so that it only needs to perform a single OS allocation (or so that a buffer + * can be provided to it and it can perform no allocations at all). This buffer + * is called the workspace. + * + * Several optimizations complicate that process of allocating memory ranges + * from this workspace for each internal datastructure: + * + * - These different internal datastructures have different setup requirements: + * + * - The static objects need to be cleared once and can then be trivially + * reused for each compression. + * + * - Various buffers don't need to be initialized at all--they are always + * written into before they're read. + * + * - The matchstate tables have a unique requirement that they don't need + * their memory to be totally cleared, but they do need the memory to have + * some bound, i.e., a guarantee that all values in the memory they've been + * allocated is less than some maximum value (which is the starting value + * for the indices that they will then use for compression). When this + * guarantee is provided to them, they can use the memory without any setup + * work. When it can't, they have to clear the area. + * + * - These buffers also have different alignment requirements. + * + * - We would like to reuse the objects in the workspace for multiple + * compressions without having to perform any expensive reallocation or + * reinitialization work. 
+ * + * - We would like to be able to efficiently reuse the workspace across + * multiple compressions **even when the compression parameters change** and + * we need to resize some of the objects (where possible). + * + * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp + * abstraction was created. It works as follows: + * + * Workspace Layout: + * + * [ ... workspace ... ] + * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers] + * + * The various objects that live in the workspace are divided into the + * following categories, and are allocated separately: + * + * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict, + * so that literally everything fits in a single buffer. Note: if present, + * this must be the first object in the workspace, since ZSTD_customFree{CCtx, + * CDict}() rely on a pointer comparison to see whether one or two frees are + * required. + * + * - Fixed size objects: these are fixed-size, fixed-count objects that are + * nonetheless "dynamically" allocated in the workspace so that we can + * control how they're initialized separately from the broader ZSTD_CCtx. + * Examples: + * - Entropy Workspace + * - 2 x ZSTD_compressedBlockState_t + * - CDict dictionary contents + * + * - Tables: these are any of several different datastructures (hash tables, + * chain tables, binary trees) that all respect a common format: they are + * uint32_t arrays, all of whose values are between 0 and (nextSrc - base). + * Their sizes depend on the cparams. These tables are 64-byte aligned. + * + * - Aligned: these buffers are used for various purposes that require 4 byte + * alignment, but don't require any initialization before they're used. These + * buffers are each aligned to 64 bytes. + * + * - Buffers: these buffers are used for various purposes that don't require + * any alignment or initialization before they're used. This means they can + * be moved around at no cost for a new compression. + * + * Allocating Memory: + * + * The various types of objects must be allocated in order, so they can be + * correctly packed into the workspace buffer. That order is: + * + * 1. Objects + * 2. Buffers + * 3. Aligned/Tables + * + * Attempts to reserve objects of different types out of order will fail. + */ +typedef struct { + void* workspace; + void* workspaceEnd; + + void* objectEnd; + void* tableEnd; + void* tableValidEnd; + void* allocStart; + + BYTE allocFailed; + int workspaceOversizedDuration; + ZSTD_cwksp_alloc_phase_e phase; + ZSTD_cwksp_static_alloc_e isStatic; +} ZSTD_cwksp; + +/*-************************************* +* Functions +***************************************/ + +MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws); + +MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) { + (void)ws; + assert(ws->workspace <= ws->objectEnd); + assert(ws->objectEnd <= ws->tableEnd); + assert(ws->objectEnd <= ws->tableValidEnd); + assert(ws->tableEnd <= ws->allocStart); + assert(ws->tableValidEnd <= ws->allocStart); + assert(ws->allocStart <= ws->workspaceEnd); +} + +/** + * Align must be a power of 2. + */ +MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) { + size_t const mask = align - 1; + assert((align & mask) == 0); + return (size + mask) & ~mask; +} + +/** + * Use this to determine how much space in the workspace we will consume to + * allocate this object. 
(Normally it should be exactly the size of the object, + * but under special conditions, like ASAN, where we pad each object, it might + * be larger.) + * + * Since tables aren't currently redzoned, you don't need to call through this + * to figure out how much space you need for the matchState tables. Everything + * else is though. + * + * Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned_alloc_size(). + */ +MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) { + if (size == 0) + return 0; +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + return size + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; +#else + return size; +#endif +} + +/** + * Returns an adjusted alloc size that is the nearest larger multiple of 64 bytes. + * Used to determine the number of bytes required for a given "aligned". + */ +MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) { + return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, ZSTD_CWKSP_ALIGNMENT_BYTES)); +} + +/** + * Returns the amount of additional space the cwksp must allocate + * for internal purposes (currently only alignment). + */ +MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) { + /* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes + * to align the beginning of tables section, as well as another n_2=[0, 63] bytes + * to align the beginning of the aligned secion. + * + * n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and + * aligneds being sized in multiples of 64 bytes. + */ + size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES; + return slackSpace; +} + + +/** + * Return the number of additional bytes required to align a pointer to the given number of bytes. + * alignBytes must be a power of two. + */ +MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignBytes) { + size_t const alignBytesMask = alignBytes - 1; + size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask; + assert((alignBytes & alignBytesMask) == 0); + assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES); + return bytes; +} + +/** + * Internal function. Do not use directly. + * Reserves the given number of bytes within the aligned/buffer segment of the wksp, which + * counts from the end of the wksp. (as opposed to the object/table segment) + * + * Returns a pointer to the beginning of that space. + */ +MEM_STATIC void* ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t const bytes) { + void* const alloc = (BYTE*)ws->allocStart - bytes; + void* const bottom = ws->tableEnd; + DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining", + alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); + ZSTD_cwksp_assert_internal_consistency(ws); + assert(alloc >= bottom); + if (alloc < bottom) { + DEBUGLOG(4, "cwksp: alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + if (alloc < ws->tableValidEnd) { + ws->tableValidEnd = alloc; + } + ws->allocStart = alloc; + return alloc; +} + +/** + * Moves the cwksp to the next phase, and does any necessary allocations. 
+ * Returns a 0 on success, or zstd error + */ +MEM_STATIC size_t ZSTD_cwksp_internal_advance_phase( + ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) { + assert(phase >= ws->phase); + if (phase > ws->phase) { + /* Going from allocating objects to allocating buffers */ + if (ws->phase < ZSTD_cwksp_alloc_buffers && + phase >= ZSTD_cwksp_alloc_buffers) { + ws->tableValidEnd = ws->objectEnd; + } + + /* Going from allocating buffers to allocating aligneds/tables */ + if (ws->phase < ZSTD_cwksp_alloc_aligned && + phase >= ZSTD_cwksp_alloc_aligned) { + { /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */ + size_t const bytesToAlign = + ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES); + DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign); + ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */ + RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign), + memory_allocation, "aligned phase - alignment initial allocation failed!"); + } + { /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */ + void* const alloc = ws->objectEnd; + size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES); + void* const end = (BYTE*)alloc + bytesToAlign; + DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign); + RETURN_ERROR_IF(end > ws->workspaceEnd, memory_allocation, + "table phase - alignment initial allocation failed!"); + ws->objectEnd = end; + ws->tableEnd = end; + ws->tableValidEnd = end; + } + } + ws->phase = phase; + ZSTD_cwksp_assert_internal_consistency(ws); + } + return 0; +} + +/** + * Returns whether this object/buffer/etc was allocated in this workspace. + */ +MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) { + return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd); +} + +/** + * Internal function. Do not use directly. + */ +MEM_STATIC void* ZSTD_cwksp_reserve_internal( + ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) { + void* alloc; + if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase)) || bytes == 0) { + return NULL; + } + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* over-reserve space */ + bytes += 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; +#endif + + alloc = ZSTD_cwksp_reserve_internal_buffer_space(ws, bytes); + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on + * either size. */ + if (alloc) { + alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + __asan_unpoison_memory_region(alloc, bytes); + } + } +#endif + + return alloc; +} + +/** + * Reserves and returns unaligned memory. + */ +MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) { + return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers); +} + +/** + * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes). + */ +MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) { + void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES), + ZSTD_cwksp_alloc_aligned); + assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0); + return ptr; +} + +/** + * Aligned on 64 bytes. 
These buffers have the special property that + * their values remain constrained, allowing us to re-use them without + * memset()-ing them. + */ +MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) { + const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned; + void* alloc; + void* end; + void* top; + + if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) { + return NULL; + } + alloc = ws->tableEnd; + end = (BYTE *)alloc + bytes; + top = ws->allocStart; + + DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining", + alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); + assert((bytes & (sizeof(U32)-1)) == 0); + ZSTD_cwksp_assert_internal_consistency(ws); + assert(end <= top); + if (end > top) { + DEBUGLOG(4, "cwksp: table alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + ws->tableEnd = end; + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + __asan_unpoison_memory_region(alloc, bytes); + } +#endif + + assert((bytes & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0); + assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0); + return alloc; +} + +/** + * Aligned on sizeof(void*). + */ +MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) { + size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*)); + void* alloc = ws->objectEnd; + void* end = (BYTE*)alloc + roundedBytes; + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* over-reserve space */ + end = (BYTE *)end + 2 * ZSTD_CWKSP_ASAN_REDZONE_SIZE; +#endif + + DEBUGLOG(5, + "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining", + alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes); + assert(((size_t)alloc & (sizeof(void*)-1)) == 0); + assert((bytes & (sizeof(void*)-1)) == 0); + ZSTD_cwksp_assert_internal_consistency(ws); + /* we must be in the first phase, no advance is possible */ + if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) { + DEBUGLOG(4, "cwksp: object alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + ws->objectEnd = end; + ws->tableEnd = end; + ws->tableValidEnd = end; + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* Move alloc so there's ZSTD_CWKSP_ASAN_REDZONE_SIZE unused space on + * either size. */ + alloc = (BYTE *)alloc + ZSTD_CWKSP_ASAN_REDZONE_SIZE; + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + __asan_unpoison_memory_region(alloc, bytes); + } +#endif + + return alloc; +} + +MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty"); + +#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the table re-use logic is sound, and that we don't + * access table space that we haven't cleaned, we re-"poison" the table + * space every time we mark it dirty. 
*/ + { + size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd; + assert(__msan_test_shadow(ws->objectEnd, size) == -1); + __msan_poison(ws->objectEnd, size); + } +#endif + + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + ws->tableValidEnd = ws->objectEnd; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean"); + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + if (ws->tableValidEnd < ws->tableEnd) { + ws->tableValidEnd = ws->tableEnd; + } + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/** + * Zero the part of the allocated tables not already marked clean. + */ +MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables"); + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + if (ws->tableValidEnd < ws->tableEnd) { + ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd); + } + ZSTD_cwksp_mark_tables_clean(ws); +} + +/** + * Invalidates table allocations. + * All other allocations remain valid. + */ +MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: clearing tables!"); + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* We don't do this when the workspace is statically allocated, because + * when that is the case, we have no capability to hook into the end of the + * workspace's lifecycle to unpoison the memory. + */ + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + size_t size = (BYTE*)ws->tableValidEnd - (BYTE*)ws->objectEnd; + __asan_poison_memory_region(ws->objectEnd, size); + } +#endif + + ws->tableEnd = ws->objectEnd; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/** + * Invalidates all buffer, aligned, and table allocations. + * Object allocations remain valid. + */ +MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: clearing!"); + +#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the context re-use logic is sound, and that we don't + * access stuff that this compression hasn't initialized, we re-"poison" + * the workspace (or at least the non-static, non-table parts of it) + * every time we start a new compression. */ + { + size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->tableValidEnd; + __msan_poison(ws->tableValidEnd, size); + } +#endif + +#if ZSTD_ADDRESS_SANITIZER && !defined (ZSTD_ASAN_DONT_POISON_WORKSPACE) + /* We don't do this when the workspace is statically allocated, because + * when that is the case, we have no capability to hook into the end of the + * workspace's lifecycle to unpoison the memory. + */ + if (ws->isStatic == ZSTD_cwksp_dynamic_alloc) { + size_t size = (BYTE*)ws->workspaceEnd - (BYTE*)ws->objectEnd; + __asan_poison_memory_region(ws->objectEnd, size); + } +#endif + + ws->tableEnd = ws->objectEnd; + ws->allocStart = ws->workspaceEnd; + ws->allocFailed = 0; + if (ws->phase > ZSTD_cwksp_alloc_buffers) { + ws->phase = ZSTD_cwksp_alloc_buffers; + } + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/** + * The provided workspace takes ownership of the buffer [start, start+size). + * Any existing values in the workspace are ignored (the previously managed + * buffer, if present, must be separately freed). 
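+ *
+ * A minimal usage sketch (illustrative only, not part of upstream zstd; the
+ * buffer, sizes and reservation amounts are hypothetical, and the reservations
+ * follow the required Objects -> Buffers -> Aligned/Tables order):
+ *
+ *   ZSTD_cwksp ws;
+ *   ZSTD_cwksp_init(&ws, buffer, bufferSize, ZSTD_cwksp_static_alloc);  (buffer must be pointer-aligned)
+ *   void* obj   = ZSTD_cwksp_reserve_object(&ws, 64);
+ *   BYTE* tmp   = ZSTD_cwksp_reserve_buffer(&ws, 4096);
+ *   U32*  table = (U32*)ZSTD_cwksp_reserve_table(&ws, 1024 * sizeof(U32));
+ *   if (ZSTD_cwksp_reserve_failed(&ws)) { ... }   <- buffer was too small
+ *   ZSTD_cwksp_clear(&ws);                        <- drops buffers/aligneds/tables, keeps objects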
+ */ +MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_cwksp_static_alloc_e isStatic) { + DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size); + assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */ + ws->workspace = start; + ws->workspaceEnd = (BYTE*)start + size; + ws->objectEnd = ws->workspace; + ws->tableValidEnd = ws->objectEnd; + ws->phase = ZSTD_cwksp_alloc_objects; + ws->isStatic = isStatic; + ZSTD_cwksp_clear(ws); + ws->workspaceOversizedDuration = 0; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) { + void* workspace = ZSTD_customMalloc(size, customMem); + DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size); + RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!"); + ZSTD_cwksp_init(ws, workspace, size, ZSTD_cwksp_dynamic_alloc); + return 0; +} + +MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) { + void *ptr = ws->workspace; + DEBUGLOG(4, "cwksp: freeing workspace"); + ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp)); + ZSTD_customFree(ptr, customMem); +} + +/** + * Moves the management of a workspace from one cwksp to another. The src cwksp + * is left in an invalid state (src must be re-init()'ed before it's used again). + */ +MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) { + *dst = *src; + ZSTD_memset(src, 0, sizeof(ZSTD_cwksp)); +} + +MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace); +} + +MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace) + + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart); +} + +MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { + return ws->allocFailed; +} + +/*-************************************* +* Functions Checking Free Space +***************************************/ + +/* ZSTD_alignmentSpaceWithinBounds() : + * Returns if the estimated space needed for a wksp is within an acceptable limit of the + * actual amount of space used. + */ +MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws, + size_t const estimatedSpace, int resizedWorkspace) { + if (resizedWorkspace) { + /* Resized/newly allocated wksp should have exact bounds */ + return ZSTD_cwksp_used(ws) == estimatedSpace; + } else { + /* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes + * than estimatedSpace. See the comments in zstd_cwksp.h for details. 
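+         * For example (illustrative numbers only): with estimatedSpace == 4096,
+         * any ZSTD_cwksp_used(ws) value in [4033, 4159] is accepted by the
+         * check below.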
+ */ + return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63); + } +} + + +MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd); +} + +MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace; +} + +MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_check_available( + ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR); +} + +MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace) + && ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION; +} + +MEM_STATIC void ZSTD_cwksp_bump_oversized_duration( + ZSTD_cwksp* ws, size_t additionalNeededSpace) { + if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) { + ws->workspaceOversizedDuration++; + } else { + ws->workspaceOversizedDuration = 0; + } +} + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_CWKSP_H */ +/**** ended inlining zstd_cwksp.h ****/ +#ifdef ZSTD_MULTITHREAD +/**** start inlining zstdmt_compress.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + #ifndef ZSTDMT_COMPRESS_H + #define ZSTDMT_COMPRESS_H + + #if defined (__cplusplus) + extern "C" { + #endif + + +/* Note : This is an internal API. + * These APIs used to be exposed with ZSTDLIB_API, + * because it used to be the only way to invoke MT compression. + * Now, you must use ZSTD_compress2 and ZSTD_compressStream2() instead. + * + * This API requires ZSTD_MULTITHREAD to be defined during compilation, + * otherwise ZSTDMT_createCCtx*() will fail. + */ + +/* === Dependencies === */ +/**** skipping file: ../common/zstd_deps.h ****/ +#define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters */ +/**** skipping file: ../zstd.h ****/ + + +/* === Constants === */ +#ifndef ZSTDMT_NBWORKERS_MAX /* a different value can be selected at compile time */ +# define ZSTDMT_NBWORKERS_MAX ((sizeof(void*)==4) /*32-bit*/ ? 64 : 256) +#endif +#ifndef ZSTDMT_JOBSIZE_MIN /* a different value can be selected at compile time */ +# define ZSTDMT_JOBSIZE_MIN (512 KB) +#endif +#define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30) +#define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB)) + + +/* ======================================================== + * === Private interface, for use by ZSTD_compress.c === + * === Not exposed in libzstd. Never invoke directly === + * ======================================================== */ + +/* === Memory management === */ +typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx; +/* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */ +ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, + ZSTD_customMem cMem, + ZSTD_threadPool *pool); +size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx); + +size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx); + +/* === Streaming functions === */ + +size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx); + +/*! ZSTDMT_initCStream_internal() : + * Private use only. 
Init streaming operation. + * expects params to be valid. + * must receive dict, or cdict, or none, but not both. + * @return : 0, or an error code */ +size_t ZSTDMT_initCStream_internal(ZSTDMT_CCtx* zcs, + const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, unsigned long long pledgedSrcSize); + +/*! ZSTDMT_compressStream_generic() : + * Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream() + * depending on flush directive. + * @return : minimum amount of data still to be flushed + * 0 if fully flushed + * or an error code + * note : needs to be init using any ZSTD_initCStream*() variant */ +size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); + + /*! ZSTDMT_toFlushNow() + * Tell how many bytes are ready to be flushed immediately. + * Probe the oldest active job (not yet entirely flushed) and check its output buffer. + * If return 0, it means there is no active job, + * or, it means oldest job is still active, but everything produced has been flushed so far, + * therefore flushing is limited by speed of oldest job. */ +size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx); + +/*! ZSTDMT_updateCParams_whileCompressing() : + * Updates only a selected set of compression parameters, to remain compatible with current frame. + * New parameters will be applied to next compression job. */ +void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams); + +/*! ZSTDMT_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads. + */ +ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDMT_COMPRESS_H */ +/**** ended inlining zstdmt_compress.h ****/ +#endif + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Constants +***************************************/ +#define kSearchStrength 8 +#define HASH_READ_SIZE 8 +#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted". + It could be confused for a real successor at index "1", if sorted as larger than its predecessor. + It's not a big deal though : candidate will just be sorted again. + Additionally, candidate position 1 will be lost. + But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. + The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. 
+ This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ + + +/*-************************************* +* Context memory management +***************************************/ +typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; +typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage; + +typedef struct ZSTD_prefixDict_s { + const void* dict; + size_t dictSize; + ZSTD_dictContentType_e dictContentType; +} ZSTD_prefixDict; + +typedef struct { + void* dictBuffer; + void const* dict; + size_t dictSize; + ZSTD_dictContentType_e dictContentType; + ZSTD_CDict* cdict; +} ZSTD_localDict; + +typedef struct { + HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)]; + HUF_repeat repeatMode; +} ZSTD_hufCTables_t; + +typedef struct { + FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; + FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; + FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; + FSE_repeat offcode_repeatMode; + FSE_repeat matchlength_repeatMode; + FSE_repeat litlength_repeatMode; +} ZSTD_fseCTables_t; + +typedef struct { + ZSTD_hufCTables_t huf; + ZSTD_fseCTables_t fse; +} ZSTD_entropyCTables_t; + +/*********************************************** +* Entropy buffer statistics structs and funcs * +***********************************************/ +/** ZSTD_hufCTablesMetadata_t : + * Stores Literals Block Type for a super-block in hType, and + * huffman tree description in hufDesBuffer. + * hufDesSize refers to the size of huffman tree description in bytes. + * This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */ +typedef struct { + symbolEncodingType_e hType; + BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; + size_t hufDesSize; +} ZSTD_hufCTablesMetadata_t; + +/** ZSTD_fseCTablesMetadata_t : + * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and + * fse tables in fseTablesBuffer. + * fseTablesSize refers to the size of fse tables in bytes. + * This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */ +typedef struct { + symbolEncodingType_e llType; + symbolEncodingType_e ofType; + symbolEncodingType_e mlType; + BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; + size_t fseTablesSize; + size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */ +} ZSTD_fseCTablesMetadata_t; + +typedef struct { + ZSTD_hufCTablesMetadata_t hufMetadata; + ZSTD_fseCTablesMetadata_t fseMetadata; +} ZSTD_entropyCTablesMetadata_t; + +/** ZSTD_buildBlockEntropyStats() : + * Builds entropy for the block. + * @return : 0 on success or error code */ +size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize); + +/********************************* +* Compression internals structs * +*********************************/ + +typedef struct { + U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */ + U32 len; /* Raw length of match */ +} ZSTD_match_t; + +typedef struct { + U32 offset; /* Offset of sequence */ + U32 litLength; /* Length of literals prior to match */ + U32 matchLength; /* Raw length of match */ +} rawSeq; + +typedef struct { + rawSeq* seq; /* The start of the sequences */ + size_t pos; /* The index in seq where reading stopped. pos <= size. 
*/ + size_t posInSequence; /* The position within the sequence at seq[pos] where reading + stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */ + size_t size; /* The number of sequences. <= capacity. */ + size_t capacity; /* The capacity starting from `seq` pointer */ +} rawSeqStore_t; + +UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; + +typedef struct { + int price; + U32 off; + U32 mlen; + U32 litlen; + U32 rep[ZSTD_REP_NUM]; +} ZSTD_optimal_t; + +typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e; + +typedef struct { + /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ + unsigned* litFreq; /* table of literals statistics, of size 256 */ + unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */ + unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ + unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ + ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */ + ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */ + + U32 litSum; /* nb of literals */ + U32 litLengthSum; /* nb of litLength codes */ + U32 matchLengthSum; /* nb of matchLength codes */ + U32 offCodeSum; /* nb of offset codes */ + U32 litSumBasePrice; /* to compare to log2(litfreq) */ + U32 litLengthSumBasePrice; /* to compare to log2(llfreq) */ + U32 matchLengthSumBasePrice;/* to compare to log2(mlfreq) */ + U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ + ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ + const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ + ZSTD_literalCompressionMode_e literalCompressionMode; +} optState_t; + +typedef struct { + ZSTD_entropyCTables_t entropy; + U32 rep[ZSTD_REP_NUM]; +} ZSTD_compressedBlockState_t; + +typedef struct { + BYTE const* nextSrc; /* next block here to continue on current prefix */ + BYTE const* base; /* All regular indexes relative to this position */ + BYTE const* dictBase; /* extDict indexes relative to this position */ + U32 dictLimit; /* below that point, need extDict */ + U32 lowLimit; /* below that point, no more valid data */ + U32 nbOverflowCorrections; /* Number of times overflow correction has run since + * ZSTD_window_init(). Useful for debugging coredumps + * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY. + */ +} ZSTD_window_t; + +typedef struct ZSTD_matchState_t ZSTD_matchState_t; + +#define ZSTD_ROW_HASH_CACHE_SIZE 8 /* Size of prefetching hash cache for row-based matchfinder */ + +struct ZSTD_matchState_t { + ZSTD_window_t window; /* State for window round buffer management */ + U32 loadedDictEnd; /* index of end of dictionary, within context's referential. + * When loadedDictEnd != 0, a dictionary is in use, and still valid. + * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance. + * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity(). + * When dict referential is copied into active context (i.e. not attached), + * loadedDictEnd == dictSize, since referential starts from zero. 
+ */ + U32 nextToUpdate; /* index from which to continue table update */ + U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ + + U32 rowHashLog; /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/ + U16* tagTable; /* For row-based matchFinder: A row-based table containing the hashes and head index. */ + U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */ + + U32* hashTable; + U32* hashTable3; + U32* chainTable; + + U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */ + + int dedicatedDictSearch; /* Indicates whether this matchState is using the + * dedicated dictionary search structure. + */ + optState_t opt; /* optimal parser state */ + const ZSTD_matchState_t* dictMatchState; + ZSTD_compressionParameters cParams; + const rawSeqStore_t* ldmSeqStore; +}; + +typedef struct { + ZSTD_compressedBlockState_t* prevCBlock; + ZSTD_compressedBlockState_t* nextCBlock; + ZSTD_matchState_t matchState; +} ZSTD_blockState_t; + +typedef struct { + U32 offset; + U32 checksum; +} ldmEntry_t; + +typedef struct { + BYTE const* split; + U32 hash; + U32 checksum; + ldmEntry_t* bucket; +} ldmMatchCandidate_t; + +#define LDM_BATCH_SIZE 64 + +typedef struct { + ZSTD_window_t window; /* State for the window round buffer management */ + ldmEntry_t* hashTable; + U32 loadedDictEnd; + BYTE* bucketOffsets; /* Next position in bucket to insert entry */ + size_t splitIndices[LDM_BATCH_SIZE]; + ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE]; +} ldmState_t; + +typedef struct { + U32 enableLdm; /* 1 if enable long distance matching */ + U32 hashLog; /* Log size of hashTable */ + U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ + U32 minMatchLength; /* Minimum match length */ + U32 hashRateLog; /* Log number of entries to skip */ + U32 windowLog; /* Window log for the LDM */ +} ldmParams_t; + +typedef struct { + int collectSequences; + ZSTD_Sequence* seqStart; + size_t seqIndex; + size_t maxSequences; +} SeqCollector; + +struct ZSTD_CCtx_params_s { + ZSTD_format_e format; + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; + + int compressionLevel; + int forceWindow; /* force back-references to respect limit of + * 1< 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; +} + +/* ZSTD_MLcode() : + * note : mlBase = matchLength - MINMATCH; + * because it's the format it's stored in seqStore->sequences */ +MEM_STATIC U32 ZSTD_MLcode(U32 mlBase) +{ + static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, + 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; + static const U32 ML_deltaCode = 36; + return (mlBase > 127) ? 
ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; +} + +typedef struct repcodes_s { + U32 rep[3]; +} repcodes_t; + +MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0) +{ + repcodes_t newReps; + if (offset >= ZSTD_REP_NUM) { /* full offset */ + newReps.rep[2] = rep[1]; + newReps.rep[1] = rep[0]; + newReps.rep[0] = offset - ZSTD_REP_MOVE; + } else { /* repcode */ + U32 const repCode = offset + ll0; + if (repCode > 0) { /* note : if repCode==0, no change */ + U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2]; + newReps.rep[1] = rep[0]; + newReps.rep[0] = currentOffset; + } else { /* repCode == 0 */ + ZSTD_memcpy(&newReps, rep, sizeof(newReps)); + } + } + return newReps; +} + +/* ZSTD_cParam_withinBounds: + * @return 1 if value is within cParam bounds, + * 0 otherwise */ +MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) +{ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); + if (ZSTD_isError(bounds.error)) return 0; + if (value < bounds.lowerBound) return 0; + if (value > bounds.upperBound) return 0; + return 1; +} + +/* ZSTD_noCompressBlock() : + * Writes uncompressed block to dst buffer from given src. + * Returns the size of the block */ +MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) +{ + U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); + RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, + dstSize_tooSmall, "dst buf too small for uncompressed block"); + MEM_writeLE24(dst, cBlockHeader24); + ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); + return ZSTD_blockHeaderSize + srcSize; +} + +MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) +{ + BYTE* const op = (BYTE*)dst; + U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3); + RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, ""); + MEM_writeLE24(op, cBlockHeader); + op[3] = src; + return 4; +} + + +/* ZSTD_minGain() : + * minimum compression required + * to generate a compress block or a compressed literals section. + * note : use same formula for both situations */ +MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) +{ + U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6; + ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); + assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); + return (srcSize >> minlog) + 2; +} + +MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) +{ + switch (cctxParams->literalCompressionMode) { + case ZSTD_lcm_huffman: + return 0; + case ZSTD_lcm_uncompressed: + return 1; + default: + assert(0 /* impossible: pre-validated */); + /* fall-through */ + case ZSTD_lcm_auto: + return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); + } +} + +/*! ZSTD_safecopyLiterals() : + * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w. + * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single + * large copies. 
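+ *
+ *  Illustrative behaviour (annotation added for clarity, not upstream text):
+ *  when ip <= ilimit_w, the bulk of the run is copied with ZSTD_wildcopy() up
+ *  to ilimit_w, and only the remaining tail up to iend is copied byte by byte.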
+ */ +static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) { + assert(iend > ilimit_w); + if (ip <= ilimit_w) { + ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap); + op += ilimit_w - ip; + ip = ilimit_w; + } + while (ip < iend) *op++ = *ip++; +} + +/*! ZSTD_storeSeq() : + * Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t. + * `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes). + * `mlBase` : matchLength - MINMATCH + * Allowed to overread literals up to litLimit. +*/ +HINT_INLINE UNUSED_ATTR +void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase) +{ + BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; + BYTE const* const litEnd = literals + litLength; +#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6) + static const BYTE* g_start = NULL; + if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ + { U32 const pos = (U32)((const BYTE*)literals - g_start); + DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", + pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode); + } +#endif + assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); + /* copy Literals */ + assert(seqStorePtr->maxNbLit <= 128 KB); + assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); + assert(literals + litLength <= litLimit); + if (litEnd <= litLimit_w) { + /* Common case we can use wildcopy. + * First copy 16 bytes, because literals are likely short. + */ + assert(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(seqStorePtr->lit, literals); + if (litLength > 16) { + ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap); + } + } else { + ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w); + } + seqStorePtr->lit += litLength; + + /* literal Length */ + if (litLength>0xFFFF) { + assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ + seqStorePtr->longLengthType = ZSTD_llt_literalLength; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].litLength = (U16)litLength; + + /* match offset */ + seqStorePtr->sequences[0].offset = offCode + 1; + + /* match Length */ + if (mlBase>0xFFFF) { + assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */ + seqStorePtr->longLengthType = ZSTD_llt_matchLength; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].matchLength = (U16)mlBase; + + seqStorePtr->sequences++; +} + + +/*-************************************* +* Match length counter +***************************************/ +static unsigned ZSTD_NbCommonBytes (size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) +# if STATIC_BMI2 + return _tzcnt_u64(val) >> 3; +# else + unsigned long r = 0; + return _BitScanForward64( &r, (U64)val ) ? 
(unsigned)(r >> 3) : 0; +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, + 0, 3, 1, 3, 1, 4, 2, 7, + 0, 2, 3, 6, 1, 5, 3, 5, + 1, 3, 4, 4, 2, 5, 6, 7, + 7, 0, 1, 2, 3, 3, 4, 6, + 2, 6, 5, 5, 3, 4, 5, 6, + 7, 1, 2, 4, 6, 4, 4, 5, + 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r=0; + return _BitScanForward( &r, (U32)val ) ? (unsigned)(r >> 3) : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, + 3, 2, 2, 1, 3, 2, 0, 1, + 3, 3, 1, 2, 2, 2, 2, 0, + 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { +# if defined(_MSC_VER) && defined(_WIN64) +# if STATIC_BMI2 + return _lzcnt_u64(val) >> 3; +# else + unsigned long r = 0; + return _BitScanReverse64(&r, (U64)val) ? (unsigned)(r >> 3) : 0; +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 4) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } else { /* 32 bits */ +# if defined(_MSC_VER) + unsigned long r = 0; + return _BitScanReverse( &r, (unsigned long)val ) ? (unsigned)(r >> 3) : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} + + +MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) +{ + const BYTE* const pStart = pIn; + const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1); + + if (pIn < pInLoopLimit) { + { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (diff) return ZSTD_NbCommonBytes(diff); } + pIn+=sizeof(size_t); pMatch+=sizeof(size_t); + while (pIn < pInLoopLimit) { + size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } + pIn += ZSTD_NbCommonBytes(diff); + return (size_t)(pIn - pStart); + } } + if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } + if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; } + if ((pIn> (32-h) ; } +MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ + +static const U32 prime4bytes = 2654435761U; +static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } +static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } + +static const U64 prime5bytes = 889523592379ULL; +static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } +static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } + +static const U64 prime6bytes = 227718039650203ULL; +static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } +static size_t 
ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } + +static const U64 prime7bytes = 58295818150454627ULL; +static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } +static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } + +static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; +static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } +static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } + +MEM_STATIC FORCE_INLINE_ATTR +size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) +{ + switch(mls) + { + default: + case 4: return ZSTD_hash4Ptr(p, hBits); + case 5: return ZSTD_hash5Ptr(p, hBits); + case 6: return ZSTD_hash6Ptr(p, hBits); + case 7: return ZSTD_hash7Ptr(p, hBits); + case 8: return ZSTD_hash8Ptr(p, hBits); + } +} + +/** ZSTD_ipow() : + * Return base^exponent. + */ +static U64 ZSTD_ipow(U64 base, U64 exponent) +{ + U64 power = 1; + while (exponent) { + if (exponent & 1) power *= base; + exponent >>= 1; + base *= base; + } + return power; +} + +#define ZSTD_ROLL_HASH_CHAR_OFFSET 10 + +/** ZSTD_rollingHash_append() : + * Add the buffer to the hash value. + */ +static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size) +{ + BYTE const* istart = (BYTE const*)buf; + size_t pos; + for (pos = 0; pos < size; ++pos) { + hash *= prime8bytes; + hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET; + } + return hash; +} + +/** ZSTD_rollingHash_compute() : + * Compute the rolling hash value of the buffer. + */ +MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size) +{ + return ZSTD_rollingHash_append(0, buf, size); +} + +/** ZSTD_rollingHash_primePower() : + * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash + * over a window of length bytes. + */ +MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length) +{ + return ZSTD_ipow(prime8bytes, length - 1); +} + +/** ZSTD_rollingHash_rotate() : + * Rotate the rolling hash by one byte. + */ +MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower) +{ + hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower; + hash *= prime8bytes; + hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET; + return hash; +} + +/*-************************************* +* Round buffer management +***************************************/ +#if (ZSTD_WINDOWLOG_MAX_64 > 31) +# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX" +#endif +/* Max current allowed */ +#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)) +/* Maximum chunk size before overflow correction needs to be called again */ +#define ZSTD_CHUNKSIZE_MAX \ + ( ((U32)-1) /* Maximum ending current index */ \ + - ZSTD_CURRENT_MAX) /* Maximum beginning lowLimit */ + +/** + * ZSTD_window_clear(): + * Clears the window containing the history by simply setting it to empty. + */ +MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window) +{ + size_t const endT = (size_t)(window->nextSrc - window->base); + U32 const end = (U32)endT; + + window->lowLimit = end; + window->dictLimit = end; +} + +MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window) +{ + return window.dictLimit == 1 && + window.lowLimit == 1 && + (window.nextSrc - window.base) == 1; +} + +/** + * ZSTD_window_hasExtDict(): + * Returns non-zero if the window has a non-empty extDict. 
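+ * (Concretely, per the definition below: lowLimit < dictLimit, which happens
+ * after a non-contiguous ZSTD_window_update() has turned the old prefix into
+ * the extDict.)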
+ */ +MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window) +{ + return window.lowLimit < window.dictLimit; +} + +/** + * ZSTD_matchState_dictMode(): + * Inspects the provided matchState and figures out what dictMode should be + * passed to the compressor. + */ +MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) +{ + return ZSTD_window_hasExtDict(ms->window) ? + ZSTD_extDict : + ms->dictMatchState != NULL ? + (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) : + ZSTD_noDict; +} + +/* Defining this macro to non-zero tells zstd to run the overflow correction + * code much more frequently. This is very inefficient, and should only be + * used for tests and fuzzers. + */ +#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY +# ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +# define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1 +# else +# define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0 +# endif +#endif + +/** + * ZSTD_window_canOverflowCorrect(): + * Returns non-zero if the indices are large enough for overflow correction + * to work correctly without impacting compression ratio. + */ +MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window, + U32 cycleLog, + U32 maxDist, + U32 loadedDictEnd, + void const* src) +{ + U32 const cycleSize = 1u << cycleLog; + U32 const curr = (U32)((BYTE const*)src - window.base); + U32 const minIndexToOverflowCorrect = cycleSize + MAX(maxDist, cycleSize); + + /* Adjust the min index to backoff the overflow correction frequency, + * so we don't waste too much CPU in overflow correction. If this + * computation overflows we don't really care, we just need to make + * sure it is at least minIndexToOverflowCorrect. + */ + U32 const adjustment = window.nbOverflowCorrections + 1; + U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment, + minIndexToOverflowCorrect); + U32 const indexLargeEnough = curr > adjustedIndex; + + /* Only overflow correct early if the dictionary is invalidated already, + * so we don't hurt compression ratio. + */ + U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd; + + return indexLargeEnough && dictionaryInvalidated; +} + +/** + * ZSTD_window_needOverflowCorrection(): + * Returns non-zero if the indices are getting too large and need overflow + * protection. + */ +MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, + U32 cycleLog, + U32 maxDist, + U32 loadedDictEnd, + void const* src, + void const* srcEnd) +{ + U32 const curr = (U32)((BYTE const*)srcEnd - window.base); + if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) { + if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) { + return 1; + } + } + return curr > ZSTD_CURRENT_MAX; +} + +/** + * ZSTD_window_correctOverflow(): + * Reduces the indices to protect from index overflow. + * Returns the correction made to the indices, which must be applied to every + * stored index. + * + * The least significant cycleLog bits of the indices must remain the same, + * which may be 0. Every index up to maxDist in the past must be valid. + */ +MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, + U32 maxDist, void const* src) +{ + /* preemptive overflow correction: + * 1. correction is large enough: + * lowLimit > (3<<29) ==> current > 3<<29 + 1< (3<<29 + 1< (3<<29) - (1< (3<<29) - (1<<30) (NOTE: chainLog <= 30) + * > 1<<29 + * + * 2. 
(ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow: + * After correction, current is less than (1<base < 1<<32. + * 3. (cctx->lowLimit + 1< 3<<29 + 1<base); + U32 const currentCycle0 = curr & cycleMask; + /* Exclude zero so that newCurrent - maxDist >= 1. */ + U32 const currentCycle1 = currentCycle0 == 0 ? cycleSize : currentCycle0; + U32 const newCurrent = currentCycle1 + MAX(maxDist, cycleSize); + U32 const correction = curr - newCurrent; + /* maxDist must be a power of two so that: + * (newCurrent & cycleMask) == (curr & cycleMask) + * This is required to not corrupt the chains / binary tree. + */ + assert((maxDist & (maxDist - 1)) == 0); + assert((curr & cycleMask) == (newCurrent & cycleMask)); + assert(curr > newCurrent); + if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) { + /* Loose bound, should be around 1<<29 (see above) */ + assert(correction > 1<<28); + } + + window->base += correction; + window->dictBase += correction; + if (window->lowLimit <= correction) window->lowLimit = 1; + else window->lowLimit -= correction; + if (window->dictLimit <= correction) window->dictLimit = 1; + else window->dictLimit -= correction; + + /* Ensure we can still reference the full window. */ + assert(newCurrent >= maxDist); + assert(newCurrent - maxDist >= 1); + /* Ensure that lowLimit and dictLimit didn't underflow. */ + assert(window->lowLimit <= newCurrent); + assert(window->dictLimit <= newCurrent); + + ++window->nbOverflowCorrections; + + DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction, + window->lowLimit); + return correction; +} + +/** + * ZSTD_window_enforceMaxDist(): + * Updates lowLimit so that: + * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd + * + * It ensures index is valid as long as index >= lowLimit. + * This must be called before a block compression call. + * + * loadedDictEnd is only defined if a dictionary is in use for current compression. + * As the name implies, loadedDictEnd represents the index at end of dictionary. + * The value lies within context's referential, it can be directly compared to blockEndIdx. + * + * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0. + * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit. + * This is because dictionaries are allowed to be referenced fully + * as long as the last byte of the dictionary is in the window. + * Once input has progressed beyond window size, dictionary cannot be referenced anymore. + * + * In normal dict mode, the dictionary lies between lowLimit and dictLimit. + * In dictMatchState mode, lowLimit and dictLimit are the same, + * and the dictionary is below them. + * forceWindow and dictMatchState are therefore incompatible. + */ +MEM_STATIC void +ZSTD_window_enforceMaxDist(ZSTD_window_t* window, + const void* blockEnd, + U32 maxDist, + U32* loadedDictEndPtr, + const ZSTD_matchState_t** dictMatchStatePtr) +{ + U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); + U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; + DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", + (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); + + /* - When there is no dictionary : loadedDictEnd == 0. + In which case, the test (blockEndIdx > maxDist) is merely to avoid + overflowing next operation `newLowLimit = blockEndIdx - maxDist`. + - When there is a standard dictionary : + Index referential is copied from the dictionary, + which means it starts from 0. 
+ In which case, loadedDictEnd == dictSize, + and it makes sense to compare `blockEndIdx > maxDist + dictSize` + since `blockEndIdx` also starts from zero. + - When there is an attached dictionary : + loadedDictEnd is expressed within the referential of the context, + so it can be directly compared against blockEndIdx. + */ + if (blockEndIdx > maxDist + loadedDictEnd) { + U32 const newLowLimit = blockEndIdx - maxDist; + if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit; + if (window->dictLimit < window->lowLimit) { + DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u", + (unsigned)window->dictLimit, (unsigned)window->lowLimit); + window->dictLimit = window->lowLimit; + } + /* On reaching window size, dictionaries are invalidated */ + if (loadedDictEndPtr) *loadedDictEndPtr = 0; + if (dictMatchStatePtr) *dictMatchStatePtr = NULL; + } +} + +/* Similar to ZSTD_window_enforceMaxDist(), + * but only invalidates dictionary + * when input progresses beyond window size. + * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL) + * loadedDictEnd uses same referential as window->base + * maxDist is the window size */ +MEM_STATIC void +ZSTD_checkDictValidity(const ZSTD_window_t* window, + const void* blockEnd, + U32 maxDist, + U32* loadedDictEndPtr, + const ZSTD_matchState_t** dictMatchStatePtr) +{ + assert(loadedDictEndPtr != NULL); + assert(dictMatchStatePtr != NULL); + { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); + U32 const loadedDictEnd = *loadedDictEndPtr; + DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", + (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); + assert(blockEndIdx >= loadedDictEnd); + + if (blockEndIdx > loadedDictEnd + maxDist) { + /* On reaching window size, dictionaries are invalidated. + * For simplification, if window size is reached anywhere within next block, + * the dictionary is invalidated for the full block. + */ + DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)"); + *loadedDictEndPtr = 0; + *dictMatchStatePtr = NULL; + } else { + if (*loadedDictEndPtr != 0) { + DEBUGLOG(6, "dictionary considered valid for current block"); + } } } +} + +MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { + ZSTD_memset(window, 0, sizeof(*window)); + window->base = (BYTE const*)""; + window->dictBase = (BYTE const*)""; + window->dictLimit = 1; /* start from 1, so that 1st position is valid */ + window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ + window->nextSrc = window->base + 1; /* see issue #1241 */ + window->nbOverflowCorrections = 0; +} + +/** + * ZSTD_window_update(): + * Updates the window by appending [src, src + srcSize) to the window. + * If it is not contiguous, the current prefix becomes the extDict, and we + * forget about the extDict. Handles overlap of the prefix and extDict. + * Returns non-zero if the segment is contiguous. 
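+ *
+ * Illustrative call sequence (hypothetical buffers, not upstream documentation):
+ *   ZSTD_window_update(&window, buf,        len1, 0);   first segment
+ *   ZSTD_window_update(&window, buf + len1, len2, 0);   matches window.nextSrc: contiguous, returns 1
+ *   ZSTD_window_update(&window, otherBuf,   len3, 0);   different buffer: old prefix becomes extDict, returns 0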
+ */ +MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, + void const* src, size_t srcSize, + int forceNonContiguous) +{ + BYTE const* const ip = (BYTE const*)src; + U32 contiguous = 1; + DEBUGLOG(5, "ZSTD_window_update"); + if (srcSize == 0) + return contiguous; + assert(window->base != NULL); + assert(window->dictBase != NULL); + /* Check if blocks follow each other */ + if (src != window->nextSrc || forceNonContiguous) { + /* not contiguous */ + size_t const distanceFromBase = (size_t)(window->nextSrc - window->base); + DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit); + window->lowLimit = window->dictLimit; + assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */ + window->dictLimit = (U32)distanceFromBase; + window->dictBase = window->base; + window->base = ip - distanceFromBase; + /* ms->nextToUpdate = window->dictLimit; */ + if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */ + contiguous = 0; + } + window->nextSrc = ip + srcSize; + /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */ + if ( (ip+srcSize > window->dictBase + window->lowLimit) + & (ip < window->dictBase + window->dictLimit)) { + ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase; + U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx; + window->lowLimit = lowLimitMax; + DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit); + } + return contiguous; +} + +/** + * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix. + */ +MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) +{ + U32 const maxDistance = 1U << windowLog; + U32 const lowestValid = ms->window.lowLimit; + U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + U32 const isDictionary = (ms->loadedDictEnd != 0); + /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary + * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't + * valid for the entire block. So this check is sufficient to find the lowest valid match index. + */ + U32 const matchLowest = isDictionary ? lowestValid : withinWindow; + return matchLowest; +} + +/** + * Returns the lowest allowed match index in the prefix. + */ +MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) +{ + U32 const maxDistance = 1U << windowLog; + U32 const lowestValid = ms->window.dictLimit; + U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + U32 const isDictionary = (ms->loadedDictEnd != 0); + /* When computing the lowest prefix index we need to take the dictionary into account to handle + * the edge case where the dictionary and the source are contiguous in memory. + */ + U32 const matchLowest = isDictionary ? 
lowestValid : withinWindow; + return matchLowest; +} + + + +/* debug functions */ +#if (DEBUGLEVEL>=2) + +MEM_STATIC double ZSTD_fWeight(U32 rawStat) +{ + U32 const fp_accuracy = 8; + U32 const fp_multiplier = (1 << fp_accuracy); + U32 const newStat = rawStat + 1; + U32 const hb = ZSTD_highbit32(newStat); + U32 const BWeight = hb * fp_multiplier; + U32 const FWeight = (newStat << fp_accuracy) >> hb; + U32 const weight = BWeight + FWeight; + assert(hb + fp_accuracy < 31); + return (double)weight / fp_multiplier; +} + +/* display a table content, + * listing each element, its frequency, and its predicted bit cost */ +MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) +{ + unsigned u, sum; + for (u=0, sum=0; u<=max; u++) sum += table[u]; + DEBUGLOG(2, "total nb elts: %u", sum); + for (u=0; u<=max; u++) { + DEBUGLOG(2, "%2u: %5u (%.2f)", + u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) ); + } +} + +#endif + + +#if defined (__cplusplus) +} +#endif + +/* =============================================================== + * Shared internal declarations + * These prototypes may be called from sources not in lib/compress + * =============================================================== */ + +/* ZSTD_loadCEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * return : size of dictionary header (size of magic number + dict ID + entropy tables) + * assumptions : magic number supposed already checked + * and dictSize >= 8 */ +size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, + const void* const dict, size_t dictSize); + +void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs); + +/* ============================================================== + * Private declarations + * These prototypes shall only be called from within lib/compress + * ============================================================== */ + +/* ZSTD_getCParamsFromCCtxParams() : + * cParams are built depending on compressionLevel, src size hints, + * LDM and manually set compression parameters. + * Note: srcSizeHint == 0 means 0! + */ +ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); + +/*! ZSTD_initCStream_internal() : + * Private use only. Init streaming operation. + * expects params to be valid. + * must receive dict, or cdict, or none, but not both. + * @return : 0, or an error code */ +size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize); + +void ZSTD_resetSeqStore(seqStore_t* ssPtr); + +/*! ZSTD_getCParamsFromCDict() : + * as the name implies */ +ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); + +/* ZSTD_compressBegin_advanced_internal() : + * Private use only. To be called from zstdmt_compress.c. */ +size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + unsigned long long pledgedSrcSize); + +/* ZSTD_compress_advanced_internal() : + * Private use only. To be called from zstdmt_compress.c. 
 */
+size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
+                                       void* dst, size_t dstCapacity,
+                                       const void* src, size_t srcSize,
+                                       const void* dict,size_t dictSize,
+                                       const ZSTD_CCtx_params* params);
+
+
+/* ZSTD_writeLastEmptyBlock() :
+ * output an empty Block with end-of-frame mark to complete a frame
+ * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
+ *           or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize)
+ */
+size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
+
+/* ZSTD_referenceExternalSequences() :
+ * Must be called before starting a compression operation.
+ * seqs must parse a prefix of the source.
+ * This cannot be used when long range matching is enabled.
+ * Zstd will use these sequences, and pass the literals to a secondary block
+ * compressor.
+ * @return : An error code on failure.
+ * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
+ * access and data corruption.
+ */
+size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
+
+/** ZSTD_cycleLog() :
+ *  condition for correct operation : hashLog > 1 */
+U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat);
+
+/** ZSTD_CCtx_trace() :
+ *  Trace the end of a compression call.
+ */
+void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize);
+
+#endif /* ZSTD_COMPRESS_H */
+/**** ended inlining zstd_compress_internal.h ****/
+
+
+size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
+                              ZSTD_hufCTables_t* nextHuf,
+                              ZSTD_strategy strategy, int disableLiteralCompression,
+                              void* dst, size_t dstCapacity,
+                              const void* src, size_t srcSize,
+                              void* entropyWorkspace, size_t entropyWorkspaceSize,
+                              const int bmi2);
+
+#endif /* ZSTD_COMPRESS_LITERALS_H */
+/**** ended inlining zstd_compress_literals.h ****/
+
+size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
+
+    RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
+
+    switch(flSize)
+    {
+        case 1: /* 2 - 1 - 5 */
+            ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
+            break;
+        case 2: /* 2 - 2 - 12 */
+            MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
+            break;
+        case 3: /* 2 - 2 - 20 */
+            MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
+            break;
+        default:   /* not necessary : flSize is {1,2,3} */
+            assert(0);
+    }
+
+    ZSTD_memcpy(ostart + flSize, src, srcSize);
+    DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
+    return srcSize + flSize;
+}
+
+size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+{
+    BYTE* const ostart = (BYTE*)dst;
+    U32   const flSize = 1 + (srcSize>31) + (srcSize>4095);
+
+    (void)dstCapacity;  /* dstCapacity already guaranteed to be >=4, hence large enough */
+
+    switch(flSize)
+    {
+        case 1: /* 2 - 1 - 5 */
+            ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
+            break;
+        case 2: /* 2 - 2 - 12 */
+            MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
+            break;
+        case 3: /* 2 - 2 - 20 */
+            MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
+            break;
+        default:   /* not necessary : flSize is {1,2,3} */
+            assert(0);
+    }
+
+    ostart[flSize] = *(const BYTE*)src;
+    DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
+    return flSize+1;
+}
+
+size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
+                              ZSTD_hufCTables_t* nextHuf,
+                              ZSTD_strategy strategy, int disableLiteralCompression,
+                              void* dst, size_t dstCapacity,
+                              const void* src, size_t srcSize,
+                              void* entropyWorkspace, size_t entropyWorkspaceSize,
+                              const int bmi2)
+{
+    size_t const minGain = ZSTD_minGain(srcSize, strategy);
+    size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
+    BYTE*  const ostart = (BYTE*)dst;
+    U32 singleStream = srcSize < 256;
+    symbolEncodingType_e hType = set_compressed;
+    size_t cLitSize;
+
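+    /* Illustrative sketch of the raw-literals header built above (assumed numbers,
+     * not upstream zstd text): srcSize = 1000 gives flSize = 1 + (1000>31) + (1000>4095) = 2,
+     * so the "2 - 2 - 12" layout is used and the header value is
+     *     set_basic + (1<<2) + (1000<<4) = 0 + 4 + 16000 = 16004 = 0x3E84,
+     * written little-endian as bytes 0x84 0x3E, followed by the 1000 raw bytes. */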
DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)", + disableLiteralCompression, (U32)srcSize); + + /* Prepare nextEntropy assuming reusing the existing table */ + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + + if (disableLiteralCompression) + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + + /* small ? don't even attempt compression (speed opt) */ +# define COMPRESS_LITERALS_SIZE_MIN 63 + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + + RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); + { HUF_repeat repeat = prevHuf->repeatMode; + int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; + if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; + cLitSize = singleStream ? + HUF_compress1X_repeat( + ostart+lhSize, dstCapacity-lhSize, src, srcSize, + HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, + (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) : + HUF_compress4X_repeat( + ostart+lhSize, dstCapacity-lhSize, src, srcSize, + HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, + (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); + if (repeat != HUF_repeat_none) { + /* reused the existing table */ + DEBUGLOG(5, "Reusing previous huffman table"); + hType = set_repeat; + } + } + + if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) { + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + if (cLitSize==1) { + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); + } + + if (hType == set_compressed) { + /* using a newly constructed table */ + nextHuf->repeatMode = HUF_repeat_check; + } + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); + MEM_writeLE32(ostart, lhc); + break; + } + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + default: /* not possible : lhSize is {3,4,5} */ + assert(0); + } + DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize)); + return lhSize+cLitSize; +} +/**** ended inlining compress/zstd_compress_literals.c ****/ +/**** start inlining compress/zstd_compress_sequences.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +/**** start inlining zstd_compress_sequences.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMPRESS_SEQUENCES_H +#define ZSTD_COMPRESS_SEQUENCES_H + +/**** skipping file: ../common/fse.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ + +typedef enum { + ZSTD_defaultDisallowed = 0, + ZSTD_defaultAllowed = 1 +} ZSTD_defaultPolicy_e; + +symbolEncodingType_e +ZSTD_selectEncodingType( + FSE_repeat* repeatMode, unsigned const* count, unsigned const max, + size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, + FSE_CTable const* prevCTable, + short const* defaultNorm, U32 defaultNormLog, + ZSTD_defaultPolicy_e const isDefaultAllowed, + ZSTD_strategy const strategy); + +size_t +ZSTD_buildCTable(void* dst, size_t dstCapacity, + FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, + unsigned* count, U32 max, + const BYTE* codeTable, size_t nbSeq, + const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, + const FSE_CTable* prevCTable, size_t prevCTableSize, + void* entropyWorkspace, size_t entropyWorkspaceSize); + +size_t ZSTD_encodeSequences( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2); + +size_t ZSTD_fseBitCost( + FSE_CTable const* ctable, + unsigned const* count, + unsigned const max); + +size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max); +#endif /* ZSTD_COMPRESS_SEQUENCES_H */ +/**** ended inlining zstd_compress_sequences.h ****/ + +/** + * -log2(x / 256) lookup table for x in [0, 256). 
+ * If x == 0: Return 0 + * Else: Return floor(-log2(x / 256) * 256) + */ +static unsigned const kInverseProbabilityLog256[256] = { + 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162, + 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889, + 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734, + 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626, + 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542, + 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473, + 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415, + 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366, + 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322, + 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282, + 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247, + 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215, + 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185, + 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157, + 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132, + 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108, + 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85, + 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64, + 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44, + 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25, + 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7, + 5, 4, 2, 1, +}; + +static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { + void const* ptr = ctable; + U16 const* u16ptr = (U16 const*)ptr; + U32 const maxSymbolValue = MEM_read16(u16ptr + 1); + return maxSymbolValue; +} + +/** + * Returns true if we should use ncount=-1 else we should + * use ncount=1 for low probability symbols instead. + */ +static unsigned ZSTD_useLowProbCount(size_t const nbSeq) +{ + /* Heuristic: This should cover most blocks <= 16K and + * start to fade out after 16K to about 32K depending on + * comprssibility. + */ + return nbSeq >= 2048; +} + +/** + * Returns the cost in bytes of encoding the normalized count header. + * Returns an error if any of the helper functions return an error. + */ +static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, + size_t const nbSeq, unsigned const FSELog) +{ + BYTE wksp[FSE_NCOUNTBOUND]; + S16 norm[MaxSeq + 1]; + const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), ""); + return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); +} + +/** + * Returns the cost in bits of encoding the distribution described by count + * using the entropy bound. + */ +static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total) +{ + unsigned cost = 0; + unsigned s; + + assert(total > 0); + for (s = 0; s <= max; ++s) { + unsigned norm = (unsigned)((256 * count[s]) / total); + if (count[s] != 0 && norm == 0) + norm = 1; + assert(count[s] < total); + cost += count[s] * kInverseProbabilityLog256[norm]; + } + return cost >> 8; +} + +/** + * Returns the cost in bits of encoding the distribution in count using ctable. + * Returns an error if ctable cannot represent all the symbols in count. 
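+ *
+ * Illustrative sketch (assumed numbers, not upstream zstd text): the cost helpers
+ * above work in 8-bit fixed point via kInverseProbabilityLog256. A symbol seen
+ * 64 times out of a total of 256 normalizes to 64, the table gives
+ * kInverseProbabilityLog256[64] == 512 (i.e. 2.0 bits), so it contributes
+ * (64 * 512) >> 8 == 128 bits to ZSTD_entropyCost().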
+ */ +size_t ZSTD_fseBitCost( + FSE_CTable const* ctable, + unsigned const* count, + unsigned const max) +{ + unsigned const kAccuracyLog = 8; + size_t cost = 0; + unsigned s; + FSE_CState_t cstate; + FSE_initCState(&cstate, ctable); + if (ZSTD_getFSEMaxSymbolValue(ctable) < max) { + DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u", + ZSTD_getFSEMaxSymbolValue(ctable), max); + return ERROR(GENERIC); + } + for (s = 0; s <= max; ++s) { + unsigned const tableLog = cstate.stateLog; + unsigned const badCost = (tableLog + 1) << kAccuracyLog; + unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); + if (count[s] == 0) + continue; + if (bitCost >= badCost) { + DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s); + return ERROR(GENERIC); + } + cost += (size_t)count[s] * bitCost; + } + return cost >> kAccuracyLog; +} + +/** + * Returns the cost in bits of encoding the distribution in count using the + * table described by norm. The max symbol support by norm is assumed >= max. + * norm must be valid for every symbol with non-zero probability in count. + */ +size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max) +{ + unsigned const shift = 8 - accuracyLog; + size_t cost = 0; + unsigned s; + assert(accuracyLog <= 8); + for (s = 0; s <= max; ++s) { + unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1; + unsigned const norm256 = normAcc << shift; + assert(norm256 > 0); + assert(norm256 < 256); + cost += count[s] * kInverseProbabilityLog256[norm256]; + } + return cost >> 8; +} + +symbolEncodingType_e +ZSTD_selectEncodingType( + FSE_repeat* repeatMode, unsigned const* count, unsigned const max, + size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, + FSE_CTable const* prevCTable, + short const* defaultNorm, U32 defaultNormLog, + ZSTD_defaultPolicy_e const isDefaultAllowed, + ZSTD_strategy const strategy) +{ + ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); + if (mostFrequent == nbSeq) { + *repeatMode = FSE_repeat_none; + if (isDefaultAllowed && nbSeq <= 2) { + /* Prefer set_basic over set_rle when there are 2 or less symbols, + * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. + * If basic encoding isn't possible, always choose RLE. + */ + DEBUGLOG(5, "Selected set_basic"); + return set_basic; + } + DEBUGLOG(5, "Selected set_rle"); + return set_rle; + } + if (strategy < ZSTD_lazy) { + if (isDefaultAllowed) { + size_t const staticFse_nbSeq_max = 1000; + size_t const mult = 10 - strategy; + size_t const baseLog = 3; + size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */ + assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */ + assert(mult <= 9 && mult >= 7); + if ( (*repeatMode == FSE_repeat_valid) + && (nbSeq < staticFse_nbSeq_max) ) { + DEBUGLOG(5, "Selected set_repeat"); + return set_repeat; + } + if ( (nbSeq < dynamicFse_nbSeq_min) + || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) { + DEBUGLOG(5, "Selected set_basic"); + /* The format allows default tables to be repeated, but it isn't useful. + * When using simple heuristics to select encoding type, we don't want + * to confuse these tables with dictionaries. When running more careful + * analysis, we don't need to waste time checking both repeating tables + * and default tables. 
+ */ + *repeatMode = FSE_repeat_none; + return set_basic; + } + } + } else { + size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC); + size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC); + size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog); + size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); + + if (isDefaultAllowed) { + assert(!ZSTD_isError(basicCost)); + assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost))); + } + assert(!ZSTD_isError(NCountCost)); + assert(compressedCost < ERROR(maxCode)); + DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u", + (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost); + if (basicCost <= repeatCost && basicCost <= compressedCost) { + DEBUGLOG(5, "Selected set_basic"); + assert(isDefaultAllowed); + *repeatMode = FSE_repeat_none; + return set_basic; + } + if (repeatCost <= compressedCost) { + DEBUGLOG(5, "Selected set_repeat"); + assert(!ZSTD_isError(repeatCost)); + return set_repeat; + } + assert(compressedCost < basicCost && compressedCost < repeatCost); + } + DEBUGLOG(5, "Selected set_compressed"); + *repeatMode = FSE_repeat_check; + return set_compressed; +} + +typedef struct { + S16 norm[MaxSeq + 1]; + U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)]; +} ZSTD_BuildCTableWksp; + +size_t +ZSTD_buildCTable(void* dst, size_t dstCapacity, + FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, + unsigned* count, U32 max, + const BYTE* codeTable, size_t nbSeq, + const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, + const FSE_CTable* prevCTable, size_t prevCTableSize, + void* entropyWorkspace, size_t entropyWorkspaceSize) +{ + BYTE* op = (BYTE*)dst; + const BYTE* const oend = op + dstCapacity; + DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity); + + switch (type) { + case set_rle: + FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), ""); + RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space"); + *op = codeTable[0]; + return 1; + case set_repeat: + ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize); + return 0; + case set_basic: + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */ + return 0; + case set_compressed: { + ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace; + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); + if (count[codeTable[nbSeq-1]] > 1) { + count[codeTable[nbSeq-1]]--; + nbSeq_1--; + } + assert(nbSeq_1 > 1); + assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp)); + (void)entropyWorkspaceSize; + FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), ""); + { size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog); /* overflow protected */ + FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed"); + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), ""); + return NCountSize; + } + } + default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach"); + } +} + +FORCE_INLINE_TEMPLATE size_t +ZSTD_encodeSequences_body( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* 
mlCodeTable,
+    FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+    FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+    seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    BIT_CStream_t blockStream;
+    FSE_CState_t  stateMatchLength;
+    FSE_CState_t  stateOffsetBits;
+    FSE_CState_t  stateLitLength;
+
+    RETURN_ERROR_IF(
+        ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)),
+        dstSize_tooSmall, "not enough space remaining");
+    DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)",
+                (int)(blockStream.endPtr - blockStream.startPtr),
+                (unsigned)dstCapacity);
+
+    /* first symbols */
+    FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateOffsetBits,  CTable_OffsetBits,  ofCodeTable[nbSeq-1]);
+    FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
+    if (MEM_32bits()) BIT_flushBits(&blockStream);
+    if (longOffsets) {
+        U32 const ofBits = ofCodeTable[nbSeq-1];
+        unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+        if (extraBits) {
+            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
+            BIT_flushBits(&blockStream);
+        }
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
+                    ofBits - extraBits);
+    } else {
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+    }
+    BIT_flushBits(&blockStream);
+
+    { size_t n;
+      for (n=nbSeq-2 ; n<nbSeq ; n--) {      /* intentional underflow */
+          BYTE const llCode = llCodeTable[n];
+          BYTE const ofCode = ofCodeTable[n];
+          BYTE const mlCode = mlCodeTable[n];
+          U32  const llBits = LL_bits[llCode];
+          U32  const ofBits = ofCode;
+          U32  const mlBits = ML_bits[mlCode];
+          DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
+                      (unsigned)sequences[n].litLength,
+                      (unsigned)sequences[n].matchLength + MINMATCH,
+                      (unsigned)sequences[n].offset);
+                                                                          /* 32b*/  /* 64b*/
+                                                                          /* (7)*/  /* (7)*/
+          FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
+          FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode);      /* 24 */  /* 24 */
+          if (MEM_32bits()) BIT_flushBits(&blockStream);                  /* (7)*/
+          FSE_encodeSymbol(&blockStream, &stateLitLength, llCode);        /* 16 */  /* 33 */
+          if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
+              BIT_flushBits(&blockStream);                                /* (7)*/
+          BIT_addBits(&blockStream, sequences[n].litLength, llBits);
+          if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
+          BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
+          if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
+          if (longOffsets) {
+              unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
+              if (extraBits) {
+                  BIT_addBits(&blockStream, sequences[n].offset, extraBits);
+                  BIT_flushBits(&blockStream);                            /* (7)*/
+              }
+              BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
+                          ofBits - extraBits);                            /* 31 */
+          } else {
+              BIT_addBits(&blockStream, sequences[n].offset, ofBits);     /* 31 */
+          }
+          BIT_flushBits(&blockStream);                                    /* (7)*/
+          DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
+    } }
+
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog);
+    FSE_flushCState(&blockStream, &stateMatchLength);
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog);
+    FSE_flushCState(&blockStream, &stateOffsetBits);
+    DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog);
+    FSE_flushCState(&blockStream, &stateLitLength);
+
+    { size_t const streamSize = BIT_closeCStream(&blockStream);
+      RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space");
+      return streamSize;
+    }
+}
+
+static size_t
+ZSTD_encodeSequences_default(
+            void* dst, size_t dstCapacity,
+            FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
+            FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
+            FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
+            seqDef const* sequences, size_t nbSeq, int longOffsets)
+{
+    return ZSTD_encodeSequences_body(dst, dstCapacity,
+
CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} + + +#if DYNAMIC_BMI2 + +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_encodeSequences_bmi2( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + return ZSTD_encodeSequences_body(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} + +#endif + +size_t ZSTD_encodeSequences( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) +{ + DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); +#if DYNAMIC_BMI2 + if (bmi2) { + return ZSTD_encodeSequences_bmi2(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); + } +#endif + (void)bmi2; + return ZSTD_encodeSequences_default(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} +/**** ended inlining compress/zstd_compress_sequences.c ****/ +/**** start inlining compress/zstd_compress_superblock.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +/**** start inlining zstd_compress_superblock.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMPRESS_ADVANCED_H +#define ZSTD_COMPRESS_ADVANCED_H + +/*-************************************* +* Dependencies +***************************************/ + +/**** skipping file: ../zstd.h ****/ + +/*-************************************* +* Target Compressed Block Size +***************************************/ + +/* ZSTD_compressSuperBlock() : + * Used to compress a super block when targetCBlockSize is being used. + * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. 
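+ *  Illustrative sketch (assumed numbers, not upstream zstd text): with
+ *  targetCBlockSize = 1300, a 100 KB block is emitted as roughly 80 compressed
+ *  sub-blocks; the first sub-block that succeeds carries the Huffman/FSE table
+ *  descriptions, the following ones reuse them via repeat mode, and each pays
+ *  only its own 3-byte block header.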
*/ +size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + void const* src, size_t srcSize, + unsigned lastBlock); + +#endif /* ZSTD_COMPRESS_ADVANCED_H */ +/**** ended inlining zstd_compress_superblock.h ****/ + +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: hist.h ****/ +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: zstd_compress_sequences.h ****/ +/**** skipping file: zstd_compress_literals.h ****/ + +/** ZSTD_compressSubBlock_literal() : + * Compresses literals section for a sub-block. + * When we have to write the Huffman table we will sometimes choose a header + * size larger than necessary. This is because we have to pick the header size + * before we know the table size + compressed size, so we have a bound on the + * table size. If we guessed incorrectly, we fall back to uncompressed literals. + * + * We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded + * in writing the header, otherwise it is set to 0. + * + * hufMetadata->hType has literals block type info. + * If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block. + * If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block. + * If it is set_compressed, first sub-block's literals section will be Compressed_Literals_Block + * If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block + * and the following sub-blocks' literals sections will be Treeless_Literals_Block. + * @return : compressed size of literals section of a sub-block + * Or 0 if it unable to compress. + * Or error code */ +static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + const BYTE* literals, size_t litSize, + void* dst, size_t dstSize, + const int bmi2, int writeEntropy, int* entropyWritten) +{ + size_t const header = writeEntropy ? 200 : 0; + size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header)); + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart + lhSize; + U32 const singleStream = lhSize == 3; + symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat; + size_t cLitSize = 0; + + (void)bmi2; /* TODO bmi2... */ + + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy); + + *entropyWritten = 0; + if (litSize == 0 || hufMetadata->hType == set_basic) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } else if (hufMetadata->hType == set_rle) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal"); + return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize); + } + + assert(litSize > 0); + assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat); + + if (writeEntropy && hufMetadata->hType == set_compressed) { + ZSTD_memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize); + op += hufMetadata->hufDesSize; + cLitSize += hufMetadata->hufDesSize; + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize); + } + + /* TODO bmi2 */ + { const size_t cSize = singleStream ? 
HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable) + : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable); + op += cSize; + cLitSize += cSize; + if (cSize == 0 || ERR_isError(cSize)) { + DEBUGLOG(5, "Failed to write entropy tables %s", ZSTD_getErrorName(cSize)); + return 0; + } + /* If we expand and we aren't writing a header then emit uncompressed */ + if (!writeEntropy && cLitSize >= litSize) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } + /* If we are writing headers then allow expansion that doesn't change our header size. */ + if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) { + assert(cLitSize > litSize); + DEBUGLOG(5, "Literals expanded beyond allowed header size"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize); + } + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18); + MEM_writeLE32(ostart, lhc); + break; + } + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + default: /* not possible : lhSize is {3,4,5} */ + assert(0); + } + *entropyWritten = 1; + DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart)); + return op-ostart; +} + +static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) { + const seqDef* const sstart = sequences; + const seqDef* const send = sequences + nbSeq; + const seqDef* sp = sstart; + size_t matchLengthSum = 0; + size_t litLengthSum = 0; + while (send-sp > 0) { + ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp); + litLengthSum += seqLen.litLength; + matchLengthSum += seqLen.matchLength; + sp++; + } + assert(litLengthSum <= litSize); + if (!lastSequence) { + assert(litLengthSum == litSize); + } + return matchLengthSum + litSize; +} + +/** ZSTD_compressSubBlock_sequences() : + * Compresses sequences section for a sub-block. + * fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have + * symbol compression modes for the super-block. + * The first successfully compressed block will have these in its header. + * We set entropyWritten=1 when we succeed in compressing the sequences. + * The following sub-blocks will always have repeat mode. + * @return : compressed size of sequences section of a sub-block + * Or 0 if it is unable to compress + * Or error code. 
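+ *  Illustrative sketch (assumed numbers, not upstream zstd text): for nbSeq = 200
+ *  the header written below is two bytes, 0x80 and 0xC8 ((200>>8)+0x80 and 200&0xFF),
+ *  and when a later sub-block reuses the tables its seqHead byte is
+ *  (set_repeat<<6) + (set_repeat<<4) + (set_repeat<<2) == 0xFC.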
*/ +static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + const seqDef* sequences, size_t nbSeq, + const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const int bmi2, int writeEntropy, int* entropyWritten) +{ + const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + BYTE* seqHead; + + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets); + + *entropyWritten = 0; + /* Sequences Header */ + RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, + dstSize_tooSmall, ""); + if (nbSeq < 0x7F) + *op++ = (BYTE)nbSeq; + else if (nbSeq < LONGNBSEQ) + op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; + else + op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + if (nbSeq==0) { + return op - ostart; + } + + /* seqHead : flags for FSE encoding type */ + seqHead = op++; + + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart)); + + if (writeEntropy) { + const U32 LLtype = fseMetadata->llType; + const U32 Offtype = fseMetadata->ofType; + const U32 MLtype = fseMetadata->mlType; + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize); + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + ZSTD_memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize); + op += fseMetadata->fseTablesSize; + } else { + const U32 repeat = set_repeat; + *seqHead = (BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2)); + } + + { size_t const bitstreamSize = ZSTD_encodeSequences( + op, oend - op, + fseTables->matchlengthCTable, mlCode, + fseTables->offcodeCTable, ofCode, + fseTables->litlengthCTable, llCode, + sequences, nbSeq, + longOffsets, bmi2); + FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); + op += bitstreamSize; + /* zstd versions <= 1.3.4 mistakenly report corruption when + * FSE_readNCount() receives a buffer < 4 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1146. + * This can happen when the last set_compressed table present is 2 + * bytes and the bitstream is only one byte. + * In this exceedingly rare case, we will simply emit an uncompressed + * block, since it isn't worth optimizing. + */ +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) { + /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(fseMetadata->lastCountSize + bitstreamSize == 3); + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " + "emitting an uncompressed block."); + return 0; + } +#endif + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize); + } + + /* zstd versions <= 1.4.0 mistakenly report error when + * sequences section body size is less than 3 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1664. + * This can happen when the previous sequences section block is compressed + * with rle mode and the current block's sequences section is compressed + * with repeat mode where sequences section body size can be 1 byte. 
+ */ +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (op-seqHead < 4) { + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting " + "an uncompressed block when sequences are < 4 bytes"); + return 0; + } +#endif + + *entropyWritten = 1; + return op - ostart; +} + +/** ZSTD_compressSubBlock() : + * Compresses a single sub-block. + * @return : compressed size of the sub-block + * Or 0 if it failed to compress. */ +static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + const seqDef* sequences, size_t nbSeq, + const BYTE* literals, size_t litSize, + const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const int bmi2, + int writeLitEntropy, int writeSeqEntropy, + int* litEntropyWritten, int* seqEntropyWritten, + U32 lastBlock) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart + ZSTD_blockHeaderSize; + DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)", + litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock); + { size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable, + &entropyMetadata->hufMetadata, literals, litSize, + op, oend-op, bmi2, writeLitEntropy, litEntropyWritten); + FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed"); + if (cLitSize == 0) return 0; + op += cLitSize; + } + { size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse, + &entropyMetadata->fseMetadata, + sequences, nbSeq, + llCode, mlCode, ofCode, + cctxParams, + op, oend-op, + bmi2, writeSeqEntropy, seqEntropyWritten); + FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed"); + if (cSeqSize == 0) return 0; + op += cSeqSize; + } + /* Write block header */ + { size_t cSize = (op-ostart)-ZSTD_blockHeaderSize; + U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(ostart, cBlockHeader24); + } + return op-ostart; +} + +static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize, + const ZSTD_hufCTables_t* huf, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + unsigned* const countWksp = (unsigned*)workspace; + unsigned maxSymbolValue = 255; + size_t literalSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + + if (hufMetadata->hType == set_basic) return litSize; + else if (hufMetadata->hType == set_rle) return 1; + else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) { + size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); + if (ZSTD_isError(largest)) return litSize; + { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); + if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; + return cLitSizeEstimate + literalSectionHeaderSize; + } } + assert(0); /* impossible */ + return 0; +} + +static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, + const BYTE* codeTable, unsigned maxCode, + size_t nbSeq, const FSE_CTable* fseCTable, + const U32* additionalBits, + short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, + void* workspace, size_t wkspSize) +{ + unsigned* const countWksp = (unsigned*)workspace; + const BYTE* ctp = codeTable; + 
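+    /* Illustrative note (not upstream zstd text): the loop below adds each code's
+     * entropy cost plus its additional bits, e.g. an offset code of 10 contributes
+     * its FSE/entropy cost plus 10 raw offset bits, since for offsets the code
+     * value is also the number of additional bits. */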
const BYTE* const ctStart = ctp; + const BYTE* const ctEnd = ctStart + nbSeq; + size_t cSymbolTypeSizeEstimateInBits = 0; + unsigned max = maxCode; + + HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ + if (type == set_basic) { + /* We selected this encoding type, so it must be valid. */ + assert(max <= defaultMax); + cSymbolTypeSizeEstimateInBits = max <= defaultMax + ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max) + : ERROR(GENERIC); + } else if (type == set_rle) { + cSymbolTypeSizeEstimateInBits = 0; + } else if (type == set_compressed || type == set_repeat) { + cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); + } + if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10; + while (ctp < ctEnd) { + if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp]; + else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ + ctp++; + } + return cSymbolTypeSizeEstimateInBits / 8; +} + +static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + size_t const sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + size_t cSeqSizeEstimate = 0; + if (nbSeq == 0) return sequencesSectionHeaderSize; + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff, + nbSeq, fseTables->offcodeCTable, NULL, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL, + nbSeq, fseTables->litlengthCTable, LL_bits, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML, + nbSeq, fseTables->matchlengthCTable, ML_bits, + ML_defaultNorm, ML_defaultNormLog, MaxML, + workspace, wkspSize); + if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; + return cSeqSizeEstimate + sequencesSectionHeaderSize; +} + +static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, + const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize, + int writeLitEntropy, int writeSeqEntropy) { + size_t cSizeEstimate = 0; + cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize, + &entropy->huf, &entropyMetadata->hufMetadata, + workspace, wkspSize, writeLitEntropy); + cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, + nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, + workspace, wkspSize, writeSeqEntropy); + return cSizeEstimate + ZSTD_blockHeaderSize; +} + +static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata) +{ + if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle) + return 1; + if (fseMetadata->mlType == set_compressed || fseMetadata->mlType == set_rle) + return 1; + if (fseMetadata->ofType == set_compressed || fseMetadata->ofType == set_rle) + return 1; + return 0; +} + +/** ZSTD_compressSubBlock_multi() : + * Breaks super-block into multiple sub-blocks 
and compresses them. + * Entropy will be written to the first block. + * The following blocks will use repeat mode to compress. + * All sub-blocks are compressed blocks (no raw or rle blocks). + * @return : compressed size of the super block (which is multiple ZSTD blocks) + * Or 0 if it failed to compress. */ +static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, + const ZSTD_compressedBlockState_t* prevCBlock, + ZSTD_compressedBlockState_t* nextCBlock, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const int bmi2, U32 lastBlock, + void* workspace, size_t wkspSize) +{ + const seqDef* const sstart = seqStorePtr->sequencesStart; + const seqDef* const send = seqStorePtr->sequences; + const seqDef* sp = sstart; + const BYTE* const lstart = seqStorePtr->litStart; + const BYTE* const lend = seqStorePtr->lit; + const BYTE* lp = lstart; + BYTE const* ip = (BYTE const*)src; + BYTE const* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + const BYTE* llCodePtr = seqStorePtr->llCode; + const BYTE* mlCodePtr = seqStorePtr->mlCode; + const BYTE* ofCodePtr = seqStorePtr->ofCode; + size_t targetCBlockSize = cctxParams->targetCBlockSize; + size_t litSize, seqCount; + int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed; + int writeSeqEntropy = 1; + int lastSequence = 0; + + DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)", + (unsigned)(lend-lp), (unsigned)(send-sstart)); + + litSize = 0; + seqCount = 0; + do { + size_t cBlockSizeEstimate = 0; + if (sstart == send) { + lastSequence = 1; + } else { + const seqDef* const sequence = sp + seqCount; + lastSequence = sequence == send - 1; + litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength; + seqCount++; + } + if (lastSequence) { + assert(lp <= lend); + assert(litSize <= (size_t)(lend - lp)); + litSize = (size_t)(lend - lp); + } + /* I think there is an optimization opportunity here. + * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful + * since it recalculates estimate from scratch. + * For example, it would recount literal distribution and symbol codes everytime. 
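+         *
+         * Illustrative summary (not upstream text): sequences are accumulated into
+         * litSize/seqCount until the running estimate first exceeds targetCBlockSize
+         * (or the last sequence is reached); only then is ZSTD_compressSubBlock()
+         * called, and the sub-block is committed (counters reset) only when it
+         * actually saved space, i.e. cSize < decompressedSize.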
+ */ + cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount, + &nextCBlock->entropy, entropyMetadata, + workspace, wkspSize, writeLitEntropy, writeSeqEntropy); + if (cBlockSizeEstimate > targetCBlockSize || lastSequence) { + int litEntropyWritten = 0; + int seqEntropyWritten = 0; + const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence); + const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, + sp, seqCount, + lp, litSize, + llCodePtr, mlCodePtr, ofCodePtr, + cctxParams, + op, oend-op, + bmi2, writeLitEntropy, writeSeqEntropy, + &litEntropyWritten, &seqEntropyWritten, + lastBlock && lastSequence); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); + if (cSize > 0 && cSize < decompressedSize) { + DEBUGLOG(5, "Committed the sub-block"); + assert(ip + decompressedSize <= iend); + ip += decompressedSize; + sp += seqCount; + lp += litSize; + op += cSize; + llCodePtr += seqCount; + mlCodePtr += seqCount; + ofCodePtr += seqCount; + litSize = 0; + seqCount = 0; + /* Entropy only needs to be written once */ + if (litEntropyWritten) { + writeLitEntropy = 0; + } + if (seqEntropyWritten) { + writeSeqEntropy = 0; + } + } + } + } while (!lastSequence); + if (writeLitEntropy) { + DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten"); + ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf)); + } + if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) { + /* If we haven't written our entropy tables, then we've violated our contract and + * must emit an uncompressed block. + */ + DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten"); + return 0; + } + if (ip < iend) { + size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock); + DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip)); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + assert(cSize != 0); + op += cSize; + /* We have to regenerate the repcodes because we've skipped some sequences */ + if (sp < send) { + seqDef const* seq; + repcodes_t rep; + ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep)); + for (seq = sstart; seq < sp; ++seq) { + rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); + } + ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep)); + } + } + DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed"); + return op-ostart; +} + +size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + void const* src, size_t srcSize, + unsigned lastBlock) { + ZSTD_entropyCTablesMetadata_t entropyMetadata; + + FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, + &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + &entropyMetadata, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); + + return ZSTD_compressSubBlock_multi(&zc->seqStore, + zc->blockState.prevCBlock, + zc->blockState.nextCBlock, + &entropyMetadata, + &zc->appliedParams, + dst, dstCapacity, + src, srcSize, + zc->bmi2, lastBlock, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */); +} +/**** ended inlining compress/zstd_compress_superblock.c ****/ +/**** start inlining compress/zstd_compress.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. 
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/*-*************************************
+* Dependencies
+***************************************/
+/**** skipping file: ../common/zstd_deps.h ****/
+/**** start inlining ../common/cpu.h ****/
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMMON_CPU_H
+#define ZSTD_COMMON_CPU_H
+
+/**
+ * Implementation taken from folly/CpuId.h
+ * https://github.com/facebook/folly/blob/master/folly/CpuId.h
+ */
+
+/**** skipping file: mem.h ****/
+
+#ifdef _MSC_VER
+#include <intrin.h>
+#endif
+
+typedef struct {
+    U32 f1c;
+    U32 f1d;
+    U32 f7b;
+    U32 f7c;
+} ZSTD_cpuid_t;
+
+MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
+    U32 f1c = 0;
+    U32 f1d = 0;
+    U32 f7b = 0;
+    U32 f7c = 0;
+#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
+    int reg[4];
+    __cpuid((int*)reg, 0);
+    {
+        int const n = reg[0];
+        if (n >= 1) {
+            __cpuid((int*)reg, 1);
+            f1c = (U32)reg[2];
+            f1d = (U32)reg[3];
+        }
+        if (n >= 7) {
+            __cpuidex((int*)reg, 7, 0);
+            f7b = (U32)reg[1];
+            f7c = (U32)reg[2];
+        }
+    }
+#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
+    /* The following block like the normal cpuid branch below, but gcc
+     * reserves ebx for use of its pic register so we must specially
+     * handle the save and restore to avoid clobbering the register
+     */
+    U32 n;
+    __asm__(
+        "pushl %%ebx\n\t"
+        "cpuid\n\t"
+        "popl %%ebx\n\t"
+        : "=a"(n)
+        : "a"(0)
+        : "ecx", "edx");
+    if (n >= 1) {
+        U32 f1a;
+        __asm__(
+            "pushl %%ebx\n\t"
+            "cpuid\n\t"
+            "popl %%ebx\n\t"
+            : "=a"(f1a), "=c"(f1c), "=d"(f1d)
+            : "a"(1));
+    }
+    if (n >= 7) {
+        __asm__(
+            "pushl %%ebx\n\t"
+            "cpuid\n\t"
+            "movl %%ebx, %%eax\n\t"
+            "popl %%ebx"
+            : "=a"(f7b), "=c"(f7c)
+            : "a"(7), "c"(0)
+            : "edx");
+    }
+#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
+    U32 n;
+    __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
+    if (n >= 1) {
+        U32 f1a;
+        __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
+    }
+    if (n >= 7) {
+        U32 f7a;
+        __asm__("cpuid"
+                : "=a"(f7a), "=b"(f7b), "=c"(f7c)
+                : "a"(7), "c"(0)
+                : "edx");
+    }
+#endif
+    {
+        ZSTD_cpuid_t cpuid;
+        cpuid.f1c = f1c;
+        cpuid.f1d = f1d;
+        cpuid.f7b = f7b;
+        cpuid.f7c = f7c;
+        return cpuid;
+    }
+}
+
+#define X(name, r, bit)                                                        \
+    MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) {               \
+        return ((cpuid.r) & (1U << bit)) != 0;                                 \
+    }
+
+/* cpuid(1): Processor Info and Feature Bits.
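+ *
+ * Illustrative expansion (not upstream text): C(sse3, 0) below expands, via X(), to
+ *     MEM_STATIC int ZSTD_cpuid_sse3(ZSTD_cpuid_t const cpuid) {
+ *         return ((cpuid.f1c) & (1U << 0)) != 0;
+ *     }
+ * which is how predicates such as ZSTD_cpuid_bmi2(), used later in this file for
+ * the DYNAMIC_BMI2 dispatch, are generated.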
*/ +#define C(name, bit) X(name, f1c, bit) + C(sse3, 0) + C(pclmuldq, 1) + C(dtes64, 2) + C(monitor, 3) + C(dscpl, 4) + C(vmx, 5) + C(smx, 6) + C(eist, 7) + C(tm2, 8) + C(ssse3, 9) + C(cnxtid, 10) + C(fma, 12) + C(cx16, 13) + C(xtpr, 14) + C(pdcm, 15) + C(pcid, 17) + C(dca, 18) + C(sse41, 19) + C(sse42, 20) + C(x2apic, 21) + C(movbe, 22) + C(popcnt, 23) + C(tscdeadline, 24) + C(aes, 25) + C(xsave, 26) + C(osxsave, 27) + C(avx, 28) + C(f16c, 29) + C(rdrand, 30) +#undef C +#define D(name, bit) X(name, f1d, bit) + D(fpu, 0) + D(vme, 1) + D(de, 2) + D(pse, 3) + D(tsc, 4) + D(msr, 5) + D(pae, 6) + D(mce, 7) + D(cx8, 8) + D(apic, 9) + D(sep, 11) + D(mtrr, 12) + D(pge, 13) + D(mca, 14) + D(cmov, 15) + D(pat, 16) + D(pse36, 17) + D(psn, 18) + D(clfsh, 19) + D(ds, 21) + D(acpi, 22) + D(mmx, 23) + D(fxsr, 24) + D(sse, 25) + D(sse2, 26) + D(ss, 27) + D(htt, 28) + D(tm, 29) + D(pbe, 31) +#undef D + +/* cpuid(7): Extended Features. */ +#define B(name, bit) X(name, f7b, bit) + B(bmi1, 3) + B(hle, 4) + B(avx2, 5) + B(smep, 7) + B(bmi2, 8) + B(erms, 9) + B(invpcid, 10) + B(rtm, 11) + B(mpx, 14) + B(avx512f, 16) + B(avx512dq, 17) + B(rdseed, 18) + B(adx, 19) + B(smap, 20) + B(avx512ifma, 21) + B(pcommit, 22) + B(clflushopt, 23) + B(clwb, 24) + B(avx512pf, 26) + B(avx512er, 27) + B(avx512cd, 28) + B(sha, 29) + B(avx512bw, 30) + B(avx512vl, 31) +#undef B +#define C(name, bit) X(name, f7c, bit) + C(prefetchwt1, 0) + C(avx512vbmi, 1) +#undef C + +#undef X + +#endif /* ZSTD_COMMON_CPU_H */ +/**** ended inlining ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: hist.h ****/ +#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: zstd_compress_sequences.h ****/ +/**** skipping file: zstd_compress_literals.h ****/ +/**** start inlining zstd_fast.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_FAST_H +#define ZSTD_FAST_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: zstd_compress_internal.h ****/ + +void ZSTD_fillHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm); +size_t ZSTD_compressBlock_fast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_FAST_H */ +/**** ended inlining zstd_fast.h ****/ +/**** start inlining zstd_double_fast.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_DOUBLE_FAST_H +#define ZSTD_DOUBLE_FAST_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: zstd_compress_internal.h ****/ + +void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm); +size_t ZSTD_compressBlock_doubleFast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_doubleFast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_doubleFast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_DOUBLE_FAST_H */ +/**** ended inlining zstd_double_fast.h ****/ +/**** start inlining zstd_lazy.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LAZY_H +#define ZSTD_LAZY_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/**** skipping file: zstd_compress_internal.h ****/ + +/** + * Dedicated Dictionary Search Structure bucket log. In the + * ZSTD_dedicatedDictSearch mode, the hashTable has + * 2 ** ZSTD_LAZY_DDSS_BUCKET_LOG entries in each bucket, rather than just + * one. + */ +#define ZSTD_LAZY_DDSS_BUCKET_LOG 2 + +U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); +void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip); + +void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip); + +void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). 
preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ + +size_t ZSTD_compressBlock_btlazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_btlazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_greedy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* 
src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btlazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_LAZY_H */ +/**** ended inlining zstd_lazy.h ****/ +/**** start inlining zstd_opt.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_OPT_H +#define ZSTD_OPT_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/**** skipping file: zstd_compress_internal.h ****/ + +/* used in ZSTD_loadDictionaryContent() */ +void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend); + +size_t ZSTD_compressBlock_btopt( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + +size_t ZSTD_compressBlock_btopt_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_btopt_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + /* note : no btultra2 variant for extDict nor dictMatchState, + * because btultra2 is not meant to work with dictionaries + * and is only specific for the first block (no prefix) */ + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_OPT_H */ +/**** ended inlining zstd_opt.h ****/ +/**** start inlining zstd_ldm.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_LDM_H +#define ZSTD_LDM_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: ../zstd.h ****/ + +/*-************************************* +* Long distance matching +***************************************/ + +#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT + +void ZSTD_ldm_fillHashTable( + ldmState_t* state, const BYTE* ip, + const BYTE* iend, ldmParams_t const* params); + +/** + * ZSTD_ldm_generateSequences(): + * + * Generates the sequences using the long distance match finder. + * Generates long range matching sequences in `sequences`, which parse a prefix + * of the source. `sequences` must be large enough to store every sequence, + * which can be checked with `ZSTD_ldm_getMaxNbSeq()`. + * @returns 0 or an error code. + * + * NOTE: The user must have called ZSTD_window_update() for all of the input + * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks. + * NOTE: This function returns an error if it runs out of space to store + * sequences. + */ +size_t ZSTD_ldm_generateSequences( + ldmState_t* ldms, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize); + +/** + * ZSTD_ldm_blockCompress(): + * + * Compresses a block using the predefined sequences, along with a secondary + * block compressor. The literals section of every sequence is passed to the + * secondary block compressor, and those sequences are interspersed with the + * predefined sequences. Returns the length of the last literals. + * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed. + * `rawSeqStore.seq` may also be updated to split the last sequence between two + * blocks. + * @return The length of the last literals. + * + * NOTE: The source must be at most the maximum block size, but the predefined + * sequences can be any size, and may be longer than the block. In the case that + * they are longer than the block, the last sequences may need to be split into + * two. We handle that case correctly, and update `rawSeqStore` appropriately. + * NOTE: This function does not return any errors. + */ +size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_useRowMatchFinderMode_e useRowMatchFinder, + void const* src, size_t srcSize); + +/** + * ZSTD_ldm_skipSequences(): + * + * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`. + * Avoids emitting matches less than `minMatch` bytes. + * Must be called for data that is not passed to ZSTD_ldm_blockCompress(). + */ +void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, + U32 const minMatch); + +/* ZSTD_ldm_skipRawSeqStoreBytes(): + * Moves forward in rawSeqStore by nbBytes, updating fields 'pos' and 'posInSequence'. + * Not to be used in conjunction with ZSTD_ldm_skipSequences(). + * Must be called for data with is not passed to ZSTD_ldm_blockCompress(). + */ +void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes); + +/** ZSTD_ldm_getTableSize() : + * Estimate the space needed for long distance matching tables or 0 if LDM is + * disabled. + */ +size_t ZSTD_ldm_getTableSize(ldmParams_t params); + +/** ZSTD_ldm_getSeqSpace() : + * Return an upper bound on the number of sequences that can be produced by + * the long distance matcher, or 0 if LDM is disabled. 
+ */ +size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize); + +/** ZSTD_ldm_adjustParameters() : + * If the params->hashRateLog is not set, set it to its default value based on + * windowLog and params->hashLog. + * + * Ensures that params->bucketSizeLog is <= params->hashLog (setting it to + * params->hashLog if it is not). + * + * Ensures that the minMatchLength >= targetLength during optimal parsing. + */ +void ZSTD_ldm_adjustParameters(ldmParams_t* params, + ZSTD_compressionParameters const* cParams); + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_FAST_H */ +/**** ended inlining zstd_ldm.h ****/ +/**** skipping file: zstd_compress_superblock.h ****/ + +/* *************************************************************** +* Tuning parameters +*****************************************************************/ +/*! + * COMPRESS_HEAPMODE : + * Select how default decompression function ZSTD_compress() allocates its context, + * on stack (0, default), or into heap (1). + * Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected. + */ +#ifndef ZSTD_COMPRESS_HEAPMODE +# define ZSTD_COMPRESS_HEAPMODE 0 +#endif + + +/*-************************************* +* Helper functions +***************************************/ +/* ZSTD_compressBound() + * Note that the result from this function is only compatible with the "normal" + * full-block strategy. + * When there are a lot of small blocks due to frequent flush in streaming mode + * the overhead of headers can make the compressed data to be larger than the + * return value of ZSTD_compressBound(). + */ +size_t ZSTD_compressBound(size_t srcSize) { + return ZSTD_COMPRESSBOUND(srcSize); +} + + +/*-************************************* +* Context memory management +***************************************/ +struct ZSTD_CDict_s { + const void* dictContent; + size_t dictContentSize; + ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */ + U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ + ZSTD_cwksp workspace; + ZSTD_matchState_t matchState; + ZSTD_compressedBlockState_t cBlockState; + ZSTD_customMem customMem; + U32 dictID; + int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ + ZSTD_useRowMatchFinderMode_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use + * row-based matchfinder. Unless the cdict is reloaded, we will use + * the same greedy/lazy matchfinder at compression time. 
+ */ +}; /* typedef'd to ZSTD_CDict within "zstd.h" */ + +ZSTD_CCtx* ZSTD_createCCtx(void) +{ + return ZSTD_createCCtx_advanced(ZSTD_defaultCMem); +} + +static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager) +{ + assert(cctx != NULL); + ZSTD_memset(cctx, 0, sizeof(*cctx)); + cctx->customMem = memManager; + cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + { size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters); + assert(!ZSTD_isError(err)); + (void)err; + } +} + +ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) +{ + ZSTD_STATIC_ASSERT(zcss_init==0); + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1)); + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_customMalloc(sizeof(ZSTD_CCtx), customMem); + if (!cctx) return NULL; + ZSTD_initCCtx(cctx, customMem); + return cctx; + } +} + +ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize) +{ + ZSTD_cwksp ws; + ZSTD_CCtx* cctx; + if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */ + if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */ + ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc); + + cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx)); + if (cctx == NULL) return NULL; + + ZSTD_memset(cctx, 0, sizeof(ZSTD_CCtx)); + ZSTD_cwksp_move(&cctx->workspace, &ws); + cctx->staticSize = workspaceSize; + + /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */ + if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL; + cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); + cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); + cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE); + cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + return cctx; +} + +/** + * Clears and frees all of the dictionaries in the CCtx. + */ +static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx) +{ + ZSTD_customFree(cctx->localDict.dictBuffer, cctx->customMem); + ZSTD_freeCDict(cctx->localDict.cdict); + ZSTD_memset(&cctx->localDict, 0, sizeof(cctx->localDict)); + ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); + cctx->cdict = NULL; +} + +static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict) +{ + size_t const bufferSize = dict.dictBuffer != NULL ? 
dict.dictSize : 0; + size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict); + return bufferSize + cdictSize; +} + +static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) +{ + assert(cctx != NULL); + assert(cctx->staticSize == 0); + ZSTD_clearAllDicts(cctx); +#ifdef ZSTD_MULTITHREAD + ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; +#endif + ZSTD_cwksp_free(&cctx->workspace, cctx->customMem); +} + +size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return 0; /* support free on NULL */ + RETURN_ERROR_IF(cctx->staticSize, memory_allocation, + "not compatible with static CCtx"); + { + int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx); + ZSTD_freeCCtxContent(cctx); + if (!cctxInWorkspace) { + ZSTD_customFree(cctx, cctx->customMem); + } + } + return 0; +} + + +static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + return ZSTDMT_sizeof_CCtx(cctx->mtctx); +#else + (void)cctx; + return 0; +#endif +} + + +size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return 0; /* support sizeof on NULL */ + /* cctx may be in the workspace */ + return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx)) + + ZSTD_cwksp_sizeof(&cctx->workspace) + + ZSTD_sizeof_localDict(cctx->localDict) + + ZSTD_sizeof_mtctx(cctx); +} + +size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) +{ + return ZSTD_sizeof_CCtx(zcs); /* same object */ +} + +/* private API call, for dictBuilder only */ +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } + +/* Returns true if the strategy supports using a row based matchfinder */ +static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) { + return (strategy >= ZSTD_greedy && strategy <= ZSTD_lazy2); +} + +/* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row based matchfinder + * for this compression. + */ +static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_useRowMatchFinderMode_e mode) { + assert(mode != ZSTD_urm_auto); + return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_urm_enableRowMatchFinder); +} + +/* Returns row matchfinder usage enum given an initial mode and cParams */ +static ZSTD_useRowMatchFinderMode_e ZSTD_resolveRowMatchFinderMode(ZSTD_useRowMatchFinderMode_e mode, + const ZSTD_compressionParameters* const cParams) { +#if !defined(ZSTD_NO_INTRINSICS) && (defined(__SSE2__) || defined(__ARM_NEON)) + int const kHasSIMD128 = 1; +#else + int const kHasSIMD128 = 0; +#endif + if (mode != ZSTD_urm_auto) return mode; /* if requested enabled, but no SIMD, we still will use row matchfinder */ + mode = ZSTD_urm_disableRowMatchFinder; + if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode; + if (kHasSIMD128) { + if (cParams->windowLog > 14) mode = ZSTD_urm_enableRowMatchFinder; + } else { + if (cParams->windowLog > 17) mode = ZSTD_urm_enableRowMatchFinder; + } + return mode; +} + +/* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */ +static int ZSTD_allocateChainTable(const ZSTD_strategy strategy, + const ZSTD_useRowMatchFinderMode_e useRowMatchFinder, + const U32 forDDSDict) { + assert(useRowMatchFinder != ZSTD_urm_auto); + /* We always should allocate a chaintable if we are allocating a matchstate for a DDS dictionary matchstate. + * We do not allocate a chaintable if we are using ZSTD_fast, or are using the row-based matchfinder. 
+ */ + return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder)); +} + +/* Returns 1 if compression parameters are such that we should + * enable long distance matching (wlog >= 27, strategy >= btopt). + * Returns 0 otherwise. + */ +static U32 ZSTD_CParams_shouldEnableLdm(const ZSTD_compressionParameters* const cParams) { + return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27; +} + +/* Returns 1 if compression parameters are such that we should + * enable blockSplitter (wlog >= 17, strategy >= btopt). + * Returns 0 otherwise. + */ +static U32 ZSTD_CParams_useBlockSplitter(const ZSTD_compressionParameters* const cParams) { + return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17; +} + +static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( + ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params cctxParams; + /* should not matter, as all cParams are presumed properly defined */ + ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT); + cctxParams.cParams = cParams; + + /* Adjust advanced params according to cParams */ + if (ZSTD_CParams_shouldEnableLdm(&cParams)) { + DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params"); + cctxParams.ldmParams.enableLdm = 1; + /* LDM is enabled by default for optimal parser and window size >= 128MB */ + ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams); + assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog); + assert(cctxParams.ldmParams.hashRateLog < 32); + } + + if (ZSTD_CParams_useBlockSplitter(&cParams)) { + DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including block splitting into cctx params"); + cctxParams.splitBlocks = 1; + } + + cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams); + assert(!ZSTD_checkCParams(cParams)); + return cctxParams; +} + +static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced( + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params* params; + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + params = (ZSTD_CCtx_params*)ZSTD_customCalloc( + sizeof(ZSTD_CCtx_params), customMem); + if (!params) { return NULL; } + ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT); + params->customMem = customMem; + return params; +} + +ZSTD_CCtx_params* ZSTD_createCCtxParams(void) +{ + return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem); +} + +size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params) +{ + if (params == NULL) { return 0; } + ZSTD_customFree(params, params->customMem); + return 0; +} + +size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params) +{ + return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT); +} + +size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) { + RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); + ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); + cctxParams->compressionLevel = compressionLevel; + cctxParams->fParams.contentSizeFlag = 1; + return 0; +} + +#define ZSTD_NO_CLEVEL 0 + +/** + * Initializes the cctxParams from params and compressionLevel. + * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL. 
+ */ +static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel) +{ + assert(!ZSTD_checkCParams(params->cParams)); + ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); + cctxParams->cParams = params->cParams; + cctxParams->fParams = params->fParams; + /* Should not matter, as all cParams are presumed properly defined. + * But, set it for tracing anyway. + */ + cctxParams->compressionLevel = compressionLevel; + cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams); + DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d", cctxParams->useRowMatchFinder); +} + +size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) +{ + RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); + FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); + ZSTD_CCtxParams_init_internal(cctxParams, &params, ZSTD_NO_CLEVEL); + return 0; +} + +/** + * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone. + * @param param Validated zstd parameters. + */ +static void ZSTD_CCtxParams_setZstdParams( + ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params) +{ + assert(!ZSTD_checkCParams(params->cParams)); + cctxParams->cParams = params->cParams; + cctxParams->fParams = params->fParams; + /* Should not matter, as all cParams are presumed properly defined. + * But, set it for tracing anyway. + */ + cctxParams->compressionLevel = ZSTD_NO_CLEVEL; +} + +ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) +{ + ZSTD_bounds bounds = { 0, 0, 0 }; + + switch(param) + { + case ZSTD_c_compressionLevel: + bounds.lowerBound = ZSTD_minCLevel(); + bounds.upperBound = ZSTD_maxCLevel(); + return bounds; + + case ZSTD_c_windowLog: + bounds.lowerBound = ZSTD_WINDOWLOG_MIN; + bounds.upperBound = ZSTD_WINDOWLOG_MAX; + return bounds; + + case ZSTD_c_hashLog: + bounds.lowerBound = ZSTD_HASHLOG_MIN; + bounds.upperBound = ZSTD_HASHLOG_MAX; + return bounds; + + case ZSTD_c_chainLog: + bounds.lowerBound = ZSTD_CHAINLOG_MIN; + bounds.upperBound = ZSTD_CHAINLOG_MAX; + return bounds; + + case ZSTD_c_searchLog: + bounds.lowerBound = ZSTD_SEARCHLOG_MIN; + bounds.upperBound = ZSTD_SEARCHLOG_MAX; + return bounds; + + case ZSTD_c_minMatch: + bounds.lowerBound = ZSTD_MINMATCH_MIN; + bounds.upperBound = ZSTD_MINMATCH_MAX; + return bounds; + + case ZSTD_c_targetLength: + bounds.lowerBound = ZSTD_TARGETLENGTH_MIN; + bounds.upperBound = ZSTD_TARGETLENGTH_MAX; + return bounds; + + case ZSTD_c_strategy: + bounds.lowerBound = ZSTD_STRATEGY_MIN; + bounds.upperBound = ZSTD_STRATEGY_MAX; + return bounds; + + case ZSTD_c_contentSizeFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_checksumFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_dictIDFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_nbWorkers: + bounds.lowerBound = 0; +#ifdef ZSTD_MULTITHREAD + bounds.upperBound = ZSTDMT_NBWORKERS_MAX; +#else + bounds.upperBound = 0; +#endif + return bounds; + + case ZSTD_c_jobSize: + bounds.lowerBound = 0; +#ifdef ZSTD_MULTITHREAD + bounds.upperBound = ZSTDMT_JOBSIZE_MAX; +#else + bounds.upperBound = 0; +#endif + return bounds; + + case ZSTD_c_overlapLog: +#ifdef ZSTD_MULTITHREAD + bounds.lowerBound = ZSTD_OVERLAPLOG_MIN; + bounds.upperBound = ZSTD_OVERLAPLOG_MAX; +#else + bounds.lowerBound = 0; + bounds.upperBound = 0; +#endif + return bounds; + + case
ZSTD_c_enableDedicatedDictSearch: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_enableLongDistanceMatching: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_ldmHashLog: + bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN; + bounds.upperBound = ZSTD_LDM_HASHLOG_MAX; + return bounds; + + case ZSTD_c_ldmMinMatch: + bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN; + bounds.upperBound = ZSTD_LDM_MINMATCH_MAX; + return bounds; + + case ZSTD_c_ldmBucketSizeLog: + bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN; + bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX; + return bounds; + + case ZSTD_c_ldmHashRateLog: + bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN; + bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX; + return bounds; + + /* experimental parameters */ + case ZSTD_c_rsyncable: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_forceMaxWindow : + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_format: + ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); + bounds.lowerBound = ZSTD_f_zstd1; + bounds.upperBound = ZSTD_f_zstd1_magicless; /* note : how to ensure at compile time that this is the highest value enum ? */ + return bounds; + + case ZSTD_c_forceAttachDict: + ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad); + bounds.lowerBound = ZSTD_dictDefaultAttach; + bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */ + return bounds; + + case ZSTD_c_literalCompressionMode: + ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed); + bounds.lowerBound = ZSTD_lcm_auto; + bounds.upperBound = ZSTD_lcm_uncompressed; + return bounds; + + case ZSTD_c_targetCBlockSize: + bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN; + bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX; + return bounds; + + case ZSTD_c_srcSizeHint: + bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN; + bounds.upperBound = ZSTD_SRCSIZEHINT_MAX; + return bounds; + + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + bounds.lowerBound = (int)ZSTD_bm_buffered; + bounds.upperBound = (int)ZSTD_bm_stable; + return bounds; + + case ZSTD_c_blockDelimiters: + bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters; + bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters; + return bounds; + + case ZSTD_c_validateSequences: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_splitBlocks: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_useRowMatchFinder: + bounds.lowerBound = (int)ZSTD_urm_auto; + bounds.upperBound = (int)ZSTD_urm_enableRowMatchFinder; + return bounds; + + case ZSTD_c_deterministicRefPrefix: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + default: + bounds.error = ERROR(parameter_unsupported); + return bounds; + } +} + +/* ZSTD_cParam_clampBounds: + * Clamps the value into the bounded range. 
+ */ +static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value) +{ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); + if (ZSTD_isError(bounds.error)) return bounds.error; + if (*value < bounds.lowerBound) *value = bounds.lowerBound; + if (*value > bounds.upperBound) *value = bounds.upperBound; + return 0; +} + +#define BOUNDCHECK(cParam, val) { \ + RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ + parameter_outOfBound, "Param out of bounds"); \ +} + + +static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) +{ + switch(param) + { + case ZSTD_c_compressionLevel: + case ZSTD_c_hashLog: + case ZSTD_c_chainLog: + case ZSTD_c_searchLog: + case ZSTD_c_minMatch: + case ZSTD_c_targetLength: + case ZSTD_c_strategy: + return 1; + + case ZSTD_c_format: + case ZSTD_c_windowLog: + case ZSTD_c_contentSizeFlag: + case ZSTD_c_checksumFlag: + case ZSTD_c_dictIDFlag: + case ZSTD_c_forceMaxWindow : + case ZSTD_c_nbWorkers: + case ZSTD_c_jobSize: + case ZSTD_c_overlapLog: + case ZSTD_c_rsyncable: + case ZSTD_c_enableDedicatedDictSearch: + case ZSTD_c_enableLongDistanceMatching: + case ZSTD_c_ldmHashLog: + case ZSTD_c_ldmMinMatch: + case ZSTD_c_ldmBucketSizeLog: + case ZSTD_c_ldmHashRateLog: + case ZSTD_c_forceAttachDict: + case ZSTD_c_literalCompressionMode: + case ZSTD_c_targetCBlockSize: + case ZSTD_c_srcSizeHint: + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + case ZSTD_c_blockDelimiters: + case ZSTD_c_validateSequences: + case ZSTD_c_splitBlocks: + case ZSTD_c_useRowMatchFinder: + case ZSTD_c_deterministicRefPrefix: + default: + return 0; + } +} + +size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) +{ + DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value); + if (cctx->streamStage != zcss_init) { + if (ZSTD_isUpdateAuthorized(param)) { + cctx->cParamsChanged = 1; + } else { + RETURN_ERROR(stage_wrong, "can only set params in ctx init stage"); + } } + + switch(param) + { + case ZSTD_c_nbWorkers: + RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported, + "MT not compatible with static alloc"); + break; + + case ZSTD_c_compressionLevel: + case ZSTD_c_windowLog: + case ZSTD_c_hashLog: + case ZSTD_c_chainLog: + case ZSTD_c_searchLog: + case ZSTD_c_minMatch: + case ZSTD_c_targetLength: + case ZSTD_c_strategy: + case ZSTD_c_ldmHashRateLog: + case ZSTD_c_format: + case ZSTD_c_contentSizeFlag: + case ZSTD_c_checksumFlag: + case ZSTD_c_dictIDFlag: + case ZSTD_c_forceMaxWindow: + case ZSTD_c_forceAttachDict: + case ZSTD_c_literalCompressionMode: + case ZSTD_c_jobSize: + case ZSTD_c_overlapLog: + case ZSTD_c_rsyncable: + case ZSTD_c_enableDedicatedDictSearch: + case ZSTD_c_enableLongDistanceMatching: + case ZSTD_c_ldmHashLog: + case ZSTD_c_ldmMinMatch: + case ZSTD_c_ldmBucketSizeLog: + case ZSTD_c_targetCBlockSize: + case ZSTD_c_srcSizeHint: + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + case ZSTD_c_blockDelimiters: + case ZSTD_c_validateSequences: + case ZSTD_c_splitBlocks: + case ZSTD_c_useRowMatchFinder: + case ZSTD_c_deterministicRefPrefix: + break; + + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } + return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value); +} + +size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, + ZSTD_cParameter param, int value) +{ + DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value); + switch(param) + { + case ZSTD_c_format : + BOUNDCHECK(ZSTD_c_format, value); + CCtxParams->format = 
(ZSTD_format_e)value; + return (size_t)CCtxParams->format; + + case ZSTD_c_compressionLevel : { + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); + if (value == 0) + CCtxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ + else + CCtxParams->compressionLevel = value; + if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel; + return 0; /* return type (size_t) cannot represent negative values */ + } + + case ZSTD_c_windowLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_windowLog, value); + CCtxParams->cParams.windowLog = (U32)value; + return CCtxParams->cParams.windowLog; + + case ZSTD_c_hashLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_hashLog, value); + CCtxParams->cParams.hashLog = (U32)value; + return CCtxParams->cParams.hashLog; + + case ZSTD_c_chainLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_chainLog, value); + CCtxParams->cParams.chainLog = (U32)value; + return CCtxParams->cParams.chainLog; + + case ZSTD_c_searchLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_searchLog, value); + CCtxParams->cParams.searchLog = (U32)value; + return (size_t)value; + + case ZSTD_c_minMatch : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_minMatch, value); + CCtxParams->cParams.minMatch = value; + return CCtxParams->cParams.minMatch; + + case ZSTD_c_targetLength : + BOUNDCHECK(ZSTD_c_targetLength, value); + CCtxParams->cParams.targetLength = value; + return CCtxParams->cParams.targetLength; + + case ZSTD_c_strategy : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_strategy, value); + CCtxParams->cParams.strategy = (ZSTD_strategy)value; + return (size_t)CCtxParams->cParams.strategy; + + case ZSTD_c_contentSizeFlag : + /* Content size written in frame header _when known_ (default:1) */ + DEBUGLOG(4, "set content size flag = %u", (value!=0)); + CCtxParams->fParams.contentSizeFlag = value != 0; + return CCtxParams->fParams.contentSizeFlag; + + case ZSTD_c_checksumFlag : + /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */ + CCtxParams->fParams.checksumFlag = value != 0; + return CCtxParams->fParams.checksumFlag; + + case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */ + DEBUGLOG(4, "set dictIDFlag = %u", (value!=0)); + CCtxParams->fParams.noDictIDFlag = !value; + return !CCtxParams->fParams.noDictIDFlag; + + case ZSTD_c_forceMaxWindow : + CCtxParams->forceWindow = (value != 0); + return CCtxParams->forceWindow; + + case ZSTD_c_forceAttachDict : { + const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value; + BOUNDCHECK(ZSTD_c_forceAttachDict, pref); + CCtxParams->attachDictPref = pref; + return CCtxParams->attachDictPref; + } + + case ZSTD_c_literalCompressionMode : { + const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value; + BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm); + CCtxParams->literalCompressionMode = lcm; + return CCtxParams->literalCompressionMode; + } + + case ZSTD_c_nbWorkers : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); + CCtxParams->nbWorkers = value; + return CCtxParams->nbWorkers; +#endif + + case ZSTD_c_jobSize : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + /* Adjust to the 
minimum non-default value. */ + if (value != 0 && value < ZSTDMT_JOBSIZE_MIN) + value = ZSTDMT_JOBSIZE_MIN; + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); + assert(value >= 0); + CCtxParams->jobSize = value; + return CCtxParams->jobSize; +#endif + + case ZSTD_c_overlapLog : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); + CCtxParams->overlapLog = value; + return CCtxParams->overlapLog; +#endif + + case ZSTD_c_rsyncable : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; +#else + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), ""); + CCtxParams->rsyncable = value; + return CCtxParams->rsyncable; +#endif + + case ZSTD_c_enableDedicatedDictSearch : + CCtxParams->enableDedicatedDictSearch = (value!=0); + return CCtxParams->enableDedicatedDictSearch; + + case ZSTD_c_enableLongDistanceMatching : + CCtxParams->ldmParams.enableLdm = (value!=0); + return CCtxParams->ldmParams.enableLdm; + + case ZSTD_c_ldmHashLog : + if (value!=0) /* 0 ==> auto */ + BOUNDCHECK(ZSTD_c_ldmHashLog, value); + CCtxParams->ldmParams.hashLog = value; + return CCtxParams->ldmParams.hashLog; + + case ZSTD_c_ldmMinMatch : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_ldmMinMatch, value); + CCtxParams->ldmParams.minMatchLength = value; + return CCtxParams->ldmParams.minMatchLength; + + case ZSTD_c_ldmBucketSizeLog : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value); + CCtxParams->ldmParams.bucketSizeLog = value; + return CCtxParams->ldmParams.bucketSizeLog; + + case ZSTD_c_ldmHashRateLog : + RETURN_ERROR_IF(value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN, + parameter_outOfBound, "Param out of bounds!"); + CCtxParams->ldmParams.hashRateLog = value; + return CCtxParams->ldmParams.hashRateLog; + + case ZSTD_c_targetCBlockSize : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_targetCBlockSize, value); + CCtxParams->targetCBlockSize = value; + return CCtxParams->targetCBlockSize; + + case ZSTD_c_srcSizeHint : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_srcSizeHint, value); + CCtxParams->srcSizeHint = value; + return CCtxParams->srcSizeHint; + + case ZSTD_c_stableInBuffer: + BOUNDCHECK(ZSTD_c_stableInBuffer, value); + CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value; + return CCtxParams->inBufferMode; + + case ZSTD_c_stableOutBuffer: + BOUNDCHECK(ZSTD_c_stableOutBuffer, value); + CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value; + return CCtxParams->outBufferMode; + + case ZSTD_c_blockDelimiters: + BOUNDCHECK(ZSTD_c_blockDelimiters, value); + CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value; + return CCtxParams->blockDelimiters; + + case ZSTD_c_validateSequences: + BOUNDCHECK(ZSTD_c_validateSequences, value); + CCtxParams->validateSequences = value; + return CCtxParams->validateSequences; + + case ZSTD_c_splitBlocks: + BOUNDCHECK(ZSTD_c_splitBlocks, value); + CCtxParams->splitBlocks = value; + return CCtxParams->splitBlocks; + + case ZSTD_c_useRowMatchFinder: + BOUNDCHECK(ZSTD_c_useRowMatchFinder, value); + CCtxParams->useRowMatchFinder = (ZSTD_useRowMatchFinderMode_e)value; + return CCtxParams->useRowMatchFinder; + + case ZSTD_c_deterministicRefPrefix: + BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value); + CCtxParams->deterministicRefPrefix = !!value; + return 
CCtxParams->deterministicRefPrefix; + + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } +} + +size_t ZSTD_CCtx_getParameter(ZSTD_CCtx const* cctx, ZSTD_cParameter param, int* value) +{ + return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value); +} + +size_t ZSTD_CCtxParams_getParameter( + ZSTD_CCtx_params const* CCtxParams, ZSTD_cParameter param, int* value) +{ + switch(param) + { + case ZSTD_c_format : + *value = CCtxParams->format; + break; + case ZSTD_c_compressionLevel : + *value = CCtxParams->compressionLevel; + break; + case ZSTD_c_windowLog : + *value = (int)CCtxParams->cParams.windowLog; + break; + case ZSTD_c_hashLog : + *value = (int)CCtxParams->cParams.hashLog; + break; + case ZSTD_c_chainLog : + *value = (int)CCtxParams->cParams.chainLog; + break; + case ZSTD_c_searchLog : + *value = CCtxParams->cParams.searchLog; + break; + case ZSTD_c_minMatch : + *value = CCtxParams->cParams.minMatch; + break; + case ZSTD_c_targetLength : + *value = CCtxParams->cParams.targetLength; + break; + case ZSTD_c_strategy : + *value = (unsigned)CCtxParams->cParams.strategy; + break; + case ZSTD_c_contentSizeFlag : + *value = CCtxParams->fParams.contentSizeFlag; + break; + case ZSTD_c_checksumFlag : + *value = CCtxParams->fParams.checksumFlag; + break; + case ZSTD_c_dictIDFlag : + *value = !CCtxParams->fParams.noDictIDFlag; + break; + case ZSTD_c_forceMaxWindow : + *value = CCtxParams->forceWindow; + break; + case ZSTD_c_forceAttachDict : + *value = CCtxParams->attachDictPref; + break; + case ZSTD_c_literalCompressionMode : + *value = CCtxParams->literalCompressionMode; + break; + case ZSTD_c_nbWorkers : +#ifndef ZSTD_MULTITHREAD + assert(CCtxParams->nbWorkers == 0); +#endif + *value = CCtxParams->nbWorkers; + break; + case ZSTD_c_jobSize : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); +#else + assert(CCtxParams->jobSize <= INT_MAX); + *value = (int)CCtxParams->jobSize; + break; +#endif + case ZSTD_c_overlapLog : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); +#else + *value = CCtxParams->overlapLog; + break; +#endif + case ZSTD_c_rsyncable : +#ifndef ZSTD_MULTITHREAD + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); +#else + *value = CCtxParams->rsyncable; + break; +#endif + case ZSTD_c_enableDedicatedDictSearch : + *value = CCtxParams->enableDedicatedDictSearch; + break; + case ZSTD_c_enableLongDistanceMatching : + *value = CCtxParams->ldmParams.enableLdm; + break; + case ZSTD_c_ldmHashLog : + *value = CCtxParams->ldmParams.hashLog; + break; + case ZSTD_c_ldmMinMatch : + *value = CCtxParams->ldmParams.minMatchLength; + break; + case ZSTD_c_ldmBucketSizeLog : + *value = CCtxParams->ldmParams.bucketSizeLog; + break; + case ZSTD_c_ldmHashRateLog : + *value = CCtxParams->ldmParams.hashRateLog; + break; + case ZSTD_c_targetCBlockSize : + *value = (int)CCtxParams->targetCBlockSize; + break; + case ZSTD_c_srcSizeHint : + *value = (int)CCtxParams->srcSizeHint; + break; + case ZSTD_c_stableInBuffer : + *value = (int)CCtxParams->inBufferMode; + break; + case ZSTD_c_stableOutBuffer : + *value = (int)CCtxParams->outBufferMode; + break; + case ZSTD_c_blockDelimiters : + *value = (int)CCtxParams->blockDelimiters; + break; + case ZSTD_c_validateSequences : + *value = (int)CCtxParams->validateSequences; + break; + case ZSTD_c_splitBlocks : + *value = (int)CCtxParams->splitBlocks; + break; + case ZSTD_c_useRowMatchFinder : + *value = 
(int)CCtxParams->useRowMatchFinder; + break; + case ZSTD_c_deterministicRefPrefix: + *value = (int)CCtxParams->deterministicRefPrefix; + break; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } + return 0; +} + +/** ZSTD_CCtx_setParametersUsingCCtxParams() : + * just applies `params` into `cctx` + * no action is performed, parameters are merely stored. + * If ZSTDMT is enabled, parameters are pushed to cctx->mtctx. + * This is possible even if a compression is ongoing. + * In which case, new parameters will be applied on the fly, starting with next compression job. + */ +size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) +{ + DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams"); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "The context is in the wrong stage!"); + RETURN_ERROR_IF(cctx->cdict, stage_wrong, + "Can't override parameters with cdict attached (some must " + "be inherited from the cdict)."); + + cctx->requestedParams = *params; + return 0; +} + +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't set pledgedSrcSize when not in init stage."); + cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; + return 0; +} + +static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams( + int const compressionLevel, + size_t const dictSize); +static int ZSTD_dedicatedDictSearch_isSupported( + const ZSTD_compressionParameters* cParams); +static void ZSTD_dedicatedDictSearch_revertCParams( + ZSTD_compressionParameters* cParams); + +/** + * Initializes the local dict using the requested parameters. + * NOTE: This does not use the pledged src size, because it may be used for more + * than one compression. + */ +static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) +{ + ZSTD_localDict* const dl = &cctx->localDict; + if (dl->dict == NULL) { + /* No local dictionary. */ + assert(dl->dictBuffer == NULL); + assert(dl->cdict == NULL); + assert(dl->dictSize == 0); + return 0; + } + if (dl->cdict != NULL) { + assert(cctx->cdict == dl->cdict); + /* Local dictionary already initialized. 
*/ + return 0; + } + assert(dl->dictSize > 0); + assert(cctx->cdict == NULL); + assert(cctx->prefixDict.dict == NULL); + + dl->cdict = ZSTD_createCDict_advanced2( + dl->dict, + dl->dictSize, + ZSTD_dlm_byRef, + dl->dictContentType, + &cctx->requestedParams, + cctx->customMem); + RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed"); + cctx->cdict = dl->cdict; + return 0; +} + +size_t ZSTD_CCtx_loadDictionary_advanced( + ZSTD_CCtx* cctx, const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't load a dictionary when ctx is not in init stage."); + DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); + ZSTD_clearAllDicts(cctx); /* in case one already exists */ + if (dict == NULL || dictSize == 0) /* no dictionary mode */ + return 0; + if (dictLoadMethod == ZSTD_dlm_byRef) { + cctx->localDict.dict = dict; + } else { + void* dictBuffer; + RETURN_ERROR_IF(cctx->staticSize, memory_allocation, + "no malloc for static CCtx"); + dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem); + RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!"); + ZSTD_memcpy(dictBuffer, dict, dictSize); + cctx->localDict.dictBuffer = dictBuffer; + cctx->localDict.dict = dictBuffer; + } + cctx->localDict.dictSize = dictSize; + cctx->localDict.dictContentType = dictContentType; + return 0; +} + +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference( + ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + return ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); +} + +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + return ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); +} + + +size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a dict when ctx not in init stage."); + /* Free the existing local cdict (if any) to save memory. */ + ZSTD_clearAllDicts(cctx); + cctx->cdict = cdict; + return 0; +} + +size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a pool when ctx not in init stage."); + cctx->pool = pool; + return 0; +} + +size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize) +{ + return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent); +} + +size_t ZSTD_CCtx_refPrefix_advanced( + ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a prefix when ctx not in init stage."); + ZSTD_clearAllDicts(cctx); + if (prefix != NULL && prefixSize > 0) { + cctx->prefixDict.dict = prefix; + cctx->prefixDict.dictSize = prefixSize; + cctx->prefixDict.dictContentType = dictContentType; + } + return 0; +} + +/*! 
ZSTD_CCtx_reset() : + * Also dumps dictionary */ +size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) +{ + if ( (reset == ZSTD_reset_session_only) + || (reset == ZSTD_reset_session_and_parameters) ) { + cctx->streamStage = zcss_init; + cctx->pledgedSrcSizePlusOne = 0; + } + if ( (reset == ZSTD_reset_parameters) + || (reset == ZSTD_reset_session_and_parameters) ) { + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't reset parameters only when not in init stage."); + ZSTD_clearAllDicts(cctx); + return ZSTD_CCtxParams_reset(&cctx->requestedParams); + } + return 0; +} + + +/** ZSTD_checkCParams() : + control CParam values remain within authorized range. + @return : 0, or an error code if one value is beyond authorized range */ +size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) +{ + BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog); + BOUNDCHECK(ZSTD_c_chainLog, (int)cParams.chainLog); + BOUNDCHECK(ZSTD_c_hashLog, (int)cParams.hashLog); + BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog); + BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch); + BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength); + BOUNDCHECK(ZSTD_c_strategy, cParams.strategy); + return 0; +} + +/** ZSTD_clampCParams() : + * make CParam values within valid range. + * @return : valid CParams */ +static ZSTD_compressionParameters +ZSTD_clampCParams(ZSTD_compressionParameters cParams) +{ +# define CLAMP_TYPE(cParam, val, type) { \ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \ + if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \ + else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \ + } +# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned) + CLAMP(ZSTD_c_windowLog, cParams.windowLog); + CLAMP(ZSTD_c_chainLog, cParams.chainLog); + CLAMP(ZSTD_c_hashLog, cParams.hashLog); + CLAMP(ZSTD_c_searchLog, cParams.searchLog); + CLAMP(ZSTD_c_minMatch, cParams.minMatch); + CLAMP(ZSTD_c_targetLength,cParams.targetLength); + CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy); + return cParams; +} + +/** ZSTD_cycleLog() : + * condition for correct operation : hashLog > 1 */ +U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) +{ + U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); + return hashLog - btScale; +} + +/** ZSTD_dictAndWindowLog() : + * Returns an adjusted window log that is large enough to fit the source and the dictionary. + * The zstd format says that the entire dictionary is valid if one byte of the dictionary + * is within the window. So the hashLog and chainLog should be large enough to reference both + * the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing + * the hashLog and windowLog. + * NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN. + */ +static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize) +{ + const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX; + /* No dictionary ==> No change */ + if (dictSize == 0) { + return windowLog; + } + assert(windowLog <= ZSTD_WINDOWLOG_MAX); + assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */ + { + U64 const windowSize = 1ULL << windowLog; + U64 const dictAndWindowSize = dictSize + windowSize; + /* If the window size is already large enough to fit both the source and the dictionary + * then just use the window size. Otherwise adjust so that it fits the dictionary and + * the window.
+ */ + if (windowSize >= dictSize + srcSize) { + return windowLog; /* Window size large enough already */ + } else if (dictAndWindowSize >= maxWindowSize) { + return ZSTD_WINDOWLOG_MAX; /* Larger than max window log */ + } else { + return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1; + } + } +} + +/** ZSTD_adjustCParams_internal() : + * optimize `cPar` for a specified input (`srcSize` and `dictSize`). + * mostly downsize to reduce memory consumption and initialization latency. + * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known. + * `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`. + * note : `srcSize==0` means 0! + * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */ +static ZSTD_compressionParameters +ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, + unsigned long long srcSize, + size_t dictSize, + ZSTD_cParamMode_e mode) +{ + const U64 minSrcSize = 513; /* (1<<9) + 1 */ + const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); + assert(ZSTD_checkCParams(cPar)==0); + + switch (mode) { + case ZSTD_cpm_unknown: + case ZSTD_cpm_noAttachDict: + /* If we don't know the source size, don't make any + * assumptions about it. We will already have selected + * smaller parameters if a dictionary is in use. + */ + break; + case ZSTD_cpm_createCDict: + /* Assume a small source size when creating a dictionary + * with an unkown source size. + */ + if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN) + srcSize = minSrcSize; + break; + case ZSTD_cpm_attachDict: + /* Dictionary has its own dedicated parameters which have + * already been selected. We are selecting parameters + * for only the source. + */ + dictSize = 0; + break; + default: + assert(0); + break; + } + + /* resize windowLog if input is small enough, to use less memory */ + if ( (srcSize < maxWindowResize) + && (dictSize < maxWindowResize) ) { + U32 const tSize = (U32)(srcSize + dictSize); + static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN; + U32 const srcLog = (tSize < hashSizeMin) ? 
ZSTD_HASHLOG_MIN : + ZSTD_highbit32(tSize-1) + 1; + if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; + } + if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) { + U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize); + U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); + if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1; + if (cycleLog > dictAndWindowLog) + cPar.chainLog -= (cycleLog - dictAndWindowLog); + } + + if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) + cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */ + + return cPar; +} + +ZSTD_compressionParameters +ZSTD_adjustCParams(ZSTD_compressionParameters cPar, + unsigned long long srcSize, + size_t dictSize) +{ + cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ + if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown); +} + +static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); +static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); + +static void ZSTD_overrideCParams( + ZSTD_compressionParameters* cParams, + const ZSTD_compressionParameters* overrides) +{ + if (overrides->windowLog) cParams->windowLog = overrides->windowLog; + if (overrides->hashLog) cParams->hashLog = overrides->hashLog; + if (overrides->chainLog) cParams->chainLog = overrides->chainLog; + if (overrides->searchLog) cParams->searchLog = overrides->searchLog; + if (overrides->minMatch) cParams->minMatch = overrides->minMatch; + if (overrides->targetLength) cParams->targetLength = overrides->targetLength; + if (overrides->strategy) cParams->strategy = overrides->strategy; +} + +ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) +{ + ZSTD_compressionParameters cParams; + if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) { + srcSizeHint = CCtxParams->srcSizeHint; + } + cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode); + if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; + ZSTD_overrideCParams(&cParams, &CCtxParams->cParams); + assert(!ZSTD_checkCParams(cParams)); + /* srcSizeHint == 0 means 0 */ + return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode); +} + +static size_t +ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, + const ZSTD_useRowMatchFinderMode_e useRowMatchFinder, + const U32 enableDedicatedDictSearch, + const U32 forCCtx) +{ + /* chain table size should be 0 for fast or row-hash strategies */ + size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, enableDedicatedDictSearch && !forCCtx) + ? ((size_t)1 << cParams->chainLog) + : 0; + size_t const hSize = ((size_t)1) << cParams->hashLog; + U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; + size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; + /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't + * surrounded by redzones in ASAN. 
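+ * As a rough illustration (made-up parameters): with chainLog = 16, hashLog = 17 and no
+ * hashLog3 table, tableSpace below is ((1<<16) + (1<<17)) * sizeof(U32) = 768 KiB,
+ * before adding the optional optimal-parser and row-match-finder allocations.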
*/ + size_t const tableSpace = chainSize * sizeof(U32) + + hSize * sizeof(U32) + + h3Size * sizeof(U32); + size_t const optPotentialSpace = + ZSTD_cwksp_aligned_alloc_size((MaxML+1) * sizeof(U32)) + + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32)) + + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32)) + + ZSTD_cwksp_aligned_alloc_size((1<strategy, useRowMatchFinder) + ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16)) + : 0; + size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt)) + ? optPotentialSpace + : 0; + size_t const slackSpace = ZSTD_cwksp_slack_space_required(); + + /* tables are guaranteed to be sized in multiples of 64 bytes (or 16 uint32_t) */ + ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN >= 4 && ZSTD_WINDOWLOG_MIN >= 4 && ZSTD_CHAINLOG_MIN >= 4); + assert(useRowMatchFinder != ZSTD_urm_auto); + + DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u", + (U32)chainSize, (U32)hSize, (U32)h3Size); + return tableSpace + optSpace + slackSpace + lazyAdditionalSpace; +} + +static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( + const ZSTD_compressionParameters* cParams, + const ldmParams_t* ldmParams, + const int isStatic, + const ZSTD_useRowMatchFinderMode_e useRowMatchFinder, + const size_t buffInSize, + const size_t buffOutSize, + const U64 pledgedSrcSize) +{ + size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << cParams->windowLog), pledgedSrcSize)); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); + U32 const divider = (cParams->minMatch==3) ? 3 : 4; + size_t const maxNbSeq = blockSize / divider; + size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) + + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef)) + + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); + size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE); + size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); + size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1); + + size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams); + size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize); + size_t const ldmSeqSpace = ldmParams->enableLdm ? + ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0; + + + size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) + + ZSTD_cwksp_alloc_size(buffOutSize); + + size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; + + size_t const neededSpace = + cctxSpace + + entropySpace + + blockStateSpace + + ldmSpace + + ldmSeqSpace + + matchStateSize + + tokenSpace + + bufferSpace; + + DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace); + return neededSpace; +} + +size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) +{ + ZSTD_compressionParameters const cParams = + ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, + &cParams); + + RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); + /* estimateCCtxSize is for one-shot compression. So no buffers should + * be needed. However, we still allocate two 0-sized buffers, which can + * take space under ASAN. 
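+ * Concretely, the figure returned below is the sum computed in
+ * ZSTD_estimateCCtxSize_usingCCtxParams_internal(): cctx object + entropy workspace +
+ * block state + LDM tables/sequences + match-state tables + token space, with both
+ * stream buffers sized to 0 for this one-shot estimate.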
+ */
+    return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
+        &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
+}
+
+size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
+{
+    ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);
+    if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
+        /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
+        size_t noRowCCtxSize;
+        size_t rowCCtxSize;
+        initialParams.useRowMatchFinder = ZSTD_urm_disableRowMatchFinder;
+        noRowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+        initialParams.useRowMatchFinder = ZSTD_urm_enableRowMatchFinder;
+        rowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+        return MAX(noRowCCtxSize, rowCCtxSize);
+    } else {
+        return ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+    }
+}
+
+static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
+{
+    int tier = 0;
+    size_t largestSize = 0;
+    static const unsigned long long srcSizeTiers[4] = {16 KB, 128 KB, 256 KB, ZSTD_CONTENTSIZE_UNKNOWN};
+    for (; tier < 4; ++tier) {
+        /* Choose the set of cParams for a given level across all srcSizes that give the largest cctxSize */
+        ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeTiers[tier], 0, ZSTD_cpm_noAttachDict);
+        largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams(cParams), largestSize);
+    }
+    return largestSize;
+}
+
+size_t ZSTD_estimateCCtxSize(int compressionLevel)
+{
+    int level;
+    size_t memBudget = 0;
+    for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) {
+        /* Ensure monotonically increasing memory usage as compression level increases */
+        size_t const newMB = ZSTD_estimateCCtxSize_internal(level);
+        if (newMB > memBudget) memBudget = newMB;
+    }
+    return memBudget;
+}
+
+size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
+{
+    RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
+    {   ZSTD_compressionParameters const cParams =
+                ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
+        size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
+        size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered)
+                ? ((size_t)1 << cParams.windowLog) + blockSize
+                : 0;
+        size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)
+                ?
ZSTD_compressBound(blockSize) + 1 + : 0; + ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, ¶ms->cParams); + + return ZSTD_estimateCCtxSize_usingCCtxParams_internal( + &cParams, ¶ms->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize, + ZSTD_CONTENTSIZE_UNKNOWN); + } +} + +size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams); + if (ZSTD_rowMatchFinderSupported(cParams.strategy)) { + /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */ + size_t noRowCCtxSize; + size_t rowCCtxSize; + initialParams.useRowMatchFinder = ZSTD_urm_disableRowMatchFinder; + noRowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams); + initialParams.useRowMatchFinder = ZSTD_urm_enableRowMatchFinder; + rowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams); + return MAX(noRowCCtxSize, rowCCtxSize); + } else { + return ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams); + } +} + +static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) +{ + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + return ZSTD_estimateCStreamSize_usingCParams(cParams); +} + +size_t ZSTD_estimateCStreamSize(int compressionLevel) +{ + int level; + size_t memBudget = 0; + for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) { + size_t const newMB = ZSTD_estimateCStreamSize_internal(level); + if (newMB > memBudget) memBudget = newMB; + } + return memBudget; +} + +/* ZSTD_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads (non-blocking mode). + */ +ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers > 0) { + return ZSTDMT_getFrameProgression(cctx->mtctx); + } +#endif + { ZSTD_frameProgression fp; + size_t const buffered = (cctx->inBuff == NULL) ? 0 : + cctx->inBuffPos - cctx->inToCompress; + if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress); + assert(buffered <= ZSTD_BLOCKSIZE_MAX); + fp.ingested = cctx->consumedSrcSize + buffered; + fp.consumed = cctx->consumedSrcSize; + fp.produced = cctx->producedCSize; + fp.flushed = cctx->producedCSize; /* simplified; some data might still be left within streaming output buffer */ + fp.currentJobID = 0; + fp.nbActiveWorkers = 0; + return fp; +} } + +/*! ZSTD_toFlushNow() + * Only useful for multithreading scenarios currently (nbWorkers >= 1). 
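+ * With nbWorkers == 0 it currently just reports 0 (an over-simplification noted in the
+ * code below); with workers it forwards to ZSTDMT_toFlushNow(). A streaming caller can
+ * poll it between ZSTD_compressStream2() calls to judge whether another flush is
+ * worthwhile (usage sketch, not a formal guarantee).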
+ */ +size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers > 0) { + return ZSTDMT_toFlushNow(cctx->mtctx); + } +#endif + (void)cctx; + return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */ +} + +static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, + ZSTD_compressionParameters cParams2) +{ + (void)cParams1; + (void)cParams2; + assert(cParams1.windowLog == cParams2.windowLog); + assert(cParams1.chainLog == cParams2.chainLog); + assert(cParams1.hashLog == cParams2.hashLog); + assert(cParams1.searchLog == cParams2.searchLog); + assert(cParams1.minMatch == cParams2.minMatch); + assert(cParams1.targetLength == cParams2.targetLength); + assert(cParams1.strategy == cParams2.strategy); +} + +void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) +{ + int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + bs->rep[i] = repStartValue[i]; + bs->entropy.huf.repeatMode = HUF_repeat_none; + bs->entropy.fse.offcode_repeatMode = FSE_repeat_none; + bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none; + bs->entropy.fse.litlength_repeatMode = FSE_repeat_none; +} + +/*! ZSTD_invalidateMatchState() + * Invalidate all the matches in the match finder tables. + * Requires nextSrc and base to be set (can be NULL). + */ +static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) +{ + ZSTD_window_clear(&ms->window); + + ms->nextToUpdate = ms->window.dictLimit; + ms->loadedDictEnd = 0; + ms->opt.litLengthSum = 0; /* force reset of btopt stats */ + ms->dictMatchState = NULL; +} + +/** + * Controls, for this matchState reset, whether the tables need to be cleared / + * prepared for the coming compression (ZSTDcrp_makeClean), or whether the + * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a + * subsequent operation will overwrite the table space anyways (e.g., copying + * the matchState contents in from a CDict). + */ +typedef enum { + ZSTDcrp_makeClean, + ZSTDcrp_leaveDirty +} ZSTD_compResetPolicy_e; + +/** + * Controls, for this matchState reset, whether indexing can continue where it + * left off (ZSTDirp_continue), or whether it needs to be restarted from zero + * (ZSTDirp_reset). + */ +typedef enum { + ZSTDirp_continue, + ZSTDirp_reset +} ZSTD_indexResetPolicy_e; + +typedef enum { + ZSTD_resetTarget_CDict, + ZSTD_resetTarget_CCtx +} ZSTD_resetTarget_e; + + +static size_t +ZSTD_reset_matchState(ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + const ZSTD_compressionParameters* cParams, + const ZSTD_useRowMatchFinderMode_e useRowMatchFinder, + const ZSTD_compResetPolicy_e crp, + const ZSTD_indexResetPolicy_e forceResetIndex, + const ZSTD_resetTarget_e forWho) +{ + /* disable chain table allocation for fast or row-based strategies */ + size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, + ms->dedicatedDictSearch && (forWho == ZSTD_resetTarget_CDict)) + ? ((size_t)1 << cParams->chainLog) + : 0; + size_t const hSize = ((size_t)1) << cParams->hashLog; + U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; + size_t const h3Size = hashLog3 ? 
((size_t)1) << hashLog3 : 0; + + DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset); + assert(useRowMatchFinder != ZSTD_urm_auto); + if (forceResetIndex == ZSTDirp_reset) { + ZSTD_window_init(&ms->window); + ZSTD_cwksp_mark_tables_dirty(ws); + } + + ms->hashLog3 = hashLog3; + + ZSTD_invalidateMatchState(ms); + + assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */ + + ZSTD_cwksp_clear_tables(ws); + + DEBUGLOG(5, "reserving table space"); + /* table Space */ + ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32)); + ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32)); + ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32)); + RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, + "failed a workspace allocation in ZSTD_reset_matchState"); + + DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty); + if (crp!=ZSTDcrp_leaveDirty) { + /* reset tables only */ + ZSTD_cwksp_clean_tables(ws); + } + + /* opt parser space */ + if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { + DEBUGLOG(4, "reserving optimal parser space"); + ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned)); + ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned)); + ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned)); + ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)); + ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); + } + + if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) { + { /* Row match finder needs an additional table of hashes ("tags") */ + size_t const tagTableSize = hSize*sizeof(U16); + ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize); + if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize); + } + { /* Switch to 32-entry rows if searchLog is 5 (or more) */ + U32 const rowLog = cParams->searchLog < 5 ? 4 : 5; + assert(cParams->hashLog > rowLog); + ms->rowHashLog = cParams->hashLog - rowLog; + } + } + + ms->cParams = *cParams; + + RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, + "failed a workspace allocation in ZSTD_reset_matchState"); + return 0; +} + +/* ZSTD_indexTooCloseToMax() : + * minor optimization : prefer memset() rather than reduceIndex() + * which is measurably slow in some circumstances (reported for Visual Studio). + * Works when re-using a context for a lot of smallish inputs : + * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN, + * memset() will be triggered before reduceIndex(). + */ +#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB) +static int ZSTD_indexTooCloseToMax(ZSTD_window_t w) +{ + return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN); +} + +/** ZSTD_dictTooBig(): + * When dictionaries are larger than ZSTD_CHUNKSIZE_MAX they can't be loaded in + * one go generically. So we ensure that in that case we reset the tables to zero, + * so that we can load as much of the dictionary as possible. + */ +static int ZSTD_dictTooBig(size_t const loadedDictSize) +{ + return loadedDictSize > ZSTD_CHUNKSIZE_MAX; +} + +/*! ZSTD_resetCCtx_internal() : + * @param loadedDictSize The size of the dictionary to be loaded + * into the context, if any. 
If no dictionary is used, or the + * dictionary is being attached / copied, then pass 0. + * note : `params` are assumed fully validated at this stage. + */ +static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, + ZSTD_CCtx_params const* params, + U64 const pledgedSrcSize, + size_t const loadedDictSize, + ZSTD_compResetPolicy_e const crp, + ZSTD_buffered_policy_e const zbuff) +{ + ZSTD_cwksp* const ws = &zc->workspace; + DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d", + (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder); + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); + + zc->isFirstBlock = 1; + + /* Set applied params early so we can modify them for LDM, + * and point params at the applied params. + */ + zc->appliedParams = *params; + params = &zc->appliedParams; + + assert(params->useRowMatchFinder != ZSTD_urm_auto); + if (params->ldmParams.enableLdm) { + /* Adjust long distance matching parameters */ + ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, ¶ms->cParams); + assert(params->ldmParams.hashLog >= params->ldmParams.bucketSizeLog); + assert(params->ldmParams.hashRateLog < 32); + } + + { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize)); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); + U32 const divider = (params->cParams.minMatch==3) ? 3 : 4; + size_t const maxNbSeq = blockSize / divider; + size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered) + ? ZSTD_compressBound(blockSize) + 1 + : 0; + size_t const buffInSize = (zbuff == ZSTDb_buffered && params->inBufferMode == ZSTD_bm_buffered) + ? windowSize + blockSize + : 0; + size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize); + + int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window); + int const dictTooBig = ZSTD_dictTooBig(loadedDictSize); + ZSTD_indexResetPolicy_e needsIndexReset = + (indexTooClose || dictTooBig || !zc->initialized) ? ZSTDirp_reset : ZSTDirp_continue; + + size_t const neededSpace = + ZSTD_estimateCCtxSize_usingCCtxParams_internal( + ¶ms->cParams, ¶ms->ldmParams, zc->staticSize != 0, params->useRowMatchFinder, + buffInSize, buffOutSize, pledgedSrcSize); + int resizeWorkspace; + + FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!"); + + if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0); + + { /* Check if workspace is large enough, alloc a new one if needed */ + int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace; + int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace); + resizeWorkspace = workspaceTooSmall || workspaceWasteful; + DEBUGLOG(4, "Need %zu B workspace", neededSpace); + DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); + + if (resizeWorkspace) { + DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB", + ZSTD_cwksp_sizeof(ws) >> 10, + neededSpace >> 10); + + RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize"); + + needsIndexReset = ZSTDirp_reset; + + ZSTD_cwksp_free(ws, zc->customMem); + FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), ""); + + DEBUGLOG(5, "reserving object space"); + /* Statically sized space. 
+ * entropyWorkspace never moves, + * though prev/next block swap places */ + assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t))); + zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); + RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock"); + zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); + RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock"); + zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE); + RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace"); + } } + + ZSTD_cwksp_clear(ws); + + /* init params */ + zc->blockState.matchState.cParams = params->cParams; + zc->pledgedSrcSizePlusOne = pledgedSrcSize+1; + zc->consumedSrcSize = 0; + zc->producedCSize = 0; + if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) + zc->appliedParams.fParams.contentSizeFlag = 0; + DEBUGLOG(4, "pledged content size : %u ; flag : %u", + (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag); + zc->blockSize = blockSize; + + XXH64_reset(&zc->xxhState, 0); + zc->stage = ZSTDcs_init; + zc->dictID = 0; + zc->dictContentSize = 0; + + ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); + + /* ZSTD_wildcopy() is used to copy into the literals buffer, + * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes. + */ + zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH); + zc->seqStore.maxNbLit = blockSize; + + /* buffers */ + zc->bufferedPolicy = zbuff; + zc->inBuffSize = buffInSize; + zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize); + zc->outBuffSize = buffOutSize; + zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize); + + /* ldm bucketOffsets table */ + if (params->ldmParams.enableLdm) { + /* TODO: avoid memset? */ + size_t const numBuckets = + ((size_t)1) << (params->ldmParams.hashLog - + params->ldmParams.bucketSizeLog); + zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets); + ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets); + } + + /* sequences storage */ + ZSTD_referenceExternalSequences(zc, NULL, 0); + zc->seqStore.maxNbSeq = maxNbSeq; + zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef)); + + FORWARD_IF_ERROR(ZSTD_reset_matchState( + &zc->blockState.matchState, + ws, + ¶ms->cParams, + params->useRowMatchFinder, + crp, + needsIndexReset, + ZSTD_resetTarget_CCtx), ""); + + /* ldm hash table */ + if (params->ldmParams.enableLdm) { + /* TODO: avoid memset? 
*/ + size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog; + zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t)); + ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); + zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq)); + zc->maxNbLdmSequences = maxNbLdmSeq; + + ZSTD_window_init(&zc->ldmState.window); + zc->ldmState.loadedDictEnd = 0; + } + + assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace)); + DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); + + zc->initialized = 1; + + return 0; + } +} + +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! */ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) { + int i; + for (i=0; iblockState.prevCBlock->rep[i] = 0; + assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window)); +} + +/* These are the approximate sizes for each strategy past which copying the + * dictionary tables into the working context is faster than using them + * in-place. + */ +static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = { + 8 KB, /* unused */ + 8 KB, /* ZSTD_fast */ + 16 KB, /* ZSTD_dfast */ + 32 KB, /* ZSTD_greedy */ + 32 KB, /* ZSTD_lazy */ + 32 KB, /* ZSTD_lazy2 */ + 32 KB, /* ZSTD_btlazy2 */ + 32 KB, /* ZSTD_btopt */ + 8 KB, /* ZSTD_btultra */ + 8 KB /* ZSTD_btultra2 */ +}; + +static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + U64 pledgedSrcSize) +{ + size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy]; + int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch; + return dedicatedDictSearch + || ( ( pledgedSrcSize <= cutoff + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || params->attachDictPref == ZSTD_dictForceAttach ) + && params->attachDictPref != ZSTD_dictForceCopy + && !params->forceWindow ); /* dictMatchState isn't correctly + * handled in _enforceMaxDist */ +} + +static size_t +ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + DEBUGLOG(4, "ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%llu", + (unsigned long long)pledgedSrcSize); + { + ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams; + unsigned const windowLog = params.cParams.windowLog; + assert(windowLog != 0); + /* Resize working context table params for input only, since the dict + * has its own tables. */ + /* pledgedSrcSize == 0 means 0! 
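+ * (In attach mode the cdict keeps its own tables, so the cParams below are adjusted
+ * with the dictionary size treated as 0 via ZSTD_cpm_attachDict; e.g. a 10 KB pledged
+ * source with a ZSTD_lazy cdict stays under the 32 KB attach cutoff from
+ * attachDictSizeCutoffs. Illustrative numbers only.)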
*/ + + if (cdict->matchState.dedicatedDictSearch) { + ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams); + } + + params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, + cdict->dictContentSize, ZSTD_cpm_attachDict); + params.cParams.windowLog = windowLog; + params.useRowMatchFinder = cdict->useRowMatchFinder; /* cdict overrides */ + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, ¶ms, pledgedSrcSize, + /* loadedDictSize */ 0, + ZSTDcrp_makeClean, zbuff), ""); + assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy); + } + + { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc + - cdict->matchState.window.base); + const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit; + if (cdictLen == 0) { + /* don't even attach dictionaries with no contents */ + DEBUGLOG(4, "skipping attaching empty dictionary"); + } else { + DEBUGLOG(4, "attaching dictionary into context"); + cctx->blockState.matchState.dictMatchState = &cdict->matchState; + + /* prep working match state so dict matches never have negative indices + * when they are translated to the working context's index space. */ + if (cctx->blockState.matchState.window.dictLimit < cdictEnd) { + cctx->blockState.matchState.window.nextSrc = + cctx->blockState.matchState.window.base + cdictEnd; + ZSTD_window_clear(&cctx->blockState.matchState.window); + } + /* loadedDictEnd is expressed within the referential of the active context */ + cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; + } } + + cctx->dictID = cdict->dictID; + cctx->dictContentSize = cdict->dictContentSize; + + /* copy block state */ + ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); + + return 0; +} + +static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; + + assert(!cdict->matchState.dedicatedDictSearch); + DEBUGLOG(4, "ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%llu", + (unsigned long long)pledgedSrcSize); + + { unsigned const windowLog = params.cParams.windowLog; + assert(windowLog != 0); + /* Copy only compression parameters related to tables. */ + params.cParams = *cdict_cParams; + params.cParams.windowLog = windowLog; + params.useRowMatchFinder = cdict->useRowMatchFinder; + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, ¶ms, pledgedSrcSize, + /* loadedDictSize */ 0, + ZSTDcrp_leaveDirty, zbuff), ""); + assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); + assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); + assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog); + } + + ZSTD_cwksp_mark_tables_dirty(&cctx->workspace); + assert(params.useRowMatchFinder != ZSTD_urm_auto); + + /* copy tables */ + { size_t const chainSize = ZSTD_allocateChainTable(cdict_cParams->strategy, cdict->useRowMatchFinder, 0 /* DDS guaranteed disabled */) + ? 
((size_t)1 << cdict_cParams->chainLog) + : 0; + size_t const hSize = (size_t)1 << cdict_cParams->hashLog; + + ZSTD_memcpy(cctx->blockState.matchState.hashTable, + cdict->matchState.hashTable, + hSize * sizeof(U32)); + /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */ + if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) { + ZSTD_memcpy(cctx->blockState.matchState.chainTable, + cdict->matchState.chainTable, + chainSize * sizeof(U32)); + } + /* copy tag table */ + if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) { + size_t const tagTableSize = hSize*sizeof(U16); + ZSTD_memcpy(cctx->blockState.matchState.tagTable, + cdict->matchState.tagTable, + tagTableSize); + } + } + + /* Zero the hashTable3, since the cdict never fills it */ + { int const h3log = cctx->blockState.matchState.hashLog3; + size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; + assert(cdict->matchState.hashLog3 == 0); + ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); + } + + ZSTD_cwksp_mark_tables_clean(&cctx->workspace); + + /* copy dictionary offsets */ + { ZSTD_matchState_t const* srcMatchState = &cdict->matchState; + ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; + } + + cctx->dictID = cdict->dictID; + cctx->dictContentSize = cdict->dictContentSize; + + /* copy block state */ + ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); + + return 0; +} + +/* We have a choice between copying the dictionary context into the working + * context, or referencing the dictionary context from the working context + * in-place. We decide here which strategy to use. */ +static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + + DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)", + (unsigned)pledgedSrcSize); + + if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) { + return ZSTD_resetCCtx_byAttachingCDict( + cctx, cdict, *params, pledgedSrcSize, zbuff); + } else { + return ZSTD_resetCCtx_byCopyingCDict( + cctx, cdict, *params, pledgedSrcSize, zbuff); + } +} + +/*! ZSTD_copyCCtx_internal() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). + * The "context", in this case, refers to the hash and chain tables, + * entropy tables, and dictionary references. + * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx. + * @return : 0, or an error code */ +static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, + const ZSTD_CCtx* srcCCtx, + ZSTD_frameParameters fParams, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong, + "Can't copy a ctx that's not in init stage."); + DEBUGLOG(5, "ZSTD_copyCCtx_internal"); + ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); + { ZSTD_CCtx_params params = dstCCtx->requestedParams; + /* Copy only compression parameters related to tables. 
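+ * (i.e. the cParams that determine hash/chain/hashLog3 table geometry; the copies of
+ * those tables themselves follow right after the reset, sized as hSize*sizeof(U32),
+ * chainSize*sizeof(U32) and h3Size*sizeof(U32) respectively.)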
*/ + params.cParams = srcCCtx->appliedParams.cParams; + assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_urm_auto); + params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder; + params.fParams = fParams; + ZSTD_resetCCtx_internal(dstCCtx, ¶ms, pledgedSrcSize, + /* loadedDictSize */ 0, + ZSTDcrp_leaveDirty, zbuff); + assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); + assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); + assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog); + assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog); + assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3); + } + + ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace); + + /* copy tables */ + { size_t const chainSize = ZSTD_allocateChainTable(srcCCtx->appliedParams.cParams.strategy, + srcCCtx->appliedParams.useRowMatchFinder, + 0 /* forDDSDict */) + ? ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog) + : 0; + size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; + int const h3log = srcCCtx->blockState.matchState.hashLog3; + size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; + + ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable, + srcCCtx->blockState.matchState.hashTable, + hSize * sizeof(U32)); + ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable, + srcCCtx->blockState.matchState.chainTable, + chainSize * sizeof(U32)); + ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3, + srcCCtx->blockState.matchState.hashTable3, + h3Size * sizeof(U32)); + } + + ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace); + + /* copy dictionary offsets */ + { + const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState; + ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; + } + dstCCtx->dictID = srcCCtx->dictID; + dstCCtx->dictContentSize = srcCCtx->dictContentSize; + + /* copy block state */ + ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock)); + + return 0; +} + +/*! ZSTD_copyCCtx() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). + * pledgedSrcSize==0 means "unknown". +* @return : 0, or an error code */ +size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize) +{ + ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy; + ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1); + if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; + fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN); + + return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, + fParams, pledgedSrcSize, + zbuff); +} + + +#define ZSTD_ROWSIZE 16 +/*! ZSTD_reduceTable() : + * reduce table indexes by `reducerValue`, or squash to zero. + * PreserveMark preserves "unsorted mark" for btlazy2 strategy. + * It must be set to a clear 0/1 value, to remove branch during inlining. 
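+ * In effect each index i becomes (i < reducerValue) ? 0 : i - reducerValue, e.g. with
+ * reducerValue = 1000 an index of 250 squashes to 0 and an index of 4000 becomes 3000
+ * (illustrative values); only btlazy2's unsorted mark is exempted when PreserveMark is set.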
+ * Presume table size is a multiple of ZSTD_ROWSIZE + * to help auto-vectorization */ +FORCE_INLINE_TEMPLATE void +ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark) +{ + int const nbRows = (int)size / ZSTD_ROWSIZE; + int cellNb = 0; + int rowNb; + assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ + assert(size < (1U<<31)); /* can be casted to int */ + +#if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE) + /* To validate that the table re-use logic is sound, and that we don't + * access table space that we haven't cleaned, we re-"poison" the table + * space every time we mark it dirty. + * + * This function however is intended to operate on those dirty tables and + * re-clean them. So when this function is used correctly, we can unpoison + * the memory it operated on. This introduces a blind spot though, since + * if we now try to operate on __actually__ poisoned memory, we will not + * detect that. */ + __msan_unpoison(table, size * sizeof(U32)); +#endif + + for (rowNb=0 ; rowNb < nbRows ; rowNb++) { + int column; + for (column=0; columncParams.hashLog; + ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); + } + + if (ZSTD_allocateChainTable(params->cParams.strategy, params->useRowMatchFinder, (U32)ms->dedicatedDictSearch)) { + U32 const chainSize = (U32)1 << params->cParams.chainLog; + if (params->cParams.strategy == ZSTD_btlazy2) + ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); + else + ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); + } + + if (ms->hashLog3) { + U32 const h3Size = (U32)1 << ms->hashLog3; + ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue); + } +} + + +/*-******************************************************* +* Block entropic compression +*********************************************************/ + +/* See doc/zstd_compression_format.md for detailed format description */ + +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) +{ + const seqDef* const sequences = seqStorePtr->sequencesStart; + BYTE* const llCodeTable = seqStorePtr->llCode; + BYTE* const ofCodeTable = seqStorePtr->ofCode; + BYTE* const mlCodeTable = seqStorePtr->mlCode; + U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + U32 u; + assert(nbSeq <= seqStorePtr->maxNbSeq); + for (u=0; ulongLengthType==ZSTD_llt_literalLength) + llCodeTable[seqStorePtr->longLengthPos] = MaxLL; + if (seqStorePtr->longLengthType==ZSTD_llt_matchLength) + mlCodeTable[seqStorePtr->longLengthPos] = MaxML; +} + +/* ZSTD_useTargetCBlockSize(): + * Returns if target compressed block size param is being used. + * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize. + * Returns 1 if true, 0 otherwise. */ +static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) +{ + DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize); + return (cctxParams->targetCBlockSize != 0); +} + +/* ZSTD_blockSplitterEnabled(): + * Returns if block splitting param is being used + * If used, compression will do best effort to split a block in order to improve compression ratio. + * Returns 1 if true, 0 otherwise. 
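+ * Like targetCBlockSize above, this is opt-in: a zero cctxParams->splitBlocks leaves the
+ * regular single-block path untouched.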
*/ +static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams) +{ + DEBUGLOG(5, "ZSTD_blockSplitterEnabled(splitBlocks=%d)", cctxParams->splitBlocks); + return (cctxParams->splitBlocks != 0); +} + +/* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types + * and size of the sequences statistics + */ +typedef struct { + U32 LLtype; + U32 Offtype; + U32 MLtype; + size_t size; + size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */ +} ZSTD_symbolEncodingTypeStats_t; + +/* ZSTD_buildSequencesStatistics(): + * Returns a ZSTD_symbolEncodingTypeStats_t, or a zstd error code in the `size` field. + * Modifies `nextEntropy` to have the appropriate values as a side effect. + * nbSeq must be greater than 0. + * + * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32) + */ +static ZSTD_symbolEncodingTypeStats_t +ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq, + const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy, + BYTE* dst, const BYTE* const dstEnd, + ZSTD_strategy strategy, unsigned* countWorkspace, + void* entropyWorkspace, size_t entropyWkspSize) { + BYTE* const ostart = dst; + const BYTE* const oend = dstEnd; + BYTE* op = ostart; + FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + ZSTD_symbolEncodingTypeStats_t stats; + + stats.lastCountSize = 0; + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + assert(op <= oend); + assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */ + /* build CTable for Literal Lengths */ + { unsigned max = MaxLL; + size_t const mostFrequent = HIST_countFast_wksp(countWorkspace, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + DEBUGLOG(5, "Building LL table"); + nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; + stats.LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, + countWorkspace, max, mostFrequent, nbSeq, + LLFSELog, prevEntropy->litlengthCTable, + LL_defaultNorm, LL_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(set_basic < set_compressed && set_rle < set_compressed); + assert(!(stats.LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_LitLength, LLFSELog, (symbolEncodingType_e)stats.LLtype, + countWorkspace, max, llCodeTable, nbSeq, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + prevEntropy->litlengthCTable, + sizeof(prevEntropy->litlengthCTable), + entropyWorkspace, entropyWkspSize); + if (ZSTD_isError(countSize)) { + DEBUGLOG(3, "ZSTD_buildCTable for LitLens failed"); + stats.size = countSize; + return stats; + } + if (stats.LLtype == set_compressed) + stats.lastCountSize = countSize; + op += countSize; + assert(op <= oend); + } } + /* build CTable for Offsets */ + { unsigned max = MaxOff; + size_t const mostFrequent = HIST_countFast_wksp( + countWorkspace, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too 
large */ + ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + DEBUGLOG(5, "Building OF table"); + nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; + stats.Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, + countWorkspace, max, mostFrequent, nbSeq, + OffFSELog, prevEntropy->offcodeCTable, + OF_defaultNorm, OF_defaultNormLog, + defaultPolicy, strategy); + assert(!(stats.Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)stats.Offtype, + countWorkspace, max, ofCodeTable, nbSeq, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + prevEntropy->offcodeCTable, + sizeof(prevEntropy->offcodeCTable), + entropyWorkspace, entropyWkspSize); + if (ZSTD_isError(countSize)) { + DEBUGLOG(3, "ZSTD_buildCTable for Offsets failed"); + stats.size = countSize; + return stats; + } + if (stats.Offtype == set_compressed) + stats.lastCountSize = countSize; + op += countSize; + assert(op <= oend); + } } + /* build CTable for MatchLengths */ + { unsigned max = MaxML; + size_t const mostFrequent = HIST_countFast_wksp( + countWorkspace, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); + nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; + stats.MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, + countWorkspace, max, mostFrequent, nbSeq, + MLFSELog, prevEntropy->matchlengthCTable, + ML_defaultNorm, ML_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(!(stats.MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_MatchLength, MLFSELog, (symbolEncodingType_e)stats.MLtype, + countWorkspace, max, mlCodeTable, nbSeq, + ML_defaultNorm, ML_defaultNormLog, MaxML, + prevEntropy->matchlengthCTable, + sizeof(prevEntropy->matchlengthCTable), + entropyWorkspace, entropyWkspSize); + if (ZSTD_isError(countSize)) { + DEBUGLOG(3, "ZSTD_buildCTable for MatchLengths failed"); + stats.size = countSize; + return stats; + } + if (stats.MLtype == set_compressed) + stats.lastCountSize = countSize; + op += countSize; + assert(op <= oend); + } } + stats.size = (size_t)(op-ostart); + return stats; +} + +/* ZSTD_entropyCompressSeqStore_internal(): + * compresses both literals and sequences + * Returns compressed size of block, or a zstd error. 
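+ * The layout it produces below is: compressed literals section, then the sequence count
+ * (1-3 bytes), then one header byte packing the LL/OF/ML encoding types
+ * ((LLtype<<6) + (Offtype<<4) + (MLtype<<2)), then any FSE table descriptions, then the
+ * interleaved sequences bitstream.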
+ */ +MEM_STATIC size_t +ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + void* entropyWorkspace, size_t entropyWkspSize, + const int bmi2) +{ + const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + unsigned* count = (unsigned*)entropyWorkspace; + FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; + const seqDef* const sequences = seqStorePtr->sequencesStart; + const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + size_t lastCountSize; + + entropyWorkspace = count + (MaxSeq + 1); + entropyWkspSize -= (MaxSeq + 1) * sizeof(*count); + + DEBUGLOG(4, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu)", nbSeq); + ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= HUF_WORKSPACE_SIZE); + + /* Compress literals */ + { const BYTE* const literals = seqStorePtr->litStart; + size_t const litSize = (size_t)(seqStorePtr->lit - literals); + size_t const cSize = ZSTD_compressLiterals( + &prevEntropy->huf, &nextEntropy->huf, + cctxParams->cParams.strategy, + ZSTD_disableLiteralsCompression(cctxParams), + op, dstCapacity, + literals, litSize, + entropyWorkspace, entropyWkspSize, + bmi2); + FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); + assert(cSize <= dstCapacity); + op += cSize; + } + + /* Sequences Header */ + RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, + dstSize_tooSmall, "Can't fit seq hdr in output buf!"); + if (nbSeq < 128) { + *op++ = (BYTE)nbSeq; + } else if (nbSeq < LONGNBSEQ) { + op[0] = (BYTE)((nbSeq>>8) + 0x80); + op[1] = (BYTE)nbSeq; + op+=2; + } else { + op[0]=0xFF; + MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)); + op+=3; + } + assert(op <= oend); + if (nbSeq==0) { + /* Copy the old tables over as if we repeated them */ + ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); + return (size_t)(op - ostart); + } + { + ZSTD_symbolEncodingTypeStats_t stats; + BYTE* seqHead = op++; + /* build stats for sequences */ + stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, + &prevEntropy->fse, &nextEntropy->fse, + op, oend, + strategy, count, + entropyWorkspace, entropyWkspSize); + FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!"); + *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2)); + lastCountSize = stats.lastCountSize; + op += stats.size; + } + + { size_t const bitstreamSize = ZSTD_encodeSequences( + op, (size_t)(oend - op), + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, + longOffsets, bmi2); + FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); + op += bitstreamSize; + assert(op <= oend); + /* zstd versions <= 1.3.4 mistakenly report corruption when + * FSE_readNCount() receives a buffer < 4 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1146. 
+ * This can happen when the last set_compressed table present is 2 + * bytes and the bitstream is only one byte. + * In this exceedingly rare case, we will simply emit an uncompressed + * block, since it isn't worth optimizing. + */ + if (lastCountSize && (lastCountSize + bitstreamSize) < 4) { + /* lastCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(lastCountSize + bitstreamSize == 3); + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " + "emitting an uncompressed block."); + return 0; + } + } + + DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart)); + return (size_t)(op - ostart); +} + +MEM_STATIC size_t +ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + size_t srcSize, + void* entropyWorkspace, size_t entropyWkspSize, + int bmi2) +{ + size_t const cSize = ZSTD_entropyCompressSeqStore_internal( + seqStorePtr, prevEntropy, nextEntropy, cctxParams, + dst, dstCapacity, + entropyWorkspace, entropyWkspSize, bmi2); + if (cSize == 0) return 0; + /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. + * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. + */ + if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) + return 0; /* block not compressed */ + FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed"); + + /* Check compressibility */ + { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); + if (cSize >= maxCSize) return 0; /* block not compressed */ + } + DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize); + return cSize; +} + +/* ZSTD_selectBlockCompressor() : + * Not static, but internal use only (used by long distance matcher) + * assumption : strat is a valid strategy */ +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_useRowMatchFinderMode_e useRowMatchFinder, ZSTD_dictMode_e dictMode) +{ + static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = { + { ZSTD_compressBlock_fast /* default for 0 */, + ZSTD_compressBlock_fast, + ZSTD_compressBlock_doubleFast, + ZSTD_compressBlock_greedy, + ZSTD_compressBlock_lazy, + ZSTD_compressBlock_lazy2, + ZSTD_compressBlock_btlazy2, + ZSTD_compressBlock_btopt, + ZSTD_compressBlock_btultra, + ZSTD_compressBlock_btultra2 }, + { ZSTD_compressBlock_fast_extDict /* default for 0 */, + ZSTD_compressBlock_fast_extDict, + ZSTD_compressBlock_doubleFast_extDict, + ZSTD_compressBlock_greedy_extDict, + ZSTD_compressBlock_lazy_extDict, + ZSTD_compressBlock_lazy2_extDict, + ZSTD_compressBlock_btlazy2_extDict, + ZSTD_compressBlock_btopt_extDict, + ZSTD_compressBlock_btultra_extDict, + ZSTD_compressBlock_btultra_extDict }, + { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, + ZSTD_compressBlock_fast_dictMatchState, + ZSTD_compressBlock_doubleFast_dictMatchState, + ZSTD_compressBlock_greedy_dictMatchState, + ZSTD_compressBlock_lazy_dictMatchState, + ZSTD_compressBlock_lazy2_dictMatchState, + ZSTD_compressBlock_btlazy2_dictMatchState, + ZSTD_compressBlock_btopt_dictMatchState, + ZSTD_compressBlock_btultra_dictMatchState, + ZSTD_compressBlock_btultra_dictMatchState }, + { NULL /* default for 0 */, + NULL, + NULL, + ZSTD_compressBlock_greedy_dedicatedDictSearch, + ZSTD_compressBlock_lazy_dedicatedDictSearch, + 
ZSTD_compressBlock_lazy2_dedicatedDictSearch, + NULL, + NULL, + NULL, + NULL } + }; + ZSTD_blockCompressor selectedCompressor; + ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); + + assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); + DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder); + if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) { + static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = { + { ZSTD_compressBlock_greedy_row, + ZSTD_compressBlock_lazy_row, + ZSTD_compressBlock_lazy2_row }, + { ZSTD_compressBlock_greedy_extDict_row, + ZSTD_compressBlock_lazy_extDict_row, + ZSTD_compressBlock_lazy2_extDict_row }, + { ZSTD_compressBlock_greedy_dictMatchState_row, + ZSTD_compressBlock_lazy_dictMatchState_row, + ZSTD_compressBlock_lazy2_dictMatchState_row }, + { ZSTD_compressBlock_greedy_dedicatedDictSearch_row, + ZSTD_compressBlock_lazy_dedicatedDictSearch_row, + ZSTD_compressBlock_lazy2_dedicatedDictSearch_row } + }; + DEBUGLOG(4, "Selecting a row-based matchfinder"); + assert(useRowMatchFinder != ZSTD_urm_auto); + selectedCompressor = rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy]; + } else { + selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; + } + assert(selectedCompressor != NULL); + return selectedCompressor; +} + +static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, + const BYTE* anchor, size_t lastLLSize) +{ + ZSTD_memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; +} + +void ZSTD_resetSeqStore(seqStore_t* ssPtr) +{ + ssPtr->lit = ssPtr->litStart; + ssPtr->sequences = ssPtr->sequencesStart; + ssPtr->longLengthType = ZSTD_llt_none; +} + +typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; + +static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) +{ + ZSTD_matchState_t* const ms = &zc->blockState.matchState; + DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize); + assert(srcSize <= ZSTD_BLOCKSIZE_MAX); + /* Assert that we have correctly flushed the ctx params into the ms's copy */ + ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); + if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { + if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) { + ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize); + } else { + ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); + } + return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */ + } + ZSTD_resetSeqStore(&(zc->seqStore)); + /* required for optimal parser to read stats from dictionary */ + ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; + /* tell the optimal parser how we expect to compress literals */ + ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode; + /* a gap between an attached dict and the current window is not safe, + * they must remain adjacent, + * and when that stops being the case, the dict must be unset */ + assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit); + + /* limited update after a very long match */ + { const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const U32 curr = (U32)(istart-base); + if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */ + if (curr > ms->nextToUpdate + 384) + ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384)); 
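+            /* e.g. if curr is 1000 bytes past nextToUpdate, the catch-up is limited to
+             * MIN(192, 1000 - 384) = 192 bytes, so at most 192 trailing bytes get
+             * (re)indexed after a very long match (illustrative numbers). */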
+ } + + /* select and store sequences */ + { ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms); + size_t lastLLSize; + { int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; + } + if (zc->externSeqStore.pos < zc->externSeqStore.size) { + assert(!zc->appliedParams.ldmParams.enableLdm); + /* Updates ldmSeqStore.pos */ + lastLLSize = + ZSTD_ldm_blockCompress(&zc->externSeqStore, + ms, &zc->seqStore, + zc->blockState.nextCBlock->rep, + zc->appliedParams.useRowMatchFinder, + src, srcSize); + assert(zc->externSeqStore.pos <= zc->externSeqStore.size); + } else if (zc->appliedParams.ldmParams.enableLdm) { + rawSeqStore_t ldmSeqStore = kNullRawSeqStore; + + ldmSeqStore.seq = zc->ldmSequences; + ldmSeqStore.capacity = zc->maxNbLdmSequences; + /* Updates ldmSeqStore.size */ + FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, + &zc->appliedParams.ldmParams, + src, srcSize), ""); + /* Updates ldmSeqStore.pos */ + lastLLSize = + ZSTD_ldm_blockCompress(&ldmSeqStore, + ms, &zc->seqStore, + zc->blockState.nextCBlock->rep, + zc->appliedParams.useRowMatchFinder, + src, srcSize); + assert(ldmSeqStore.pos == ldmSeqStore.size); + } else { /* not long range mode */ + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, + zc->appliedParams.useRowMatchFinder, + dictMode); + ms->ldmSeqStore = NULL; + lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); + } + { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; + ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); + } } + return ZSTDbss_compress; +} + +static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) +{ + const seqStore_t* seqStore = ZSTD_getSeqStore(zc); + const seqDef* seqStoreSeqs = seqStore->sequencesStart; + size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs; + size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart); + size_t literalsRead = 0; + size_t lastLLSize; + + ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; + size_t i; + repcodes_t updatedRepcodes; + + assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); + /* Ensure we have enough space for last literals "sequence" */ + assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1); + ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); + for (i = 0; i < seqStoreSeqSize; ++i) { + U32 rawOffset = seqStoreSeqs[i].offset - ZSTD_REP_NUM; + outSeqs[i].litLength = seqStoreSeqs[i].litLength; + outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH; + outSeqs[i].rep = 0; + + if (i == seqStore->longLengthPos) { + if (seqStore->longLengthType == ZSTD_llt_literalLength) { + outSeqs[i].litLength += 0x10000; + } else if (seqStore->longLengthType == ZSTD_llt_matchLength) { + outSeqs[i].matchLength += 0x10000; + } + } + + if (seqStoreSeqs[i].offset <= ZSTD_REP_NUM) { + /* Derive the correct offset corresponding to a repcode */ + outSeqs[i].rep = seqStoreSeqs[i].offset; + if (outSeqs[i].litLength != 0) { + rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1]; + } else { + if (outSeqs[i].rep == 3) { + rawOffset = updatedRepcodes.rep[0] - 1; + } else { + rawOffset = updatedRepcodes.rep[outSeqs[i].rep]; + } + } + } + outSeqs[i].offset = rawOffset; + /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode + so we provide seqStoreSeqs[i].offset - 1 */ + 
updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, + seqStoreSeqs[i].offset - 1, + seqStoreSeqs[i].litLength == 0); + literalsRead += outSeqs[i].litLength; + } + /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0. + * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker + * for the block boundary, according to the API. + */ + assert(seqStoreLiteralsSize >= literalsRead); + lastLLSize = seqStoreLiteralsSize - literalsRead; + outSeqs[i].litLength = (U32)lastLLSize; + outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0; + seqStoreSeqSize++; + zc->seqCollector.seqIndex += seqStoreSeqSize; +} + +size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize) +{ + const size_t dstCapacity = ZSTD_compressBound(srcSize); + void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem); + SeqCollector seqCollector; + + RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!"); + + seqCollector.collectSequences = 1; + seqCollector.seqStart = outSeqs; + seqCollector.seqIndex = 0; + seqCollector.maxSequences = outSeqsSize; + zc->seqCollector = seqCollector; + + ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); + ZSTD_customFree(dst, ZSTD_defaultCMem); + return zc->seqCollector.seqIndex; +} + +size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) { + size_t in = 0; + size_t out = 0; + for (; in < seqsSize; ++in) { + if (sequences[in].offset == 0 && sequences[in].matchLength == 0) { + if (in != seqsSize - 1) { + sequences[in+1].litLength += sequences[in].litLength; + } + } else { + sequences[out] = sequences[in]; + ++out; + } + } + return out; +} + +/* Unrolled loop to read four size_ts of input at a time. Returns 1 if is RLE, 0 if not. */ +static int ZSTD_isRLE(const BYTE* src, size_t length) { + const BYTE* ip = src; + const BYTE value = ip[0]; + const size_t valueST = (size_t)((U64)value * 0x0101010101010101ULL); + const size_t unrollSize = sizeof(size_t) * 4; + const size_t unrollMask = unrollSize - 1; + const size_t prefixLength = length & unrollMask; + size_t i; + size_t u; + if (length == 1) return 1; + /* Check if prefix is RLE first before using unrolled loop */ + if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) { + return 0; + } + for (i = prefixLength; i != length; i += unrollSize) { + for (u = 0; u < unrollSize; u += sizeof(size_t)) { + if (MEM_readST(ip + i + u) != valueST) { + return 0; + } + } + } + return 1; +} + +/* Returns true if the given block may be RLE. + * This is just a heuristic based on the compressibility. + * It may return both false positives and false negatives. + */ +static int ZSTD_maybeRLE(seqStore_t const* seqStore) +{ + size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); + size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); + + return nbSeqs < 4 && nbLits < 10; +} + +static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs) +{ + ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock; + bs->prevCBlock = bs->nextCBlock; + bs->nextCBlock = tmp; +} + +/* Writes the block header */ +static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) { + U32 const cBlockHeader = cSize == 1 ? 
+ lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : + lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader); + DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock); +} + +/** ZSTD_buildBlockEntropyStats_literals() : + * Builds entropy for the literals. + * Stores literals block type (raw, rle, compressed, repeat) and + * huffman description table to hufMetadata. + * Requires ENTROPY_WORKSPACE_SIZE workspace + * @return : size of huffman description table or error code */ +static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize, + const ZSTD_hufCTables_t* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_hufCTablesMetadata_t* hufMetadata, + const int disableLiteralsCompression, + void* workspace, size_t wkspSize) +{ + BYTE* const wkspStart = (BYTE*)workspace; + BYTE* const wkspEnd = wkspStart + wkspSize; + BYTE* const countWkspStart = wkspStart; + unsigned* const countWksp = (unsigned*)workspace; + const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); + BYTE* const nodeWksp = countWkspStart + countWkspSize; + const size_t nodeWkspSize = wkspEnd-nodeWksp; + unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; + unsigned huffLog = HUF_TABLELOG_DEFAULT; + HUF_repeat repeat = prevHuf->repeatMode; + DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize); + + /* Prepare nextEntropy assuming reusing the existing table */ + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + + if (disableLiteralsCompression) { + DEBUGLOG(5, "set_basic - disabled"); + hufMetadata->hType = set_basic; + return 0; + } + + /* small ? don't even attempt compression (speed opt) */ +#ifndef COMPRESS_LITERALS_SIZE_MIN +#define COMPRESS_LITERALS_SIZE_MIN 63 +#endif + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 
6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) { + DEBUGLOG(5, "set_basic - too small"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Scan input and build symbol stats */ + { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); + FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); + if (largest == srcSize) { + DEBUGLOG(5, "set_rle"); + hufMetadata->hType = set_rle; + return 0; + } + if (largest <= (srcSize >> 7)+4) { + DEBUGLOG(5, "set_basic - no gain"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Validate the previous Huffman table */ + if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { + repeat = HUF_repeat_none; + } + + /* Build Huffman Tree */ + ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, + maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); + huffLog = (U32)maxBits; + { /* Build and write the CTable */ + size_t const newCSize = HUF_estimateCompressedSize( + (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); + size_t const hSize = HUF_writeCTable_wksp( + hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), + (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + /* Check against repeating the previous CTable */ + if (repeat != HUF_repeat_none) { + size_t const oldCSize = HUF_estimateCompressedSize( + (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); + if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { + DEBUGLOG(5, "set_repeat - smaller"); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_repeat; + return 0; + } + } + if (newCSize + hSize >= srcSize) { + DEBUGLOG(5, "set_basic - no gains"); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_basic; + return 0; + } + DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); + hufMetadata->hType = set_compressed; + nextHuf->repeatMode = HUF_repeat_check; + return hSize; + } + } +} + + +/* ZSTD_buildDummySequencesStatistics(): + * Returns a ZSTD_symbolEncodingTypeStats_t with all encoding types as set_basic, + * and updates nextEntropy to the appropriate repeatMode. + */ +static ZSTD_symbolEncodingTypeStats_t +ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) { + ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0}; + nextEntropy->litlength_repeatMode = FSE_repeat_none; + nextEntropy->offcode_repeatMode = FSE_repeat_none; + nextEntropy->matchlength_repeatMode = FSE_repeat_none; + return stats; +} + +/** ZSTD_buildBlockEntropyStats_sequences() : + * Builds entropy for the sequences. + * Stores symbol compression modes and fse table to fseMetadata. + * Requires ENTROPY_WORKSPACE_SIZE wksp. 
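+ *  The serialized FSE table descriptions are written into fseMetadata->fseTablesBuffer.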
+ * @return : size of fse tables or error code */ +static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr, + const ZSTD_fseCTables_t* prevEntropy, + ZSTD_fseCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize) +{ + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + BYTE* const ostart = fseMetadata->fseTablesBuffer; + BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); + BYTE* op = ostart; + unsigned* countWorkspace = (unsigned*)workspace; + unsigned* entropyWorkspace = countWorkspace + (MaxSeq + 1); + size_t entropyWorkspaceSize = wkspSize - (MaxSeq + 1) * sizeof(*countWorkspace); + ZSTD_symbolEncodingTypeStats_t stats; + + DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq); + stats = nbSeq != 0 ? ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq, + prevEntropy, nextEntropy, op, oend, + strategy, countWorkspace, + entropyWorkspace, entropyWorkspaceSize) + : ZSTD_buildDummySequencesStatistics(nextEntropy); + FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!"); + fseMetadata->llType = (symbolEncodingType_e) stats.LLtype; + fseMetadata->ofType = (symbolEncodingType_e) stats.Offtype; + fseMetadata->mlType = (symbolEncodingType_e) stats.MLtype; + fseMetadata->lastCountSize = stats.lastCountSize; + return stats.size; +} + + +/** ZSTD_buildBlockEntropyStats() : + * Builds entropy for the block. + * Requires workspace size ENTROPY_WORKSPACE_SIZE + * + * @return : 0 on success or error code + */ +size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize) +{ + size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; + entropyMetadata->hufMetadata.hufDesSize = + ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize, + &prevEntropy->huf, &nextEntropy->huf, + &entropyMetadata->hufMetadata, + ZSTD_disableLiteralsCompression(cctxParams), + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed"); + entropyMetadata->fseMetadata.fseTablesSize = + ZSTD_buildBlockEntropyStats_sequences(seqStorePtr, + &prevEntropy->fse, &nextEntropy->fse, + cctxParams, + &entropyMetadata->fseMetadata, + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildBlockEntropyStats_sequences failed"); + return 0; +} + +/* Returns the size estimate for the literals section (header + content) of a block */ +static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize, + const ZSTD_hufCTables_t* huf, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + unsigned* const countWksp = (unsigned*)workspace; + unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX; + size_t literalSectionHeaderSize = 3 + (litSize >= 1 KB) + (litSize >= 16 KB); + U32 singleStream = litSize < 256; + + if (hufMetadata->hType == set_basic) return litSize; + else if (hufMetadata->hType == set_rle) return 1; + else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) { + size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); + 
if (ZSTD_isError(largest)) return litSize; + { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); + if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; + if (!singleStream) cLitSizeEstimate += 6; /* multi-stream huffman uses 6-byte jump table */ + return cLitSizeEstimate + literalSectionHeaderSize; + } } + assert(0); /* impossible */ + return 0; +} + +/* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */ +static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type, + const BYTE* codeTable, size_t nbSeq, unsigned maxCode, + const FSE_CTable* fseCTable, + const U32* additionalBits, + short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, + void* workspace, size_t wkspSize) +{ + unsigned* const countWksp = (unsigned*)workspace; + const BYTE* ctp = codeTable; + const BYTE* const ctStart = ctp; + const BYTE* const ctEnd = ctStart + nbSeq; + size_t cSymbolTypeSizeEstimateInBits = 0; + unsigned max = maxCode; + + HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ + if (type == set_basic) { + /* We selected this encoding type, so it must be valid. */ + assert(max <= defaultMax); + (void)defaultMax; + cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max); + } else if (type == set_rle) { + cSymbolTypeSizeEstimateInBits = 0; + } else if (type == set_compressed || type == set_repeat) { + cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); + } + if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) { + return nbSeq * 10; + } + while (ctp < ctEnd) { + if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp]; + else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ + ctp++; + } + return cSymbolTypeSizeEstimateInBits >> 3; +} + +/* Returns the size estimate for the sequences section (header + content) of a block */ +static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ); + size_t cSeqSizeEstimate = 0; + cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff, + fseTables->offcodeCTable, NULL, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL, + fseTables->litlengthCTable, LL_bits, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML, + fseTables->matchlengthCTable, ML_bits, + ML_defaultNorm, ML_defaultNormLog, MaxML, + workspace, wkspSize); + if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; + return cSeqSizeEstimate + sequencesSectionHeaderSize; +} + +/* Returns the size estimate for a given stream of literals, of, ll, ml */ +static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize, + const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_entropyCTables_t* entropy, + const 
ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize, + int writeLitEntropy, int writeSeqEntropy) { + size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize, + &entropy->huf, &entropyMetadata->hufMetadata, + workspace, wkspSize, writeLitEntropy); + size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, + nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, + workspace, wkspSize, writeSeqEntropy); + return seqSize + literalsSize + ZSTD_blockHeaderSize; +} + +/* Builds entropy statistics and uses them for blocksize estimation. + * + * Returns the estimated compressed size of the seqStore, or a zstd error. + */ +static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, const ZSTD_CCtx* zc) { + ZSTD_entropyCTablesMetadata_t entropyMetadata; + FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore, + &zc->blockState.prevCBlock->entropy, + &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + &entropyMetadata, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); + return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart), + seqStore->ofCode, seqStore->llCode, seqStore->mlCode, + (size_t)(seqStore->sequences - seqStore->sequencesStart), + &zc->blockState.nextCBlock->entropy, &entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE, + (int)(entropyMetadata.hufMetadata.hType == set_compressed), 1); +} + +/* Returns literals bytes represented in a seqStore */ +static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) { + size_t literalsBytes = 0; + size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart; + size_t i; + for (i = 0; i < nbSeqs; ++i) { + seqDef seq = seqStore->sequencesStart[i]; + literalsBytes += seq.litLength; + if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) { + literalsBytes += 0x10000; + } + } + return literalsBytes; +} + +/* Returns match bytes represented in a seqStore */ +static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) { + size_t matchBytes = 0; + size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart; + size_t i; + for (i = 0; i < nbSeqs; ++i) { + seqDef seq = seqStore->sequencesStart[i]; + matchBytes += seq.matchLength + MINMATCH; + if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) { + matchBytes += 0x10000; + } + } + return matchBytes; +} + +/* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx). + * Stores the result in resultSeqStore. 
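+ * Pointers into the literal buffer and the llCode/mlCode/ofCode tables are advanced so the
+ * chunk can be handled as if it were a standalone seqStore.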
+ */ +static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore, + const seqStore_t* originalSeqStore, + size_t startIdx, size_t endIdx) { + BYTE* const litEnd = originalSeqStore->lit; + size_t literalsBytes; + size_t literalsBytesPreceding = 0; + + *resultSeqStore = *originalSeqStore; + if (startIdx > 0) { + resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx; + literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); + } + + /* Move longLengthPos into the correct position if necessary */ + if (originalSeqStore->longLengthType != ZSTD_llt_none) { + if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) { + resultSeqStore->longLengthType = ZSTD_llt_none; + } else { + resultSeqStore->longLengthPos -= (U32)startIdx; + } + } + resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx; + resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx; + literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore); + resultSeqStore->litStart += literalsBytesPreceding; + if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) { + /* This accounts for possible last literals if the derived chunk reaches the end of the block */ + resultSeqStore->lit = litEnd; + } else { + resultSeqStore->lit = resultSeqStore->litStart+literalsBytes; + } + resultSeqStore->llCode += startIdx; + resultSeqStore->mlCode += startIdx; + resultSeqStore->ofCode += startIdx; +} + +/** + * Returns the raw offset represented by the combination of offCode, ll0, and repcode history. + * offCode must be an offCode representing a repcode, therefore in the range of [0, 2]. + */ +static U32 ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0) { + U32 const adjustedOffCode = offCode + ll0; + assert(offCode < ZSTD_REP_NUM); + if (adjustedOffCode == ZSTD_REP_NUM) { + /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */ + assert(rep[0] > 0); + return rep[0] - 1; + } + return rep[adjustedOffCode]; +} + +/** + * ZSTD_seqStore_resolveOffCodes() reconciles any possible divergences in offset history that may arise + * due to emission of RLE/raw blocks that disturb the offset history, and replaces any repcodes within + * the seqStore that may be invalid. + * + * dRepcodes are updated as would be on the decompression side. cRepcodes are updated exactly in + * accordance with the seqStore. + */ +static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes, + seqStore_t* const seqStore, U32 const nbSeq) { + U32 idx = 0; + for (; idx < nbSeq; ++idx) { + seqDef* const seq = seqStore->sequencesStart + idx; + U32 const ll0 = (seq->litLength == 0); + U32 offCode = seq->offset - 1; + assert(seq->offset > 0); + if (offCode <= ZSTD_REP_MOVE) { + U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0); + U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0); + /* Adjust simulated decompression repcode history if we come across a mismatch. Replace + * the repcode with the offset it actually references, determined by the compression + * repcode history. + */ + if (dRawOffset != cRawOffset) { + seq->offset = cRawOffset + ZSTD_REP_NUM; + } + } + /* Compression repcode history is always updated with values directly from the unmodified seqStore. + * Decompression repcode history may use modified seq->offset value taken from compression repcode history. 
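+ * In short: cRepcodes mirrors what the encoder emitted, while dRepcodes mirrors what the
+ * decoder will have reconstructed once RLE/raw partitions are taken into account.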
+ */ + *dRepcodes = ZSTD_updateRep(dRepcodes->rep, seq->offset - 1, ll0); + *cRepcodes = ZSTD_updateRep(cRepcodes->rep, offCode, ll0); + } +} + +/* ZSTD_compressSeqStore_singleBlock(): + * Compresses a seqStore into a block with a block header, into the buffer dst. + * + * Returns the total size of that block (including header) or a ZSTD error code. + */ +static size_t ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore, + repcodes_t* const dRep, repcodes_t* const cRep, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastBlock, U32 isPartition) { + const U32 rleMaxLength = 25; + BYTE* op = (BYTE*)dst; + const BYTE* ip = (const BYTE*)src; + size_t cSize; + size_t cSeqsSize; + + /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */ + repcodes_t const dRepOriginal = *dRep; + if (isPartition) + ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart)); + + cSeqsSize = ZSTD_entropyCompressSeqStore(seqStore, + &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, + srcSize, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + zc->bmi2); + FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!"); + + if (!zc->isFirstBlock && + cSeqsSize < rleMaxLength && + ZSTD_isRLE((BYTE const*)src, srcSize)) { + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + cSeqsSize = 1; + } + + if (zc->seqCollector.collectSequences) { + ZSTD_copyBlockSequences(zc); + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); + return 0; + } + + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + if (cSeqsSize == 0) { + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); + FORWARD_IF_ERROR(cSize, "Nocompress block failed"); + DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize); + *dRep = dRepOriginal; /* reset simulated decompression repcode history */ + } else if (cSeqsSize == 1) { + cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock); + FORWARD_IF_ERROR(cSize, "RLE compress block failed"); + DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize); + *dRep = dRepOriginal; /* reset simulated decompression repcode history */ + } else { + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); + writeBlockHeader(op, cSeqsSize, srcSize, lastBlock); + cSize = ZSTD_blockHeaderSize + cSeqsSize; + DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize); + } + return cSize; +} + +/* Struct to keep track of where we are in our recursive calls. */ +typedef struct { + U32* splitLocations; /* Array of split indices */ + size_t idx; /* The current index within splitLocations being worked on */ +} seqStoreSplits; + +#define MIN_SEQUENCES_BLOCK_SPLITTING 300 +#define MAX_NB_SPLITS 196 + +/* Helper function to perform the recursive search for block splits. + * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half. + * If advantageous to split, then we recurse down the two sub-blocks. 
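+ * (Both estimates come from ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize() below.)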
If not, or if an error occurred in estimation, then + * we do not recurse. + * + * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING. + * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING). + * In practice, recursion depth usually doesn't go beyond 4. + * + * Furthermore, the number of splits is capped by MAX_NB_SPLITS. At MAX_NB_SPLITS == 196 with the current existing blockSize + * maximum of 128 KB, this value is actually impossible to reach. + */ +static void ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx, + const ZSTD_CCtx* zc, const seqStore_t* origSeqStore) { + seqStore_t fullSeqStoreChunk; + seqStore_t firstHalfSeqStore; + seqStore_t secondHalfSeqStore; + size_t estimatedOriginalSize; + size_t estimatedFirstHalfSize; + size_t estimatedSecondHalfSize; + size_t midIdx = (startIdx + endIdx)/2; + + if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= MAX_NB_SPLITS) { + return; + } + ZSTD_deriveSeqStoreChunk(&fullSeqStoreChunk, origSeqStore, startIdx, endIdx); + ZSTD_deriveSeqStoreChunk(&firstHalfSeqStore, origSeqStore, startIdx, midIdx); + ZSTD_deriveSeqStoreChunk(&secondHalfSeqStore, origSeqStore, midIdx, endIdx); + estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&fullSeqStoreChunk, zc); + estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&firstHalfSeqStore, zc); + estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&secondHalfSeqStore, zc); + DEBUGLOG(5, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu", + estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize); + if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) { + return; + } + if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) { + ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore); + splits->splitLocations[splits->idx] = (U32)midIdx; + splits->idx++; + ZSTD_deriveBlockSplitsHelper(splits, midIdx, endIdx, zc, origSeqStore); + } +} + +/* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio. + * + * Returns the number of splits made (which equals the size of the partition table - 1). + */ +static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) { + seqStoreSplits splits = {partitions, 0}; + if (nbSeq <= 4) { + DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split"); + /* Refuse to try and split anything with less than 4 sequences */ + return 0; + } + ZSTD_deriveBlockSplitsHelper(&splits, 0, nbSeq, zc, &zc->seqStore); + splits.splitLocations[splits.idx] = nbSeq; + DEBUGLOG(5, "ZSTD_deriveBlockSplits: final nb partitions: %zu", splits.idx+1); + return splits.idx; +} + +/* ZSTD_compressBlock_splitBlock(): + * Attempts to split a given block into multiple blocks to improve compression ratio. + * + * Returns combined size of all blocks (which includes headers), or a ZSTD error code. 
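+ * Partition boundaries come from ZSTD_deriveBlockSplits(); each partition is then emitted
+ * as its own block through ZSTD_compressSeqStore_singleBlock().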
+ */ +static size_t ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, + const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq) { + size_t cSize = 0; + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + U32 partitions[MAX_NB_SPLITS]; + size_t i = 0; + size_t srcBytesTotal = 0; + size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq); + seqStore_t nextSeqStore; + seqStore_t currSeqStore; + + /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history + * may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two + * separate repcode histories that simulate repcode history on compression and decompression side, + * and use the histories to determine whether we must replace a particular repcode with its raw offset. + * + * 1) cRep gets updated for each partition, regardless of whether the block was emitted as uncompressed + * or RLE. This allows us to retrieve the offset value that an invalid repcode references within + * a nocompress/RLE block. + * 2) dRep gets updated only for compressed partitions, and when a repcode gets replaced, will use + * the replacement offset value rather than the original repcode to update the repcode history. + * dRep also will be the final repcode history sent to the next block. + * + * See ZSTD_seqStore_resolveOffCodes() for more details. + */ + repcodes_t dRep; + repcodes_t cRep; + ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); + ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); + + DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, + (unsigned)zc->blockState.matchState.nextToUpdate); + + if (numSplits == 0) { + size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore, + &dRep, &cRep, + op, dstCapacity, + ip, blockSize, + lastBlock, 0 /* isPartition */); + FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!"); + DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits"); + assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize); + return cSizeSingleBlock; + } + + ZSTD_deriveSeqStoreChunk(&currSeqStore, &zc->seqStore, 0, partitions[0]); + for (i = 0; i <= numSplits; ++i) { + size_t srcBytes; + size_t cSizeChunk; + U32 const lastPartition = (i == numSplits); + U32 lastBlockEntireSrc = 0; + + srcBytes = ZSTD_countSeqStoreLiteralsBytes(&currSeqStore) + ZSTD_countSeqStoreMatchBytes(&currSeqStore); + srcBytesTotal += srcBytes; + if (lastPartition) { + /* This is the final partition, need to account for possible last literals */ + srcBytes += blockSize - srcBytesTotal; + lastBlockEntireSrc = lastBlock; + } else { + ZSTD_deriveSeqStoreChunk(&nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]); + } + + cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, &currSeqStore, + &dRep, &cRep, + op, dstCapacity, + ip, srcBytes, + lastBlockEntireSrc, 1 /* isPartition */); + DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(&currSeqStore, zc), cSizeChunk); + FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!"); + + ip += srcBytes; + op += cSizeChunk; + dstCapacity -= cSizeChunk; + cSize += cSizeChunk; + currSeqStore = nextSeqStore; + assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize); + } + /* cRep and dRep may 
have diverged during the compression. If so, we use the dRep repcodes + * for the next block. + */ + ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t)); + return cSize; +} + +static size_t ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, U32 lastBlock) { + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + U32 nbSeq; + size_t cSize; + DEBUGLOG(4, "ZSTD_compressBlock_splitBlock"); + + { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); + if (bss == ZSTDbss_noCompress) { + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block"); + return cSize; + } + nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart); + } + + assert(zc->appliedParams.splitBlocks == 1); + cSize = ZSTD_compressBlock_splitBlock_internal(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq); + FORWARD_IF_ERROR(cSize, "Splitting blocks failed!"); + return cSize; +} + +static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, U32 frame) +{ + /* This the upper bound for the length of an rle block. + * This isn't the actual upper bound. Finding the real threshold + * needs further investigation. + */ + const U32 rleMaxLength = 25; + size_t cSize; + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, + (unsigned)zc->blockState.matchState.nextToUpdate); + + { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); + if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } + } + + if (zc->seqCollector.collectSequences) { + ZSTD_copyBlockSequences(zc); + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); + return 0; + } + + /* encode sequences and literals */ + cSize = ZSTD_entropyCompressSeqStore(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + dst, dstCapacity, + srcSize, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + zc->bmi2); + + if (zc->seqCollector.collectSequences) { + ZSTD_copyBlockSequences(zc); + return 0; + } + + + if (frame && + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + !zc->isFirstBlock && + cSize < rleMaxLength && + ZSTD_isRLE(ip, srcSize)) + { + cSize = 1; + op[0] = ip[0]; + } + +out: + if (!ZSTD_isError(cSize) && cSize > 1) { + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); + } + /* We check that dictionaries have offset codes available for the first + * block. After the first block, the offcode table might not have large + * enough codes to represent the offsets in the data. 
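+ * Downgrading offcode_repeatMode from FSE_repeat_valid to FSE_repeat_check below forces
+ * that re-validation on subsequent blocks.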
+ */ + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + return cSize; +} + +static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const size_t bss, U32 lastBlock) +{ + DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()"); + if (bss == ZSTDbss_compress) { + if (/* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + !zc->isFirstBlock && + ZSTD_maybeRLE(&zc->seqStore) && + ZSTD_isRLE((BYTE const*)src, srcSize)) + { + return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock); + } + /* Attempt superblock compression. + * + * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the + * standard ZSTD_compressBound(). This is a problem, because even if we have + * space now, taking an extra byte now could cause us to run out of space later + * and violate ZSTD_compressBound(). + * + * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize. + * + * In order to respect ZSTD_compressBound() we must attempt to emit a raw + * uncompressed block in these cases: + * * cSize == 0: Return code for an uncompressed block. + * * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize). + * ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of + * output space. + * * cSize >= blockBound(srcSize): We have expanded the block too much so + * emit an uncompressed block. + */ + { + size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); + if (cSize != ERROR(dstSize_tooSmall)) { + size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed"); + if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) { + ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState); + return cSize; + } + } + } + } + + DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()"); + /* Superblock compression failed, attempt to emit a single no compress block. + * The decoder will be able to stream this block since it is uncompressed. 
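+ * (ZSTD_noCompressBlock() writes a bt_raw block header followed by a verbatim copy of the source.)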
+ */ + return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock); +} + +static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastBlock) +{ + size_t cSize = 0; + const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); + + cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed"); + + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + return cSize; +} + +static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + void const* ip, + void const* iend) +{ + U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy); + U32 const maxDist = (U32)1 << params->cParams.windowLog; + if (ZSTD_window_needOverflowCorrection(ms->window, cycleLog, maxDist, ms->loadedDictEnd, ip, iend)) { + U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); + ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); + ZSTD_cwksp_mark_tables_dirty(ws); + ZSTD_reduceIndex(ms, params, correction); + ZSTD_cwksp_mark_tables_clean(ws); + if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; + else ms->nextToUpdate -= correction; + /* invalidate dictionaries on overflow correction */ + ms->loadedDictEnd = 0; + ms->dictMatchState = NULL; + } +} + +/*! ZSTD_compress_frameChunk() : +* Compress a chunk of data into one or multiple blocks. +* All blocks will be terminated, all input will be consumed. +* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. 
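+* Each block covers at most cctx->blockSize bytes of source; the last block may be shorter.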
+* Frame is supposed already started (header already produced) +* @return : compressed size, or an error code +*/ +static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastFrameChunk) +{ + size_t blockSize = cctx->blockSize; + size_t remaining = srcSize; + const BYTE* ip = (const BYTE*)src; + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; + + assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX); + + DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); + if (cctx->appliedParams.fParams.checksumFlag && srcSize) + XXH64_update(&cctx->xxhState, src, srcSize); + + while (remaining) { + ZSTD_matchState_t* const ms = &cctx->blockState.matchState; + U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); + + RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE, + dstSize_tooSmall, + "not enough space to store compressed block"); + if (remaining < blockSize) blockSize = remaining; + + ZSTD_overflowCorrectIfNeeded( + ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize); + ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); + + /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ + if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; + + { size_t cSize; + if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) { + cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed"); + assert(cSize > 0); + assert(cSize <= blockSize + ZSTD_blockHeaderSize); + } else if (ZSTD_blockSplitterEnabled(&cctx->appliedParams)) { + cSize = ZSTD_compressBlock_splitBlock(cctx, op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_splitBlock failed"); + assert(cSize > 0 || cctx->seqCollector.collectSequences == 1); + } else { + cSize = ZSTD_compressBlock_internal(cctx, + op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, + ip, blockSize, 1 /* frame */); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed"); + + if (cSize == 0) { /* block is not compressible */ + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + } else { + U32 const cBlockHeader = cSize == 1 ? + lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : + lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader); + cSize += ZSTD_blockHeaderSize; + } + } + + + ip += blockSize; + assert(remaining >= blockSize); + remaining -= blockSize; + op += cSize; + assert(dstCapacity >= cSize); + dstCapacity -= cSize; + cctx->isFirstBlock = 0; + DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", + (unsigned)cSize); + } } + + if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; + return (size_t)(op-ostart); +} + + +static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, + const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID) +{ BYTE* const op = (BYTE*)dst; + U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ + U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 
0 : dictIDSizeCodeLength; /* 0-3 */ + U32 const checksumFlag = params->fParams.checksumFlag>0; + U32 const windowSize = (U32)1 << params->cParams.windowLog; + U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); + BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); + U32 const fcsCode = params->fParams.contentSizeFlag ? + (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ + BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); + size_t pos=0; + + assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); + RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall, + "dst buf is too small to fit worst-case frame header size."); + DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", + !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); + if (params->format == ZSTD_f_zstd1) { + MEM_writeLE32(dst, ZSTD_MAGICNUMBER); + pos = 4; + } + op[pos++] = frameHeaderDescriptionByte; + if (!singleSegment) op[pos++] = windowLogByte; + switch(dictIDSizeCode) + { + default: assert(0); /* impossible */ + case 0 : break; + case 1 : op[pos] = (BYTE)(dictID); pos++; break; + case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break; + case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break; + } + switch(fcsCode) + { + default: assert(0); /* impossible */ + case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break; + case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break; + case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break; + case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break; + } + return pos; +} + +/* ZSTD_writeSkippableFrame_advanced() : + * Writes out a skippable frame with the specified magic number variant (16 are supported), + * from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15, and the desired source data. + * + * Returns the total number of bytes written, or a ZSTD error code. 
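+ * Wire layout: a 4-byte little-endian magic number (ZSTD_MAGIC_SKIPPABLE_START + magicVariant),
+ * a 4-byte little-endian content size, then the srcSize bytes of payload.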
+ */
+size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize, unsigned magicVariant) {
+    BYTE* op = (BYTE*)dst;
+    RETURN_ERROR_IF(dstCapacity < srcSize + ZSTD_SKIPPABLEHEADERSIZE /* Skippable frame overhead */,
+                    dstSize_tooSmall, "Not enough room for skippable frame");
+    RETURN_ERROR_IF(srcSize > (unsigned)0xFFFFFFFF, srcSize_wrong, "Src size too large for skippable frame");
+    RETURN_ERROR_IF(magicVariant > 15, parameter_outOfBound, "Skippable frame magic number variant not supported");
+
+    MEM_writeLE32(op, (U32)(ZSTD_MAGIC_SKIPPABLE_START + magicVariant));
+    MEM_writeLE32(op+4, (U32)srcSize);
+    ZSTD_memcpy(op+8, src, srcSize);
+    return srcSize + ZSTD_SKIPPABLEHEADERSIZE;
+}
+
+/* ZSTD_writeLastEmptyBlock() :
+ * output an empty Block with end-of-frame mark to complete a frame
+ * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h))
+ *           or an error code if `dstCapacity` is too small (< ZSTD_blockHeaderSize)
+ */
+size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
+{
+    RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall,
+                    "dst buf is too small to write frame trailer empty block.");
+    {   U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1);  /* 0 size */
+        MEM_writeLE24(dst, cBlockHeader24);
+        return ZSTD_blockHeaderSize;
+    }
+}
+
+size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
+{
+    RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
+                    "wrong cctx stage");
+    RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm,
+                    parameter_unsupported,
+                    "incompatible with ldm");
+    cctx->externSeqStore.seq = seq;
+    cctx->externSeqStore.size = nbSeq;
+    cctx->externSeqStore.capacity = nbSeq;
+    cctx->externSeqStore.pos = 0;
+    cctx->externSeqStore.posInSequence = 0;
+    return 0;
+}
+
+
+static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
+                              void* dst, size_t dstCapacity,
+                              const void* src, size_t srcSize,
+                              U32 frame, U32 lastFrameChunk)
+{
+    ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
+    size_t fhSize = 0;
+
+    DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
+                cctx->stage, (unsigned)srcSize);
+    RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong,
+                    "missing init (ZSTD_compressBegin)");
+
+    if (frame && (cctx->stage==ZSTDcs_init)) {
+        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams,
+                                       cctx->pledgedSrcSizePlusOne-1, cctx->dictID);
+        FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
+        assert(fhSize <= dstCapacity);
+        dstCapacity -= fhSize;
+        dst = (char*)dst + fhSize;
+        cctx->stage = ZSTDcs_ongoing;
+    }
+
+    if (!srcSize) return fhSize;  /* do not generate an empty block if no input */
+
+    if (!ZSTD_window_update(&ms->window, src, srcSize, ms->forceNonContiguous)) {
+        ms->forceNonContiguous = 0;
+        ms->nextToUpdate = ms->window.dictLimit;
+    }
+    if (cctx->appliedParams.ldmParams.enableLdm) {
+        ZSTD_window_update(&cctx->ldmState.window, src, srcSize, /* forceNonContiguous */ 0);
+    }
+
+    if (!frame) {
+        /* overflow check and correction for block mode */
+        ZSTD_overflowCorrectIfNeeded(
+            ms, &cctx->workspace, &cctx->appliedParams,
+            src, (BYTE const*)src + srcSize);
+    }
+
+    DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize);
+    {   size_t const cSize = frame ?
+                             ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
+                             ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */);
+        FORWARD_IF_ERROR(cSize, "%s", frame ?
"ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed"); + cctx->consumedSrcSize += srcSize; + cctx->producedCSize += (cSize + fhSize); + assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); + if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); + RETURN_ERROR_IF( + cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne, + srcSize_wrong, + "error : pledgedSrcSize = %u, while realSrcSize >= %u", + (unsigned)cctx->pledgedSrcSizePlusOne-1, + (unsigned)cctx->consumedSrcSize); + } + return cSize + fhSize; + } +} + +size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize); + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */); +} + + +size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) +{ + ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams; + assert(!ZSTD_checkCParams(cParams)); + return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog); +} + +size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize); + { size_t const blockSizeMax = ZSTD_getBlockSize(cctx); + RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); } + + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); +} + +/*! ZSTD_loadDictionaryContent() : + * @return : 0, or an error code + */ +static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, + ldmState_t* ls, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + const void* src, size_t srcSize, + ZSTD_dictTableLoadMethod_e dtlm) +{ + const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + int const loadLdmDict = params->ldmParams.enableLdm && ls != NULL; + + /* Assert that we the ms params match the params we're being given */ + ZSTD_assertEqualCParams(params->cParams, ms->cParams); + + if (srcSize > ZSTD_CHUNKSIZE_MAX) { + /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX. + * Dictionaries right at the edge will immediately trigger overflow + * correction, but I don't want to insert extra constraints here. + */ + U32 const maxDictSize = ZSTD_CURRENT_MAX - 1; + /* We must have cleared our windows when our source is this large. */ + assert(ZSTD_window_isEmpty(ms->window)); + if (loadLdmDict) + assert(ZSTD_window_isEmpty(ls->window)); + /* If the dictionary is too large, only load the suffix of the dictionary. */ + if (srcSize > maxDictSize) { + ip = iend - maxDictSize; + src = ip; + srcSize = maxDictSize; + } + } + + DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder); + ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0); + ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); + ms->forceNonContiguous = params->deterministicRefPrefix; + + if (loadLdmDict) { + ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0); + ls->loadedDictEnd = params->forceWindow ? 
0 : (U32)(iend - ls->window.base);
+    }
+
+    if (srcSize <= HASH_READ_SIZE) return 0;
+
+    ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend);
+
+    if (loadLdmDict)
+        ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);
+
+    switch(params->cParams.strategy)
+    {
+    case ZSTD_fast:
+        ZSTD_fillHashTable(ms, iend, dtlm);
+        break;
+    case ZSTD_dfast:
+        ZSTD_fillDoubleHashTable(ms, iend, dtlm);
+        break;
+
+    case ZSTD_greedy:
+    case ZSTD_lazy:
+    case ZSTD_lazy2:
+        assert(srcSize >= HASH_READ_SIZE);
+        if (ms->dedicatedDictSearch) {
+            assert(ms->chainTable != NULL);
+            ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE);
+        } else {
+            assert(params->useRowMatchFinder != ZSTD_urm_auto);
+            if (params->useRowMatchFinder == ZSTD_urm_enableRowMatchFinder) {
+                size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16);
+                ZSTD_memset(ms->tagTable, 0, tagTableSize);
+                ZSTD_row_update(ms, iend-HASH_READ_SIZE);
+                DEBUGLOG(4, "Using row-based hash table for lazy dict");
+            } else {
+                ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);
+                DEBUGLOG(4, "Using chain-based hash table for lazy dict");
+            }
+        }
+        break;
+
+    case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
+    case ZSTD_btopt:
+    case ZSTD_btultra:
+    case ZSTD_btultra2:
+        assert(srcSize >= HASH_READ_SIZE);
+        ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
+        break;
+
+    default:
+        assert(0);  /* not possible : not a valid strategy id */
+    }
+
+    ms->nextToUpdate = (U32)(iend - ms->window.base);
+    return 0;
+}
+
+
+/* Dictionaries that assign zero probability to symbols that show up causes problems
+ * when FSE encoding. Mark dictionaries with zero probability symbols as FSE_repeat_check
+ * and only dictionaries with 100% valid symbols can be assumed valid.
+ */
+static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue)
+{
+    U32 s;
+    if (dictMaxSymbolValue < maxSymbolValue) {
+        return FSE_repeat_check;
+    }
+    for (s = 0; s <= maxSymbolValue; ++s) {
+        if (normalizedCounter[s] == 0) {
+            return FSE_repeat_check;
+        }
+    }
+    return FSE_repeat_valid;
+}
+
+size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
+                         const void* const dict, size_t dictSize)
+{
+    short offcodeNCount[MaxOff+1];
+    unsigned offcodeMaxValue = MaxOff;
+    const BYTE* dictPtr = (const BYTE*)dict;    /* skip magic num and dict ID */
+    const BYTE* const dictEnd = dictPtr + dictSize;
+    dictPtr += 8;
+    bs->entropy.huf.repeatMode = HUF_repeat_check;
+
+    {   unsigned maxSymbolValue = 255;
+        unsigned hasZeroWeights = 1;
+        size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr,
+            dictEnd-dictPtr, &hasZeroWeights);
+
+        /* We only set the loaded table as valid if it contains all non-zero
+         * weights.
Otherwise, we set it to check */ + if (!hasZeroWeights) + bs->entropy.huf.repeatMode = HUF_repeat_valid; + + RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, ""); + dictPtr += hufHeaderSize; + } + + { unsigned offcodeLog; + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); + /* fill all offset symbols to avoid garbage at end of table */ + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.offcodeCTable, + offcodeNCount, MaxOff, offcodeLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ + dictPtr += offcodeHeaderSize; + } + + { short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.matchlengthCTable, + matchlengthNCount, matchlengthMaxValue, matchlengthLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML); + dictPtr += matchlengthHeaderSize; + } + + { short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog; + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.litlengthCTable, + litlengthNCount, litlengthMaxValue, litlengthLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL); + dictPtr += litlengthHeaderSize; + } + + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); + bs->rep[0] = MEM_readLE32(dictPtr+0); + bs->rep[1] = MEM_readLE32(dictPtr+4); + bs->rep[2] = MEM_readLE32(dictPtr+8); + dictPtr += 12; + + { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); + U32 offcodeMax = MaxOff; + if (dictContentSize <= ((U32)-1) - 128 KB) { + U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */ + offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ + } + /* All offset values <= dictContentSize + 128 KB must be representable for a valid table */ + bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)); + + /* All repCodes must be <= dictContentSize and != 0 */ + { U32 u; + for (u=0; u<3; u++) { + RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, ""); + RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, ""); + } } } + + return dictPtr - (const BYTE*)dict; +} + +/* Dictionary format : + * See : + * 
https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#dictionary-format + */ +/*! ZSTD_loadZstdDictionary() : + * @return : dictID, or an error code + * assumptions : magic number supposed already checked + * dictSize supposed >= 8 + */ +static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, + ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + const void* dict, size_t dictSize, + ZSTD_dictTableLoadMethod_e dtlm, + void* workspace) +{ + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; + size_t dictID; + size_t eSize; + ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); + assert(dictSize >= 8); + assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); + + dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ ); + eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize); + FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed"); + dictPtr += eSize; + + { + size_t const dictContentSize = (size_t)(dictEnd - dictPtr); + FORWARD_IF_ERROR(ZSTD_loadDictionaryContent( + ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), ""); + } + return dictID; +} + +/** ZSTD_compress_insertDictionary() : +* @return : dictID, or an error code */ +static size_t +ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, + ZSTD_matchState_t* ms, + ldmState_t* ls, + ZSTD_cwksp* ws, + const ZSTD_CCtx_params* params, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + void* workspace) +{ + DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); + if ((dict==NULL) || (dictSize<8)) { + RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); + return 0; + } + + ZSTD_reset_compressedBlockState(bs); + + /* dict restricted modes */ + if (dictContentType == ZSTD_dct_rawContent) + return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm); + + if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { + if (dictContentType == ZSTD_dct_auto) { + DEBUGLOG(4, "raw content dictionary detected"); + return ZSTD_loadDictionaryContent( + ms, ls, ws, params, dict, dictSize, dtlm); + } + RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); + assert(0); /* impossible */ + } + + /* dict as full zstd dictionary */ + return ZSTD_loadZstdDictionary( + bs, ms, ws, params, dict, dictSize, dtlm, workspace); +} + +#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB) +#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL) + +/*! ZSTD_compressBegin_internal() : + * @return : 0, or an error code */ +static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + size_t const dictContentSize = cdict ? cdict->dictContentSize : dictSize; +#if ZSTD_TRACE + cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? 
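+    /* Dictionary dispatch recap for ZSTD_compress_insertDictionary() above:
+     * ZSTD_dct_rawContent loads the bytes as-is, ZSTD_dct_auto sniffs
+     * ZSTD_MAGIC_DICTIONARY and falls back to raw content when the magic is absent,
+     * and ZSTD_dct_fullDict requires the full zstd dictionary format (entropy tables
+     * via ZSTD_loadCEntropy(), then content) or fails with dictionary_wrong. */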
ZSTD_trace_compress_begin(cctx) : 0; +#endif + DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog); + /* params are supposed to be fully validated at this point */ + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + if ( (cdict) + && (cdict->dictContentSize > 0) + && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF + || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || cdict->compressionLevel == 0) + && (params->attachDictPref != ZSTD_dictForceLoad) ) { + return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); + } + + FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + dictContentSize, + ZSTDcrp_makeClean, zbuff) , ""); + { size_t const dictID = cdict ? + ZSTD_compress_insertDictionary( + cctx->blockState.prevCBlock, &cctx->blockState.matchState, + &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent, + cdict->dictContentSize, cdict->dictContentType, dtlm, + cctx->entropyWorkspace) + : ZSTD_compress_insertDictionary( + cctx->blockState.prevCBlock, &cctx->blockState.matchState, + &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize, + dictContentType, dtlm, cctx->entropyWorkspace); + FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); + assert(dictID <= UINT_MAX); + cctx->dictID = (U32)dictID; + cctx->dictContentSize = dictContentSize; + } + return 0; +} + +size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog); + /* compression parameters verification and optimization */ + FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , ""); + return ZSTD_compressBegin_internal(cctx, + dict, dictSize, dictContentType, dtlm, + cdict, + params, pledgedSrcSize, + ZSTDb_not_buffered); +} + +/*! ZSTD_compressBegin_advanced() : +* @return : 0, or an error code */ +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize) +{ + ZSTD_CCtx_params cctxParams; + ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, ZSTD_NO_CLEVEL); + return ZSTD_compressBegin_advanced_internal(cctx, + dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, + NULL /*cdict*/, + &cctxParams, pledgedSrcSize); +} + +size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_CCtx_params cctxParams; + { + ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict); + ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel); + } + DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); + return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); +} + +size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) +{ + return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); +} + + +/*! ZSTD_writeEpilogue() : +* Ends a frame. 
+* @return : nb of bytes written into dst (or an error code) */ +static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + size_t fhSize = 0; + + DEBUGLOG(4, "ZSTD_writeEpilogue"); + RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing"); + + /* special case : empty frame */ + if (cctx->stage == ZSTDcs_init) { + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0); + FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); + dstCapacity -= fhSize; + op += fhSize; + cctx->stage = ZSTDcs_ongoing; + } + + if (cctx->stage != ZSTDcs_ending) { + /* write one last empty block, make it the "last" block */ + U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue"); + MEM_writeLE32(op, cBlockHeader24); + op += ZSTD_blockHeaderSize; + dstCapacity -= ZSTD_blockHeaderSize; + } + + if (cctx->appliedParams.fParams.checksumFlag) { + U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum"); + DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum); + MEM_writeLE32(op, checksum); + op += 4; + } + + cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ + return op-ostart; +} + +void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize) +{ +#if ZSTD_TRACE + if (cctx->traceCtx && ZSTD_trace_compress_end != NULL) { + int const streaming = cctx->inBuffSize > 0 || cctx->outBuffSize > 0 || cctx->appliedParams.nbWorkers > 0; + ZSTD_Trace trace; + ZSTD_memset(&trace, 0, sizeof(trace)); + trace.version = ZSTD_VERSION_NUMBER; + trace.streaming = streaming; + trace.dictionaryID = cctx->dictID; + trace.dictionarySize = cctx->dictContentSize; + trace.uncompressedSize = cctx->consumedSrcSize; + trace.compressedSize = cctx->producedCSize + extraCSize; + trace.params = &cctx->appliedParams; + trace.cctx = cctx; + ZSTD_trace_compress_end(cctx->traceCtx, &trace); + } + cctx->traceCtx = 0; +#else + (void)cctx; + (void)extraCSize; +#endif +} + +size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t endResult; + size_t const cSize = ZSTD_compressContinue_internal(cctx, + dst, dstCapacity, src, srcSize, + 1 /* frame mode */, 1 /* last chunk */); + FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed"); + endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); + FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed"); + assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); + if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); + DEBUGLOG(4, "end of frame : controlling src size"); + RETURN_ERROR_IF( + cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1, + srcSize_wrong, + "error : pledgedSrcSize = %u, while realSrcSize = %u", + (unsigned)cctx->pledgedSrcSizePlusOne-1, + (unsigned)cctx->consumedSrcSize); + } + ZSTD_CCtx_trace(cctx, endResult); + return cSize + endResult; +} + +size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params) +{ + DEBUGLOG(4, "ZSTD_compress_advanced"); + FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), ""); + 
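+    /* Frame-termination recap for ZSTD_writeEpilogue()/ZSTD_compressEnd() above:
+     * the epilogue emits a frame header if nothing was written yet (empty frame),
+     * then one final empty raw block flagged "last", then a 4-byte XXH64 checksum
+     * when fParams.checksumFlag is set; ZSTD_compressEnd() additionally verifies
+     * that the bytes consumed match pledgedSrcSize when one was declared. */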
ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, ¶ms, ZSTD_NO_CLEVEL); + return ZSTD_compress_advanced_internal(cctx, + dst, dstCapacity, + src, srcSize, + dict, dictSize, + &cctx->simpleApiParams); +} + +/* Internal */ +size_t ZSTD_compress_advanced_internal( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + const ZSTD_CCtx_params* params) +{ + DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize); + FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, + dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, + params, srcSize, ZSTDb_not_buffered) , ""); + return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); +} + +size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel) +{ + { + ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict); + assert(params.fParams.contentSizeFlag == 1); + ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, ¶ms, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel); + } + DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize); + return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctx->simpleApiParams); +} + +size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize); + assert(cctx != NULL); + return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); +} + +size_t ZSTD_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) +{ + size_t result; +#if ZSTD_COMPRESS_HEAPMODE + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + RETURN_ERROR_IF(!cctx, memory_allocation, "ZSTD_createCCtx failed"); + result = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel); + ZSTD_freeCCtx(cctx); +#else + ZSTD_CCtx ctxBody; + ZSTD_initCCtx(&ctxBody, ZSTD_defaultCMem); + result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel); + ZSTD_freeCCtxContent(&ctxBody); /* can't free ctxBody itself, as it's on stack; free only heap content */ +#endif + return result; +} + + +/* ===== Dictionary API ===== */ + +/*! ZSTD_estimateCDictSize_advanced() : + * Estimate amount of memory that will be needed to create a dictionary with following arguments */ +size_t ZSTD_estimateCDictSize_advanced( + size_t dictSize, ZSTD_compressionParameters cParams, + ZSTD_dictLoadMethod_e dictLoadMethod) +{ + DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict)); + return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + /* enableDedicatedDictSearch == 1 ensures that CDict estimation will not be too small + * in case we are using DDS with row-hash. */ + + ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(ZSTD_urm_auto, &cParams), + /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0) + + (dictLoadMethod == ZSTD_dlm_byRef ? 
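+        /* Illustrative one-shot use of the entry points defined above, using only
+         * public zstd.h names (buffer management is the caller's, shown loosely):
+         *   size_t const bound = ZSTD_compressBound(srcSize);
+         *   size_t const cSize = ZSTD_compress(dst, bound, src, srcSize, 3);
+         *   if (ZSTD_isError(cSize)) { puts(ZSTD_getErrorName(cSize)); }
+         * ZSTD_compress() itself either heap-allocates a CCtx or uses a stack CCtx,
+         * depending on ZSTD_COMPRESS_HEAPMODE, as the code above shows. */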
0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *)))); +} + +size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); +} + +size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; /* support sizeof on NULL */ + DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict)); + /* cdict may be in the workspace */ + return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict)) + + ZSTD_cwksp_sizeof(&cdict->workspace); +} + +static size_t ZSTD_initCDict_internal( + ZSTD_CDict* cdict, + const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_CCtx_params params) +{ + DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType); + assert(!ZSTD_checkCParams(params.cParams)); + cdict->matchState.cParams = params.cParams; + cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch; + if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { + cdict->dictContent = dictBuffer; + } else { + void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*))); + RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!"); + cdict->dictContent = internalBuffer; + ZSTD_memcpy(internalBuffer, dictBuffer, dictSize); + } + cdict->dictContentSize = dictSize; + cdict->dictContentType = dictContentType; + + cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE); + + + /* Reset the state to no dictionary */ + ZSTD_reset_compressedBlockState(&cdict->cBlockState); + FORWARD_IF_ERROR(ZSTD_reset_matchState( + &cdict->matchState, + &cdict->workspace, + ¶ms.cParams, + params.useRowMatchFinder, + ZSTDcrp_makeClean, + ZSTDirp_reset, + ZSTD_resetTarget_CDict), ""); + /* (Maybe) load the dictionary + * Skips loading the dictionary if it is < 8 bytes. + */ + { params.compressionLevel = ZSTD_CLEVEL_DEFAULT; + params.fParams.contentSizeFlag = 1; + { size_t const dictID = ZSTD_compress_insertDictionary( + &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace, + ¶ms, cdict->dictContent, cdict->dictContentSize, + dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace); + FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); + assert(dictID <= (size_t)(U32)-1); + cdict->dictID = (U32)dictID; + } + } + + return 0; +} + +static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_compressionParameters cParams, + ZSTD_useRowMatchFinderMode_e useRowMatchFinder, + U32 enableDedicatedDictSearch, + ZSTD_customMem customMem) +{ + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + + { size_t const workspaceSize = + ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + + ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, enableDedicatedDictSearch, /* forCCtx */ 0) + + (dictLoadMethod == ZSTD_dlm_byRef ? 
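+            /* Workspace layout note: a CDict is carved out of one cwksp allocation --
+             * the ZSTD_CDict struct, the HUF_WORKSPACE_SIZE entropy scratch area, the
+             * matchState tables, and (only for ZSTD_dlm_byCopy) an aligned copy of
+             * the dictionary -- which is why ZSTD_estimateCDictSize_advanced() above
+             * sums exactly the same terms. */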
0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))); + void* const workspace = ZSTD_customMalloc(workspaceSize, customMem); + ZSTD_cwksp ws; + ZSTD_CDict* cdict; + + if (!workspace) { + ZSTD_customFree(workspace, customMem); + return NULL; + } + + ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_dynamic_alloc); + + cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); + assert(cdict != NULL); + ZSTD_cwksp_move(&cdict->workspace, &ws); + cdict->customMem = customMem; + cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */ + cdict->useRowMatchFinder = useRowMatchFinder; + return cdict; + } +} + +ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params cctxParams; + ZSTD_memset(&cctxParams, 0, sizeof(cctxParams)); + ZSTD_CCtxParams_init(&cctxParams, 0); + cctxParams.cParams = cParams; + cctxParams.customMem = customMem; + return ZSTD_createCDict_advanced2( + dictBuffer, dictSize, + dictLoadMethod, dictContentType, + &cctxParams, customMem); +} + +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CCtx_params* originalCctxParams, + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params cctxParams = *originalCctxParams; + ZSTD_compressionParameters cParams; + ZSTD_CDict* cdict; + + DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType); + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + + if (cctxParams.enableDedicatedDictSearch) { + cParams = ZSTD_dedicatedDictSearch_getCParams( + cctxParams.compressionLevel, dictSize); + ZSTD_overrideCParams(&cParams, &cctxParams.cParams); + } else { + cParams = ZSTD_getCParamsFromCCtxParams( + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + } + + if (!ZSTD_dedicatedDictSearch_isSupported(&cParams)) { + /* Fall back to non-DDSS params */ + cctxParams.enableDedicatedDictSearch = 0; + cParams = ZSTD_getCParamsFromCCtxParams( + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + } + + DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", cctxParams.enableDedicatedDictSearch); + cctxParams.cParams = cParams; + cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams); + + cdict = ZSTD_createCDict_advanced_internal(dictSize, + dictLoadMethod, cctxParams.cParams, + cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch, + customMem); + + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, + dict, dictSize, + dictLoadMethod, dictContentType, + cctxParams) )) { + ZSTD_freeCDict(cdict); + return NULL; + } + + return cdict; +} + +ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byCopy, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); + if (cdict) + cdict->compressionLevel = (compressionLevel == 0) ? 
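+        /* Illustrative reuse pattern for the CDict API above (public zstd.h names,
+         * error handling elided):
+         *   ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuf, dictSize, level);
+         *   for each input: ZSTD_compress_usingCDict(cctx, dst, dstCap, src, srcSize, cdict);
+         *   ZSTD_freeCDict(cdict);
+         * Digesting the dictionary once amortizes table construction across many
+         * small inputs, which is the point of a CDict versus calling
+         * ZSTD_compress_usingDict() in a loop. */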
ZSTD_CLEVEL_DEFAULT : compressionLevel; + return cdict; +} + +ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byRef, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); + if (cdict) + cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel; + return cdict; +} + +size_t ZSTD_freeCDict(ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = cdict->customMem; + int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict); + ZSTD_cwksp_free(&cdict->workspace, cMem); + if (!cdictInWorkspace) { + ZSTD_customFree(cdict, cMem); + } + return 0; + } +} + +/*! ZSTD_initStaticCDict_advanced() : + * Generate a digested dictionary in provided memory area. + * workspace: The memory area to emplace the dictionary into. + * Provided pointer must 8-bytes aligned. + * It must outlive dictionary usage. + * workspaceSize: Use ZSTD_estimateCDictSize() + * to determine how large workspace must be. + * cParams : use ZSTD_getCParams() to transform a compression level + * into its relevants cParams. + * @return : pointer to ZSTD_CDict*, or NULL if error (size too small) + * Note : there is no corresponding "free" function. + * Since workspace was allocated externally, it must be freed externally. + */ +const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams) +{ + ZSTD_useRowMatchFinderMode_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_urm_auto, &cParams); + /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */ + size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0); + size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + + matchStateSize; + ZSTD_CDict* cdict; + ZSTD_CCtx_params params; + + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + + { + ZSTD_cwksp ws; + ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc); + cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); + if (cdict == NULL) return NULL; + ZSTD_cwksp_move(&cdict->workspace, &ws); + } + + DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u", + (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize)); + if (workspaceSize < neededSize) return NULL; + + ZSTD_CCtxParams_init(¶ms, 0); + params.cParams = cParams; + params.useRowMatchFinder = useRowMatchFinder; + cdict->useRowMatchFinder = useRowMatchFinder; + + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, + dict, dictSize, + dictLoadMethod, dictContentType, + params) )) + return NULL; + + return cdict; +} + +ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict) +{ + assert(cdict != NULL); + return cdict->matchState.cParams; +} + +/*! ZSTD_getDictID_fromCDict() : + * Provides the dictID of the dictionary loaded into `cdict`. 
+ * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; + return cdict->dictID; +} + +/* ZSTD_compressBegin_usingCDict_internal() : + * Implementation of various ZSTD_compressBegin_usingCDict* functions. + */ +static size_t ZSTD_compressBegin_usingCDict_internal( + ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, + ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) +{ + ZSTD_CCtx_params cctxParams; + DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_internal"); + RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!"); + /* Initialize the cctxParams from the cdict */ + { + ZSTD_parameters params; + params.fParams = fParams; + params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF + || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || cdict->compressionLevel == 0 ) ? + ZSTD_getCParamsFromCDict(cdict) + : ZSTD_getCParams(cdict->compressionLevel, + pledgedSrcSize, + cdict->dictContentSize); + ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, cdict->compressionLevel); + } + /* Increase window log to fit the entire dictionary and source if the + * source size is known. Limit the increase to 19, which is the + * window log for compression level 1 with the largest source size. + */ + if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) { + U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19); + U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1; + cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog); + } + return ZSTD_compressBegin_internal(cctx, + NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, + cdict, + &cctxParams, pledgedSrcSize, + ZSTDb_not_buffered); +} + + +/* ZSTD_compressBegin_usingCDict_advanced() : + * This function is DEPRECATED. + * cdict must be != NULL */ +size_t ZSTD_compressBegin_usingCDict_advanced( + ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, + ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) +{ + return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, pledgedSrcSize); +} + +/* ZSTD_compressBegin_usingCDict() : + * cdict must be != NULL */ +size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); +} + +/*! ZSTD_compress_usingCDict_internal(): + * Implementation of various ZSTD_compress_usingCDict* functions. + */ +static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) +{ + FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */ + return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); +} + +/*! ZSTD_compress_usingCDict_advanced(): + * This function is DEPRECATED. 
+ */ +size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) +{ + return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); +} + +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. + * Note that compression parameters are decided at CDict creation time + * while frame parameters are hardcoded */ +size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); +} + + + +/* ****************************************************************** +* Streaming +********************************************************************/ + +ZSTD_CStream* ZSTD_createCStream(void) +{ + DEBUGLOG(3, "ZSTD_createCStream"); + return ZSTD_createCStream_advanced(ZSTD_defaultCMem); +} + +ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize) +{ + return ZSTD_initStaticCCtx(workspace, workspaceSize); +} + +ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem) +{ /* CStream and CCtx are now same object */ + return ZSTD_createCCtx_advanced(customMem); +} + +size_t ZSTD_freeCStream(ZSTD_CStream* zcs) +{ + return ZSTD_freeCCtx(zcs); /* same object */ +} + + + +/*====== Initialization ======*/ + +size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; } + +size_t ZSTD_CStreamOutSize(void) +{ + return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; +} + +static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize) +{ + if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) + return ZSTD_cpm_attachDict; + else + return ZSTD_cpm_noAttachDict; +} + +/* ZSTD_resetCStream(): + * pledgedSrcSize == 0 means "unknown" */ +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss) +{ + /* temporary : 0 interpreted as "unknown" during transition period. + * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. + * 0 will be interpreted as "empty" in the future. + */ + U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + return 0; +} + +/*! ZSTD_initCStream_internal() : + * Note : for lib/compress only. Used by zstdmt_compress.c. 
+ * Assumption 1 : params are valid + * Assumption 2 : either dict, or cdict, is defined, not both */ +size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_initCStream_internal"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); + zcs->requestedParams = *params; + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + if (dict) { + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); + } else { + /* Dictionary is cleared if !cdict */ + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); + } + return 0; +} + +/* ZSTD_initCStream_usingCDict_advanced() : + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */ +size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + zcs->requestedParams.fParams = fParams; + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); + return 0; +} + +/* note : cdict must outlive compression session */ +size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); + return 0; +} + + +/* ZSTD_initCStream_advanced() : + * pledgedSrcSize must be exact. + * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */ +size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pss) +{ + /* for compatibility with older programs relying on this behavior. + * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. + * This line will be removed in the future. + */ + U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_initCStream_advanced"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); + ZSTD_CCtxParams_setZstdParams(&zcs->requestedParams, ¶ms); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); + return 0; +} + +size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingDict"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); + return 0; +} + +size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss) +{ + /* temporary : 0 interpreted as "unknown" during transition period. + * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. 
+ * 0 will be interpreted as "empty" in the future. + */ + U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_initCStream_srcSize"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + return 0; +} + +size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_initCStream"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + return 0; +} + +/*====== Compression ======*/ + +static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx) +{ + size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos; + if (hintInSize==0) hintInSize = cctx->blockSize; + return hintInSize; +} + +/** ZSTD_compressStream_generic(): + * internal function for all *compressStream*() variants + * non-static, because can be called from zstdmt_compress.c + * @return : hint size for next input */ +static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective const flushMode) +{ + const char* const istart = (const char*)input->src; + const char* const iend = input->size != 0 ? istart + input->size : istart; + const char* ip = input->pos != 0 ? istart + input->pos : istart; + char* const ostart = (char*)output->dst; + char* const oend = output->size != 0 ? ostart + output->size : ostart; + char* op = output->pos != 0 ? ostart + output->pos : ostart; + U32 someMoreWork = 1; + + /* check expectations */ + DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode); + if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { + assert(zcs->inBuff != NULL); + assert(zcs->inBuffSize > 0); + } + if (zcs->appliedParams.outBufferMode == ZSTD_bm_buffered) { + assert(zcs->outBuff != NULL); + assert(zcs->outBuffSize > 0); + } + assert(output->pos <= output->size); + assert(input->pos <= input->size); + assert((U32)flushMode <= (U32)ZSTD_e_end); + + while (someMoreWork) { + switch(zcs->streamStage) + { + case zcss_init: + RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!"); + + case zcss_load: + if ( (flushMode == ZSTD_e_end) + && ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip) /* Enough output space */ + || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) /* OR we are allowed to return dstSizeTooSmall */ + && (zcs->inBuffPos == 0) ) { + /* shortcut to compression pass directly into output buffer */ + size_t const cSize = ZSTD_compressEnd(zcs, + op, oend-op, ip, iend-ip); + DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); + FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed"); + ip = iend; + op += cSize; + zcs->frameEnded = 1; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + someMoreWork = 0; break; + } + /* complete loading into inBuffer in buffered mode */ + if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { + size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; + size_t const loaded = ZSTD_limitCopy( + zcs->inBuff + zcs->inBuffPos, toLoad, + ip, iend-ip); + zcs->inBuffPos += loaded; + if (loaded != 0) + ip += loaded; + if ( (flushMode == ZSTD_e_continue) + && (zcs->inBuffPos < zcs->inBuffTarget) ) { + /* not enough input to 
fill full block : stop here */ + someMoreWork = 0; break; + } + if ( (flushMode == ZSTD_e_flush) + && (zcs->inBuffPos == zcs->inToCompress) ) { + /* empty */ + someMoreWork = 0; break; + } + } + /* compress current block (note : this stage cannot be stopped in the middle) */ + DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode); + { int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered); + void* cDst; + size_t cSize; + size_t oSize = oend-op; + size_t const iSize = inputBuffered + ? zcs->inBuffPos - zcs->inToCompress + : MIN((size_t)(iend - ip), zcs->blockSize); + if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) + cDst = op; /* compress into output buffer, to skip flush stage */ + else + cDst = zcs->outBuff, oSize = zcs->outBuffSize; + if (inputBuffered) { + unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); + cSize = lastBlock ? + ZSTD_compressEnd(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize) : + ZSTD_compressContinue(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize); + FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); + zcs->frameEnded = lastBlock; + /* prepare next block */ + zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; + if (zcs->inBuffTarget > zcs->inBuffSize) + zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; + DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", + (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize); + if (!lastBlock) + assert(zcs->inBuffTarget <= zcs->inBuffSize); + zcs->inToCompress = zcs->inBuffPos; + } else { + unsigned const lastBlock = (ip + iSize == iend); + assert(flushMode == ZSTD_e_end /* Already validated */); + cSize = lastBlock ? + ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) : + ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize); + /* Consume the input prior to error checking to mirror buffered mode. */ + if (iSize > 0) + ip += iSize; + FORWARD_IF_ERROR(cSize, "%s", lastBlock ? 
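+                    /* Load-stage recap: with ZSTD_e_end and room for
+                     * ZSTD_compressBound(remaining input) -- or a stable output
+                     * buffer -- the frame is finished straight into the caller's
+                     * buffer and the flush stage is skipped; otherwise the block is
+                     * compressed either into the caller's buffer (when worst case
+                     * fits) or into zcs->outBuff, and the stream moves to zcss_flush. */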
"ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); + zcs->frameEnded = lastBlock; + if (lastBlock) + assert(ip == iend); + } + if (cDst == op) { /* no need to flush */ + op += cSize; + if (zcs->frameEnded) { + DEBUGLOG(5, "Frame completed directly in outBuffer"); + someMoreWork = 0; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + } + break; + } + zcs->outBuffContentSize = cSize; + zcs->outBuffFlushedSize = 0; + zcs->streamStage = zcss_flush; /* pass-through to flush stage */ + } + /* fall-through */ + case zcss_flush: + DEBUGLOG(5, "flush stage"); + assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered); + { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; + size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op), + zcs->outBuff + zcs->outBuffFlushedSize, toFlush); + DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", + (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); + if (flushed) + op += flushed; + zcs->outBuffFlushedSize += flushed; + if (toFlush!=flushed) { + /* flush not fully completed, presumably because dst is too small */ + assert(op==oend); + someMoreWork = 0; + break; + } + zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; + if (zcs->frameEnded) { + DEBUGLOG(5, "Frame completed on flush"); + someMoreWork = 0; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + break; + } + zcs->streamStage = zcss_load; + break; + } + + default: /* impossible */ + assert(0); + } + } + + input->pos = ip - istart; + output->pos = op - ostart; + if (zcs->frameEnded) return 0; + return ZSTD_nextInputSizeHint(zcs); +} + +static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx) +{ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers >= 1) { + assert(cctx->mtctx != NULL); + return ZSTDMT_nextInputSizeHint(cctx->mtctx); + } +#endif + return ZSTD_nextInputSizeHint(cctx); + +} + +size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , ""); + return ZSTD_nextInputSizeHint_MTorST(zcs); +} + +/* After a compression call set the expected input/output buffer. + * This is validated at the start of the next compression call. + */ +static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input) +{ + if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { + cctx->expectedInBuffer = *input; + } + if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { + cctx->expectedOutBufferSize = output->size - output->pos; + } +} + +/* Validate that the input/output buffers match the expectations set by + * ZSTD_setBufferExpectations. 
+ */ +static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx, + ZSTD_outBuffer const* output, + ZSTD_inBuffer const* input, + ZSTD_EndDirective endOp) +{ + if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { + ZSTD_inBuffer const expect = cctx->expectedInBuffer; + if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size) + RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!"); + if (endOp != ZSTD_e_end) + RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!"); + } + if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { + size_t const outBufferSize = output->size - output->pos; + if (cctx->expectedOutBufferSize != outBufferSize) + RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!"); + } + return 0; +} + +static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, + ZSTD_EndDirective endOp, + size_t inSize) { + ZSTD_CCtx_params params = cctx->requestedParams; + ZSTD_prefixDict const prefixDict = cctx->prefixDict; + FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */ + ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ + assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ + if (cctx->cdict && !cctx->localDict.cdict) { + /* Let the cdict's compression level take priority over the requested params. + * But do not take the cdict's compression level if the "cdict" is actually a localDict + * generated from ZSTD_initLocalDict(). + */ + params.compressionLevel = cctx->cdict->compressionLevel; + } + DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); + if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-fix pledgedSrcSize */ + { + size_t const dictSize = prefixDict.dict + ? prefixDict.dictSize + : (cctx->cdict ? cctx->cdict->dictContentSize : 0); + ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, ¶ms, cctx->pledgedSrcSizePlusOne - 1); + params.cParams = ZSTD_getCParamsFromCCtxParams( + ¶ms, cctx->pledgedSrcSizePlusOne-1, + dictSize, mode); + } + + if (ZSTD_CParams_shouldEnableLdm(¶ms.cParams)) { + /* Enable LDM by default for optimal parser and window size >= 128MB */ + DEBUGLOG(4, "LDM enabled by default (window size >= 128MB, strategy >= btopt)"); + params.ldmParams.enableLdm = 1; + } + + if (ZSTD_CParams_useBlockSplitter(¶ms.cParams)) { + DEBUGLOG(4, "Block splitter enabled by default (window size >= 128K, strategy >= btopt)"); + params.splitBlocks = 1; + } + + params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, ¶ms.cParams); + +#ifdef ZSTD_MULTITHREAD + if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) { + params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */ + } + if (params.nbWorkers > 0) { +#if ZSTD_TRACE + cctx->traceCtx = (ZSTD_trace_compress_begin != NULL) ? 
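+        /* Transparent-init recap: ZSTD_e_end pins pledgedSrcSizePlusOne to the first
+         * call's input size, cParams are resolved from the requested params plus the
+         * cdict/prefix dictionary size, LDM and block splitting default on for
+         * large-window btopt-class parameters, and multithreading is skipped when the
+         * pledged source is no larger than ZSTDMT_JOBSIZE_MIN. */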
ZSTD_trace_compress_begin(cctx) : 0; +#endif + /* mt context creation */ + if (cctx->mtctx == NULL) { + DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", + params.nbWorkers); + cctx->mtctx = ZSTDMT_createCCtx_advanced((U32)params.nbWorkers, cctx->customMem, cctx->pool); + RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation, "NULL pointer!"); + } + /* mt compression */ + DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers); + FORWARD_IF_ERROR( ZSTDMT_initCStream_internal( + cctx->mtctx, + prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, + cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) , ""); + cctx->dictID = cctx->cdict ? cctx->cdict->dictID : 0; + cctx->dictContentSize = cctx->cdict ? cctx->cdict->dictContentSize : prefixDict.dictSize; + cctx->consumedSrcSize = 0; + cctx->producedCSize = 0; + cctx->streamStage = zcss_load; + cctx->appliedParams = params; + } else +#endif + { U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1; + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, + prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast, + cctx->cdict, + ¶ms, pledgedSrcSize, + ZSTDb_buffered) , ""); + assert(cctx->appliedParams.nbWorkers == 0); + cctx->inToCompress = 0; + cctx->inBuffPos = 0; + if (cctx->appliedParams.inBufferMode == ZSTD_bm_buffered) { + /* for small input: avoid automatic flush on reaching end of block, since + * it would require to add a 3-bytes null block to end frame + */ + cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize); + } else { + cctx->inBuffTarget = 0; + } + cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0; + cctx->streamStage = zcss_load; + cctx->frameEnded = 0; + } + return 0; +} + +size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp) +{ + DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp); + /* check conditions */ + RETURN_ERROR_IF(output->pos > output->size, dstSize_tooSmall, "invalid output buffer"); + RETURN_ERROR_IF(input->pos > input->size, srcSize_wrong, "invalid input buffer"); + RETURN_ERROR_IF((U32)endOp > (U32)ZSTD_e_end, parameter_outOfBound, "invalid endDirective"); + assert(cctx != NULL); + + /* transparent initialization stage */ + if (cctx->streamStage == zcss_init) { + FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed"); + ZSTD_setBufferExpectations(cctx, output, input); /* Set initial buffer expectations now that we've initialized */ + } + /* end of transparent initialization stage */ + + FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx, output, input, endOp), "invalid buffers"); + /* compression stage */ +#ifdef ZSTD_MULTITHREAD + if (cctx->appliedParams.nbWorkers > 0) { + size_t flushMin; + if (cctx->cParamsChanged) { + ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams); + cctx->cParamsChanged = 0; + } + for (;;) { + size_t const ipos = input->pos; + size_t const opos = output->pos; + flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); + cctx->consumedSrcSize += (U64)(input->pos - ipos); + cctx->producedCSize += (U64)(output->pos - opos); + if ( ZSTD_isError(flushMin) + || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ + if (flushMin == 0) + ZSTD_CCtx_trace(cctx, 0); + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + } + 
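+            /* Caller-side view of this loop (illustrative, public zstd.h API only):
+             *   ZSTD_inBuffer  in  = { src, srcSize, 0 };
+             *   ZSTD_outBuffer out = { dst, dstCap, 0 };
+             *   size_t remaining;
+             *   do { remaining = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end); }
+             *   while (!ZSTD_isError(remaining) && remaining != 0);  // drain 'out' as it fills
+             * A zero return means the frame is fully flushed; non-zero means more
+             * output is pending, mirroring the flushMin handling below. */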
FORWARD_IF_ERROR(flushMin, "ZSTDMT_compressStream_generic failed"); + + if (endOp == ZSTD_e_continue) { + /* We only require some progress with ZSTD_e_continue, not maximal progress. + * We're done if we've consumed or produced any bytes, or either buffer is + * full. + */ + if (input->pos != ipos || output->pos != opos || input->pos == input->size || output->pos == output->size) + break; + } else { + assert(endOp == ZSTD_e_flush || endOp == ZSTD_e_end); + /* We require maximal progress. We're done when the flush is complete or the + * output buffer is full. + */ + if (flushMin == 0 || output->pos == output->size) + break; + } + } + DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic"); + /* Either we don't require maximum forward progress, we've finished the + * flush, or we are out of output space. + */ + assert(endOp == ZSTD_e_continue || flushMin == 0 || output->pos == output->size); + ZSTD_setBufferExpectations(cctx, output, input); + return flushMin; + } +#endif + FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , ""); + DEBUGLOG(5, "completed ZSTD_compressStream2"); + ZSTD_setBufferExpectations(cctx, output, input); + return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ +} + +size_t ZSTD_compressStream2_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp) +{ + ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; + ZSTD_inBuffer input = { src, srcSize, *srcPos }; + /* ZSTD_compressStream2() will check validity of dstPos and srcPos */ + size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; +} + +size_t ZSTD_compress2(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + ZSTD_bufferMode_e const originalInBufferMode = cctx->requestedParams.inBufferMode; + ZSTD_bufferMode_e const originalOutBufferMode = cctx->requestedParams.outBufferMode; + DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize); + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + /* Enable stable input/output buffers. */ + cctx->requestedParams.inBufferMode = ZSTD_bm_stable; + cctx->requestedParams.outBufferMode = ZSTD_bm_stable; + { size_t oPos = 0; + size_t iPos = 0; + size_t const result = ZSTD_compressStream2_simpleArgs(cctx, + dst, dstCapacity, &oPos, + src, srcSize, &iPos, + ZSTD_e_end); + /* Reset to the original values. 
*/ + cctx->requestedParams.inBufferMode = originalInBufferMode; + cctx->requestedParams.outBufferMode = originalOutBufferMode; + FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed"); + if (result != 0) { /* compression not completed, due to lack of output space */ + assert(oPos == dstCapacity); + RETURN_ERROR(dstSize_tooSmall, ""); + } + assert(iPos == srcSize); /* all input is expected consumed */ + return oPos; + } +} + +typedef struct { + U32 idx; /* Index in array of ZSTD_Sequence */ + U32 posInSequence; /* Position within sequence at idx */ + size_t posInSrc; /* Number of bytes given by sequences provided so far */ +} ZSTD_sequencePosition; + +/* Returns a ZSTD error code if sequence is not valid */ +static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength, + size_t posInSrc, U32 windowLog, size_t dictSize, U32 minMatch) { + size_t offsetBound; + U32 windowSize = 1 << windowLog; + /* posInSrc represents the amount of data the the decoder would decode up to this point. + * As long as the amount of data decoded is less than or equal to window size, offsets may be + * larger than the total length of output decoded in order to reference the dict, even larger than + * window size. After output surpasses windowSize, we're limited to windowSize offsets again. + */ + offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize; + RETURN_ERROR_IF(offCode > offsetBound + ZSTD_REP_MOVE, corruption_detected, "Offset too large!"); + RETURN_ERROR_IF(matchLength < minMatch, corruption_detected, "Matchlength too small"); + return 0; +} + +/* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */ +static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) { + U32 offCode = rawOffset + ZSTD_REP_MOVE; + U32 repCode = 0; + + if (!ll0 && rawOffset == rep[0]) { + repCode = 1; + } else if (rawOffset == rep[1]) { + repCode = 2 - ll0; + } else if (rawOffset == rep[2]) { + repCode = 3 - ll0; + } else if (ll0 && rawOffset == rep[0] - 1) { + repCode = 3; + } + if (repCode) { + /* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */ + offCode = repCode - 1; + } + return offCode; +} + +/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of + * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. 
+ */ +static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize) { + U32 idx = seqPos->idx; + BYTE const* ip = (BYTE const*)(src); + const BYTE* const iend = ip + blockSize; + repcodes_t updatedRepcodes; + U32 dictSize; + U32 litLength; + U32 matchLength; + U32 ll0; + U32 offCode; + + if (cctx->cdict) { + dictSize = (U32)cctx->cdict->dictContentSize; + } else if (cctx->prefixDict.dict) { + dictSize = (U32)cctx->prefixDict.dictSize; + } else { + dictSize = 0; + } + ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); + for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) { + litLength = inSeqs[idx].litLength; + matchLength = inSeqs[idx].matchLength; + ll0 = litLength == 0; + offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0); + updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); + + DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); + if (cctx->appliedParams.validateSequences) { + seqPos->posInSrc += litLength + matchLength; + FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, + cctx->appliedParams.cParams.windowLog, dictSize, + cctx->appliedParams.cParams.minMatch), + "Sequence validation failed"); + } + RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, + "Not enough memory allocated. Try adjusting ZSTD_c_minMatch."); + ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); + ip += matchLength + litLength; + } + ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); + + if (inSeqs[idx].litLength) { + DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength); + ZSTD_storeLastLiterals(&cctx->seqStore, ip, inSeqs[idx].litLength); + ip += inSeqs[idx].litLength; + seqPos->posInSrc += inSeqs[idx].litLength; + } + RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!"); + seqPos->idx = idx+1; + return 0; +} + +/* Returns the number of bytes to move the current read position back by. Only non-zero + * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something + * went wrong. + * + * This function will attempt to scan through blockSize bytes represented by the sequences + * in inSeqs, storing any (partial) sequences. + * + * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to + * avoid splitting a match, or to avoid splitting a match such that it would produce a match + * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. 
+ */ +static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize) { + U32 idx = seqPos->idx; + U32 startPosInSequence = seqPos->posInSequence; + U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize; + size_t dictSize; + BYTE const* ip = (BYTE const*)(src); + BYTE const* iend = ip + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */ + repcodes_t updatedRepcodes; + U32 bytesAdjustment = 0; + U32 finalMatchSplit = 0; + U32 litLength; + U32 matchLength; + U32 rawOffset; + U32 offCode; + + if (cctx->cdict) { + dictSize = cctx->cdict->dictContentSize; + } else if (cctx->prefixDict.dict) { + dictSize = cctx->prefixDict.dictSize; + } else { + dictSize = 0; + } + DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize); + DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); + ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); + while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) { + const ZSTD_Sequence currSeq = inSeqs[idx]; + litLength = currSeq.litLength; + matchLength = currSeq.matchLength; + rawOffset = currSeq.offset; + + /* Modify the sequence depending on where endPosInSequence lies */ + if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) { + if (startPosInSequence >= litLength) { + startPosInSequence -= litLength; + litLength = 0; + matchLength -= startPosInSequence; + } else { + litLength -= startPosInSequence; + } + /* Move to the next sequence */ + endPosInSequence -= currSeq.litLength + currSeq.matchLength; + startPosInSequence = 0; + idx++; + } else { + /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence + does not reach the end of the match. So, we have to split the sequence */ + DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u", + currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence); + if (endPosInSequence > litLength) { + U32 firstHalfMatchLength; + litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence; + firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength; + if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) { + /* Only ever split the match if it is larger than the block size */ + U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence; + if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) { + /* Move the endPosInSequence backward so that it creates match of minMatch length */ + endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength; + bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength; + firstHalfMatchLength -= bytesAdjustment; + } + matchLength = firstHalfMatchLength; + /* Flag that we split the last match - after storing the sequence, exit the loop, + but keep the value of endPosInSequence */ + finalMatchSplit = 1; + } else { + /* Move the position in sequence backwards so that we don't split match, and break to store + * the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence + * should go. 
We prefer to do this whenever it is not necessary to split the match, or if doing so + * would cause the first half of the match to be too small + */ + bytesAdjustment = endPosInSequence - currSeq.litLength; + endPosInSequence = currSeq.litLength; + break; + } + } else { + /* This sequence ends inside the literals, break to store the last literals */ + break; + } + } + /* Check if this offset can be represented with a repcode */ + { U32 ll0 = (litLength == 0); + offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0); + updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); + } + + if (cctx->appliedParams.validateSequences) { + seqPos->posInSrc += litLength + matchLength; + FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, + cctx->appliedParams.cParams.windowLog, dictSize, + cctx->appliedParams.cParams.minMatch), + "Sequence validation failed"); + } + DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); + RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, + "Not enough memory allocated. Try adjusting ZSTD_c_minMatch."); + ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); + ip += matchLength + litLength; + } + DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); + assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength); + seqPos->idx = idx; + seqPos->posInSequence = endPosInSequence; + ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); + + iend -= bytesAdjustment; + if (ip != iend) { + /* Store any last literals */ + U32 lastLLSize = (U32)(iend - ip); + assert(ip <= iend); + DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize); + ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize); + seqPos->posInSrc += lastLLSize; + } + + return bytesAdjustment; +} + +typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize); +static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) { + ZSTD_sequenceCopier sequenceCopier = NULL; + assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode)); + if (mode == ZSTD_sf_explicitBlockDelimiters) { + return ZSTD_copySequencesToSeqStoreExplicitBlockDelim; + } else if (mode == ZSTD_sf_noBlockDelimiters) { + return ZSTD_copySequencesToSeqStoreNoBlockDelim; + } + assert(sequenceCopier != NULL); + return sequenceCopier; +} + +/* Compress, block-by-block, all of the sequences given. + * + * Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error. 
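+ *
+ * A minimal caller-side sketch of the public entry point that drives this
+ * function (illustrative; error checks omitted, buffer and sequence variables
+ * are placeholders):
+ *
+ *     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters);
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
+ *     size_t const cSize = ZSTD_compressSequences(cctx, dst, dstCapacity,
+ *                                                 seqs, nbSeqs, src, srcSize);
+ *     ZSTD_freeCCtx(cctx);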
+ */ +static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize) { + size_t cSize = 0; + U32 lastBlock; + size_t blockSize; + size_t compressedSeqsSize; + size_t remaining = srcSize; + ZSTD_sequencePosition seqPos = {0, 0, 0}; + + BYTE const* ip = (BYTE const*)src; + BYTE* op = (BYTE*)dst; + ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters); + + DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize); + /* Special case: empty frame */ + if (remaining == 0) { + U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header"); + MEM_writeLE32(op, cBlockHeader24); + op += ZSTD_blockHeaderSize; + dstCapacity -= ZSTD_blockHeaderSize; + cSize += ZSTD_blockHeaderSize; + } + + while (remaining) { + size_t cBlockSize; + size_t additionalByteAdjustment; + lastBlock = remaining <= cctx->blockSize; + blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize; + ZSTD_resetSeqStore(&cctx->seqStore); + DEBUGLOG(4, "Working on new block. Blocksize: %zu", blockSize); + + additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize); + FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy"); + blockSize -= additionalByteAdjustment; + + /* If blocks are too small, emit as a nocompress block */ + if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { + cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); + DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize); + cSize += cBlockSize; + ip += blockSize; + op += cBlockSize; + remaining -= blockSize; + dstCapacity -= cBlockSize; + continue; + } + + compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore, + &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy, + &cctx->appliedParams, + op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize, + blockSize, + cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + cctx->bmi2); + FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed"); + DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize); + + if (!cctx->isFirstBlock && + ZSTD_maybeRLE(&cctx->seqStore) && + ZSTD_isRLE((BYTE const*)src, srcSize)) { + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." 
+ * This is only an issue for zstd <= v1.4.3 + */ + compressedSeqsSize = 1; + } + + if (compressedSeqsSize == 0) { + /* ZSTD_noCompressBlock writes the block header as well */ + cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); + DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize); + } else if (compressedSeqsSize == 1) { + cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed"); + DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize); + } else { + U32 cBlockHeader; + /* Error checking and repcodes update */ + ZSTD_blockState_confirmRepcodesAndEntropyTables(&cctx->blockState); + if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + /* Write block header into beginning of block*/ + cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3); + MEM_writeLE24(op, cBlockHeader); + cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize; + DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize); + } + + cSize += cBlockSize; + DEBUGLOG(4, "cSize running total: %zu", cSize); + + if (lastBlock) { + break; + } else { + ip += blockSize; + op += cBlockSize; + remaining -= blockSize; + dstCapacity -= cBlockSize; + cctx->isFirstBlock = 0; + } + } + + return cSize; +} + +size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize) { + BYTE* op = (BYTE*)dst; + size_t cSize = 0; + size_t compressedBlocksSize = 0; + size_t frameHeaderSize = 0; + + /* Transparent initialization stage, same as compressStream2() */ + DEBUGLOG(3, "ZSTD_compressSequences()"); + assert(cctx != NULL); + FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed"); + /* Begin writing output, starting with frame header */ + frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID); + op += frameHeaderSize; + dstCapacity -= frameHeaderSize; + cSize += frameHeaderSize; + if (cctx->appliedParams.fParams.checksumFlag && srcSize) { + XXH64_update(&cctx->xxhState, src, srcSize); + } + /* cSize includes block header size and compressed sequences size */ + compressedBlocksSize = ZSTD_compressSequences_internal(cctx, + op, dstCapacity, + inSeqs, inSeqsSize, + src, srcSize); + FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!"); + cSize += compressedBlocksSize; + dstCapacity -= compressedBlocksSize; + + if (cctx->appliedParams.fParams.checksumFlag) { + U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum"); + DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum); + MEM_writeLE32((char*)dst + cSize, checksum); + cSize += 4; + } + + DEBUGLOG(3, "Final compressed size: %zu", cSize); + return cSize; +} + +/*====== Finalize ======*/ + +/*! 
ZSTD_flushStream() : + * @return : amount of data remaining to flush */ +size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) +{ + ZSTD_inBuffer input = { NULL, 0, 0 }; + return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush); +} + + +size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) +{ + ZSTD_inBuffer input = { NULL, 0, 0 }; + size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end); + FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed"); + if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ + /* single thread mode : attempt to calculate remaining to flush more precisely */ + { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; + size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4); + size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize; + DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush); + return toFlush; + } +} + + +/*-===== Pre-defined compression levels =====-*/ + +#define ZSTD_MAX_CLEVEL 22 +int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } +int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; } +int ZSTD_defaultCLevel(void) { return ZSTD_CLEVEL_DEFAULT; } + +static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { +{ /* "default" - for any srcSize > 256 KB */ + /* W, C, H, S, L, TL, strat */ + { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ + { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ + { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */ + { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */ + { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */ + { 21, 18, 19, 2, 5, 2, ZSTD_greedy }, /* level 5 */ + { 21, 19, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */ + { 21, 19, 19, 3, 5, 8, ZSTD_lazy }, /* level 7 */ + { 21, 19, 19, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ + { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ + { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ + { 22, 21, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ + { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ + { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 13 */ + { 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */ + { 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */ + { 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */ + { 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */ + { 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */ + { 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */ + { 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */ + { 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */ + { 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */ +}, +{ /* for srcSize <= 256 KB */ + /* W, C, H, S, L, T, strat */ + { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */ + { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */ + { 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/ + { 18, 18, 18, 3, 5, 2, ZSTD_greedy }, /* level 5.*/ + { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ + { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */ + { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/ + { 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 
12.*/ + { 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */ + { 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ + { 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/ + { 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/ + { 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/ + { 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/ + { 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +{ /* for srcSize <= 128 KB */ + /* W, C, H, S, L, T, strat */ + { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */ + { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */ + { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */ + { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ + { 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ + { 17, 17, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */ + { 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */ + { 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/ + { 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ + { 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/ + { 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/ + { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/ + { 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/ + { 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +{ /* for srcSize <= 16 KB */ + /* W, C, H, S, L, T, strat */ + { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */ + { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */ + { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */ + { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */ + { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/ + { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ + { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/ + { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/ + { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/ + { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/ + { 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/ + { 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/ + { 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/ + { 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/ + { 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/ + { 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +}; + +static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict); + switch (cParams.strategy) { + 
case ZSTD_fast: + case ZSTD_dfast: + break; + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + cParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG; + break; + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + } + return cParams; +} + +static int ZSTD_dedicatedDictSearch_isSupported( + ZSTD_compressionParameters const* cParams) +{ + return (cParams->strategy >= ZSTD_greedy) + && (cParams->strategy <= ZSTD_lazy2) + && (cParams->hashLog > cParams->chainLog) + && (cParams->chainLog <= 24); +} + +/** + * Reverses the adjustment applied to cparams when enabling dedicated dict + * search. This is used to recover the params set to be used in the working + * context. (Otherwise, those tables would also grow.) + */ +static void ZSTD_dedicatedDictSearch_revertCParams( + ZSTD_compressionParameters* cParams) { + switch (cParams->strategy) { + case ZSTD_fast: + case ZSTD_dfast: + break; + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG; + if (cParams->hashLog < ZSTD_HASHLOG_MIN) { + cParams->hashLog = ZSTD_HASHLOG_MIN; + } + break; + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + } +} + +static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) +{ + switch (mode) { + case ZSTD_cpm_unknown: + case ZSTD_cpm_noAttachDict: + case ZSTD_cpm_createCDict: + break; + case ZSTD_cpm_attachDict: + dictSize = 0; + break; + default: + assert(0); + break; + } + { int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN; + size_t const addedSize = unknown && dictSize > 0 ? 500 : 0; + return unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize; + } +} + +/*! ZSTD_getCParams_internal() : + * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. + * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown. + * Use dictSize == 0 for unknown or unused. + * Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. */ +static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) +{ + U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode); + U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); + int row; + DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel); + + /* row */ + if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ + else if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */ + else if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; + else row = compressionLevel; + + { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; + DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u", tableID, row, (U32)cp.strategy); + /* acceleration factor */ + if (compressionLevel < 0) { + int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel); + cp.targetLength = (unsigned)(-clampedCompressionLevel); + } + /* refine parameters based on srcSize & dictSize */ + return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode); + } +} + +/*! ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. 
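+ *  Illustrative call (fileSize is a placeholder for the caller's known source size) :
+ *      ZSTD_compressionParameters const cp = ZSTD_getCParams(19, fileSize, 0);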
+ * Size values are optional, provide 0 if not known or unused */ +ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) +{ + if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); +} + +/*! ZSTD_getParams() : + * same idea as ZSTD_getCParams() + * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). + * Fields of `ZSTD_frameParameters` are set to default values */ +static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) { + ZSTD_parameters params; + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode); + DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); + ZSTD_memset(¶ms, 0, sizeof(params)); + params.cParams = cParams; + params.fParams.contentSizeFlag = 1; + return params; +} + +/*! ZSTD_getParams() : + * same idea as ZSTD_getCParams() + * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). + * Fields of `ZSTD_frameParameters` are set to default values */ +ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { + if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); +} +/**** ended inlining compress/zstd_compress.c ****/ +/**** start inlining compress/zstd_double_fast.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: zstd_double_fast.h ****/ + + +void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashLarge = ms->hashTable; + U32 const hBitsL = cParams->hashLog; + U32 const mls = cParams->minMatch; + U32* const hashSmall = ms->chainTable; + U32 const hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* ip = base + ms->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const U32 fastHashFillStep = 3; + + /* Always insert every fastHashFillStep position into the hash tables. + * Insert the other positions into the large hash table if their entry + * is empty. 
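+ *
+ * For illustration, with fastHashFillStep == 3 and a step starting at position p :
+ * hashSmall is updated only for p, while hashLarge is updated for p and, when
+ * dtlm == ZSTD_dtlm_full, also for p+1 and p+2 if their slots are still empty.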
+ */ + for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { + U32 const curr = (U32)(ip - base); + U32 i; + for (i = 0; i < fastHashFillStep; ++i) { + size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls); + size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8); + if (i == 0) + hashSmall[smHash] = curr + i; + if (i == 0 || hashLarge[lgHash] == 0) + hashLarge[lgHash] = curr + i; + /* Only load extra positions for ZSTD_dtlm_full */ + if (dtlm == ZSTD_dtlm_fast) + break; + } } +} + + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_doubleFast_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls /* template */, ZSTD_dictMode_e const dictMode) +{ + ZSTD_compressionParameters const* cParams = &ms->cParams; + U32* const hashLong = ms->hashTable; + const U32 hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + const U32 hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + /* presumes that, if there is a dictionary, it must be using Attach mode */ + const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); + const BYTE* const prefixLowest = base + prefixLowestIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dictCParams = + dictMode == ZSTD_dictMatchState ? + &dms->cParams : NULL; + const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ? + dms->hashTable : NULL; + const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ? + dms->chainTable : NULL; + const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ? + dms->window.dictLimit : 0; + const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? + dms->window.base : NULL; + const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ? + dictBase + dictStartIndex : NULL; + const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? + dms->window.nextSrc : NULL; + const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? + prefixLowestIndex - (U32)(dictEnd - dictBase) : + 0; + const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ? + dictCParams->hashLog : hBitsL; + const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ? + dictCParams->chainLog : hBitsS; + const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); + + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic"); + + assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); + + /* if a dictionary is attached, it must be within window range */ + if (dictMode == ZSTD_dictMatchState) { + assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); + } + + /* init */ + ip += (dictAndPrefixLength == 0); + if (dictMode == ZSTD_noDict) { + U32 const curr = (U32)(ip - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); + U32 const maxRep = curr - windowLow; + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + if (dictMode == ZSTD_dictMatchState) { + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. 
*/ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + } + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + U32 offset; + size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); + size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); + size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8); + size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls); + U32 const curr = (U32)(ip-base); + U32 const matchIndexL = hashLong[h2]; + U32 matchIndexS = hashSmall[h]; + const BYTE* matchLong = base + matchIndexL; + const BYTE* match = base + matchIndexS; + const U32 repIndex = curr + 1 - offset_1; + const BYTE* repMatch = (dictMode == ZSTD_dictMatchState + && repIndex < prefixLowestIndex) ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + hashLong[h2] = hashSmall[h] = curr; /* update hash tables */ + + /* check dictMatchState repcode */ + if (dictMode == ZSTD_dictMatchState + && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + goto _match_stored; + } + + /* check noDict repcode */ + if ( dictMode == ZSTD_noDict + && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { + mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + goto _match_stored; + } + + if (matchIndexL > prefixLowestIndex) { + /* check prefix long match */ + if (MEM_read64(matchLong) == MEM_read64(ip)) { + mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; + offset = (U32)(ip-matchLong); + while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + goto _match_found; + } + } else if (dictMode == ZSTD_dictMatchState) { + /* check dictMatchState long match */ + U32 const dictMatchIndexL = dictHashLong[dictHL]; + const BYTE* dictMatchL = dictBase + dictMatchIndexL; + assert(dictMatchL < dictEnd); + + if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) { + mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8; + offset = (U32)(curr - dictMatchIndexL - dictIndexDelta); + while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ + goto _match_found; + } } + + if (matchIndexS > prefixLowestIndex) { + /* check prefix short match */ + if (MEM_read32(match) == MEM_read32(ip)) { + goto _search_next_long; + } + } else if (dictMode == ZSTD_dictMatchState) { + /* check dictMatchState short match */ + U32 const dictMatchIndexS = dictHashSmall[dictHS]; + match = dictBase + dictMatchIndexS; + matchIndexS = dictMatchIndexS + dictIndexDelta; + + if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) { + goto _search_next_long; + } } + + ip += ((ip-anchor) >> kSearchStrength) + 1; +#if defined(__aarch64__) + PREFETCH_L1(ip+256); +#endif + continue; + +_search_next_long: + + { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); + U32 const matchIndexL3 = hashLong[hl3]; + const BYTE* matchL3 = base + matchIndexL3; + hashLong[hl3] = curr + 1; + + /* check 
prefix long +1 match */ + if (matchIndexL3 > prefixLowestIndex) { + if (MEM_read64(matchL3) == MEM_read64(ip+1)) { + mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; + ip++; + offset = (U32)(ip-matchL3); + while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ + goto _match_found; + } + } else if (dictMode == ZSTD_dictMatchState) { + /* check dict long +1 match */ + U32 const dictMatchIndexL3 = dictHashLong[dictHLNext]; + const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; + assert(dictMatchL3 < dictEnd); + if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) { + mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8; + ip++; + offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta); + while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ + goto _match_found; + } } } + + /* if no long +1 match, explore the short match we found */ + if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { + mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4; + offset = (U32)(curr - matchIndexS); + while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } else { + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + offset = (U32)(ip - match); + while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + + /* fall-through */ + +_match_found: + offset_2 = offset_1; + offset_1 = offset; + + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + +_match_stored: + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = curr+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } + + /* check immediate repcode */ + if (dictMode == ZSTD_dictMatchState) { + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState + && repIndex2 < prefixLowestIndex ? + dictBase + repIndex2 - dictIndexDelta : + base + repIndex2; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? 
dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } + + if (dictMode == ZSTD_noDict) { + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } + } /* while (ip < ilimit) */ + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? offset_2 : offsetSaved; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_doubleFast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + const U32 mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict); + case 5 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict); + case 6 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict); + case 7 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict); + } +} + + +size_t ZSTD_compressBlock_doubleFast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + const U32 mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState); + case 5 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState); + case 6 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState); + case 7 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState); + } +} + + +static size_t ZSTD_compressBlock_doubleFast_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls /* template */) +{ + ZSTD_compressionParameters const* cParams = &ms->cParams; + U32* const hashLong = ms->hashTable; + U32 const hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + U32 const hBitsS = cParams->chainLog; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); + const U32 dictStartIndex = lowLimit; + 
const U32 dictLimit = ms->window.dictLimit; + const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const dictStart = dictBase + dictStartIndex; + const BYTE* const dictEnd = dictBase + prefixStartIndex; + U32 offset_1=rep[0], offset_2=rep[1]; + + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize); + + /* if extDict is invalidated due to maxDistance, switch to "regular" variant */ + if (prefixStartIndex == dictStartIndex) + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict); + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); + const U32 matchIndex = hashSmall[hSmall]; + const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + + const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8); + const U32 matchLongIndex = hashLong[hLong]; + const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base; + const BYTE* matchLong = matchLongBase + matchLongIndex; + + const U32 curr = (U32)(ip-base); + const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */ + const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + size_t mLength; + hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */ + + if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ + & (offset_1 < curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */ + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + } else { + if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { + const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart; + U32 offset; + mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8; + offset = curr - matchLongIndex; + while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { + size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + U32 const matchIndex3 = hashLong[h3]; + const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base; + const BYTE* match3 = match3Base + matchIndex3; + U32 offset; + hashLong[h3] = curr + 1; + if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { + const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? 
dictStart : prefixStart; + mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8; + ip++; + offset = curr+1 - matchIndex3; + while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ + } else { + const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; + mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; + offset = curr - matchIndex; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else { + ip += ((ip-anchor) >> kSearchStrength) + 1; + continue; + } } + + /* move to next sequence start */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = curr+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } + + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ + & (offset_2 < current2 - dictStartIndex)) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_doubleFast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); + } +} +/**** ended inlining compress/zstd_double_fast.c ****/ +/**** start inlining compress/zstd_fast.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: zstd_fast.h ****/ + + +void ZSTD_fillHashTable(ZSTD_matchState_t* ms, + const void* const end, + ZSTD_dictTableLoadMethod_e dtlm) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hBits = cParams->hashLog; + U32 const mls = cParams->minMatch; + const BYTE* const base = ms->window.base; + const BYTE* ip = base + ms->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const U32 fastHashFillStep = 3; + + /* Always insert every fastHashFillStep position into the hash table. + * Insert the other positions if their hash entry is empty. + */ + for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) { + U32 const curr = (U32)(ip - base); + size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls); + hashTable[hash0] = curr; + if (dtlm == ZSTD_dtlm_fast) continue; + /* Only load extra positions for ZSTD_dtlm_full */ + { U32 p; + for (p = 1; p < fastHashFillStep; ++p) { + size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls); + if (hashTable[hash] == 0) { /* not yet filled */ + hashTable[hash] = curr + p; + } } } } +} + + +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_fast_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */ + const BYTE* ip0 = istart; + const BYTE* ip1; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); + ip0 += (ip0 == prefixStart); + ip1 = ip0 + 1; + { U32 const curr = (U32)(ip0 - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); + U32 const maxRep = curr - windowLow; + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + + /* Main Search Loop */ +#ifdef __INTEL_COMPILER + /* From intel 'The vector pragma indicates that the loop should be + * vectorized if it is legal to do so'. 
Can be used together with + * #pragma ivdep (but have opted to exclude that because intel + * warns against using it).*/ + #pragma vector always +#endif + while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */ + size_t mLength; + BYTE const* ip2 = ip0 + 2; + size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls); + U32 const val0 = MEM_read32(ip0); + size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls); + U32 const val1 = MEM_read32(ip1); + U32 const current0 = (U32)(ip0-base); + U32 const current1 = (U32)(ip1-base); + U32 const matchIndex0 = hashTable[h0]; + U32 const matchIndex1 = hashTable[h1]; + BYTE const* repMatch = ip2 - offset_1; + const BYTE* match0 = base + matchIndex0; + const BYTE* match1 = base + matchIndex1; + U32 offcode; + +#if defined(__aarch64__) + PREFETCH_L1(ip0+256); +#endif + + hashTable[h0] = current0; /* update hash table */ + hashTable[h1] = current1; /* update hash table */ + + assert(ip0 + 1 == ip1); + + if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) { + mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0; + ip0 = ip2 - mLength; + match0 = repMatch - mLength; + mLength += 4; + offcode = 0; + goto _match; + } + if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) { + /* found a regular match */ + goto _offset; + } + if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) { + /* found a regular match after one literal */ + ip0 = ip1; + match0 = match1; + goto _offset; + } + { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize; + assert(step >= 2); + ip0 += step; + ip1 += step; + continue; + } +_offset: /* Requires: ip0, match0 */ + /* Compute the offset code */ + offset_2 = offset_1; + offset_1 = (U32)(ip0-match0); + offcode = offset_1 + ZSTD_REP_MOVE; + mLength = 4; + /* Count the backwards match length */ + while (((ip0>anchor) & (match0>prefixStart)) + && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */ + +_match: /* Requires: ip0, match0, offcode */ + /* Count the forward length */ + mLength += ZSTD_count(ip0+mLength, match0+mLength, iend); + ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH); + /* match found */ + ip0 += mLength; + anchor = ip0; + + if (ip0 <= ilimit) { + /* Fill Table */ + assert(base+current0+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); + + if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */ + while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; + { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ + hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); + ip0 += rLength; + ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH); + anchor = ip0; + continue; /* faster when present (confirmed on gcc-8) ... (?) */ + } } } + ip1 = ip0 + 1; + } + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? 
offset_2 : offsetSaved; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_fast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + assert(ms->dictMatchState == NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7); + } +} + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_fast_dictMatchState_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, U32 const mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + U32 const stepSize = cParams->targetLength + !(cParams->targetLength); + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 prefixStartIndex = ms->window.dictLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; + const U32* const dictHashTable = dms->hashTable; + const U32 dictStartIndex = dms->window.dictLimit; + const BYTE* const dictBase = dms->window.base; + const BYTE* const dictStart = dictBase + dictStartIndex; + const BYTE* const dictEnd = dms->window.nextSrc; + const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase); + const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); + const U32 dictHLog = dictCParams->hashLog; + + /* if a dictionary is still attached, it necessarily means that + * it is within window size. So we just check it. */ + const U32 maxDistance = 1U << cParams->windowLog; + const U32 endIndex = (U32)((size_t)(ip - base) + srcSize); + assert(endIndex - prefixStartIndex <= maxDistance); + (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ + + /* ensure there will be no underflow + * when translating a dict index into a local index */ + assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); + + /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); + ip += (dictAndPrefixLength == 0); + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + size_t const h = ZSTD_hashPtr(ip, hlog, mls); + U32 const curr = (U32)(ip-base); + U32 const matchIndex = hashTable[h]; + const BYTE* match = base + matchIndex; + const U32 repIndex = curr + 1 - offset_1; + const BYTE* repMatch = (repIndex < prefixStartIndex) ? 
+ dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + hashTable[h] = curr; /* update hash table */ + + if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + } else if ( (matchIndex <= prefixStartIndex) ) { + size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); + U32 const dictMatchIndex = dictHashTable[dictHash]; + const BYTE* dictMatch = dictBase + dictMatchIndex; + if (dictMatchIndex <= dictStartIndex || + MEM_read32(dictMatch) != MEM_read32(ip)) { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } else { + /* found a dict match */ + U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta); + mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; + while (((ip>anchor) & (dictMatch>dictStart)) + && (ip[-1] == dictMatch[-1])) { + ip--; dictMatch--; mLength++; + } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + } else if (MEM_read32(match) != MEM_read32(ip)) { + /* it's not a match, and we're not going to check the dictionary */ + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } else { + /* found a regular match */ + U32 const offset = (U32)(ip-match); + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + while (((ip>anchor) & (match>prefixStart)) + && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + assert(base+curr+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); + + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? + dictBase - dictIndexDelta + repIndex2 : + base + repIndex2; + if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } + } + } + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? 
offset_2 : offsetSaved; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + +size_t ZSTD_compressBlock_fast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + assert(ms->dictMatchState != NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7); + } +} + + +static size_t ZSTD_compressBlock_fast_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, U32 const mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + U32 const stepSize = cParams->targetLength + !(cParams->targetLength); + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); + const U32 dictStartIndex = lowLimit; + const BYTE* const dictStart = dictBase + dictStartIndex; + const U32 dictLimit = ms->window.dictLimit; + const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const dictEnd = dictBase + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + U32 offset_1=rep[0], offset_2=rep[1]; + + DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1); + + /* switch to "regular" variant if extDict is invalidated due to maxDistance */ + if (prefixStartIndex == dictStartIndex) + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t h = ZSTD_hashPtr(ip, hlog, mls); + const U32 matchIndex = hashTable[h]; + const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + const U32 curr = (U32)(ip-base); + const U32 repIndex = curr + 1 - offset_1; + const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + hashTable[h] = curr; /* update hash table */ + DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr); + + if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ + & (offset_1 < curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */ + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? 
dictEnd : iend; + size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + } else { + if ( (matchIndex < dictStartIndex) || + (MEM_read32(match) != MEM_read32(ip)) ) { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } + { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; + U32 const offset = curr - matchIndex; + size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset_2 = offset_1; offset_1 = offset; /* update offset history */ + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ip += mLength; + anchor = ip; + } } + + if (ip <= ilimit) { + /* Fill Table */ + hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; + hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 < curr - dictStartIndex)) /* intentional overflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_fast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); + } +} +/**** ended inlining compress/zstd_fast.c ****/ +/**** start inlining compress/zstd_lazy.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: zstd_lazy.h ****/ + + +/*-************************************* +* Binary Tree search +***************************************/ + +static void +ZSTD_updateDUBT(ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* iend, + U32 mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + if (idx != target) + DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)", + idx, target, ms->window.dictLimit); + assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */ + (void)iend; + + assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */ + for ( ; idx < target ; idx++) { + size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */ + U32 const matchIndex = hashTable[h]; + + U32* const nextCandidatePtr = bt + 2*(idx&btMask); + U32* const sortMarkPtr = nextCandidatePtr + 1; + + DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx); + hashTable[h] = idx; /* Update Hash Table */ + *nextCandidatePtr = matchIndex; /* update BT like a chain */ + *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK; + } + ms->nextToUpdate = target; +} + + +/** ZSTD_insertDUBT1() : + * sort one already inserted but unsorted position + * assumption : curr >= btlow == (curr - btmask) + * doesn't fail */ +static void +ZSTD_insertDUBT1(ZSTD_matchState_t* ms, + U32 curr, const BYTE* inputEnd, + U32 nbCompares, U32 btLow, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr; + const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = smallerPtr + 1; + U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ + U32 dummy32; /* to be nullified at the end */ + U32 const windowValid = ms->window.lowLimit; + U32 const maxDistance = 1U << cParams->windowLog; + U32 const windowLow = (curr - windowValid > maxDistance) ? 
curr - maxDistance : windowValid; + + + DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)", + curr, dictLimit, windowLow); + assert(curr >= btLow); + assert(ip < iend); /* condition for ZSTD_count */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(matchIndex < curr); + /* note : all candidates are now supposed sorted, + * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK + * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */ + + if ( (dictMode != ZSTD_extDict) + || (matchIndex+matchLength >= dictLimit) /* both in current segment*/ + || (curr < dictLimit) /* both in extDict */) { + const BYTE* const mBase = ( (dictMode != ZSTD_extDict) + || (matchIndex+matchLength >= dictLimit)) ? + base : dictBase; + assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */ + || (curr < dictLimit) ); + match = mBase + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* preparation for next read of match[matchLength] */ + } + + DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ", + curr, matchIndex, (U32)matchLength); + + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } + + if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u", + matchIndex, btLow, nextPtr[1]); + smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u", + matchIndex, btLow, nextPtr[0]); + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; +} + + +static size_t +ZSTD_DUBT_findBetterDictMatch ( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + size_t bestLength, + U32 nbCompares, + U32 const mls, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_matchState_t * const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dmsCParams = &dms->cParams; + const U32 * const dictHashTable = dms->hashTable; + U32 const hashLog = dmsCParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 dictMatchIndex = dictHashTable[h]; + + const BYTE* const base = ms->window.base; + const BYTE* const prefixStart = base + ms->window.dictLimit; + U32 const curr = (U32)(ip-base); + const BYTE* 
const dictBase = dms->window.base; + const BYTE* const dictEnd = dms->window.nextSrc; + U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base); + U32 const dictLowLimit = dms->window.lowLimit; + U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit; + + U32* const dictBt = dms->chainTable; + U32 const btLog = dmsCParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask; + + size_t commonLengthSmaller=0, commonLengthLarger=0; + + (void)dictMode; + assert(dictMode == ZSTD_dictMatchState); + + while (nbCompares-- && (dictMatchIndex > dictLowLimit)) { + U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match = dictBase + dictMatchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (dictMatchIndex+matchLength >= dictHighLimit) + match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */ + + if (matchLength > bestLength) { + U32 matchIndex = dictMatchIndex + dictIndexDelta; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) { + DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)", + curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex); + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex; + } + if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */ + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ + commonLengthLarger = matchLength; + dictMatchIndex = nextPtr[0]; + } + } + + if (bestLength >= MINMATCH) { + U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)", + curr, (U32)bestLength, (U32)*offsetPtr, mIndex); + } + return bestLength; + +} + + +static size_t +ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + U32 const mls, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 matchIndex = hashTable[h]; + + const BYTE* const base = ms->window.base; + U32 const curr = (U32)(ip-base); + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog); + + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 const btLow = (btMask >= curr) ? 
0 : curr - btMask; + U32 const unsortLimit = MAX(btLow, windowLow); + + U32* nextCandidate = bt + 2*(matchIndex&btMask); + U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1; + U32 nbCompares = 1U << cParams->searchLog; + U32 nbCandidates = nbCompares; + U32 previousCandidate = 0; + + DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr); + assert(ip <= iend-8); /* required for h calculation */ + assert(dictMode != ZSTD_dedicatedDictSearch); + + /* reach end of unsorted candidates list */ + while ( (matchIndex > unsortLimit) + && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK) + && (nbCandidates > 1) ) { + DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted", + matchIndex); + *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */ + previousCandidate = matchIndex; + matchIndex = *nextCandidate; + nextCandidate = bt + 2*(matchIndex&btMask); + unsortedMark = bt + 2*(matchIndex&btMask) + 1; + nbCandidates --; + } + + /* nullify last candidate if it's still unsorted + * simplification, detrimental to compression ratio, beneficial for speed */ + if ( (matchIndex > unsortLimit) + && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) { + DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u", + matchIndex); + *nextCandidate = *unsortedMark = 0; + } + + /* batch sort stacked candidates */ + matchIndex = previousCandidate; + while (matchIndex) { /* will end on matchIndex == 0 */ + U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1; + U32 const nextCandidateIdx = *nextCandidateIdxPtr; + ZSTD_insertDUBT1(ms, matchIndex, iend, + nbCandidates, unsortLimit, dictMode); + matchIndex = nextCandidateIdx; + nbCandidates++; + } + + /* find longest match */ + { size_t commonLengthSmaller = 0, commonLengthLarger = 0; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = bt + 2*(curr&btMask) + 1; + U32 matchEndIdx = curr + 8 + 1; + U32 dummy32; /* to be nullified at the end */ + size_t bestLength = 0; + + matchIndex = hashTable[h]; + hashTable[h] = curr; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; + + if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex; + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + if (dictMode == ZSTD_dictMatchState) { + nbCompares = 0; /* in addition to avoiding checking any + * further in this loop, make sure we + * skip checking in the 
dictionary. */ + } + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + if (dictMode == ZSTD_dictMatchState && nbCompares) { + bestLength = ZSTD_DUBT_findBetterDictMatch( + ms, ip, iend, + offsetPtr, bestLength, nbCompares, + mls, dictMode); + } + + assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */ + ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ + if (bestLength >= MINMATCH) { + U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", + curr, (U32)bestLength, (U32)*offsetPtr, mIndex); + } + return bestLength; + } +} + + +/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ +FORCE_INLINE_TEMPLATE size_t +ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls /* template */, + const ZSTD_dictMode_e dictMode) +{ + DEBUGLOG(7, "ZSTD_BtFindBestMatch"); + if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateDUBT(ms, ip, iLimit, mls); + return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode); +} + + +static size_t +ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); + } +} + + +static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); + } +} + + +static size_t ZSTD_BtFindBestMatch_extDict_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); + } +} + 
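/* ----------------------------------------------------------------------------
 * [Editorial aside - illustration only, not part of the vendored zstd sources]
 * The binary-tree searchers above (ZSTD_DUBT_findBestMatch and
 * ZSTD_DUBT_findBetterDictMatch) only accept a longer candidate when
 *     4*(matchLength - bestLength) > highbit(newDistance+1) - highbit(currentOffset+1)
 * i.e. a longer match is kept only if the added length outweighs the extra bits
 * needed to encode the larger offset, at a 4:1 length-to-bits weighting.
 * The standalone sketch below restates that trade-off with plain integer types
 * and raw distances (a simplification: the code above compares against the
 * stored offset code, which already includes ZSTD_REP_MOVE). highbit32_u32()
 * and prefer_longer_match() are illustrative names, not zstd API.
 * ---------------------------------------------------------------------------- */
#include <stdint.h>
#include <stdio.h>

/* index of the highest set bit, ~floor(log2(v)); mirrors ZSTD_highbit32() for v > 0 */
static int highbit32_u32(uint32_t v)
{
    int r = 0;
    while (v >>= 1) r++;
    return r;
}

/* nonzero when a (newLen, newDist) candidate should replace (bestLen, bestDist) */
static int prefer_longer_match(uint32_t newLen, uint32_t newDist,
                               uint32_t bestLen, uint32_t bestDist)
{
    int const lengthGain = (int)newLen - (int)bestLen;   /* extra matched bytes */
    int const offsetCost = highbit32_u32(newDist + 1)
                         - highbit32_u32(bestDist + 1);  /* extra offset bits */
    return 4 * lengthGain > offsetCost;   /* same 4:1 weighting as the code above */
}

int main(void)
{
    /* one extra byte of match, but the offset needs ~6 more bits: rejected */
    printf("%d\n", prefer_longer_match(9, 65536, 8, 1024));   /* prints 0 */
    /* three extra bytes at the same offsets: accepted */
    printf("%d\n", prefer_longer_match(11, 65536, 8, 1024));  /* prints 1 */
    return 0;
}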
+/*********************************** +* Dedicated dict search +***********************************/ + +void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip) +{ + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32* const hashTable = ms->hashTable; + U32* const chainTable = ms->chainTable; + U32 const chainSize = 1 << ms->cParams.chainLog; + U32 idx = ms->nextToUpdate; + U32 const minChain = chainSize < target ? target - chainSize : idx; + U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 const cacheSize = bucketSize - 1; + U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize; + U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts; + + /* We know the hashtable is oversized by a factor of `bucketSize`. + * We are going to temporarily pretend `bucketSize == 1`, keeping only a + * single entry. We will use the rest of the space to construct a temporary + * chaintable. + */ + U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG; + U32* const tmpHashTable = hashTable; + U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog); + U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog; + U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx; + U32 hashIdx; + + assert(ms->cParams.chainLog <= 24); + assert(ms->cParams.hashLog > ms->cParams.chainLog); + assert(idx != 0); + assert(tmpMinChain <= minChain); + + /* fill conventional hash table and conventional chain table */ + for ( ; idx < target; idx++) { + U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch); + if (idx >= tmpMinChain) { + tmpChainTable[idx - tmpMinChain] = hashTable[h]; + } + tmpHashTable[h] = idx; + } + + /* sort chains into ddss chain table */ + { + U32 chainPos = 0; + for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) { + U32 count; + U32 countBeyondMinChain = 0; + U32 i = tmpHashTable[hashIdx]; + for (count = 0; i >= tmpMinChain && count < cacheSize; count++) { + /* skip through the chain to the first position that won't be + * in the hash cache bucket */ + if (i < minChain) { + countBeyondMinChain++; + } + i = tmpChainTable[i - tmpMinChain]; + } + if (count == cacheSize) { + for (count = 0; count < chainLimit;) { + if (i < minChain) { + if (!i || ++countBeyondMinChain > cacheSize) { + /* only allow pulling `cacheSize` number of entries + * into the cache or chainTable beyond `minChain`, + * to replace the entries pulled out of the + * chainTable into the cache. This lets us reach + * back further without increasing the total number + * of entries in the chainTable, guaranteeing the + * DDSS chain table will fit into the space + * allocated for the regular one. */ + break; + } + } + chainTable[chainPos++] = i; + count++; + if (i < tmpMinChain) { + break; + } + i = tmpChainTable[i - tmpMinChain]; + } + } else { + count = 0; + } + if (count) { + tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count; + } else { + tmpHashTable[hashIdx] = 0; + } + } + assert(chainPos <= chainSize); /* I believe this is guaranteed... 
*/ + } + + /* move chain pointers into the last entry of each hash bucket */ + for (hashIdx = (1 << hashLog); hashIdx; ) { + U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 const chainPackedPointer = tmpHashTable[hashIdx]; + U32 i; + for (i = 0; i < cacheSize; i++) { + hashTable[bucketIdx + i] = 0; + } + hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer; + } + + /* fill the buckets of the hash table */ + for (idx = ms->nextToUpdate; idx < target; idx++) { + U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch) + << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 i; + /* Shift hash cache down 1. */ + for (i = cacheSize - 1; i; i--) + hashTable[h + i] = hashTable[h + i - 1]; + hashTable[h] = idx; + } + + ms->nextToUpdate = target; +} + +/* Returns the longest match length found in the dedicated dict search structure. + * If none are longer than the argument ml, then ml will be returned. + */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nbAttempts, + const ZSTD_matchState_t* const dms, + const BYTE* const ip, const BYTE* const iLimit, + const BYTE* const prefixStart, const U32 curr, + const U32 dictLimit, const size_t ddsIdx) { + const U32 ddsLowestIndex = dms->window.dictLimit; + const BYTE* const ddsBase = dms->window.base; + const BYTE* const ddsEnd = dms->window.nextSrc; + const U32 ddsSize = (U32)(ddsEnd - ddsBase); + const U32 ddsIndexDelta = dictLimit - ddsSize; + const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG); + const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1; + U32 ddsAttempt; + U32 matchIndex; + + for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) { + PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]); + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 const chainIndex = chainPackedPointer >> 8; + + PREFETCH_L1(&dms->chainTable[chainIndex]); + } + + for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->hashTable[ddsIdx + ddsAttempt]; + match = ddsBase + matchIndex; + + if (!matchIndex) { + return ml; + } + + /* guaranteed by table construction */ + (void)ddsLowestIndex; + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) { + /* best possible, avoids read overflow on next attempt */ + return ml; + } + } + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 chainIndex = chainPackedPointer >> 8; + U32 const chainLength = chainPackedPointer & 0xFF; + U32 const chainAttempts = nbAttempts - ddsAttempt; + U32 const chainLimit = chainAttempts > chainLength ? 
chainLength : chainAttempts; + U32 chainAttempt; + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) { + PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]); + } + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->chainTable[chainIndex]; + match = ddsBase + matchIndex; + + /* guaranteed by table construction */ + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + } + } + return ml; +} + + +/* ********************************* +* Hash Chain +***********************************/ +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] + +/* Update chains up to ip (excluded) + Assumption : always within prefix (i.e. not within extDict) */ +FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( + ZSTD_matchState_t* ms, + const ZSTD_compressionParameters* const cParams, + const BYTE* ip, U32 const mls) +{ + U32* const hashTable = ms->hashTable; + const U32 hashLog = cParams->hashLog; + U32* const chainTable = ms->chainTable; + const U32 chainMask = (1 << cParams->chainLog) - 1; + const BYTE* const base = ms->window.base; + const U32 target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + while(idx < target) { /* catch up */ + size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); + NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; + hashTable[h] = idx; + idx++; + } + + ms->nextToUpdate = target; + return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; +} + +U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { + const ZSTD_compressionParameters* const cParams = &ms->cParams; + return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); +} + +/* inlining is important to hardwire a hot branch (template emulation) */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_HcFindBestMatch_generic ( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls, const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const chainTable = ms->chainTable; + const U32 chainSize = (1 << cParams->chainLog); + const U32 chainMask = chainSize-1; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const U32 curr = (U32)(ip-base); + const U32 maxDistance = 1U << cParams->windowLog; + const U32 lowestValid = ms->window.lowLimit; + const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + const U32 isDictionary = (ms->loadedDictEnd != 0); + const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; + const U32 minChain = curr > chainSize ? curr - chainSize : 0; + U32 nbAttempts = 1U << cParams->searchLog; + size_t ml=4-1; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch + ? 
dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0; + const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch + ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0; + + U32 matchIndex; + + if (dictMode == ZSTD_dedicatedDictSearch) { + const U32* entry = &dms->hashTable[ddsIdx]; + PREFETCH_L1(entry); + } + + /* HC4 match finder */ + matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); + + for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) { + size_t currentMl=0; + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + const BYTE* const match = base + matchIndex; + assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ + if (match[ml] == ip[ml]) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex; + assert(match+4 <= dictEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + + if (matchIndex <= minChain) break; + matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); + } + + if (dictMode == ZSTD_dedicatedDictSearch) { + ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts, dms, + ip, iLimit, prefixStart, curr, dictLimit, ddsIdx); + } else if (dictMode == ZSTD_dictMatchState) { + const U32* const dmsChainTable = dms->chainTable; + const U32 dmsChainSize = (1 << dms->cParams.chainLog); + const U32 dmsChainMask = dmsChainSize - 1; + const U32 dmsLowestIndex = dms->window.dictLimit; + const BYTE* const dmsBase = dms->window.base; + const BYTE* const dmsEnd = dms->window.nextSrc; + const U32 dmsSize = (U32)(dmsEnd - dmsBase); + const U32 dmsIndexDelta = dictLimit - dmsSize; + const U32 dmsMinChain = dmsSize > dmsChainSize ? 
dmsSize - dmsChainSize : 0; + + matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)]; + + for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) { + size_t currentMl=0; + const BYTE* const match = dmsBase + matchIndex; + assert(match+4 <= dmsEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + + if (matchIndex <= dmsMinChain) break; + + matchIndex = dmsChainTable[matchIndex & dmsChainMask]; + } + } + + return ml; +} + + +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); + } +} + + +static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); + } +} + + +static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch); + } +} + + +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); + } +} + +/* ********************************* +* (SIMD) Row-based matchfinder +***********************************/ +/* Constants for row-based hash */ +#define ZSTD_ROW_HASH_TAG_OFFSET 1 /* byte offset of hashes in the match state's tagTable from the beginning of a row */ +#define ZSTD_ROW_HASH_TAG_BITS 8 /* nb bits to use for the tag */ +#define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1) + +#define ZSTD_ROW_HASH_CACHE_MASK (ZSTD_ROW_HASH_CACHE_SIZE - 1) + +typedef U32 ZSTD_VecMask; /* Clarifies when we are interacting with a U32 
representing a mask of matches */
+
+#if !defined(ZSTD_NO_INTRINSICS) && defined(__SSE2__) /* SIMD SSE version */
+
+#include <emmintrin.h>
+typedef __m128i ZSTD_Vec128;
+
+/* Returns a 128-bit container with 128-bits from src */
+static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) {
+    return _mm_loadu_si128((ZSTD_Vec128 const*)src);
+}
+
+/* Returns a ZSTD_Vec128 with the byte "val" packed 16 times */
+static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) {
+    return _mm_set1_epi8((char)val);
+}
+
+/* Do byte-by-byte comparison result of x and y. Then collapse 128-bit resultant mask
+ * into a 32-bit mask that is the MSB of each byte.
+ * */
+static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) {
+    return (ZSTD_VecMask)_mm_movemask_epi8(_mm_cmpeq_epi8(x, y));
+}
+
+typedef struct {
+    __m128i fst;
+    __m128i snd;
+} ZSTD_Vec256;
+
+static ZSTD_Vec256 ZSTD_Vec256_read(const void* const ptr) {
+    ZSTD_Vec256 v;
+    v.fst = ZSTD_Vec128_read(ptr);
+    v.snd = ZSTD_Vec128_read((ZSTD_Vec128 const*)ptr + 1);
+    return v;
+}
+
+static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) {
+    ZSTD_Vec256 v;
+    v.fst = ZSTD_Vec128_set8(val);
+    v.snd = ZSTD_Vec128_set8(val);
+    return v;
+}
+
+static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) {
+    ZSTD_VecMask fstMask;
+    ZSTD_VecMask sndMask;
+    fstMask = ZSTD_Vec128_cmpMask8(x.fst, y.fst);
+    sndMask = ZSTD_Vec128_cmpMask8(x.snd, y.snd);
+    return fstMask | (sndMask << 16);
+}
+
+#elif !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON) /* SIMD ARM NEON Version */
+
+#include <arm_neon.h>
+typedef uint8x16_t ZSTD_Vec128;
+
+static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) {
+    return vld1q_u8((const BYTE* const)src);
+}
+
+static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) {
+    return vdupq_n_u8(val);
+}
+
+/* Mimics '_mm_movemask_epi8()' from SSE */
+static U32 ZSTD_vmovmaskq_u8(ZSTD_Vec128 val) {
+    /* Shift out everything but the MSB bits in each byte */
+    uint16x8_t highBits = vreinterpretq_u16_u8(vshrq_n_u8(val, 7));
+    /* Merge the even lanes together with vsra (right shift and add) */
+    uint32x4_t paired16 = vreinterpretq_u32_u16(vsraq_n_u16(highBits, highBits, 7));
+    uint64x2_t paired32 = vreinterpretq_u64_u32(vsraq_n_u32(paired16, paired16, 14));
+    uint8x16_t paired64 = vreinterpretq_u8_u64(vsraq_n_u64(paired32, paired32, 28));
+    /* Extract the low 8 bits from each lane, merge */
+    return vgetq_lane_u8(paired64, 0) | ((U32)vgetq_lane_u8(paired64, 8) << 8);
+}
+
+static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) {
+    return (ZSTD_VecMask)ZSTD_vmovmaskq_u8(vceqq_u8(x, y));
+}
+
+typedef struct {
+    uint8x16_t fst;
+    uint8x16_t snd;
+} ZSTD_Vec256;
+
+static ZSTD_Vec256 ZSTD_Vec256_read(const void* const ptr) {
+    ZSTD_Vec256 v;
+    v.fst = ZSTD_Vec128_read(ptr);
+    v.snd = ZSTD_Vec128_read((ZSTD_Vec128 const*)ptr + 1);
+    return v;
+}
+
+static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) {
+    ZSTD_Vec256 v;
+    v.fst = ZSTD_Vec128_set8(val);
+    v.snd = ZSTD_Vec128_set8(val);
+    return v;
+}
+
+static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) {
+    ZSTD_VecMask fstMask;
+    ZSTD_VecMask sndMask;
+    fstMask = ZSTD_Vec128_cmpMask8(x.fst, y.fst);
+    sndMask = ZSTD_Vec128_cmpMask8(x.snd, y.snd);
+    return fstMask | (sndMask << 16);
+}
+
+#else /* Scalar fallback version */
+
+#define VEC128_NB_SIZE_T (16 / sizeof(size_t))
+typedef struct {
+    size_t vec[VEC128_NB_SIZE_T];
+} ZSTD_Vec128;
+
+static ZSTD_Vec128 ZSTD_Vec128_read(const void* const src) {
+    ZSTD_Vec128 ret;
+    ZSTD_memcpy(ret.vec, src, VEC128_NB_SIZE_T*sizeof(size_t));
+    return ret;
+}
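/* ----------------------------------------------------------------------------
 * [Editorial aside - illustration only, not part of the vendored zstd sources]
 * Whichever branch is compiled above (SSE2, NEON or scalar), ZSTD_Vec128_cmpMask8()
 * answers the same question: which of the 16 one-byte tags stored in a row of
 * the tagTable equal the 1-byte tag of the position being searched, reported as
 * a bitmask with one bit per row entry. The standalone sketch below reproduces
 * that behaviour in plain C so the mask layout is easy to inspect; row_tag_mask()
 * and the sample tag values are made up for the example. In the real search the
 * mask is additionally rotated by the row's "head" index (see
 * ZSTD_VecMask_rotateRight further down), because each row is a circular buffer.
 * ---------------------------------------------------------------------------- */
#include <stdint.h>
#include <stdio.h>

/* bit i of the result is set when tags[i] == tag, matching the semantics of
 * _mm_movemask_epi8(_mm_cmpeq_epi8(row, broadcast(tag))) in the SSE2 branch */
static uint32_t row_tag_mask(const uint8_t tags[16], uint8_t tag)
{
    uint32_t mask = 0;
    int i;
    for (i = 0; i < 16; ++i)
        mask |= (uint32_t)(tags[i] == tag) << i;
    return mask;
}

int main(void)
{
    const uint8_t row[16] = { 0x3c, 0x11, 0x3c, 0x00, 0x9a, 0x3c, 0x42, 0x07,
                              0x3c, 0x55, 0x21, 0x3c, 0x00, 0x68, 0x3c, 0x3c };
    /* tag 0x3c sits at entries 0, 2, 5, 8, 11, 14 and 15 of this row */
    printf("0x%04x\n", (unsigned)row_tag_mask(row, 0x3c));   /* prints 0xc925 */
    return 0;
}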
+ +static ZSTD_Vec128 ZSTD_Vec128_set8(BYTE val) { + ZSTD_Vec128 ret = { {0} }; + int startBit = sizeof(size_t) * 8 - 8; + for (;startBit >= 0; startBit -= 8) { + unsigned j = 0; + for (;j < VEC128_NB_SIZE_T; ++j) { + ret.vec[j] |= ((size_t)val << startBit); + } + } + return ret; +} + +/* Compare x to y, byte by byte, generating a "matches" bitfield */ +static ZSTD_VecMask ZSTD_Vec128_cmpMask8(ZSTD_Vec128 x, ZSTD_Vec128 y) { + ZSTD_VecMask res = 0; + unsigned i = 0; + unsigned l = 0; + for (; i < VEC128_NB_SIZE_T; ++i) { + const size_t cmp1 = x.vec[i]; + const size_t cmp2 = y.vec[i]; + unsigned j = 0; + for (; j < sizeof(size_t); ++j, ++l) { + if (((cmp1 >> j*8) & 0xFF) == ((cmp2 >> j*8) & 0xFF)) { + res |= ((U32)1 << (j+i*sizeof(size_t))); + } + } + } + return res; +} + +#define VEC256_NB_SIZE_T 2*VEC128_NB_SIZE_T +typedef struct { + size_t vec[VEC256_NB_SIZE_T]; +} ZSTD_Vec256; + +static ZSTD_Vec256 ZSTD_Vec256_read(const void* const src) { + ZSTD_Vec256 ret; + ZSTD_memcpy(ret.vec, src, VEC256_NB_SIZE_T*sizeof(size_t)); + return ret; +} + +static ZSTD_Vec256 ZSTD_Vec256_set8(BYTE val) { + ZSTD_Vec256 ret = { {0} }; + int startBit = sizeof(size_t) * 8 - 8; + for (;startBit >= 0; startBit -= 8) { + unsigned j = 0; + for (;j < VEC256_NB_SIZE_T; ++j) { + ret.vec[j] |= ((size_t)val << startBit); + } + } + return ret; +} + +/* Compare x to y, byte by byte, generating a "matches" bitfield */ +static ZSTD_VecMask ZSTD_Vec256_cmpMask8(ZSTD_Vec256 x, ZSTD_Vec256 y) { + ZSTD_VecMask res = 0; + unsigned i = 0; + unsigned l = 0; + for (; i < VEC256_NB_SIZE_T; ++i) { + const size_t cmp1 = x.vec[i]; + const size_t cmp2 = y.vec[i]; + unsigned j = 0; + for (; j < sizeof(size_t); ++j, ++l) { + if (((cmp1 >> j*8) & 0xFF) == ((cmp2 >> j*8) & 0xFF)) { + res |= ((U32)1 << (j+i*sizeof(size_t))); + } + } + } + return res; +} + +#endif /* !defined(ZSTD_NO_INTRINSICS) && defined(__SSE2__) */ + +/* ZSTD_VecMask_next(): + * Starting from the LSB, returns the idx of the next non-zero bit. + * Basically counting the nb of trailing zeroes. + */ +static U32 ZSTD_VecMask_next(ZSTD_VecMask val) { +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + return _BitScanForward(&r, val) ? (U32)r : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) + return (U32)__builtin_ctz(val); +# else + /* Software ctz version: http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightMultLookup */ + static const U32 multiplyDeBruijnBitPosition[32] = + { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + return multiplyDeBruijnBitPosition[((U32)((v & -(int)v) * 0x077CB531U)) >> 27]; +# endif +} + +/* ZSTD_VecMask_rotateRight(): + * Rotates a bitfield to the right by "rotation" bits. + * If the rotation is greater than totalBits, the returned mask is 0. + */ +FORCE_INLINE_TEMPLATE ZSTD_VecMask +ZSTD_VecMask_rotateRight(ZSTD_VecMask mask, U32 const rotation, U32 const totalBits) { + if (rotation == 0) + return mask; + switch (totalBits) { + default: + assert(0); + case 16: + return (mask >> rotation) | (U16)(mask << (16 - rotation)); + case 32: + return (mask >> rotation) | (U32)(mask << (32 - rotation)); + } +} + +/* ZSTD_row_nextIndex(): + * Returns the next index to insert at within a tagTable row, and updates the "head" + * value to reflect the update. 
Essentially cycles backwards from [0, {entries per row}) + */ +FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) { + U32 const next = (*tagRow - 1) & rowMask; + *tagRow = (BYTE)next; + return next; +} + +/* ZSTD_isAligned(): + * Checks that a pointer is aligned to "align" bytes which must be a power of 2. + */ +MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) { + assert((align & (align - 1)) == 0); + return (((size_t)ptr) & (align - 1)) == 0; +} + +/* ZSTD_row_prefetch(): + * Performs prefetching for the hashTable and tagTable at a given row. + */ +FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) { + PREFETCH_L1(hashTable + relRow); + if (rowLog == 5) { + PREFETCH_L1(hashTable + relRow + 16); + } + PREFETCH_L1(tagTable + relRow); + assert(rowLog == 4 || rowLog == 5); + assert(ZSTD_isAligned(hashTable + relRow, 64)); /* prefetched hash row always 64-byte aligned */ + assert(ZSTD_isAligned(tagTable + relRow, (size_t)1 << rowLog)); /* prefetched tagRow sits on a multiple of 32 or 64 bytes */ +} + +/* ZSTD_row_fillHashCache(): + * Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries, + * but not beyond iLimit. + */ +static void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base, + U32 const rowLog, U32 const mls, + U32 idx, const BYTE* const iLimit) +{ + U32 const* const hashTable = ms->hashTable; + U16 const* const tagTable = ms->tagTable; + U32 const hashLog = ms->rowHashLog; + U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1); + U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch); + + for (; idx < lim; ++idx) { + U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + ZSTD_row_prefetch(hashTable, tagTable, row, rowLog); + ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash; + } + + DEBUGLOG(6, "ZSTD_row_fillHashCache(): [%u %u %u %u %u %u %u %u]", ms->hashCache[0], ms->hashCache[1], + ms->hashCache[2], ms->hashCache[3], ms->hashCache[4], + ms->hashCache[5], ms->hashCache[6], ms->hashCache[7]); +} + +/* ZSTD_row_nextCachedHash(): + * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at + * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable. + */ +FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable, + U16 const* tagTable, BYTE const* base, + U32 idx, U32 const hashLog, + U32 const rowLog, U32 const mls) +{ + U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + ZSTD_row_prefetch(hashTable, tagTable, row, rowLog); + { U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK]; + cache[idx & ZSTD_ROW_HASH_CACHE_MASK] = newHash; + return hash; + } +} + +/* ZSTD_row_update_internal(): + * Inserts the byte at ip into the appropriate position in the hash table. + * Determines the relative row, and the position within the {16, 32} entry row to insert at. 
+ */ +FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip, + U32 const mls, U32 const rowLog, + U32 const rowMask, U32 const useCache) +{ + U32* const hashTable = ms->hashTable; + U16* const tagTable = ms->tagTable; + U32 const hashLog = ms->rowHashLog; + const BYTE* const base = ms->window.base; + const U32 target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + DEBUGLOG(6, "ZSTD_row_update_internal(): nextToUpdate=%u, current=%u", idx, target); + for (; idx < target; ++idx) { + U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, idx, hashLog, rowLog, mls) + : (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + U32* const row = hashTable + relRow; + BYTE* tagRow = (BYTE*)(tagTable + relRow); /* Though tagTable is laid out as a table of U16, each tag is only 1 byte. + Explicit cast allows us to get exact desired position within each row */ + U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask); + + assert(hash == ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls)); + ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK; + row[pos] = idx; + } + ms->nextToUpdate = target; +} + +/* ZSTD_row_update(): + * External wrapper for ZSTD_row_update_internal(). Used for filling the hashtable during dictionary + * processing. + */ +void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) { + const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5; + const U32 rowMask = (1u << rowLog) - 1; + const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */); + + DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog); + ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* dont use cache */); +} + +/* Returns a ZSTD_VecMask (U32) that has the nth bit set to 1 if the newly-computed "tag" matches + * the hash at the nth position in a row of the tagTable. + */ +FORCE_INLINE_TEMPLATE +ZSTD_VecMask ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head, const U32 rowEntries) { + ZSTD_VecMask matches = 0; + if (rowEntries == 16) { + ZSTD_Vec128 hashes = ZSTD_Vec128_read(tagRow + ZSTD_ROW_HASH_TAG_OFFSET); + ZSTD_Vec128 expandedTags = ZSTD_Vec128_set8(tag); + matches = ZSTD_Vec128_cmpMask8(hashes, expandedTags); + } else if (rowEntries == 32) { + ZSTD_Vec256 hashes = ZSTD_Vec256_read(tagRow + ZSTD_ROW_HASH_TAG_OFFSET); + ZSTD_Vec256 expandedTags = ZSTD_Vec256_set8(tag); + matches = ZSTD_Vec256_cmpMask8(hashes, expandedTags); + } else { + assert(0); + } + /* Each row is a circular buffer beginning at the value of "head". So we must rotate the "matches" bitfield + to match up with the actual layout of the entries within the hashTable */ + return ZSTD_VecMask_rotateRight(matches, head, rowEntries); +} + +/* The high-level approach of the SIMD row based match finder is as follows: + * - Figure out where to insert the new entry: + * - Generate a hash from a byte along with an additional 1-byte "short hash". The additional byte is our "tag" + * - The hashTable is effectively split into groups or "rows" of 16 or 32 entries of U32, and the hash determines + * which row to insert into. + * - Determine the correct position within the row to insert the entry into. Each row of 16 or 32 can + * be considered as a circular buffer with a "head" index that resides in the tagTable. + * - Also insert the "tag" into the equivalent row and position in the tagTable. 
+ * - Note: The tagTable has 17 or 33 1-byte entries per row, due to 16 or 32 tags, and 1 "head" entry. + * The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively, + * for alignment/performance reasons, leaving some bytes unused. + * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and + * generate a bitfield that we can cycle through to check the collisions in the hash table. + * - Pick the longest match. + */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_RowFindBestMatch_generic ( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls, const ZSTD_dictMode_e dictMode, + const U32 rowLog) +{ + U32* const hashTable = ms->hashTable; + U16* const tagTable = ms->tagTable; + U32* const hashCache = ms->hashCache; + const U32 hashLog = ms->rowHashLog; + const ZSTD_compressionParameters* const cParams = &ms->cParams; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const U32 curr = (U32)(ip-base); + const U32 maxDistance = 1U << cParams->windowLog; + const U32 lowestValid = ms->window.lowLimit; + const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + const U32 isDictionary = (ms->loadedDictEnd != 0); + const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; + const U32 rowEntries = (1U << rowLog); + const U32 rowMask = rowEntries - 1; + const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */ + U32 nbAttempts = 1U << cappedSearchLog; + size_t ml=4-1; + + /* DMS/DDS variables that may be referenced laster */ + const ZSTD_matchState_t* const dms = ms->dictMatchState; + size_t ddsIdx; + U32 ddsExtraAttempts; /* cctx hash tables are limited in searches, but allow extra searches into DDS */ + U32 dmsTag; + U32* dmsRow; + BYTE* dmsTagRow; + + if (dictMode == ZSTD_dedicatedDictSearch) { + const U32 ddsHashLog = dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG; + { /* Prefetch DDS hashtable entry */ + ddsIdx = ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG; + PREFETCH_L1(&dms->hashTable[ddsIdx]); + } + ddsExtraAttempts = cParams->searchLog > rowLog ? 
1U << (cParams->searchLog - rowLog) : 0; + } + + if (dictMode == ZSTD_dictMatchState) { + /* Prefetch DMS rows */ + U32* const dmsHashTable = dms->hashTable; + U16* const dmsTagTable = dms->tagTable; + U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls); + U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK; + dmsTagRow = (BYTE*)(dmsTagTable + dmsRelRow); + dmsRow = dmsHashTable + dmsRelRow; + ZSTD_row_prefetch(dmsHashTable, dmsTagTable, dmsRelRow, rowLog); + } + + /* Update the hashTable and tagTable up to (but not including) ip */ + ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */); + { /* Get the hash for ip, compute the appropriate row */ + U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls); + U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog; + U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK; + U32* const row = hashTable + relRow; + BYTE* tagRow = (BYTE*)(tagTable + relRow); + U32 const head = *tagRow & rowMask; + U32 matchBuffer[32 /* maximum nb entries per row */]; + size_t numMatches = 0; + size_t currMatch = 0; + ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, head, rowEntries); + + /* Cycle through the matches and prefetch */ + for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) { + U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask; + U32 const matchIndex = row[matchPos]; + assert(numMatches < rowEntries); + if (matchIndex < lowLimit) + break; + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + PREFETCH_L1(base + matchIndex); + } else { + PREFETCH_L1(dictBase + matchIndex); + } + matchBuffer[numMatches++] = matchIndex; + } + + /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop + in ZSTD_row_update_internal() at the next search. 
*/ + { + U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask); + tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag; + row[pos] = ms->nextToUpdate++; + } + + /* Return the longest match */ + for (; currMatch < numMatches; ++currMatch) { + U32 const matchIndex = matchBuffer[currMatch]; + size_t currentMl=0; + assert(matchIndex < curr); + assert(matchIndex >= lowLimit); + + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + const BYTE* const match = base + matchIndex; + assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ + if (match[ml] == ip[ml]) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex; + assert(match+4 <= dictEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; + } + + /* Save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + } + } + + if (dictMode == ZSTD_dedicatedDictSearch) { + ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts + ddsExtraAttempts, dms, + ip, iLimit, prefixStart, curr, dictLimit, ddsIdx); + } else if (dictMode == ZSTD_dictMatchState) { + /* TODO: Measure and potentially add prefetching to DMS */ + const U32 dmsLowestIndex = dms->window.dictLimit; + const BYTE* const dmsBase = dms->window.base; + const BYTE* const dmsEnd = dms->window.nextSrc; + const U32 dmsSize = (U32)(dmsEnd - dmsBase); + const U32 dmsIndexDelta = dictLimit - dmsSize; + + { U32 const head = *dmsTagRow & rowMask; + U32 matchBuffer[32 /* maximum nb row entries */]; + size_t numMatches = 0; + size_t currMatch = 0; + ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, head, rowEntries); + + for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) { + U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask; + U32 const matchIndex = dmsRow[matchPos]; + if (matchIndex < dmsLowestIndex) + break; + PREFETCH_L1(dmsBase + matchIndex); + matchBuffer[numMatches++] = matchIndex; + } + + /* Return the longest match */ + for (; currMatch < numMatches; ++currMatch) { + U32 const matchIndex = matchBuffer[currMatch]; + size_t currentMl=0; + assert(matchIndex >= dmsLowestIndex); + assert(matchIndex < curr); + + { const BYTE* const match = dmsBase + matchIndex; + assert(match+4 <= dmsEnd); + if (MEM_read32(match) == MEM_read32(ip)) + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; + } + + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; + } + } + } + } + return ml; +} + +/* Inlining is important to hardwire a hot branch (template emulation) */ +FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + const ZSTD_dictMode_e dictMode, size_t* offsetPtr, const U32 rowLog) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, dictMode, rowLog); + case 5 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, dictMode, rowLog); + case 7 : + case 6 : return ZSTD_RowFindBestMatch_generic(ms, ip, iLimit, 
offsetPtr, 6, dictMode, rowLog); + } +} + +FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_selectRowLog ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5); + switch(cappedSearchLog) + { + default : + case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_noDict, offsetPtr, 4); + case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_noDict, offsetPtr, 5); + } +} + +FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_dictMatchState_selectRowLog( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5); + switch(cappedSearchLog) + { + default : + case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dictMatchState, offsetPtr, 4); + case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dictMatchState, offsetPtr, 5); + } +} + +FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_dedicatedDictSearch_selectRowLog( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5); + switch(cappedSearchLog) + { + default : + case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dedicatedDictSearch, offsetPtr, 4); + case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_dedicatedDictSearch, offsetPtr, 5); + } +} + +FORCE_INLINE_TEMPLATE size_t ZSTD_RowFindBestMatch_extDict_selectRowLog ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + const U32 cappedSearchLog = MIN(ms->cParams.searchLog, 5); + switch(cappedSearchLog) + { + default : + case 4 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_extDict, offsetPtr, 4); + case 5 : return ZSTD_RowFindBestMatch_selectMLS(ms, ip, iLimit, ZSTD_extDict, offsetPtr, 5); + } +} + + +/* ******************************* +* Common parser - lazy strategy +*********************************/ +typedef enum { search_hashChain=0, search_binaryTree=1, search_rowHash=2 } searchMethod_e; + +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_lazy_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const searchMethod_e searchMethod, const U32 depth, + ZSTD_dictMode_e const dictMode) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = searchMethod == search_rowHash ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8; + const BYTE* const base = ms->window.base; + const U32 prefixLowestIndex = ms->window.dictLimit; + const BYTE* const prefixLowest = base + prefixLowestIndex; + const U32 rowLog = ms->cParams.searchLog < 5 ? 4 : 5; + + typedef size_t (*searchMax_f)( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); + + /** + * This table is indexed first by the four ZSTD_dictMode_e values, and then + * by the two searchMethod_e values. NULLs are placed for configurations + * that should never occur (extDict modes go to the other implementation + * below and there is no DDSS for binary tree search yet). 
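A small sketch (not part of the diff) of the dispatch pattern used by the searchFuncs table that follows: a 2-D array of function pointers indexed by mode and method, with NULL marking combinations that must never be selected, guarded by an assert just like the assert(searchMax != NULL) below. All names here are hypothetical.

/* Sketch only: hypothetical 2-D function-pointer dispatch with NULL holes. */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

typedef enum { MODE_A = 0, MODE_B = 1 } mode_e;
typedef enum { METHOD_X = 0, METHOD_Y = 1 } method_e;
typedef int (*search_fn)(int arg);

static int searchAX(int arg) { return arg + 1; }
static int searchAY(int arg) { return arg + 2; }
static int searchBX(int arg) { return arg + 3; }

int main(void)
{
    /* NULL means "this configuration is handled elsewhere / never occurs". */
    static const search_fn table[2][2] = {
        { searchAX, searchAY },
        { searchBX, NULL     },
    };
    mode_e   mode   = MODE_B;
    method_e method = METHOD_X;
    search_fn fn = table[mode][method];
    assert(fn != NULL);
    printf("%d\n", fn(10));
    return 0;
}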
+ */ + const searchMax_f searchFuncs[4][3] = { + { + ZSTD_HcFindBestMatch_selectMLS, + ZSTD_BtFindBestMatch_selectMLS, + ZSTD_RowFindBestMatch_selectRowLog + }, + { + NULL, + NULL, + NULL + }, + { + ZSTD_HcFindBestMatch_dictMatchState_selectMLS, + ZSTD_BtFindBestMatch_dictMatchState_selectMLS, + ZSTD_RowFindBestMatch_dictMatchState_selectRowLog + }, + { + ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS, + NULL, + ZSTD_RowFindBestMatch_dedicatedDictSearch_selectRowLog + } + }; + + searchMax_f const searchMax = searchFuncs[dictMode][(int)searchMethod]; + U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; + + const int isDMS = dictMode == ZSTD_dictMatchState; + const int isDDS = dictMode == ZSTD_dedicatedDictSearch; + const int isDxS = isDMS || isDDS; + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0; + const BYTE* const dictBase = isDxS ? dms->window.base : NULL; + const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL; + const BYTE* const dictEnd = isDxS ? dms->window.nextSrc : NULL; + const U32 dictIndexDelta = isDxS ? + prefixLowestIndex - (U32)(dictEnd - dictBase) : + 0; + const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); + + assert(searchMax != NULL); + + DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)", (U32)dictMode, (U32)searchMethod); + ip += (dictAndPrefixLength == 0); + if (dictMode == ZSTD_noDict) { + U32 const curr = (U32)(ip - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog); + U32 const maxRep = curr - windowLow; + if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; + if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; + } + if (isDxS) { + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + } + + if (searchMethod == search_rowHash) { + ZSTD_row_fillHashCache(ms, base, rowLog, + MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */), + ms->nextToUpdate, ilimit); + } + + /* Match Loop */ +#if defined(__GNUC__) && defined(__x86_64__) + /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the + * code alignment is perturbed. To fix the instability align the loop on 32-bytes. + */ + __asm__(".p2align 5"); +#endif + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + + /* check repCode */ + if (isDxS) { + const U32 repIndex = (U32)(ip - base) + 1 - offset_1; + const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) + && repIndex < prefixLowestIndex) ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + if (depth==0) goto _storeSequence; + } + } + if ( dictMode == ZSTD_noDict + && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { + matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + if (depth==0) goto _storeSequence; + } + + /* first search (depth 0) */ + { size_t offsetFound = 999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < 4) { + ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(mlRep * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + if (isDxS) { + const U32 repIndex = (U32)(ip - base) - offset_1; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; + size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + int const gain2 = (int)(mlRep * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + } + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(mlRep * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + if (isDxS) { + const U32 repIndex = (U32)(ip - base) - offset_1; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? 
dictEnd : iend; + size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + int const gain2 = (int)(mlRep * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + } + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* NOTE: + * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. + * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which + * overflows the pointer, which is undefined behavior. + */ + /* catch up */ + if (offset) { + if (dictMode == ZSTD_noDict) { + while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest)) + && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */ + { start--; matchLength++; } + } + if (isDxS) { + U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex; + const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + } + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + if (isDxS) { + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex = current2 - offset_2; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase - dictIndexDelta + repIndex : + base + repIndex; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; + } + break; + } + } + + if (dictMode == ZSTD_noDict) { + while ( ((ip <= ilimit) & (offset_2>0)) + && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { + /* store sequence */ + matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } + + /* Save reps for next block */ + rep[0] = offset_1 ? offset_1 : savedOffset; + rep[1] = offset_2 ? 
offset_2 : savedOffset; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_btlazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_greedy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btlazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_greedy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); +} + + +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch); +} + +/* Row-based matchfinder */ +size_t ZSTD_compressBlock_lazy2_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + 
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_greedy_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy2_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_greedy_dictMatchState_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState); +} + + +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch); +} + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_lazy_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const searchMethod_e searchMethod, const U32 depth) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = searchMethod == search_rowHash ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8; + const BYTE* const base = ms->window.base; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const dictStart = dictBase + ms->window.lowLimit; + const U32 windowLog = ms->cParams.windowLog; + const U32 rowLog = ms->cParams.searchLog < 5 ? 
4 : 5; + + typedef size_t (*searchMax_f)( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); + const searchMax_f searchFuncs[3] = { + ZSTD_HcFindBestMatch_extDict_selectMLS, + ZSTD_BtFindBestMatch_extDict_selectMLS, + ZSTD_RowFindBestMatch_extDict_selectRowLog + }; + searchMax_f searchMax = searchFuncs[(int)searchMethod]; + U32 offset_1 = rep[0], offset_2 = rep[1]; + + DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod); + + /* init */ + ip += (ip == prefixStart); + if (searchMethod == search_rowHash) { + ZSTD_row_fillHashCache(ms, base, rowLog, + MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */), + ms->nextToUpdate, ilimit); + } + + /* Match Loop */ +#if defined(__GNUC__) && defined(__x86_64__) + /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the + * code alignment is perturbed. To fix the instability align the loop on 32-bytes. + */ + __asm__(".p2align 5"); +#endif + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + U32 curr = (U32)(ip-base); + + /* check repCode */ + { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog); + const U32 repIndex = (U32)(curr+1 - offset_1); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + & (offset_1 < curr+1 - windowLow) ) /* note: we are searching at curr+1 */ + if (MEM_read32(ip+1) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4; + if (depth==0) goto _storeSequence; + } } + + /* first search (depth 0) */ + { size_t offsetFound = 999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < 4) { + ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + & (offset_1 < curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 1 */ + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + & (offset_1 < curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 2 */ + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* catch up */ + if (offset) { + U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; + const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + while (ip <= ilimit) { + const U32 repCurrent = (U32)(ip-base); + const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog); + const U32 repIndex = repCurrent - offset_2; + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + & (offset_2 < repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } + break; + } } + + /* Save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_greedy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0); +} + +size_t ZSTD_compressBlock_lazy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); +} + +size_t ZSTD_compressBlock_lazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); +} + +size_t ZSTD_compressBlock_btlazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); +} + +size_t ZSTD_compressBlock_greedy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0); +} + +size_t ZSTD_compressBlock_lazy_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1); +} + +size_t ZSTD_compressBlock_lazy2_extDict_row( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2); +} +/**** ended inlining compress/zstd_lazy.c ****/ +/**** start inlining compress/zstd_ldm.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/**** skipping file: zstd_ldm.h ****/ + +/**** skipping file: ../common/debug.h ****/ +/**** skipping file: ../common/xxhash.h ****/ +/**** skipping file: zstd_fast.h ****/ +/**** skipping file: zstd_double_fast.h ****/ +/**** start inlining zstd_ldm_geartab.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +#ifndef ZSTD_LDM_GEARTAB_H +#define ZSTD_LDM_GEARTAB_H + +static U64 ZSTD_ldm_gearTab[256] = { + 0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc, + 0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05, + 0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e, + 0x9c8528f65badeaca, 0x86563706e2097529, 0x2902475fa375d889, + 0xafb32a9739a5ebe6, 0xce2714da3883e639, 0x21eaf821722e69e, + 0x37b628620b628, 0x49a8d455d88caf5, 0x8556d711e6958140, + 0x4f7ae74fc605c1f, 0x829f0c3468bd3a20, 0x4ffdc885c625179e, + 0x8473de048a3daf1b, 0x51008822b05646b2, 0x69d75d12b2d1cc5f, + 0x8c9d4a19159154bc, 0xc3cc10f4abbd4003, 0xd06ddc1cecb97391, + 0xbe48e6e7ed80302e, 0x3481db31cee03547, 0xacc3f67cdaa1d210, + 0x65cb771d8c7f96cc, 0x8eb27177055723dd, 0xc789950d44cd94be, + 0x934feadc3700b12b, 0x5e485f11edbdf182, 0x1e2e2a46fd64767a, + 0x2969ca71d82efa7c, 0x9d46e9935ebbba2e, 0xe056b67e05e6822b, + 0x94d73f55739d03a0, 0xcd7010bdb69b5a03, 0x455ef9fcd79b82f4, + 0x869cb54a8749c161, 0x38d1a4fa6185d225, 0xb475166f94bbe9bb, + 0xa4143548720959f1, 0x7aed4780ba6b26ba, 0xd0ce264439e02312, + 0x84366d746078d508, 0xa8ce973c72ed17be, 0x21c323a29a430b01, + 0x9962d617e3af80ee, 0xab0ce91d9c8cf75b, 0x530e8ee6d19a4dbc, + 0x2ef68c0cf53f5d72, 0xc03a681640a85506, 0x496e4e9f9c310967, + 0x78580472b59b14a0, 0x273824c23b388577, 0x66bf923ad45cb553, + 0x47ae1a5a2492ba86, 0x35e304569e229659, 0x4765182a46870b6f, + 0x6cbab625e9099412, 0xddac9a2e598522c1, 0x7172086e666624f2, + 0xdf5003ca503b7837, 0x88c0c1db78563d09, 0x58d51865acfc289d, + 0x177671aec65224f1, 0xfb79d8a241e967d7, 0x2be1e101cad9a49a, + 0x6625682f6e29186b, 0x399553457ac06e50, 0x35dffb4c23abb74, + 0x429db2591f54aade, 0xc52802a8037d1009, 0x6acb27381f0b25f3, + 0xf45e2551ee4f823b, 0x8b0ea2d99580c2f7, 0x3bed519cbcb4e1e1, + 0xff452823dbb010a, 0x9d42ed614f3dd267, 0x5b9313c06257c57b, + 0xa114b8008b5e1442, 0xc1fe311c11c13d4b, 0x66e8763ea34c5568, + 0x8b982af1c262f05d, 0xee8876faaa75fbb7, 0x8a62a4d0d172bb2a, + 0xc13d94a3b7449a97, 0x6dbbba9dc15d037c, 0xc786101f1d92e0f1, + 0xd78681a907a0b79b, 0xf61aaf2962c9abb9, 0x2cfd16fcd3cb7ad9, + 0x868c5b6744624d21, 0x25e650899c74ddd7, 0xba042af4a7c37463, + 0x4eb1a539465a3eca, 0xbe09dbf03b05d5ca, 0x774e5a362b5472ba, + 0x47a1221229d183cd, 0x504b0ca18ef5a2df, 0xdffbdfbde2456eb9, + 0x46cd2b2fbee34634, 0xf2aef8fe819d98c3, 0x357f5276d4599d61, + 0x24a5483879c453e3, 0x88026889192b4b9, 0x28da96671782dbec, + 0x4ef37c40588e9aaa, 0x8837b90651bc9fb3, 0xc164f741d3f0e5d6, + 0xbc135a0a704b70ba, 0x69cd868f7622ada, 0xbc37ba89e0b9c0ab, + 0x47c14a01323552f6, 0x4f00794bacee98bb, 0x7107de7d637a69d5, + 0x88af793bb6f2255e, 0xf3c6466b8799b598, 0xc288c616aa7f3b59, + 0x81ca63cf42fca3fd, 0x88d85ace36a2674b, 0xd056bd3792389e7, + 0xe55c396c4e9dd32d, 0xbefb504571e6c0a6, 0x96ab32115e91e8cc, + 0xbf8acb18de8f38d1, 0x66dae58801672606, 0x833b6017872317fb, + 0xb87c16f2d1c92864, 0xdb766a74e58b669c, 0x89659f85c61417be, + 0xc8daad856011ea0c, 0x76a4b565b6fe7eae, 0xa469d085f6237312, + 0xaaf0365683a3e96c, 0x4dbb746f8424f7b8, 0x638755af4e4acc1, + 0x3d7807f5bde64486, 0x17be6d8f5bbb7639, 0x903f0cd44dc35dc, + 0x67b672eafdf1196c, 0xa676ff93ed4c82f1, 0x521d1004c5053d9d, + 0x37ba9ad09ccc9202, 0x84e54d297aacfb51, 0xa0b4b776a143445, + 0x820d471e20b348e, 0x1874383cb83d46dc, 0x97edeec7a1efe11c, + 0xb330e50b1bdc42aa, 0x1dd91955ce70e032, 0xa514cdb88f2939d5, + 0x2791233fd90db9d3, 0x7b670a4cc50f7a9b, 0x77c07d2a05c6dfa5, + 0xe3778b6646d0a6fa, 0xb39c8eda47b56749, 0x933ed448addbef28, + 0xaf846af6ab7d0bf4, 0xe5af208eb666e49, 0x5e6622f73534cd6a, + 0x297daeca42ef5b6e, 0x862daef3d35539a6, 
0xe68722498f8e1ea9, + 0x981c53093dc0d572, 0xfa09b0bfbf86fbf5, 0x30b1e96166219f15, + 0x70e7d466bdc4fb83, 0x5a66736e35f2a8e9, 0xcddb59d2b7c1baef, + 0xd6c7d247d26d8996, 0xea4e39eac8de1ba3, 0x539c8bb19fa3aff2, + 0x9f90e4c5fd508d8, 0xa34e5956fbaf3385, 0x2e2f8e151d3ef375, + 0x173691e9b83faec1, 0xb85a8d56bf016379, 0x8382381267408ae3, + 0xb90f901bbdc0096d, 0x7c6ad32933bcec65, 0x76bb5e2f2c8ad595, + 0x390f851a6cf46d28, 0xc3e6064da1c2da72, 0xc52a0c101cfa5389, + 0xd78eaf84a3fbc530, 0x3781b9e2288b997e, 0x73c2f6dea83d05c4, + 0x4228e364c5b5ed7, 0x9d7a3edf0da43911, 0x8edcfeda24686756, + 0x5e7667a7b7a9b3a1, 0x4c4f389fa143791d, 0xb08bc1023da7cddc, + 0x7ab4be3ae529b1cc, 0x754e6132dbe74ff9, 0x71635442a839df45, + 0x2f6fb1643fbe52de, 0x961e0a42cf7a8177, 0xf3b45d83d89ef2ea, + 0xee3de4cf4a6e3e9b, 0xcd6848542c3295e7, 0xe4cee1664c78662f, + 0x9947548b474c68c4, 0x25d73777a5ed8b0b, 0xc915b1d636b7fc, + 0x21c2ba75d9b0d2da, 0x5f6b5dcf608a64a1, 0xdcf333255ff9570c, + 0x633b922418ced4ee, 0xc136dde0b004b34a, 0x58cc83b05d4b2f5a, + 0x5eb424dda28e42d2, 0x62df47369739cd98, 0xb4e0b42485e4ce17, + 0x16e1f0c1f9a8d1e7, 0x8ec3916707560ebf, 0x62ba6e2df2cc9db3, + 0xcbf9f4ff77d83a16, 0x78d9d7d07d2bbcc4, 0xef554ce1e02c41f4, + 0x8d7581127eccf94d, 0xa9b53336cb3c8a05, 0x38c42c0bf45c4f91, + 0x640893cdf4488863, 0x80ec34bc575ea568, 0x39f324f5b48eaa40, + 0xe9d9ed1f8eff527f, 0x9224fc058cc5a214, 0xbaba00b04cfe7741, + 0x309a9f120fcf52af, 0xa558f3ec65626212, 0x424bec8b7adabe2f, + 0x41622513a6aea433, 0xb88da2d5324ca798, 0xd287733b245528a4, + 0x9a44697e6d68aec3, 0x7b1093be2f49bb28, 0x50bbec632e3d8aad, + 0x6cd90723e1ea8283, 0x897b9e7431b02bf3, 0x219efdcb338a7047, + 0x3b0311f0a27c0656, 0xdb17bf91c0db96e7, 0x8cd4fd6b4e85a5b2, + 0xfab071054ba6409d, 0x40d6fe831fa9dfd9, 0xaf358debad7d791e, + 0xeb8d0e25a65e3e58, 0xbbcbd3df14e08580, 0xcf751f27ecdab2b, + 0x2b4da14f2613d8f4 +}; + +#endif /* ZSTD_LDM_GEARTAB_H */ +/**** ended inlining zstd_ldm_geartab.h ****/ + +#define LDM_BUCKET_SIZE_LOG 3 +#define LDM_MIN_MATCH_LENGTH 64 +#define LDM_HASH_RLOG 7 + +typedef struct { + U64 rolling; + U64 stopMask; +} ldmRollingHashState_t; + +/** ZSTD_ldm_gear_init(): + * + * Initializes the rolling hash state such that it will honor the + * settings in params. */ +static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params) +{ + unsigned maxBitsInMask = MIN(params->minMatchLength, 64); + unsigned hashRateLog = params->hashRateLog; + + state->rolling = ~(U32)0; + + /* The choice of the splitting criterion is subject to two conditions: + * 1. it has to trigger on average every 2^(hashRateLog) bytes; + * 2. ideally, it has to depend on a window of minMatchLength bytes. + * + * In the gear hash algorithm, bit n depends on the last n bytes; + * so in order to obtain a good quality splitting criterion it is + * preferable to use bits with high weight. + * + * To match condition 1 we use a mask with hashRateLog bits set + * and, because of the previous remark, we make sure these bits + * have the highest possible weight while still respecting + * condition 2. + */ + if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) { + state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog); + } else { + /* In this degenerate case we simply honor the hash rate. */ + state->stopMask = ((U64)1 << hashRateLog) - 1; + } +} + +/** ZSTD_ldm_gear_reset() + * Feeds [data, data + minMatchLength) into the hash without registering any + * splits. This effectively resets the hash state. 
This is used when skipping + * over data, either at the beginning of a block, or skipping sections. + */ +static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state, + BYTE const* data, size_t minMatchLength) +{ + U64 hash = state->rolling; + size_t n = 0; + +#define GEAR_ITER_ONCE() do { \ + hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \ + n += 1; \ + } while (0) + while (n + 3 < minMatchLength) { + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + } + while (n < minMatchLength) { + GEAR_ITER_ONCE(); + } +#undef GEAR_ITER_ONCE +} + +/** ZSTD_ldm_gear_feed(): + * + * Registers in the splits array all the split points found in the first + * size bytes following the data pointer. This function terminates when + * either all the data has been processed or LDM_BATCH_SIZE splits are + * present in the splits array. + * + * Precondition: The splits array must not be full. + * Returns: The number of bytes processed. */ +static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state, + BYTE const* data, size_t size, + size_t* splits, unsigned* numSplits) +{ + size_t n; + U64 hash, mask; + + hash = state->rolling; + mask = state->stopMask; + n = 0; + +#define GEAR_ITER_ONCE() do { \ + hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \ + n += 1; \ + if (UNLIKELY((hash & mask) == 0)) { \ + splits[*numSplits] = n; \ + *numSplits += 1; \ + if (*numSplits == LDM_BATCH_SIZE) \ + goto done; \ + } \ + } while (0) + + while (n + 3 < size) { + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + } + while (n < size) { + GEAR_ITER_ONCE(); + } + +#undef GEAR_ITER_ONCE + +done: + state->rolling = hash; + return n; +} + +void ZSTD_ldm_adjustParameters(ldmParams_t* params, + ZSTD_compressionParameters const* cParams) +{ + params->windowLog = cParams->windowLog; + ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); + DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); + if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; + if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; + if (params->hashLog == 0) { + params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG); + assert(params->hashLog <= ZSTD_HASHLOG_MAX); + } + if (params->hashRateLog == 0) { + params->hashRateLog = params->windowLog < params->hashLog + ? 0 + : params->windowLog - params->hashLog; + } + params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog); +} + +size_t ZSTD_ldm_getTableSize(ldmParams_t params) +{ + size_t const ldmHSize = ((size_t)1) << params.hashLog; + size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog); + size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog); + size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize) + + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t)); + return params.enableLdm ? totalSize : 0; +} + +size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize) +{ + return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0; +} + +/** ZSTD_ldm_getBucket() : + * Returns a pointer to the start of the bucket associated with hash. 
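A toy, self-contained version (not part of the diff) of the gear rolling hash fed by ZSTD_ldm_gear_feed above: each byte is folded in as hash = (hash << 1) + tab[byte], and a position becomes a split candidate whenever (hash & stopMask) == 0, which on random input fires about once every 2^hashRateLog bytes. The randomly filled table and the names gear_tab/find_splits are assumptions of this sketch; zstd ships the fixed ZSTD_ldm_gearTab and positions the mask bits according to minMatchLength.

/* Sketch only: toy gear rolling hash with a stop-mask split criterion. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static uint64_t gear_tab[256];

static void gear_tab_init(void)
{
    /* Any fixed pseudo-random 64-bit values work; zstd uses a hard-coded table. */
    uint64_t x = 0x9E3779B97F4A7C15ULL;
    for (int i = 0; i < 256; ++i) {
        x ^= x << 13; x ^= x >> 7; x ^= x << 17;   /* xorshift64 */
        gear_tab[i] = x;
    }
}

/* Print split positions in data[0..size), using the top hashRateLog hash bits. */
static void find_splits(const uint8_t* data, size_t size, unsigned hashRateLog)
{
    uint64_t const stopMask = (((uint64_t)1 << hashRateLog) - 1) << (64 - hashRateLog);
    uint64_t hash = 0;
    for (size_t n = 0; n < size; ++n) {
        hash = (hash << 1) + gear_tab[data[n]];
        if ((hash & stopMask) == 0)
            printf("split at %zu\n", n + 1);
    }
}

int main(void)
{
    uint8_t buf[1 << 16];
    gear_tab_init();
    for (size_t i = 0; i < sizeof(buf); ++i) buf[i] = (uint8_t)rand();
    find_splits(buf, sizeof(buf), 7);   /* expect roughly 2^16 / 2^7 = 512 splits */
    return 0;
}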
*/ +static ldmEntry_t* ZSTD_ldm_getBucket( + ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams) +{ + return ldmState->hashTable + (hash << ldmParams.bucketSizeLog); +} + +/** ZSTD_ldm_insertEntry() : + * Insert the entry with corresponding hash into the hash table */ +static void ZSTD_ldm_insertEntry(ldmState_t* ldmState, + size_t const hash, const ldmEntry_t entry, + ldmParams_t const ldmParams) +{ + BYTE* const pOffset = ldmState->bucketOffsets + hash; + unsigned const offset = *pOffset; + + *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry; + *pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1)); + +} + +/** ZSTD_ldm_countBackwardsMatch() : + * Returns the number of bytes that match backwards before pIn and pMatch. + * + * We count only bytes where pMatch >= pBase and pIn >= pAnchor. */ +static size_t ZSTD_ldm_countBackwardsMatch( + const BYTE* pIn, const BYTE* pAnchor, + const BYTE* pMatch, const BYTE* pMatchBase) +{ + size_t matchLength = 0; + while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) { + pIn--; + pMatch--; + matchLength++; + } + return matchLength; +} + +/** ZSTD_ldm_countBackwardsMatch_2segments() : + * Returns the number of bytes that match backwards from pMatch, + * even with the backwards match spanning 2 different segments. + * + * On reaching `pMatchBase`, start counting from mEnd */ +static size_t ZSTD_ldm_countBackwardsMatch_2segments( + const BYTE* pIn, const BYTE* pAnchor, + const BYTE* pMatch, const BYTE* pMatchBase, + const BYTE* pExtDictStart, const BYTE* pExtDictEnd) +{ + size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase); + if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) { + /* If backwards match is entirely in the extDict or prefix, immediately return */ + return matchLength; + } + DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength); + matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart); + DEBUGLOG(7, "final backwards match length = %zu", matchLength); + return matchLength; +} + +/** ZSTD_ldm_fillFastTables() : + * + * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies. + * This is similar to ZSTD_loadDictionaryContent. + * + * The tables for the other strategies are filled within their + * block compressors. 
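A compact sketch (not part of the diff) of the bucketed insertion performed by ZSTD_ldm_insertEntry above: each hash value owns 2^bucketSizeLog slots plus one per-bucket cursor, and new entries overwrite the oldest slot in round-robin order via the same (offset + 1) & mask wrap. The sizes and names below are hypothetical.

/* Sketch only: round-robin bucket insertion with a per-bucket cursor. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BUCKET_SIZE_LOG 2           /* 4 entries per bucket (hypothetical) */
#define HASH_LOG        4           /* 16 buckets (hypothetical) */

typedef struct { uint32_t offset; uint32_t checksum; } entry_t;

static entry_t table[(1u << HASH_LOG) << BUCKET_SIZE_LOG];
static uint8_t bucketOffsets[1u << HASH_LOG];

static void insert(uint32_t hash, entry_t e)
{
    entry_t* bucket = table + ((size_t)hash << BUCKET_SIZE_LOG);
    uint8_t* pOff = bucketOffsets + hash;
    bucket[*pOff] = e;
    *pOff = (uint8_t)((*pOff + 1) & ((1u << BUCKET_SIZE_LOG) - 1));  /* wrap */
}

int main(void)
{
    memset(table, 0, sizeof(table));
    memset(bucketOffsets, 0, sizeof(bucketOffsets));
    for (uint32_t i = 0; i < 6; ++i) {      /* 6 inserts into bucket 3 hit slots 0,1,2,3,0,1 */
        entry_t e = { 100 + i, 0xABCD0000u + i };
        insert(3, e);
    }
    /* prints offsets 104, 105, 102, 103: the two oldest entries were overwritten */
    for (uint32_t s = 0; s < 4; ++s)
        printf("slot %u -> offset %u\n", (unsigned)s,
               (unsigned)table[(3u << BUCKET_SIZE_LOG) + s].offset);
    return 0;
}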
*/ +static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, + void const* end) +{ + const BYTE* const iend = (const BYTE*)end; + + switch(ms->cParams.strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast); + break; + + case ZSTD_dfast: + ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast); + break; + + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + default: + assert(0); /* not possible : not a valid strategy id */ + } + + return 0; +} + +void ZSTD_ldm_fillHashTable( + ldmState_t* ldmState, const BYTE* ip, + const BYTE* iend, ldmParams_t const* params) +{ + U32 const minMatchLength = params->minMatchLength; + U32 const hBits = params->hashLog - params->bucketSizeLog; + BYTE const* const base = ldmState->window.base; + BYTE const* const istart = ip; + ldmRollingHashState_t hashState; + size_t* const splits = ldmState->splitIndices; + unsigned numSplits; + + DEBUGLOG(5, "ZSTD_ldm_fillHashTable"); + + ZSTD_ldm_gear_init(&hashState, params); + while (ip < iend) { + size_t hashed; + unsigned n; + + numSplits = 0; + hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits); + + for (n = 0; n < numSplits; n++) { + if (ip + splits[n] >= istart + minMatchLength) { + BYTE const* const split = ip + splits[n] - minMatchLength; + U64 const xxhash = XXH64(split, minMatchLength, 0); + U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1)); + ldmEntry_t entry; + + entry.offset = (U32)(split - base); + entry.checksum = (U32)(xxhash >> 32); + ZSTD_ldm_insertEntry(ldmState, hash, entry, *params); + } + } + + ip += hashed; + } +} + + +/** ZSTD_ldm_limitTableUpdate() : + * + * Sets cctx->nextToUpdate to a position corresponding closer to anchor + * if it is far way + * (after a long match, only update tables a limited amount). */ +static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor) +{ + U32 const curr = (U32)(anchor - ms->window.base); + if (curr > ms->nextToUpdate + 1024) { + ms->nextToUpdate = + curr - MIN(512, curr - ms->nextToUpdate - 1024); + } +} + +static size_t ZSTD_ldm_generateSequences_internal( + ldmState_t* ldmState, rawSeqStore_t* rawSeqStore, + ldmParams_t const* params, void const* src, size_t srcSize) +{ + /* LDM parameters */ + int const extDict = ZSTD_window_hasExtDict(ldmState->window); + U32 const minMatchLength = params->minMatchLength; + U32 const entsPerBucket = 1U << params->bucketSizeLog; + U32 const hBits = params->hashLog - params->bucketSizeLog; + /* Prefix and extDict parameters */ + U32 const dictLimit = ldmState->window.dictLimit; + U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit; + BYTE const* const base = ldmState->window.base; + BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL; + BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL; + BYTE const* const dictEnd = extDict ? 
dictBase + dictLimit : NULL; + BYTE const* const lowPrefixPtr = base + dictLimit; + /* Input bounds */ + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + BYTE const* const ilimit = iend - HASH_READ_SIZE; + /* Input positions */ + BYTE const* anchor = istart; + BYTE const* ip = istart; + /* Rolling hash state */ + ldmRollingHashState_t hashState; + /* Arrays for staged-processing */ + size_t* const splits = ldmState->splitIndices; + ldmMatchCandidate_t* const candidates = ldmState->matchCandidates; + unsigned numSplits; + + if (srcSize < minMatchLength) + return iend - anchor; + + /* Initialize the rolling hash state with the first minMatchLength bytes */ + ZSTD_ldm_gear_init(&hashState, params); + ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength); + ip += minMatchLength; + + while (ip < ilimit) { + size_t hashed; + unsigned n; + + numSplits = 0; + hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip, + splits, &numSplits); + + for (n = 0; n < numSplits; n++) { + BYTE const* const split = ip + splits[n] - minMatchLength; + U64 const xxhash = XXH64(split, minMatchLength, 0); + U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1)); + + candidates[n].split = split; + candidates[n].hash = hash; + candidates[n].checksum = (U32)(xxhash >> 32); + candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params); + PREFETCH_L1(candidates[n].bucket); + } + + for (n = 0; n < numSplits; n++) { + size_t forwardMatchLength = 0, backwardMatchLength = 0, + bestMatchLength = 0, mLength; + U32 offset; + BYTE const* const split = candidates[n].split; + U32 const checksum = candidates[n].checksum; + U32 const hash = candidates[n].hash; + ldmEntry_t* const bucket = candidates[n].bucket; + ldmEntry_t const* cur; + ldmEntry_t const* bestEntry = NULL; + ldmEntry_t newEntry; + + newEntry.offset = (U32)(split - base); + newEntry.checksum = checksum; + + /* If a split point would generate a sequence overlapping with + * the previous one, we merely register it in the hash table and + * move on */ + if (split < anchor) { + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + continue; + } + + for (cur = bucket; cur < bucket + entsPerBucket; cur++) { + size_t curForwardMatchLength, curBackwardMatchLength, + curTotalMatchLength; + if (cur->checksum != checksum || cur->offset <= lowestIndex) { + continue; + } + if (extDict) { + BYTE const* const curMatchBase = + cur->offset < dictLimit ? dictBase : base; + BYTE const* const pMatch = curMatchBase + cur->offset; + BYTE const* const matchEnd = + cur->offset < dictLimit ? dictEnd : iend; + BYTE const* const lowMatchPtr = + cur->offset < dictLimit ? 
dictStart : lowPrefixPtr; + curForwardMatchLength = + ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr); + if (curForwardMatchLength < minMatchLength) { + continue; + } + curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments( + split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd); + } else { /* !extDict */ + BYTE const* const pMatch = base + cur->offset; + curForwardMatchLength = ZSTD_count(split, pMatch, iend); + if (curForwardMatchLength < minMatchLength) { + continue; + } + curBackwardMatchLength = + ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr); + } + curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength; + + if (curTotalMatchLength > bestMatchLength) { + bestMatchLength = curTotalMatchLength; + forwardMatchLength = curForwardMatchLength; + backwardMatchLength = curBackwardMatchLength; + bestEntry = cur; + } + } + + /* No match found -- insert an entry into the hash table + * and process the next candidate match */ + if (bestEntry == NULL) { + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + continue; + } + + /* Match found */ + offset = (U32)(split - base) - bestEntry->offset; + mLength = forwardMatchLength + backwardMatchLength; + { + rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; + + /* Out of sequence storage */ + if (rawSeqStore->size == rawSeqStore->capacity) + return ERROR(dstSize_tooSmall); + seq->litLength = (U32)(split - backwardMatchLength - anchor); + seq->matchLength = (U32)mLength; + seq->offset = offset; + rawSeqStore->size++; + } + + /* Insert the current entry into the hash table --- it must be + * done after the previous block to avoid clobbering bestEntry */ + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + + anchor = split + forwardMatchLength; + + /* If we find a match that ends after the data that we've hashed + * then we have a repeating, overlapping, pattern. E.g. all zeros. + * If one repetition of the pattern matches our `stopMask` then all + * repetitions will. We don't need to insert them all into out table, + * only the first one. So skip over overlapping matches. + * This is a major speed boost (20x) for compressing a single byte + * repeated, when that byte ends up in the table. + */ + if (anchor > ip + hashed) { + ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength); + /* Continue the outter loop at anchor (ip + hashed == anchor). */ + ip = anchor - hashed; + break; + } + } + + ip += hashed; + } + + return iend - anchor; +} + +/*! ZSTD_ldm_reduceTable() : + * reduce table indexes by `reducerValue` */ +static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size, + U32 const reducerValue) +{ + U32 u; + for (u = 0; u < size; u++) { + if (table[u].offset < reducerValue) table[u].offset = 0; + else table[u].offset -= reducerValue; + } +} + +size_t ZSTD_ldm_generateSequences( + ldmState_t* ldmState, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize) +{ + U32 const maxDist = 1U << params->windowLog; + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + size_t const kMaxChunkSize = 1 << 20; + size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0); + size_t chunk; + size_t leftoverSize = 0; + + assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize); + /* Check that ZSTD_window_update() has been called for this chunk prior + * to passing it to this function. 
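A short sketch (not part of the diff) of the chunking arithmetic used by ZSTD_ldm_generateSequences above: srcSize is carved into at-most-kMaxChunkSize pieces, with the final partial chunk accounted for by the ((srcSize % kMaxChunkSize) != 0) term. The example input size is hypothetical.

/* Sketch only: chunk count and per-chunk bounds, mirroring the loop above. */
#include <stddef.h>
#include <stdio.h>

int main(void)
{
    size_t const kMaxChunkSize = (size_t)1 << 20;            /* 1 MB, as above */
    size_t const srcSize = ((size_t)3 << 20) + 12345;        /* hypothetical: 3 MB + change */
    size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);

    for (size_t chunk = 0; chunk < nbChunks; ++chunk) {
        size_t const chunkStart = chunk * kMaxChunkSize;
        size_t const remaining = srcSize - chunkStart;
        size_t const chunkSize = remaining < kMaxChunkSize ? remaining : kMaxChunkSize;
        printf("chunk %zu: [%zu, %zu) size %zu\n",
               chunk, chunkStart, chunkStart + chunkSize, chunkSize);
    }
    return 0;                                                /* prints 4 chunks */
}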
+ */ + assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize); + /* The input could be very large (in zstdmt), so it must be broken up into + * chunks to enforce the maximum distance and handle overflow correction. + */ + assert(sequences->pos <= sequences->size); + assert(sequences->size <= sequences->capacity); + for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) { + BYTE const* const chunkStart = istart + chunk * kMaxChunkSize; + size_t const remaining = (size_t)(iend - chunkStart); + BYTE const *const chunkEnd = + (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize; + size_t const chunkSize = chunkEnd - chunkStart; + size_t newLeftoverSize; + size_t const prevSize = sequences->size; + + assert(chunkStart < iend); + /* 1. Perform overflow correction if necessary. */ + if (ZSTD_window_needOverflowCorrection(ldmState->window, 0, maxDist, ldmState->loadedDictEnd, chunkStart, chunkEnd)) { + U32 const ldmHSize = 1U << params->hashLog; + U32 const correction = ZSTD_window_correctOverflow( + &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); + ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); + /* invalidate dictionaries on overflow correction */ + ldmState->loadedDictEnd = 0; + } + /* 2. We enforce the maximum offset allowed. + * + * kMaxChunkSize should be small enough that we don't lose too much of + * the window through early invalidation. + * TODO: * Test the chunk size. + * * Try invalidation after the sequence generation and test the + * the offset against maxDist directly. + * + * NOTE: Because of dictionaries + sequence splitting we MUST make sure + * that any offset used is valid at the END of the sequence, since it may + * be split into two sequences. This condition holds when using + * ZSTD_window_enforceMaxDist(), but if we move to checking offsets + * against maxDist directly, we'll have to carefully handle that case. + */ + ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL); + /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */ + newLeftoverSize = ZSTD_ldm_generateSequences_internal( + ldmState, sequences, params, chunkStart, chunkSize); + if (ZSTD_isError(newLeftoverSize)) + return newLeftoverSize; + /* 4. We add the leftover literals from previous iterations to the first + * newly generated sequence, or add the `newLeftoverSize` if none are + * generated. 
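A self-contained sketch (not part of the diff) of the leftover-literal bookkeeping described just above and implemented right after this note: literals left over from earlier chunks are folded into the first sequence the current chunk produced, or the whole chunk joins the leftovers when nothing was produced. The struct and function names are hypothetical stand-ins for the rawSeq handling.

/* Sketch only: carrying leftover literals across chunk boundaries. */
#include <stdint.h>
#include <stdio.h>

typedef struct { uint32_t litLength, matchLength, offset; } seq_t;

/* Fold leftovers into the first sequence this chunk produced (if any);
 * otherwise the whole chunk becomes leftover literals. */
static size_t carryLeftover(seq_t* seqs, size_t prevSize, size_t newSize,
                            size_t leftoverSize, size_t chunkSize, size_t newLeftoverSize)
{
    if (prevSize < newSize) {
        seqs[prevSize].litLength += (uint32_t)leftoverSize;
        return newLeftoverSize;
    }
    return leftoverSize + chunkSize;
}

int main(void)
{
    seq_t seqs[4] = { { 40, 64, 777 } };
    size_t leftover = 0;
    leftover = carryLeftover(seqs, 0, 0, leftover, 1000, 1000); /* chunk 1: no sequences */
    leftover = carryLeftover(seqs, 0, 1, leftover, 2000, 5);    /* chunk 2: one sequence */
    printf("seqs[0].litLength = %u, leftover = %zu\n",
           (unsigned)seqs[0].litLength, leftover);
    /* prints: seqs[0].litLength = 1040, leftover = 5 */
    return 0;
}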
+ */ + /* Prepend the leftover literals from the last call */ + if (prevSize < sequences->size) { + sequences->seq[prevSize].litLength += (U32)leftoverSize; + leftoverSize = newLeftoverSize; + } else { + assert(newLeftoverSize == chunkSize); + leftoverSize += chunkSize; + } + } + return 0; +} + +void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) { + while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) { + rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos; + if (srcSize <= seq->litLength) { + /* Skip past srcSize literals */ + seq->litLength -= (U32)srcSize; + return; + } + srcSize -= seq->litLength; + seq->litLength = 0; + if (srcSize < seq->matchLength) { + /* Skip past the first srcSize of the match */ + seq->matchLength -= (U32)srcSize; + if (seq->matchLength < minMatch) { + /* The match is too short, omit it */ + if (rawSeqStore->pos + 1 < rawSeqStore->size) { + seq[1].litLength += seq[0].matchLength; + } + rawSeqStore->pos++; + } + return; + } + srcSize -= seq->matchLength; + seq->matchLength = 0; + rawSeqStore->pos++; + } +} + +/** + * If the sequence length is longer than remaining then the sequence is split + * between this block and the next. + * + * Returns the current sequence to handle, or if the rest of the block should + * be literals, it returns a sequence with offset == 0. + */ +static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, + U32 const remaining, U32 const minMatch) +{ + rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos]; + assert(sequence.offset > 0); + /* Likely: No partial sequence */ + if (remaining >= sequence.litLength + sequence.matchLength) { + rawSeqStore->pos++; + return sequence; + } + /* Cut the sequence short (offset == 0 ==> rest is literals). */ + if (remaining <= sequence.litLength) { + sequence.offset = 0; + } else if (remaining < sequence.litLength + sequence.matchLength) { + sequence.matchLength = remaining - sequence.litLength; + if (sequence.matchLength < minMatch) { + sequence.offset = 0; + } + } + /* Skip past `remaining` bytes for the future sequences. 
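A standalone restatement (not part of the diff) of the truncation rule in maybeSplitSequence above, with concrete numbers: a sequence that does not fit in the remaining block either keeps only part of its match or, when the cut match would fall below minMatch, is demoted to pure literals (offset == 0). Names are hypothetical and the rawSeqStore skipping step is omitted.

/* Sketch only: splitting a raw sequence at a block boundary. */
#include <stdint.h>
#include <stdio.h>

typedef struct { uint32_t litLength, matchLength, offset; } seq_t;

static seq_t splitAt(seq_t s, uint32_t remaining, uint32_t minMatch)
{
    if (remaining >= s.litLength + s.matchLength) return s;   /* fits entirely       */
    if (remaining <= s.litLength) { s.offset = 0; return s; } /* only literals fit   */
    s.matchLength = remaining - s.litLength;                  /* cut the match       */
    if (s.matchLength < minMatch) s.offset = 0;               /* too short: literals */
    return s;
}

int main(void)
{
    seq_t const s = { 10, 20, 5 };
    seq_t a = splitAt(s, 40, 4);  /* fits: unchanged                 */
    seq_t b = splitAt(s, 15, 4);  /* match cut to 5                  */
    seq_t c = splitAt(s, 12, 4);  /* cut match (2) < 4 -> literals   */
    printf("a: ml=%u off=%u  b: ml=%u off=%u  c: off=%u\n",
           (unsigned)a.matchLength, (unsigned)a.offset,
           (unsigned)b.matchLength, (unsigned)b.offset, (unsigned)c.offset);
    return 0;
}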
*/ + ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch); + return sequence; +} + +void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { + U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); + while (currPos && rawSeqStore->pos < rawSeqStore->size) { + rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos]; + if (currPos >= currSeq.litLength + currSeq.matchLength) { + currPos -= currSeq.litLength + currSeq.matchLength; + rawSeqStore->pos++; + } else { + rawSeqStore->posInSequence = currPos; + break; + } + } + if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) { + rawSeqStore->posInSequence = 0; + } +} + +size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + ZSTD_useRowMatchFinderMode_e useRowMatchFinder, + void const* src, size_t srcSize) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + unsigned const minMatch = cParams->minMatch; + ZSTD_blockCompressor const blockCompressor = + ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms)); + /* Input bounds */ + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + /* Input positions */ + BYTE const* ip = istart; + + DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize); + /* If using opt parser, use LDMs only as candidates rather than always accepting them */ + if (cParams->strategy >= ZSTD_btopt) { + size_t lastLLSize; + ms->ldmSeqStore = rawSeqStore; + lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize); + ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize); + return lastLLSize; + } + + assert(rawSeqStore->pos <= rawSeqStore->size); + assert(rawSeqStore->size <= rawSeqStore->capacity); + /* Loop through each sequence and apply the block compressor to the literals */ + while (rawSeqStore->pos < rawSeqStore->size && ip < iend) { + /* maybeSplitSequence updates rawSeqStore->pos */ + rawSeq const sequence = maybeSplitSequence(rawSeqStore, + (U32)(iend - ip), minMatch); + int i; + /* End signal */ + if (sequence.offset == 0) + break; + + assert(ip + sequence.litLength + sequence.matchLength <= iend); + + /* Fill tables for block compressor */ + ZSTD_ldm_limitTableUpdate(ms, ip); + ZSTD_ldm_fillFastTables(ms, ip); + /* Run the block compressor */ + DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength); + { + size_t const newLitLength = + blockCompressor(ms, seqStore, rep, ip, sequence.litLength); + ip += sequence.litLength; + /* Update the repcodes */ + for (i = ZSTD_REP_NUM - 1; i > 0; i--) + rep[i] = rep[i-1]; + rep[0] = sequence.offset; + /* Store the sequence */ + ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, + sequence.offset + ZSTD_REP_MOVE, + sequence.matchLength - MINMATCH); + ip += sequence.matchLength; + } + } + /* Fill the tables for the block compressor */ + ZSTD_ldm_limitTableUpdate(ms, ip); + ZSTD_ldm_fillFastTables(ms, ip); + /* Compress the last literals */ + return blockCompressor(ms, seqStore, rep, ip, iend - ip); +} +/**** ended inlining compress/zstd_ldm.c ****/ +/**** start inlining compress/zstd_opt.c ****/ +/* + * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * All rights reserved. 
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/**** skipping file: zstd_compress_internal.h ****/
+/**** skipping file: hist.h ****/
+/**** skipping file: zstd_opt.h ****/
+
+
+#define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
+#define ZSTD_FREQ_DIV       4   /* log factor when using previous stats to init next stats */
+#define ZSTD_MAX_PRICE      (1<<30)
+
+#define ZSTD_PREDEF_THRESHOLD 1024   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
+
+
+/*-*************************************
+*  Price functions for optimal parser
+***************************************/
+
+#if 0    /* approximation at bit level */
+#  define BITCOST_ACCURACY 0
+#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+#  define WEIGHT(stat)  ((void)opt, ZSTD_bitWeight(stat))
+#elif 0  /* fractional bit accuracy */
+#  define BITCOST_ACCURACY 8
+#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+#  define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
+#else    /* opt==approx, ultra==accurate */
+#  define BITCOST_ACCURACY 8
+#  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
+#  define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
+#endif
+
+MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
+{
+    return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
+}
+
+MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
+{
+    U32 const stat = rawStat + 1;
+    U32 const hb = ZSTD_highbit32(stat);
+    U32 const BWeight = hb * BITCOST_MULTIPLIER;
+    U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
+    U32 const weight = BWeight + FWeight;
+    assert(hb + BITCOST_ACCURACY < 31);
+    return weight;
+}
+
+#if (DEBUGLEVEL>=2)
+/* debugging function,
+ * @return price in bytes as fractional value
+ * for debug messages only */
+MEM_STATIC double ZSTD_fCost(U32 price)
+{
+    return (double)price / (BITCOST_MULTIPLIER*8);
+}
+#endif
+
+static int ZSTD_compressedLiterals(optState_t const* const optPtr)
+{
+    return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
+}
+
+static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
+{
+    if (ZSTD_compressedLiterals(optPtr))
+        optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
+    optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
+    optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
+    optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
+}
+
+
+/* ZSTD_downscaleStat() :
+ * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
+ * return the resulting sum of elements */
+static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
+{
+    U32 s, sum=0;
+    DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
+    assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
+    for (s=0; s<lastEltIndex+1; s++) {
+        table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
+        sum += table[s];
+    }
+    return sum;
+}
+
+/* ZSTD_rescaleFreqs() :
+ * if first block (detected by optPtr->litLengthSum == 0) : init statistics
+ *    take hints from dictionary if there is one
+ *    or init from zero, using src for literals stats, or flat 1 for match symbols
+ * otherwise downscale existing stats, to be used as seed for next block.
+ */ +static void +ZSTD_rescaleFreqs(optState_t* const optPtr, + const BYTE* const src, size_t const srcSize, + int const optLevel) +{ + int const compressedLiterals = ZSTD_compressedLiterals(optPtr); + DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); + optPtr->priceType = zop_dynamic; + + if (optPtr->litLengthSum == 0) { /* first block : init */ + if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */ + DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef"); + optPtr->priceType = zop_predef; + } + + assert(optPtr->symbolCosts != NULL); + if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { + /* huffman table presumed generated by dictionary */ + optPtr->priceType = zop_dynamic; + + if (compressedLiterals) { + unsigned lit; + assert(optPtr->litFreq != NULL); + optPtr->litSum = 0; + for (lit=0; lit<=MaxLit; lit++) { + U32 const scaleLog = 11; /* scale to 2K */ + U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit); + assert(bitCost <= scaleLog); + optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->litSum += optPtr->litFreq[lit]; + } } + + { unsigned ll; + FSE_CState_t llstate; + FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable); + optPtr->litLengthSum = 0; + for (ll=0; ll<=MaxLL; ll++) { + U32 const scaleLog = 10; /* scale to 1K */ + U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll); + assert(bitCost < scaleLog); + optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->litLengthSum += optPtr->litLengthFreq[ll]; + } } + + { unsigned ml; + FSE_CState_t mlstate; + FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable); + optPtr->matchLengthSum = 0; + for (ml=0; ml<=MaxML; ml++) { + U32 const scaleLog = 10; + U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml); + assert(bitCost < scaleLog); + optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->matchLengthSum += optPtr->matchLengthFreq[ml]; + } } + + { unsigned of; + FSE_CState_t ofstate; + FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable); + optPtr->offCodeSum = 0; + for (of=0; of<=MaxOff; of++) { + U32 const scaleLog = 10; + U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of); + assert(bitCost < scaleLog); + optPtr->offCodeFreq[of] = bitCost ? 
1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->offCodeSum += optPtr->offCodeFreq[of]; + } } + + } else { /* not a dictionary */ + + assert(optPtr->litFreq != NULL); + if (compressedLiterals) { + unsigned lit = MaxLit; + HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ + optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); + } + + { unsigned ll; + for (ll=0; ll<=MaxLL; ll++) + optPtr->litLengthFreq[ll] = 1; + } + optPtr->litLengthSum = MaxLL+1; + + { unsigned ml; + for (ml=0; ml<=MaxML; ml++) + optPtr->matchLengthFreq[ml] = 1; + } + optPtr->matchLengthSum = MaxML+1; + + { unsigned of; + for (of=0; of<=MaxOff; of++) + optPtr->offCodeFreq[of] = 1; + } + optPtr->offCodeSum = MaxOff+1; + + } + + } else { /* new block : re-use previous statistics, scaled down */ + + if (compressedLiterals) + optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); + optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0); + optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0); + optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0); + } + + ZSTD_setBasePrices(optPtr, optLevel); +} + +/* ZSTD_rawLiteralsCost() : + * price of literals (only) in specified segment (which length can be 0). + * does not include price of literalLength symbol */ +static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, + const optState_t* const optPtr, + int optLevel) +{ + if (litLength == 0) return 0; + + if (!ZSTD_compressedLiterals(optPtr)) + return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bytes per literal. */ + + if (optPtr->priceType == zop_predef) + return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */ + + /* dynamic statistics */ + { U32 price = litLength * optPtr->litSumBasePrice; + U32 u; + for (u=0; u < litLength; u++) { + assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */ + price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel); + } + return price; + } +} + +/* ZSTD_litLengthPrice() : + * cost of literalLength symbol */ +static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel) +{ + if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel); + + /* dynamic statistics */ + { U32 const llCode = ZSTD_LLcode(litLength); + return (LL_bits[llCode] * BITCOST_MULTIPLIER) + + optPtr->litLengthSumBasePrice + - WEIGHT(optPtr->litLengthFreq[llCode], optLevel); + } +} + +/* ZSTD_getMatchPrice() : + * Provides the cost of the match part (offset + matchLength) of a sequence + * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. 
+ * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */ +FORCE_INLINE_TEMPLATE U32 +ZSTD_getMatchPrice(U32 const offset, + U32 const matchLength, + const optState_t* const optPtr, + int const optLevel) +{ + U32 price; + U32 const offCode = ZSTD_highbit32(offset+1); + U32 const mlBase = matchLength - MINMATCH; + assert(matchLength >= MINMATCH); + + if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */ + return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER); + + /* dynamic statistics */ + price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel)); + if ((optLevel<2) /*static*/ && offCode >= 20) + price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */ + + /* match Length */ + { U32 const mlCode = ZSTD_MLcode(mlBase); + price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel)); + } + + price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */ + + DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price); + return price; +} + +/* ZSTD_updateStats() : + * assumption : literals + litLengtn <= iend */ +static void ZSTD_updateStats(optState_t* const optPtr, + U32 litLength, const BYTE* literals, + U32 offsetCode, U32 matchLength) +{ + /* literals */ + if (ZSTD_compressedLiterals(optPtr)) { + U32 u; + for (u=0; u < litLength; u++) + optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; + optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; + } + + /* literal Length */ + { U32 const llCode = ZSTD_LLcode(litLength); + optPtr->litLengthFreq[llCode]++; + optPtr->litLengthSum++; + } + + /* match offset code (0-2=>repCode; 3+=>offset+2) */ + { U32 const offCode = ZSTD_highbit32(offsetCode+1); + assert(offCode <= MaxOff); + optPtr->offCodeFreq[offCode]++; + optPtr->offCodeSum++; + } + + /* match Length */ + { U32 const mlBase = matchLength - MINMATCH; + U32 const mlCode = ZSTD_MLcode(mlBase); + optPtr->matchLengthFreq[mlCode]++; + optPtr->matchLengthSum++; + } +} + + +/* ZSTD_readMINMATCH() : + * function safe only for comparisons + * assumption : memPtr must be at least 4 bytes before end of buffer */ +MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) +{ + switch (length) + { + default : + case 4 : return MEM_read32(memPtr); + case 3 : if (MEM_isLittleEndian()) + return MEM_read32(memPtr)<<8; + else + return MEM_read32(memPtr)>>8; + } +} + + +/* Update hashTable3 up to ip (excluded) + Assumption : always within prefix (i.e. not within extDict) */ +static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* const ip) +{ + U32* const hashTable3 = ms->hashTable3; + U32 const hashLog3 = ms->hashLog3; + const BYTE* const base = ms->window.base; + U32 idx = *nextToUpdate3; + U32 const target = (U32)(ip - base); + size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3); + assert(hashLog3 > 0); + + while(idx < target) { + hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; + idx++; + } + + *nextToUpdate3 = target; + return hashTable3[hash3]; +} + + +/*-************************************* +* Binary Tree search +***************************************/ +/** ZSTD_insertBt1() : add one or multiple positions to tree. + * ip : assumed <= iend-8 . 
+ * @return : nb of positions added */ +static U32 ZSTD_insertBt1( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + U32 const mls, const int extDict) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 matchIndex = hashTable[h]; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match; + const U32 curr = (U32)(ip-base); + const U32 btLow = btMask >= curr ? 0 : curr - btMask; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = smallerPtr + 1; + U32 dummy32; /* to be nullified at the end */ + U32 const windowLow = ms->window.lowLimit; + U32 matchEndIdx = curr+8+1; + size_t bestLength = 8; + U32 nbCompares = 1U << cParams->searchLog; +#ifdef ZSTD_C_PREDICT + U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0); + U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1); + predictedSmall += (predictedSmall>0); + predictedLarge += (predictedLarge>0); +#endif /* ZSTD_C_PREDICT */ + + DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr); + + assert(ip <= iend-8); /* required for h calculation */ + hashTable[h] = curr; /* Update Hash Table */ + + assert(windowLow > 0); + while (nbCompares-- && (matchIndex >= windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(matchIndex < curr); + +#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ + const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ + if (matchIndex == predictedSmall) { + /* no need to check length, result known */ + *smallerPtr = matchIndex; + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + predictedSmall = predictPtr[1] + (predictPtr[1]>0); + continue; + } + if (matchIndex == predictedLarge) { + *largerPtr = matchIndex; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + predictedLarge = predictPtr[0] + (predictPtr[0]>0); + continue; + } +#endif + + if (!extDict || (matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */ + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + bestLength = matchLength; + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + } + + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + 
break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } + + if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + { U32 positions = 0; + if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */ + assert(matchEndIdx > curr + 8); + return MAX(positions, matchEndIdx - (curr + 8)); + } +} + +FORCE_INLINE_TEMPLATE +void ZSTD_updateTree_internal( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + const U32 mls, const ZSTD_dictMode_e dictMode) +{ + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", + idx, target, dictMode); + + while(idx < target) { + U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict); + assert(idx < (U32)(idx + forward)); + idx += forward; + } + assert((size_t)(ip - base) <= (size_t)(U32)(-1)); + assert((size_t)(iend - base) <= (size_t)(U32)(-1)); + ms->nextToUpdate = target; +} + +void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) { + ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict); +} + +FORCE_INLINE_TEMPLATE +U32 ZSTD_insertBtAndGetAllMatches ( + ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ + ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, + const U32 rep[ZSTD_REP_NUM], + U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ + const U32 lengthToBeat, + U32 const mls /* template */) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); + const BYTE* const base = ms->window.base; + U32 const curr = (U32)(ip-base); + U32 const hashLog = cParams->hashLog; + U32 const minMatch = (mls==3) ? 3 : 4; + U32* const hashTable = ms->hashTable; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 matchIndex = hashTable[h]; + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask= (1U << btLog) - 1; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const dictBase = ms->window.dictBase; + U32 const dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + U32 const btLow = (btMask >= curr) ? 0 : curr - btMask; + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog); + U32 const matchLow = windowLow ? 
windowLow : 1; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = bt + 2*(curr&btMask) + 1; + U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */ + U32 dummy32; /* to be nullified at the end */ + U32 mnum = 0; + U32 nbCompares = 1U << cParams->searchLog; + + const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL; + const ZSTD_compressionParameters* const dmsCParams = + dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL; + const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL; + const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL; + U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0; + U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0; + U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0; + U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog; + U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog; + U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0; + U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit; + + size_t bestLength = lengthToBeat-1; + DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr); + + /* check repCode */ + assert(ll0 <= 1); /* necessarily 1 or 0 */ + { U32 const lastR = ZSTD_REP_NUM + ll0; + U32 repCode; + for (repCode = ll0; repCode < lastR; repCode++) { + U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + U32 const repIndex = curr - repOffset; + U32 repLen = 0; + assert(curr >= dictLimit); + if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */ + /* We must validate the repcode offset because when we're using a dictionary the + * valid offset range shrinks when the dictionary goes out of bounds. + */ + if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) { + repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch; + } + } else { /* repIndex < dictLimit || repIndex >= curr */ + const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ? 
+ dmsBase + repIndex - dmsIndexDelta : + dictBase + repIndex; + assert(curr >= windowLow); + if ( dictMode == ZSTD_extDict + && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */ + & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */) + && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { + repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch; + } + if (dictMode == ZSTD_dictMatchState + && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */ + & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */ + && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { + repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch; + } } + /* save longer solution */ + if (repLen > bestLength) { + DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u", + repCode, ll0, repOffset, repLen); + bestLength = repLen; + matches[mnum].off = repCode - ll0; + matches[mnum].len = (U32)repLen; + mnum++; + if ( (repLen > sufficient_len) + | (ip+repLen == iLimit) ) { /* best possible */ + return mnum; + } } } } + + /* HC3 match finder */ + if ((mls == 3) /*static*/ && (bestLength < mls)) { + U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip); + if ((matchIndex3 >= matchLow) + & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { + size_t mlen; + if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) { + const BYTE* const match = base + matchIndex3; + mlen = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex3; + mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart); + } + + /* save best solution */ + if (mlen >= mls /* == 3 > bestLength */) { + DEBUGLOG(8, "found small match with hlog3, of length %u", + (U32)mlen); + bestLength = mlen; + assert(curr > matchIndex3); + assert(mnum==0); /* no prior solution */ + matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE; + matches[0].len = (U32)mlen; + mnum = 1; + if ( (mlen > sufficient_len) | + (ip+mlen == iLimit) ) { /* best possible length */ + ms->nextToUpdate = curr+1; /* skip insertion */ + return 1; + } } } + /* no dictMatchState lookup: dicts don't have a populated HC3 table */ + } + + hashTable[h] = curr; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex >= matchLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + const BYTE* match; + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(curr > matchIndex); + + if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */ + match = base + matchIndex; + if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit); + } else { + match = dictBase + matchIndex; + assert(memcmp(match, ip, matchLength) == 0); /* ensure early 
section of match is equal as expected */ + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* prepare for match[matchLength] read */ + } + + if (matchLength > bestLength) { + DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)", + (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE); + assert(matchEndIdx > matchIndex); + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + bestLength = matchLength; + matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE; + matches[mnum].len = (U32)matchLength; + mnum++; + if ( (matchLength > ZSTD_OPT_NUM) + | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { + if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */ + break; /* drop, to preserve bt consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + /* match smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */ + } else { + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + if (dictMode == ZSTD_dictMatchState && nbCompares) { + size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls); + U32 dictMatchIndex = dms->hashTable[dmsH]; + const U32* const dmsBt = dms->chainTable; + commonLengthSmaller = commonLengthLarger = 0; + while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) { + const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match = dmsBase + dictMatchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart); + if (dictMatchIndex+matchLength >= dmsHighLimit) + match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */ + + if (matchLength > bestLength) { + matchIndex = dictMatchIndex + dmsIndexDelta; + DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)", + (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE); + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + bestLength = matchLength; + matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE; + matches[mnum].len = (U32)matchLength; + mnum++; + if ( (matchLength > ZSTD_OPT_NUM) + | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */ + if (match[matchLength] < ip[matchLength]) { + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + dictMatchIndex = nextPtr[1]; /* new matchIndex 
larger than previous (closer to current) */ + } else { + /* match is larger than current */ + commonLengthLarger = matchLength; + dictMatchIndex = nextPtr[0]; + } + } + } + + assert(matchEndIdx > curr+8); + ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ + return mnum; +} + + +FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( + ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */ + ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode, + const U32 rep[ZSTD_REP_NUM], + U32 const ll0, + U32 const lengthToBeat) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32 const matchLengthSearch = cParams->minMatch; + DEBUGLOG(8, "ZSTD_BtGetAllMatches"); + if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode); + switch(matchLengthSearch) + { + case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3); + default : + case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4); + case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5); + case 7 : + case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6); + } +} + +/************************* +* LDM helper functions * +*************************/ + +/* Struct containing info needed to make decision about ldm inclusion */ +typedef struct { + rawSeqStore_t seqStore; /* External match candidates store for this block */ + U32 startPosInBlock; /* Start position of the current match candidate */ + U32 endPosInBlock; /* End position of the current match candidate */ + U32 offset; /* Offset of the match candidate */ +} ZSTD_optLdm_t; + +/* ZSTD_optLdm_skipRawSeqStoreBytes(): + * Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'. + */ +static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { + U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); + while (currPos && rawSeqStore->pos < rawSeqStore->size) { + rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos]; + if (currPos >= currSeq.litLength + currSeq.matchLength) { + currPos -= currSeq.litLength + currSeq.matchLength; + rawSeqStore->pos++; + } else { + rawSeqStore->posInSequence = currPos; + break; + } + } + if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) { + rawSeqStore->posInSequence = 0; + } +} + +/* ZSTD_opt_getNextMatchAndUpdateSeqStore(): + * Calculates the beginning and end of the next match in the current block. + * Updates 'pos' and 'posInSequence' of the ldmSeqStore. 
+ */ +static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock, + U32 blockBytesRemaining) { + rawSeq currSeq; + U32 currBlockEndPos; + U32 literalsBytesRemaining; + U32 matchBytesRemaining; + + /* Setting match end position to MAX to ensure we never use an LDM during this block */ + if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) { + optLdm->startPosInBlock = UINT_MAX; + optLdm->endPosInBlock = UINT_MAX; + return; + } + /* Calculate appropriate bytes left in matchLength and litLength after adjusting + based on ldmSeqStore->posInSequence */ + currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos]; + assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength); + currBlockEndPos = currPosInBlock + blockBytesRemaining; + literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ? + currSeq.litLength - (U32)optLdm->seqStore.posInSequence : + 0; + matchBytesRemaining = (literalsBytesRemaining == 0) ? + currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) : + currSeq.matchLength; + + /* If there are more literal bytes than bytes remaining in block, no ldm is possible */ + if (literalsBytesRemaining >= blockBytesRemaining) { + optLdm->startPosInBlock = UINT_MAX; + optLdm->endPosInBlock = UINT_MAX; + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining); + return; + } + + /* Matches may be < MINMATCH by this process. In that case, we will reject them + when we are deciding whether or not to add the ldm */ + optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining; + optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining; + optLdm->offset = currSeq.offset; + + if (optLdm->endPosInBlock > currBlockEndPos) { + /* Match ends after the block ends, we can't use the whole match */ + optLdm->endPosInBlock = currBlockEndPos; + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock); + } else { + /* Consume nb of bytes equal to size of sequence left */ + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining); + } +} + +/* ZSTD_optLdm_maybeAddMatch(): + * Adds a match if it's long enough, based on it's 'matchStartPosInBlock' + * and 'matchEndPosInBlock', into 'matches'. 
Maintains the correct ordering of 'matches' + */ +static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches, + ZSTD_optLdm_t* optLdm, U32 currPosInBlock) { + U32 posDiff = currPosInBlock - optLdm->startPosInBlock; + /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */ + U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff; + U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE; + + /* Ensure that current block position is not outside of the match */ + if (currPosInBlock < optLdm->startPosInBlock + || currPosInBlock >= optLdm->endPosInBlock + || candidateMatchLength < MINMATCH) { + return; + } + + if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) { + DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u", + candidateOffCode, candidateMatchLength, currPosInBlock); + matches[*nbMatches].len = candidateMatchLength; + matches[*nbMatches].off = candidateOffCode; + (*nbMatches)++; + } +} + +/* ZSTD_optLdm_processMatchCandidate(): + * Wrapper function to update ldm seq store and call ldm functions as necessary. + */ +static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches, + U32 currPosInBlock, U32 remainingBytes) { + if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) { + return; + } + + if (currPosInBlock >= optLdm->endPosInBlock) { + if (currPosInBlock > optLdm->endPosInBlock) { + /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily + * at the end of a match from the ldm seq store, and will often be some bytes + * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots" + */ + U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock; + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot); + } + ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes); + } + ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock); +} + +/*-******************************* +* Optimal parser +*********************************/ + + +static U32 ZSTD_totalLen(ZSTD_optimal_t sol) +{ + return sol.litlen + sol.mlen; +} + +#if 0 /* debug */ + +static void +listStats(const U32* table, int lastEltID) +{ + int const nbElts = lastEltID + 1; + int enb; + for (enb=0; enb < nbElts; enb++) { + (void)table; + /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */ + RAWLOG(2, "%4i,", table[enb]); + } + RAWLOG(2, " \n"); +} + +#endif + +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, + seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const int optLevel, + const ZSTD_dictMode_e dictMode) +{ + optState_t* const optStatePtr = &ms->opt; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const BYTE* const prefixStart = base + ms->window.dictLimit; + const ZSTD_compressionParameters* const cParams = &ms->cParams; + + U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); + U32 const minMatch = (cParams->minMatch == 3) ? 
3 : 4;
+    U32 nextToUpdate3 = ms->nextToUpdate;
+
+    ZSTD_optimal_t* const opt = optStatePtr->priceTable;
+    ZSTD_match_t* const matches = optStatePtr->matchTable;
+    ZSTD_optimal_t lastSequence;
+    ZSTD_optLdm_t optLdm;
+
+    optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
+    optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
+    ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip));
+
+    /* init */
+    DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u",
+                (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate);
+    assert(optLevel <= 2);
+    ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
+    ip += (ip==prefixStart);
+
+    /* Match Loop */
+    while (ip < ilimit) {
+        U32 cur, last_pos = 0;
+
+        /* find first match */
+        {   U32 const litlen = (U32)(ip - anchor);
+            U32 const ll0 = !litlen;
+            U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
+            ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
+                                              (U32)(ip-istart), (U32)(iend - ip));
+            if (!nbMatches) { ip++; continue; }
+
+            /* initialize opt[0] */
+            { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
+            opt[0].mlen = 0;  /* means is_a_literal */
+            opt[0].litlen = litlen;
+            /* We don't need to include the actual price of the literals because
+             * it is static for the duration of the forward pass, and is included
+             * in every price. We include the literal length to avoid negative
+             * prices when we subtract the previous literal length.
+             */
+            opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
+
+            /* large match -> immediate encoding */
+            {   U32 const maxML = matches[nbMatches-1].len;
+                U32 const maxOffset = matches[nbMatches-1].off;
+                DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
+                            nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
+
+                if (maxML > sufficient_len) {
+                    lastSequence.litlen = litlen;
+                    lastSequence.mlen = maxML;
+                    lastSequence.off = maxOffset;
+                    DEBUGLOG(6, "large match (%u>%u), immediate encoding",
+                                maxML, sufficient_len);
+                    cur = 0;
+                    last_pos = ZSTD_totalLen(lastSequence);
+                    goto _shortestPath;
+            }   }
+
+            /* set prices for first matches starting position == 0 */
+            {   U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
+                U32 pos;
+                U32 matchNb;
+                for (pos = 1; pos < minMatch; pos++) {
+                    opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
+                }
+                for (matchNb = 0; matchNb < nbMatches; matchNb++) {
+                    U32 const offset = matches[matchNb].off;
+                    U32 const end = matches[matchNb].len;
+                    for ( ; pos <= end ; pos++ ) {
+                        U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
+                        U32 const sequencePrice = literalsPrice + matchPrice;
+                        DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
+                                    pos, ZSTD_fCost(sequencePrice));
+                        opt[pos].mlen = pos;
+                        opt[pos].off = offset;
+                        opt[pos].litlen = litlen;
+                        opt[pos].price = sequencePrice;
+                }   }
+                last_pos = pos-1;
+            }
+        }
+
+        /* check further positions */
+        for (cur = 1; cur <= last_pos; cur++) {
+            const BYTE* const inr = ip + cur;
+            assert(cur < ZSTD_OPT_NUM);
+            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
+
+            /* Fix current position with one literal if cheaper */
+            {   U32 const litlen = (opt[cur-1].mlen == 0) ? 
opt[cur-1].litlen + 1 : 1; + int const price = opt[cur-1].price + + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel) + + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel) + - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel); + assert(price < 1000000000); /* overflow check */ + if (price <= opt[cur].price) { + DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", + inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, + opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); + opt[cur].mlen = 0; + opt[cur].off = 0; + opt[cur].litlen = litlen; + opt[cur].price = price; + } else { + DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)", + inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), + opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]); + } + } + + /* Set the repcodes of the current position. We must do it here + * because we rely on the repcodes of the 2nd to last sequence being + * correct to set the next chunks repcodes during the backward + * traversal. + */ + ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t)); + assert(cur >= opt[cur].mlen); + if (opt[cur].mlen != 0) { + U32 const prev = cur - opt[cur].mlen; + repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0); + ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t)); + } else { + ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t)); + } + + /* last match must start at a minimum distance of 8 from oend */ + if (inr > ilimit) continue; + + if (cur == last_pos) break; + + if ( (optLevel==0) /*static_test*/ + && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) { + DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1); + continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ + } + + { U32 const ll0 = (opt[cur].mlen != 0); + U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0; + U32 const previousPrice = opt[cur].price; + U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); + U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch); + U32 matchNb; + + ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, + (U32)(inr-istart), (U32)(iend-inr)); + + if (!nbMatches) { + DEBUGLOG(7, "rPos:%u : no match found", cur); + continue; + } + + { U32 const maxML = matches[nbMatches-1].len; + DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u", + inr-istart, cur, nbMatches, maxML); + + if ( (maxML > sufficient_len) + || (cur + maxML >= ZSTD_OPT_NUM) ) { + lastSequence.mlen = maxML; + lastSequence.off = matches[nbMatches-1].off; + lastSequence.litlen = litlen; + cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */ + last_pos = cur + ZSTD_totalLen(lastSequence); + if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */ + goto _shortestPath; + } } + + /* set prices using matches found at position == cur */ + for (matchNb = 0; matchNb < nbMatches; matchNb++) { + U32 const offset = matches[matchNb].off; + U32 const lastML = matches[matchNb].len; + U32 const startML = (matchNb>0) ? 
matches[matchNb-1].len+1 : minMatch; + U32 mlen; + + DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u", + matchNb, matches[matchNb].off, lastML, litlen); + + for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ + U32 const pos = cur + mlen; + int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); + + if ((pos > last_pos) || (price < opt[pos].price)) { + DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", + pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); + while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */ + opt[pos].mlen = mlen; + opt[pos].off = offset; + opt[pos].litlen = litlen; + opt[pos].price = price; + } else { + DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)", + pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); + if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */ + } + } } } + } /* for (cur = 1; cur <= last_pos; cur++) */ + + lastSequence = opt[last_pos]; + cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */ + assert(cur < ZSTD_OPT_NUM); /* control overflow*/ + +_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ + assert(opt[0].mlen == 0); + + /* Set the next chunk's repcodes based on the repcodes of the beginning + * of the last match, and the last sequence. This avoids us having to + * update them while traversing the sequences. + */ + if (lastSequence.mlen != 0) { + repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0); + ZSTD_memcpy(rep, &reps, sizeof(reps)); + } else { + ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t)); + } + + { U32 const storeEnd = cur + 1; + U32 storeStart = storeEnd; + U32 seqPos = cur; + + DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)", + last_pos, cur); (void)last_pos; + assert(storeEnd < ZSTD_OPT_NUM); + DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", + storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off); + opt[storeEnd] = lastSequence; + while (seqPos > 0) { + U32 const backDist = ZSTD_totalLen(opt[seqPos]); + storeStart--; + DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", + seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off); + opt[storeStart] = opt[seqPos]; + seqPos = (seqPos > backDist) ? 
seqPos - backDist : 0;
+            }
+
+            /* save sequences */
+            DEBUGLOG(6, "sending selected sequences into seqStore")
+            {   U32 storePos;
+                for (storePos=storeStart; storePos <= storeEnd; storePos++) {
+                    U32 const llen = opt[storePos].litlen;
+                    U32 const mlen = opt[storePos].mlen;
+                    U32 const offCode = opt[storePos].off;
+                    U32 const advance = llen + mlen;
+                    DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
+                                anchor - istart, (unsigned)llen, (unsigned)mlen);
+
+                    if (mlen==0) {  /* only literals => must be last "sequence", actually starting a new stream of sequences */
+                        assert(storePos == storeEnd);   /* must be last sequence */
+                        ip = anchor + llen;     /* last "sequence" is a bunch of literals => don't progress anchor */
+                        continue;   /* will finish */
+                    }
+
+                    assert(anchor + llen <= iend);
+                    ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
+                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
+                    anchor += advance;
+                    ip = anchor;
+            }   }
+            ZSTD_setBasePrices(optStatePtr, optLevel);
+        }
+    }   /* while (ip < ilimit) */
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
+
+
+size_t ZSTD_compressBlock_btopt(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize)
+{
+    DEBUGLOG(5, "ZSTD_compressBlock_btopt");
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
+}
+
+
+/* used in 2-pass strategy */
+static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
+{
+    U32 s, sum=0;
+    assert(ZSTD_FREQ_DIV+bonus >= 0);
+    for (s=0; s<lastEltIndex+1; s++) {
+        table[s] <<= ZSTD_FREQ_DIV+bonus;
+        table[s]--;
+        sum += table[s];
+    }
+    return sum;
+}
+
+/* used in 2-pass strategy */
+MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
+{
+    if (ZSTD_compressedLiterals(optPtr))
+        optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
+    optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
+    optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
+    optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
+}
+
+/* ZSTD_initStats_ultra():
+ * make a first compression pass, just to seed stats with more accurate starting values.
+ * only works on first block, with no dictionary and no ldm.
+ * this function cannot error, hence its contract must be respected.
+ */ +static void +ZSTD_initStats_ultra(ZSTD_matchState_t* ms, + seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */ + ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep)); + + DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize); + assert(ms->opt.litLengthSum == 0); /* first block */ + assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */ + assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */ + assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */ + + ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/ + + /* invalidate first scan from history */ + ZSTD_resetSeqStore(seqStore); + ms->window.base -= srcSize; + ms->window.dictLimit += (U32)srcSize; + ms->window.lowLimit = ms->window.dictLimit; + ms->nextToUpdate = ms->window.dictLimit; + + /* re-inforce weight of collected statistics */ + ZSTD_upscaleStats(&ms->opt); +} + +size_t ZSTD_compressBlock_btultra( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btultra2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + U32 const curr = (U32)((const BYTE*)src - ms->window.base); + DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); + + /* 2-pass strategy: + * this strategy makes a first pass over first block to collect statistics + * and seed next round's statistics with it. + * After 1st pass, function forgets everything, and starts a new block. + * Consequently, this can only work if no data has been previously loaded in tables, + * aka, no dictionary, no prefix, no ldm preprocessing. + * The compression ratio gain is generally small (~0.5% on first block), + * the cost is 2x cpu time on first block. 
*/ + assert(srcSize <= ZSTD_BLOCKSIZE_MAX); + if ( (ms->opt.litLengthSum==0) /* first block */ + && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ + && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ + && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ + && (srcSize > ZSTD_PREDEF_THRESHOLD) + ) { + ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); + } + + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btopt_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_btultra_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_btopt_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict); +} + +size_t ZSTD_compressBlock_btultra_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict); +} + +/* note : no btultra2 variant for extDict nor dictMatchState, + * because btultra2 is not meant to work with dictionaries + * and is only specific for the first block (no prefix) */ +/**** ended inlining compress/zstd_opt.c ****/ +#ifdef ZSTD_MULTITHREAD +/**** start inlining compress/zstdmt_compress.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* ====== Compiler specifics ====== */ +#if defined(_MSC_VER) +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +#endif + + +/* ====== Constants ====== */ +#define ZSTDMT_OVERLAPLOG_DEFAULT 0 + + +/* ====== Dependencies ====== */ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/pool.h ****/ +/**** skipping file: ../common/threading.h ****/ +/**** skipping file: zstd_compress_internal.h ****/ +/**** skipping file: zstd_ldm.h ****/ +/**** skipping file: zstdmt_compress.h ****/ + +/* Guards code to support resizing the SeqPool. + * We will want to resize the SeqPool to save memory in the future. + * Until then, comment the code out since it is unused. 
+ */
+#define ZSTD_RESIZE_SEQPOOL 0
+
+/* ====== Debug ====== */
+#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2) \
+ && !defined(_MSC_VER) \
+ && !defined(__MINGW32__)
+
+# include <stdio.h>
+# include <unistd.h>
+# include <sys/times.h>
+
+# define DEBUG_PRINTHEX(l,p,n) { \
+ unsigned debug_u; \
+ for (debug_u=0; debug_u<(n); debug_u++) \
+ RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
+ RAWLOG(l, " \n"); \
+}
+
+static unsigned long long GetCurrentClockTimeMicroseconds(void)
+{
+ static clock_t _ticksPerSecond = 0;
+ if (_ticksPerSecond <= 0) _ticksPerSecond = sysconf(_SC_CLK_TCK);
+
+ { struct tms junk; clock_t newTicks = (clock_t) times(&junk);
+ return ((((unsigned long long)newTicks)*(1000000))/_ticksPerSecond);
+} }
+
+#define MUTEX_WAIT_TIME_DLEVEL 6
+#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \
+ if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) { \
+ unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
+ ZSTD_pthread_mutex_lock(mutex); \
+ { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
+ unsigned long long const elapsedTime = (afterTime-beforeTime); \
+ if (elapsedTime > 1000) { /* or whatever threshold you like; I'm using 1 millisecond here */ \
+ DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
+ elapsedTime, #mutex); \
+ } } \
+ } else { \
+ ZSTD_pthread_mutex_lock(mutex); \
+ } \
+}
+
+#else
+
+# define ZSTD_PTHREAD_MUTEX_LOCK(m) ZSTD_pthread_mutex_lock(m)
+# define DEBUG_PRINTHEX(l,p,n) {}
+
+#endif
+
+
+/* ===== Buffer Pool ===== */
+/* a single Buffer Pool can be invoked from multiple threads in parallel */
+
+typedef struct buffer_s {
+ void* start;
+ size_t capacity;
+} buffer_t;
+
+static const buffer_t g_nullBuffer = { NULL, 0 };
+
+typedef struct ZSTDMT_bufferPool_s {
+ ZSTD_pthread_mutex_t poolMutex;
+ size_t bufferSize;
+ unsigned totalBuffers;
+ unsigned nbBuffers;
+ ZSTD_customMem cMem;
+ buffer_t bTable[1]; /* variable size */
+} ZSTDMT_bufferPool;
+
+static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned nbWorkers, ZSTD_customMem cMem)
+{
+ unsigned const maxNbBuffers = 2*nbWorkers + 3;
+ ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc(
+ sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
+ if (bufPool==NULL) return NULL;
+ if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) {
+ ZSTD_customFree(bufPool, cMem);
+ return NULL;
+ }
+ bufPool->bufferSize = 64 KB;
+ bufPool->totalBuffers = maxNbBuffers;
+ bufPool->nbBuffers = 0;
+ bufPool->cMem = cMem;
+ return bufPool;
+}
+
+static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
+{
+ unsigned u;
+ DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
+ if (!bufPool) return; /* compatibility with free on NULL */
+ for (u=0; u<bufPool->totalBuffers; u++) {
+ DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start);
+ ZSTD_customFree(bufPool->bTable[u].start, bufPool->cMem);
+ }
+ ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
+ ZSTD_customFree(bufPool, bufPool->cMem);
+}
+
+/* only works at initialization, not during compression */
+static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
+{
+ size_t const poolSize = sizeof(*bufPool)
+ + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
+ unsigned u;
+ size_t totalBufferSize = 0;
+ ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
+ for (u=0; u<bufPool->totalBuffers; u++)
+ totalBufferSize += bufPool->bTable[u].capacity;
+ ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
+
+ return poolSize + totalBufferSize;
+}
+
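+/* Editor's note (illustrative sketch, not part of upstream zstd) :
+ * a worker typically cycles a buffer through the pool as follows :
+ *
+ *     buffer_t dst = ZSTDMT_getBuffer(bufPool);    // reuses a cached buffer if one fits, else allocates
+ *     if (dst.start == NULL) { ... handle allocation failure ... }
+ *     ... write up to dst.capacity bytes ...
+ *     ZSTDMT_releaseBuffer(bufPool, dst);          // cached for reuse, or freed once the pool is full
+ *
+ * The pool caches at most 2*nbWorkers + 3 buffers (see ZSTDMT_createBufferPool above),
+ * which bounds worst-case memory while still letting every in-flight job hold buffers. */
+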
+/* ZSTDMT_setBufferSize() : + * all future buffers provided by this buffer pool will have _at least_ this size + * note : it's better for all buffers to have same size, + * as they become freely interchangeable, reducing malloc/free usages and memory fragmentation */ +static void ZSTDMT_setBufferSize(ZSTDMT_bufferPool* const bufPool, size_t const bSize) +{ + ZSTD_pthread_mutex_lock(&bufPool->poolMutex); + DEBUGLOG(4, "ZSTDMT_setBufferSize: bSize = %u", (U32)bSize); + bufPool->bufferSize = bSize; + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); +} + + +static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, U32 nbWorkers) +{ + unsigned const maxNbBuffers = 2*nbWorkers + 3; + if (srcBufPool==NULL) return NULL; + if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */ + return srcBufPool; + /* need a larger buffer pool */ + { ZSTD_customMem const cMem = srcBufPool->cMem; + size_t const bSize = srcBufPool->bufferSize; /* forward parameters */ + ZSTDMT_bufferPool* newBufPool; + ZSTDMT_freeBufferPool(srcBufPool); + newBufPool = ZSTDMT_createBufferPool(nbWorkers, cMem); + if (newBufPool==NULL) return newBufPool; + ZSTDMT_setBufferSize(newBufPool, bSize); + return newBufPool; + } +} + +/** ZSTDMT_getBuffer() : + * assumption : bufPool must be valid + * @return : a buffer, with start pointer and size + * note: allocation may fail, in this case, start==NULL and size==0 */ +static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool) +{ + size_t const bSize = bufPool->bufferSize; + DEBUGLOG(5, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize); + ZSTD_pthread_mutex_lock(&bufPool->poolMutex); + if (bufPool->nbBuffers) { /* try to use an existing buffer */ + buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)]; + size_t const availBufferSize = buf.capacity; + bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer; + if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) { + /* large enough, but not too much */ + DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u", + bufPool->nbBuffers, (U32)buf.capacity); + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + return buf; + } + /* size conditions not respected : scratch this buffer, create new one */ + DEBUGLOG(5, "ZSTDMT_getBuffer: existing buffer does not meet size conditions => freeing"); + ZSTD_customFree(buf.start, bufPool->cMem); + } + ZSTD_pthread_mutex_unlock(&bufPool->poolMutex); + /* create new buffer */ + DEBUGLOG(5, "ZSTDMT_getBuffer: create a new buffer"); + { buffer_t buffer; + void* const start = ZSTD_customMalloc(bSize, bufPool->cMem); + buffer.start = start; /* note : start can be NULL if malloc fails ! */ + buffer.capacity = (start==NULL) ? 0 : bSize; + if (start==NULL) { + DEBUGLOG(5, "ZSTDMT_getBuffer: buffer allocation failure !!"); + } else { + DEBUGLOG(5, "ZSTDMT_getBuffer: created buffer of size %u", (U32)bSize); + } + return buffer; + } +} + +#if ZSTD_RESIZE_SEQPOOL +/** ZSTDMT_resizeBuffer() : + * assumption : bufPool must be valid + * @return : a buffer that is at least the buffer pool buffer size. + * If a reallocation happens, the data in the input buffer is copied. + */ +static buffer_t ZSTDMT_resizeBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buffer) +{ + size_t const bSize = bufPool->bufferSize; + if (buffer.capacity < bSize) { + void* const start = ZSTD_customMalloc(bSize, bufPool->cMem); + buffer_t newBuffer; + newBuffer.start = start; + newBuffer.capacity = start == NULL ? 
0 : bSize;
+ if (start != NULL) {
+ assert(newBuffer.capacity >= buffer.capacity);
+ ZSTD_memcpy(newBuffer.start, buffer.start, buffer.capacity);
+ DEBUGLOG(5, "ZSTDMT_resizeBuffer: created buffer of size %u", (U32)bSize);
+ return newBuffer;
+ }
+ DEBUGLOG(5, "ZSTDMT_resizeBuffer: buffer allocation failure !!");
+ }
+ return buffer;
+}
+#endif
+
+/* store buffer for later re-use, up to pool capacity */
+static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
+{
+ DEBUGLOG(5, "ZSTDMT_releaseBuffer");
+ if (buf.start == NULL) return; /* compatible with release on NULL */
+ ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
+ if (bufPool->nbBuffers < bufPool->totalBuffers) {
+ bufPool->bTable[bufPool->nbBuffers++] = buf; /* stored for later use */
+ DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u",
+ (U32)buf.capacity, (U32)(bufPool->nbBuffers-1));
+ ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
+ return;
+ }
+ ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
+ /* Reached bufferPool capacity (should not happen) */
+ DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing ");
+ ZSTD_customFree(buf.start, bufPool->cMem);
+}
+
+
+/* ===== Seq Pool Wrapper ====== */
+
+typedef ZSTDMT_bufferPool ZSTDMT_seqPool;
+
+static size_t ZSTDMT_sizeof_seqPool(ZSTDMT_seqPool* seqPool)
+{
+ return ZSTDMT_sizeof_bufferPool(seqPool);
+}
+
+static rawSeqStore_t bufferToSeq(buffer_t buffer)
+{
+ rawSeqStore_t seq = kNullRawSeqStore;
+ seq.seq = (rawSeq*)buffer.start;
+ seq.capacity = buffer.capacity / sizeof(rawSeq);
+ return seq;
+}
+
+static buffer_t seqToBuffer(rawSeqStore_t seq)
+{
+ buffer_t buffer;
+ buffer.start = seq.seq;
+ buffer.capacity = seq.capacity * sizeof(rawSeq);
+ return buffer;
+}
+
+static rawSeqStore_t ZSTDMT_getSeq(ZSTDMT_seqPool* seqPool)
+{
+ if (seqPool->bufferSize == 0) {
+ return kNullRawSeqStore;
+ }
+ return bufferToSeq(ZSTDMT_getBuffer(seqPool));
+}
+
+#if ZSTD_RESIZE_SEQPOOL
+static rawSeqStore_t ZSTDMT_resizeSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq)
+{
+ return bufferToSeq(ZSTDMT_resizeBuffer(seqPool, seqToBuffer(seq)));
+}
+#endif
+
+static void ZSTDMT_releaseSeq(ZSTDMT_seqPool* seqPool, rawSeqStore_t seq)
+{
+ ZSTDMT_releaseBuffer(seqPool, seqToBuffer(seq));
+}
+
+static void ZSTDMT_setNbSeq(ZSTDMT_seqPool* const seqPool, size_t const nbSeq)
+{
+ ZSTDMT_setBufferSize(seqPool, nbSeq * sizeof(rawSeq));
+}
+
+static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem)
+{
+ ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+ if (seqPool == NULL) return NULL;
+ ZSTDMT_setNbSeq(seqPool, 0);
+ return seqPool;
+}
+
+static void ZSTDMT_freeSeqPool(ZSTDMT_seqPool* seqPool)
+{
+ ZSTDMT_freeBufferPool(seqPool);
+}
+
+static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
+{
+ return ZSTDMT_expandBufferPool(pool, nbWorkers);
+}
+
+
+/* ===== CCtx Pool ===== */
+/* a single CCtx Pool can be invoked from multiple threads in parallel */
+
+typedef struct {
+ ZSTD_pthread_mutex_t poolMutex;
+ int totalCCtx;
+ int availCCtx;
+ ZSTD_customMem cMem;
+ ZSTD_CCtx* cctx[1]; /* variable size */
+} ZSTDMT_CCtxPool;
+
+/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
+static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
+{
+ int cid;
+ for (cid=0; cid<pool->totalCCtx; cid++)
+ ZSTD_freeCCtx(pool->cctx[cid]); /* note : compatible with free on NULL */
+ ZSTD_pthread_mutex_destroy(&pool->poolMutex);
+ ZSTD_customFree(pool, pool->cMem);
+}
+
+/* ZSTDMT_createCCtxPool() :
+ * implies nbWorkers >= 1 , checked by caller ZSTDMT_createCCtx() */
+static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
+ ZSTD_customMem cMem)
+{
+ ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_customCalloc(
+ sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem);
+ assert(nbWorkers > 0);
+ if (!cctxPool) return NULL;
+ if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
+ ZSTD_customFree(cctxPool, cMem);
+ return NULL;
+ }
+ cctxPool->cMem = cMem;
+ cctxPool->totalCCtx = nbWorkers;
+ cctxPool->availCCtx = 1; /* at least one cctx for single-thread mode */
+ cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
+ if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
+ DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers);
+ return cctxPool;
+}
+
+static ZSTDMT_CCtxPool* ZSTDMT_expandCCtxPool(ZSTDMT_CCtxPool* srcPool,
+ int nbWorkers)
+{
+ if (srcPool==NULL) return NULL;
+ if (nbWorkers <= srcPool->totalCCtx) return srcPool; /* good enough */
+ /* need a larger cctx pool */
+ { ZSTD_customMem const cMem = srcPool->cMem;
+ ZSTDMT_freeCCtxPool(srcPool);
+ return ZSTDMT_createCCtxPool(nbWorkers, cMem);
+ }
+}
+
+/* only works during initialization phase, not during compression */
+static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
+{
+ ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
+ { unsigned const nbWorkers = cctxPool->totalCCtx;
+ size_t const poolSize = sizeof(*cctxPool)
+ + (nbWorkers-1) * sizeof(ZSTD_CCtx*);
+ unsigned u;
+ size_t totalCCtxSize = 0;
+ for (u=0; u<nbWorkers; u++) {
+ totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
+ }
+ ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
+ assert(nbWorkers > 0);
+ return poolSize + totalCCtxSize;
+ }
+}
+
+static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool)
+{
+ DEBUGLOG(5, "ZSTDMT_getCCtx");
+ ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
+ if (cctxPool->availCCtx) {
+ cctxPool->availCCtx--;
+ { ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx];
+ ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
+ return cctx;
+ } }
+ ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
+ DEBUGLOG(5, "create one more CCtx");
+ return ZSTD_createCCtx_advanced(cctxPool->cMem); /* note : can be NULL, when creation fails ! */
+}
+
+static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
+{
+ if (cctx==NULL) return; /* compatibility with release on NULL */
+ ZSTD_pthread_mutex_lock(&pool->poolMutex);
+ if (pool->availCCtx < pool->totalCCtx)
+ pool->cctx[pool->availCCtx++] = cctx;
+ else {
+ /* pool overflow : should not happen, since totalCCtx==nbWorkers */
+ DEBUGLOG(4, "CCtx pool overflow : free cctx");
+ ZSTD_freeCCtx(cctx);
+ }
+ ZSTD_pthread_mutex_unlock(&pool->poolMutex);
+}
+
+/* ==== Serial State ==== */
+
+typedef struct {
+ void const* start;
+ size_t size;
+} range_t;
+
+typedef struct {
+ /* All variables in the struct are protected by mutex. */
+ ZSTD_pthread_mutex_t mutex;
+ ZSTD_pthread_cond_t cond;
+ ZSTD_CCtx_params params;
+ ldmState_t ldmState;
+ XXH64_state_t xxhState;
+ unsigned nextJobID;
+ /* Protects ldmWindow.
+ * Must be acquired after the main mutex when acquiring both.
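+ * (Editor's note, illustrative sketch : the safe acquisition order is therefore
+ *     ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex);
+ *     ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex);
+ *     ... update ldmWindow ...
+ *     ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex);
+ *     ZSTD_pthread_mutex_unlock(&serialState->mutex);
+ * taking the two locks in the opposite order from two threads risks deadlock.)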
+ */
+ ZSTD_pthread_mutex_t ldmWindowMutex;
+ ZSTD_pthread_cond_t ldmWindowCond; /* Signaled when ldmWindow is updated */
+ ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */
+} serialState_t;
+
+static int
+ZSTDMT_serialState_reset(serialState_t* serialState,
+ ZSTDMT_seqPool* seqPool,
+ ZSTD_CCtx_params params,
+ size_t jobSize,
+ const void* dict, size_t const dictSize,
+ ZSTD_dictContentType_e dictContentType)
+{
+ /* Adjust parameters */
+ if (params.ldmParams.enableLdm) {
+ DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
+ ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
+ assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
+ assert(params.ldmParams.hashRateLog < 32);
+ } else {
+ ZSTD_memset(&params.ldmParams, 0, sizeof(params.ldmParams));
+ }
+ serialState->nextJobID = 0;
+ if (params.fParams.checksumFlag)
+ XXH64_reset(&serialState->xxhState, 0);
+ if (params.ldmParams.enableLdm) {
+ ZSTD_customMem cMem = params.customMem;
+ unsigned const hashLog = params.ldmParams.hashLog;
+ size_t const hashSize = ((size_t)1 << hashLog) * sizeof(ldmEntry_t);
+ unsigned const bucketLog =
+ params.ldmParams.hashLog - params.ldmParams.bucketSizeLog;
+ unsigned const prevBucketLog =
+ serialState->params.ldmParams.hashLog -
+ serialState->params.ldmParams.bucketSizeLog;
+ size_t const numBuckets = (size_t)1 << bucketLog;
+ /* Size the seq pool tables */
+ ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
+ /* Reset the window */
+ ZSTD_window_init(&serialState->ldmState.window);
+ /* Resize tables and output space if necessary. */
+ if (serialState->ldmState.hashTable == NULL || serialState->params.ldmParams.hashLog < hashLog) {
+ ZSTD_customFree(serialState->ldmState.hashTable, cMem);
+ serialState->ldmState.hashTable = (ldmEntry_t*)ZSTD_customMalloc(hashSize, cMem);
+ }
+ if (serialState->ldmState.bucketOffsets == NULL || prevBucketLog < bucketLog) {
+ ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem);
+ serialState->ldmState.bucketOffsets = (BYTE*)ZSTD_customMalloc(numBuckets, cMem);
+ }
+ if (!serialState->ldmState.hashTable || !serialState->ldmState.bucketOffsets)
+ return 1;
+ /* Zero the tables */
+ ZSTD_memset(serialState->ldmState.hashTable, 0, hashSize);
+ ZSTD_memset(serialState->ldmState.bucketOffsets, 0, numBuckets);
+
+ /* Update window state and fill hash table with dict */
+ serialState->ldmState.loadedDictEnd = 0;
+ if (dictSize > 0) {
+ if (dictContentType == ZSTD_dct_rawContent) {
+ BYTE const* const dictEnd = (const BYTE*)dict + dictSize;
+ ZSTD_window_update(&serialState->ldmState.window, dict, dictSize, /* forceNonContiguous */ 0);
+ ZSTD_ldm_fillHashTable(&serialState->ldmState, (const BYTE*)dict, dictEnd, &params.ldmParams);
+ serialState->ldmState.loadedDictEnd = params.forceWindow ? 0 : (U32)(dictEnd - serialState->ldmState.window.base);
+ } else {
+ /* don't even load anything */
+ }
+ }
+
+ /* Initialize serialState's copy of ldmWindow.
*/ + serialState->ldmWindow = serialState->ldmState.window; + } + + serialState->params = params; + serialState->params.jobSize = (U32)jobSize; + return 0; +} + +static int ZSTDMT_serialState_init(serialState_t* serialState) +{ + int initError = 0; + ZSTD_memset(serialState, 0, sizeof(*serialState)); + initError |= ZSTD_pthread_mutex_init(&serialState->mutex, NULL); + initError |= ZSTD_pthread_cond_init(&serialState->cond, NULL); + initError |= ZSTD_pthread_mutex_init(&serialState->ldmWindowMutex, NULL); + initError |= ZSTD_pthread_cond_init(&serialState->ldmWindowCond, NULL); + return initError; +} + +static void ZSTDMT_serialState_free(serialState_t* serialState) +{ + ZSTD_customMem cMem = serialState->params.customMem; + ZSTD_pthread_mutex_destroy(&serialState->mutex); + ZSTD_pthread_cond_destroy(&serialState->cond); + ZSTD_pthread_mutex_destroy(&serialState->ldmWindowMutex); + ZSTD_pthread_cond_destroy(&serialState->ldmWindowCond); + ZSTD_customFree(serialState->ldmState.hashTable, cMem); + ZSTD_customFree(serialState->ldmState.bucketOffsets, cMem); +} + +static void ZSTDMT_serialState_update(serialState_t* serialState, + ZSTD_CCtx* jobCCtx, rawSeqStore_t seqStore, + range_t src, unsigned jobID) +{ + /* Wait for our turn */ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex); + while (serialState->nextJobID < jobID) { + DEBUGLOG(5, "wait for serialState->cond"); + ZSTD_pthread_cond_wait(&serialState->cond, &serialState->mutex); + } + /* A future job may error and skip our job */ + if (serialState->nextJobID == jobID) { + /* It is now our turn, do any processing necessary */ + if (serialState->params.ldmParams.enableLdm) { + size_t error; + assert(seqStore.seq != NULL && seqStore.pos == 0 && + seqStore.size == 0 && seqStore.capacity > 0); + assert(src.size <= serialState->params.jobSize); + ZSTD_window_update(&serialState->ldmState.window, src.start, src.size, /* forceNonContiguous */ 0); + error = ZSTD_ldm_generateSequences( + &serialState->ldmState, &seqStore, + &serialState->params.ldmParams, src.start, src.size); + /* We provide a large enough buffer to never fail. */ + assert(!ZSTD_isError(error)); (void)error; + /* Update ldmWindow to match the ldmState.window and signal the main + * thread if it is waiting for a buffer. 
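+ * (Editor's note : the waiting side is ZSTDMT_waitForLdmComplete(), defined further
+ * below, which blocks on ldmWindowCond until the round-buffer range it wants to
+ * reuse no longer overlaps this window.)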
+ */ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex); + serialState->ldmWindow = serialState->ldmState.window; + ZSTD_pthread_cond_signal(&serialState->ldmWindowCond); + ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex); + } + if (serialState->params.fParams.checksumFlag && src.size > 0) + XXH64_update(&serialState->xxhState, src.start, src.size); + } + /* Now it is the next jobs turn */ + serialState->nextJobID++; + ZSTD_pthread_cond_broadcast(&serialState->cond); + ZSTD_pthread_mutex_unlock(&serialState->mutex); + + if (seqStore.size > 0) { + size_t const err = ZSTD_referenceExternalSequences( + jobCCtx, seqStore.seq, seqStore.size); + assert(serialState->params.ldmParams.enableLdm); + assert(!ZSTD_isError(err)); + (void)err; + } +} + +static void ZSTDMT_serialState_ensureFinished(serialState_t* serialState, + unsigned jobID, size_t cSize) +{ + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex); + if (serialState->nextJobID <= jobID) { + assert(ZSTD_isError(cSize)); (void)cSize; + DEBUGLOG(5, "Skipping past job %u because of error", jobID); + serialState->nextJobID = jobID + 1; + ZSTD_pthread_cond_broadcast(&serialState->cond); + + ZSTD_PTHREAD_MUTEX_LOCK(&serialState->ldmWindowMutex); + ZSTD_window_clear(&serialState->ldmWindow); + ZSTD_pthread_cond_signal(&serialState->ldmWindowCond); + ZSTD_pthread_mutex_unlock(&serialState->ldmWindowMutex); + } + ZSTD_pthread_mutex_unlock(&serialState->mutex); + +} + + +/* ------------------------------------------ */ +/* ===== Worker thread ===== */ +/* ------------------------------------------ */ + +static const range_t kNullRange = { NULL, 0 }; + +typedef struct { + size_t consumed; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx */ + size_t cSize; /* SHARED - set0 by mtctx, then modified by worker AND read by mtctx, then set0 by mtctx */ + ZSTD_pthread_mutex_t job_mutex; /* Thread-safe - used by mtctx and worker */ + ZSTD_pthread_cond_t job_cond; /* Thread-safe - used by mtctx and worker */ + ZSTDMT_CCtxPool* cctxPool; /* Thread-safe - used by mtctx and (all) workers */ + ZSTDMT_bufferPool* bufPool; /* Thread-safe - used by mtctx and (all) workers */ + ZSTDMT_seqPool* seqPool; /* Thread-safe - used by mtctx and (all) workers */ + serialState_t* serial; /* Thread-safe - used by mtctx and (all) workers */ + buffer_t dstBuff; /* set by worker (or mtctx), then read by worker & mtctx, then modified by mtctx => no barrier */ + range_t prefix; /* set by mtctx, then read by worker & mtctx => no barrier */ + range_t src; /* set by mtctx, then read by worker & mtctx => no barrier */ + unsigned jobID; /* set by mtctx, then read by worker => no barrier */ + unsigned firstJob; /* set by mtctx, then read by worker => no barrier */ + unsigned lastJob; /* set by mtctx, then read by worker => no barrier */ + ZSTD_CCtx_params params; /* set by mtctx, then read by worker => no barrier */ + const ZSTD_CDict* cdict; /* set by mtctx, then read by worker => no barrier */ + unsigned long long fullFrameSize; /* set by mtctx, then read by worker => no barrier */ + size_t dstFlushed; /* used only by mtctx */ + unsigned frameChecksumNeeded; /* used only by mtctx */ +} ZSTDMT_jobDescription; + +#define JOB_ERROR(e) { \ + ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); \ + job->cSize = e; \ + ZSTD_pthread_mutex_unlock(&job->job_mutex); \ + goto _endJob; \ +} + +/* ZSTDMT_compressionJob() is a POOL_function type */ +static void ZSTDMT_compressionJob(void* jobDescription) +{ + ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription; + 
ZSTD_CCtx_params jobParams = job->params; /* do not modify job->params ! copy it, modify the copy */ + ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(job->cctxPool); + rawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool); + buffer_t dstBuff = job->dstBuff; + size_t lastCBlockSize = 0; + + /* resources */ + if (cctx==NULL) JOB_ERROR(ERROR(memory_allocation)); + if (dstBuff.start == NULL) { /* streaming job : doesn't provide a dstBuffer */ + dstBuff = ZSTDMT_getBuffer(job->bufPool); + if (dstBuff.start==NULL) JOB_ERROR(ERROR(memory_allocation)); + job->dstBuff = dstBuff; /* this value can be read in ZSTDMT_flush, when it copies the whole job */ + } + if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL) + JOB_ERROR(ERROR(memory_allocation)); + + /* Don't compute the checksum for chunks, since we compute it externally, + * but write it in the header. + */ + if (job->jobID != 0) jobParams.fParams.checksumFlag = 0; + /* Don't run LDM for the chunks, since we handle it externally */ + jobParams.ldmParams.enableLdm = 0; + /* Correct nbWorkers to 0. */ + jobParams.nbWorkers = 0; + + + /* init */ + if (job->cdict) { + size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, &jobParams, job->fullFrameSize); + assert(job->firstJob); /* only allowed for first job */ + if (ZSTD_isError(initError)) JOB_ERROR(initError); + } else { /* srcStart points at reloaded section */ + U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size; + { size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob); + if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError); + } + if (!job->firstJob) { + size_t const err = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_deterministicRefPrefix, 0); + if (ZSTD_isError(err)) JOB_ERROR(err); + } + { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, + job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */ + ZSTD_dtlm_fast, + NULL, /*cdict*/ + &jobParams, pledgedSrcSize); + if (ZSTD_isError(initError)) JOB_ERROR(initError); + } } + + /* Perform serial step as early as possible, but after CCtx initialization */ + ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID); + + if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */ + size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0); + if (ZSTD_isError(hSize)) JOB_ERROR(hSize); + DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize); + ZSTD_invalidateRepCodes(cctx); + } + + /* compress */ + { size_t const chunkSize = 4*ZSTD_BLOCKSIZE_MAX; + int const nbChunks = (int)((job->src.size + (chunkSize-1)) / chunkSize); + const BYTE* ip = (const BYTE*) job->src.start; + BYTE* const ostart = (BYTE*)dstBuff.start; + BYTE* op = ostart; + BYTE* oend = op + dstBuff.capacity; + int chunkNb; + if (sizeof(size_t) > sizeof(int)) assert(job->src.size < ((size_t)INT_MAX) * chunkSize); /* check overflow */ + DEBUGLOG(5, "ZSTDMT_compressionJob: compress %u bytes in %i blocks", (U32)job->src.size, nbChunks); + assert(job->cSize == 0); + for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) { + size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize); + if (ZSTD_isError(cSize)) JOB_ERROR(cSize); + ip += chunkSize; + op += cSize; assert(op < oend); + /* 
stats */ + ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); + job->cSize += cSize; + job->consumed = chunkSize * chunkNb; + DEBUGLOG(5, "ZSTDMT_compressionJob: compress new block : cSize==%u bytes (total: %u)", + (U32)cSize, (U32)job->cSize); + ZSTD_pthread_cond_signal(&job->job_cond); /* warns some more data is ready to be flushed */ + ZSTD_pthread_mutex_unlock(&job->job_mutex); + } + /* last block */ + assert(chunkSize > 0); + assert((chunkSize & (chunkSize - 1)) == 0); /* chunkSize must be power of 2 for mask==(chunkSize-1) to work */ + if ((nbChunks > 0) | job->lastJob /*must output a "last block" flag*/ ) { + size_t const lastBlockSize1 = job->src.size & (chunkSize-1); + size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1; + size_t const cSize = (job->lastJob) ? + ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) : + ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize); + if (ZSTD_isError(cSize)) JOB_ERROR(cSize); + lastCBlockSize = cSize; + } } + if (!job->firstJob) { + /* Double check that we don't have an ext-dict, because then our + * repcode invalidation doesn't work. + */ + assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window)); + } + ZSTD_CCtx_trace(cctx, 0); + +_endJob: + ZSTDMT_serialState_ensureFinished(job->serial, job->jobID, job->cSize); + if (job->prefix.size > 0) + DEBUGLOG(5, "Finished with prefix: %zx", (size_t)job->prefix.start); + DEBUGLOG(5, "Finished with source: %zx", (size_t)job->src.start); + /* release resources */ + ZSTDMT_releaseSeq(job->seqPool, rawSeqStore); + ZSTDMT_releaseCCtx(job->cctxPool, cctx); + /* report */ + ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); + if (ZSTD_isError(job->cSize)) assert(lastCBlockSize == 0); + job->cSize += lastCBlockSize; + job->consumed = job->src.size; /* when job->consumed == job->src.size , compression job is presumed completed */ + ZSTD_pthread_cond_signal(&job->job_cond); + ZSTD_pthread_mutex_unlock(&job->job_mutex); +} + + +/* ------------------------------------------ */ +/* ===== Multi-threaded compression ===== */ +/* ------------------------------------------ */ + +typedef struct { + range_t prefix; /* read-only non-owned prefix buffer */ + buffer_t buffer; + size_t filled; +} inBuff_t; + +typedef struct { + BYTE* buffer; /* The round input buffer. All jobs get references + * to pieces of the buffer. ZSTDMT_tryGetInputRange() + * handles handing out job input buffers, and makes + * sure it doesn't overlap with any pieces still in use. + */ + size_t capacity; /* The capacity of buffer. */ + size_t pos; /* The position of the current inBuff in the round + * buffer. Updated past the end if the inBuff once + * the inBuff is sent to the worker thread. + * pos <= capacity. + */ +} roundBuff_t; + +static const roundBuff_t kNullRoundBuff = {NULL, 0, 0}; + +#define RSYNC_LENGTH 32 + +typedef struct { + U64 hash; + U64 hitMask; + U64 primePower; +} rsyncState_t; + +struct ZSTDMT_CCtx_s { + POOL_ctx* factory; + ZSTDMT_jobDescription* jobs; + ZSTDMT_bufferPool* bufPool; + ZSTDMT_CCtxPool* cctxPool; + ZSTDMT_seqPool* seqPool; + ZSTD_CCtx_params params; + size_t targetSectionSize; + size_t targetPrefixSize; + int jobReady; /* 1 => one job is already prepared, but pool has shortage of workers. Don't create a new job. 
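+ * (Editor's note : when jobReady stays set to 1, the next call to
+ * ZSTDMT_createCompressionJob() skips job preparation and simply retries
+ * POOL_tryAdd() on the already-prepared job.)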
*/
+ inBuff_t inBuff;
+ roundBuff_t roundBuff;
+ serialState_t serial;
+ rsyncState_t rsync;
+ unsigned jobIDMask;
+ unsigned doneJobID;
+ unsigned nextJobID;
+ unsigned frameEnded;
+ unsigned allJobsCompleted;
+ unsigned long long frameContentSize;
+ unsigned long long consumed;
+ unsigned long long produced;
+ ZSTD_customMem cMem;
+ ZSTD_CDict* cdictLocal;
+ const ZSTD_CDict* cdict;
+ unsigned providedFactory: 1;
+};
+
+static void ZSTDMT_freeJobsTable(ZSTDMT_jobDescription* jobTable, U32 nbJobs, ZSTD_customMem cMem)
+{
+ U32 jobNb;
+ if (jobTable == NULL) return;
+ for (jobNb=0; jobNb<nbJobs; jobNb++) {
+ ZSTD_pthread_mutex_destroy(&jobTable[jobNb].job_mutex);
+ ZSTD_pthread_cond_destroy(&jobTable[jobNb].job_cond);
+ }
+ ZSTD_customFree(jobTable, cMem);
+}
+
+/* ZSTDMT_allocJobsTable()
+ * allocate and init a job table.
+ * update *nbJobsPtr to next power of 2 value, as size of table */
+static ZSTDMT_jobDescription* ZSTDMT_createJobsTable(U32* nbJobsPtr, ZSTD_customMem cMem)
+{
+ U32 const nbJobsLog2 = ZSTD_highbit32(*nbJobsPtr) + 1;
+ U32 const nbJobs = 1 << nbJobsLog2;
+ U32 jobNb;
+ ZSTDMT_jobDescription* const jobTable = (ZSTDMT_jobDescription*)
+ ZSTD_customCalloc(nbJobs * sizeof(ZSTDMT_jobDescription), cMem);
+ int initError = 0;
+ if (jobTable==NULL) return NULL;
+ *nbJobsPtr = nbJobs;
+ for (jobNb=0; jobNb<nbJobs; jobNb++) {
+ initError |= ZSTD_pthread_mutex_init(&jobTable[jobNb].job_mutex, NULL);
+ initError |= ZSTD_pthread_cond_init(&jobTable[jobNb].job_cond, NULL);
+ }
+ if (initError != 0) {
+ ZSTDMT_freeJobsTable(jobTable, nbJobs, cMem);
+ return NULL;
+ }
+ return jobTable;
+}
+
+static size_t ZSTDMT_expandJobsTable (ZSTDMT_CCtx* mtctx, U32 nbWorkers) {
+ U32 nbJobs = nbWorkers + 2;
+ if (nbJobs > mtctx->jobIDMask+1) { /* need more job capacity */
+ ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
+ mtctx->jobIDMask = 0;
+ mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, mtctx->cMem);
+ if (mtctx->jobs==NULL) return ERROR(memory_allocation);
+ assert((nbJobs != 0) && ((nbJobs & (nbJobs - 1)) == 0)); /* ensure nbJobs is a power of 2 */
+ mtctx->jobIDMask = nbJobs - 1;
+ }
+ return 0;
+}
+
+
+/* ZSTDMT_CCtxParam_setNbWorkers():
+ * Internal use only */
+static size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
+{
+ return ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, (int)nbWorkers);
+}
+
+MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, ZSTD_customMem cMem, ZSTD_threadPool* pool)
+{
+ ZSTDMT_CCtx* mtctx;
+ U32 nbJobs = nbWorkers + 2;
+ int initError;
+ DEBUGLOG(3, "ZSTDMT_createCCtx_advanced (nbWorkers = %u)", nbWorkers);
+
+ if (nbWorkers < 1) return NULL;
+ nbWorkers = MIN(nbWorkers , ZSTDMT_NBWORKERS_MAX);
+ if ((cMem.customAlloc!=NULL) ^ (cMem.customFree!=NULL))
+ /* invalid custom allocator */
+ return NULL;
+
+ mtctx = (ZSTDMT_CCtx*) ZSTD_customCalloc(sizeof(ZSTDMT_CCtx), cMem);
+ if (!mtctx) return NULL;
+ ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers);
+ mtctx->cMem = cMem;
+ mtctx->allJobsCompleted = 1;
+ if (pool != NULL) {
+ mtctx->factory = pool;
+ mtctx->providedFactory = 1;
+ }
+ else {
+ mtctx->factory = POOL_create_advanced(nbWorkers, 0, cMem);
+ mtctx->providedFactory = 0;
+ }
+ mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, cMem);
+ assert(nbJobs > 0); assert((nbJobs & (nbJobs - 1)) == 0); /* ensure nbJobs is a power of 2 */
+ mtctx->jobIDMask = nbJobs - 1;
+ mtctx->bufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
+ mtctx->cctxPool = ZSTDMT_createCCtxPool(nbWorkers, cMem);
+ mtctx->seqPool = ZSTDMT_createSeqPool(nbWorkers, cMem);
+ initError = ZSTDMT_serialState_init(&mtctx->serial);
+ mtctx->roundBuff = kNullRoundBuff;
+ if (!mtctx->factory | !mtctx->jobs | !mtctx->bufPool | !mtctx->cctxPool | !mtctx->seqPool | initError) {
+ ZSTDMT_freeCCtx(mtctx);
+ return NULL;
+ }
+ DEBUGLOG(3, "mt_cctx created, for %u threads", nbWorkers);
+ return mtctx;
+}
+
+ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem, ZSTD_threadPool* pool)
+{
+#ifdef ZSTD_MULTITHREAD
+ return ZSTDMT_createCCtx_advanced_internal(nbWorkers, cMem, pool);
+#else
+ (void)nbWorkers;
+ (void)cMem;
+ (void)pool;
+ return NULL;
+#endif
+}
+
+
+/* ZSTDMT_releaseAllJobResources() :
+ * note : ensure all workers are killed first !
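+ * (Editor's note, illustrative : the job table is a power-of-two ring indexed by
+ * jobID & jobIDMask, which is why the loop below runs from 0 to jobIDMask inclusive;
+ * e.g. with 8 slots, jobIDMask == 7 and job 11 reuses slot 11 & 7 == 3.)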
*/ +static void ZSTDMT_releaseAllJobResources(ZSTDMT_CCtx* mtctx) +{ + unsigned jobID; + DEBUGLOG(3, "ZSTDMT_releaseAllJobResources"); + for (jobID=0; jobID <= mtctx->jobIDMask; jobID++) { + /* Copy the mutex/cond out */ + ZSTD_pthread_mutex_t const mutex = mtctx->jobs[jobID].job_mutex; + ZSTD_pthread_cond_t const cond = mtctx->jobs[jobID].job_cond; + + DEBUGLOG(4, "job%02u: release dst address %08X", jobID, (U32)(size_t)mtctx->jobs[jobID].dstBuff.start); + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[jobID].dstBuff); + + /* Clear the job description, but keep the mutex/cond */ + ZSTD_memset(&mtctx->jobs[jobID], 0, sizeof(mtctx->jobs[jobID])); + mtctx->jobs[jobID].job_mutex = mutex; + mtctx->jobs[jobID].job_cond = cond; + } + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + mtctx->allJobsCompleted = 1; +} + +static void ZSTDMT_waitForAllJobsCompleted(ZSTDMT_CCtx* mtctx) +{ + DEBUGLOG(4, "ZSTDMT_waitForAllJobsCompleted"); + while (mtctx->doneJobID < mtctx->nextJobID) { + unsigned const jobID = mtctx->doneJobID & mtctx->jobIDMask; + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex); + while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) { + DEBUGLOG(4, "waiting for jobCompleted signal from job %u", mtctx->doneJobID); /* we want to block when waiting for data to flush */ + ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex); + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex); + mtctx->doneJobID++; + } +} + +size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx) +{ + if (mtctx==NULL) return 0; /* compatible with free on NULL */ + if (!mtctx->providedFactory) + POOL_free(mtctx->factory); /* stop and free worker threads */ + ZSTDMT_releaseAllJobResources(mtctx); /* release job resources into pools first */ + ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem); + ZSTDMT_freeBufferPool(mtctx->bufPool); + ZSTDMT_freeCCtxPool(mtctx->cctxPool); + ZSTDMT_freeSeqPool(mtctx->seqPool); + ZSTDMT_serialState_free(&mtctx->serial); + ZSTD_freeCDict(mtctx->cdictLocal); + if (mtctx->roundBuff.buffer) + ZSTD_customFree(mtctx->roundBuff.buffer, mtctx->cMem); + ZSTD_customFree(mtctx, mtctx->cMem); + return 0; +} + +size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx) +{ + if (mtctx == NULL) return 0; /* supports sizeof NULL */ + return sizeof(*mtctx) + + POOL_sizeof(mtctx->factory) + + ZSTDMT_sizeof_bufferPool(mtctx->bufPool) + + (mtctx->jobIDMask+1) * sizeof(ZSTDMT_jobDescription) + + ZSTDMT_sizeof_CCtxPool(mtctx->cctxPool) + + ZSTDMT_sizeof_seqPool(mtctx->seqPool) + + ZSTD_sizeof_CDict(mtctx->cdictLocal) + + mtctx->roundBuff.capacity; +} + + +/* ZSTDMT_resize() : + * @return : error code if fails, 0 on success */ +static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers) +{ + if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation); + FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) , ""); + mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers); + if (mtctx->bufPool == NULL) return ERROR(memory_allocation); + mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers); + if (mtctx->cctxPool == NULL) return ERROR(memory_allocation); + mtctx->seqPool = ZSTDMT_expandSeqPool(mtctx->seqPool, nbWorkers); + if (mtctx->seqPool == NULL) return ERROR(memory_allocation); + ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers); + return 0; +} + + +/*! 
ZSTDMT_updateCParams_whileCompressing() : + * Updates a selected set of compression parameters, remaining compatible with currently active frame. + * New parameters will be applied to next compression job. */ +void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams) +{ + U32 const saved_wlog = mtctx->params.cParams.windowLog; /* Do not modify windowLog while compressing */ + int const compressionLevel = cctxParams->compressionLevel; + DEBUGLOG(5, "ZSTDMT_updateCParams_whileCompressing (level:%i)", + compressionLevel); + mtctx->params.compressionLevel = compressionLevel; + { ZSTD_compressionParameters cParams = ZSTD_getCParamsFromCCtxParams(cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + cParams.windowLog = saved_wlog; + mtctx->params.cParams = cParams; + } +} + +/* ZSTDMT_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads. + * Note : mutex will be acquired during statistics collection inside workers. */ +ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx) +{ + ZSTD_frameProgression fps; + DEBUGLOG(5, "ZSTDMT_getFrameProgression"); + fps.ingested = mtctx->consumed + mtctx->inBuff.filled; + fps.consumed = mtctx->consumed; + fps.produced = fps.flushed = mtctx->produced; + fps.currentJobID = mtctx->nextJobID; + fps.nbActiveWorkers = 0; + { unsigned jobNb; + unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1); + DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)", + mtctx->doneJobID, lastJobNb, mtctx->jobReady) + for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) { + unsigned const wJobID = jobNb & mtctx->jobIDMask; + ZSTDMT_jobDescription* jobPtr = &mtctx->jobs[wJobID]; + ZSTD_pthread_mutex_lock(&jobPtr->job_mutex); + { size_t const cResult = jobPtr->cSize; + size_t const produced = ZSTD_isError(cResult) ? 0 : cResult; + size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed; + assert(flushed <= produced); + fps.ingested += jobPtr->src.size; + fps.consumed += jobPtr->consumed; + fps.produced += produced; + fps.flushed += flushed; + fps.nbActiveWorkers += (jobPtr->consumed < jobPtr->src.size); + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + } + } + return fps; +} + + +size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx) +{ + size_t toFlush; + unsigned const jobID = mtctx->doneJobID; + assert(jobID <= mtctx->nextJobID); + if (jobID == mtctx->nextJobID) return 0; /* no active job => nothing to flush */ + + /* look into oldest non-fully-flushed job */ + { unsigned const wJobID = jobID & mtctx->jobIDMask; + ZSTDMT_jobDescription* const jobPtr = &mtctx->jobs[wJobID]; + ZSTD_pthread_mutex_lock(&jobPtr->job_mutex); + { size_t const cResult = jobPtr->cSize; + size_t const produced = ZSTD_isError(cResult) ? 0 : cResult; + size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed; + assert(flushed <= produced); + assert(jobPtr->consumed <= jobPtr->src.size); + toFlush = produced - flushed; + /* if toFlush==0, nothing is available to flush. + * However, jobID is expected to still be active: + * if jobID was already completed and fully flushed, + * ZSTDMT_flushProduced() should have already moved onto next job. + * Therefore, some input has not yet been consumed. 
*/ + if (toFlush==0) { + assert(jobPtr->consumed < jobPtr->src.size); + } + } + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + } + + return toFlush; +} + + +/* ------------------------------------------ */ +/* ===== Multi-threaded compression ===== */ +/* ------------------------------------------ */ + +static unsigned ZSTDMT_computeTargetJobLog(const ZSTD_CCtx_params* params) +{ + unsigned jobLog; + if (params->ldmParams.enableLdm) { + /* In Long Range Mode, the windowLog is typically oversized. + * In which case, it's preferable to determine the jobSize + * based on cycleLog instead. */ + jobLog = MAX(21, ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy) + 3); + } else { + jobLog = MAX(20, params->cParams.windowLog + 2); + } + return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX); +} + +static int ZSTDMT_overlapLog_default(ZSTD_strategy strat) +{ + switch(strat) + { + case ZSTD_btultra2: + return 9; + case ZSTD_btultra: + case ZSTD_btopt: + return 8; + case ZSTD_btlazy2: + case ZSTD_lazy2: + return 7; + case ZSTD_lazy: + case ZSTD_greedy: + case ZSTD_dfast: + case ZSTD_fast: + default:; + } + return 6; +} + +static int ZSTDMT_overlapLog(int ovlog, ZSTD_strategy strat) +{ + assert(0 <= ovlog && ovlog <= 9); + if (ovlog == 0) return ZSTDMT_overlapLog_default(strat); + return ovlog; +} + +static size_t ZSTDMT_computeOverlapSize(const ZSTD_CCtx_params* params) +{ + int const overlapRLog = 9 - ZSTDMT_overlapLog(params->overlapLog, params->cParams.strategy); + int ovLog = (overlapRLog >= 8) ? 0 : (params->cParams.windowLog - overlapRLog); + assert(0 <= overlapRLog && overlapRLog <= 8); + if (params->ldmParams.enableLdm) { + /* In Long Range Mode, the windowLog is typically oversized. + * In which case, it's preferable to determine the jobSize + * based on chainLog instead. + * Then, ovLog becomes a fraction of the jobSize, rather than windowSize */ + ovLog = MIN(params->cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) + - overlapRLog; + } + assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX); + DEBUGLOG(4, "overlapLog : %i", params->overlapLog); + DEBUGLOG(4, "overlap size : %i", 1 << ovLog); + return (ovLog==0) ? 
0 : (size_t)1 << ovLog;
+}
+
+/* ====================================== */
+/* ======= Streaming API ======= */
+/* ====================================== */
+
+size_t ZSTDMT_initCStream_internal(
+ ZSTDMT_CCtx* mtctx,
+ const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType,
+ const ZSTD_CDict* cdict, ZSTD_CCtx_params params,
+ unsigned long long pledgedSrcSize)
+{
+ DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u)",
+ (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx);
+
+ /* params supposed partially fully validated at this point */
+ assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+ assert(!((dict) && (cdict))); /* either dict or cdict, not both */
+
+ /* init */
+ if (params.nbWorkers != mtctx->params.nbWorkers)
+ FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) , "");
+
+ if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
+ if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX;
+
+ DEBUGLOG(4, "ZSTDMT_initCStream_internal: %u workers", params.nbWorkers);
+
+ if (mtctx->allJobsCompleted == 0) { /* previous compression not correctly finished */
+ ZSTDMT_waitForAllJobsCompleted(mtctx);
+ ZSTDMT_releaseAllJobResources(mtctx);
+ mtctx->allJobsCompleted = 1;
+ }
+
+ mtctx->params = params;
+ mtctx->frameContentSize = pledgedSrcSize;
+ if (dict) {
+ ZSTD_freeCDict(mtctx->cdictLocal);
+ mtctx->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize,
+ ZSTD_dlm_byCopy, dictContentType, /* note : a loadPrefix becomes an internal CDict */
+ params.cParams, mtctx->cMem);
+ mtctx->cdict = mtctx->cdictLocal;
+ if (mtctx->cdictLocal == NULL) return ERROR(memory_allocation);
+ } else {
+ ZSTD_freeCDict(mtctx->cdictLocal);
+ mtctx->cdictLocal = NULL;
+ mtctx->cdict = cdict;
+ }
+
+ mtctx->targetPrefixSize = ZSTDMT_computeOverlapSize(&params);
+ DEBUGLOG(4, "overlapLog=%i => %u KB", params.overlapLog, (U32)(mtctx->targetPrefixSize>>10));
+ mtctx->targetSectionSize = params.jobSize;
+ if (mtctx->targetSectionSize == 0) {
+ mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(&params);
+ }
+ assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX);
+
+ if (params.rsyncable) {
+ /* Aim for the targetsectionSize as the average job size. */
+ U32 const jobSizeKB = (U32)(mtctx->targetSectionSize >> 10);
+ U32 const rsyncBits = (assert(jobSizeKB >= 1), ZSTD_highbit32(jobSizeKB) + 10);
+ DEBUGLOG(4, "rsyncLog = %u", rsyncBits);
+ mtctx->rsync.hash = 0;
+ mtctx->rsync.hitMask = (1ULL << rsyncBits) - 1;
+ mtctx->rsync.primePower = ZSTD_rollingHash_primePower(RSYNC_LENGTH);
+ }
+ if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize; /* job size must be >= overlap size */
+ DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), (U32)params.jobSize);
+ DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10));
+ ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(mtctx->targetSectionSize));
+ {
+ /* If ldm is enabled we need windowSize space. */
+ size_t const windowSize = mtctx->params.ldmParams.enableLdm ? (1U << mtctx->params.cParams.windowLog) : 0;
+ /* Two buffers of slack, plus extra space for the overlap
+ * This is the minimum slack that LDM works with. One extra because
+ * flush might waste up to targetSectionSize-1 bytes.
Another extra + * for the overlap (if > 0), then one to fill which doesn't overlap + * with the LDM window. + */ + size_t const nbSlackBuffers = 2 + (mtctx->targetPrefixSize > 0); + size_t const slackSize = mtctx->targetSectionSize * nbSlackBuffers; + /* Compute the total size, and always have enough slack */ + size_t const nbWorkers = MAX(mtctx->params.nbWorkers, 1); + size_t const sectionsSize = mtctx->targetSectionSize * nbWorkers; + size_t const capacity = MAX(windowSize, sectionsSize) + slackSize; + if (mtctx->roundBuff.capacity < capacity) { + if (mtctx->roundBuff.buffer) + ZSTD_customFree(mtctx->roundBuff.buffer, mtctx->cMem); + mtctx->roundBuff.buffer = (BYTE*)ZSTD_customMalloc(capacity, mtctx->cMem); + if (mtctx->roundBuff.buffer == NULL) { + mtctx->roundBuff.capacity = 0; + return ERROR(memory_allocation); + } + mtctx->roundBuff.capacity = capacity; + } + } + DEBUGLOG(4, "roundBuff capacity : %u KB", (U32)(mtctx->roundBuff.capacity>>10)); + mtctx->roundBuff.pos = 0; + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + mtctx->inBuff.prefix = kNullRange; + mtctx->doneJobID = 0; + mtctx->nextJobID = 0; + mtctx->frameEnded = 0; + mtctx->allJobsCompleted = 0; + mtctx->consumed = 0; + mtctx->produced = 0; + if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize, + dict, dictSize, dictContentType)) + return ERROR(memory_allocation); + return 0; +} + + +/* ZSTDMT_writeLastEmptyBlock() + * Write a single empty block with an end-of-frame to finish a frame. + * Job must be created from streaming variant. + * This function is always successful if expected conditions are fulfilled. + */ +static void ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job) +{ + assert(job->lastJob == 1); + assert(job->src.size == 0); /* last job is empty -> will be simplified into a last empty block */ + assert(job->firstJob == 0); /* cannot be first job, as it also needs to create frame header */ + assert(job->dstBuff.start == NULL); /* invoked from streaming variant only (otherwise, dstBuff might be user's output) */ + job->dstBuff = ZSTDMT_getBuffer(job->bufPool); + if (job->dstBuff.start == NULL) { + job->cSize = ERROR(memory_allocation); + return; + } + assert(job->dstBuff.capacity >= ZSTD_blockHeaderSize); /* no buffer should ever be that small */ + job->src = kNullRange; + job->cSize = ZSTD_writeLastEmptyBlock(job->dstBuff.start, job->dstBuff.capacity); + assert(!ZSTD_isError(job->cSize)); + assert(job->consumed == 0); +} + +static size_t ZSTDMT_createCompressionJob(ZSTDMT_CCtx* mtctx, size_t srcSize, ZSTD_EndDirective endOp) +{ + unsigned const jobID = mtctx->nextJobID & mtctx->jobIDMask; + int const endFrame = (endOp == ZSTD_e_end); + + if (mtctx->nextJobID > mtctx->doneJobID + mtctx->jobIDMask) { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: will not create new job : table is full"); + assert((mtctx->nextJobID & mtctx->jobIDMask) == (mtctx->doneJobID & mtctx->jobIDMask)); + return 0; + } + + if (!mtctx->jobReady) { + BYTE const* src = (BYTE const*)mtctx->inBuff.buffer.start; + DEBUGLOG(5, "ZSTDMT_createCompressionJob: preparing job %u to compress %u bytes with %u preload ", + mtctx->nextJobID, (U32)srcSize, (U32)mtctx->inBuff.prefix.size); + mtctx->jobs[jobID].src.start = src; + mtctx->jobs[jobID].src.size = srcSize; + assert(mtctx->inBuff.filled >= srcSize); + mtctx->jobs[jobID].prefix = mtctx->inBuff.prefix; + mtctx->jobs[jobID].consumed = 0; + mtctx->jobs[jobID].cSize = 0; + mtctx->jobs[jobID].params = mtctx->params; + mtctx->jobs[jobID].cdict = 
mtctx->nextJobID==0 ? mtctx->cdict : NULL; + mtctx->jobs[jobID].fullFrameSize = mtctx->frameContentSize; + mtctx->jobs[jobID].dstBuff = g_nullBuffer; + mtctx->jobs[jobID].cctxPool = mtctx->cctxPool; + mtctx->jobs[jobID].bufPool = mtctx->bufPool; + mtctx->jobs[jobID].seqPool = mtctx->seqPool; + mtctx->jobs[jobID].serial = &mtctx->serial; + mtctx->jobs[jobID].jobID = mtctx->nextJobID; + mtctx->jobs[jobID].firstJob = (mtctx->nextJobID==0); + mtctx->jobs[jobID].lastJob = endFrame; + mtctx->jobs[jobID].frameChecksumNeeded = mtctx->params.fParams.checksumFlag && endFrame && (mtctx->nextJobID>0); + mtctx->jobs[jobID].dstFlushed = 0; + + /* Update the round buffer pos and clear the input buffer to be reset */ + mtctx->roundBuff.pos += srcSize; + mtctx->inBuff.buffer = g_nullBuffer; + mtctx->inBuff.filled = 0; + /* Set the prefix */ + if (!endFrame) { + size_t const newPrefixSize = MIN(srcSize, mtctx->targetPrefixSize); + mtctx->inBuff.prefix.start = src + srcSize - newPrefixSize; + mtctx->inBuff.prefix.size = newPrefixSize; + } else { /* endFrame==1 => no need for another input buffer */ + mtctx->inBuff.prefix = kNullRange; + mtctx->frameEnded = endFrame; + if (mtctx->nextJobID == 0) { + /* single job exception : checksum is already calculated directly within worker thread */ + mtctx->params.fParams.checksumFlag = 0; + } } + + if ( (srcSize == 0) + && (mtctx->nextJobID>0)/*single job must also write frame header*/ ) { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: creating a last empty block to end frame"); + assert(endOp == ZSTD_e_end); /* only possible case : need to end the frame with an empty last block */ + ZSTDMT_writeLastEmptyBlock(mtctx->jobs + jobID); + mtctx->nextJobID++; + return 0; + } + } + + DEBUGLOG(5, "ZSTDMT_createCompressionJob: posting job %u : %u bytes (end:%u, jobNb == %u (mod:%u))", + mtctx->nextJobID, + (U32)mtctx->jobs[jobID].src.size, + mtctx->jobs[jobID].lastJob, + mtctx->nextJobID, + jobID); + if (POOL_tryAdd(mtctx->factory, ZSTDMT_compressionJob, &mtctx->jobs[jobID])) { + mtctx->nextJobID++; + mtctx->jobReady = 0; + } else { + DEBUGLOG(5, "ZSTDMT_createCompressionJob: no worker available for job %u", mtctx->nextJobID); + mtctx->jobReady = 1; + } + return 0; +} + + +/*! ZSTDMT_flushProduced() : + * flush whatever data has been produced but not yet flushed in current job. + * move to next job if current one is fully flushed. + * `output` : `pos` will be updated with amount of data flushed . + * `blockToFlush` : if >0, the function will block and wait if there is no data available to flush . 
+ * @return : amount of data remaining within internal buffer, 0 if no more, 1 if unknown but > 0, or an error code */ +static size_t ZSTDMT_flushProduced(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, unsigned blockToFlush, ZSTD_EndDirective end) +{ + unsigned const wJobID = mtctx->doneJobID & mtctx->jobIDMask; + DEBUGLOG(5, "ZSTDMT_flushProduced (blocking:%u , job %u <= %u)", + blockToFlush, mtctx->doneJobID, mtctx->nextJobID); + assert(output->size >= output->pos); + + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex); + if ( blockToFlush + && (mtctx->doneJobID < mtctx->nextJobID) ) { + assert(mtctx->jobs[wJobID].dstFlushed <= mtctx->jobs[wJobID].cSize); + while (mtctx->jobs[wJobID].dstFlushed == mtctx->jobs[wJobID].cSize) { /* nothing to flush */ + if (mtctx->jobs[wJobID].consumed == mtctx->jobs[wJobID].src.size) { + DEBUGLOG(5, "job %u is completely consumed (%u == %u) => don't wait for cond, there will be none", + mtctx->doneJobID, (U32)mtctx->jobs[wJobID].consumed, (U32)mtctx->jobs[wJobID].src.size); + break; + } + DEBUGLOG(5, "waiting for something to flush from job %u (currently flushed: %u bytes)", + mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed); + ZSTD_pthread_cond_wait(&mtctx->jobs[wJobID].job_cond, &mtctx->jobs[wJobID].job_mutex); /* block when nothing to flush but some to come */ + } } + + /* try to flush something */ + { size_t cSize = mtctx->jobs[wJobID].cSize; /* shared */ + size_t const srcConsumed = mtctx->jobs[wJobID].consumed; /* shared */ + size_t const srcSize = mtctx->jobs[wJobID].src.size; /* read-only, could be done after mutex lock, but no-declaration-after-statement */ + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + if (ZSTD_isError(cSize)) { + DEBUGLOG(5, "ZSTDMT_flushProduced: job %u : compression error detected : %s", + mtctx->doneJobID, ZSTD_getErrorName(cSize)); + ZSTDMT_waitForAllJobsCompleted(mtctx); + ZSTDMT_releaseAllJobResources(mtctx); + return cSize; + } + /* add frame checksum if necessary (can only happen once) */ + assert(srcConsumed <= srcSize); + if ( (srcConsumed == srcSize) /* job completed -> worker no longer active */ + && mtctx->jobs[wJobID].frameChecksumNeeded ) { + U32 const checksum = (U32)XXH64_digest(&mtctx->serial.xxhState); + DEBUGLOG(4, "ZSTDMT_flushProduced: writing checksum : %08X \n", checksum); + MEM_writeLE32((char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].cSize, checksum); + cSize += 4; + mtctx->jobs[wJobID].cSize += 4; /* can write this shared value, as worker is no longer active */ + mtctx->jobs[wJobID].frameChecksumNeeded = 0; + } + + if (cSize > 0) { /* compression is ongoing or completed */ + size_t const toFlush = MIN(cSize - mtctx->jobs[wJobID].dstFlushed, output->size - output->pos); + DEBUGLOG(5, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%u/%u, generated:%u)", + (U32)toFlush, mtctx->doneJobID, (U32)srcConsumed, (U32)srcSize, (U32)cSize); + assert(mtctx->doneJobID < mtctx->nextJobID); + assert(cSize >= mtctx->jobs[wJobID].dstFlushed); + assert(mtctx->jobs[wJobID].dstBuff.start != NULL); + if (toFlush > 0) { + ZSTD_memcpy((char*)output->dst + output->pos, + (const char*)mtctx->jobs[wJobID].dstBuff.start + mtctx->jobs[wJobID].dstFlushed, + toFlush); + } + output->pos += toFlush; + mtctx->jobs[wJobID].dstFlushed += toFlush; /* can write : this value is only used by mtctx */ + + if ( (srcConsumed == srcSize) /* job is completed */ + && (mtctx->jobs[wJobID].dstFlushed == cSize) ) { /* output buffer fully flushed => free this job position */ + DEBUGLOG(5, "Job 
%u completed (%u bytes), moving to next one", + mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed); + ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[wJobID].dstBuff); + DEBUGLOG(5, "dstBuffer released"); + mtctx->jobs[wJobID].dstBuff = g_nullBuffer; + mtctx->jobs[wJobID].cSize = 0; /* ensure this job slot is considered "not started" in future check */ + mtctx->consumed += srcSize; + mtctx->produced += cSize; + mtctx->doneJobID++; + } } + + /* return value : how many bytes left in buffer ; fake it to 1 when unknown but >0 */ + if (cSize > mtctx->jobs[wJobID].dstFlushed) return (cSize - mtctx->jobs[wJobID].dstFlushed); + if (srcSize > srcConsumed) return 1; /* current job not completely compressed */ + } + if (mtctx->doneJobID < mtctx->nextJobID) return 1; /* some more jobs ongoing */ + if (mtctx->jobReady) return 1; /* one job is ready to push, just not yet in the list */ + if (mtctx->inBuff.filled > 0) return 1; /* input is not empty, and still needs to be converted into a job */ + mtctx->allJobsCompleted = mtctx->frameEnded; /* all jobs are entirely flushed => if this one is last one, frame is completed */ + if (end == ZSTD_e_end) return !mtctx->frameEnded; /* for ZSTD_e_end, question becomes : is frame completed ? instead of : are internal buffers fully flushed ? */ + return 0; /* internal buffers fully flushed */ +} + +/** + * Returns the range of data used by the earliest job that is not yet complete. + * If the data of the first job is broken up into two segments, we cover both + * sections. + */ +static range_t ZSTDMT_getInputDataInUse(ZSTDMT_CCtx* mtctx) +{ + unsigned const firstJobID = mtctx->doneJobID; + unsigned const lastJobID = mtctx->nextJobID; + unsigned jobID; + + for (jobID = firstJobID; jobID < lastJobID; ++jobID) { + unsigned const wJobID = jobID & mtctx->jobIDMask; + size_t consumed; + + ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[wJobID].job_mutex); + consumed = mtctx->jobs[wJobID].consumed; + ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex); + + if (consumed < mtctx->jobs[wJobID].src.size) { + range_t range = mtctx->jobs[wJobID].prefix; + if (range.size == 0) { + /* Empty prefix */ + range = mtctx->jobs[wJobID].src; + } + /* Job source in multiple segments not supported yet */ + assert(range.start <= mtctx->jobs[wJobID].src.start); + return range; + } + } + return kNullRange; +} + +/** + * Returns non-zero iff buffer and range overlap. + */ +static int ZSTDMT_isOverlapped(buffer_t buffer, range_t range) +{ + BYTE const* const bufferStart = (BYTE const*)buffer.start; + BYTE const* const bufferEnd = bufferStart + buffer.capacity; + BYTE const* const rangeStart = (BYTE const*)range.start; + BYTE const* const rangeEnd = range.size != 0 ? 
rangeStart + range.size : rangeStart; + + if (rangeStart == NULL || bufferStart == NULL) + return 0; + /* Empty ranges cannot overlap */ + if (bufferStart == bufferEnd || rangeStart == rangeEnd) + return 0; + + return bufferStart < rangeEnd && rangeStart < bufferEnd; +} + +static int ZSTDMT_doesOverlapWindow(buffer_t buffer, ZSTD_window_t window) +{ + range_t extDict; + range_t prefix; + + DEBUGLOG(5, "ZSTDMT_doesOverlapWindow"); + extDict.start = window.dictBase + window.lowLimit; + extDict.size = window.dictLimit - window.lowLimit; + + prefix.start = window.base + window.dictLimit; + prefix.size = window.nextSrc - (window.base + window.dictLimit); + DEBUGLOG(5, "extDict [0x%zx, 0x%zx)", + (size_t)extDict.start, + (size_t)extDict.start + extDict.size); + DEBUGLOG(5, "prefix [0x%zx, 0x%zx)", + (size_t)prefix.start, + (size_t)prefix.start + prefix.size); + + return ZSTDMT_isOverlapped(buffer, extDict) + || ZSTDMT_isOverlapped(buffer, prefix); +} + +static void ZSTDMT_waitForLdmComplete(ZSTDMT_CCtx* mtctx, buffer_t buffer) +{ + if (mtctx->params.ldmParams.enableLdm) { + ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex; + DEBUGLOG(5, "ZSTDMT_waitForLdmComplete"); + DEBUGLOG(5, "source [0x%zx, 0x%zx)", + (size_t)buffer.start, + (size_t)buffer.start + buffer.capacity); + ZSTD_PTHREAD_MUTEX_LOCK(mutex); + while (ZSTDMT_doesOverlapWindow(buffer, mtctx->serial.ldmWindow)) { + DEBUGLOG(5, "Waiting for LDM to finish..."); + ZSTD_pthread_cond_wait(&mtctx->serial.ldmWindowCond, mutex); + } + DEBUGLOG(6, "Done waiting for LDM to finish"); + ZSTD_pthread_mutex_unlock(mutex); + } +} + +/** + * Attempts to set the inBuff to the next section to fill. + * If any part of the new section is still in use we give up. + * Returns non-zero if the buffer is filled. + */ +static int ZSTDMT_tryGetInputRange(ZSTDMT_CCtx* mtctx) +{ + range_t const inUse = ZSTDMT_getInputDataInUse(mtctx); + size_t const spaceLeft = mtctx->roundBuff.capacity - mtctx->roundBuff.pos; + size_t const target = mtctx->targetSectionSize; + buffer_t buffer; + + DEBUGLOG(5, "ZSTDMT_tryGetInputRange"); + assert(mtctx->inBuff.buffer.start == NULL); + assert(mtctx->roundBuff.capacity >= target); + + if (spaceLeft < target) { + /* ZSTD_invalidateRepCodes() doesn't work for extDict variants. + * Simply copy the prefix to the beginning in that case. 
+ */ + BYTE* const start = (BYTE*)mtctx->roundBuff.buffer; + size_t const prefixSize = mtctx->inBuff.prefix.size; + + buffer.start = start; + buffer.capacity = prefixSize; + if (ZSTDMT_isOverlapped(buffer, inUse)) { + DEBUGLOG(5, "Waiting for buffer..."); + return 0; + } + ZSTDMT_waitForLdmComplete(mtctx, buffer); + ZSTD_memmove(start, mtctx->inBuff.prefix.start, prefixSize); + mtctx->inBuff.prefix.start = start; + mtctx->roundBuff.pos = prefixSize; + } + buffer.start = mtctx->roundBuff.buffer + mtctx->roundBuff.pos; + buffer.capacity = target; + + if (ZSTDMT_isOverlapped(buffer, inUse)) { + DEBUGLOG(5, "Waiting for buffer..."); + return 0; + } + assert(!ZSTDMT_isOverlapped(buffer, mtctx->inBuff.prefix)); + + ZSTDMT_waitForLdmComplete(mtctx, buffer); + + DEBUGLOG(5, "Using prefix range [%zx, %zx)", + (size_t)mtctx->inBuff.prefix.start, + (size_t)mtctx->inBuff.prefix.start + mtctx->inBuff.prefix.size); + DEBUGLOG(5, "Using source range [%zx, %zx)", + (size_t)buffer.start, + (size_t)buffer.start + buffer.capacity); + + + mtctx->inBuff.buffer = buffer; + mtctx->inBuff.filled = 0; + assert(mtctx->roundBuff.pos + buffer.capacity <= mtctx->roundBuff.capacity); + return 1; +} + +typedef struct { + size_t toLoad; /* The number of bytes to load from the input. */ + int flush; /* Boolean declaring if we must flush because we found a synchronization point. */ +} syncPoint_t; + +/** + * Searches through the input for a synchronization point. If one is found, we + * will instruct the caller to flush, and return the number of bytes to load. + * Otherwise, we will load as many bytes as possible and instruct the caller + * to continue as normal. + */ +static syncPoint_t +findSynchronizationPoint(ZSTDMT_CCtx const* mtctx, ZSTD_inBuffer const input) +{ + BYTE const* const istart = (BYTE const*)input.src + input.pos; + U64 const primePower = mtctx->rsync.primePower; + U64 const hitMask = mtctx->rsync.hitMask; + + syncPoint_t syncPoint; + U64 hash; + BYTE const* prev; + size_t pos; + + syncPoint.toLoad = MIN(input.size - input.pos, mtctx->targetSectionSize - mtctx->inBuff.filled); + syncPoint.flush = 0; + if (!mtctx->params.rsyncable) + /* Rsync is disabled. */ + return syncPoint; + if (mtctx->inBuff.filled + syncPoint.toLoad < RSYNC_LENGTH) + /* Not enough to compute the hash. + * We will miss any synchronization points in this RSYNC_LENGTH byte + * window. However, since it depends only in the internal buffers, if the + * state is already synchronized, we will remain synchronized. + * Additionally, the probability that we miss a synchronization point is + * low: RSYNC_LENGTH / targetSectionSize. + */ + return syncPoint; + /* Initialize the loop variables. */ + if (mtctx->inBuff.filled >= RSYNC_LENGTH) { + /* We have enough bytes buffered to initialize the hash. + * Start scanning at the beginning of the input. + */ + pos = 0; + prev = (BYTE const*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled - RSYNC_LENGTH; + hash = ZSTD_rollingHash_compute(prev, RSYNC_LENGTH); + if ((hash & hitMask) == hitMask) { + /* We're already at a sync point so don't load any more until + * we're able to flush this sync point. + * This likely happened because the job table was full so we + * couldn't add our job. + */ + syncPoint.toLoad = 0; + syncPoint.flush = 1; + return syncPoint; + } + } else { + /* We don't have enough bytes buffered to initialize the hash, but + * we know we have at least RSYNC_LENGTH bytes total. + * Start scanning after the first RSYNC_LENGTH bytes less the bytes + * already buffered. 
+ */ + pos = RSYNC_LENGTH - mtctx->inBuff.filled; + prev = (BYTE const*)mtctx->inBuff.buffer.start - pos; + hash = ZSTD_rollingHash_compute(mtctx->inBuff.buffer.start, mtctx->inBuff.filled); + hash = ZSTD_rollingHash_append(hash, istart, pos); + } + /* Starting with the hash of the previous RSYNC_LENGTH bytes, roll + * through the input. If we hit a synchronization point, then cut the + * job off, and tell the compressor to flush the job. Otherwise, load + * all the bytes and continue as normal. + * If we go too long without a synchronization point (targetSectionSize) + * then a block will be emitted anyways, but this is okay, since if we + * are already synchronized we will remain synchronized. + */ + for (; pos < syncPoint.toLoad; ++pos) { + BYTE const toRemove = pos < RSYNC_LENGTH ? prev[pos] : istart[pos - RSYNC_LENGTH]; + /* if (pos >= RSYNC_LENGTH) assert(ZSTD_rollingHash_compute(istart + pos - RSYNC_LENGTH, RSYNC_LENGTH) == hash); */ + hash = ZSTD_rollingHash_rotate(hash, toRemove, istart[pos], primePower); + if ((hash & hitMask) == hitMask) { + syncPoint.toLoad = pos + 1; + syncPoint.flush = 1; + break; + } + } + return syncPoint; +} + +size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx) +{ + size_t hintInSize = mtctx->targetSectionSize - mtctx->inBuff.filled; + if (hintInSize==0) hintInSize = mtctx->targetSectionSize; + return hintInSize; +} + +/** ZSTDMT_compressStream_generic() : + * internal use only - exposed to be invoked from zstd_compress.c + * assumption : output and input are valid (pos <= size) + * @return : minimum amount of data remaining to flush, 0 if none */ +size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp) +{ + unsigned forwardInputProgress = 0; + DEBUGLOG(5, "ZSTDMT_compressStream_generic (endOp=%u, srcSize=%u)", + (U32)endOp, (U32)(input->size - input->pos)); + assert(output->pos <= output->size); + assert(input->pos <= input->size); + + if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) { + /* current frame being ended. Only flush/end are allowed */ + return ERROR(stage_wrong); + } + + /* fill input buffer */ + if ( (!mtctx->jobReady) + && (input->size > input->pos) ) { /* support NULL input */ + if (mtctx->inBuff.buffer.start == NULL) { + assert(mtctx->inBuff.filled == 0); /* Can't fill an empty buffer */ + if (!ZSTDMT_tryGetInputRange(mtctx)) { + /* It is only possible for this operation to fail if there are + * still compression jobs ongoing. 
+ */ + DEBUGLOG(5, "ZSTDMT_tryGetInputRange failed"); + assert(mtctx->doneJobID != mtctx->nextJobID); + } else + DEBUGLOG(5, "ZSTDMT_tryGetInputRange completed successfully : mtctx->inBuff.buffer.start = %p", mtctx->inBuff.buffer.start); + } + if (mtctx->inBuff.buffer.start != NULL) { + syncPoint_t const syncPoint = findSynchronizationPoint(mtctx, *input); + if (syncPoint.flush && endOp == ZSTD_e_continue) { + endOp = ZSTD_e_flush; + } + assert(mtctx->inBuff.buffer.capacity >= mtctx->targetSectionSize); + DEBUGLOG(5, "ZSTDMT_compressStream_generic: adding %u bytes on top of %u to buffer of size %u", + (U32)syncPoint.toLoad, (U32)mtctx->inBuff.filled, (U32)mtctx->targetSectionSize); + ZSTD_memcpy((char*)mtctx->inBuff.buffer.start + mtctx->inBuff.filled, (const char*)input->src + input->pos, syncPoint.toLoad); + input->pos += syncPoint.toLoad; + mtctx->inBuff.filled += syncPoint.toLoad; + forwardInputProgress = syncPoint.toLoad>0; + } + } + if ((input->pos < input->size) && (endOp == ZSTD_e_end)) { + /* Can't end yet because the input is not fully consumed. + * We are in one of these cases: + * - mtctx->inBuff is NULL & empty: we couldn't get an input buffer so don't create a new job. + * - We filled the input buffer: flush this job but don't end the frame. + * - We hit a synchronization point: flush this job but don't end the frame. + */ + assert(mtctx->inBuff.filled == 0 || mtctx->inBuff.filled == mtctx->targetSectionSize || mtctx->params.rsyncable); + endOp = ZSTD_e_flush; + } + + if ( (mtctx->jobReady) + || (mtctx->inBuff.filled >= mtctx->targetSectionSize) /* filled enough : let's compress */ + || ((endOp != ZSTD_e_continue) && (mtctx->inBuff.filled > 0)) /* something to flush : let's go */ + || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { /* must finish the frame with a zero-size block */ + size_t const jobSize = mtctx->inBuff.filled; + assert(mtctx->inBuff.filled <= mtctx->targetSectionSize); + FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) , ""); + } + + /* check for potential compressed data ready to be flushed */ + { size_t const remainingToFlush = ZSTDMT_flushProduced(mtctx, output, !forwardInputProgress, endOp); /* block if there was no forward input progress */ + if (input->pos < input->size) return MAX(remainingToFlush, 1); /* input not consumed : do not end flush yet */ + DEBUGLOG(5, "end of ZSTDMT_compressStream_generic: remainingToFlush = %u", (U32)remainingToFlush); + return remainingToFlush; + } +} +/**** ended inlining compress/zstdmt_compress.c ****/ +#endif + +/**** start inlining decompress/huf_decompress.c ****/ +/* ****************************************************************** + * huff0 huffman decoder, + * part of Finite State Entropy library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+****************************************************************** */ + +/* ************************************************************** +* Dependencies +****************************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/compiler.h ****/ +/**** skipping file: ../common/bitstream.h ****/ +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/error_private.h ****/ + +/* ************************************************************** +* Macros +****************************************************************/ + +/* These two optional macros force the use one way or another of the two + * Huffman decompression implementations. You can't force in both directions + * at the same time. + */ +#if defined(HUF_FORCE_DECOMPRESS_X1) && \ + defined(HUF_FORCE_DECOMPRESS_X2) +#error "Cannot force the use of the X1 and X2 decoders at the same time!" +#endif + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_isError ERR_isError + + +/* ************************************************************** +* Byte alignment for workSpace management +****************************************************************/ +#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) +#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) + + +/* ************************************************************** +* BMI2 Variant Wrappers +****************************************************************/ +#if DYNAMIC_BMI2 + +#define HUF_DGEN(fn) \ + \ + static size_t fn##_default( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + if (bmi2) { \ + return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#else + +#define HUF_DGEN(fn) \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + (void)bmi2; \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#endif + + +/*-***************************/ +/* generic DTableDesc */ +/*-***************************/ +typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; + +static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) +{ + DTableDesc dtd; + ZSTD_memcpy(&dtd, table, sizeof(dtd)); + return dtd; +} + + +#ifndef HUF_FORCE_DECOMPRESS_X2 + +/*-***************************/ +/* single-symbol decoding */ +/*-***************************/ +typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ + +/** + * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at + * a time. 
+ */ +static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) { + U64 D4; + if (MEM_isLittleEndian()) { + D4 = symbol + (nbBits << 8); + } else { + D4 = (symbol << 8) + nbBits; + } + D4 *= 0x0001000100010001ULL; + return D4; +} + +typedef struct { + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + BYTE symbols[HUF_SYMBOLVALUE_MAX + 1]; + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; +} HUF_ReadDTableX1_Workspace; + + +size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) +{ + return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); +} + +size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + U32 tableLog = 0; + U32 nbSymbols = 0; + size_t iSize; + void* const dtPtr = DTable + 1; + HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; + HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace; + + DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp)); + if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge); + + DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); + /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ + + iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2); + if (HUF_isError(iSize)) return iSize; + + /* Table header */ + { DTableDesc dtd = HUF_getDTableDesc(DTable); + if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ + dtd.tableType = 0; + dtd.tableLog = (BYTE)tableLog; + ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); + } + + /* Compute symbols and rankStart given rankVal: + * + * rankVal already contains the number of values of each weight. + * + * symbols contains the symbols ordered by weight. First are the rankVal[0] + * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on. + * symbols[0] is filled (but unused) to avoid a branch. + * + * rankStart contains the offset where each rank belongs in the DTable. + * rankStart[0] is not filled because there are no entries in the table for + * weight 0. + */ + { + int n; + int nextRankStart = 0; + int const unroll = 4; + int const nLimit = (int)nbSymbols - unroll + 1; + for (n=0; n<(int)tableLog+1; n++) { + U32 const curr = nextRankStart; + nextRankStart += wksp->rankVal[n]; + wksp->rankStart[n] = curr; + } + for (n=0; n < nLimit; n += unroll) { + int u; + for (u=0; u < unroll; ++u) { + size_t const w = wksp->huffWeight[n+u]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u); + } + } + for (; n < (int)nbSymbols; ++n) { + size_t const w = wksp->huffWeight[n]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)n; + } + } + + /* fill DTable + * We fill all entries of each weight in order. + * That way length is a constant for each iteration of the outter loop. + * We can switch based on the length to a different inner loop which is + * optimized for that particular case. 
+ */
+ {
+ U32 w;
+ int symbol=wksp->rankVal[0];
+ int rankStart=0;
+ for (w=1; w<tableLog+1; ++w) {
+ int const symbolCount = wksp->rankVal[w];
+ int const length = (1 << w) >> 1;
+ int uStart = rankStart;
+ BYTE const nbBits = (BYTE)(tableLog + 1 - w);
+ int s;
+ int u;
+ switch (length) {
+ case 1:
+ for (s=0; s<symbolCount; ++s) {
+ HUF_DEltX1 D;
+ D.byte = wksp->symbols[symbol + s];
+ D.nbBits = nbBits;
+ dt[uStart] = D;
+ uStart += 1;
+ }
+ break;
+ case 2:
+ for (s=0; s<symbolCount; ++s) {
+ HUF_DEltX1 D;
+ D.byte = wksp->symbols[symbol + s];
+ D.nbBits = nbBits;
+ dt[uStart+0] = D;
+ dt[uStart+1] = D;
+ uStart += 2;
+ }
+ break;
+ case 4:
+ for (s=0; s<symbolCount; ++s) {
+ U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+ MEM_write64(dt + uStart, D4);
+ uStart += 4;
+ }
+ break;
+ case 8:
+ for (s=0; s<symbolCount; ++s) {
+ U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+ MEM_write64(dt + uStart, D4);
+ MEM_write64(dt + uStart + 4, D4);
+ uStart += 8;
+ }
+ break;
+ default:
+ for (s=0; s<symbolCount; ++s) {
+ U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+ for (u=0; u < length; u += 16) {
+ MEM_write64(dt + uStart + u + 0, D4);
+ MEM_write64(dt + uStart + u + 4, D4);
+ MEM_write64(dt + uStart + u + 8, D4);
+ MEM_write64(dt + uStart + u + 12, D4);
+ }
+ assert(u == length);
+ uStart += length;
+ }
+ break;
+ }
+ symbol += symbolCount;
+ rankStart += symbolCount * length;
+ }
+ }
+ return iSize;
+}
+
+FORCE_INLINE_TEMPLATE BYTE
+HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
+{
+ size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+ BYTE const c = dt[val].byte;
+ BIT_skipBits(Dstream, dt[val].nbBits);
+ return c;
+}
+
+#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
+ *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
+
+#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
+ if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+
+#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
+ if (MEM_64bits()) \
+ HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+
+HINT_INLINE size_t
+HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
+{
+ BYTE* const pStart = p;
+
+ /* up to 4 symbols at a time */
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
+ HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
+ HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+ }
+
+ /* [0-3] symbols remaining */
+ if (MEM_32bits())
+ while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+
+ /* no more data to retrieve from bitstream, no need to reload */
+ while (p < pEnd)
+ HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+
+ return pEnd-pStart;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress1X1_usingDTable_internal_body(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ BYTE* op = (BYTE*)dst;
+ BYTE* const oend = op + dstSize;
+ const void* dtPtr = DTable + 1;
+ const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
+ BIT_DStream_t bitD;
+ DTableDesc const dtd = HUF_getDTableDesc(DTable);
+ U32 const dtLog = dtd.tableLog;
+
+ CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
+
+ HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
+
+ if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
+
+ return dstSize;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+HUF_decompress4X1_usingDTable_internal_body(
+ void* dst, size_t dstSize,
+ const void* cSrc, size_t cSrcSize,
+ const HUF_DTable* DTable)
+{
+ /* Check */
+ if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
+
+ { const BYTE* const istart = (const BYTE*) cSrc;
+ BYTE*
const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - 3; + const void* const dtPtr = DTable + 1; + const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + U32 endSignal = 1; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); + + /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ + for ( ; (endSignal) & (op4 < olimit) ; ) { + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_1(op1, &bitD1); + HUF_DECODE_SYMBOLX1_1(op2, &bitD2); + HUF_DECODE_SYMBOLX1_1(op3, &bitD3); + HUF_DECODE_SYMBOLX1_1(op4, &bitD4); + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_0(op1, &bitD1); + HUF_DECODE_SYMBOLX1_0(op2, &bitD2); + HUF_DECODE_SYMBOLX1_0(op3, &bitD3); + HUF_DECODE_SYMBOLX1_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; + } + + /* check corruption */ + /* note : should not be necessary : op# advance in lock step, and we control op4. 
+ * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } + + /* decoded size */ + return dstSize; + } +} + + +typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, + const void *cSrc, + size_t cSrcSize, + const HUF_DTable *DTable); + +HUF_DGEN(HUF_decompress1X1_usingDTable_internal) +HUF_DGEN(HUF_decompress4X1_usingDTable_internal) + + + +size_t HUF_decompress1X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} + + +size_t HUF_decompress4X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} + +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); +} + + +#endif /* HUF_FORCE_DECOMPRESS_X2 */ + + +#ifndef HUF_FORCE_DECOMPRESS_X1 + +/* *************************/ +/* double-symbols decoding */ +/* *************************/ + +typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ +typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; +typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; +typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; + + +/* 
HUF_fillDTableX2Level2() :
+ * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
+static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
+ const U32* rankValOrigin, const int minWeight,
+ const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
+ U32 nbBitsBaseline, U16 baseSeq, U32* wksp, size_t wkspSize)
+{
+ HUF_DEltX2 DElt;
+ U32* rankVal = wksp;
+
+ assert(wkspSize >= HUF_TABLELOG_MAX + 1);
+ (void)wkspSize;
+ /* get pre-calculated rankVal */
+ ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
+
+ /* fill skipped values */
+ if (minWeight>1) {
+ U32 i, skipSize = rankVal[minWeight];
+ MEM_writeLE16(&(DElt.sequence), baseSeq);
+ DElt.nbBits = (BYTE)(consumed);
+ DElt.length = 1;
+ for (i = 0; i < skipSize; i++)
+ DTable[i] = DElt;
+ }
+
+ /* fill DTable */
+ { U32 s; for (s=0; s<sortedListSize; s++) {
+ const U32 symbol = sortedSymbols[s].symbol;
+ const U32 weight = sortedSymbols[s].weight;
+ const U32 nbBits = nbBitsBaseline - weight;
+ const U32 length = 1 << (sizeLog-nbBits);
+ const U32 start = rankVal[weight];
+ U32 i = start;
+ const U32 end = start + length;
+
+ MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+ DElt.nbBits = (BYTE)(nbBits + consumed);
+ DElt.length = 2;
+ do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */
+
+ rankVal[weight] += length;
+ } }
+}
+
+
+static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
+ const sortedSymbol_t* sortedList, const U32 sortedListSize,
+ const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+ const U32 nbBitsBaseline, U32* wksp, size_t wkspSize)
+{
+ U32* rankVal = wksp;
+ const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+ const U32 minBits = nbBitsBaseline - maxWeight;
+ U32 s;
+
+ assert(wkspSize >= HUF_TABLELOG_MAX + 1);
+ wksp += HUF_TABLELOG_MAX + 1;
+ wkspSize -= HUF_TABLELOG_MAX + 1;
+
+ ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
+
+ /* fill DTable */
+ for (s=0; s<sortedListSize; s++) {
+ const U16 symbol = sortedList[s].symbol;
+ const U32 weight = sortedList[s].weight;
+ const U32 nbBits = nbBitsBaseline - weight;
+ const U32 start = rankVal[weight];
+ const U32 length = 1 << (targetLog-nbBits);
+
+ if (targetLog-nbBits >= minBits) { /* enough room for a second symbol */
+ U32 sortedRank;
+ int minWeight = nbBits + scaleLog;
+ if (minWeight < 1) minWeight = 1;
+ sortedRank = rankStart[minWeight];
+ HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
+ rankValOrigin[nbBits], minWeight,
+ sortedList+sortedRank, sortedListSize-sortedRank,
+ nbBitsBaseline, symbol, wksp, wkspSize);
+ } else {
+ HUF_DEltX2 DElt;
+ MEM_writeLE16(&(DElt.sequence), symbol);
+ DElt.nbBits = (BYTE)(nbBits);
+ DElt.length = 1;
+ { U32 const end = start + length;
+ U32 u;
+ for (u = start; u < end; u++) DTable[u] = DElt;
+ } }
+ rankVal[weight] += length;
+ }
+}
+
+typedef struct {
+ rankValCol_t rankVal[HUF_TABLELOG_MAX];
+ U32 rankStats[HUF_TABLELOG_MAX + 1];
+ U32 rankStart0[HUF_TABLELOG_MAX + 2];
+ sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
+ BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
+ U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+} HUF_ReadDTableX2_Workspace;
+
+size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
+ const void* src, size_t srcSize,
+ void* workSpace, size_t wkspSize)
+{
+ U32 tableLog, maxW, sizeOfSort, nbSymbols;
+ DTableDesc dtd = HUF_getDTableDesc(DTable);
+ U32 const maxTableLog = dtd.maxTableLog;
+ size_t iSize;
+ void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
+ HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
+ U32 *rankStart;
+
+ HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
+
+ if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
+
+ rankStart = wksp->rankStart0 + 1;
+ ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
+ ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
+
+ DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
+ if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+ /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
+
+ iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), /* bmi2 */ 0);
+ if (HUF_isError(iSize)) return iSize;
+
+ /* check result */
+ if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
+
+ /* find maxWeight */
+ for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
+
+ /* Get start index of each weight */
+ { U32 w, nextRankStart = 0;
+ for (w=1; w<maxW+1; w++) {
+ U32 curr = nextRankStart;
+ nextRankStart += wksp->rankStats[w];
+ rankStart[w] = curr;
+ }
+ rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/
+ sizeOfSort = nextRankStart;
+ }
+
+ /* sort symbols by weight */
+ { U32 s;
+ for (s=0; s<nbSymbols; s++) {
+ U32 const w = wksp->weightList[s];
+ U32 const r = rankStart[w]++;
+ wksp->sortedSymbol[r].symbol = (BYTE)s;
+ wksp->sortedSymbol[r].weight = (BYTE)w;
+ }
+ rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */
+ }
+
+ /* Build rankVal */
+ { U32* const rankVal0 = wksp->rankVal[0];
+ { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */
+ U32 nextRankVal = 0;
+ U32 w;
+ for (w=1; w<maxW+1; w++) {
+ U32 curr = nextRankVal;
+ nextRankVal += wksp->rankStats[w] << (w+rescale);
+ rankVal0[w] = curr;
+ } }
+ { U32 const minBits = tableLog+1 - maxW;
+ U32 consumed;
+ for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
+ U32* const rankValPtr = wksp->rankVal[consumed];
+ U32 w;
+ for (w = 1; w < maxW+1; w++) {
+ rankValPtr[w] = rankVal0[w] >> consumed;
+ } } } }
+
+ HUF_fillDTableX2(dt, maxTableLog,
+ wksp->sortedSymbol, sizeOfSort,
+ wksp->rankStart0, wksp->rankVal, maxW,
+ tableLog+1,
+ wksp->calleeWksp, sizeof(wksp->calleeWksp) / sizeof(U32));
+
+ dtd.tableLog = (BYTE)maxTableLog;
+ dtd.tableType = 1;
+ ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
+ return iSize;
+}
+
+
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ ZSTD_memcpy(op, dt+val, 2);
+ BIT_skipBits(DStream, dt[val].nbBits);
+ return dt[val].length;
+}
+
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+ size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
+ ZSTD_memcpy(op, dt+val, 1);
+ if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+ else {
+ if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
+ BIT_skipBits(DStream, dt[val].nbBits);
+ if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+ /* ugly hack; works only because it's the last symbol.
Note : can't easily extract nbBits from just this symbol */ + DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); + } } + return 1; +} + +#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +HINT_INLINE size_t +HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, + const HUF_DEltX2* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 8 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_1(p, bitDPtr); + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } + + /* closer to end : up to 2 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + + while (p <= pEnd-2) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ + + if (p < pEnd) + p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); + + return p-pStart; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + BIT_DStream_t bitD; + + /* Init */ + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); + + /* decode */ + { BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog); + } + + /* check */ + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress4X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - (sizeof(size_t)-1); + const void* const dtPtr = DTable+1; + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + size_t const segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal = 1; + DTableDesc const dtd = 
HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); + + /* 16-32 symbols per loop (4-8 symbols per stream) */ + for ( ; (endSignal) & (op4 < olimit); ) { +#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; +#else + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal = (U32)LIKELY( + (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); +#endif + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } + + /* decoded size */ + return dstSize; + } +} + +HUF_DGEN(HUF_decompress1X2_usingDTable_internal) +HUF_DGEN(HUF_decompress4X2_usingDTable_internal) + +size_t HUF_decompress1X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +size_t 
HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} + + +size_t HUF_decompress4X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} + +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); +} + + +#endif /* HUF_FORCE_DECOMPRESS_X1 */ + + +/* ***********************************/ +/* Universal decompression selectors */ +/* ***********************************/ + +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else + return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif +} + +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else + return dtd.tableType ? 
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif +} + + +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) +typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; +static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = +{ + /* single, double, quad */ + {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ + {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ + {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ + {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ + {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ + {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ + {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ + {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ + {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ + {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ + {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ + {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ + {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ + {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ + {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ + {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ +}; +#endif + +/** HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) +{ + assert(dstSize > 0); + assert(dstSize <= 128*1024); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dstSize; + (void)cSrcSize; + return 0; +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dstSize; + (void)cSrcSize; + return 1; +#else + /* decoder timing evaluation */ + { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ + U32 const D256 = (U32)(dstSize >> 8); + U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); + U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); + DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ + return DTime1 < DTime0; + } +#endif +} + + +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, + size_t dstSize, const void* cSrc, + size_t cSrcSize, void* workSpace, + size_t wkspSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#else + return algoNb ? 
HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#endif + } +} + +size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#else + return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#endif + } +} + + +size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else + return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#endif +} + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} +#endif + +size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else + return dtd.tableType ? 
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#endif +} + +size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#else + return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : + HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#endif + } +} + +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX1_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX2_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + 
workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); + +size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) + static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; +#endif + + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); +#else + return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); +#endif + } +} + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#else + return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : + HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; +#endif + } +} + +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} +#endif +/**** ended inlining decompress/huf_decompress.c ****/ +/**** start inlining decompress/zstd_ddict.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + +/* zstd_ddict.c : + * concentrates all logic that needs to know the internals of ZSTD_DDict object */ + +/*-******************************************************* +* Dependencies +*********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** start inlining zstd_decompress_internal.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* zstd_decompress_internal: + * objects and definitions shared within lib/decompress modules */ + + #ifndef ZSTD_DECOMPRESS_INTERNAL_H + #define ZSTD_DECOMPRESS_INTERNAL_H + + +/*-******************************************************* + * Dependencies + *********************************************************/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ + + + +/*-******************************************************* + * Constants + *********************************************************/ +static UNUSED_ATTR const U32 LL_base[MaxLL+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 28, 32, 40, + 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, + 0x2000, 0x4000, 0x8000, 0x10000 }; + +static UNUSED_ATTR const U32 OF_base[MaxOff+1] = { + 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, + 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, + 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, + 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; + +static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 }; + +static UNUSED_ATTR const U32 ML_base[MaxML+1] = { + 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 37, 39, 41, 43, 47, 51, 59, + 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, + 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; + + +/*-******************************************************* + * Decompression types + *********************************************************/ + typedef struct { + U32 fastMode; + U32 tableLog; + } ZSTD_seqSymbol_header; + + typedef struct { + U16 nextState; + BYTE nbAdditionalBits; + BYTE nbBits; + U32 baseValue; + } ZSTD_seqSymbol; + + #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) + +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32)) + +typedef struct { + ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ + ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ + ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at 
least HUF_DECOMPRESS_WORKSPACE_SIZE large */ + HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ + U32 rep[ZSTD_REP_NUM]; + U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32]; +} ZSTD_entropyDTables_t; + +typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, + ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, + ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, + ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; + +typedef enum { zdss_init=0, zdss_loadHeader, + zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; + +typedef enum { + ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */ + ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */ + ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ +} ZSTD_dictUses_e; + +/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */ +typedef struct { + const ZSTD_DDict** ddictPtrTable; + size_t ddictPtrTableSize; + size_t ddictPtrCount; +} ZSTD_DDictHashSet; + +struct ZSTD_DCtx_s +{ + const ZSTD_seqSymbol* LLTptr; + const ZSTD_seqSymbol* MLTptr; + const ZSTD_seqSymbol* OFTptr; + const HUF_DTable* HUFptr; + ZSTD_entropyDTables_t entropy; + U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */ + const void* previousDstEnd; /* detect continuity */ + const void* prefixStart; /* start of current segment */ + const void* virtualStart; /* virtual start of previous segment if it was just before current one */ + const void* dictEnd; /* end of previous segment */ + size_t expected; + ZSTD_frameHeader fParams; + U64 processedCSize; + U64 decodedSize; + blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ + ZSTD_dStage stage; + U32 litEntropy; + U32 fseEntropy; + XXH64_state_t xxhState; + size_t headerSize; + ZSTD_format_e format; + ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */ + U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */ + const BYTE* litPtr; + ZSTD_customMem customMem; + size_t litSize; + size_t rleSize; + size_t staticSize; + int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ + + /* dictionary */ + ZSTD_DDict* ddictLocal; + const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ + U32 dictID; + int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ + ZSTD_dictUses_e dictUses; + ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ + ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. 
Default == 0 (disabled) */ + + /* streaming */ + ZSTD_dStreamStage streamStage; + char* inBuff; + size_t inBuffSize; + size_t inPos; + size_t maxWindowSize; + char* outBuff; + size_t outBuffSize; + size_t outStart; + size_t outEnd; + size_t lhSize; + void* legacyContext; + U32 previousLegacyVersion; + U32 legacyVersion; + U32 hostageByte; + int noForwardProgress; + ZSTD_bufferMode_e outBufferMode; + ZSTD_outBuffer expectedOutBuffer; + + /* workspace */ + BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; + BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; + + size_t oversizedDuration; + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + void const* dictContentBeginForFuzzing; + void const* dictContentEndForFuzzing; +#endif + + /* Tracing */ +#if ZSTD_TRACE + ZSTD_TraceCtx traceCtx; +#endif +}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ + + +/*-******************************************************* + * Shared internal functions + *********************************************************/ + +/*! ZSTD_loadDEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */ +size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, + const void* const dict, size_t const dictSize); + +/*! ZSTD_checkContinuity() : + * check if next `dst` follows previous position, where decompression ended. + * If yes, do nothing (continue on current segment). + * If not, classify previous segment as "external dictionary", and start a new segment. + * This function cannot fail. */ +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize); + + +#endif /* ZSTD_DECOMPRESS_INTERNAL_H */ +/**** ended inlining zstd_decompress_internal.h ****/ +/**** start inlining zstd_ddict.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +#ifndef ZSTD_DDICT_H +#define ZSTD_DDICT_H + +/*-******************************************************* + * Dependencies + *********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../zstd.h ****/ + + +/*-******************************************************* + * Interface + *********************************************************/ + +/* note: several prototypes are already published in `zstd.h` : + * ZSTD_createDDict() + * ZSTD_createDDict_byReference() + * ZSTD_createDDict_advanced() + * ZSTD_freeDDict() + * ZSTD_initStaticDDict() + * ZSTD_sizeof_DDict() + * ZSTD_estimateDDictSize() + * ZSTD_getDictID_fromDict() + */ + +const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict); +size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict); + +void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + + + +#endif /* ZSTD_DDICT_H */ +/**** ended inlining zstd_ddict.h ****/ + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) +/**** start inlining ../legacy/zstd_legacy.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LEGACY_H +#define ZSTD_LEGACY_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/error_private.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ + +#if !defined (ZSTD_LEGACY_SUPPORT) || (ZSTD_LEGACY_SUPPORT == 0) +# undef ZSTD_LEGACY_SUPPORT +# define ZSTD_LEGACY_SUPPORT 8 +#endif + +#if (ZSTD_LEGACY_SUPPORT <= 1) +/**** start inlining zstd_v01.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_V01_H_28739879432 +#define ZSTD_V01_H_28739879432 + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ + + +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. + return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv01_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.1.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. 
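+
+    A minimal sketch of the intended pairing (illustrative only, not upstream documentation),
+    assuming `buf`/`bufSize` hold one complete v0.1 frame and allocation is simplified:
+
+        size_t cSize; unsigned long long dBound;
+        ZSTDv01_findFrameSizeInfoLegacy(buf, bufSize, &cSize, &dBound);
+        if (ZSTDv01_isError(cSize) || dBound == ZSTD_CONTENTSIZE_ERROR) return;  // bad frame
+        void* const dst = malloc((size_t)dBound);            // dBound is only an upper bound
+        size_t const dSize = ZSTDv01_decompress(dst, (size_t)dBound, buf, cSize);
+        if (ZSTDv01_isError(dSize)) { free(dst); return; }
+        // dSize bytes were regenerated; this frame consumed cSize bytes of input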
+ */ +void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/** +ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error +*/ +unsigned ZSTDv01_isError(size_t code); + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv01_Dctx_s ZSTDv01_Dctx; +ZSTDv01_Dctx* ZSTDv01_createDCtx(void); +size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx); + +size_t ZSTDv01_decompressDCtx(void* ctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + +/* ************************************* +* Streaming functions +***************************************/ +size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx); + +size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx); +size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. +*/ + +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv01_magicNumber 0xFD2FB51E /* Big Endian version */ +#define ZSTDv01_magicNumberLE 0x1EB52FFD /* Little Endian version */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_V01_H_28739879432 */ +/**** ended inlining zstd_v01.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) +/**** start inlining zstd_v02.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_V02_H_4174539423 +#define ZSTD_V02_H_4174539423 + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ + + +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv02_decompress() : decompress ZSTD frames compliant with v0.2.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. 
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv02_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.2.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ +void ZSTDv02_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/** +ZSTDv02_isError() : tells if the result of ZSTDv02_decompress() is an error +*/ +unsigned ZSTDv02_isError(size_t code); + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv02_Dctx_s ZSTDv02_Dctx; +ZSTDv02_Dctx* ZSTDv02_createDCtx(void); +size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx); + +size_t ZSTDv02_decompressDCtx(void* ctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + +/* ************************************* +* Streaming functions +***************************************/ +size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx); + +size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx); +size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. +*/ + +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv02_magicNumber 0xFD2FB522 /* v0.2 */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_V02_H_4174539423 */ +/**** ended inlining zstd_v02.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) +/**** start inlining zstd_v03.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
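+ *
+ * Side note (illustrative, not from the upstream header): each legacy header above defines its
+ * frame magic number (ZSTDv01_magicNumberLE, ZSTDv02_magicNumber, ...), and ZSTD_isLegacy()
+ * later in this file dispatches on the little-endian 32-bit value at the start of a frame:
+ *
+ *     U32 const magic = MEM_readLE32(src);                  // src assumed to hold >= 4 bytes
+ *     if (magic == ZSTDv02_magicNumber) { ... use the v0.2 decoder ... }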
+ */ + +#ifndef ZSTD_V03_H_298734209782 +#define ZSTD_V03_H_298734209782 + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ + + +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv03_decompress() : decompress ZSTD frames compliant with v0.3.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. + return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv03_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.3.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ + void ZSTDv03_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + + /** +ZSTDv03_isError() : tells if the result of ZSTDv03_decompress() is an error +*/ +unsigned ZSTDv03_isError(size_t code); + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv03_Dctx_s ZSTDv03_Dctx; +ZSTDv03_Dctx* ZSTDv03_createDCtx(void); +size_t ZSTDv03_freeDCtx(ZSTDv03_Dctx* dctx); + +size_t ZSTDv03_decompressDCtx(void* ctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + +/* ************************************* +* Streaming functions +***************************************/ +size_t ZSTDv03_resetDCtx(ZSTDv03_Dctx* dctx); + +size_t ZSTDv03_nextSrcSizeToDecompress(ZSTDv03_Dctx* dctx); +size_t ZSTDv03_decompressContinue(ZSTDv03_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. +*/ + +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv03_magicNumber 0xFD2FB523 /* v0.3 */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_V03_H_298734209782 */ +/**** ended inlining zstd_v03.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) +/**** start inlining zstd_v04.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_V04_H_91868324769238 +#define ZSTD_V04_H_91868324769238 + +#if defined (__cplusplus) +extern "C" { +#endif + +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ + + +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv04_decompress() : decompress ZSTD frames compliant with v0.4.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. + return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv04_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv04_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.4.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ + void ZSTDv04_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/** +ZSTDv04_isError() : tells if the result of ZSTDv04_decompress() is an error +*/ +unsigned ZSTDv04_isError(size_t code); + + +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv04_Dctx_s ZSTDv04_Dctx; +ZSTDv04_Dctx* ZSTDv04_createDCtx(void); +size_t ZSTDv04_freeDCtx(ZSTDv04_Dctx* dctx); + +size_t ZSTDv04_decompressDCtx(ZSTDv04_Dctx* dctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + +/* ************************************* +* Direct Streaming +***************************************/ +size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx); + +size_t ZSTDv04_nextSrcSizeToDecompress(ZSTDv04_Dctx* dctx); +size_t ZSTDv04_decompressContinue(ZSTDv04_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. 
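+
+  A minimal pull-style loop over the two calls above (illustrative only, not upstream
+  documentation). `readMore(buf, n)` is a hypothetical callback that supplies exactly n
+  source bytes, and a zero result from ZSTDv04_nextSrcSizeToDecompress() is assumed to mean
+  the frame is finished:
+
+      ZSTDv04_Dctx* const dctx = ZSTDv04_createDCtx();
+      ZSTDv04_resetDCtx(dctx);
+      char inBuf[(128 * 1024) + 16];                         // assumed large enough per step
+      char outBuf[128 * 1024];
+      for (;;) {
+          size_t const toRead = ZSTDv04_nextSrcSizeToDecompress(dctx);
+          if (toRead == 0) break;
+          readMore(inBuf, toRead);                           // hypothetical input source
+          size_t const produced = ZSTDv04_decompressContinue(dctx, outBuf, sizeof(outBuf), inBuf, toRead);
+          if (ZSTDv04_isError(produced)) break;
+          // `produced` may be 0 when only a header was decoded; otherwise flush outBuf here
+      }
+      ZSTDv04_freeDCtx(dctx);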
+*/ + + +/* ************************************* +* Buffered Streaming +***************************************/ +typedef struct ZBUFFv04_DCtx_s ZBUFFv04_DCtx; +ZBUFFv04_DCtx* ZBUFFv04_createDCtx(void); +size_t ZBUFFv04_freeDCtx(ZBUFFv04_DCtx* dctx); + +size_t ZBUFFv04_decompressInit(ZBUFFv04_DCtx* dctx); +size_t ZBUFFv04_decompressWithDictionary(ZBUFFv04_DCtx* dctx, const void* dict, size_t dictSize); + +size_t ZBUFFv04_decompressContinue(ZBUFFv04_DCtx* dctx, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr); + +/** ************************************************ +* Streaming decompression +* +* A ZBUFF_DCtx object is required to track streaming operation. +* Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources. +* Use ZBUFF_decompressInit() to start a new decompression operation. +* ZBUFF_DCtx objects can be reused multiple times. +* +* Optionally, a reference to a static dictionary can be set, using ZBUFF_decompressWithDictionary() +* It must be the same content as the one set during compression phase. +* Dictionary content must remain accessible during the decompression process. +* +* Use ZBUFF_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *maxDstSizePtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency) +* or 0 when a frame is completely decoded +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize / ZBUFF_recommendedDOutSize +* output : ZBUFF_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded. +* input : ZBUFF_recommendedDInSize==128Kb+3; just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* **************************************************/ +unsigned ZBUFFv04_isError(size_t errorCode); +const char* ZBUFFv04_getErrorName(size_t errorCode); + + +/** The below functions provide recommended buffer sizes for Compression or Decompression operations. +* These sizes are not compulsory, they just tend to offer better latency */ +size_t ZBUFFv04_recommendedDInSize(void); +size_t ZBUFFv04_recommendedDOutSize(void); + + +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv04_magicNumber 0xFD2FB524 /* v0.4 */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_V04_H_91868324769238 */ +/**** ended inlining zstd_v04.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) +/**** start inlining zstd_v05.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
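+ *
+ * Side note (illustrative, not from the upstream header): the ZBUFFv04_recommended*Size()
+ * helpers declared at the end of the preceding zstd_v04.h are hints only, e.g.
+ *
+ *     size_t const inCap  = ZBUFFv04_recommendedDInSize();  // about 128 KB + 3, per the howto
+ *     size_t const outCap = ZBUFFv04_recommendedDOutSize(); // one full 128 KB block
+ *
+ * Any buffer sizes work; the recommended ones simply help latency, as the howto above states.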
+ */ + +#ifndef ZSTDv05_H +#define ZSTDv05_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* +* Dependencies +***************************************/ +#include /* size_t */ +/**** skipping file: ../common/mem.h ****/ + + +/* ************************************* +* Simple functions +***************************************/ +/*! ZSTDv05_decompress() : + `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail. + `dstCapacity` must be large enough, equal or larger than originalSize. + @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + or an errorCode if it fails (which can be tested using ZSTDv05_isError()) */ +size_t ZSTDv05_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + + /** + ZSTDv05_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.5.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ +void ZSTDv05_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/* ************************************* +* Helper functions +***************************************/ +/* Error Management */ +unsigned ZSTDv05_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +const char* ZSTDv05_getErrorName(size_t code); /*!< provides readable string for an error code */ + + +/* ************************************* +* Explicit memory management +***************************************/ +/** Decompression context */ +typedef struct ZSTDv05_DCtx_s ZSTDv05_DCtx; +ZSTDv05_DCtx* ZSTDv05_createDCtx(void); +size_t ZSTDv05_freeDCtx(ZSTDv05_DCtx* dctx); /*!< @return : errorCode */ + +/** ZSTDv05_decompressDCtx() : +* Same as ZSTDv05_decompress(), but requires an already allocated ZSTDv05_DCtx (see ZSTDv05_createDCtx()) */ +size_t ZSTDv05_decompressDCtx(ZSTDv05_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-*********************** +* Simple Dictionary API +*************************/ +/*! ZSTDv05_decompress_usingDict() : +* Decompression using a pre-defined Dictionary content (see dictBuilder). +* Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted. 
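+* A minimal sketch (illustrative only, not upstream documentation); `dictBuf`/`dictSize` are
+* assumed to hold the same raw dictionary bytes that were used at compression time:
+*
+*     ZSTDv05_DCtx* const dctx = ZSTDv05_createDCtx();
+*     size_t const dSize = ZSTDv05_decompress_usingDict(dctx, dst, dstCapacity,
+*                                                       src, srcSize, dictBuf, dictSize);
+*     if (ZSTDv05_isError(dSize)) { ... }                    // handle error
+*     ZSTDv05_freeDCtx(dctx);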
+* Note : dict can be NULL, in which case, it's equivalent to ZSTDv05_decompressDCtx() */ +size_t ZSTDv05_decompress_usingDict(ZSTDv05_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + +/*-************************ +* Advanced Streaming API +***************************/ +typedef enum { ZSTDv05_fast, ZSTDv05_greedy, ZSTDv05_lazy, ZSTDv05_lazy2, ZSTDv05_btlazy2, ZSTDv05_opt, ZSTDv05_btopt } ZSTDv05_strategy; +typedef struct { + U64 srcSize; + U32 windowLog; /* the only useful information to retrieve */ + U32 contentLog; U32 hashLog; U32 searchLog; U32 searchLength; U32 targetLength; ZSTDv05_strategy strategy; +} ZSTDv05_parameters; +size_t ZSTDv05_getFrameParams(ZSTDv05_parameters* params, const void* src, size_t srcSize); + +size_t ZSTDv05_decompressBegin_usingDict(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize); +void ZSTDv05_copyDCtx(ZSTDv05_DCtx* dstDCtx, const ZSTDv05_DCtx* srcDCtx); +size_t ZSTDv05_nextSrcSizeToDecompress(ZSTDv05_DCtx* dctx); +size_t ZSTDv05_decompressContinue(ZSTDv05_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-*********************** +* ZBUFF API +*************************/ +typedef struct ZBUFFv05_DCtx_s ZBUFFv05_DCtx; +ZBUFFv05_DCtx* ZBUFFv05_createDCtx(void); +size_t ZBUFFv05_freeDCtx(ZBUFFv05_DCtx* dctx); + +size_t ZBUFFv05_decompressInit(ZBUFFv05_DCtx* dctx); +size_t ZBUFFv05_decompressInitDictionary(ZBUFFv05_DCtx* dctx, const void* dict, size_t dictSize); + +size_t ZBUFFv05_decompressContinue(ZBUFFv05_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); + +/*-*************************************************************************** +* Streaming decompression +* +* A ZBUFFv05_DCtx object is required to track streaming operations. +* Use ZBUFFv05_createDCtx() and ZBUFFv05_freeDCtx() to create/release resources. +* Use ZBUFFv05_decompressInit() to start a new decompression operation, +* or ZBUFFv05_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFFv05_DCtx objects can be reused multiple times. +* +* Use ZBUFFv05_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of @dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change @dst. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency) +* or 0 when a frame is completely decoded +* or an error code, which can be tested using ZBUFFv05_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFFv05_recommendedDInSize() / ZBUFFv05_recommendedDOutSize() +* output : ZBUFFv05_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFFv05_recommendedDInSize==128Kb+3; just follow indications from ZBUFFv05_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . 
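+*
+* A minimal decode loop following the howto above (illustrative only, simplified error
+* handling); `cbuf`/`csize` are the assumed compressed input and `sink()` is a hypothetical
+* output callback:
+*
+*     ZBUFFv05_DCtx* const dctx = ZBUFFv05_createDCtx();
+*     ZBUFFv05_decompressInit(dctx);
+*     char out[128 * 1024];
+*     size_t pos = 0;
+*     while (pos < csize) {
+*         size_t outSize = sizeof(out);
+*         size_t inSize  = csize - pos;
+*         size_t const hint = ZBUFFv05_decompressContinue(dctx, out, &outSize, cbuf + pos, &inSize);
+*         if (ZBUFFv05_isError(hint)) break;                 // error code
+*         sink(out, outSize);                                // outSize bytes were produced
+*         pos += inSize;                                     // inSize bytes were consumed
+*         if (hint == 0) break;                              // frame completely decoded
+*     }
+*     ZBUFFv05_freeDCtx(dctx);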
+* *******************************************************************************/ + + +/* ************************************* +* Tool functions +***************************************/ +unsigned ZBUFFv05_isError(size_t errorCode); +const char* ZBUFFv05_getErrorName(size_t errorCode); + +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. +* These sizes are just hints, and tend to offer better latency */ +size_t ZBUFFv05_recommendedDInSize(void); +size_t ZBUFFv05_recommendedDOutSize(void); + + + +/*-************************************* +* Constants +***************************************/ +#define ZSTDv05_MAGICNUMBER 0xFD2FB525 /* v0.5 */ + + + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDv0505_H */ +/**** ended inlining zstd_v05.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) +/**** start inlining zstd_v06.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTDv06_H +#define ZSTDv06_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/*====== Dependency ======*/ +#include /* size_t */ + + +/*====== Export for Windows ======*/ +/*! +* ZSTDv06_DLL_EXPORT : +* Enable exporting of functions when building a Windows DLL +*/ +#if defined(_WIN32) && defined(ZSTDv06_DLL_EXPORT) && (ZSTDv06_DLL_EXPORT==1) +# define ZSTDLIBv06_API __declspec(dllexport) +#else +# define ZSTDLIBv06_API +#endif + + +/* ************************************* +* Simple functions +***************************************/ +/*! ZSTDv06_decompress() : + `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail. + `dstCapacity` must be large enough, equal or larger than originalSize. + @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + or an errorCode if it fails (which can be tested using ZSTDv06_isError()) */ +ZSTDLIBv06_API size_t ZSTDv06_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/** +ZSTDv06_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.6.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. 
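+
+ A minimal sketch (illustrative only, not upstream documentation): cSize makes it possible to
+ step over one v0.6 frame inside a larger buffer, e.g. when frames are concatenated
+ (`p` is an assumed BYTE pointer, `remaining` the bytes left in the buffer):
+
+     size_t cSize; unsigned long long dBound;
+     ZSTDv06_findFrameSizeInfoLegacy(p, remaining, &cSize, &dBound);
+     if (ZSTDv06_isError(cSize)) return;                     // not a valid v0.6 frame
+     // ... decompress the cSize-byte frame at p into a buffer of at least dBound bytes ...
+     p += cSize; remaining -= cSize;                         // advance to the next frame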
+*/ +void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/* ************************************* +* Helper functions +***************************************/ +ZSTDLIBv06_API size_t ZSTDv06_compressBound(size_t srcSize); /*!< maximum compressed size (worst case scenario) */ + +/* Error Management */ +ZSTDLIBv06_API unsigned ZSTDv06_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIBv06_API const char* ZSTDv06_getErrorName(size_t code); /*!< provides readable string for an error code */ + + +/* ************************************* +* Explicit memory management +***************************************/ +/** Decompression context */ +typedef struct ZSTDv06_DCtx_s ZSTDv06_DCtx; +ZSTDLIBv06_API ZSTDv06_DCtx* ZSTDv06_createDCtx(void); +ZSTDLIBv06_API size_t ZSTDv06_freeDCtx(ZSTDv06_DCtx* dctx); /*!< @return : errorCode */ + +/** ZSTDv06_decompressDCtx() : +* Same as ZSTDv06_decompress(), but requires an already allocated ZSTDv06_DCtx (see ZSTDv06_createDCtx()) */ +ZSTDLIBv06_API size_t ZSTDv06_decompressDCtx(ZSTDv06_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-*********************** +* Dictionary API +*************************/ +/*! ZSTDv06_decompress_usingDict() : +* Decompression using a pre-defined Dictionary content (see dictBuilder). +* Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted. +* Note : dict can be NULL, in which case, it's equivalent to ZSTDv06_decompressDCtx() */ +ZSTDLIBv06_API size_t ZSTDv06_decompress_usingDict(ZSTDv06_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*-************************ +* Advanced Streaming API +***************************/ +struct ZSTDv06_frameParams_s { unsigned long long frameContentSize; unsigned windowLog; }; +typedef struct ZSTDv06_frameParams_s ZSTDv06_frameParams; + +ZSTDLIBv06_API size_t ZSTDv06_getFrameParams(ZSTDv06_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input */ +ZSTDLIBv06_API size_t ZSTDv06_decompressBegin_usingDict(ZSTDv06_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIBv06_API void ZSTDv06_copyDCtx(ZSTDv06_DCtx* dctx, const ZSTDv06_DCtx* preparedDCtx); + +ZSTDLIBv06_API size_t ZSTDv06_nextSrcSizeToDecompress(ZSTDv06_DCtx* dctx); +ZSTDLIBv06_API size_t ZSTDv06_decompressContinue(ZSTDv06_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + + +/* ************************************* +* ZBUFF API +***************************************/ + +typedef struct ZBUFFv06_DCtx_s ZBUFFv06_DCtx; +ZSTDLIBv06_API ZBUFFv06_DCtx* ZBUFFv06_createDCtx(void); +ZSTDLIBv06_API size_t ZBUFFv06_freeDCtx(ZBUFFv06_DCtx* dctx); + +ZSTDLIBv06_API size_t ZBUFFv06_decompressInit(ZBUFFv06_DCtx* dctx); +ZSTDLIBv06_API size_t ZBUFFv06_decompressInitDictionary(ZBUFFv06_DCtx* dctx, const void* dict, size_t dictSize); + +ZSTDLIBv06_API size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); + +/*-*************************************************************************** +* Streaming decompression howto +* +* A ZBUFFv06_DCtx object is required to track streaming operations. +* Use ZBUFFv06_createDCtx() and ZBUFFv06_freeDCtx() to create/release resources. 
+* Use ZBUFFv06_decompressInit() to start a new decompression operation, +* or ZBUFFv06_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFFv06_DCtx objects can be re-init multiple times. +* +* Use ZBUFFv06_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency), +* or 0 when a frame is completely decoded, +* or an error code, which can be tested using ZBUFFv06_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFFv06_recommendedDInSize() and ZBUFFv06_recommendedDOutSize() +* output : ZBUFFv06_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFFv06_recommendedDInSize == 128KB + 3; +* just follow indications from ZBUFFv06_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* *******************************************************************************/ + + +/* ************************************* +* Tool functions +***************************************/ +ZSTDLIBv06_API unsigned ZBUFFv06_isError(size_t errorCode); +ZSTDLIBv06_API const char* ZBUFFv06_getErrorName(size_t errorCode); + +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. +* These sizes are just hints, they tend to offer better latency */ +ZSTDLIBv06_API size_t ZBUFFv06_recommendedDInSize(void); +ZSTDLIBv06_API size_t ZBUFFv06_recommendedDOutSize(void); + + +/*-************************************* +* Constants +***************************************/ +#define ZSTDv06_MAGICNUMBER 0xFD2FB526 /* v0.6 */ + + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDv06_BUFFERED_H */ +/**** ended inlining zstd_v06.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) +/**** start inlining zstd_v07.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTDv07_H_235446 +#define ZSTDv07_H_235446 + +#if defined (__cplusplus) +extern "C" { +#endif + +/*====== Dependency ======*/ +#include /* size_t */ + + +/*====== Export for Windows ======*/ +/*! +* ZSTDv07_DLL_EXPORT : +* Enable exporting of functions when building a Windows DLL +*/ +#if defined(_WIN32) && defined(ZSTDv07_DLL_EXPORT) && (ZSTDv07_DLL_EXPORT==1) +# define ZSTDLIBv07_API __declspec(dllexport) +#else +# define ZSTDLIBv07_API +#endif + + +/* ************************************* +* Simple API +***************************************/ +/*! ZSTDv07_getDecompressedSize() : +* @return : decompressed size if known, 0 otherwise. + note 1 : if `0`, follow up with ZSTDv07_getFrameParams() to know precise failure cause. + note 2 : decompressed size could be wrong or intentionally modified ! 
+ always ensure results fit within application's authorized limits */ +unsigned long long ZSTDv07_getDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTDv07_decompress() : + `compressedSize` : must be _exact_ size of compressed input, otherwise decompression will fail. + `dstCapacity` must be equal or larger than originalSize. + @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + or an errorCode if it fails (which can be tested using ZSTDv07_isError()) */ +ZSTDLIBv07_API size_t ZSTDv07_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/** +ZSTDv07_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.7.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. +*/ +void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/*====== Helper functions ======*/ +ZSTDLIBv07_API unsigned ZSTDv07_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIBv07_API const char* ZSTDv07_getErrorName(size_t code); /*!< provides readable string from an error code */ + + +/*-************************************* +* Explicit memory management +***************************************/ +/** Decompression context */ +typedef struct ZSTDv07_DCtx_s ZSTDv07_DCtx; +ZSTDLIBv07_API ZSTDv07_DCtx* ZSTDv07_createDCtx(void); +ZSTDLIBv07_API size_t ZSTDv07_freeDCtx(ZSTDv07_DCtx* dctx); /*!< @return : errorCode */ + +/** ZSTDv07_decompressDCtx() : +* Same as ZSTDv07_decompress(), requires an allocated ZSTDv07_DCtx (see ZSTDv07_createDCtx()) */ +ZSTDLIBv07_API size_t ZSTDv07_decompressDCtx(ZSTDv07_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-************************ +* Simple dictionary API +***************************/ +/*! ZSTDv07_decompress_usingDict() : +* Decompression using a pre-defined Dictionary content (see dictBuilder). +* Dictionary must be identical to the one used during compression. +* Note : This function load the dictionary, resulting in a significant startup time */ +ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDict(ZSTDv07_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*-************************** +* Advanced Dictionary API +****************************/ +/*! ZSTDv07_createDDict() : +* Create a digested dictionary, ready to start decompression operation without startup delay. +* `dict` can be released after creation */ +typedef struct ZSTDv07_DDict_s ZSTDv07_DDict; +ZSTDLIBv07_API ZSTDv07_DDict* ZSTDv07_createDDict(const void* dict, size_t dictSize); +ZSTDLIBv07_API size_t ZSTDv07_freeDDict(ZSTDv07_DDict* ddict); + +/*! ZSTDv07_decompress_usingDDict() : +* Decompression using a pre-digested Dictionary +* Faster startup than ZSTDv07_decompress_usingDict(), recommended when same dictionary is used multiple times. 
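+*
+* A minimal sketch (illustrative only, not upstream documentation); `dictBuf`/`dictSize` are
+* assumed to hold the raw dictionary used at compression time, and the digested DDict is meant
+* to be reused across many frames:
+*
+*     ZSTDv07_DDict* const ddict = ZSTDv07_createDDict(dictBuf, dictSize);
+*     ZSTDv07_DCtx*  const dctx  = ZSTDv07_createDCtx();
+*     size_t const dSize = ZSTDv07_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ddict);
+*     if (ZSTDv07_isError(dSize)) { ... }                    // handle error
+*     ZSTDv07_freeDCtx(dctx);
+*     ZSTDv07_freeDDict(ddict);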
*/ +ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDDict(ZSTDv07_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTDv07_DDict* ddict); + +typedef struct { + unsigned long long frameContentSize; + unsigned windowSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTDv07_frameParams; + +ZSTDLIBv07_API size_t ZSTDv07_getFrameParams(ZSTDv07_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input */ + + + + +/* ************************************* +* Streaming functions +***************************************/ +typedef struct ZBUFFv07_DCtx_s ZBUFFv07_DCtx; +ZSTDLIBv07_API ZBUFFv07_DCtx* ZBUFFv07_createDCtx(void); +ZSTDLIBv07_API size_t ZBUFFv07_freeDCtx(ZBUFFv07_DCtx* dctx); + +ZSTDLIBv07_API size_t ZBUFFv07_decompressInit(ZBUFFv07_DCtx* dctx); +ZSTDLIBv07_API size_t ZBUFFv07_decompressInitDictionary(ZBUFFv07_DCtx* dctx, const void* dict, size_t dictSize); + +ZSTDLIBv07_API size_t ZBUFFv07_decompressContinue(ZBUFFv07_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); + +/*-*************************************************************************** +* Streaming decompression howto +* +* A ZBUFFv07_DCtx object is required to track streaming operations. +* Use ZBUFFv07_createDCtx() and ZBUFFv07_freeDCtx() to create/release resources. +* Use ZBUFFv07_decompressInit() to start a new decompression operation, +* or ZBUFFv07_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFFv07_DCtx objects can be re-init multiple times. +* +* Use ZBUFFv07_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency), +* or 0 when a frame is completely decoded, +* or an error code, which can be tested using ZBUFFv07_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFFv07_recommendedDInSize() and ZBUFFv07_recommendedDOutSize() +* output : ZBUFFv07_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFFv07_recommendedDInSize == 128KB + 3; +* just follow indications from ZBUFFv07_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* *******************************************************************************/ + + +/* ************************************* +* Tool functions +***************************************/ +ZSTDLIBv07_API unsigned ZBUFFv07_isError(size_t errorCode); +ZSTDLIBv07_API const char* ZBUFFv07_getErrorName(size_t errorCode); + +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. 
+* These sizes are just hints, they tend to offer better latency */ +ZSTDLIBv07_API size_t ZBUFFv07_recommendedDInSize(void); +ZSTDLIBv07_API size_t ZBUFFv07_recommendedDOutSize(void); + + +/*-************************************* +* Constants +***************************************/ +#define ZSTDv07_MAGICNUMBER 0xFD2FB527 /* v0.7 */ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDv07_H_235446 */ +/**** ended inlining zstd_v07.h ****/ +#endif + +/** ZSTD_isLegacy() : + @return : > 0 if supported by legacy decoder. 0 otherwise. + return value is the version. +*/ +MEM_STATIC unsigned ZSTD_isLegacy(const void* src, size_t srcSize) +{ + U32 magicNumberLE; + if (srcSize<4) return 0; + magicNumberLE = MEM_readLE32(src); + switch(magicNumberLE) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case ZSTDv01_magicNumberLE:return 1; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case ZSTDv02_magicNumber : return 2; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case ZSTDv03_magicNumber : return 3; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case ZSTDv04_magicNumber : return 4; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case ZSTDv05_MAGICNUMBER : return 5; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case ZSTDv06_MAGICNUMBER : return 6; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case ZSTDv07_MAGICNUMBER : return 7; +#endif + default : return 0; + } +} + + +MEM_STATIC unsigned long long ZSTD_getDecompressedSize_legacy(const void* src, size_t srcSize) +{ + U32 const version = ZSTD_isLegacy(src, srcSize); + if (version < 5) return 0; /* no decompressed size in frame header, or not a legacy format */ +#if (ZSTD_LEGACY_SUPPORT <= 5) + if (version==5) { + ZSTDv05_parameters fParams; + size_t const frResult = ZSTDv05_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.srcSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + if (version==6) { + ZSTDv06_frameParams fParams; + size_t const frResult = ZSTDv06_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.frameContentSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + if (version==7) { + ZSTDv07_frameParams fParams; + size_t const frResult = ZSTDv07_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.frameContentSize; + } +#endif + return 0; /* should not be possible */ +} + + +MEM_STATIC size_t ZSTD_decompressLegacy( + void* dst, size_t dstCapacity, + const void* src, size_t compressedSize, + const void* dict,size_t dictSize) +{ + U32 const version = ZSTD_isLegacy(src, compressedSize); + (void)dst; (void)dstCapacity; (void)dict; (void)dictSize; /* unused when ZSTD_LEGACY_SUPPORT >= 8 */ + switch(version) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case 1 : + return ZSTDv01_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case 2 : + return ZSTDv02_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case 3 : + return ZSTDv03_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + return ZSTDv04_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { size_t result; + ZSTDv05_DCtx* const zd = ZSTDv05_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv05_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv05_freeDCtx(zd); + return result; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { size_t result; + ZSTDv06_DCtx* const zd 
= ZSTDv06_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv06_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv06_freeDCtx(zd); + return result; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { size_t result; + ZSTDv07_DCtx* const zd = ZSTDv07_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv07_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv07_freeDCtx(zd); + return result; + } +#endif + default : + return ERROR(prefix_unknown); + } +} + +MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo; + U32 const version = ZSTD_isLegacy(src, srcSize); + switch(version) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case 1 : + ZSTDv01_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case 2 : + ZSTDv02_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case 3 : + ZSTDv03_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + ZSTDv04_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + ZSTDv05_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + ZSTDv06_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + ZSTDv07_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif + default : + frameSizeInfo.compressedSize = ERROR(prefix_unknown); + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + break; + } + if (!ZSTD_isError(frameSizeInfo.compressedSize) && frameSizeInfo.compressedSize > srcSize) { + frameSizeInfo.compressedSize = ERROR(srcSize_wrong); + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + } + return frameSizeInfo; +} + +MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo = ZSTD_findFrameSizeInfoLegacy(src, srcSize); + return frameSizeInfo.compressedSize; +} + +MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version) +{ + switch(version) + { + default : + case 1 : + case 2 : + case 3 : + (void)legacyContext; + return ERROR(version_unsupported); +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : return ZBUFFv04_freeDCtx((ZBUFFv04_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : return ZBUFFv05_freeDCtx((ZBUFFv05_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : return ZBUFFv06_freeDCtx((ZBUFFv06_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : return ZBUFFv07_freeDCtx((ZBUFFv07_DCtx*)legacyContext); +#endif + } +} + + +MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U32 newVersion, + const void* dict, size_t dictSize) +{ + DEBUGLOG(5, "ZSTD_initLegacyStream for v0.%u", newVersion); + if (prevVersion != newVersion) 
ZSTD_freeLegacyStreamContext(*legacyContext, prevVersion); + switch(newVersion) + { + default : + case 1 : + case 2 : + case 3 : + (void)dict; (void)dictSize; + return 0; +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + { + ZBUFFv04_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv04_createDCtx() : (ZBUFFv04_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv04_decompressInit(dctx); + ZBUFFv04_decompressWithDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { + ZBUFFv05_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv05_createDCtx() : (ZBUFFv05_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv05_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { + ZBUFFv06_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv06_createDCtx() : (ZBUFFv06_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv06_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { + ZBUFFv07_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv07_createDCtx() : (ZBUFFv07_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv07_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif + } +} + + + +MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version, + ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + DEBUGLOG(5, "ZSTD_decompressLegacyStream for v0.%u", version); + switch(version) + { + default : + case 1 : + case 2 : + case 3 : + (void)legacyContext; (void)output; (void)input; + return ERROR(version_unsupported); +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + { + ZBUFFv04_DCtx* dctx = (ZBUFFv04_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv04_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { + ZBUFFv05_DCtx* dctx = (ZBUFFv05_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv05_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { + ZBUFFv06_DCtx* dctx = (ZBUFFv06_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv06_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { + ZBUFFv07_DCtx* dctx = (ZBUFFv07_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; 
+ size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv07_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif + } +} + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTD_LEGACY_H */ +/**** ended inlining ../legacy/zstd_legacy.h ****/ +#endif + + + +/*-******************************************************* +* Types +*********************************************************/ +struct ZSTD_DDict_s { + void* dictBuffer; + const void* dictContent; + size_t dictSize; + ZSTD_entropyDTables_t entropy; + U32 dictID; + U32 entropyPresent; + ZSTD_customMem cMem; +}; /* typedef'd to ZSTD_DDict within "zstd.h" */ + +const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict) +{ + assert(ddict != NULL); + return ddict->dictContent; +} + +size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict) +{ + assert(ddict != NULL); + return ddict->dictSize; +} + +void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + DEBUGLOG(4, "ZSTD_copyDDictParameters"); + assert(dctx != NULL); + assert(ddict != NULL); + dctx->dictID = ddict->dictID; + dctx->prefixStart = ddict->dictContent; + dctx->virtualStart = ddict->dictContent; + dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; + dctx->previousDstEnd = dctx->dictEnd; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif + if (ddict->entropyPresent) { + dctx->litEntropy = 1; + dctx->fseEntropy = 1; + dctx->LLTptr = ddict->entropy.LLTable; + dctx->MLTptr = ddict->entropy.MLTable; + dctx->OFTptr = ddict->entropy.OFTable; + dctx->HUFptr = ddict->entropy.hufTable; + dctx->entropy.rep[0] = ddict->entropy.rep[0]; + dctx->entropy.rep[1] = ddict->entropy.rep[1]; + dctx->entropy.rep[2] = ddict->entropy.rep[2]; + } else { + dctx->litEntropy = 0; + dctx->fseEntropy = 0; + } +} + + +static size_t +ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, + ZSTD_dictContentType_e dictContentType) +{ + ddict->dictID = 0; + ddict->entropyPresent = 0; + if (dictContentType == ZSTD_dct_rawContent) return 0; + + if (ddict->dictSize < 8) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + { U32 const magic = MEM_readLE32(ddict->dictContent); + if (magic != ZSTD_MAGIC_DICTIONARY) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + } + ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); + + /* load entropy tables */ + RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( + &ddict->entropy, ddict->dictContent, ddict->dictSize)), + dictionary_corrupted, ""); + ddict->entropyPresent = 1; + return 0; +} + + +static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { + ddict->dictBuffer = NULL; + ddict->dictContent = dict; + if (!dict) dictSize = 0; + } else { + void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem); + ddict->dictBuffer = internalBuffer; + ddict->dictContent = internalBuffer; + if (!internalBuffer) return ERROR(memory_allocation); + ZSTD_memcpy(internalBuffer, dict, 
dictSize); + } + ddict->dictSize = dictSize; + ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ + + /* parse dictionary content */ + FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); + + return 0; +} + +ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem) +{ + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + + { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem); + if (ddict == NULL) return NULL; + ddict->cMem = customMem; + { size_t const initResult = ZSTD_initDDict_internal(ddict, + dict, dictSize, + dictLoadMethod, dictContentType); + if (ZSTD_isError(initResult)) { + ZSTD_freeDDict(ddict); + return NULL; + } } + return ddict; + } +} + +/*! ZSTD_createDDict() : +* Create a digested dictionary, to start decompression without startup delay. +* `dict` content is copied inside DDict. +* Consequently, `dict` can be released after `ZSTD_DDict` creation */ +ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator); +} + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, to start decompression without startup delay. + * Dictionary content is simply referenced, it will be accessed during decompression. + * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ +ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator); +} + + +const ZSTD_DDict* ZSTD_initStaticDDict( + void* sBuffer, size_t sBufferSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + size_t const neededSpace = sizeof(ZSTD_DDict) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); + ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer; + assert(sBuffer != NULL); + assert(dict != NULL); + if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ + if (sBufferSize < neededSpace) return NULL; + if (dictLoadMethod == ZSTD_dlm_byCopy) { + ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */ + dict = ddict+1; + } + if (ZSTD_isError( ZSTD_initDDict_internal(ddict, + dict, dictSize, + ZSTD_dlm_byRef, dictContentType) )) + return NULL; + return ddict; +} + + +size_t ZSTD_freeDDict(ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = ddict->cMem; + ZSTD_customFree(ddict->dictBuffer, cMem); + ZSTD_customFree(ddict, cMem); + return 0; + } +} + +/*! ZSTD_estimateDDictSize() : + * Estimate amount of memory that will be needed to create a dictionary for decompression. + * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */ +size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod) +{ + return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); +} + +size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support sizeof on NULL */ + return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ; +} + +/*! 
ZSTD_getDictID_fromDDict() : + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; + return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); +} +/**** ended inlining decompress/zstd_ddict.c ****/ +/**** start inlining decompress/zstd_decompress.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* *************************************************************** +* Tuning parameters +*****************************************************************/ +/*! + * HEAPMODE : + * Select how default decompression function ZSTD_decompress() allocates its context, + * on stack (0), or into heap (1, default; requires malloc()). + * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected. + */ +#ifndef ZSTD_HEAPMODE +# define ZSTD_HEAPMODE 1 +#endif + +/*! +* LEGACY_SUPPORT : +* if set to 1+, ZSTD_decompress() can decode older formats (v0.1+) +*/ +#ifndef ZSTD_LEGACY_SUPPORT +# define ZSTD_LEGACY_SUPPORT 0 +#endif + +/*! + * MAXWINDOWSIZE_DEFAULT : + * maximum window size accepted by DStream __by default__. + * Frames requiring more memory will be rejected. + * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize(). + */ +#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT +# define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1) +#endif + +/*! + * NO_FORWARD_PROGRESS_MAX : + * maximum allowed nb of calls to ZSTD_decompressStream() + * without any forward progress + * (defined as: no byte read from input, and no byte flushed to output) + * before triggering an error. + */ +#ifndef ZSTD_NO_FORWARD_PROGRESS_MAX +# define ZSTD_NO_FORWARD_PROGRESS_MAX 16 +#endif + + +/*-******************************************************* +* Dependencies +*********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/xxhash.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: zstd_decompress_internal.h ****/ +/**** skipping file: zstd_ddict.h ****/ +/**** start inlining zstd_decompress_block.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + +#ifndef ZSTD_DEC_BLOCK_H +#define ZSTD_DEC_BLOCK_H + +/*-******************************************************* + * Dependencies + *********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../zstd.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: zstd_decompress_internal.h ****/ + + +/* === Prototypes === */ + +/* note: prototypes already published within `zstd.h` : + * ZSTD_decompressBlock() + */ + +/* note: prototypes already published within `zstd_internal.h` : + * ZSTD_getcBlockSize() + * ZSTD_decodeSeqHeaders() + */ + + +/* ZSTD_decompressBlock_internal() : + * decompress block, starting at `src`, + * into destination buffer `dst`. + * @return : decompressed block size, + * or an error code (which can be tested using ZSTD_isError()) + */ +size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, const int frame); + +/* ZSTD_buildFSETable() : + * generate FSE decoding table for one symbol (ll, ml or off) + * this function must be called with valid parameters only + * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.) + * in which case it cannot fail. + * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is + * defined in zstd_decompress_internal.h. + * Internal use only. + */ +void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog, void* wksp, size_t wkspSize, + int bmi2); + + +#endif /* ZSTD_DEC_BLOCK_H */ +/**** ended inlining zstd_decompress_block.h ****/ + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) +/**** skipping file: ../legacy/zstd_legacy.h ****/ +#endif + + + +/************************************* + * Multiple DDicts Hashset internals * + *************************************/ + +#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4 +#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float. + * Currently, that means a 0.75 load factor. + * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded + * the load factor of the ddict hash set. + */ + +#define DDICT_HASHSET_TABLE_BASE_SIZE 64 +#define DDICT_HASHSET_RESIZE_FACTOR 2 + +/* Hash function to determine starting position of dict insertion within the table + * Returns an index between [0, hashSet->ddictPtrTableSize] + */ +static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) { + const U64 hash = XXH64(&dictID, sizeof(U32), 0); + /* DDict ptr table size is a multiple of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */ + return hash & (hashSet->ddictPtrTableSize - 1); +} + +/* Adds DDict to a hashset without resizing it. + * If inserting a DDict with a dictID that already exists in the set, replaces the one in the set. + * Returns 0 if successful, or a zstd error code if something went wrong. 
+ */ +static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict) { + const U32 dictID = ZSTD_getDictID_fromDDict(ddict); + size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID); + const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1; + RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!"); + DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx); + while (hashSet->ddictPtrTable[idx] != NULL) { + /* Replace existing ddict if inserting ddict with same dictID */ + if (ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]) == dictID) { + DEBUGLOG(4, "DictID already exists, replacing rather than adding"); + hashSet->ddictPtrTable[idx] = ddict; + return 0; + } + idx &= idxRangeMask; + idx++; + } + DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx); + hashSet->ddictPtrTable[idx] = ddict; + hashSet->ddictPtrCount++; + return 0; +} + +/* Expands hash table by factor of DDICT_HASHSET_RESIZE_FACTOR and + * rehashes all values, allocates new table, frees old table. + * Returns 0 on success, otherwise a zstd error code. + */ +static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) { + size_t newTableSize = hashSet->ddictPtrTableSize * DDICT_HASHSET_RESIZE_FACTOR; + const ZSTD_DDict** newTable = (const ZSTD_DDict**)ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem); + const ZSTD_DDict** oldTable = hashSet->ddictPtrTable; + size_t oldTableSize = hashSet->ddictPtrTableSize; + size_t i; + + DEBUGLOG(4, "Expanding DDict hash table! Old size: %zu new size: %zu", oldTableSize, newTableSize); + RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!"); + hashSet->ddictPtrTable = newTable; + hashSet->ddictPtrTableSize = newTableSize; + hashSet->ddictPtrCount = 0; + for (i = 0; i < oldTableSize; ++i) { + if (oldTable[i] != NULL) { + FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, oldTable[i]), ""); + } + } + ZSTD_customFree((void*)oldTable, customMem); + DEBUGLOG(4, "Finished re-hash"); + return 0; +} + +/* Fetches a DDict with the given dictID + * Returns the ZSTD_DDict* with the requested dictID. If it doesn't exist, then returns NULL. + */ +static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, U32 dictID) { + size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID); + const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1; + DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx); + for (;;) { + size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]); + if (currDictID == dictID || currDictID == 0) { + /* currDictID == 0 implies a NULL ddict entry */ + break; + } else { + idx &= idxRangeMask; /* Goes to start of table when we reach the end */ + idx++; + } + } + DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx); + return hashSet->ddictPtrTable[idx]; +} + +/* Allocates space for and returns a ddict hash set + * The hash set's ZSTD_DDict* table has all values automatically set to NULL to begin with. + * Returns NULL if allocation failed. 
+ */ +static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) { + ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem); + DEBUGLOG(4, "Allocating new hash set"); + ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem); + ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE; + ret->ddictPtrCount = 0; + if (!ret || !ret->ddictPtrTable) { + return NULL; + } + return ret; +} + +/* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself. + * Note: The ZSTD_DDict* within the table are NOT freed. + */ +static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) { + DEBUGLOG(4, "Freeing ddict hash set"); + if (hashSet && hashSet->ddictPtrTable) { + ZSTD_customFree((void*)hashSet->ddictPtrTable, customMem); + } + if (hashSet) { + ZSTD_customFree(hashSet, customMem); + } +} + +/* Public function: Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set. + * Returns 0 on success, or a ZSTD error. + */ +static size_t ZSTD_DDictHashSet_addDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, ZSTD_customMem customMem) { + DEBUGLOG(4, "Adding dict ID: %u to hashset with - Count: %zu Tablesize: %zu", ZSTD_getDictID_fromDDict(ddict), hashSet->ddictPtrCount, hashSet->ddictPtrTableSize); + if (hashSet->ddictPtrCount * DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT / hashSet->ddictPtrTableSize * DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT != 0) { + FORWARD_IF_ERROR(ZSTD_DDictHashSet_expand(hashSet, customMem), ""); + } + FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, ddict), ""); + return 0; +} + +/*-************************************************************* +* Context management +***************************************************************/ +size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx) +{ + if (dctx==NULL) return 0; /* support sizeof NULL */ + return sizeof(*dctx) + + ZSTD_sizeof_DDict(dctx->ddictLocal) + + dctx->inBuffSize + dctx->outBuffSize; +} + +size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); } + + +static size_t ZSTD_startingInputLength(ZSTD_format_e format) +{ + size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format); + /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ + assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) ); + return startingInputLength; +} + +static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx) +{ + assert(dctx->streamStage == zdss_init); + dctx->format = ZSTD_f_zstd1; + dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; + dctx->outBufferMode = ZSTD_bm_buffered; + dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; + dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict; +} + +static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) +{ + dctx->staticSize = 0; + dctx->ddict = NULL; + dctx->ddictLocal = NULL; + dctx->dictEnd = NULL; + dctx->ddictIsCold = 0; + dctx->dictUses = ZSTD_dont_use; + dctx->inBuff = NULL; + dctx->inBuffSize = 0; + dctx->outBuffSize = 0; + dctx->streamStage = zdss_init; + dctx->legacyContext = NULL; + dctx->previousLegacyVersion = 0; + dctx->noForwardProgress = 0; + dctx->oversizedDuration = 0; + dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + dctx->ddictSet = NULL; + ZSTD_DCtx_resetParameters(dctx); +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentEndForFuzzing = NULL; +#endif +} + +ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) +{ + 
ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace; + + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL; /* minimum size */ + + ZSTD_initDCtx_internal(dctx); + dctx->staticSize = workspaceSize; + dctx->inBuff = (char*)(dctx+1); + return dctx; +} + +ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) +{ + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + + { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem); + if (!dctx) return NULL; + dctx->customMem = customMem; + ZSTD_initDCtx_internal(dctx); + return dctx; + } +} + +ZSTD_DCtx* ZSTD_createDCtx(void) +{ + DEBUGLOG(3, "ZSTD_createDCtx"); + return ZSTD_createDCtx_advanced(ZSTD_defaultCMem); +} + +static void ZSTD_clearDict(ZSTD_DCtx* dctx) +{ + ZSTD_freeDDict(dctx->ddictLocal); + dctx->ddictLocal = NULL; + dctx->ddict = NULL; + dctx->dictUses = ZSTD_dont_use; +} + +size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) +{ + if (dctx==NULL) return 0; /* support free on NULL */ + RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx"); + { ZSTD_customMem const cMem = dctx->customMem; + ZSTD_clearDict(dctx); + ZSTD_customFree(dctx->inBuff, cMem); + dctx->inBuff = NULL; +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (dctx->legacyContext) + ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion); +#endif + if (dctx->ddictSet) { + ZSTD_freeDDictHashSet(dctx->ddictSet, cMem); + dctx->ddictSet = NULL; + } + ZSTD_customFree(dctx, cMem); + return 0; + } +} + +/* no longer useful */ +void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) +{ + size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx); + ZSTD_memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ +} + +/* Given a dctx with a digested frame params, re-selects the correct ZSTD_DDict based on + * the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then + * accordingly sets the ddict to be used to decompress the frame. + * + * If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is. + * + * ZSTD_d_refMultipleDDicts must be enabled for this function to be called. + */ +static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) { + assert(dctx->refMultipleDDicts && dctx->ddictSet); + DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame"); + if (dctx->ddict) { + const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID); + if (frameDDict) { + DEBUGLOG(4, "DDict found!"); + ZSTD_clearDict(dctx); + dctx->dictID = dctx->fParams.dictID; + dctx->ddict = frameDDict; + dctx->dictUses = ZSTD_use_indefinitely; + } + } +} + + +/*-************************************************************* + * Frame header decoding + ***************************************************************/ + +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. 
*/ +unsigned ZSTD_isFrame(const void* buffer, size_t size) +{ + if (size < ZSTD_FRAMEIDSIZE) return 0; + { U32 const magic = MEM_readLE32(buffer); + if (magic == ZSTD_MAGICNUMBER) return 1; + if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1; + } +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(buffer, size)) return 1; +#endif + return 0; +} + +/** ZSTD_frameHeaderSize_internal() : + * srcSize must be large enough to reach header size fields. + * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless. + * @return : size of the Frame Header + * or an error code, which can be tested with ZSTD_isError() */ +static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) +{ + size_t const minInputSize = ZSTD_startingInputLength(format); + RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, ""); + + { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; + U32 const dictID= fhd & 3; + U32 const singleSegment = (fhd >> 5) & 1; + U32 const fcsId = fhd >> 6; + return minInputSize + !singleSegment + + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] + + (singleSegment && !fcsId); + } +} + +/** ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_frameHeaderSize_prefix. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ +size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) +{ + return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1); +} + + +/** ZSTD_getFrameHeader_advanced() : + * decode Frame Header, or require larger `srcSize`. + * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format) +{ + const BYTE* ip = (const BYTE*)src; + size_t const minInputSize = ZSTD_startingInputLength(format); + + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ + if (srcSize < minInputSize) return minInputSize; + RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter"); + + if ( (format != ZSTD_f_zstd1_magicless) + && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { + if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + /* skippable frame */ + if (srcSize < ZSTD_SKIPPABLEHEADERSIZE) + return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */ + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); + zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE); + zfhPtr->frameType = ZSTD_skippableFrame; + return 0; + } + RETURN_ERROR(prefix_unknown, ""); + } + + /* ensure there is enough `srcSize` to fully read/decode frame header */ + { size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format); + if (srcSize < fhsize) return fhsize; + zfhPtr->headerSize = (U32)fhsize; + } + + { BYTE const fhdByte = ip[minInputSize-1]; + size_t pos = minInputSize; + U32 const dictIDSizeCode = fhdByte&3; + U32 const checksumFlag = (fhdByte>>2)&1; + U32 const singleSegment = (fhdByte>>5)&1; + U32 const fcsID = fhdByte>>6; + U64 windowSize = 0; + U32 dictID = 0; + U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN; + RETURN_ERROR_IF((fhdByte & 
0x08) != 0, frameParameter_unsupported, + "reserved bits, must be zero"); + + if (!singleSegment) { + BYTE const wlByte = ip[pos++]; + U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; + RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, ""); + windowSize = (1ULL << windowLog); + windowSize += (windowSize >> 3) * (wlByte&7); + } + switch(dictIDSizeCode) + { + default: assert(0); /* impossible */ + case 0 : break; + case 1 : dictID = ip[pos]; pos++; break; + case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break; + case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break; + } + switch(fcsID) + { + default: assert(0); /* impossible */ + case 0 : if (singleSegment) frameContentSize = ip[pos]; break; + case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break; + case 2 : frameContentSize = MEM_readLE32(ip+pos); break; + case 3 : frameContentSize = MEM_readLE64(ip+pos); break; + } + if (singleSegment) windowSize = frameContentSize; + + zfhPtr->frameType = ZSTD_frame; + zfhPtr->frameContentSize = frameContentSize; + zfhPtr->windowSize = windowSize; + zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + zfhPtr->dictID = dictID; + zfhPtr->checksumFlag = checksumFlag; + } + return 0; +} + +/** ZSTD_getFrameHeader() : + * decode Frame Header, or require larger `srcSize`. + * note : this function does not consume input, it only reads it. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize) +{ + return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1); +} + + +/** ZSTD_getFrameContentSize() : + * compatible with legacy mode + * @return : decompressed size of the single frame pointed to be `src` if known, otherwise + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */ +unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize) +{ +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) { + unsigned long long const ret = ZSTD_getDecompressedSize_legacy(src, srcSize); + return ret == 0 ? 
ZSTD_CONTENTSIZE_UNKNOWN : ret; + } +#endif + { ZSTD_frameHeader zfh; + if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0) + return ZSTD_CONTENTSIZE_ERROR; + if (zfh.frameType == ZSTD_skippableFrame) { + return 0; + } else { + return zfh.frameContentSize; + } } +} + +static size_t readSkippableFrameSize(void const* src, size_t srcSize) +{ + size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE; + U32 sizeU32; + + RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, ""); + + sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); + RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, + frameParameter_unsupported, ""); + { + size_t const skippableSize = skippableHeaderSize + sizeU32; + RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, ""); + return skippableSize; + } +} + +/** ZSTD_findDecompressedSize() : + * compatible with legacy mode + * `srcSize` must be the exact length of some number of ZSTD compressed and/or + * skippable frames + * @return : decompressed size of the frames contained */ +unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) +{ + unsigned long long totalDstSize = 0; + + while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) { + U32 const magicNumber = MEM_readLE32(src); + + if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + size_t const skippableSize = readSkippableFrameSize(src, srcSize); + if (ZSTD_isError(skippableSize)) { + return ZSTD_CONTENTSIZE_ERROR; + } + assert(skippableSize <= srcSize); + + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; + } + + { unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); + if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret; + + /* check for overflow */ + if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR; + totalDstSize += ret; + } + { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); + if (ZSTD_isError(frameSrcSize)) { + return ZSTD_CONTENTSIZE_ERROR; + } + + src = (const BYTE *)src + frameSrcSize; + srcSize -= frameSrcSize; + } + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ + + if (srcSize) return ZSTD_CONTENTSIZE_ERROR; + + return totalDstSize; +} + +/** ZSTD_getDecompressedSize() : + * compatible with legacy mode + * @return : decompressed size if known, 0 otherwise + note : 0 can mean any of the following : + - frame content is empty + - decompressed size field is not present in frame header + - frame header unknown / not supported + - frame header not complete (`srcSize` too small) */ +unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) +{ + unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN); + return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 0 : ret; +} + + +/** ZSTD_decodeFrameHeader() : + * `headerSize` must be the size provided by ZSTD_frameHeaderSize(). + * If multiple DDict references are enabled, also will choose the correct DDict to use. 
+ * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ +static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) +{ + size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format); + if (ZSTD_isError(result)) return result; /* invalid header */ + RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small"); + + /* Reference DDict requested by frame if dctx references multiple ddicts */ + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) { + ZSTD_DCtx_selectFrameDDict(dctx); + } + +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + /* Skip the dictID check in fuzzing mode, because it makes the search + * harder. + */ + RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), + dictionary_wrong, ""); +#endif + dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0; + if (dctx->validateChecksum) XXH64_reset(&dctx->xxhState, 0); + dctx->processedCSize += headerSize; + return 0; +} + +static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret) +{ + ZSTD_frameSizeInfo frameSizeInfo; + frameSizeInfo.compressedSize = ret; + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + return frameSizeInfo; +} + +static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo; + ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) + return ZSTD_findFrameSizeInfoLegacy(src, srcSize); +#endif + + if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE) + && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); + assert(ZSTD_isError(frameSizeInfo.compressedSize) || + frameSizeInfo.compressedSize <= srcSize); + return frameSizeInfo; + } else { + const BYTE* ip = (const BYTE*)src; + const BYTE* const ipstart = ip; + size_t remainingSize = srcSize; + size_t nbBlocks = 0; + ZSTD_frameHeader zfh; + + /* Extract Frame Header */ + { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); + if (ZSTD_isError(ret)) + return ZSTD_errorFrameSizeInfo(ret); + if (ret > 0) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + } + + ip += zfh.headerSize; + remainingSize -= zfh.headerSize; + + /* Iterate over each block */ + while (1) { + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) + return ZSTD_errorFrameSizeInfo(cBlockSize); + + if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + + ip += ZSTD_blockHeaderSize + cBlockSize; + remainingSize -= ZSTD_blockHeaderSize + cBlockSize; + nbBlocks++; + + if (blockProperties.lastBlock) break; + } + + /* Final frame content checksum */ + if (zfh.checksumFlag) { + if (remainingSize < 4) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + ip += 4; + } + + frameSizeInfo.compressedSize = (size_t)(ip - ipstart); + frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) + ? 
zfh.frameContentSize + : nbBlocks * zfh.blockSizeMax; + return frameSizeInfo; + } +} + +/** ZSTD_findFrameCompressedSize() : + * compatible with legacy mode + * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame + * `srcSize` must be at least as large as the frame contained + * @return : the compressed size of the frame starting at `src` */ +size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); + return frameSizeInfo.compressedSize; +} + +/** ZSTD_decompressBound() : + * compatible with legacy mode + * `src` must point to the start of a ZSTD frame or a skippeable frame + * `srcSize` must be at least as large as the frame contained + * @return : the maximum decompressed size of the compressed source + */ +unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) +{ + unsigned long long bound = 0; + /* Iterate over each frame */ + while (srcSize > 0) { + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); + size_t const compressedSize = frameSizeInfo.compressedSize; + unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; + if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) + return ZSTD_CONTENTSIZE_ERROR; + assert(srcSize >= compressedSize); + src = (const BYTE*)src + compressedSize; + srcSize -= compressedSize; + bound += decompressedBound; + } + return bound; +} + + +/*-************************************************************* + * Frame decoding + ***************************************************************/ + +/** ZSTD_insertBlock() : + * insert `src` block into `dctx` history. Useful to track uncompressed blocks. */ +size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) +{ + DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize); + ZSTD_checkContinuity(dctx, blockStart, blockSize); + dctx->previousDstEnd = (const char*)blockStart + blockSize; + return blockSize; +} + + +static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_copyRawBlock"); + RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); + if (dst == NULL) { + if (srcSize == 0) return 0; + RETURN_ERROR(dstBuffer_null, ""); + } + ZSTD_memcpy(dst, src, srcSize); + return srcSize; +} + +static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, + BYTE b, + size_t regenSize) +{ + RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, ""); + if (dst == NULL) { + if (regenSize == 0) return 0; + RETURN_ERROR(dstBuffer_null, ""); + } + ZSTD_memset(dst, b, regenSize); + return regenSize; +} + +static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming) +{ +#if ZSTD_TRACE + if (dctx->traceCtx && ZSTD_trace_decompress_end != NULL) { + ZSTD_Trace trace; + ZSTD_memset(&trace, 0, sizeof(trace)); + trace.version = ZSTD_VERSION_NUMBER; + trace.streaming = streaming; + if (dctx->ddict) { + trace.dictionaryID = ZSTD_getDictID_fromDDict(dctx->ddict); + trace.dictionarySize = ZSTD_DDict_dictSize(dctx->ddict); + trace.dictionaryIsCold = dctx->ddictIsCold; + } + trace.uncompressedSize = (size_t)uncompressedSize; + trace.compressedSize = (size_t)compressedSize; + trace.dctx = dctx; + ZSTD_trace_decompress_end(dctx->traceCtx, &trace); + } +#else + (void)dctx; + (void)uncompressedSize; + (void)compressedSize; + (void)streaming; +#endif +} + + +/*! 
ZSTD_decompressFrame() : + * @dctx must be properly initialized + * will update *srcPtr and *srcSizePtr, + * to make *srcPtr progress by one frame. */ +static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void** srcPtr, size_t *srcSizePtr) +{ + const BYTE* const istart = (const BYTE*)(*srcPtr); + const BYTE* ip = istart; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart; + BYTE* op = ostart; + size_t remainingSrcSize = *srcSizePtr; + + DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr); + + /* check */ + RETURN_ERROR_IF( + remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize, + srcSize_wrong, ""); + + /* Frame Header */ + { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal( + ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format); + if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; + RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize, + srcSize_wrong, ""); + FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , ""); + ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; + } + + /* Loop on each block */ + while (1) { + size_t decodedSize; + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + + ip += ZSTD_blockHeaderSize; + remainingSrcSize -= ZSTD_blockHeaderSize; + RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, ""); + + switch(blockProperties.blockType) + { + case bt_compressed: + decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1); + break; + case bt_raw : + decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize); + break; + case bt_rle : + decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize); + break; + case bt_reserved : + default: + RETURN_ERROR(corruption_detected, "invalid block type"); + } + + if (ZSTD_isError(decodedSize)) return decodedSize; + if (dctx->validateChecksum) + XXH64_update(&dctx->xxhState, op, decodedSize); + if (decodedSize != 0) + op += decodedSize; + assert(ip != NULL); + ip += cBlockSize; + remainingSrcSize -= cBlockSize; + if (blockProperties.lastBlock) break; + } + + if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { + RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize, + corruption_detected, ""); + } + if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ + RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, ""); + if (!dctx->forceIgnoreChecksum) { + U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); + U32 checkRead; + checkRead = MEM_readLE32(ip); + RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, ""); + } + ip += 4; + remainingSrcSize -= 4; + } + ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0); + /* Allow caller to get size read */ + *srcPtr = ip; + *srcSizePtr = remainingSrcSize; + return (size_t)(op-ostart); +} + +static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + const ZSTD_DDict* ddict) +{ + void* const dststart = dst; + int moreThan1Frame = 0; + + DEBUGLOG(5, "ZSTD_decompressMultiFrame"); + assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */ + + if (ddict) { + dict = 
ZSTD_DDict_dictContent(ddict); + dictSize = ZSTD_DDict_dictSize(ddict); + } + + while (srcSize >= ZSTD_startingInputLength(dctx->format)) { + +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) + if (ZSTD_isLegacy(src, srcSize)) { + size_t decodedSize; + size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize); + if (ZSTD_isError(frameSize)) return frameSize; + RETURN_ERROR_IF(dctx->staticSize, memory_allocation, + "legacy support is not compatible with static dctx"); + + decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); + if (ZSTD_isError(decodedSize)) return decodedSize; + + assert(decodedSize <= dstCapacity); + dst = (BYTE*)dst + decodedSize; + dstCapacity -= decodedSize; + + src = (const BYTE*)src + frameSize; + srcSize -= frameSize; + + continue; + } +#endif + + { U32 const magicNumber = MEM_readLE32(src); + DEBUGLOG(4, "reading magic number %08X (expecting %08X)", + (unsigned)magicNumber, ZSTD_MAGICNUMBER); + if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + size_t const skippableSize = readSkippableFrameSize(src, srcSize); + FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed"); + assert(skippableSize <= srcSize); + + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; + } } + + if (ddict) { + /* we were called from ZSTD_decompress_usingDDict */ + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), ""); + } else { + /* this will initialize correctly with no dict if dict == NULL, so + * use this in all cases but ddict */ + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), ""); + } + ZSTD_checkContinuity(dctx, dst, dstCapacity); + + { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, + &src, &srcSize); + RETURN_ERROR_IF( + (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) + && (moreThan1Frame==1), + srcSize_wrong, + "At least one frame successfully completed, " + "but following bytes are garbage: " + "it's more likely to be a srcSize error, " + "specifying more input bytes than size of frame(s). " + "Note: one could be unlucky, it might be a corruption error instead, " + "happening right at the place where we expect zstd magic bytes. 
" + "But this is _much_ less likely than a srcSize field error."); + if (ZSTD_isError(res)) return res; + assert(res <= dstCapacity); + if (res != 0) + dst = (BYTE*)dst + res; + dstCapacity -= res; + } + moreThan1Frame = 1; + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ + + RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed"); + + return (size_t)((BYTE*)dst - (BYTE*)dststart); +} + +size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize) +{ + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL); +} + + +static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx) +{ + switch (dctx->dictUses) { + default: + assert(0 /* Impossible */); + /* fall-through */ + case ZSTD_dont_use: + ZSTD_clearDict(dctx); + return NULL; + case ZSTD_use_indefinitely: + return dctx->ddict; + case ZSTD_use_once: + dctx->dictUses = ZSTD_dont_use; + return dctx->ddict; + } +} + +size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx)); +} + + +size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ +#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) + size_t regenSize; + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!"); + regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); + ZSTD_freeDCtx(dctx); + return regenSize; +#else /* stack mode */ + ZSTD_DCtx dctx; + ZSTD_initDCtx_internal(&dctx); + return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize); +#endif +} + + +/*-************************************** +* Advanced Streaming Decompression API +* Bufferless and synchronous +****************************************/ +size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; } + +/** + * Similar to ZSTD_nextSrcSizeToDecompress(), but when when a block input can be streamed, + * we allow taking a partial block as the input. Currently only raw uncompressed blocks can + * be streamed. + * + * For blocks that can be streamed, this allows us to reduce the latency until we produce + * output, and avoid copying the input. + * + * @param inputSize - The total amount of input that the caller currently has. 
+ */ +static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) { + if (!(dctx->stage == ZSTDds_decompressBlock || dctx->stage == ZSTDds_decompressLastBlock)) + return dctx->expected; + if (dctx->bType != bt_raw) + return dctx->expected; + return MIN(MAX(inputSize, 1), dctx->expected); +} + +ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) { + switch(dctx->stage) + { + default: /* should not happen */ + assert(0); + case ZSTDds_getFrameHeaderSize: + case ZSTDds_decodeFrameHeader: + return ZSTDnit_frameHeader; + case ZSTDds_decodeBlockHeader: + return ZSTDnit_blockHeader; + case ZSTDds_decompressBlock: + return ZSTDnit_block; + case ZSTDds_decompressLastBlock: + return ZSTDnit_lastBlock; + case ZSTDds_checkChecksum: + return ZSTDnit_checksum; + case ZSTDds_decodeSkippableHeader: + case ZSTDds_skipFrame: + return ZSTDnit_skippableFrame; + } +} + +static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; } + +/** ZSTD_decompressContinue() : + * srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress()) + * @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity) + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize); + /* Sanity check */ + RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed"); + ZSTD_checkContinuity(dctx, dst, dstCapacity); + + dctx->processedCSize += srcSize; + + switch (dctx->stage) + { + case ZSTDds_getFrameHeaderSize : + assert(src != NULL); + if (dctx->format == ZSTD_f_zstd1) { /* allows header */ + assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */ + if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + ZSTD_memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize; /* remaining to load to get full skippable frame header */ + dctx->stage = ZSTDds_decodeSkippableHeader; + return 0; + } } + dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format); + if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize; + ZSTD_memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = dctx->headerSize - srcSize; + dctx->stage = ZSTDds_decodeFrameHeader; + return 0; + + case ZSTDds_decodeFrameHeader: + assert(src != NULL); + ZSTD_memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), ""); + dctx->expected = ZSTD_blockHeaderSize; + dctx->stage = ZSTDds_decodeBlockHeader; + return 0; + + case ZSTDds_decodeBlockHeader: + { blockProperties_t bp; + size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum"); + dctx->expected = cBlockSize; + dctx->bType = bp.blockType; + dctx->rleSize = bp.origSize; + if (cBlockSize) { + dctx->stage = bp.lastBlock ? 
ZSTDds_decompressLastBlock : ZSTDds_decompressBlock; + return 0; + } + /* empty block */ + if (bp.lastBlock) { + if (dctx->fParams.checksumFlag) { + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + dctx->expected = 0; /* end of frame */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->expected = ZSTD_blockHeaderSize; /* jump to next header */ + dctx->stage = ZSTDds_decodeBlockHeader; + } + return 0; + } + + case ZSTDds_decompressLastBlock: + case ZSTDds_decompressBlock: + DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock"); + { size_t rSize; + switch(dctx->bType) + { + case bt_compressed: + DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); + rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1); + dctx->expected = 0; /* Streaming not supported */ + break; + case bt_raw : + assert(srcSize <= dctx->expected); + rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); + FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed"); + assert(rSize == srcSize); + dctx->expected -= rSize; + break; + case bt_rle : + rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize); + dctx->expected = 0; /* Streaming not supported */ + break; + case bt_reserved : /* should never happen */ + default: + RETURN_ERROR(corruption_detected, "invalid block type"); + } + FORWARD_IF_ERROR(rSize, ""); + RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); + DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); + dctx->decodedSize += rSize; + if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, dst, rSize); + dctx->previousDstEnd = (char*)dst + rSize; + + /* Stay on the same stage until we are finished streaming the block. 
*/ + if (dctx->expected > 0) { + return rSize; + } + + if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ + DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize); + RETURN_ERROR_IF( + dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && dctx->decodedSize != dctx->fParams.frameContentSize, + corruption_detected, ""); + if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); + dctx->expected = 0; /* ends here */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->stage = ZSTDds_decodeBlockHeader; + dctx->expected = ZSTD_blockHeaderSize; + } + return rSize; + } + + case ZSTDds_checkChecksum: + assert(srcSize == 4); /* guaranteed by dctx->expected */ + { + if (dctx->validateChecksum) { + U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); + U32 const check32 = MEM_readLE32(src); + DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); + RETURN_ERROR_IF(check32 != h32, checksum_wrong, ""); + } + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + } + + case ZSTDds_decodeSkippableHeader: + assert(src != NULL); + assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); + ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ + dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ + dctx->stage = ZSTDds_skipFrame; + return 0; + + case ZSTDds_skipFrame: + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + + default: + assert(0); /* impossible */ + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ + } +} + + +static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + dctx->dictEnd = dctx->previousDstEnd; + dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dict; + dctx->previousDstEnd = (const char*)dict + dictSize; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif + return 0; +} + +/*! ZSTD_loadDEntropy() : + * dict : must point at beginning of a valid zstd dictionary. 
+ * @return : size of entropy tables read */ +size_t +ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, + const void* const dict, size_t const dictSize) +{ + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; + + RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small"); + assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */ + dictPtr += 8; /* skip header = magic + dictID */ + + ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable)); + ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable)); + ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE); + { void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */ + size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable); +#ifdef HUF_FORCE_DECOMPRESS_X1 + /* in minimal huffman, we always use X1 variants */ + size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable, + dictPtr, dictEnd - dictPtr, + workspace, workspaceSize); +#else + size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, + dictPtr, (size_t)(dictEnd - dictPtr), + workspace, workspaceSize); +#endif + RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, ""); + dictPtr += hSize; + } + + { short offcodeNCount[MaxOff+1]; + unsigned offcodeMaxValue = MaxOff, offcodeLog; + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->OFTable, + offcodeNCount, offcodeMaxValue, + OF_base, OF_bits, + offcodeLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */0); + dictPtr += offcodeHeaderSize; + } + + { short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->MLTable, + matchlengthNCount, matchlengthMaxValue, + ML_base, ML_bits, + matchlengthLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); + dictPtr += matchlengthHeaderSize; + } + + { short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog; + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->LLTable, + litlengthNCount, litlengthMaxValue, + LL_base, LL_bits, + litlengthLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); + dictPtr += 
litlengthHeaderSize; + } + + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); + { int i; + size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); + for (i=0; i<3; i++) { + U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; + RETURN_ERROR_IF(rep==0 || rep > dictContentSize, + dictionary_corrupted, ""); + entropy->rep[i] = rep; + } } + + return (size_t)(dictPtr - (const BYTE*)dict); +} + +static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize); + { U32 const magic = MEM_readLE32(dict); + if (magic != ZSTD_MAGIC_DICTIONARY) { + return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ + } } + dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); + + /* load entropy tables */ + { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize); + RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, ""); + dict = (const char*)dict + eSize; + dictSize -= eSize; + } + dctx->litEntropy = dctx->fseEntropy = 1; + + /* reference dictionary content */ + return ZSTD_refDictContent(dctx, dict, dictSize); +} + +size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) +{ + assert(dctx != NULL); +#if ZSTD_TRACE + dctx->traceCtx = (ZSTD_trace_decompress_begin != NULL) ? ZSTD_trace_decompress_begin(dctx) : 0; +#endif + dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */ + dctx->stage = ZSTDds_getFrameHeaderSize; + dctx->processedCSize = 0; + dctx->decodedSize = 0; + dctx->previousDstEnd = NULL; + dctx->prefixStart = NULL; + dctx->virtualStart = NULL; + dctx->dictEnd = NULL; + dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ + dctx->litEntropy = dctx->fseEntropy = 0; + dctx->dictID = 0; + dctx->bType = bt_reserved; + ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); + ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ + dctx->LLTptr = dctx->entropy.LLTable; + dctx->MLTptr = dctx->entropy.MLTable; + dctx->OFTptr = dctx->entropy.OFTable; + dctx->HUFptr = dctx->entropy.hufTable; + return 0; +} + +size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); + if (dict && dictSize) + RETURN_ERROR_IF( + ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)), + dictionary_corrupted, ""); + return 0; +} + + +/* ====== ZSTD_DDict ====== */ + +size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict"); + assert(dctx != NULL); + if (ddict) { + const char* const dictStart = (const char*)ZSTD_DDict_dictContent(ddict); + size_t const dictSize = ZSTD_DDict_dictSize(ddict); + const void* const dictEnd = dictStart + dictSize; + dctx->ddictIsCold = (dctx->dictEnd != dictEnd); + DEBUGLOG(4, "DDict is %s", + dctx->ddictIsCold ? "~cold~" : "hot!"); + } + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); + if (ddict) { /* NULL ddict is equivalent to no dictionary */ + ZSTD_copyDDictParameters(dctx, ddict); + } + return 0; +} + +/*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. 
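+ *  Illustrative use (sketch only; `dictBuf` / `dictLen` stand for a caller-provided buffer) :
+ *      unsigned const id = ZSTD_getDictID_fromDict(dictBuf, dictLen);
+ *      if (id == 0) { ... not conformant : loaded as raw content ... }
+ *      else        { ... conformant dictionary, registered under `id` ... }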
*/ +unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize) +{ + if (dictSize < 8) return 0; + if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0; + return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); +} + +/*! ZSTD_getDictID_fromFrame() : + * Provides the dictID required to decompress frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This could for one of the following reasons : + * - The frame does not require a dictionary (most common case). + * - The frame was built with dictID intentionally removed. + * Needed dictionary is a hidden information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, frame header could not be decoded. + * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`. + * - This is not a Zstandard frame. + * When identifying the exact failure cause, it's possible to use + * ZSTD_getFrameHeader(), which will provide a more precise error code. */ +unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize) +{ + ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 }; + size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize); + if (ZSTD_isError(hError)) return 0; + return zfp.dictID; +} + + +/*! ZSTD_decompress_usingDDict() : +* Decompression using a pre-digested Dictionary +* Use dictionary without significant overhead. */ +size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict) +{ + /* pass content and size in case legacy frames are encountered */ + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, + NULL, 0, + ddict); +} + + +/*===================================== +* Streaming decompression +*====================================*/ + +ZSTD_DStream* ZSTD_createDStream(void) +{ + DEBUGLOG(3, "ZSTD_createDStream"); + return ZSTD_createDStream_advanced(ZSTD_defaultCMem); +} + +ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize) +{ + return ZSTD_initStaticDCtx(workspace, workspaceSize); +} + +ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem) +{ + return ZSTD_createDCtx_advanced(customMem); +} + +size_t ZSTD_freeDStream(ZSTD_DStream* zds) +{ + return ZSTD_freeDCtx(zds); +} + + +/* *** Initialization *** */ + +size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; } +size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; } + +size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + if (dict && dictSize != 0) { + dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); + RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!"); + dctx->ddict = dctx->ddictLocal; + dctx->dictUses = ZSTD_use_indefinitely; + } + return 0; +} + +size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); +} + +size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); +} + +size_t 
ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) +{ + FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), ""); + dctx->dictUses = ZSTD_use_once; + return 0; +} + +size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize) +{ + return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent); +} + + +/* ZSTD_initDStream_usingDict() : + * return : expected size, aka ZSTD_startingInputLength(). + * this function cannot fail */ +size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) +{ + DEBUGLOG(4, "ZSTD_initDStream_usingDict"); + FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , ""); + return ZSTD_startingInputLength(zds->format); +} + +/* note : this variant can't fail */ +size_t ZSTD_initDStream(ZSTD_DStream* zds) +{ + DEBUGLOG(4, "ZSTD_initDStream"); + return ZSTD_initDStream_usingDDict(zds, NULL); +} + +/* ZSTD_initDStream_usingDDict() : + * ddict will just be referenced, and must outlive decompression session + * this function cannot fail */ +size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) +{ + FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , ""); + return ZSTD_startingInputLength(dctx->format); +} + +/* ZSTD_resetDStream() : + * return : expected size, aka ZSTD_startingInputLength(). + * this function cannot fail */ +size_t ZSTD_resetDStream(ZSTD_DStream* dctx) +{ + FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), ""); + return ZSTD_startingInputLength(dctx->format); +} + + +size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + if (ddict) { + dctx->ddict = ddict; + dctx->dictUses = ZSTD_use_indefinitely; + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) { + if (dctx->ddictSet == NULL) { + dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem); + if (!dctx->ddictSet) { + RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!"); + } + } + assert(!dctx->staticSize); /* Impossible: ddictSet cannot have been allocated if static dctx */ + FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict, dctx->customMem), ""); + } + } + return 0; +} + +/* ZSTD_DCtx_setMaxWindowSize() : + * note : no direct equivalence in ZSTD_DCtx_setParameter, + * since this version sets windowSize, and the other sets windowLog */ +size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) +{ + ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax); + size_t const min = (size_t)1 << bounds.lowerBound; + size_t const max = (size_t)1 << bounds.upperBound; + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, ""); + RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, ""); + dctx->maxWindowSize = maxWindowSize; + return 0; +} + +size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) +{ + return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (int)format); +} + +ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) +{ + ZSTD_bounds bounds = { 0, 0, 0 }; + switch(dParam) { + case ZSTD_d_windowLogMax: + bounds.lowerBound = 
ZSTD_WINDOWLOG_ABSOLUTEMIN; + bounds.upperBound = ZSTD_WINDOWLOG_MAX; + return bounds; + case ZSTD_d_format: + bounds.lowerBound = (int)ZSTD_f_zstd1; + bounds.upperBound = (int)ZSTD_f_zstd1_magicless; + ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); + return bounds; + case ZSTD_d_stableOutBuffer: + bounds.lowerBound = (int)ZSTD_bm_buffered; + bounds.upperBound = (int)ZSTD_bm_stable; + return bounds; + case ZSTD_d_forceIgnoreChecksum: + bounds.lowerBound = (int)ZSTD_d_validateChecksum; + bounds.upperBound = (int)ZSTD_d_ignoreChecksum; + return bounds; + case ZSTD_d_refMultipleDDicts: + bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict; + bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts; + return bounds; + default:; + } + bounds.error = ERROR(parameter_unsupported); + return bounds; +} + +/* ZSTD_dParam_withinBounds: + * @return 1 if value is within dParam bounds, + * 0 otherwise */ +static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value) +{ + ZSTD_bounds const bounds = ZSTD_dParam_getBounds(dParam); + if (ZSTD_isError(bounds.error)) return 0; + if (value < bounds.lowerBound) return 0; + if (value > bounds.upperBound) return 0; + return 1; +} + +#define CHECK_DBOUNDS(p,v) { \ + RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \ +} + +size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value) +{ + switch (param) { + case ZSTD_d_windowLogMax: + *value = (int)ZSTD_highbit32((U32)dctx->maxWindowSize); + return 0; + case ZSTD_d_format: + *value = (int)dctx->format; + return 0; + case ZSTD_d_stableOutBuffer: + *value = (int)dctx->outBufferMode; + return 0; + case ZSTD_d_forceIgnoreChecksum: + *value = (int)dctx->forceIgnoreChecksum; + return 0; + case ZSTD_d_refMultipleDDicts: + *value = (int)dctx->refMultipleDDicts; + return 0; + default:; + } + RETURN_ERROR(parameter_unsupported, ""); +} + +size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + switch(dParam) { + case ZSTD_d_windowLogMax: + if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT; + CHECK_DBOUNDS(ZSTD_d_windowLogMax, value); + dctx->maxWindowSize = ((size_t)1) << value; + return 0; + case ZSTD_d_format: + CHECK_DBOUNDS(ZSTD_d_format, value); + dctx->format = (ZSTD_format_e)value; + return 0; + case ZSTD_d_stableOutBuffer: + CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value); + dctx->outBufferMode = (ZSTD_bufferMode_e)value; + return 0; + case ZSTD_d_forceIgnoreChecksum: + CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value); + dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value; + return 0; + case ZSTD_d_refMultipleDDicts: + CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value); + if (dctx->staticSize != 0) { + RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!"); + } + dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value; + return 0; + default:; + } + RETURN_ERROR(parameter_unsupported, ""); +} + +size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) +{ + if ( (reset == ZSTD_reset_session_only) + || (reset == ZSTD_reset_session_and_parameters) ) { + dctx->streamStage = zdss_init; + dctx->noForwardProgress = 0; + } + if ( (reset == ZSTD_reset_parameters) + || (reset == ZSTD_reset_session_and_parameters) ) { + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + ZSTD_DCtx_resetParameters(dctx); + } + return 0; +} + + +size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx) +{ + 
return ZSTD_sizeof_DCtx(dctx); +} + +size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) +{ + size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2); + unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); + size_t const minRBSize = (size_t) neededSize; + RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, + frameParameter_windowTooLarge, ""); + return minRBSize; +} + +size_t ZSTD_estimateDStreamSize(size_t windowSize) +{ + size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + size_t const inBuffSize = blockSize; /* no block can be larger */ + size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN); + return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize; +} + +size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) +{ + U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable, but requires an additional parameter (or a dctx) */ + ZSTD_frameHeader zfh; + size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); + if (ZSTD_isError(err)) return err; + RETURN_ERROR_IF(err>0, srcSize_wrong, ""); + RETURN_ERROR_IF(zfh.windowSize > windowSizeMax, + frameParameter_windowTooLarge, ""); + return ZSTD_estimateDStreamSize((size_t)zfh.windowSize); +} + + +/* ***** Decompression ***** */ + +static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) +{ + return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR; +} + +static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) +{ + if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize)) + zds->oversizedDuration++; + else + zds->oversizedDuration = 0; +} + +static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds) +{ + return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION; +} + +/* Checks that the output buffer hasn't changed if ZSTD_obm_stable is used. */ +static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output) +{ + ZSTD_outBuffer const expect = zds->expectedOutBuffer; + /* No requirement when ZSTD_obm_stable is not enabled. */ + if (zds->outBufferMode != ZSTD_bm_stable) + return 0; + /* Any buffer is allowed in zdss_init, this must be the same for every other call until + * the context is reset. + */ + if (zds->streamStage == zdss_init) + return 0; + /* The buffer must match our expectation exactly. */ + if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size) + return 0; + RETURN_ERROR(dstBuffer_wrong, "ZSTD_d_stableOutBuffer enabled but output differs!"); +} + +/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream() + * and updates the stage and the output buffer state. This call is extracted so it can be + * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode. + * NOTE: You must break after calling this function since the streamStage is modified. + */ +static size_t ZSTD_decompressContinueStream( + ZSTD_DStream* zds, char** op, char* oend, + void const* src, size_t srcSize) { + int const isSkipFrame = ZSTD_isSkipFrame(zds); + if (zds->outBufferMode == ZSTD_bm_buffered) { + size_t const dstSize = isSkipFrame ? 
0 : zds->outBuffSize - zds->outStart; + size_t const decodedSize = ZSTD_decompressContinue(zds, + zds->outBuff + zds->outStart, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + if (!decodedSize && !isSkipFrame) { + zds->streamStage = zdss_read; + } else { + zds->outEnd = zds->outStart + decodedSize; + zds->streamStage = zdss_flush; + } + } else { + /* Write directly into the output buffer */ + size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op); + size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + *op += decodedSize; + /* Flushing is not needed. */ + zds->streamStage = zdss_read; + assert(*op <= oend); + assert(zds->outBufferMode == ZSTD_bm_stable); + } + return 0; +} + +size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + const char* const src = (const char*)input->src; + const char* const istart = input->pos != 0 ? src + input->pos : src; + const char* const iend = input->size != 0 ? src + input->size : src; + const char* ip = istart; + char* const dst = (char*)output->dst; + char* const ostart = output->pos != 0 ? dst + output->pos : dst; + char* const oend = output->size != 0 ? dst + output->size : dst; + char* op = ostart; + U32 someMoreWork = 1; + + DEBUGLOG(5, "ZSTD_decompressStream"); + RETURN_ERROR_IF( + input->pos > input->size, + srcSize_wrong, + "forbidden. in: pos: %u vs size: %u", + (U32)input->pos, (U32)input->size); + RETURN_ERROR_IF( + output->pos > output->size, + dstSize_tooSmall, + "forbidden. out: pos: %u vs size: %u", + (U32)output->pos, (U32)output->size); + DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); + FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), ""); + + while (someMoreWork) { + switch(zds->streamStage) + { + case zdss_init : + DEBUGLOG(5, "stage zdss_init => transparent reset "); + zds->streamStage = zdss_loadHeader; + zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; + zds->legacyVersion = 0; + zds->hostageByte = 0; + zds->expectedOutBuffer = *output; + /* fall-through */ + + case zdss_loadHeader : + DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip)); +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + if (zds->legacyVersion) { + RETURN_ERROR_IF(zds->staticSize, memory_allocation, + "legacy support is incompatible with static dctx"); + { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); + if (hint==0) zds->streamStage = zdss_init; + return hint; + } } +#endif + { size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format); + if (zds->refMultipleDDicts && zds->ddictSet) { + ZSTD_DCtx_selectFrameDDict(zds); + } + DEBUGLOG(5, "header size : %u", (U32)hSize); + if (ZSTD_isError(hSize)) { +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) + U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart); + if (legacyVersion) { + ZSTD_DDict const* const ddict = ZSTD_getDDict(zds); + const void* const dict = ddict ? ZSTD_DDict_dictContent(ddict) : NULL; + size_t const dictSize = ddict ? 
ZSTD_DDict_dictSize(ddict) : 0; + DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion); + RETURN_ERROR_IF(zds->staticSize, memory_allocation, + "legacy support is incompatible with static dctx"); + FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext, + zds->previousLegacyVersion, legacyVersion, + dict, dictSize), ""); + zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; + { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input); + if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */ + return hint; + } } +#endif + return hSize; /* error */ + } + if (hSize != 0) { /* need more input */ + size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ + size_t const remainingInput = (size_t)(iend-ip); + assert(iend >= ip); + if (toLoad > remainingInput) { /* not enough input to load full header */ + if (remainingInput > 0) { + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); + zds->lhSize += remainingInput; + } + input->pos = input->size; + return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ + } + assert(ip != NULL); + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; + break; + } } + + /* check for single-pass mode opportunity */ + if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && zds->fParams.frameType != ZSTD_skippableFrame + && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { + size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart)); + if (cSize <= (size_t)(iend-istart)) { + /* shortcut : using single-pass mode */ + size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds)); + if (ZSTD_isError(decompressedSize)) return decompressedSize; + DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") + ip = istart + cSize; + op += decompressedSize; + zds->expected = 0; + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } } + + /* Check output buffer is large enough for ZSTD_odm_stable. 
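+ * In stable-output mode the frame is written straight into the caller's buffer,
+ * so e.g. a frame announcing frameContentSize == 100 needs at least 100 writable
+ * bytes left in `output` up front; the check below enforces exactly that.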
*/ + if (zds->outBufferMode == ZSTD_bm_stable + && zds->fParams.frameType != ZSTD_skippableFrame + && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) { + RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small"); + } + + /* Consume header (see ZSTDds_decodeFrameHeader) */ + DEBUGLOG(4, "Consume header"); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); + + if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); + zds->stage = ZSTDds_skipFrame; + } else { + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), ""); + zds->expected = ZSTD_blockHeaderSize; + zds->stage = ZSTDds_decodeBlockHeader; + } + + /* control buffer memory usage */ + DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)", + (U32)(zds->fParams.windowSize >>10), + (U32)(zds->maxWindowSize >> 10) ); + zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); + RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, + frameParameter_windowTooLarge, ""); + + /* Adapt buffer sizes to frame header instructions */ + { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); + size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered + ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize) + : 0; + + ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize); + + { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize); + int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds); + + if (tooSmall || tooLarge) { + size_t const bufferSize = neededInBuffSize + neededOutBuffSize; + DEBUGLOG(4, "inBuff : from %u to %u", + (U32)zds->inBuffSize, (U32)neededInBuffSize); + DEBUGLOG(4, "outBuff : from %u to %u", + (U32)zds->outBuffSize, (U32)neededOutBuffSize); + if (zds->staticSize) { /* static DCtx */ + DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); + assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ + RETURN_ERROR_IF( + bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), + memory_allocation, ""); + } else { + ZSTD_customFree(zds->inBuff, zds->customMem); + zds->inBuffSize = 0; + zds->outBuffSize = 0; + zds->inBuff = (char*)ZSTD_customMalloc(bufferSize, zds->customMem); + RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, ""); + } + zds->inBuffSize = neededInBuffSize; + zds->outBuff = zds->inBuff + zds->inBuffSize; + zds->outBuffSize = neededOutBuffSize; + } } } + zds->streamStage = zdss_read; + /* fall-through */ + + case zdss_read: + DEBUGLOG(5, "stage zdss_read"); + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip)); + DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize); + if (neededInSize==0) { /* end of frame */ + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } + if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), ""); + ip += neededInSize; + /* Function modifies the stage so we must break */ + break; + } } + if (ip==iend) { someMoreWork = 0; break; } /* no more input */ + zds->streamStage = zdss_load; + /* fall-through */ + + case zdss_load: + { size_t const neededInSize = 
ZSTD_nextSrcSizeToDecompress(zds); + size_t const toLoad = neededInSize - zds->inPos; + int const isSkipFrame = ZSTD_isSkipFrame(zds); + size_t loadedSize; + /* At this point we shouldn't be decompressing a block that we can stream. */ + assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip)); + if (isSkipFrame) { + loadedSize = MIN(toLoad, (size_t)(iend-ip)); + } else { + RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos, + corruption_detected, + "should never happen"); + loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip)); + } + ip += loadedSize; + zds->inPos += loadedSize; + if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ + + /* decode loaded input */ + zds->inPos = 0; /* input is consumed */ + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), ""); + /* Function modifies the stage so we must break */ + break; + } + case zdss_flush: + { size_t const toFlushSize = zds->outEnd - zds->outStart; + size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize); + op += flushedSize; + zds->outStart += flushedSize; + if (flushedSize == toFlushSize) { /* flush completed */ + zds->streamStage = zdss_read; + if ( (zds->outBuffSize < zds->fParams.frameContentSize) + && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { + DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)", + (int)(zds->outBuffSize - zds->outStart), + (U32)zds->fParams.blockSizeMax); + zds->outStart = zds->outEnd = 0; + } + break; + } } + /* cannot complete flush */ + someMoreWork = 0; + break; + + default: + assert(0); /* impossible */ + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ + } } + + /* result */ + input->pos = (size_t)(ip - (const char*)(input->src)); + output->pos = (size_t)(op - (char*)(output->dst)); + + /* Update the expected output buffer for ZSTD_obm_stable. 
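+ * The snapshot taken here is what ZSTD_checkOutBuffer() compares against on the
+ * next call : in stable-output mode the caller must hand back the same dst/size/pos.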
*/ + zds->expectedOutBuffer = *output; + + if ((ip==istart) && (op==ostart)) { /* no forward progress */ + zds->noForwardProgress ++; + if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { + RETURN_ERROR_IF(op==oend, dstSize_tooSmall, ""); + RETURN_ERROR_IF(ip==iend, srcSize_wrong, ""); + assert(0); + } + } else { + zds->noForwardProgress = 0; + } + { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds); + if (!nextSrcSizeHint) { /* frame fully decoded */ + if (zds->outEnd == zds->outStart) { /* output fully flushed */ + if (zds->hostageByte) { + if (input->pos >= input->size) { + /* can't release hostage (not present) */ + zds->streamStage = zdss_read; + return 1; + } + input->pos++; /* release hostage */ + } /* zds->hostageByte */ + return 0; + } /* zds->outEnd == zds->outStart */ + if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */ + input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */ + zds->hostageByte=1; + } + return 1; + } /* nextSrcSizeHint==0 */ + nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */ + assert(zds->inPos <= nextSrcSizeHint); + nextSrcSizeHint -= zds->inPos; /* part already loaded*/ + return nextSrcSizeHint; + } +} + +size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos) +{ + ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; + ZSTD_inBuffer input = { src, srcSize, *srcPos }; + /* ZSTD_compress_generic() will check validity of dstPos and srcPos */ + size_t const cErr = ZSTD_decompressStream(dctx, &output, &input); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; +} +/**** ended inlining decompress/zstd_decompress.c ****/ +/**** start inlining decompress/zstd_decompress_block.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* zstd_decompress_block : + * this module takes care of decompressing _compressed_ block */ + +/*-******************************************************* +* Dependencies +*********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/compiler.h ****/ +/**** skipping file: ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: zstd_decompress_internal.h ****/ +/**** skipping file: zstd_ddict.h ****/ +/**** skipping file: zstd_decompress_block.h ****/ + +/*_******************************************************* +* Macros +**********************************************************/ + +/* These two optional macros force the use one way or another of the two + * ZSTD_decompressSequences implementations. You can't force in both directions + * at the same time. 
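+ * For example, compiling with -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT pins that
+ * variant unconditionally; defining both macros at once trips the #error just below.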
+ */ +#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) +#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!" +#endif + + +/*_******************************************************* +* Memory operations +**********************************************************/ +static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); } + + +/*-************************************************************* + * Block decoding + ***************************************************************/ + +/*! ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr) +{ + RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, ""); + + { U32 const cBlockHeader = MEM_readLE24(src); + U32 const cSize = cBlockHeader >> 3; + bpPtr->lastBlock = cBlockHeader & 1; + bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); + bpPtr->origSize = cSize; /* only useful for RLE */ + if (bpPtr->blockType == bt_rle) return 1; + RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, ""); + return cSize; + } +} + + +/* Hidden declaration for fullbench */ +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize); +/*! ZSTD_decodeLiteralsBlock() : + * @return : nb of bytes read from src (< srcSize ) + * note : symbol not declared but exposed for fullbench */ +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ +{ + DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); + RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); + + { const BYTE* const istart = (const BYTE*) src; + symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); + + switch(litEncType) + { + case set_repeat: + DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block"); + RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, ""); + /* fall-through */ + + case set_compressed: + RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); + { size_t lhSize, litSize, litCSize; + U32 singleStream=0; + U32 const lhlCode = (istart[0] >> 2) & 3; + U32 const lhc = MEM_readLE32(istart); + size_t hufSuccess; + switch(lhlCode) + { + case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ + /* 2 - 2 - 10 - 10 */ + singleStream = !lhlCode; + lhSize = 3; + litSize = (lhc >> 4) & 0x3FF; + litCSize = (lhc >> 14) & 0x3FF; + break; + case 2: + /* 2 - 2 - 14 - 14 */ + lhSize = 4; + litSize = (lhc >> 4) & 0x3FFF; + litCSize = lhc >> 18; + break; + case 3: + /* 2 - 2 - 18 - 18 */ + lhSize = 5; + litSize = (lhc >> 4) & 0x3FFFF; + litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); + break; + } + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); + + /* prefetch huffman table if cold */ + if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { + PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable)); + } + + if (litEncType==set_repeat) { + if (singleStream) { + hufSuccess = HUF_decompress1X_usingDTable_bmi2( + dctx->litBuffer, litSize, istart+lhSize, litCSize, + dctx->HUFptr, dctx->bmi2); + } else { + hufSuccess = HUF_decompress4X_usingDTable_bmi2( + dctx->litBuffer, litSize, istart+lhSize, litCSize, + 
dctx->HUFptr, dctx->bmi2); + } + } else { + if (singleStream) { +#if defined(HUF_FORCE_DECOMPRESS_X2) + hufSuccess = HUF_decompress1X_DCtx_wksp( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace)); +#else + hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace), dctx->bmi2); +#endif + } else { + hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace), dctx->bmi2); + } + } + + RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); + + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + dctx->litEntropy = 1; + if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable; + ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + return litCSize + lhSize; + } + + case set_basic: + { size_t litSize, lhSize; + U32 const lhlCode = ((istart[0]) >> 2) & 3; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + break; + } + + if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ + RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, ""); + ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize); + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + return lhSize+litSize; + } + /* direct reference into compressed stream */ + dctx->litPtr = istart+lhSize; + dctx->litSize = litSize; + return lhSize+litSize; + } + + case set_rle: + { U32 const lhlCode = ((istart[0]) >> 2) & 3; + size_t litSize, lhSize; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); + break; + } + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + return lhSize+1; + } + default: + RETURN_ERROR(corruption_detected, "impossible"); + } + } +} + +/* Default FSE distribution tables. 
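+ * They are selected by ZSTD_buildSeqTable() below whenever a block uses the
+ * set_basic symbol encoding, i.e. no table is transmitted in the block.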
+ * These are pre-calculated FSE decoding tables using default distributions as defined in specification : + * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions + * They were generated programmatically with following method : + * - start from default distributions, present in /lib/common/zstd_internal.h + * - generate tables normally, using ZSTD_buildFSETable() + * - printout the content of tables + * - pretify output, report below, test with fuzzer to ensure it's correct */ + +/* Default FSE distribution table for Literal Lengths */ +static const ZSTD_seqSymbol LL_defaultDTable[(1<tableLog = 0; + DTableH->fastMode = 0; + + cell->nbBits = 0; + cell->nextState = 0; + assert(nbAddBits < 255); + cell->nbAdditionalBits = (BYTE)nbAddBits; + cell->baseValue = baseValue; +} + + +/* ZSTD_buildFSETable() : + * generate FSE decoding table for one symbol (ll, ml or off) + * cannot fail if input is valid => + * all inputs are presumed validated at this stage */ +FORCE_INLINE_TEMPLATE +void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog, void* wksp, size_t wkspSize) +{ + ZSTD_seqSymbol* const tableDecode = dt+1; + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + + U16* symbolNext = (U16*)wksp; + BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1); + U32 highThreshold = tableSize - 1; + + + /* Sanity Checks */ + assert(maxSymbolValue <= MaxSeq); + assert(tableLog <= MaxFSELog); + assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE); + (void)wkspSize; + /* Init, lay down lowprob symbols */ + { ZSTD_seqSymbol_header DTableH; + DTableH.tableLog = tableLog; + DTableH.fastMode = 1; + { S16 const largeLimit= (S16)(1 << (tableLog-1)); + U32 s; + for (s=0; s= largeLimit) DTableH.fastMode=0; + assert(normalizedCounter[s]>=0); + symbolNext[s] = (U16)normalizedCounter[s]; + } } } + ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + assert(tableSize <= 512); + /* Specialized symbol spreading for the case when there are + * no low probability (-1 count) symbols. When compressing + * small blocks we avoid low probability symbols to hit this + * case, since header decoding speed matters more. + */ + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. 
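+ * Sketch of the trick (as laid out in the loop below) : `sv` holds the current
+ * symbol replicated into all 8 bytes (it starts at 0 and grows by `add` per
+ * symbol); each store writes 8 copies at once, but `pos` only advances by the
+ * symbol's true count, so the next symbol simply overwrites the surplus bytes.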
+ */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } } + assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } + + /* Build Decoding table */ + { + U32 u; + for (u=0; u max, corruption_detected, ""); + { U32 const symbol = *(const BYTE*)src; + U32 const baseline = baseValue[symbol]; + U32 const nbBits = nbAdditionalBits[symbol]; + ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits); + } + *DTablePtr = DTableSpace; + return 1; + case set_basic : + *DTablePtr = defaultTable; + return 0; + case set_repeat: + RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, ""); + /* prefetch FSE table if used */ + if (ddictIsCold && (nbSeq > 24 /* heuristic */)) { + const void* const pStart = *DTablePtr; + size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog)); + PREFETCH_AREA(pStart, pSize); + } + return 0; + case set_compressed : + { unsigned tableLog; + S16 norm[MaxSeq+1]; + size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); + RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, ""); + RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, ""); + ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2); + *DTablePtr = DTableSpace; + return headerSize; + } + default : + assert(0); + RETURN_ERROR(GENERIC, "impossible"); + } +} + +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip = istart; + int nbSeq; + DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); + + /* check */ + RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, ""); + + /* SeqHead */ + nbSeq = *ip++; + if (!nbSeq) { + *nbSeqPtr=0; + RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, ""); + return 1; + } + if (nbSeq > 0x7F) { + if (nbSeq == 0xFF) { + RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); + nbSeq = MEM_readLE16(ip) + LONGNBSEQ; + ip+=2; + } else { + RETURN_ERROR_IF(ip >= iend, srcSize_wrong, ""); + nbSeq = ((nbSeq-0x80)<<8) + *ip++; + } + } + *nbSeqPtr = nbSeq; + + /* FSE table descriptors */ + RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */ + { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); + symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); + symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); + ip++; + + /* Build DTables */ + { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, + LLtype, MaxLL, LLFSELog, + ip, iend-ip, + LL_base, LL_bits, + LL_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed"); + ip += llhSize; + } + + { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, + OFtype, MaxOff, OffFSELog, + ip, iend-ip, + OF_base, OF_bits, + OF_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed"); + ip += ofhSize; + } + + { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, 
&dctx->MLTptr, + MLtype, MaxML, MLFSELog, + ip, iend-ip, + ML_base, ML_bits, + ML_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed"); + ip += mlhSize; + } + } + + return ip-istart; +} + + +typedef struct { + size_t litLength; + size_t matchLength; + size_t offset; +} seq_t; + +typedef struct { + size_t state; + const ZSTD_seqSymbol* table; +} ZSTD_fseState; + +typedef struct { + BIT_DStream_t DStream; + ZSTD_fseState stateLL; + ZSTD_fseState stateOffb; + ZSTD_fseState stateML; + size_t prevOffset[ZSTD_REP_NUM]; +} seqState_t; + +/*! ZSTD_overlapCopy8() : + * Copies 8 bytes from ip to op and updates op and ip where ip <= op. + * If the offset is < 8 then the offset is spread to at least 8 bytes. + * + * Precondition: *ip <= *op + * Postcondition: *op - *op >= 8 + */ +HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { + assert(*ip <= *op); + if (offset < 8) { + /* close range match, overlap */ + static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ + static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ + int const sub2 = dec64table[offset]; + (*op)[0] = (*ip)[0]; + (*op)[1] = (*ip)[1]; + (*op)[2] = (*ip)[2]; + (*op)[3] = (*ip)[3]; + *ip += dec32table[offset]; + ZSTD_copy4(*op+4, *ip); + *ip -= sub2; + } else { + ZSTD_copy8(*op, *ip); + } + *ip += 8; + *op += 8; + assert(*op - *ip >= 8); +} + +/*! ZSTD_safecopy() : + * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer + * and write up to 16 bytes past oend_w (op >= oend_w is allowed). + * This function is only called in the uncommon case where the sequence is near the end of the block. It + * should be fast for a single long sequence, but can be slow for several short sequences. + * + * @param ovtype controls the overlap detection + * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. + * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart. + * The src buffer must be before the dst buffer. + */ +static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) { + ptrdiff_t const diff = op - ip; + BYTE* const oend = op + length; + + assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) || + (ovtype == ZSTD_overlap_src_before_dst && diff >= 0)); + + if (length < 8) { + /* Handle short lengths. */ + while (op < oend) *op++ = *ip++; + return; + } + if (ovtype == ZSTD_overlap_src_before_dst) { + /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */ + assert(length >= 8); + ZSTD_overlapCopy8(&op, &ip, diff); + assert(op - ip >= 8); + assert(op <= oend); + } + + if (oend <= oend_w) { + /* No risk of overwrite. */ + ZSTD_wildcopy(op, ip, length, ovtype); + return; + } + if (op <= oend_w) { + /* Wildcopy until we get close to the end. */ + assert(oend > oend_w); + ZSTD_wildcopy(op, ip, oend_w - op, ovtype); + ip += oend_w - op; + op = oend_w; + } + /* Handle the leftovers. */ + while (op < oend) *op++ = *ip++; +} + +/* ZSTD_execSequenceEnd(): + * This version handles cases that are near the end of the output buffer. It requires + * more careful checks to make sure there is no overflow. By separating out these hard + * and unlikely cases, we can speed up the common cases. 
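+ * (Executing a sequence = copy `litLength` bytes from the literals buffer, then
+ * copy `matchLength` bytes starting `offset` bytes back in the output, possibly
+ * reaching back past prefixStart into the dictionary segment ending at dictEnd.)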
+ * + * NOTE: This function needs to be fast for a single long sequence, but doesn't need + * to be optimized for many small sequences, since those fall into ZSTD_execSequence(). + */ +FORCE_NOINLINE +size_t ZSTD_execSequenceEnd(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + + /* bounds checks : careful of address space overflow in 32-bit mode */ + RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer"); + RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer"); + assert(op < op + sequenceLength); + assert(oLitEnd < op + sequenceLength); + + /* copy literals */ + ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap); + op = oLitEnd; + *litPtr = iLitEnd; + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix */ + RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, ""); + match = dictEnd - (prefixStart-match); + if (match + sequence.matchLength <= dictEnd) { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } } + ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst); + return sequenceLength; +} + +HINT_INLINE +size_t ZSTD_execSequence(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */ + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + assert(op != NULL /* Precondition */); + assert(oend_w < oend /* No underflow */); + /* Handle edge cases in a slow path: + * - Read beyond end of literals + * - Match end is within WILDCOPY_OVERLIMIT of oend + * - 32-bit mode and the match length overflows + */ + if (UNLIKELY( + iLitEnd > litLimit || + oMatchEnd > oend_w || + (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH))) + return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); + + /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ + assert(op <= oLitEnd /* No overflow */); + assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */); + assert(oMatchEnd <= oend /* No underflow */); + assert(iLitEnd <= litLimit /* Literal length is in bounds */); + assert(oLitEnd <= oend_w /* Can wildcopy literals */); + assert(oMatchEnd <= oend_w /* Can wildcopy matches */); + + /* Copy 
Literals: + * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9. + * We likely don't need the full 32-byte wildcopy. + */ + assert(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(op, (*litPtr)); + if (UNLIKELY(sequence.litLength > 16)) { + ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap); + } + op = oLitEnd; + *litPtr = iLitEnd; /* update for next sequence */ + + /* Copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix -> go into extDict */ + RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); + match = dictEnd + (match - prefixStart); + if (match + sequence.matchLength <= dictEnd) { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } } + /* Match within prefix of 1 or more bytes */ + assert(op <= oMatchEnd); + assert(oMatchEnd <= oend_w); + assert(match >= prefixStart); + assert(sequence.matchLength >= 1); + + /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy + * without overlap checking. + */ + if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) { + /* We bet on a full wildcopy for matches, since we expect matches to be + * longer than literals (in general). In silesia, ~10% of matches are longer + * than 16 bytes. + */ + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap); + return sequenceLength; + } + assert(sequence.offset < WILDCOPY_VECLEN); + + /* Copy 8 bytes and spread the offset to be >= 8. */ + ZSTD_overlapCopy8(&op, &match, sequence.offset); + + /* If the match length is > 8 bytes, then continue with the wildcopy. */ + if (sequence.matchLength > 8) { + assert(op < oMatchEnd); + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); + } + return sequenceLength; +} + +static void +ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt) +{ + const void* ptr = dt; + const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits", + (U32)DStatePtr->state, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) +{ + ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.nextState + lowBits; +} + +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo) +{ + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.nextState + lowBits; +} + +/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum + * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) + * bits before reloading. This value is the maximum number of bytes we read + * after reloading when we are decoding long offsets. + */ +#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ + (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ + ? 
ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \ + : 0) + +typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; + +FORCE_INLINE_TEMPLATE seq_t +ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) +{ + seq_t seq; + ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state]; + ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state]; + ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state]; + U32 const llBase = llDInfo.baseValue; + U32 const mlBase = mlDInfo.baseValue; + U32 const ofBase = ofDInfo.baseValue; + BYTE const llBits = llDInfo.nbAdditionalBits; + BYTE const mlBits = mlDInfo.nbAdditionalBits; + BYTE const ofBits = ofDInfo.nbAdditionalBits; + BYTE const totalBits = llBits+mlBits+ofBits; + + /* sequence */ + { size_t offset; + if (ofBits > 1) { + ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); + assert(ofBits <= MaxOff); + if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { + U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); + offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); + BIT_reloadDStream(&seqState->DStream); + if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); + assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ + } else { + offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); + } + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } else { + U32 const ll0 = (llBase == 0); + if (LIKELY((ofBits == 0))) { + if (LIKELY(!ll0)) + offset = seqState->prevOffset[0]; + else { + offset = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } + } else { + offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); + { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } } } + seq.offset = offset; + } + + seq.matchLength = mlBase; + if (mlBits > 0) + seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); + + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ + ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); + + seq.litLength = llBase; + if (llBits > 0) + seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); + + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); + + DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + + /* ANS state update + * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo(). + * clang-9.2.0 does 7% worse with ZSTD_updateFseState(). 
+ * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the + * better option, so it is the default for other compilers. But, if you + * measure that it is worse, please put up a pull request. + */ + { +#if defined(__GNUC__) && !defined(__clang__) + const int kUseUpdateFseState = 1; +#else + const int kUseUpdateFseState = 0; +#endif + if (kUseUpdateFseState) { + ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ + ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + } else { + ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */ + } + } + + return seq; +} + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) +{ + size_t const windowSize = dctx->fParams.windowSize; + /* No dictionary used. */ + if (dctx->dictContentEndForFuzzing == NULL) return 0; + /* Dictionary is our prefix. */ + if (prefixStart == dctx->dictContentBeginForFuzzing) return 1; + /* Dictionary is not our ext-dict. */ + if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0; + /* Dictionary is not within our window size. */ + if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0; + /* Dictionary is active. */ + return 1; +} + +MEM_STATIC void ZSTD_assertValidSequence( + ZSTD_DCtx const* dctx, + BYTE const* op, BYTE const* oend, + seq_t const seq, + BYTE const* prefixStart, BYTE const* virtualStart) +{ +#if DEBUGLEVEL >= 1 + size_t const windowSize = dctx->fParams.windowSize; + size_t const sequenceSize = seq.litLength + seq.matchLength; + BYTE const* const oLitEnd = op + seq.litLength; + DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + assert(op <= oend); + assert((size_t)(oend - op) >= sequenceSize); + assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX); + if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { + size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); + /* Offset must be within the dictionary. */ + assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); + assert(seq.offset <= windowSize + dictSize); + } else { + /* Offset must be within our window. 
*/
+        assert(seq.offset <= windowSize);
+    }
+#else
+    (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
+#endif
+}
+#endif
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+FORCE_INLINE_TEMPLATE size_t
+DONT_VECTORIZE
+ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize, int nbSeq,
+                         const ZSTD_longOffset_e isLongOffset,
+                         const int frame)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + maxDstSize;
+    BYTE* op = ostart;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
+    const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
+    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+    DEBUGLOG(5, "ZSTD_decompressSequences_body");
+    (void)frame;
+
+    /* Regen sequences */
+    if (nbSeq) {
+        seqState_t seqState;
+        dctx->fseEntropy = 1;
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
+        RETURN_ERROR_IF(
+            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
+            corruption_detected, "");
+        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+        assert(dst != NULL);
+
+        ZSTD_STATIC_ASSERT(
+                BIT_DStream_unfinished < BIT_DStream_completed &&
+                BIT_DStream_endOfBuffer < BIT_DStream_completed &&
+                BIT_DStream_completed < BIT_DStream_overflow);
+
+#if defined(__GNUC__) && defined(__x86_64__)
+        /* Align the decompression loop to 32 + 16 bytes.
+         *
+         * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
+         * speed swings based on the alignment of the decompression loop. This
+         * performance swing is caused by parts of the decompression loop falling
+         * out of the DSB. The entire decompression loop should fit in the DSB,
+         * when it can't we get much worse performance. You can measure if you've
+         * hit the good case or the bad case with this perf command for some
+         * compressed file test.zst:
+         *
+         *     perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
+         *               -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
+         *
+         * If you see most cycles served out of the MITE you've hit the bad case.
+         * If you see most cycles served out of the DSB you've hit the good case.
+         * If it is pretty even then you may be in an okay case.
+         *
+         * This issue has been reproduced on the following CPUs:
+         *   - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
+         *               Use Instruments->Counters to get DSB/MITE cycles.
+         *               I never got performance swings, but I was able to
+         *               go from the good case of mostly DSB to half of the
+         *               cycles served from MITE.
+         *   - Coffeelake: Intel i9-9900k
+         *   - Coffeelake: Intel i7-9700k
+         *
+         * I haven't been able to reproduce the instability or DSB misses on any
+         * of the following CPUS:
+         *   - Haswell
+         *   - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
+         *   - Skylake
+         *
+         * If you are seeing performance stability this script can help test.
+         * It tests on 4 commits in zstd where I saw performance change.
+         *
+         *   https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
+         */
+        __asm__(".p2align 6");
+        __asm__("nop");
+        __asm__(".p2align 5");
+        __asm__("nop");
+#  if __GNUC__ >= 9
+        /* better for gcc-9 and gcc-10, worse for clang and gcc-8 */
+        __asm__(".p2align 3");
+#  else
+        __asm__(".p2align 4");
+#  endif
+#endif
+        for ( ; ; ) {
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+            assert(!ZSTD_isError(oneSeqSize));
+            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+#endif
+            if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+                return oneSeqSize;
+            DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+            op += oneSeqSize;
+            if (UNLIKELY(!--nbSeq))
+                break;
+            BIT_reloadDStream(&(seqState.DStream));
+        }
+
+        /* check if reached exact end */
+        DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
+        RETURN_ERROR_IF(nbSeq, corruption_detected, "");
+        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
+        /* save reps for next block */
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+    }
+
+    /* last literal segment */
+    {   size_t const lastLLSize = litEnd - litPtr;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memcpy(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+static size_t
+ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
+                   const BYTE* const prefixStart, const BYTE* const dictEnd)
+{
+    prefetchPos += sequence.litLength;
+    {   const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
+        const BYTE* const match = matchBase + prefetchPos - sequence.offset;   /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
+                                                                                * No consequence though : memory address is only used for prefetching, not for dereferencing */
+        PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE);   /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+    }
+    return prefetchPos + sequence.matchLength;
+}
+
+/* This decoding function employs prefetching
+ * to reduce latency impact of cache misses.
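+ * Internally it keeps a ring of STORED_SEQS (8) decoded sequences and issues
+ * ZSTD_prefetchMatch() on each upcoming match, so the match bytes are usually
+ * already in cache by the time ZSTD_execSequence() copies them.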
+ * It's generally employed when block contains a significant portion of long-distance matches
+ * or when coupled with a "cold" dictionary */
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_decompressSequencesLong_body(
+                               ZSTD_DCtx* dctx,
+                               void* dst, size_t maxDstSize,
+                         const void* seqStart, size_t seqSize, int nbSeq,
+                         const ZSTD_longOffset_e isLongOffset,
+                         const int frame)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = ostart + maxDstSize;
+    BYTE* op = ostart;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
+    const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
+    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
+    (void)frame;
+
+    /* Regen sequences */
+    if (nbSeq) {
+#define STORED_SEQS 8
+#define STORED_SEQS_MASK (STORED_SEQS-1)
+#define ADVANCED_SEQS STORED_SEQS
+        seq_t sequences[STORED_SEQS];
+        int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
+        seqState_t seqState;
+        int seqNb;
+        size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
+
+        dctx->fseEntropy = 1;
+        { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
+        assert(dst != NULL);
+        assert(iend >= ip);
+        RETURN_ERROR_IF(
+            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
+            corruption_detected, "");
+        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+
+        /* prepare in advance */
+        for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+            sequences[seqNb] = sequence;
+        }
+        RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
+
+        /* decode and decompress */
+        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+            assert(!ZSTD_isError(oneSeqSize));
+            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+
+            prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+            sequences[seqNb & STORED_SEQS_MASK] = sequence;
+            op += oneSeqSize;
+        }
+        RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
+
+        /* finish queue */
+        seqNb -= seqAdvance;
+        for ( ; seqNb<nbSeq ; seqNb++) {
+            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+            assert(!ZSTD_isError(oneSeqSize));
+            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+            op += oneSeqSize;
+        }
+
+        /* save reps for next block */
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+    }
+
+    /* last literal segment */
+    {   size_t const lastLLSize = litEnd - litPtr;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memcpy(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+static size_t
+ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+
+
+
+#if DYNAMIC_BMI2
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+static TARGET_ATTRIBUTE("bmi2") size_t
+DONT_VECTORIZE
+ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+static TARGET_ATTRIBUTE("bmi2") size_t
+ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+
+#endif /* DYNAMIC_BMI2 */
+
+typedef size_t (*ZSTD_decompressSequences_t)(
+                            ZSTD_DCtx* dctx,
+                            void* dst, size_t maxDstSize,
+                            const void* seqStart, size_t seqSize, int nbSeq,
+                            const ZSTD_longOffset_e isLongOffset,
+                            const int frame);
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+static size_t
+ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
+                   const void* seqStart, size_t seqSize, int nbSeq,
+                   const ZSTD_longOffset_e isLongOffset,
+                   const int frame)
+{
+    DEBUGLOG(5, "ZSTD_decompressSequences");
+#if DYNAMIC_BMI2
+    if (dctx->bmi2) {
+        return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    }
+#endif
+    return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
+
+
+#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+/* ZSTD_decompressSequencesLong() :
+ * decompression function triggered when a minimum share of offsets is considered "long",
+ * aka out of cache.
+ * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
+ * This function will try to mitigate main memory latency through the use of prefetching */
+static size_t
+ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
+                             void* dst, size_t maxDstSize,
+                             const void* seqStart, size_t seqSize, int nbSeq,
+                             const ZSTD_longOffset_e isLongOffset,
+                             const int frame)
+{
+    DEBUGLOG(5, "ZSTD_decompressSequencesLong");
+#if DYNAMIC_BMI2
+    if (dctx->bmi2) {
+        return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    }
+#endif
+    return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
+
+
+
+#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
+    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
+/* ZSTD_getLongOffsetsShare() :
+ * condition : offTable must be valid
+ * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
+ *           compared to maximum possible of (1<<OffFSELog) */
+static unsigned
+ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
+{
+    const void* ptr = offTable;
+    U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
+    const ZSTD_seqSymbol* table = offTable + 1;
+    U32 const max = 1 << tableLog;
+    U32 u, total = 0;
+    DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
+
+    assert(max <= (1 << OffFSELog));  /* max not too large */
+    for (u=0; u<max; u++) {
+        if (table[u].nbAdditionalBits > 22) total += 1;
+    }
+
+    assert(tableLog <= OffFSELog);
+    total <<= (OffFSELog - tableLog);  /* scale to OffFSELog */
+
+    return total;
+}
+#endif
+
+size_t
+ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
+                      void* dst, size_t dstCapacity,
+                const void* src, size_t srcSize, const int frame)
+{   /* blockType == blockCompressed */
+    const BYTE* ip = (const BYTE*)src;
+    /* isLongOffset must be true if there are long offsets.
+     * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
+     * We don't expect that to be the case in 64-bit mode.
+     * In block mode, window size is not known, so we have to be conservative.
+     * (note: but it could be evaluated from current-lowLimit)
+     */
+    ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
+    DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
+
+    RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
+
+    /* Decode literals section */
+    {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
+        DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
+        if (ZSTD_isError(litCSize)) return litCSize;
+        ip += litCSize;
+        srcSize -= litCSize;
+    }
+
+    /* Build Decoding Tables */
+    {
+        /* These macros control at build-time which decompressor implementation
+         * we use. If neither is defined, we do some inspection and dispatch at
+         * runtime.
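+         * For example, compiling with -DZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+         * keeps only the regular ZSTD_decompressSequences() path, while
+         * -DZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG keeps only the prefetching
+         * ZSTD_decompressSequencesLong() path.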
+ */ +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + int usePrefetchDecoder = dctx->ddictIsCold; +#endif + int nbSeq; + size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); + if (ZSTD_isError(seqHSize)) return seqHSize; + ip += seqHSize; + srcSize -= seqHSize; + + RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + if ( !usePrefetchDecoder + && (!frame || (dctx->fParams.windowSize > (1<<24))) + && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */ + U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr); + U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ + usePrefetchDecoder = (shareLongOffsets >= minShare); + } +#endif + + dctx->ddictIsCold = 0; + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + if (usePrefetchDecoder) +#endif +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); +#endif + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG + /* else */ + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); +#endif + } +} + + +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize) +{ + if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */ + dctx->dictEnd = dctx->previousDstEnd; + dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dst; + dctx->previousDstEnd = dst; + } +} + + +size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t dSize; + ZSTD_checkContinuity(dctx, dst, dstCapacity); + dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0); + dctx->previousDstEnd = (char*)dst + dSize; + return dSize; +} +/**** ended inlining decompress/zstd_decompress_block.c ****/ + +/**** start inlining dictBuilder/cover.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* ***************************************************************************** + * Constructs a dictionary using a heuristic based on the following paper: + * + * Liao, Petri, Moffat, Wirth + * Effective Construction of Relative Lempel-Ziv Dictionaries + * Published in WWW 2016. + * + * Adapted from code originally written by @ot (Giuseppe Ottaviano). 
+ ******************************************************************************/
+
+/*-*************************************
+*  Dependencies
+***************************************/
+#include <stdio.h>            /* fprintf */
+#include <stdlib.h>           /* malloc, free, qsort */
+#include <string.h>           /* memset */
+#include <time.h>             /* clock */
+
+#ifndef ZDICT_STATIC_LINKING_ONLY
+#  define ZDICT_STATIC_LINKING_ONLY
+#endif
+
+/**** skipping file: ../common/mem.h ****/
+/**** skipping file: ../common/pool.h ****/
+/**** skipping file: ../common/threading.h ****/
+/**** skipping file: ../common/zstd_internal.h ****/
+/**** start inlining ../zdict.h ****/
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef DICTBUILDER_H_001
+#define DICTBUILDER_H_001
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/*======  Dependencies  ======*/
+#include <stddef.h>  /* size_t */
+
+
+/* =====   ZDICTLIB_API : control library symbols visibility   ===== */
+#ifndef ZDICTLIB_VISIBILITY
+#  if defined(__GNUC__) && (__GNUC__ >= 4)
+#    define ZDICTLIB_VISIBILITY __attribute__ ((visibility ("default")))
+#  else
+#    define ZDICTLIB_VISIBILITY
+#  endif
+#endif
+#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
+#  define ZDICTLIB_API __declspec(dllexport) ZDICTLIB_VISIBILITY
+#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1)
+#  define ZDICTLIB_API __declspec(dllimport) ZDICTLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
+#else
+#  define ZDICTLIB_API ZDICTLIB_VISIBILITY
+#endif
+
+/*******************************************************************************
+ * Zstd dictionary builder
+ *
+ * FAQ
+ * ===
+ * Why should I use a dictionary?
+ * ------------------------------
+ *
+ * Zstd can use dictionaries to improve compression ratio of small data.
+ * Traditionally small files don't compress well because there is very little
+ * repetition in a single sample, since it is small. But, if you are compressing
+ * many similar files, like a bunch of JSON records that share the same
+ * structure, you can train a dictionary ahead of time on some samples of
+ * these files. Then, zstd can use the dictionary to find repetitions that are
+ * present across samples. This can vastly improve compression ratio.
+ *
+ * When is a dictionary useful?
+ * ----------------------------
+ *
+ * Dictionaries are useful when compressing many small files that are similar.
+ * The larger a file is, the less benefit a dictionary will have. Generally,
+ * we don't expect dictionary compression to be effective past 100KB. And the
+ * smaller a file is, the more we would expect the dictionary to help.
+ *
+ * How do I use a dictionary?
+ * --------------------------
+ *
+ * Simply pass the dictionary to the zstd compressor with
+ * `ZSTD_CCtx_loadDictionary()`. The same dictionary must then be passed to
+ * the decompressor, using `ZSTD_DCtx_loadDictionary()`. There are other
+ * more advanced functions that allow selecting some options, see zstd.h for
+ * complete documentation.
+ *
+ * What is a zstd dictionary?
+ * --------------------------
+ *
+ * A zstd dictionary has two pieces: Its header, and its content. The header
+ * contains a magic number, the dictionary ID, and entropy tables. These
+ * entropy tables allow zstd to save on header costs in the compressed file,
+ * which really matters for small data. The content is just bytes, which are
+ * repeated content that is common across many samples.
+ *
+ * What is a raw content dictionary?
+ * ---------------------------------
+ *
+ * A raw content dictionary is just bytes. It doesn't have a zstd dictionary
+ * header, a dictionary ID, or entropy tables. Any buffer is a valid raw
+ * content dictionary.
+ *
+ * How do I train a dictionary?
+ * ----------------------------
+ *
+ * Gather samples from your use case. These samples should be similar to each
+ * other. If you have several use cases, you could try to train one dictionary
+ * per use case.
+ *
+ * Pass those samples to `ZDICT_trainFromBuffer()` and that will train your
+ * dictionary. There are a few advanced versions of this function, but this
+ * is a great starting point. If you want to further tune your dictionary
+ * you could try `ZDICT_optimizeTrainFromBuffer_cover()`. If that is too slow
+ * you can try `ZDICT_optimizeTrainFromBuffer_fastCover()`.
+ *
+ * If the dictionary training function fails, that is likely because you
+ * either passed too few samples, or a dictionary would not be effective
+ * for your data. Look at the messages that the dictionary trainer printed,
+ * if it doesn't say too few samples, then a dictionary would not be effective.
+ *
+ * How large should my dictionary be?
+ * ----------------------------------
+ *
+ * A reasonable dictionary size, the `dictBufferCapacity`, is about 100KB.
+ * The zstd CLI defaults to a 110KB dictionary. You likely don't need a
+ * dictionary larger than that. But, most use cases can get away with a
+ * smaller dictionary. The advanced dictionary builders can automatically
+ * shrink the dictionary for you, and select the smallest size that
+ * doesn't hurt compression ratio too much. See the `shrinkDict` parameter.
+ * A smaller dictionary can save memory, and potentially speed up
+ * compression.
+ *
+ * How many samples should I provide to the dictionary builder?
+ * ------------------------------------------------------------
+ *
+ * We generally recommend passing ~100x the size of the dictionary
+ * in samples. A few thousand should suffice. Having too few samples
+ * can hurt the dictionary's effectiveness. Having more samples will
+ * only improve the dictionary's effectiveness. But having too many
+ * samples can slow down the dictionary builder.
+ *
+ * How do I determine if a dictionary will be effective?
+ * -----------------------------------------------------
+ *
+ * Simply train a dictionary and try it out. You can use zstd's built in
+ * benchmarking tool to test the dictionary effectiveness.
+ *
+ *   # Benchmark levels 1-3 without a dictionary
+ *   zstd -b1e3 -r /path/to/my/files
+ *   # Benchmark levels 1-3 with a dictionary
+ *   zstd -b1e3 -r /path/to/my/files -D /path/to/my/dictionary
+ *
+ * When should I retrain a dictionary?
+ * -----------------------------------
+ *
+ * You should retrain a dictionary when its effectiveness drops. Dictionary
+ * effectiveness drops as the data you are compressing changes. Generally, we do
+ * expect dictionaries to "decay" over time, as your data changes, but the rate
+ * at which they decay depends on your use case.
Internally, we regularly + * retrain dictionaries, and if the new dictionary performs significantly + * better than the old dictionary, we will ship the new dictionary. + * + * I have a raw content dictionary, how do I turn it into a zstd dictionary? + * ------------------------------------------------------------------------- + * + * If you have a raw content dictionary, e.g. by manually constructing it, or + * using a third-party dictionary builder, you can turn it into a zstd + * dictionary by using `ZDICT_finalizeDictionary()`. You'll also have to + * provide some samples of the data. It will add the zstd header to the + * raw content, which contains a dictionary ID and entropy tables, which + * will improve compression ratio, and allow zstd to write the dictionary ID + * into the frame, if you so choose. + * + * Do I have to use zstd's dictionary builder? + * ------------------------------------------- + * + * No! You can construct dictionary content however you please, it is just + * bytes. It will always be valid as a raw content dictionary. If you want + * a zstd dictionary, which can improve compression ratio, use + * `ZDICT_finalizeDictionary()`. + * + * What is the attack surface of a zstd dictionary? + * ------------------------------------------------ + * + * Zstd is heavily fuzz tested, including loading fuzzed dictionaries, so + * zstd should never crash, or access out-of-bounds memory no matter what + * the dictionary is. However, if an attacker can control the dictionary + * during decompression, they can cause zstd to generate arbitrary bytes, + * just like if they controlled the compressed data. + * + ******************************************************************************/ + + +/*! ZDICT_trainFromBuffer(): + * Train a dictionary from an array of samples. + * Redirect towards ZDICT_optimizeTrainFromBuffer_fastCover() single-threaded, with d=8, steps=4, + * f=20, and accel=1. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * Note: Dictionary training will fail if there are not enough samples to construct a + * dictionary, or if most of the samples are too small (< 8 bytes being the lower limit). + * If dictionary training fails, you should use zstd without a dictionary, as the dictionary + * would've been ineffective anyways. If you believe your samples would benefit from a dictionary + * please open an issue with details, and we can look into it. + * Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. 
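+ *  A minimal usage sketch (illustrative only; `samples`, `sizes` and `nbSamples`
+ *  are assumed to be filled by the caller, and 110 KB just mirrors the CLI default):
+ *      char   dictBuffer[112640];
+ *      size_t dictSize = ZDICT_trainFromBuffer(dictBuffer, sizeof(dictBuffer),
+ *                                              samples, sizes, nbSamples);
+ *      if (ZDICT_isError(dictSize)) { ... compress without a dictionary ... }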
+ */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples); + +typedef struct { + int compressionLevel; /*< optimize for a specific zstd compression level; 0 means default */ + unsigned notificationLevel; /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */ + unsigned dictID; /*< force dictID value; 0 means auto mode (32-bits random value) + * NOTE: The zstd format reserves some dictionary IDs for future use. + * You may use them in private settings, but be warned that they + * may be used by zstd in a public dictionary registry in the future. + * These dictionary IDs are: + * - low range : <= 32767 + * - high range : >= (2^31) + */ +} ZDICT_params_t; + +/*! ZDICT_finalizeDictionary(): + * Given a custom content as a basis for dictionary, and a set of samples, + * finalize dictionary by adding headers and statistics according to the zstd + * dictionary format. + * + * Samples must be stored concatenated in a flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each + * sample in order. The samples are used to construct the statistics, so they + * should be representative of what you will compress with this dictionary. + * + * The compression level can be set in `parameters`. You should pass the + * compression level you expect to use in production. The statistics for each + * compression level differ, so tuning the dictionary for the compression level + * can help quite a bit. + * + * You can set an explicit dictionary ID in `parameters`, or allow us to pick + * a random dictionary ID for you, but we can't guarantee no collisions. + * + * The dstDictBuffer and the dictContent may overlap, and the content will be + * appended to the end of the header. If the header + the content doesn't fit in + * maxDictSize the beginning of the content is truncated to make room, since it + * is presumed that the most profitable content is at the end of the dictionary, + * since that is the cheapest to reference. + * + * `dictContentSize` must be >= ZDICT_CONTENTSIZE_MIN bytes. + * `maxDictSize` must be >= max(dictContentSize, ZSTD_DICTSIZE_MIN). + * + * @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`), + * or an error code, which can be tested by ZDICT_isError(). + * Note: ZDICT_finalizeDictionary() will push notifications into stderr if + * instructed to, using notificationLevel>0. 
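+ *  A minimal sketch (illustrative only; `rawContent` and the sample arrays are
+ *  assumed to be provided by the caller):
+ *      ZDICT_params_t zp;
+ *      memset(&zp, 0, sizeof(zp));      - all-default parameters
+ *      zp.compressionLevel = 3;         - the level you plan to use in production
+ *      size_t dictSize = ZDICT_finalizeDictionary(dstDictBuffer, maxDictSize,
+ *                            rawContent, rawContentSize,
+ *                            samples, sizes, nbSamples, zp);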
+ * NOTE: This function currently may fail in several edge cases including: + * * Not enough samples + * * Samples are uncompressible + * * Samples are all exactly the same + */ +ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize, + const void* dictContent, size_t dictContentSize, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_params_t parameters); + + +/*====== Helper functions ======*/ +ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */ +ZDICTLIB_API size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize); /* returns dict header size; returns a ZSTD error code on failure */ +ZDICTLIB_API unsigned ZDICT_isError(size_t errorCode); +ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode); + + + +#ifdef ZDICT_STATIC_LINKING_ONLY + +/* ==================================================================================== + * The definitions in this section are considered experimental. + * They should never be used with a dynamic library, as they may change in the future. + * They are provided for advanced usages. + * Use them only in association with static linking. + * ==================================================================================== */ + +#define ZDICT_CONTENTSIZE_MIN 128 +#define ZDICT_DICTSIZE_MIN 256 + +/*! ZDICT_cover_params_t: + * k and d are the only required parameters. + * For others, value 0 means default. + */ +typedef struct { + unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */ + unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */ + unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */ + unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ + double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */ + unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */ + unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. 
*/ + ZDICT_params_t zParams; +} ZDICT_cover_params_t; + +typedef struct { + unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */ + unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */ + unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 1 means default(20)*/ + unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */ + unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ + double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */ + unsigned accel; /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */ + unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */ + unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */ + + ZDICT_params_t zParams; +} ZDICT_fastCover_params_t; + +/*! ZDICT_trainFromBuffer_cover(): + * Train a dictionary from an array of samples using the COVER algorithm. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * See ZDICT_trainFromBuffer() for details on failure modes. + * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( + void *dictBuffer, size_t dictBufferCapacity, + const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t parameters); + +/*! ZDICT_optimizeTrainFromBuffer_cover(): + * The same requirements as above hold for all the parameters except `parameters`. + * This function tries many parameter combinations and picks the best parameters. + * `*parameters` is filled with the best parameters found, + * dictionary constructed with those parameters is stored in `dictBuffer`. + * + * All of the parameters d, k, steps are optional. + * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}. + * if steps is zero it defaults to its default value. + * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000]. 
+ * + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * On success `*parameters` contains the parameters selected. + * See ZDICT_trainFromBuffer() for details on failure modes. + * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread. + */ +ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( + void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t* parameters); + +/*! ZDICT_trainFromBuffer_fastCover(): + * Train a dictionary from an array of samples using a modified version of COVER algorithm. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * d and k are required. + * All other parameters are optional, will use default values if not provided + * The resulting dictionary will be saved into `dictBuffer`. + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * See ZDICT_trainFromBuffer() for details on failure modes. + * Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer, + size_t dictBufferCapacity, const void *samplesBuffer, + const size_t *samplesSizes, unsigned nbSamples, + ZDICT_fastCover_params_t parameters); + +/*! ZDICT_optimizeTrainFromBuffer_fastCover(): + * The same requirements as above hold for all the parameters except `parameters`. + * This function tries many parameter combinations (specifically, k and d combinations) + * and picks the best parameters. `*parameters` is filled with the best parameters found, + * dictionary constructed with those parameters is stored in `dictBuffer`. + * All of the parameters d, k, steps, f, and accel are optional. + * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}. + * if steps is zero it defaults to its default value. + * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [50, 2000]. + * If f is zero, default value of 20 is used. + * If accel is zero, default value of 1 is used. + * + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * On success `*parameters` contains the parameters selected. + * See ZDICT_trainFromBuffer() for details on failure modes. + * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread. 
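+ *  A minimal sketch (illustrative only; zeroed fields mean "default / search"):
+ *      ZDICT_fastCover_params_t p;
+ *      memset(&p, 0, sizeof(p));
+ *      size_t dictSize = ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, dictBufferCapacity,
+ *                            samples, sizes, nbSamples, &p);
+ *      On success, p.k and p.d hold the parameters that were selected.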
+ */ +ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer, + size_t dictBufferCapacity, const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + ZDICT_fastCover_params_t* parameters); + +typedef struct { + unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */ + ZDICT_params_t zParams; +} ZDICT_legacy_params_t; + +/*! ZDICT_trainFromBuffer_legacy(): + * Train a dictionary from an array of samples. + * Samples must be stored concatenated in a single flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order. + * The resulting dictionary will be saved into `dictBuffer`. + * `parameters` is optional and can be provided with values set to 0 to mean "default". + * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) + * or an error code, which can be tested with ZDICT_isError(). + * See ZDICT_trainFromBuffer() for details on failure modes. + * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. + * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. + * In general, it's recommended to provide a few thousands samples, though this can vary a lot. + * It's recommended that total size of all samples be about ~x100 times the target size of dictionary. + * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0. + */ +ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy( + void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_legacy_params_t parameters); + + +/* Deprecation warnings */ +/* It is generally possible to disable deprecation warnings from compiler, + for example with -Wno-deprecated-declarations for gcc + or _CRT_SECURE_NO_WARNINGS in Visual. + Otherwise, it's also possible to manually define ZDICT_DISABLE_DEPRECATE_WARNINGS */ +#ifdef ZDICT_DISABLE_DEPRECATE_WARNINGS +# define ZDICT_DEPRECATED(message) ZDICTLIB_API /* disable deprecation warnings */ +#else +# define ZDICT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ +# define ZDICT_DEPRECATED(message) [[deprecated(message)]] ZDICTLIB_API +# elif defined(__clang__) || (ZDICT_GCC_VERSION >= 405) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated(message))) +# elif (ZDICT_GCC_VERSION >= 301) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __attribute__((deprecated)) +# elif defined(_MSC_VER) +# define ZDICT_DEPRECATED(message) ZDICTLIB_API __declspec(deprecated(message)) +# else +# pragma message("WARNING: You need to implement ZDICT_DEPRECATED for this compiler") +# define ZDICT_DEPRECATED(message) ZDICTLIB_API +# endif +#endif /* ZDICT_DISABLE_DEPRECATE_WARNINGS */ + +ZDICT_DEPRECATED("use ZDICT_finalizeDictionary() instead") +size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); + + +#endif /* ZDICT_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif + +#endif /* DICTBUILDER_H_001 */ +/**** ended inlining ../zdict.h ****/ +/**** start inlining cover.h ****/ +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZDICT_STATIC_LINKING_ONLY +# define ZDICT_STATIC_LINKING_ONLY +#endif + +#include /* fprintf */ +#include /* malloc, free, qsort */ +#include /* memset */ +#include /* clock */ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/pool.h ****/ +/**** skipping file: ../common/threading.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ +/**** skipping file: ../zdict.h ****/ + +/** + * COVER_best_t is used for two purposes: + * 1. Synchronizing threads. + * 2. Saving the best parameters and dictionary. + * + * All of the methods except COVER_best_init() are thread safe if zstd is + * compiled with multithreaded support. + */ +typedef struct COVER_best_s { + ZSTD_pthread_mutex_t mutex; + ZSTD_pthread_cond_t cond; + size_t liveJobs; + void *dict; + size_t dictSize; + ZDICT_cover_params_t parameters; + size_t compressedSize; +} COVER_best_t; + +/** + * A segment is a range in the source as well as the score of the segment. + */ +typedef struct { + U32 begin; + U32 end; + U32 score; +} COVER_segment_t; + +/** + *Number of epochs and size of each epoch. + */ +typedef struct { + U32 num; + U32 size; +} COVER_epoch_info_t; + +/** + * Struct used for the dictionary selection function. + */ +typedef struct COVER_dictSelection { + BYTE* dictContent; + size_t dictSize; + size_t totalCompressedSize; +} COVER_dictSelection_t; + +/** + * Computes the number of epochs and the size of each epoch. + * We will make sure that each epoch gets at least 10 * k bytes. + * + * The COVER algorithms divide the data up into epochs of equal size and + * select one segment from each epoch. + * + * @param maxDictSize The maximum allowed dictionary size. + * @param nbDmers The number of dmers we are training on. + * @param k The parameter k (segment size). + * @param passes The target number of passes over the dmer corpus. + * More passes means a better dictionary. + */ +COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers, + U32 k, U32 passes); + +/** + * Warns the user when their corpus is too small. + */ +void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel); + +/** + * Checks total compressed size of a dictionary + */ +size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, + const size_t *samplesSizes, const BYTE *samples, + size_t *offsets, + size_t nbTrainSamples, size_t nbSamples, + BYTE *const dict, size_t dictBufferCapacity); + +/** + * Returns the sum of the sample sizes. + */ +size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) ; + +/** + * Initialize the `COVER_best_t`. + */ +void COVER_best_init(COVER_best_t *best); + +/** + * Wait until liveJobs == 0. + */ +void COVER_best_wait(COVER_best_t *best); + +/** + * Call COVER_best_wait() and then destroy the COVER_best_t. + */ +void COVER_best_destroy(COVER_best_t *best); + +/** + * Called when a thread is about to be launched. + * Increments liveJobs. + */ +void COVER_best_start(COVER_best_t *best); + +/** + * Called when a thread finishes executing, both on error or success. + * Decrements liveJobs and signals any waiting threads if liveJobs == 0. + * If this dictionary is the best so far save it and its parameters. 
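+ * Typical lifecycle, as a sketch inferred from the declarations in this header:
+ *      COVER_best_t best;
+ *      COVER_best_init(&best);
+ *      COVER_best_start(&best);        - once per job about to be launched
+ *      ... each job ends by calling COVER_best_finish(&best, params, selection) ...
+ *      COVER_best_destroy(&best);      - waits for liveJobs == 0, then tears the struct down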
+ */ +void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, + COVER_dictSelection_t selection); +/** + * Error function for COVER_selectDict function. Checks if the return + * value is an error. + */ +unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection); + + /** + * Error function for COVER_selectDict function. Returns a struct where + * return.totalCompressedSize is a ZSTD error. + */ +COVER_dictSelection_t COVER_dictSelectionError(size_t error); + +/** + * Always call after selectDict is called to free up used memory from + * newly created dictionary. + */ +void COVER_dictSelectionFree(COVER_dictSelection_t selection); + +/** + * Called to finalize the dictionary and select one based on whether or not + * the shrink-dict flag was enabled. If enabled the dictionary used is the + * smallest dictionary within a specified regression of the compressed size + * from the largest dictionary. + */ + COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity, + size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, + size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize); +/**** ended inlining cover.h ****/ + +/*-************************************* +* Constants +***************************************/ +#define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB)) +#define COVER_DEFAULT_SPLITPOINT 1.0 + +/*-************************************* +* Console display +***************************************/ +#ifndef LOCALDISPLAYLEVEL +static int g_displayLevel = 2; +#endif +#undef DISPLAY +#define DISPLAY(...) \ + { \ + fprintf(stderr, __VA_ARGS__); \ + fflush(stderr); \ + } +#undef LOCALDISPLAYLEVEL +#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \ + if (displayLevel >= l) { \ + DISPLAY(__VA_ARGS__); \ + } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */ +#undef DISPLAYLEVEL +#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__) + +#ifndef LOCALDISPLAYUPDATE +static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100; +static clock_t g_time = 0; +#endif +#undef LOCALDISPLAYUPDATE +#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \ + if (displayLevel >= l) { \ + if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \ + g_time = clock(); \ + DISPLAY(__VA_ARGS__); \ + } \ + } +#undef DISPLAYUPDATE +#define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__) + +/*-************************************* +* Hash table +*************************************** +* A small specialized hash map for storing activeDmers. +* The map does not resize, so if it becomes full it will loop forever. +* Thus, the map must be large enough to store every value. +* The map implements linear probing and keeps its load less than 0.5. +*/ + +#define MAP_EMPTY_VALUE ((U32)-1) +typedef struct COVER_map_pair_t_s { + U32 key; + U32 value; +} COVER_map_pair_t; + +typedef struct COVER_map_s { + COVER_map_pair_t *data; + U32 sizeLog; + U32 size; + U32 sizeMask; +} COVER_map_t; + +/** + * Clear the map. + */ +static void COVER_map_clear(COVER_map_t *map) { + memset(map->data, MAP_EMPTY_VALUE, map->size * sizeof(COVER_map_pair_t)); +} + +/** + * Initializes a map of the given size. + * Returns 1 on success and 0 on failure. + * The map must be destroyed with COVER_map_destroy(). + * The map is only guaranteed to be large enough to hold size elements. 
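+ * Usage sketch (illustrative; `neededSize` and `dmerId` are placeholder values):
+ *      COVER_map_t map;
+ *      if (COVER_map_init(&map, neededSize)) {
+ *          *COVER_map_at(&map, dmerId) += 1;    - inserts the key with value 0, then bumps it
+ *          COVER_map_remove(&map, dmerId);
+ *          COVER_map_destroy(&map);
+ *      }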
+ */ +static int COVER_map_init(COVER_map_t *map, U32 size) { + map->sizeLog = ZSTD_highbit32(size) + 2; + map->size = (U32)1 << map->sizeLog; + map->sizeMask = map->size - 1; + map->data = (COVER_map_pair_t *)malloc(map->size * sizeof(COVER_map_pair_t)); + if (!map->data) { + map->sizeLog = 0; + map->size = 0; + return 0; + } + COVER_map_clear(map); + return 1; +} + +/** + * Internal hash function + */ +static const U32 COVER_prime4bytes = 2654435761U; +static U32 COVER_map_hash(COVER_map_t *map, U32 key) { + return (key * COVER_prime4bytes) >> (32 - map->sizeLog); +} + +/** + * Helper function that returns the index that a key should be placed into. + */ +static U32 COVER_map_index(COVER_map_t *map, U32 key) { + const U32 hash = COVER_map_hash(map, key); + U32 i; + for (i = hash;; i = (i + 1) & map->sizeMask) { + COVER_map_pair_t *pos = &map->data[i]; + if (pos->value == MAP_EMPTY_VALUE) { + return i; + } + if (pos->key == key) { + return i; + } + } +} + +/** + * Returns the pointer to the value for key. + * If key is not in the map, it is inserted and the value is set to 0. + * The map must not be full. + */ +static U32 *COVER_map_at(COVER_map_t *map, U32 key) { + COVER_map_pair_t *pos = &map->data[COVER_map_index(map, key)]; + if (pos->value == MAP_EMPTY_VALUE) { + pos->key = key; + pos->value = 0; + } + return &pos->value; +} + +/** + * Deletes key from the map if present. + */ +static void COVER_map_remove(COVER_map_t *map, U32 key) { + U32 i = COVER_map_index(map, key); + COVER_map_pair_t *del = &map->data[i]; + U32 shift = 1; + if (del->value == MAP_EMPTY_VALUE) { + return; + } + for (i = (i + 1) & map->sizeMask;; i = (i + 1) & map->sizeMask) { + COVER_map_pair_t *const pos = &map->data[i]; + /* If the position is empty we are done */ + if (pos->value == MAP_EMPTY_VALUE) { + del->value = MAP_EMPTY_VALUE; + return; + } + /* If pos can be moved to del do so */ + if (((i - COVER_map_hash(map, pos->key)) & map->sizeMask) >= shift) { + del->key = pos->key; + del->value = pos->value; + del = pos; + shift = 1; + } else { + ++shift; + } + } +} + +/** + * Destroys a map that is inited with COVER_map_init(). + */ +static void COVER_map_destroy(COVER_map_t *map) { + if (map->data) { + free(map->data); + } + map->data = NULL; + map->size = 0; +} + +/*-************************************* +* Context +***************************************/ + +typedef struct { + const BYTE *samples; + size_t *offsets; + const size_t *samplesSizes; + size_t nbSamples; + size_t nbTrainSamples; + size_t nbTestSamples; + U32 *suffix; + size_t suffixSize; + U32 *freqs; + U32 *dmerAt; + unsigned d; +} COVER_ctx_t; + +/* We need a global context for qsort... */ +static COVER_ctx_t *g_coverCtx = NULL; + +/*-************************************* +* Helper functions +***************************************/ + +/** + * Returns the sum of the sample sizes. + */ +size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) { + size_t sum = 0; + unsigned i; + for (i = 0; i < nbSamples; ++i) { + sum += samplesSizes[i]; + } + return sum; +} + +/** + * Returns -1 if the dmer at lp is less than the dmer at rp. + * Return 0 if the dmers at lp and rp are equal. + * Returns 1 if the dmer at lp is greater than the dmer at rp. + */ +static int COVER_cmp(COVER_ctx_t *ctx, const void *lp, const void *rp) { + U32 const lhs = *(U32 const *)lp; + U32 const rhs = *(U32 const *)rp; + return memcmp(ctx->samples + lhs, ctx->samples + rhs, ctx->d); +} +/** + * Faster version for d <= 8. 
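+ * (With d <= 8 the whole dmer fits in one little-endian 64-bit load, so a single
+ * masked integer comparison replaces the memcmp used by COVER_cmp(); the mask
+ * keeps only the low 8*d bits.)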
+ */ +static int COVER_cmp8(COVER_ctx_t *ctx, const void *lp, const void *rp) { + U64 const mask = (ctx->d == 8) ? (U64)-1 : (((U64)1 << (8 * ctx->d)) - 1); + U64 const lhs = MEM_readLE64(ctx->samples + *(U32 const *)lp) & mask; + U64 const rhs = MEM_readLE64(ctx->samples + *(U32 const *)rp) & mask; + if (lhs < rhs) { + return -1; + } + return (lhs > rhs); +} + +/** + * Same as COVER_cmp() except ties are broken by pointer value + * NOTE: g_coverCtx must be set to call this function. A global is required because + * qsort doesn't take an opaque pointer. + */ +static int WIN_CDECL COVER_strict_cmp(const void *lp, const void *rp) { + int result = COVER_cmp(g_coverCtx, lp, rp); + if (result == 0) { + result = lp < rp ? -1 : 1; + } + return result; +} +/** + * Faster version for d <= 8. + */ +static int WIN_CDECL COVER_strict_cmp8(const void *lp, const void *rp) { + int result = COVER_cmp8(g_coverCtx, lp, rp); + if (result == 0) { + result = lp < rp ? -1 : 1; + } + return result; +} + +/** + * Returns the first pointer in [first, last) whose element does not compare + * less than value. If no such element exists it returns last. + */ +static const size_t *COVER_lower_bound(const size_t *first, const size_t *last, + size_t value) { + size_t count = last - first; + while (count != 0) { + size_t step = count / 2; + const size_t *ptr = first; + ptr += step; + if (*ptr < value) { + first = ++ptr; + count -= step + 1; + } else { + count = step; + } + } + return first; +} + +/** + * Generic groupBy function. + * Groups an array sorted by cmp into groups with equivalent values. + * Calls grp for each group. + */ +static void +COVER_groupBy(const void *data, size_t count, size_t size, COVER_ctx_t *ctx, + int (*cmp)(COVER_ctx_t *, const void *, const void *), + void (*grp)(COVER_ctx_t *, const void *, const void *)) { + const BYTE *ptr = (const BYTE *)data; + size_t num = 0; + while (num < count) { + const BYTE *grpEnd = ptr + size; + ++num; + while (num < count && cmp(ctx, ptr, grpEnd) == 0) { + grpEnd += size; + ++num; + } + grp(ctx, ptr, grpEnd); + ptr = grpEnd; + } +} + +/*-************************************* +* Cover functions +***************************************/ + +/** + * Called on each group of positions with the same dmer. + * Counts the frequency of each dmer and saves it in the suffix array. + * Fills `ctx->dmerAt`. + */ +static void COVER_group(COVER_ctx_t *ctx, const void *group, + const void *groupEnd) { + /* The group consists of all the positions with the same first d bytes. */ + const U32 *grpPtr = (const U32 *)group; + const U32 *grpEnd = (const U32 *)groupEnd; + /* The dmerId is how we will reference this dmer. + * This allows us to map the whole dmer space to a much smaller space, the + * size of the suffix array. + */ + const U32 dmerId = (U32)(grpPtr - ctx->suffix); + /* Count the number of samples this dmer shows up in */ + U32 freq = 0; + /* Details */ + const size_t *curOffsetPtr = ctx->offsets; + const size_t *offsetsEnd = ctx->offsets + ctx->nbSamples; + /* Once *grpPtr >= curSampleEnd this occurrence of the dmer is in a + * different sample than the last. + */ + size_t curSampleEnd = ctx->offsets[0]; + for (; grpPtr != grpEnd; ++grpPtr) { + /* Save the dmerId for this position so we can get back to it. */ + ctx->dmerAt[*grpPtr] = dmerId; + /* Dictionaries only help for the first reference to the dmer. + * After that zstd can reference the match from the previous reference. + * So only count each dmer once for each sample it is in. 
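+ * For example, a dmer that occurs three times in one sample and once in
+ * another contributes a frequency of 2 (two samples), not 4 (four positions).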
+ */ + if (*grpPtr < curSampleEnd) { + continue; + } + freq += 1; + /* Binary search to find the end of the sample *grpPtr is in. + * In the common case that grpPtr + 1 == grpEnd we can skip the binary + * search because the loop is over. + */ + if (grpPtr + 1 != grpEnd) { + const size_t *sampleEndPtr = + COVER_lower_bound(curOffsetPtr, offsetsEnd, *grpPtr); + curSampleEnd = *sampleEndPtr; + curOffsetPtr = sampleEndPtr + 1; + } + } + /* At this point we are never going to look at this segment of the suffix + * array again. We take advantage of this fact to save memory. + * We store the frequency of the dmer in the first position of the group, + * which is dmerId. + */ + ctx->suffix[dmerId] = freq; +} + + +/** + * Selects the best segment in an epoch. + * Segments of are scored according to the function: + * + * Let F(d) be the frequency of dmer d. + * Let S_i be the dmer at position i of segment S which has length k. + * + * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1}) + * + * Once the dmer d is in the dictionary we set F(d) = 0. + */ +static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs, + COVER_map_t *activeDmers, U32 begin, + U32 end, + ZDICT_cover_params_t parameters) { + /* Constants */ + const U32 k = parameters.k; + const U32 d = parameters.d; + const U32 dmersInK = k - d + 1; + /* Try each segment (activeSegment) and save the best (bestSegment) */ + COVER_segment_t bestSegment = {0, 0, 0}; + COVER_segment_t activeSegment; + /* Reset the activeDmers in the segment */ + COVER_map_clear(activeDmers); + /* The activeSegment starts at the beginning of the epoch. */ + activeSegment.begin = begin; + activeSegment.end = begin; + activeSegment.score = 0; + /* Slide the activeSegment through the whole epoch. + * Save the best segment in bestSegment. + */ + while (activeSegment.end < end) { + /* The dmerId for the dmer at the next position */ + U32 newDmer = ctx->dmerAt[activeSegment.end]; + /* The entry in activeDmers for this dmerId */ + U32 *newDmerOcc = COVER_map_at(activeDmers, newDmer); + /* If the dmer isn't already present in the segment add its score. */ + if (*newDmerOcc == 0) { + /* The paper suggest using the L-0.5 norm, but experiments show that it + * doesn't help. + */ + activeSegment.score += freqs[newDmer]; + } + /* Add the dmer to the segment */ + activeSegment.end += 1; + *newDmerOcc += 1; + + /* If the window is now too large, drop the first position */ + if (activeSegment.end - activeSegment.begin == dmersInK + 1) { + U32 delDmer = ctx->dmerAt[activeSegment.begin]; + U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer); + activeSegment.begin += 1; + *delDmerOcc -= 1; + /* If this is the last occurrence of the dmer, subtract its score */ + if (*delDmerOcc == 0) { + COVER_map_remove(activeDmers, delDmer); + activeSegment.score -= freqs[delDmer]; + } + } + + /* If this segment is the best so far save it */ + if (activeSegment.score > bestSegment.score) { + bestSegment = activeSegment; + } + } + { + /* Trim off the zero frequency head and tail from the segment. */ + U32 newBegin = bestSegment.end; + U32 newEnd = bestSegment.begin; + U32 pos; + for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { + U32 freq = freqs[ctx->dmerAt[pos]]; + if (freq != 0) { + newBegin = MIN(newBegin, pos); + newEnd = pos + 1; + } + } + bestSegment.begin = newBegin; + bestSegment.end = newEnd; + } + { + /* Zero out the frequency of each dmer covered by the chosen segment. 
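+ * This enforces the rule stated above (once a dmer is in the dictionary,
+ * F(d) = 0), so later segments and epochs are not rewarded for covering
+ * content the dictionary already contains.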
*/ + U32 pos; + for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { + freqs[ctx->dmerAt[pos]] = 0; + } + } + return bestSegment; +} + +/** + * Check the validity of the parameters. + * Returns non-zero if the parameters are valid and 0 otherwise. + */ +static int COVER_checkParameters(ZDICT_cover_params_t parameters, + size_t maxDictSize) { + /* k and d are required parameters */ + if (parameters.d == 0 || parameters.k == 0) { + return 0; + } + /* k <= maxDictSize */ + if (parameters.k > maxDictSize) { + return 0; + } + /* d <= k */ + if (parameters.d > parameters.k) { + return 0; + } + /* 0 < splitPoint <= 1 */ + if (parameters.splitPoint <= 0 || parameters.splitPoint > 1){ + return 0; + } + return 1; +} + +/** + * Clean up a context initialized with `COVER_ctx_init()`. + */ +static void COVER_ctx_destroy(COVER_ctx_t *ctx) { + if (!ctx) { + return; + } + if (ctx->suffix) { + free(ctx->suffix); + ctx->suffix = NULL; + } + if (ctx->freqs) { + free(ctx->freqs); + ctx->freqs = NULL; + } + if (ctx->dmerAt) { + free(ctx->dmerAt); + ctx->dmerAt = NULL; + } + if (ctx->offsets) { + free(ctx->offsets); + ctx->offsets = NULL; + } +} + +/** + * Prepare a context for dictionary building. + * The context is only dependent on the parameter `d` and can used multiple + * times. + * Returns 0 on success or error code on error. + * The context must be destroyed with `COVER_ctx_destroy()`. + */ +static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, + const size_t *samplesSizes, unsigned nbSamples, + unsigned d, double splitPoint) { + const BYTE *const samples = (const BYTE *)samplesBuffer; + const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples); + /* Split samples into testing and training sets */ + const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples; + const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples; + const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize; + const size_t testSamplesSize = splitPoint < 1.0 ? 
COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize; + /* Checks */ + if (totalSamplesSize < MAX(d, sizeof(U64)) || + totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) { + DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", + (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20)); + return ERROR(srcSize_wrong); + } + /* Check if there are at least 5 training samples */ + if (nbTrainSamples < 5) { + DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples); + return ERROR(srcSize_wrong); + } + /* Check if there's testing sample */ + if (nbTestSamples < 1) { + DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples); + return ERROR(srcSize_wrong); + } + /* Zero the context */ + memset(ctx, 0, sizeof(*ctx)); + DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples, + (unsigned)trainingSamplesSize); + DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples, + (unsigned)testSamplesSize); + ctx->samples = samples; + ctx->samplesSizes = samplesSizes; + ctx->nbSamples = nbSamples; + ctx->nbTrainSamples = nbTrainSamples; + ctx->nbTestSamples = nbTestSamples; + /* Partial suffix array */ + ctx->suffixSize = trainingSamplesSize - MAX(d, sizeof(U64)) + 1; + ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); + /* Maps index to the dmerID */ + ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); + /* The offsets of each file */ + ctx->offsets = (size_t *)malloc((nbSamples + 1) * sizeof(size_t)); + if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) { + DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n"); + COVER_ctx_destroy(ctx); + return ERROR(memory_allocation); + } + ctx->freqs = NULL; + ctx->d = d; + + /* Fill offsets from the samplesSizes */ + { + U32 i; + ctx->offsets[0] = 0; + for (i = 1; i <= nbSamples; ++i) { + ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1]; + } + } + DISPLAYLEVEL(2, "Constructing partial suffix array\n"); + { + /* suffix is a partial suffix array. + * It only sorts suffixes by their first parameters.d bytes. + * The sort is stable, so each dmer group is sorted by position in input. + */ + U32 i; + for (i = 0; i < ctx->suffixSize; ++i) { + ctx->suffix[i] = i; + } + /* qsort doesn't take an opaque pointer, so pass as a global. + * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is. + */ + g_coverCtx = ctx; +#if defined(__OpenBSD__) + mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32), + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); +#else + qsort(ctx->suffix, ctx->suffixSize, sizeof(U32), + (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp)); +#endif + } + DISPLAYLEVEL(2, "Computing frequencies\n"); + /* For each dmer group (group of positions with the same first d bytes): + * 1. For each position we set dmerAt[position] = dmerID. The dmerID is + * (groupBeginPtr - suffix). This allows us to go from position to + * dmerID so we can look up values in freq. + * 2. We calculate how many samples the dmer occurs in and save it in + * freqs[dmerId]. + */ + COVER_groupBy(ctx->suffix, ctx->suffixSize, sizeof(U32), ctx, + (ctx->d <= 8 ? 
&COVER_cmp8 : &COVER_cmp), &COVER_group); + ctx->freqs = ctx->suffix; + ctx->suffix = NULL; + return 0; +} + +void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel) +{ + const double ratio = (double)nbDmers / maxDictSize; + if (ratio >= 10) { + return; + } + LOCALDISPLAYLEVEL(displayLevel, 1, + "WARNING: The maximum dictionary size %u is too large " + "compared to the source size %u! " + "size(source)/size(dictionary) = %f, but it should be >= " + "10! This may lead to a subpar dictionary! We recommend " + "training on sources at least 10x, and preferably 100x " + "the size of the dictionary! \n", (U32)maxDictSize, + (U32)nbDmers, ratio); +} + +COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, + U32 nbDmers, U32 k, U32 passes) +{ + const U32 minEpochSize = k * 10; + COVER_epoch_info_t epochs; + epochs.num = MAX(1, maxDictSize / k / passes); + epochs.size = nbDmers / epochs.num; + if (epochs.size >= minEpochSize) { + assert(epochs.size * epochs.num <= nbDmers); + return epochs; + } + epochs.size = MIN(minEpochSize, nbDmers); + epochs.num = nbDmers / epochs.size; + assert(epochs.size * epochs.num <= nbDmers); + return epochs; +} + +/** + * Given the prepared context build the dictionary. + */ +static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs, + COVER_map_t *activeDmers, void *dictBuffer, + size_t dictBufferCapacity, + ZDICT_cover_params_t parameters) { + BYTE *const dict = (BYTE *)dictBuffer; + size_t tail = dictBufferCapacity; + /* Divide the data into epochs. We will select one segment from each epoch. */ + const COVER_epoch_info_t epochs = COVER_computeEpochs( + (U32)dictBufferCapacity, (U32)ctx->suffixSize, parameters.k, 4); + const size_t maxZeroScoreRun = MAX(10, MIN(100, epochs.num >> 3)); + size_t zeroScoreRun = 0; + size_t epoch; + DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", + (U32)epochs.num, (U32)epochs.size); + /* Loop through the epochs until there are no more segments or the dictionary + * is full. + */ + for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) { + const U32 epochBegin = (U32)(epoch * epochs.size); + const U32 epochEnd = epochBegin + epochs.size; + size_t segmentSize; + /* Select a segment */ + COVER_segment_t segment = COVER_selectSegment( + ctx, freqs, activeDmers, epochBegin, epochEnd, parameters); + /* If the segment covers no dmers, then we are out of content. + * There may be new content in other epochs, for continue for some time. + */ + if (segment.score == 0) { + if (++zeroScoreRun >= maxZeroScoreRun) { + break; + } + continue; + } + zeroScoreRun = 0; + /* Trim the segment if necessary and if it is too small then we are done */ + segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); + if (segmentSize < parameters.d) { + break; + } + /* We fill the dictionary from the back to allow the best segments to be + * referenced with the smallest offsets. 
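+ * Concretely: with a 1000-byte dictionary buffer, a first (highest scoring)
+ * 300-byte segment is copied to bytes [700, 1000) and the next 200-byte
+ * segment to bytes [500, 700), so the most useful content sits at the end of
+ * the dictionary where it can be referenced with the smallest offsets
+ * (the sizes in this example are made up for illustration).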
+ */ + tail -= segmentSize; + memcpy(dict + tail, ctx->samples + segment.begin, segmentSize); + DISPLAYUPDATE( + 2, "\r%u%% ", + (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity)); + } + DISPLAYLEVEL(2, "\r%79s\r", ""); + return tail; +} + +ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( + void *dictBuffer, size_t dictBufferCapacity, + const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t parameters) +{ + BYTE* const dict = (BYTE*)dictBuffer; + COVER_ctx_t ctx; + COVER_map_t activeDmers; + parameters.splitPoint = 1.0; + /* Initialize global data */ + g_displayLevel = parameters.zParams.notificationLevel; + /* Checks */ + if (!COVER_checkParameters(parameters, dictBufferCapacity)) { + DISPLAYLEVEL(1, "Cover parameters incorrect\n"); + return ERROR(parameter_outOfBound); + } + if (nbSamples == 0) { + DISPLAYLEVEL(1, "Cover must have at least one input file\n"); + return ERROR(srcSize_wrong); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + /* Initialize context and activeDmers */ + { + size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, + parameters.d, parameters.splitPoint); + if (ZSTD_isError(initVal)) { + return initVal; + } + } + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel); + if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { + DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); + COVER_ctx_destroy(&ctx); + return ERROR(memory_allocation); + } + + DISPLAYLEVEL(2, "Building dictionary\n"); + { + const size_t tail = + COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer, + dictBufferCapacity, parameters); + const size_t dictionarySize = ZDICT_finalizeDictionary( + dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, + samplesBuffer, samplesSizes, nbSamples, parameters.zParams); + if (!ZSTD_isError(dictionarySize)) { + DISPLAYLEVEL(2, "Constructed dictionary of size %u\n", + (unsigned)dictionarySize); + } + COVER_ctx_destroy(&ctx); + COVER_map_destroy(&activeDmers); + return dictionarySize; + } +} + + + +size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, + const size_t *samplesSizes, const BYTE *samples, + size_t *offsets, + size_t nbTrainSamples, size_t nbSamples, + BYTE *const dict, size_t dictBufferCapacity) { + size_t totalCompressedSize = ERROR(GENERIC); + /* Pointers */ + ZSTD_CCtx *cctx; + ZSTD_CDict *cdict; + void *dst; + /* Local variables */ + size_t dstCapacity; + size_t i; + /* Allocate dst with enough space to compress the maximum sized sample */ + { + size_t maxSampleSize = 0; + i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0; + for (; i < nbSamples; ++i) { + maxSampleSize = MAX(samplesSizes[i], maxSampleSize); + } + dstCapacity = ZSTD_compressBound(maxSampleSize); + dst = malloc(dstCapacity); + } + /* Create the cctx and cdict */ + cctx = ZSTD_createCCtx(); + cdict = ZSTD_createCDict(dict, dictBufferCapacity, + parameters.zParams.compressionLevel); + if (!dst || !cctx || !cdict) { + goto _compressCleanup; + } + /* Compress each sample and sum their sizes (or error) */ + totalCompressedSize = dictBufferCapacity; + i = parameters.splitPoint < 1.0 ? 
nbTrainSamples : 0; + for (; i < nbSamples; ++i) { + const size_t size = ZSTD_compress_usingCDict( + cctx, dst, dstCapacity, samples + offsets[i], + samplesSizes[i], cdict); + if (ZSTD_isError(size)) { + totalCompressedSize = size; + goto _compressCleanup; + } + totalCompressedSize += size; + } +_compressCleanup: + ZSTD_freeCCtx(cctx); + ZSTD_freeCDict(cdict); + if (dst) { + free(dst); + } + return totalCompressedSize; +} + + +/** + * Initialize the `COVER_best_t`. + */ +void COVER_best_init(COVER_best_t *best) { + if (best==NULL) return; /* compatible with init on NULL */ + (void)ZSTD_pthread_mutex_init(&best->mutex, NULL); + (void)ZSTD_pthread_cond_init(&best->cond, NULL); + best->liveJobs = 0; + best->dict = NULL; + best->dictSize = 0; + best->compressedSize = (size_t)-1; + memset(&best->parameters, 0, sizeof(best->parameters)); +} + +/** + * Wait until liveJobs == 0. + */ +void COVER_best_wait(COVER_best_t *best) { + if (!best) { + return; + } + ZSTD_pthread_mutex_lock(&best->mutex); + while (best->liveJobs != 0) { + ZSTD_pthread_cond_wait(&best->cond, &best->mutex); + } + ZSTD_pthread_mutex_unlock(&best->mutex); +} + +/** + * Call COVER_best_wait() and then destroy the COVER_best_t. + */ +void COVER_best_destroy(COVER_best_t *best) { + if (!best) { + return; + } + COVER_best_wait(best); + if (best->dict) { + free(best->dict); + } + ZSTD_pthread_mutex_destroy(&best->mutex); + ZSTD_pthread_cond_destroy(&best->cond); +} + +/** + * Called when a thread is about to be launched. + * Increments liveJobs. + */ +void COVER_best_start(COVER_best_t *best) { + if (!best) { + return; + } + ZSTD_pthread_mutex_lock(&best->mutex); + ++best->liveJobs; + ZSTD_pthread_mutex_unlock(&best->mutex); +} + +/** + * Called when a thread finishes executing, both on error or success. + * Decrements liveJobs and signals any waiting threads if liveJobs == 0. + * If this dictionary is the best so far save it and its parameters. 
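+ *
+ * The intended lifecycle, as used by ZDICT_optimizeTrainFromBuffer_cover()
+ * below (sketch only, error handling omitted):
+ *
+ *     COVER_best_t best;
+ *     COVER_best_init(&best);
+ *     COVER_best_start(&best);           // once per parameter set tried
+ *     POOL_add(pool, &COVER_tryParameters, data); // worker ends in COVER_best_finish()
+ *     COVER_best_wait(&best);            // block until liveJobs == 0
+ *     memcpy(dictBuffer, best.dict, best.dictSize);
+ *     COVER_best_destroy(&best);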
+ */ +void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, + COVER_dictSelection_t selection) { + void* dict = selection.dictContent; + size_t compressedSize = selection.totalCompressedSize; + size_t dictSize = selection.dictSize; + if (!best) { + return; + } + { + size_t liveJobs; + ZSTD_pthread_mutex_lock(&best->mutex); + --best->liveJobs; + liveJobs = best->liveJobs; + /* If the new dictionary is better */ + if (compressedSize < best->compressedSize) { + /* Allocate space if necessary */ + if (!best->dict || best->dictSize < dictSize) { + if (best->dict) { + free(best->dict); + } + best->dict = malloc(dictSize); + if (!best->dict) { + best->compressedSize = ERROR(GENERIC); + best->dictSize = 0; + ZSTD_pthread_cond_signal(&best->cond); + ZSTD_pthread_mutex_unlock(&best->mutex); + return; + } + } + /* Save the dictionary, parameters, and size */ + if (dict) { + memcpy(best->dict, dict, dictSize); + best->dictSize = dictSize; + best->parameters = parameters; + best->compressedSize = compressedSize; + } + } + if (liveJobs == 0) { + ZSTD_pthread_cond_broadcast(&best->cond); + } + ZSTD_pthread_mutex_unlock(&best->mutex); + } +} + +COVER_dictSelection_t COVER_dictSelectionError(size_t error) { + COVER_dictSelection_t selection = { NULL, 0, error }; + return selection; +} + +unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) { + return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent); +} + +void COVER_dictSelectionFree(COVER_dictSelection_t selection){ + free(selection.dictContent); +} + +COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, size_t dictBufferCapacity, + size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, + size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) { + + size_t largestDict = 0; + size_t largestCompressed = 0; + BYTE* customDictContentEnd = customDictContent + dictContentSize; + + BYTE * largestDictbuffer = (BYTE *)malloc(dictBufferCapacity); + BYTE * candidateDictBuffer = (BYTE *)malloc(dictBufferCapacity); + double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00; + + if (!largestDictbuffer || !candidateDictBuffer) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(dictContentSize); + } + + /* Initial dictionary size and compressed size */ + memcpy(largestDictbuffer, customDictContent, dictContentSize); + dictContentSize = ZDICT_finalizeDictionary( + largestDictbuffer, dictBufferCapacity, customDictContent, dictContentSize, + samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); + + if (ZDICT_isError(dictContentSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(dictContentSize); + } + + totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes, + samplesBuffer, offsets, + nbCheckSamples, nbSamples, + largestDictbuffer, dictContentSize); + + if (ZSTD_isError(totalCompressedSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(totalCompressedSize); + } + + if (params.shrinkDict == 0) { + COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize }; + free(candidateDictBuffer); + return selection; + } + + largestDict = dictContentSize; + largestCompressed = totalCompressedSize; + dictContentSize = ZDICT_DICTSIZE_MIN; + + /* Largest dict is initially at least 
ZDICT_DICTSIZE_MIN */ + while (dictContentSize < largestDict) { + memcpy(candidateDictBuffer, largestDictbuffer, largestDict); + dictContentSize = ZDICT_finalizeDictionary( + candidateDictBuffer, dictBufferCapacity, customDictContentEnd - dictContentSize, dictContentSize, + samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); + + if (ZDICT_isError(dictContentSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(dictContentSize); + + } + + totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes, + samplesBuffer, offsets, + nbCheckSamples, nbSamples, + candidateDictBuffer, dictContentSize); + + if (ZSTD_isError(totalCompressedSize)) { + free(largestDictbuffer); + free(candidateDictBuffer); + return COVER_dictSelectionError(totalCompressedSize); + } + + if (totalCompressedSize <= largestCompressed * regressionTolerance) { + COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize }; + free(largestDictbuffer); + return selection; + } + dictContentSize *= 2; + } + dictContentSize = largestDict; + totalCompressedSize = largestCompressed; + { + COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize }; + free(candidateDictBuffer); + return selection; + } +} + +/** + * Parameters for COVER_tryParameters(). + */ +typedef struct COVER_tryParameters_data_s { + const COVER_ctx_t *ctx; + COVER_best_t *best; + size_t dictBufferCapacity; + ZDICT_cover_params_t parameters; +} COVER_tryParameters_data_t; + +/** + * Tries a set of parameters and updates the COVER_best_t with the results. + * This function is thread safe if zstd is compiled with multithreaded support. + * It takes its parameters as an *OWNING* opaque pointer to support threading. 
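+ * "Owning" means the caller malloc()s the COVER_tryParameters_data_t, hands
+ * it over (directly or via POOL_add()), and must not use or free it
+ * afterwards: this function free()s it on every path through _cleanup.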
+ */ +static void COVER_tryParameters(void *opaque) +{ + /* Save parameters as local variables */ + COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t*)opaque; + const COVER_ctx_t *const ctx = data->ctx; + const ZDICT_cover_params_t parameters = data->parameters; + size_t dictBufferCapacity = data->dictBufferCapacity; + size_t totalCompressedSize = ERROR(GENERIC); + /* Allocate space for hash table, dict, and freqs */ + COVER_map_t activeDmers; + BYTE* const dict = (BYTE*)malloc(dictBufferCapacity); + COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC)); + U32* const freqs = (U32*)malloc(ctx->suffixSize * sizeof(U32)); + if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { + DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); + goto _cleanup; + } + if (!dict || !freqs) { + DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); + goto _cleanup; + } + /* Copy the frequencies because we need to modify them */ + memcpy(freqs, ctx->freqs, ctx->suffixSize * sizeof(U32)); + /* Build the dictionary */ + { + const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict, + dictBufferCapacity, parameters); + selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail, + ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, + totalCompressedSize); + + if (COVER_dictSelectionIsError(selection)) { + DISPLAYLEVEL(1, "Failed to select dictionary\n"); + goto _cleanup; + } + } +_cleanup: + free(dict); + COVER_best_finish(data->best, parameters, selection); + free(data); + COVER_map_destroy(&activeDmers); + COVER_dictSelectionFree(selection); + free(freqs); +} + +ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( + void* dictBuffer, size_t dictBufferCapacity, const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + ZDICT_cover_params_t* parameters) +{ + /* constants */ + const unsigned nbThreads = parameters->nbThreads; + const double splitPoint = + parameters->splitPoint <= 0.0 ? COVER_DEFAULT_SPLITPOINT : parameters->splitPoint; + const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d; + const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d; + const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k; + const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k; + const unsigned kSteps = parameters->steps == 0 ? 
40 : parameters->steps; + const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1); + const unsigned kIterations = + (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); + const unsigned shrinkDict = 0; + /* Local variables */ + const int displayLevel = parameters->zParams.notificationLevel; + unsigned iteration = 1; + unsigned d; + unsigned k; + COVER_best_t best; + POOL_ctx *pool = NULL; + int warned = 0; + + /* Checks */ + if (splitPoint <= 0 || splitPoint > 1) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); + return ERROR(parameter_outOfBound); + } + if (kMinK < kMaxD || kMaxK < kMinK) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); + return ERROR(parameter_outOfBound); + } + if (nbSamples == 0) { + DISPLAYLEVEL(1, "Cover must have at least one input file\n"); + return ERROR(srcSize_wrong); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + if (nbThreads > 1) { + pool = POOL_create(nbThreads, 1); + if (!pool) { + return ERROR(memory_allocation); + } + } + /* Initialization */ + COVER_best_init(&best); + /* Turn down global display level to clean up display at level 2 and below */ + g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1; + /* Loop through d first because each new value needs a new context */ + LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n", + kIterations); + for (d = kMinD; d <= kMaxD; d += 2) { + /* Initialize the context for this value of d */ + COVER_ctx_t ctx; + LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); + { + const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint); + if (ZSTD_isError(initVal)) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); + COVER_best_destroy(&best); + POOL_free(pool); + return initVal; + } + } + if (!warned) { + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel); + warned = 1; + } + /* Loop through k reusing the same context */ + for (k = kMinK; k <= kMaxK; k += kStepSize) { + /* Prepare the arguments */ + COVER_tryParameters_data_t *data = (COVER_tryParameters_data_t *)malloc( + sizeof(COVER_tryParameters_data_t)); + LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k); + if (!data) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n"); + COVER_best_destroy(&best); + COVER_ctx_destroy(&ctx); + POOL_free(pool); + return ERROR(memory_allocation); + } + data->ctx = &ctx; + data->best = &best; + data->dictBufferCapacity = dictBufferCapacity; + data->parameters = *parameters; + data->parameters.k = k; + data->parameters.d = d; + data->parameters.splitPoint = splitPoint; + data->parameters.steps = kSteps; + data->parameters.shrinkDict = shrinkDict; + data->parameters.zParams.notificationLevel = g_displayLevel; + /* Check the parameters */ + if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) { + DISPLAYLEVEL(1, "Cover parameters incorrect\n"); + free(data); + continue; + } + /* Call the function and pass ownership of data to it */ + COVER_best_start(&best); + if (pool) { + POOL_add(pool, &COVER_tryParameters, data); + } else { + COVER_tryParameters(data); + } + /* Print status */ + LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ", + (unsigned)((iteration * 100) / kIterations)); + ++iteration; + } + COVER_best_wait(&best); + COVER_ctx_destroy(&ctx); + } + LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", ""); + /* Fill the 
output buffer and parameters with output of the best parameters */ + { + const size_t dictSize = best.dictSize; + if (ZSTD_isError(best.compressedSize)) { + const size_t compressedSize = best.compressedSize; + COVER_best_destroy(&best); + POOL_free(pool); + return compressedSize; + } + *parameters = best.parameters; + memcpy(dictBuffer, best.dict, dictSize); + COVER_best_destroy(&best); + POOL_free(pool); + return dictSize; + } +} +/**** ended inlining dictBuilder/cover.c ****/ +/**** start inlining dictBuilder/divsufsort.c ****/ +/* + * divsufsort.c for libdivsufsort-lite + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/*- Compiler specifics -*/ +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wshorten-64-to-32" +#endif + +#if defined(_MSC_VER) +# pragma warning(disable : 4244) +# pragma warning(disable : 4127) /* C4127 : Condition expression is constant */ +#endif + + +/*- Dependencies -*/ +#include +#include +#include + +/**** start inlining divsufsort.h ****/ +/* + * divsufsort.h for libdivsufsort-lite + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef _DIVSUFSORT_H +#define _DIVSUFSORT_H 1 + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + + +/*- Prototypes -*/ + +/** + * Constructs the suffix array of a given string. + * @param T [0..n-1] The input string. 
+ * @param SA [0..n-1] The output array of suffixes. + * @param n The length of the given string. + * @param openMP enables OpenMP optimization. + * @return 0 if no error occurred, -1 or -2 otherwise. + */ +int +divsufsort(const unsigned char *T, int *SA, int n, int openMP); + +/** + * Constructs the burrows-wheeler transformed string of a given string. + * @param T [0..n-1] The input string. + * @param U [0..n-1] The output string. (can be T) + * @param A [0..n-1] The temporary array. (can be NULL) + * @param n The length of the given string. + * @param num_indexes The length of secondary indexes array. (can be NULL) + * @param indexes The secondary indexes array. (can be NULL) + * @param openMP enables OpenMP optimization. + * @return The primary index if no error occurred, -1 or -2 otherwise. + */ +int +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* _DIVSUFSORT_H */ +/**** ended inlining divsufsort.h ****/ + +/*- Constants -*/ +#if defined(INLINE) +# undef INLINE +#endif +#if !defined(INLINE) +# define INLINE __inline +#endif +#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1) +# undef ALPHABET_SIZE +#endif +#if !defined(ALPHABET_SIZE) +# define ALPHABET_SIZE (256) +#endif +#define BUCKET_A_SIZE (ALPHABET_SIZE) +#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE) +#if defined(SS_INSERTIONSORT_THRESHOLD) +# if SS_INSERTIONSORT_THRESHOLD < 1 +# undef SS_INSERTIONSORT_THRESHOLD +# define SS_INSERTIONSORT_THRESHOLD (1) +# endif +#else +# define SS_INSERTIONSORT_THRESHOLD (8) +#endif +#if defined(SS_BLOCKSIZE) +# if SS_BLOCKSIZE < 0 +# undef SS_BLOCKSIZE +# define SS_BLOCKSIZE (0) +# elif 32768 <= SS_BLOCKSIZE +# undef SS_BLOCKSIZE +# define SS_BLOCKSIZE (32767) +# endif +#else +# define SS_BLOCKSIZE (1024) +#endif +/* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */ +#if SS_BLOCKSIZE == 0 +# define SS_MISORT_STACKSIZE (96) +#elif SS_BLOCKSIZE <= 4096 +# define SS_MISORT_STACKSIZE (16) +#else +# define SS_MISORT_STACKSIZE (24) +#endif +#define SS_SMERGE_STACKSIZE (32) +#define TR_INSERTIONSORT_THRESHOLD (8) +#define TR_STACKSIZE (64) + + +/*- Macros -*/ +#ifndef SWAP +# define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0) +#endif /* SWAP */ +#ifndef MIN +# define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b)) +#endif /* MIN */ +#ifndef MAX +# define MAX(_a, _b) (((_a) > (_b)) ? 
(_a) : (_b)) +#endif /* MAX */ +#define STACK_PUSH(_a, _b, _c, _d)\ + do {\ + assert(ssize < STACK_SIZE);\ + stack[ssize].a = (_a), stack[ssize].b = (_b),\ + stack[ssize].c = (_c), stack[ssize++].d = (_d);\ + } while(0) +#define STACK_PUSH5(_a, _b, _c, _d, _e)\ + do {\ + assert(ssize < STACK_SIZE);\ + stack[ssize].a = (_a), stack[ssize].b = (_b),\ + stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\ + } while(0) +#define STACK_POP(_a, _b, _c, _d)\ + do {\ + assert(0 <= ssize);\ + if(ssize == 0) { return; }\ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ + (_c) = stack[ssize].c, (_d) = stack[ssize].d;\ + } while(0) +#define STACK_POP5(_a, _b, _c, _d, _e)\ + do {\ + assert(0 <= ssize);\ + if(ssize == 0) { return; }\ + (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\ + (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\ + } while(0) +#define BUCKET_A(_c0) bucket_A[(_c0)] +#if ALPHABET_SIZE == 256 +#define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)]) +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)]) +#else +#define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)]) +#define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)]) +#endif + + +/*- Private Functions -*/ + +static const int lg_table[256]= { + -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4, + 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, + 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7 +}; + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +static INLINE +int +ss_ilg(int n) { +#if SS_BLOCKSIZE == 0 + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +#elif SS_BLOCKSIZE < 256 + return lg_table[n]; +#else + return (n & 0xff00) ? 
+ 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]; +#endif +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + +#if SS_BLOCKSIZE != 0 + +static const int sqq_table[256] = { + 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61, + 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89, + 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109, +110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, +128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, +143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155, +156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168, +169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180, +181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191, +192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201, +202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211, +212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221, +221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230, +230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238, +239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247, +247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255 +}; + +static INLINE +int +ss_isqrt(int x) { + int y, e; + + if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; } + e = (x & 0xffff0000) ? + ((x & 0xff000000) ? + 24 + lg_table[(x >> 24) & 0xff] : + 16 + lg_table[(x >> 16) & 0xff]) : + ((x & 0x0000ff00) ? + 8 + lg_table[(x >> 8) & 0xff] : + 0 + lg_table[(x >> 0) & 0xff]); + + if(e >= 16) { + y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7); + if(e >= 24) { y = (y + 1 + x / y) >> 1; } + y = (y + 1 + x / y) >> 1; + } else if(e >= 8) { + y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1; + } else { + return sqq_table[x] >> 4; + } + + return (x < (y * y)) ? y - 1 : y; +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/* Compares two suffixes. */ +static INLINE +int +ss_compare(const unsigned char *T, + const int *p1, const int *p2, + int depth) { + const unsigned char *U1, *U2, *U1n, *U2n; + + for(U1 = T + depth + *p1, + U2 = T + depth + *p2, + U1n = T + *(p1 + 1) + 2, + U2n = T + *(p2 + 1) + 2; + (U1 < U1n) && (U2 < U2n) && (*U1 == *U2); + ++U1, ++U2) { + } + + return U1 < U1n ? + (U2 < U2n ? *U1 - *U2 : 1) : + (U2 < U2n ? 
-1 : 0); +} + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) + +/* Insertionsort for small size groups */ +static +void +ss_insertionsort(const unsigned char *T, const int *PA, + int *first, int *last, int depth) { + int *i, *j; + int t; + int r; + + for(i = last - 2; first <= i; --i) { + for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) { + do { *(j - 1) = *j; } while((++j < last) && (*j < 0)); + if(last <= j) { break; } + } + if(r == 0) { *j = ~*j; } + *(j - 1) = t; + } +} + +#endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */ + + +/*---------------------------------------------------------------------------*/ + +#if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) + +static INLINE +void +ss_fixdown(const unsigned char *Td, const int *PA, + int *SA, int i, int size) { + int j, k; + int v; + int c, d, e; + + for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = Td[PA[SA[k = j++]]]; + if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. */ +static +void +ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) { + int i, m; + int t; + + m = size; + if((size % 2) == 0) { + m--; + if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + ss_fixdown(Td, PA, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. */ +static INLINE +int * +ss_median3(const unsigned char *Td, const int *PA, + int *v1, int *v2, int *v3) { + int *t; + if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); } + if(Td[PA[*v2]] > Td[PA[*v3]]) { + if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. */ +static INLINE +int * +ss_median5(const unsigned char *Td, const int *PA, + int *v1, int *v2, int *v3, int *v4, int *v5) { + int *t; + if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); } + if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); } + if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); } + if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); } + if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); } + if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; } + return v3; +} + +/* Returns the pivot element. */ +static INLINE +int * +ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) { + int *middle; + int t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return ss_median3(Td, PA, first, middle, last - 1); + } else { + t >>= 2; + return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = ss_median3(Td, PA, first, first + t, first + (t << 1)); + middle = ss_median3(Td, PA, middle - t, middle, middle + t); + last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1); + return ss_median3(Td, PA, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +/* Binary partition for substrings. 
*/ +static INLINE +int * +ss_partition(const int *PA, + int *first, int *last, int depth) { + int *a, *b; + int t; + for(a = first - 1, b = last;;) { + for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; } + for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { } + if(b <= a) { break; } + t = ~*b; + *b = *a; + *a = t; + } + if(first < a) { *first = ~*first; } + return a; +} + +/* Multikey introsort for medium size groups. */ +static +void +ss_mintrosort(const unsigned char *T, const int *PA, + int *first, int *last, + int depth) { +#define STACK_SIZE SS_MISORT_STACKSIZE + struct { int *a, *b, c; int d; } stack[STACK_SIZE]; + const unsigned char *Td; + int *a, *b, *c, *d, *e, *f; + int s, t; + int ssize; + int limit; + int v, x = 0; + + for(ssize = 0, limit = ss_ilg(last - first);;) { + + if((last - first) <= SS_INSERTIONSORT_THRESHOLD) { +#if 1 < SS_INSERTIONSORT_THRESHOLD + if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); } +#endif + STACK_POP(first, last, depth, limit); + continue; + } + + Td = T + depth; + if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); } + if(limit < 0) { + for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) { + if((x = Td[PA[*a]]) != v) { + if(1 < (a - first)) { break; } + v = x; + first = a; + } + } + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, a, depth); + } + if((a - first) <= (last - a)) { + if(1 < (a - first)) { + STACK_PUSH(a, last, depth, -1); + last = a, depth += 1, limit = ss_ilg(a - first); + } else { + first = a, limit = -1; + } + } else { + if(1 < (last - a)) { + STACK_PUSH(first, a, depth + 1, ss_ilg(a - first)); + first = a, limit = -1; + } else { + last = a, depth += 1, limit = ss_ilg(a - first); + } + } + continue; + } + + /* choose pivot */ + a = ss_pivot(Td, PA, first, last); + v = Td[PA[*a]]; + SWAP(*first, *a); + + /* partition */ + for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + + a = first + (b - a), c = last - (d - c); + b = (v <= Td[PA[*a] - 1]) ? 
a : ss_partition(PA, a, c, depth); + + if((a - first) <= (last - c)) { + if((last - c) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(c, last, depth, limit); + last = a; + } else if((a - first) <= (c - b)) { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + last = a; + } else { + STACK_PUSH(c, last, depth, limit); + STACK_PUSH(first, a, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } else { + if((a - first) <= (c - b)) { + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + STACK_PUSH(first, a, depth, limit); + first = c; + } else if((last - c) <= (c - b)) { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(b, c, depth + 1, ss_ilg(c - b)); + first = c; + } else { + STACK_PUSH(first, a, depth, limit); + STACK_PUSH(c, last, depth, limit); + first = b, last = c, depth += 1, limit = ss_ilg(c - b); + } + } + } else { + limit += 1; + if(Td[PA[*first] - 1] < v) { + first = ss_partition(PA, first, last, depth); + limit = ss_ilg(last - first); + } + depth += 1; + } + } +#undef STACK_SIZE +} + +#endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */ + + +/*---------------------------------------------------------------------------*/ + +#if SS_BLOCKSIZE != 0 + +static INLINE +void +ss_blockswap(int *a, int *b, int n) { + int t; + for(; 0 < n; --n, ++a, ++b) { + t = *a, *a = *b, *b = t; + } +} + +static INLINE +void +ss_rotate(int *first, int *middle, int *last) { + int *a, *b, t; + int l, r; + l = middle - first, r = last - middle; + for(; (0 < l) && (0 < r);) { + if(l == r) { ss_blockswap(first, middle, l); break; } + if(l < r) { + a = last - 1, b = middle - 1; + t = *a; + do { + *a-- = *b, *b-- = *a; + if(b < first) { + *a = t; + last = a; + if((r -= l + 1) <= l) { break; } + a -= 1, b = middle - 1; + t = *a; + } + } while(1); + } else { + a = first, b = middle; + t = *a; + do { + *a++ = *b, *b++ = *a; + if(last <= b) { + *a = t; + first = a + 1; + if((l -= r + 1) <= r) { break; } + a += 1, b = middle; + t = *a; + } + } while(1); + } + } +} + + +/*---------------------------------------------------------------------------*/ + +static +void +ss_inplacemerge(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int depth) { + const int *p; + int *a, *b; + int len, half; + int q, r; + int x; + + for(;;) { + if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); } + else { x = 0; p = PA + *(last - 1); } + for(a = first, len = middle - first, half = len >> 1, r = -1; + 0 < len; + len = half, half >>= 1) { + b = a + half; + q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth); + if(q < 0) { + a = b + 1; + half -= (len & 1) ^ 1; + } else { + r = q; + } + } + if(a < middle) { + if(r == 0) { *a = ~*a; } + ss_rotate(a, middle, last); + last -= middle - a; + middle = a; + if(first == middle) { break; } + } + --last; + if(x != 0) { while(*--last < 0) { } } + if(middle == last) { break; } + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Merge-forward with internal buffer. 
*/ +static +void +ss_mergeforward(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int depth) { + int *a, *b, *c, *bufend; + int t; + int r; + + bufend = buf + (middle - first) - 1; + ss_blockswap(buf, first, middle - first); + + for(t = *(a = first), b = buf, c = middle;;) { + r = ss_compare(T, PA + *b, PA + *c, depth); + if(r < 0) { + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + } else if(r > 0) { + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } else { + *c = ~*c; + do { + *a++ = *b; + if(bufend <= b) { *bufend = t; return; } + *b++ = *a; + } while(*b < 0); + + do { + *a++ = *c, *c++ = *a; + if(last <= c) { + while(b < bufend) { *a++ = *b, *b++ = *a; } + *a = *b, *b = t; + return; + } + } while(*c < 0); + } + } +} + +/* Merge-backward with internal buffer. */ +static +void +ss_mergebackward(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int depth) { + const int *p1, *p2; + int *a, *b, *c, *bufend; + int t; + int r; + int x; + + bufend = buf + (last - middle) - 1; + ss_blockswap(buf, middle, last - middle); + + x = 0; + if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; } + else { p1 = PA + *bufend; } + if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; } + else { p2 = PA + *(middle - 1); } + for(t = *(a = last - 1), b = bufend, c = middle - 1;;) { + r = ss_compare(T, p1, p2, depth); + if(0 < r) { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = *b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + } else if(r < 0) { + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + else { p2 = PA + *c; } + } else { + if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; } + *a-- = ~*b; + if(b <= buf) { *buf = t; break; } + *b-- = *a; + if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; } + *a-- = *c, *c-- = *a; + if(c < first) { + while(buf < b) { *a-- = *b, *b-- = *a; } + *a = *b, *b = t; + break; + } + if(*b < 0) { p1 = PA + ~*b; x |= 1; } + else { p1 = PA + *b; } + if(*c < 0) { p2 = PA + ~*c; x |= 2; } + else { p2 = PA + *c; } + } + } +} + +/* D&C based merge. */ +static +void +ss_swapmerge(const unsigned char *T, const int *PA, + int *first, int *middle, int *last, + int *buf, int bufsize, int depth) { +#define STACK_SIZE SS_SMERGE_STACKSIZE +#define GETIDX(a) ((0 <= (a)) ? 
(a) : (~(a))) +#define MERGE_CHECK(a, b, c)\ + do {\ + if(((c) & 1) ||\ + (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\ + *(a) = ~*(a);\ + }\ + if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\ + *(b) = ~*(b);\ + }\ + } while(0) + struct { int *a, *b, *c; int d; } stack[STACK_SIZE]; + int *l, *r, *lm, *rm; + int m, len, half; + int ssize; + int check, next; + + for(check = 0, ssize = 0;;) { + if((last - middle) <= bufsize) { + if((first < middle) && (middle < last)) { + ss_mergebackward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + if((middle - first) <= bufsize) { + if(first < middle) { + ss_mergeforward(T, PA, first, middle, last, buf, depth); + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + continue; + } + + for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1; + 0 < len; + len = half, half >>= 1) { + if(ss_compare(T, PA + GETIDX(*(middle + m + half)), + PA + GETIDX(*(middle - m - half - 1)), depth) < 0) { + m += half + 1; + half -= (len & 1) ^ 1; + } + } + + if(0 < m) { + lm = middle - m, rm = middle + m; + ss_blockswap(lm, middle, m); + l = r = middle, next = 0; + if(rm < last) { + if(*rm < 0) { + *rm = ~*rm; + if(first < lm) { for(; *--l < 0;) { } next |= 4; } + next |= 1; + } else if(first < lm) { + for(; *r < 0; ++r) { } + next |= 2; + } + } + + if((l - first) <= (last - r)) { + STACK_PUSH(r, rm, last, (next & 3) | (check & 4)); + middle = lm, last = l, check = (check & 3) | (next & 4); + } else { + if((next & 2) && (r == middle)) { next ^= 6; } + STACK_PUSH(first, lm, l, (check & 3) | (next & 4)); + first = r, middle = rm, check = (next & 3) | (check & 4); + } + } else { + if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) { + *middle = ~*middle; + } + MERGE_CHECK(first, last, check); + STACK_POP(first, middle, last, check); + } + } +#undef STACK_SIZE +} + +#endif /* SS_BLOCKSIZE != 0 */ + + +/*---------------------------------------------------------------------------*/ + +/* Substring sort */ +static +void +sssort(const unsigned char *T, const int *PA, + int *first, int *last, + int *buf, int bufsize, + int depth, int n, int lastsuffix) { + int *a; +#if SS_BLOCKSIZE != 0 + int *b, *middle, *curbuf; + int j, k, curbufsize, limit; +#endif + int i; + + if(lastsuffix != 0) { ++first; } + +#if SS_BLOCKSIZE == 0 + ss_mintrosort(T, PA, first, last, depth); +#else + if((bufsize < SS_BLOCKSIZE) && + (bufsize < (last - first)) && + (bufsize < (limit = ss_isqrt(last - first)))) { + if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; } + buf = middle = last - limit, bufsize = limit; + } else { + middle = last, limit = 0; + } + for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth); +#endif + curbufsize = last - (a + SS_BLOCKSIZE); + curbuf = a + SS_BLOCKSIZE; + if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; } + for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) { + ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth); + } + } +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, a, middle, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, a, middle, depth); +#endif + for(k = SS_BLOCKSIZE; i != 0; 
k <<= 1, i >>= 1) { + if(i & 1) { + ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth); + a -= k; + } + } + if(limit != 0) { +#if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE + ss_mintrosort(T, PA, middle, last, depth); +#elif 1 < SS_BLOCKSIZE + ss_insertionsort(T, PA, middle, last, depth); +#endif + ss_inplacemerge(T, PA, first, middle, last, depth); + } +#endif + + if(lastsuffix != 0) { + /* Insert last type B* suffix. */ + int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2; + for(a = first, i = *(first - 1); + (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth))); + ++a) { + *(a - 1) = *a; + } + *(a - 1) = i; + } +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +int +tr_ilg(int n) { + return (n & 0xffff0000) ? + ((n & 0xff000000) ? + 24 + lg_table[(n >> 24) & 0xff] : + 16 + lg_table[(n >> 16) & 0xff]) : + ((n & 0x0000ff00) ? + 8 + lg_table[(n >> 8) & 0xff] : + 0 + lg_table[(n >> 0) & 0xff]); +} + + +/*---------------------------------------------------------------------------*/ + +/* Simple insertionsort for small size groups. */ +static +void +tr_insertionsort(const int *ISAd, int *first, int *last) { + int *a, *b; + int t, r; + + for(a = first + 1; a < last; ++a) { + for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) { + do { *(b + 1) = *b; } while((first <= --b) && (*b < 0)); + if(b < first) { break; } + } + if(r == 0) { *b = ~*b; } + *(b + 1) = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_fixdown(const int *ISAd, int *SA, int i, int size) { + int j, k; + int v; + int c, d, e; + + for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) { + d = ISAd[SA[k = j++]]; + if(d < (e = ISAd[SA[j]])) { k = j; d = e; } + if(d <= c) { break; } + } + SA[i] = v; +} + +/* Simple top-down heapsort. */ +static +void +tr_heapsort(const int *ISAd, int *SA, int size) { + int i, m; + int t; + + m = size; + if((size % 2) == 0) { + m--; + if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); } + } + + for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); } + if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); } + for(i = m - 1; 0 < i; --i) { + t = SA[0], SA[0] = SA[i]; + tr_fixdown(ISAd, SA, 0, i); + SA[i] = t; + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Returns the median of three elements. */ +static INLINE +int * +tr_median3(const int *ISAd, int *v1, int *v2, int *v3) { + int *t; + if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); } + if(ISAd[*v2] > ISAd[*v3]) { + if(ISAd[*v1] > ISAd[*v3]) { return v1; } + else { return v3; } + } + return v2; +} + +/* Returns the median of five elements. */ +static INLINE +int * +tr_median5(const int *ISAd, + int *v1, int *v2, int *v3, int *v4, int *v5) { + int *t; + if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); } + if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); } + if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); } + if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); } + if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); } + if(ISAd[*v3] > ISAd[*v4]) { return v4; } + return v3; +} + +/* Returns the pivot element. 
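+   As a reading aid for the code below: tr_pivot() picks the pivot adaptively
+   from the range size t. It uses a median of three for t <= 32, a median of
+   five for t <= 512, and otherwise a pseudo-median of nine (the median of
+   three median-of-three samples), which keeps the tandem-repeat introsort
+   partitions balanced without scanning every element of the range.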
*/ +static INLINE +int * +tr_pivot(const int *ISAd, int *first, int *last) { + int *middle; + int t; + + t = last - first; + middle = first + t / 2; + + if(t <= 512) { + if(t <= 32) { + return tr_median3(ISAd, first, middle, last - 1); + } else { + t >>= 2; + return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1); + } + } + t >>= 3; + first = tr_median3(ISAd, first, first + t, first + (t << 1)); + middle = tr_median3(ISAd, middle - t, middle, middle + t); + last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1); + return tr_median3(ISAd, first, middle, last); +} + + +/*---------------------------------------------------------------------------*/ + +typedef struct _trbudget_t trbudget_t; +struct _trbudget_t { + int chance; + int remain; + int incval; + int count; +}; + +static INLINE +void +trbudget_init(trbudget_t *budget, int chance, int incval) { + budget->chance = chance; + budget->remain = budget->incval = incval; +} + +static INLINE +int +trbudget_check(trbudget_t *budget, int size) { + if(size <= budget->remain) { budget->remain -= size; return 1; } + if(budget->chance == 0) { budget->count += size; return 0; } + budget->remain += budget->incval - size; + budget->chance -= 1; + return 1; +} + + +/*---------------------------------------------------------------------------*/ + +static INLINE +void +tr_partition(const int *ISAd, + int *first, int *middle, int *last, + int **pa, int **pb, int v) { + int *a, *b, *c, *d, *e, *f; + int t, s; + int x = 0; + + for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { } + if(((a = b) < last) && (x < v)) { + for(; (++b < last) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + } + for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { } + if((b < (d = c)) && (x > v)) { + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + for(; b < c;) { + SWAP(*b, *c); + for(; (++b < c) && ((x = ISAd[*b]) <= v);) { + if(x == v) { SWAP(*b, *a); ++a; } + } + for(; (b < --c) && ((x = ISAd[*c]) >= v);) { + if(x == v) { SWAP(*c, *d); --d; } + } + } + + if(a <= d) { + c = b - 1; + if((s = a - first) > (t = b - a)) { s = t; } + for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + if((s = d - c) > (t = last - d - 1)) { s = t; } + for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); } + first += (b - a), last -= (d - c); + } + *pa = first, *pb = last; +} + +static +void +tr_copy(int *ISA, const int *SA, + int *first, int *a, int *b, int *last, + int depth) { + /* sort suffixes of middle partition + by using sorted order of suffixes of left and right partition. 
*/ + int *c, *d, *e; + int s, v; + + v = b - SA - 1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + ISA[s] = d - SA; + } + } + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + ISA[s] = d - SA; + } + } +} + +static +void +tr_partialcopy(int *ISA, const int *SA, + int *first, int *a, int *b, int *last, + int depth) { + int *c, *d, *e; + int s, v; + int rank, lastrank, newrank = -1; + + v = b - SA - 1; + lastrank = -1; + for(c = first, d = a - 1; c <= d; ++c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *++d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } + + lastrank = -1; + for(e = d; first <= e; --e) { + rank = ISA[*e]; + if(lastrank != rank) { lastrank = rank; newrank = e - SA; } + if(newrank != rank) { ISA[*e] = newrank; } + } + + lastrank = -1; + for(c = last - 1, e = d + 1, d = b; e < d; --c) { + if((0 <= (s = *c - depth)) && (ISA[s] == v)) { + *--d = s; + rank = ISA[s + depth]; + if(lastrank != rank) { lastrank = rank; newrank = d - SA; } + ISA[s] = newrank; + } + } +} + +static +void +tr_introsort(int *ISA, const int *ISAd, + int *SA, int *first, int *last, + trbudget_t *budget) { +#define STACK_SIZE TR_STACKSIZE + struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE]; + int *a, *b, *c; + int t; + int v, x = 0; + int incr = ISAd - ISA; + int limit, next; + int ssize, trlink = -1; + + for(ssize = 0, limit = tr_ilg(last - first);;) { + + if(limit < 0) { + if(limit == -1) { + /* tandem repeat partition */ + tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1); + + /* update ranks */ + if(a < last) { + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + } + if(b < last) { + for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } + } + + /* push */ + if(1 < (b - a)) { + STACK_PUSH5(NULL, a, b, 0, 0); + STACK_PUSH5(ISAd - incr, first, last, -2, trlink); + trlink = ssize - 2; + } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink); + last = a, limit = tr_ilg(a - first); + } else if(1 < (last - b)) { + first = b, limit = tr_ilg(last - b); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink); + first = b, limit = tr_ilg(last - b); + } else if(1 < (a - first)) { + last = a, limit = tr_ilg(a - first); + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else if(limit == -2) { + /* tandem repeat copy */ + a = stack[--ssize].b, b = stack[ssize].c; + if(stack[ssize].d == 0) { + tr_copy(ISA, SA, first, a, b, last, ISAd - ISA); + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA); + } + STACK_POP5(ISAd, first, last, limit, trlink); + } else { + /* sorted partition */ + if(0 <= *first) { + a = first; + do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a)); + first = a; + } + if(first < last) { + a = first; do { *a = ~*a; } while(*++a < 0); + next = (ISA[*a] != ISAd[*a]) ? 
tr_ilg(a - first + 1) : -1; + if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } } + + /* push */ + if(trbudget_check(budget, a - first)) { + if((a - first) <= (last - a)) { + STACK_PUSH5(ISAd, a, last, -3, trlink); + ISAd += incr, last = a, limit = next; + } else { + if(1 < (last - a)) { + STACK_PUSH5(ISAd + incr, first, a, next, trlink); + first = a, limit = -3; + } else { + ISAd += incr, last = a, limit = next; + } + } + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + if(1 < (last - a)) { + first = a, limit = -3; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + continue; + } + + if((last - first) <= TR_INSERTIONSORT_THRESHOLD) { + tr_insertionsort(ISAd, first, last); + limit = -3; + continue; + } + + if(limit-- == 0) { + tr_heapsort(ISAd, first, last - first); + for(a = last - 1; first < a; a = b) { + for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; } + } + limit = -3; + continue; + } + + /* choose pivot */ + a = tr_pivot(ISAd, first, last); + SWAP(*first, *a); + v = ISAd[*first]; + + /* partition */ + tr_partition(ISAd, first, first + 1, last, &a, &b, v); + if((last - first) != (b - a)) { + next = (ISA[*a] != v) ? tr_ilg(b - a) : -1; + + /* update ranks */ + for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; } + if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } } + + /* push */ + if((1 < (b - a)) && (trbudget_check(budget, b - a))) { + if((a - first) <= (last - b)) { + if((last - b) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } else if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((a - first) <= (b - a)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, b, last, limit, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + if((a - first) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + last = a; + } else { + ISAd += incr, first = a, last = b, limit = next; + } + } else if((last - b) <= (b - a)) { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd + incr, a, b, next, trlink); + first = b; + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } else { + STACK_PUSH5(ISAd, first, a, limit, trlink); + STACK_PUSH5(ISAd, b, last, limit, trlink); + ISAd += incr, first = a, last = b, limit = next; + } + } + } else { + if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; } + if((a - first) <= (last - b)) { + if(1 < (a - first)) { + STACK_PUSH5(ISAd, b, last, limit, trlink); + last = a; + } else if(1 < (last - b)) { + first = b; + } else { + STACK_POP5(ISAd, first, last, limit, trlink); + } + } else { + if(1 < (last - b)) { + STACK_PUSH5(ISAd, first, a, limit, trlink); + first = b; + } else if(1 < (a - first)) { + last = a; + } else { + STACK_POP5(ISAd, 
first, last, limit, trlink); + } + } + } + } else { + if(trbudget_check(budget, last - first)) { + limit = tr_ilg(last - first), ISAd += incr; + } else { + if(0 <= trlink) { stack[trlink].d = -1; } + STACK_POP5(ISAd, first, last, limit, trlink); + } + } + } +#undef STACK_SIZE +} + + + +/*---------------------------------------------------------------------------*/ + +/* Tandem repeat sort */ +static +void +trsort(int *ISA, int *SA, int n, int depth) { + int *ISAd; + int *first, *last; + trbudget_t budget; + int t, skip, unsorted; + + trbudget_init(&budget, tr_ilg(n) * 2 / 3, n); +/* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */ + for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) { + first = SA; + skip = 0; + unsorted = 0; + do { + if((t = *first) < 0) { first -= t; skip += t; } + else { + if(skip != 0) { *(first + skip) = skip; skip = 0; } + last = SA + ISA[t] + 1; + if(1 < (last - first)) { + budget.count = 0; + tr_introsort(ISA, ISAd, SA, first, last, &budget); + if(budget.count != 0) { unsorted += budget.count; } + else { skip = first - last; } + } else if((last - first) == 1) { + skip = -1; + } + first = last; + } + } while(first < (SA + n)); + if(skip != 0) { *(first + skip) = skip; } + if(unsorted == 0) { break; } + } +} + + +/*---------------------------------------------------------------------------*/ + +/* Sorts suffixes of type B*. */ +static +int +sort_typeBstar(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int openMP) { + int *PAb, *ISAb, *buf; +#ifdef LIBBSC_OPENMP + int *curbuf; + int l; +#endif + int i, j, k, t, m, bufsize; + int c0, c1; +#ifdef LIBBSC_OPENMP + int d0, d1; +#endif + (void)openMP; + + /* Initialize bucket arrays. */ + for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; } + for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; } + + /* Count the number of occurrences of the first one or two characters of each + type A, B and B* suffix. Moreover, store the beginning position of all + type B* suffixes into the array SA. */ + for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) { + /* type A suffix. */ + do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1)); + if(0 <= i) { + /* type B* suffix. */ + ++BUCKET_BSTAR(c0, c1); + SA[--m] = i; + /* type B suffix. */ + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { + ++BUCKET_B(c0, c1); + } + } + } + m = n - m; +/* +note: + A type B* suffix is lexicographically smaller than a type B suffix that + begins with the same first two characters. +*/ + + /* Calculate the index of start/end point of each bucket. */ + for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) { + t = i + BUCKET_A(c0); + BUCKET_A(c0) = i + j; /* start point */ + i = t + BUCKET_B(c0, c0); + for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) { + j += BUCKET_BSTAR(c0, c1); + BUCKET_BSTAR(c0, c1) = j; /* end point */ + i += BUCKET_B(c0, c1); + } + } + + if(0 < m) { + /* Sort the type B* suffixes by their first two characters. */ + PAb = SA + n - m; ISAb = SA + m; + for(i = m - 2; 0 <= i; --i) { + t = PAb[i], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = i; + } + t = PAb[m - 1], c0 = T[t], c1 = T[t + 1]; + SA[--BUCKET_BSTAR(c0, c1)] = m - 1; + + /* Sort the type B* substrings using sssort. 
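+       (Classification recap, as implemented by the counting scan above: moving
+       right to left, suffixes are type A while T[i] >= T[i+1] inside a
+       non-increasing run; the first position with T[i] < T[i+1] after such a
+       run is recorded as type B*, and the following non-decreasing positions
+       are plain type B. Only the m type B* suffixes, parked at the tail of SA,
+       are fully sorted here; the order of the A and B suffixes is induced from
+       them later in construct_SA()/construct_BWT().)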
*/ +#ifdef LIBBSC_OPENMP + if (openMP) + { + buf = SA + m; + c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m; +#pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1) + { + bufsize = (n - (2 * m)) / omp_get_num_threads(); + curbuf = buf + omp_get_thread_num() * bufsize; + k = 0; + for(;;) { + #pragma omp critical(sssort_lock) + { + if(0 < (l = j)) { + d0 = c0, d1 = c1; + do { + k = BUCKET_BSTAR(d0, d1); + if(--d1 <= d0) { + d1 = ALPHABET_SIZE - 1; + if(--d0 < 0) { break; } + } + } while(((l - k) <= 1) && (0 < (l = k))); + c0 = d0, c1 = d1, j = k; + } + } + if(l == 0) { break; } + sssort(T, PAb, SA + k, SA + l, + curbuf, bufsize, 2, n, *(SA + k) == (m - 1)); + } + } + } + else + { + buf = SA + m, bufsize = n - (2 * m); + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { + i = BUCKET_BSTAR(c0, c1); + if(1 < (j - i)) { + sssort(T, PAb, SA + i, SA + j, + buf, bufsize, 2, n, *(SA + i) == (m - 1)); + } + } + } + } +#else + buf = SA + m, bufsize = n - (2 * m); + for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) { + for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) { + i = BUCKET_BSTAR(c0, c1); + if(1 < (j - i)) { + sssort(T, PAb, SA + i, SA + j, + buf, bufsize, 2, n, *(SA + i) == (m - 1)); + } + } + } +#endif + + /* Compute ranks of type B* substrings. */ + for(i = m - 1; 0 <= i; --i) { + if(0 <= SA[i]) { + j = i; + do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i])); + SA[i + 1] = i - j; + if(i <= 0) { break; } + } + j = i; + do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0); + ISAb[SA[i]] = j; + } + + /* Construct the inverse suffix array of type B* suffixes using trsort. */ + trsort(ISAb, SA, m, 1); + + /* Set the sorted order of type B* suffixes. */ + for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) { + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { } + if(0 <= i) { + t = i; + for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { } + SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t; + } + } + + /* Calculate the index of start/end point of each bucket. */ + BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */ + for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) { + i = BUCKET_A(c0 + 1) - 1; + for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) { + t = i - BUCKET_B(c0, c1); + BUCKET_B(c0, c1) = i; /* end point */ + + /* Move all type B* suffixes to the correct position. */ + for(i = t, j = BUCKET_BSTAR(c0, c1); + j <= k; + --i, --k) { SA[i] = SA[k]; } + } + BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */ + BUCKET_B(c0, c0) = i; /* end point */ + } + } + + return m; +} + +/* Constructs the suffix array by using the sorted order of type B* suffixes. */ +static +void +construct_SA(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m) { + int *i, *j, *k; + int s; + int c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. 
*/ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + *j = ~s; + c0 = T[--s]; + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); assert(k != NULL); + *k-- = s; + } else { + assert(((s == 0) && (T[s] == c1)) || (s < 0)); + *j = ~s; + } + } + } + } + + /* Construct the suffix array by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1); + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + if((s == 0) || (T[s - 1] < c0)) { s = ~s; } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else { + assert(s < 0); + *i = ~s; + } + } +} + +/* Constructs the burrows-wheeler transformed string directly + by using the sorted order of type B* suffixes. */ +static +int +construct_BWT(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m) { + int *i, *j, *k, *orig; + int s; + int c0, c1, c2; + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. */ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + c0 = T[--s]; + *j = ~((int)c0); + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); assert(k != NULL); + *k-- = s; + } else if(s != 0) { + *j = ~s; +#ifndef NDEBUG + } else { + assert(T[s] == c1); +#endif + } + } + } + } + + /* Construct the BWTed string by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1); + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + c0 = T[--s]; + *i = c0; + if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); } + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + *k++ = s; + } else if(s != 0) { + *i = ~s; + } else { + orig = i; + } + } + + return orig - SA; +} + +/* Constructs the burrows-wheeler transformed string directly + by using the sorted order of type B* suffixes. */ +static +int +construct_BWT_indexes(const unsigned char *T, int *SA, + int *bucket_A, int *bucket_B, + int n, int m, + unsigned char * num_indexes, int * indexes) { + int *i, *j, *k, *orig; + int s; + int c0, c1, c2; + + int mod = n / 8; + { + mod |= mod >> 1; mod |= mod >> 2; + mod |= mod >> 4; mod |= mod >> 8; + mod |= mod >> 16; mod >>= 1; + + *num_indexes = (unsigned char)((n - 1) / (mod + 1)); + } + + if(0 < m) { + /* Construct the sorted order of type B suffixes by using + the sorted order of type B* suffixes. */ + for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) { + /* Scan the suffix array from right to left. 
*/ + for(i = SA + BUCKET_BSTAR(c1, c1 + 1), + j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1; + i <= j; + --j) { + if(0 < (s = *j)) { + assert(T[s] == c1); + assert(((s + 1) < n) && (T[s] <= T[s + 1])); + assert(T[s - 1] <= T[s]); + + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA; + + c0 = T[--s]; + *j = ~((int)c0); + if((0 < s) && (T[s - 1] > c0)) { s = ~s; } + if(c0 != c2) { + if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; } + k = SA + BUCKET_B(c2 = c0, c1); + } + assert(k < j); assert(k != NULL); + *k-- = s; + } else if(s != 0) { + *j = ~s; +#ifndef NDEBUG + } else { + assert(T[s] == c1); +#endif + } + } + } + } + + /* Construct the BWTed string by using + the sorted order of type B suffixes. */ + k = SA + BUCKET_A(c2 = T[n - 1]); + if (T[n - 2] < c2) { + if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA; + *k++ = ~((int)T[n - 2]); + } + else { + *k++ = n - 1; + } + + /* Scan the suffix array from left to right. */ + for(i = SA, j = SA + n, orig = SA; i < j; ++i) { + if(0 < (s = *i)) { + assert(T[s - 1] >= T[s]); + + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA; + + c0 = T[--s]; + *i = c0; + if(c0 != c2) { + BUCKET_A(c2) = k - SA; + k = SA + BUCKET_A(c2 = c0); + } + assert(i < k); + if((0 < s) && (T[s - 1] < c0)) { + if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA; + *k++ = ~((int)T[s - 1]); + } else + *k++ = s; + } else if(s != 0) { + *i = ~s; + } else { + orig = i; + } + } + + return orig - SA; +} + + +/*---------------------------------------------------------------------------*/ + +/*- Function -*/ + +int +divsufsort(const unsigned char *T, int *SA, int n, int openMP) { + int *bucket_A, *bucket_B; + int m; + int err = 0; + + /* Check arguments. */ + if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; } + else if(n == 0) { return 0; } + else if(n == 1) { SA[0] = 0; return 0; } + else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } + + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); + + /* Suffixsort. */ + if((bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP); + construct_SA(T, SA, bucket_A, bucket_B, n, m); + } else { + err = -2; + } + + free(bucket_B); + free(bucket_A); + + return err; +} + +int +divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) { + int *B; + int *bucket_A, *bucket_B; + int m, pidx, i; + + /* Check arguments. */ + if((T == NULL) || (U == NULL) || (n < 0)) { return -1; } + else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; } + + if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); } + bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int)); + bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int)); + + /* Burrows-Wheeler Transform. */ + if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) { + m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP); + + if (num_indexes == NULL || indexes == NULL) { + pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m); + } else { + pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes); + } + + /* Copy to output string. 
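+      (In other words: construct_BWT()/construct_BWT_indexes() return the
+      position of the untransformed string inside B; the loop below emits
+      T[n-1] first, copies the remaining BWT bytes around that position, and
+      divbwt() returns the 1-based primary index that a decoder needs, or a
+      negative error code.)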
*/
+  U[0] = T[n - 1];
+  for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; }
+  for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; }
+  pidx += 1;
+ } else {
+  pidx = -2;
+ }
+
+ free(bucket_B);
+ free(bucket_A);
+ if(A == NULL) { free(B); }
+
+ return pidx;
+}
+/**** ended inlining dictBuilder/divsufsort.c ****/
+/**** start inlining dictBuilder/fastcover.c ****/
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+/*-*************************************
+* Dependencies
+***************************************/
+#include <stdio.h>  /* fprintf */
+#include <stdlib.h> /* malloc, free, qsort */
+#include <string.h> /* memset */
+#include <time.h>   /* clock */
+
+#ifndef ZDICT_STATIC_LINKING_ONLY
+# define ZDICT_STATIC_LINKING_ONLY
+#endif
+
+/**** skipping file: ../common/mem.h ****/
+/**** skipping file: ../common/pool.h ****/
+/**** skipping file: ../common/threading.h ****/
+/**** skipping file: ../common/zstd_internal.h ****/
+/**** skipping file: ../compress/zstd_compress_internal.h ****/
+/**** skipping file: ../zdict.h ****/
+/**** skipping file: cover.h ****/
+
+
+/*-*************************************
+* Constants
+***************************************/
+#define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((unsigned)-1) : ((unsigned)1 GB))
+#define FASTCOVER_MAX_F 31
+#define FASTCOVER_MAX_ACCEL 10
+#define FASTCOVER_DEFAULT_SPLITPOINT 0.75
+#define DEFAULT_F 20
+#define DEFAULT_ACCEL 1
+
+
+/*-*************************************
+* Console display
+***************************************/
+#ifndef LOCALDISPLAYLEVEL
+static int g_displayLevel = 2;
+#endif
+#undef DISPLAY
+#define DISPLAY(...) \
+  { \
+    fprintf(stderr, __VA_ARGS__); \
+    fflush(stderr); \
+  }
+#undef LOCALDISPLAYLEVEL
+#define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
+  if (displayLevel >= l) { \
+    DISPLAY(__VA_ARGS__); \
+  } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
+#undef DISPLAYLEVEL
+#define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
+
+#ifndef LOCALDISPLAYUPDATE
+static const clock_t g_refreshRate = CLOCKS_PER_SEC * 15 / 100;
+static clock_t g_time = 0;
+#endif
+#undef LOCALDISPLAYUPDATE
+#define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
+  if (displayLevel >= l) { \
+    if ((clock() - g_time > g_refreshRate) || (displayLevel >= 4)) { \
+      g_time = clock(); \
+      DISPLAY(__VA_ARGS__); \
+    } \
+  }
+#undef DISPLAYUPDATE
+#define DISPLAYUPDATE(l, ...)
LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__) + + +/*-************************************* +* Hash Functions +***************************************/ +/** + * Hash the d-byte value pointed to by p and mod 2^f into the frequency vector + */ +static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 f, unsigned d) { + if (d == 6) { + return ZSTD_hash6Ptr(p, f); + } + return ZSTD_hash8Ptr(p, f); +} + + +/*-************************************* +* Acceleration +***************************************/ +typedef struct { + unsigned finalize; /* Percentage of training samples used for ZDICT_finalizeDictionary */ + unsigned skip; /* Number of dmer skipped between each dmer counted in computeFrequency */ +} FASTCOVER_accel_t; + + +static const FASTCOVER_accel_t FASTCOVER_defaultAccelParameters[FASTCOVER_MAX_ACCEL+1] = { + { 100, 0 }, /* accel = 0, should not happen because accel = 0 defaults to accel = 1 */ + { 100, 0 }, /* accel = 1 */ + { 50, 1 }, /* accel = 2 */ + { 34, 2 }, /* accel = 3 */ + { 25, 3 }, /* accel = 4 */ + { 20, 4 }, /* accel = 5 */ + { 17, 5 }, /* accel = 6 */ + { 14, 6 }, /* accel = 7 */ + { 13, 7 }, /* accel = 8 */ + { 11, 8 }, /* accel = 9 */ + { 10, 9 }, /* accel = 10 */ +}; + + +/*-************************************* +* Context +***************************************/ +typedef struct { + const BYTE *samples; + size_t *offsets; + const size_t *samplesSizes; + size_t nbSamples; + size_t nbTrainSamples; + size_t nbTestSamples; + size_t nbDmers; + U32 *freqs; + unsigned d; + unsigned f; + FASTCOVER_accel_t accelParams; +} FASTCOVER_ctx_t; + + +/*-************************************* +* Helper functions +***************************************/ +/** + * Selects the best segment in an epoch. + * Segments of are scored according to the function: + * + * Let F(d) be the frequency of all dmers with hash value d. + * Let S_i be hash value of the dmer at position i of segment S which has length k. + * + * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1}) + * + * Once the dmer with hash value d is in the dictionary we set F(d) = 0. + */ +static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx, + U32 *freqs, U32 begin, U32 end, + ZDICT_cover_params_t parameters, + U16* segmentFreqs) { + /* Constants */ + const U32 k = parameters.k; + const U32 d = parameters.d; + const U32 f = ctx->f; + const U32 dmersInK = k - d + 1; + + /* Try each segment (activeSegment) and save the best (bestSegment) */ + COVER_segment_t bestSegment = {0, 0, 0}; + COVER_segment_t activeSegment; + + /* Reset the activeDmers in the segment */ + /* The activeSegment starts at the beginning of the epoch. */ + activeSegment.begin = begin; + activeSegment.end = begin; + activeSegment.score = 0; + + /* Slide the activeSegment through the whole epoch. + * Save the best segment in bestSegment. 
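+   * For intuition, with small illustrative numbers: with d=8 and k=16 a
+   * segment spans k-d+1 = 9 dmers. Each step hashes the dmer at `end` and
+   * adds freqs[idx] to the score only when that hash is not already counted
+   * in the window (segmentFreqs[idx] == 0); once the window holds 10 starts,
+   * the dmer at `begin` is dropped and its frequency is subtracted when its
+   * last occurrence leaves. The whole epoch is therefore scanned with
+   * O(end - begin) hash evaluations.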
+ */ + while (activeSegment.end < end) { + /* Get hash value of current dmer */ + const size_t idx = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d); + + /* Add frequency of this index to score if this is the first occurrence of index in active segment */ + if (segmentFreqs[idx] == 0) { + activeSegment.score += freqs[idx]; + } + /* Increment end of segment and segmentFreqs*/ + activeSegment.end += 1; + segmentFreqs[idx] += 1; + /* If the window is now too large, drop the first position */ + if (activeSegment.end - activeSegment.begin == dmersInK + 1) { + /* Get hash value of the dmer to be eliminated from active segment */ + const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d); + segmentFreqs[delIndex] -= 1; + /* Subtract frequency of this index from score if this is the last occurrence of this index in active segment */ + if (segmentFreqs[delIndex] == 0) { + activeSegment.score -= freqs[delIndex]; + } + /* Increment start of segment */ + activeSegment.begin += 1; + } + + /* If this segment is the best so far save it */ + if (activeSegment.score > bestSegment.score) { + bestSegment = activeSegment; + } + } + + /* Zero out rest of segmentFreqs array */ + while (activeSegment.begin < end) { + const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d); + segmentFreqs[delIndex] -= 1; + activeSegment.begin += 1; + } + + { + /* Zero the frequency of hash value of each dmer covered by the chosen segment. */ + U32 pos; + for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) { + const size_t i = FASTCOVER_hashPtrToIndex(ctx->samples + pos, f, d); + freqs[i] = 0; + } + } + + return bestSegment; +} + + +static int FASTCOVER_checkParameters(ZDICT_cover_params_t parameters, + size_t maxDictSize, unsigned f, + unsigned accel) { + /* k, d, and f are required parameters */ + if (parameters.d == 0 || parameters.k == 0) { + return 0; + } + /* d has to be 6 or 8 */ + if (parameters.d != 6 && parameters.d != 8) { + return 0; + } + /* k <= maxDictSize */ + if (parameters.k > maxDictSize) { + return 0; + } + /* d <= k */ + if (parameters.d > parameters.k) { + return 0; + } + /* 0 < f <= FASTCOVER_MAX_F*/ + if (f > FASTCOVER_MAX_F || f == 0) { + return 0; + } + /* 0 < splitPoint <= 1 */ + if (parameters.splitPoint <= 0 || parameters.splitPoint > 1) { + return 0; + } + /* 0 < accel <= 10 */ + if (accel > 10 || accel == 0) { + return 0; + } + return 1; +} + + +/** + * Clean up a context initialized with `FASTCOVER_ctx_init()`. + */ +static void +FASTCOVER_ctx_destroy(FASTCOVER_ctx_t* ctx) +{ + if (!ctx) return; + + free(ctx->freqs); + ctx->freqs = NULL; + + free(ctx->offsets); + ctx->offsets = NULL; +} + + +/** + * Calculate for frequency of hash value of each dmer in ctx->samples + */ +static void +FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx) +{ + const unsigned f = ctx->f; + const unsigned d = ctx->d; + const unsigned skip = ctx->accelParams.skip; + const unsigned readLength = MAX(d, 8); + size_t i; + assert(ctx->nbTrainSamples >= 5); + assert(ctx->nbTrainSamples <= ctx->nbSamples); + for (i = 0; i < ctx->nbTrainSamples; i++) { + size_t start = ctx->offsets[i]; /* start of current dmer */ + size_t const currSampleEnd = ctx->offsets[i+1]; + while (start + readLength <= currSampleEnd) { + const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, d); + freqs[dmerIndex]++; + start = start + skip + 1; + } + } +} + + +/** + * Prepare a context for dictionary building. 
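+ * A note on the acceleration table above: accel=1 counts every dmer
+ * (skip=0) and finalizes on 100% of the training samples, while accel=2
+ * skips every other dmer (skip=1) and finalizes on only 50% of them; higher
+ * accel values count progressively fewer dmers and finalize on fewer
+ * samples, trading dictionary quality for training speed.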
+ * The context is only dependent on the parameter `d` and can used multiple + * times. + * Returns 0 on success or error code on error. + * The context must be destroyed with `FASTCOVER_ctx_destroy()`. + */ +static size_t +FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx, + const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + unsigned d, double splitPoint, unsigned f, + FASTCOVER_accel_t accelParams) +{ + const BYTE* const samples = (const BYTE*)samplesBuffer; + const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples); + /* Split samples into testing and training sets */ + const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples; + const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples; + const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize; + const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize; + + /* Checks */ + if (totalSamplesSize < MAX(d, sizeof(U64)) || + totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) { + DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", + (unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20)); + return ERROR(srcSize_wrong); + } + + /* Check if there are at least 5 training samples */ + if (nbTrainSamples < 5) { + DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples); + return ERROR(srcSize_wrong); + } + + /* Check if there's testing sample */ + if (nbTestSamples < 1) { + DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples); + return ERROR(srcSize_wrong); + } + + /* Zero the context */ + memset(ctx, 0, sizeof(*ctx)); + DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples, + (unsigned)trainingSamplesSize); + DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples, + (unsigned)testSamplesSize); + + ctx->samples = samples; + ctx->samplesSizes = samplesSizes; + ctx->nbSamples = nbSamples; + ctx->nbTrainSamples = nbTrainSamples; + ctx->nbTestSamples = nbTestSamples; + ctx->nbDmers = trainingSamplesSize - MAX(d, sizeof(U64)) + 1; + ctx->d = d; + ctx->f = f; + ctx->accelParams = accelParams; + + /* The offsets of each file */ + ctx->offsets = (size_t*)calloc((nbSamples + 1), sizeof(size_t)); + if (ctx->offsets == NULL) { + DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n"); + FASTCOVER_ctx_destroy(ctx); + return ERROR(memory_allocation); + } + + /* Fill offsets from the samplesSizes */ + { U32 i; + ctx->offsets[0] = 0; + assert(nbSamples >= 5); + for (i = 1; i <= nbSamples; ++i) { + ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1]; + } + } + + /* Initialize frequency array of size 2^f */ + ctx->freqs = (U32*)calloc(((U64)1 << f), sizeof(U32)); + if (ctx->freqs == NULL) { + DISPLAYLEVEL(1, "Failed to allocate frequency table \n"); + FASTCOVER_ctx_destroy(ctx); + return ERROR(memory_allocation); + } + + DISPLAYLEVEL(2, "Computing frequencies\n"); + FASTCOVER_computeFrequency(ctx->freqs, ctx); + + return 0; +} + + +/** + * Given the prepared context build the dictionary. 
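+ * Sizing note: the frequency table prepared by FASTCOVER_ctx_init() holds
+ * 2^f U32 counters (4 MiB at the default f=20), and each parameter trial adds
+ * a 2^f U16 segmentFreqs array (2 MiB at f=20) plus a working copy of freqs,
+ * so memory grows exponentially as f approaches FASTCOVER_MAX_F.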
+ */ +static size_t +FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx, + U32* freqs, + void* dictBuffer, size_t dictBufferCapacity, + ZDICT_cover_params_t parameters, + U16* segmentFreqs) +{ + BYTE *const dict = (BYTE *)dictBuffer; + size_t tail = dictBufferCapacity; + /* Divide the data into epochs. We will select one segment from each epoch. */ + const COVER_epoch_info_t epochs = COVER_computeEpochs( + (U32)dictBufferCapacity, (U32)ctx->nbDmers, parameters.k, 1); + const size_t maxZeroScoreRun = 10; + size_t zeroScoreRun = 0; + size_t epoch; + DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", + (U32)epochs.num, (U32)epochs.size); + /* Loop through the epochs until there are no more segments or the dictionary + * is full. + */ + for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) { + const U32 epochBegin = (U32)(epoch * epochs.size); + const U32 epochEnd = epochBegin + epochs.size; + size_t segmentSize; + /* Select a segment */ + COVER_segment_t segment = FASTCOVER_selectSegment( + ctx, freqs, epochBegin, epochEnd, parameters, segmentFreqs); + + /* If the segment covers no dmers, then we are out of content. + * There may be new content in other epochs, for continue for some time. + */ + if (segment.score == 0) { + if (++zeroScoreRun >= maxZeroScoreRun) { + break; + } + continue; + } + zeroScoreRun = 0; + + /* Trim the segment if necessary and if it is too small then we are done */ + segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); + if (segmentSize < parameters.d) { + break; + } + + /* We fill the dictionary from the back to allow the best segments to be + * referenced with the smallest offsets. + */ + tail -= segmentSize; + memcpy(dict + tail, ctx->samples + segment.begin, segmentSize); + DISPLAYUPDATE( + 2, "\r%u%% ", + (unsigned)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity)); + } + DISPLAYLEVEL(2, "\r%79s\r", ""); + return tail; +} + +/** + * Parameters for FASTCOVER_tryParameters(). + */ +typedef struct FASTCOVER_tryParameters_data_s { + const FASTCOVER_ctx_t* ctx; + COVER_best_t* best; + size_t dictBufferCapacity; + ZDICT_cover_params_t parameters; +} FASTCOVER_tryParameters_data_t; + + +/** + * Tries a set of parameters and updates the COVER_best_t with the results. + * This function is thread safe if zstd is compiled with multithreaded support. + * It takes its parameters as an *OWNING* opaque pointer to support threading. 
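+ * The builder above fills the dictionary from the back of the buffer: `tail`
+ * starts at dictBufferCapacity, each selected segment is trimmed to
+ * MIN(segment.end - segment.begin + d - 1, tail) bytes and copied to
+ * dict + tail, and the loop stops once the buffer is full, a trimmed segment
+ * falls below d bytes, or 10 consecutive segments score zero.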
+ */ +static void FASTCOVER_tryParameters(void* opaque) +{ + /* Save parameters as local variables */ + FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t*)opaque; + const FASTCOVER_ctx_t *const ctx = data->ctx; + const ZDICT_cover_params_t parameters = data->parameters; + size_t dictBufferCapacity = data->dictBufferCapacity; + size_t totalCompressedSize = ERROR(GENERIC); + /* Initialize array to keep track of frequency of dmer within activeSegment */ + U16* segmentFreqs = (U16*)calloc(((U64)1 << ctx->f), sizeof(U16)); + /* Allocate space for hash table, dict, and freqs */ + BYTE *const dict = (BYTE*)malloc(dictBufferCapacity); + COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC)); + U32* freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32)); + if (!segmentFreqs || !dict || !freqs) { + DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); + goto _cleanup; + } + /* Copy the frequencies because we need to modify them */ + memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32)); + /* Build the dictionary */ + { const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity, + parameters, segmentFreqs); + + const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100); + selection = COVER_selectDict(dict + tail, dictBufferCapacity, dictBufferCapacity - tail, + ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, + totalCompressedSize); + + if (COVER_dictSelectionIsError(selection)) { + DISPLAYLEVEL(1, "Failed to select dictionary\n"); + goto _cleanup; + } + } +_cleanup: + free(dict); + COVER_best_finish(data->best, parameters, selection); + free(data); + free(segmentFreqs); + COVER_dictSelectionFree(selection); + free(freqs); +} + + +static void +FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams, + ZDICT_cover_params_t* coverParams) +{ + coverParams->k = fastCoverParams.k; + coverParams->d = fastCoverParams.d; + coverParams->steps = fastCoverParams.steps; + coverParams->nbThreads = fastCoverParams.nbThreads; + coverParams->splitPoint = fastCoverParams.splitPoint; + coverParams->zParams = fastCoverParams.zParams; + coverParams->shrinkDict = fastCoverParams.shrinkDict; +} + + +static void +FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams, + ZDICT_fastCover_params_t* fastCoverParams, + unsigned f, unsigned accel) +{ + fastCoverParams->k = coverParams.k; + fastCoverParams->d = coverParams.d; + fastCoverParams->steps = coverParams.steps; + fastCoverParams->nbThreads = coverParams.nbThreads; + fastCoverParams->splitPoint = coverParams.splitPoint; + fastCoverParams->f = f; + fastCoverParams->accel = accel; + fastCoverParams->zParams = coverParams.zParams; + fastCoverParams->shrinkDict = coverParams.shrinkDict; +} + + +ZDICTLIB_API size_t +ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + ZDICT_fastCover_params_t parameters) +{ + BYTE* const dict = (BYTE*)dictBuffer; + FASTCOVER_ctx_t ctx; + ZDICT_cover_params_t coverParams; + FASTCOVER_accel_t accelParams; + /* Initialize global data */ + g_displayLevel = parameters.zParams.notificationLevel; + /* Assign splitPoint and f if not provided */ + parameters.splitPoint = 1.0; + parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f; + parameters.accel = parameters.accel == 0 ? 
DEFAULT_ACCEL : parameters.accel; + /* Convert to cover parameter */ + memset(&coverParams, 0 , sizeof(coverParams)); + FASTCOVER_convertToCoverParams(parameters, &coverParams); + /* Checks */ + if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f, + parameters.accel)) { + DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n"); + return ERROR(parameter_outOfBound); + } + if (nbSamples == 0) { + DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n"); + return ERROR(srcSize_wrong); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + /* Assign corresponding FASTCOVER_accel_t to accelParams*/ + accelParams = FASTCOVER_defaultAccelParameters[parameters.accel]; + /* Initialize context */ + { + size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, + coverParams.d, parameters.splitPoint, parameters.f, + accelParams); + if (ZSTD_isError(initVal)) { + DISPLAYLEVEL(1, "Failed to initialize context\n"); + return initVal; + } + } + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel); + /* Build the dictionary */ + DISPLAYLEVEL(2, "Building dictionary\n"); + { + /* Initialize array to keep track of frequency of dmer within activeSegment */ + U16* segmentFreqs = (U16 *)calloc(((U64)1 << parameters.f), sizeof(U16)); + const size_t tail = FASTCOVER_buildDictionary(&ctx, ctx.freqs, dictBuffer, + dictBufferCapacity, coverParams, segmentFreqs); + const unsigned nbFinalizeSamples = (unsigned)(ctx.nbTrainSamples * ctx.accelParams.finalize / 100); + const size_t dictionarySize = ZDICT_finalizeDictionary( + dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, + samplesBuffer, samplesSizes, nbFinalizeSamples, coverParams.zParams); + if (!ZSTD_isError(dictionarySize)) { + DISPLAYLEVEL(2, "Constructed dictionary of size %u\n", + (unsigned)dictionarySize); + } + FASTCOVER_ctx_destroy(&ctx); + free(segmentFreqs); + return dictionarySize; + } +} + + +ZDICTLIB_API size_t +ZDICT_optimizeTrainFromBuffer_fastCover( + void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, + const size_t* samplesSizes, unsigned nbSamples, + ZDICT_fastCover_params_t* parameters) +{ + ZDICT_cover_params_t coverParams; + FASTCOVER_accel_t accelParams; + /* constants */ + const unsigned nbThreads = parameters->nbThreads; + const double splitPoint = + parameters->splitPoint <= 0.0 ? FASTCOVER_DEFAULT_SPLITPOINT : parameters->splitPoint; + const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d; + const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d; + const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k; + const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k; + const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps; + const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1); + const unsigned kIterations = + (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); + const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f; + const unsigned accel = parameters->accel == 0 ? 
DEFAULT_ACCEL : parameters->accel; + const unsigned shrinkDict = 0; + /* Local variables */ + const int displayLevel = parameters->zParams.notificationLevel; + unsigned iteration = 1; + unsigned d; + unsigned k; + COVER_best_t best; + POOL_ctx *pool = NULL; + int warned = 0; + /* Checks */ + if (splitPoint <= 0 || splitPoint > 1) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n"); + return ERROR(parameter_outOfBound); + } + if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n"); + return ERROR(parameter_outOfBound); + } + if (kMinK < kMaxD || kMaxK < kMinK) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n"); + return ERROR(parameter_outOfBound); + } + if (nbSamples == 0) { + LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n"); + return ERROR(srcSize_wrong); + } + if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { + LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n", + ZDICT_DICTSIZE_MIN); + return ERROR(dstSize_tooSmall); + } + if (nbThreads > 1) { + pool = POOL_create(nbThreads, 1); + if (!pool) { + return ERROR(memory_allocation); + } + } + /* Initialization */ + COVER_best_init(&best); + memset(&coverParams, 0 , sizeof(coverParams)); + FASTCOVER_convertToCoverParams(*parameters, &coverParams); + accelParams = FASTCOVER_defaultAccelParameters[accel]; + /* Turn down global display level to clean up display at level 2 and below */ + g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1; + /* Loop through d first because each new value needs a new context */ + LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n", + kIterations); + for (d = kMinD; d <= kMaxD; d += 2) { + /* Initialize the context for this value of d */ + FASTCOVER_ctx_t ctx; + LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); + { + size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams); + if (ZSTD_isError(initVal)) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); + COVER_best_destroy(&best); + POOL_free(pool); + return initVal; + } + } + if (!warned) { + COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel); + warned = 1; + } + /* Loop through k reusing the same context */ + for (k = kMinK; k <= kMaxK; k += kStepSize) { + /* Prepare the arguments */ + FASTCOVER_tryParameters_data_t *data = (FASTCOVER_tryParameters_data_t *)malloc( + sizeof(FASTCOVER_tryParameters_data_t)); + LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k); + if (!data) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n"); + COVER_best_destroy(&best); + FASTCOVER_ctx_destroy(&ctx); + POOL_free(pool); + return ERROR(memory_allocation); + } + data->ctx = &ctx; + data->best = &best; + data->dictBufferCapacity = dictBufferCapacity; + data->parameters = coverParams; + data->parameters.k = k; + data->parameters.d = d; + data->parameters.splitPoint = splitPoint; + data->parameters.steps = kSteps; + data->parameters.shrinkDict = shrinkDict; + data->parameters.zParams.notificationLevel = g_displayLevel; + /* Check the parameters */ + if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity, + data->ctx->f, accel)) { + DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n"); + free(data); + continue; + } + /* Call the function and pass ownership of data to it */ + COVER_best_start(&best); + if (pool) { + POOL_add(pool, &FASTCOVER_tryParameters, data); + } else { + 
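+        /* Scale of the sweep with all-default parameters, for orientation:
+         * d is tried at 6 and 8, k runs from 50 to 2000 in steps of
+         * MAX((2000-50)/40, 1) = 48, i.e. 41 values per d, so kIterations =
+         * 2 * 41 = 82 trials, each dispatched to the thread pool when one was
+         * created or run synchronously right here. */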
FASTCOVER_tryParameters(data);
+      }
+      /* Print status */
+      LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
+                         (unsigned)((iteration * 100) / kIterations));
+      ++iteration;
+    }
+    COVER_best_wait(&best);
+    FASTCOVER_ctx_destroy(&ctx);
+  }
+  LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
+  /* Fill the output buffer and parameters with output of the best parameters */
+  {
+    const size_t dictSize = best.dictSize;
+    if (ZSTD_isError(best.compressedSize)) {
+      const size_t compressedSize = best.compressedSize;
+      COVER_best_destroy(&best);
+      POOL_free(pool);
+      return compressedSize;
+    }
+    FASTCOVER_convertToFastCoverParams(best.parameters, parameters, f, accel);
+    memcpy(dictBuffer, best.dict, dictSize);
+    COVER_best_destroy(&best);
+    POOL_free(pool);
+    return dictSize;
+  }
+
+}
+/**** ended inlining dictBuilder/fastcover.c ****/
+/**** start inlining dictBuilder/zdict.c ****/
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/*-**************************************
+* Tuning parameters
+****************************************/
+#define MINRATIO 4   /* minimum nb of apparition to be selected in dictionary */
+#define ZDICT_MAX_SAMPLES_SIZE (2000U << 20)
+#define ZDICT_MIN_SAMPLES_SIZE (ZDICT_CONTENTSIZE_MIN * MINRATIO)
+
+
+/*-**************************************
+* Compiler Options
+****************************************/
+/* Unix Large Files support (>4GB) */
+#define _FILE_OFFSET_BITS 64
+#if (defined(__sun__) && (!defined(__LP64__)))   /* Sun Solaris 32-bits requires specific definitions */
+# ifndef _LARGEFILE_SOURCE
+# define _LARGEFILE_SOURCE
+# endif
+#elif ! defined(__LP64__)   /* No point defining Large file for 64 bit */
+# ifndef _LARGEFILE64_SOURCE
+# define _LARGEFILE64_SOURCE
+# endif
+#endif
+
+
+/*-*************************************
+* Dependencies
+***************************************/
+#include <stdlib.h>   /* malloc, free */
+#include <string.h>   /* memset */
+#include <stdio.h>    /* fprintf, fopen, ftello64 */
+#include <time.h>     /* clock */
+
+#ifndef ZDICT_STATIC_LINKING_ONLY
+# define ZDICT_STATIC_LINKING_ONLY
+#endif
+#define HUF_STATIC_LINKING_ONLY
+
+/**** skipping file: ../common/mem.h ****/
+/**** skipping file: ../common/fse.h ****/
+/**** skipping file: ../common/huf.h ****/
+/**** skipping file: ../common/zstd_internal.h ****/
+/**** skipping file: ../common/xxhash.h ****/
+/**** skipping file: ../compress/zstd_compress_internal.h ****/
+/**** skipping file: ../zdict.h ****/
+/**** skipping file: divsufsort.h ****/
+
+
+/*-*************************************
+* Constants
+***************************************/
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+#define DICTLISTSIZE_DEFAULT 10000
+
+#define NOISELENGTH 32
+
+static const U32 g_selectivity_default = 9;
+
+
+/*-*************************************
+* Console display
+***************************************/
+#undef DISPLAY
+#define DISPLAY(...) { fprintf(stderr, __VA_ARGS__); fflush( stderr ); }
+#undef DISPLAYLEVEL
+#define DISPLAYLEVEL(l, ...) if (notificationLevel>=l) { DISPLAY(__VA_ARGS__); }    /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
+
+static clock_t ZDICT_clockSpan(clock_t nPrevious) { return clock() - nPrevious; }
+
+static void ZDICT_printHex(const void* ptr, size_t length)
+{
+    const BYTE* const b = (const BYTE*)ptr;
+    size_t u;
+    for (u=0; u<length; u++) {
+        BYTE c = b[u];
+        if (c<32 || c>126) c = '.';   /* non-printable char */
+        DISPLAY("%c", c);
+    }
+}
+
+
+/*-********************************************************
+* Helper functions
+**********************************************************/
+unsigned ZDICT_isError(size_t errorCode) { return ERR_isError(errorCode); }
+
+const char* ZDICT_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
+
+unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize)
+{
+    if (dictSize < 8) return 0;
+    if (MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return 0;
+    return MEM_readLE32((const char*)dictBuffer + 4);
+}
+
+size_t ZDICT_getDictHeaderSize(const void* dictBuffer, size_t dictSize)
+{
+    size_t headerSize;
+    if (dictSize <= 8 || MEM_readLE32(dictBuffer) != ZSTD_MAGIC_DICTIONARY) return ERROR(dictionary_corrupted);
+
+    {   ZSTD_compressedBlockState_t* bs = (ZSTD_compressedBlockState_t*)malloc(sizeof(ZSTD_compressedBlockState_t));
+        U32* wksp = (U32*)malloc(HUF_WORKSPACE_SIZE);
+        if (!bs || !wksp) {
+            headerSize = ERROR(memory_allocation);
+        } else {
+            ZSTD_reset_compressedBlockState(bs);
+            headerSize = ZSTD_loadCEntropy(bs, wksp, dictBuffer, dictSize);
+        }
+
+        free(bs);
+        free(wksp);
+    }
+
+    return headerSize;
+}
+
+/*-********************************************************
+* Dictionary training functions
+**********************************************************/
+static unsigned ZDICT_NbCommonBytes (size_t val)
+{
+    if (MEM_isLittleEndian()) {
+        if (MEM_64bits()) {
+# if defined(_MSC_VER) && defined(_WIN64)
+            unsigned long r = 0;
+            _BitScanForward64( &r, (U64)val );
+            return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+            return (__builtin_ctzll((U64)val) >> 3);
+# else
+            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
+            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+# endif
+        } else { /* 32 bits */
+# if defined(_MSC_VER)
+            unsigned long r=0;
+            _BitScanForward( &r, (U32)val );
+            return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+            return (__builtin_ctz((U32)val) >> 3);
+# else
+            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
+            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+# endif
+        }
+    } else { /* Big Endian CPU */
+        if (MEM_64bits()) {
+# if defined(_MSC_VER) && defined(_WIN64)
+            unsigned long r = 0;
+            _BitScanReverse64( &r, val );
+            return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+            return (__builtin_clzll(val) >> 3);
+# else
+            unsigned r;
+            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
+            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
+            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
+            r += (!val);
+            return r;
+# endif
+        } else { /* 32 bits */
+# if defined(_MSC_VER)
+            unsigned long r = 0;
+            _BitScanReverse( &r, (unsigned long)val );
+            return (unsigned)(r>>3);
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
>= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} + + +/*! ZDICT_count() : + Count the nb of common bytes between 2 pointers. + Note : this function presumes end of buffer followed by noisy guard band. +*/ +static size_t ZDICT_count(const void* pIn, const void* pMatch) +{ + const char* const pStart = (const char*)pIn; + for (;;) { + size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { + pIn = (const char*)pIn+sizeof(size_t); + pMatch = (const char*)pMatch+sizeof(size_t); + continue; + } + pIn = (const char*)pIn+ZDICT_NbCommonBytes(diff); + return (size_t)((const char*)pIn - pStart); + } +} + + +typedef struct { + U32 pos; + U32 length; + U32 savings; +} dictItem; + +static void ZDICT_initDictItem(dictItem* d) +{ + d->pos = 1; + d->length = 0; + d->savings = (U32)(-1); +} + + +#define LLIMIT 64 /* heuristic determined experimentally */ +#define MINMATCHLENGTH 7 /* heuristic determined experimentally */ +static dictItem ZDICT_analyzePos( + BYTE* doneMarks, + const int* suffix, U32 start, + const void* buffer, U32 minRatio, U32 notificationLevel) +{ + U32 lengthList[LLIMIT] = {0}; + U32 cumulLength[LLIMIT] = {0}; + U32 savings[LLIMIT] = {0}; + const BYTE* b = (const BYTE*)buffer; + size_t maxLength = LLIMIT; + size_t pos = suffix[start]; + U32 end = start; + dictItem solution; + + /* init */ + memset(&solution, 0, sizeof(solution)); + doneMarks[pos] = 1; + + /* trivial repetition cases */ + if ( (MEM_read16(b+pos+0) == MEM_read16(b+pos+2)) + ||(MEM_read16(b+pos+1) == MEM_read16(b+pos+3)) + ||(MEM_read16(b+pos+2) == MEM_read16(b+pos+4)) ) { + /* skip and mark segment */ + U16 const pattern16 = MEM_read16(b+pos+4); + U32 u, patternEnd = 6; + while (MEM_read16(b+pos+patternEnd) == pattern16) patternEnd+=2 ; + if (b[pos+patternEnd] == b[pos+patternEnd-1]) patternEnd++; + for (u=1; u= MINMATCHLENGTH); + } + + /* look backward */ + { size_t length; + do { + length = ZDICT_count(b + pos, b + *(suffix+start-1)); + if (length >=MINMATCHLENGTH) start--; + } while(length >= MINMATCHLENGTH); + } + + /* exit if not found a minimum nb of repetitions */ + if (end-start < minRatio) { + U32 idx; + for(idx=start; idx= %i at pos %7u ", (unsigned)(end-start), MINMATCHLENGTH, (unsigned)pos); + DISPLAYLEVEL(4, "\n"); + + for (mml = MINMATCHLENGTH ; ; mml++) { + BYTE currentChar = 0; + U32 currentCount = 0; + U32 currentID = refinedStart; + U32 id; + U32 selectedCount = 0; + U32 selectedID = currentID; + for (id =refinedStart; id < refinedEnd; id++) { + if (b[suffix[id] + mml] != currentChar) { + if (currentCount > selectedCount) { + selectedCount = currentCount; + selectedID = currentID; + } + currentID = id; + currentChar = b[ suffix[id] + mml]; + currentCount = 0; + } + currentCount ++; + } + if (currentCount > selectedCount) { /* for last */ + selectedCount = currentCount; + selectedID = currentID; + } + + if (selectedCount < minRatio) + break; + refinedStart = selectedID; + refinedEnd = refinedStart + selectedCount; + } + + /* evaluate gain based on new dict */ + start = refinedStart; + pos = suffix[refinedStart]; + end = start; + memset(lengthList, 0, sizeof(lengthList)); + + /* look forward */ + { size_t length; + do { + end++; + length = ZDICT_count(b + pos, b + suffix[end]); + if (length >= LLIMIT) length = LLIMIT-1; + lengthList[length]++; + } while (length >=MINMATCHLENGTH); + } + + /* look backward */ + { size_t length = MINMATCHLENGTH; + while ((length >= 
MINMATCHLENGTH) & (start > 0)) { + length = ZDICT_count(b + pos, b + suffix[start - 1]); + if (length >= LLIMIT) length = LLIMIT - 1; + lengthList[length]++; + if (length >= MINMATCHLENGTH) start--; + } + } + + /* largest useful length */ + memset(cumulLength, 0, sizeof(cumulLength)); + cumulLength[maxLength-1] = lengthList[maxLength-1]; + for (i=(int)(maxLength-2); i>=0; i--) + cumulLength[i] = cumulLength[i+1] + lengthList[i]; + + for (i=LLIMIT-1; i>=MINMATCHLENGTH; i--) if (cumulLength[i]>=minRatio) break; + maxLength = i; + + /* reduce maxLength in case of final into repetitive data */ + { U32 l = (U32)maxLength; + BYTE const c = b[pos + maxLength-1]; + while (b[pos+l-2]==c) l--; + maxLength = l; + } + if (maxLength < MINMATCHLENGTH) return solution; /* skip : no long-enough solution */ + + /* calculate savings */ + savings[5] = 0; + for (i=MINMATCHLENGTH; i<=(int)maxLength; i++) + savings[i] = savings[i-1] + (lengthList[i] * (i-3)); + + DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n", + (unsigned)pos, (unsigned)maxLength, (unsigned)savings[maxLength], (double)savings[maxLength] / maxLength); + + solution.pos = (U32)pos; + solution.length = (U32)maxLength; + solution.savings = savings[maxLength]; + + /* mark positions done */ + { U32 id; + for (id=start; id solution.length) length = solution.length; + } + pEnd = (U32)(testedPos + length); + for (p=testedPos; ppos; + const U32 eltEnd = elt.pos + elt.length; + const char* const buf = (const char*) buffer; + + /* tail overlap */ + U32 u; for (u=1; u elt.pos) && (table[u].pos <= eltEnd)) { /* overlap, existing > new */ + /* append */ + U32 const addedLength = table[u].pos - elt.pos; + table[u].length += addedLength; + table[u].pos = elt.pos; + table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */ + table[u].savings += elt.length / 8; /* rough approx bonus */ + elt = table[u]; + /* sort : improve rank */ + while ((u>1) && (table[u-1].savings < elt.savings)) + table[u] = table[u-1], u--; + table[u] = elt; + return u; + } } + + /* front overlap */ + for (u=1; u= elt.pos) && (table[u].pos < elt.pos)) { /* overlap, existing < new */ + /* append */ + int const addedLength = (int)eltEnd - (table[u].pos + table[u].length); + table[u].savings += elt.length / 8; /* rough approx bonus */ + if (addedLength > 0) { /* otherwise, elt fully included into existing */ + table[u].length += addedLength; + table[u].savings += elt.savings * addedLength / elt.length; /* rough approx */ + } + /* sort : improve rank */ + elt = table[u]; + while ((u>1) && (table[u-1].savings < elt.savings)) + table[u] = table[u-1], u--; + table[u] = elt; + return u; + } + + if (MEM_read64(buf + table[u].pos) == MEM_read64(buf + elt.pos + 1)) { + if (isIncluded(buf + table[u].pos, buf + elt.pos + 1, table[u].length)) { + size_t const addedLength = MAX( (int)elt.length - (int)table[u].length , 1 ); + table[u].pos = elt.pos; + table[u].savings += (U32)(elt.savings * addedLength / elt.length); + table[u].length = MIN(elt.length, table[u].length + 1); + return u; + } + } + } + + return 0; +} + + +static void ZDICT_removeDictItem(dictItem* table, U32 id) +{ + /* convention : table[0].pos stores nb of elts */ + U32 const max = table[0].pos; + U32 u; + if (!id) return; /* protection, should never happen */ + for (u=id; upos--; +} + + +static void ZDICT_insertDictItem(dictItem* table, U32 maxSize, dictItem elt, const void* buffer) +{ + /* merge if possible */ + U32 mergeId = ZDICT_tryMerge(table, elt, 0, buffer); + if 
(mergeId) { + U32 newMerge = 1; + while (newMerge) { + newMerge = ZDICT_tryMerge(table, table[mergeId], mergeId, buffer); + if (newMerge) ZDICT_removeDictItem(table, mergeId); + mergeId = newMerge; + } + return; + } + + /* insert */ + { U32 current; + U32 nextElt = table->pos; + if (nextElt >= maxSize) nextElt = maxSize-1; + current = nextElt-1; + while (table[current].savings < elt.savings) { + table[current+1] = table[current]; + current--; + } + table[current+1] = elt; + table->pos = nextElt+1; + } +} + + +static U32 ZDICT_dictSize(const dictItem* dictList) +{ + U32 u, dictSize = 0; + for (u=1; u=l) { \ + if (ZDICT_clockSpan(displayClock) > refreshRate) \ + { displayClock = clock(); DISPLAY(__VA_ARGS__); \ + if (notificationLevel>=4) fflush(stderr); } } + + /* init */ + DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ + if (!suffix0 || !reverseSuffix || !doneMarks || !filePos) { + result = ERROR(memory_allocation); + goto _cleanup; + } + if (minRatio < MINRATIO) minRatio = MINRATIO; + memset(doneMarks, 0, bufferSize+16); + + /* limit sample set size (divsufsort limitation)*/ + if (bufferSize > ZDICT_MAX_SAMPLES_SIZE) DISPLAYLEVEL(3, "sample set too large : reduced to %u MB ...\n", (unsigned)(ZDICT_MAX_SAMPLES_SIZE>>20)); + while (bufferSize > ZDICT_MAX_SAMPLES_SIZE) bufferSize -= fileSizes[--nbFiles]; + + /* sort */ + DISPLAYLEVEL(2, "sorting %u files of total size %u MB ...\n", nbFiles, (unsigned)(bufferSize>>20)); + { int const divSuftSortResult = divsufsort((const unsigned char*)buffer, suffix, (int)bufferSize, 0); + if (divSuftSortResult != 0) { result = ERROR(GENERIC); goto _cleanup; } + } + suffix[bufferSize] = (int)bufferSize; /* leads into noise */ + suffix0[0] = (int)bufferSize; /* leads into noise */ + /* build reverse suffix sort */ + { size_t pos; + for (pos=0; pos < bufferSize; pos++) + reverseSuffix[suffix[pos]] = (U32)pos; + /* note filePos tracks borders between samples. 
+ It's not used at this stage, but planned to become useful in a later update */ + filePos[0] = 0; + for (pos=1; pos> 21); + } +} + + +typedef struct +{ + ZSTD_CDict* dict; /* dictionary */ + ZSTD_CCtx* zc; /* working context */ + void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */ +} EStats_ress_t; + +#define MAXREPOFFSET 1024 + +static void ZDICT_countEStats(EStats_ress_t esr, const ZSTD_parameters* params, + unsigned* countLit, unsigned* offsetcodeCount, unsigned* matchlengthCount, unsigned* litlengthCount, U32* repOffsets, + const void* src, size_t srcSize, + U32 notificationLevel) +{ + size_t const blockSizeMax = MIN (ZSTD_BLOCKSIZE_MAX, 1 << params->cParams.windowLog); + size_t cSize; + + if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */ + { size_t const errorCode = ZSTD_compressBegin_usingCDict(esr.zc, esr.dict); + if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; } + + } + cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize); + if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (unsigned)srcSize); return; } + + if (cSize) { /* if == 0; block is not compressible */ + const seqStore_t* const seqStorePtr = ZSTD_getSeqStore(esr.zc); + + /* literals stats */ + { const BYTE* bytePtr; + for(bytePtr = seqStorePtr->litStart; bytePtr < seqStorePtr->lit; bytePtr++) + countLit[*bytePtr]++; + } + + /* seqStats */ + { U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + ZSTD_seqToCodes(seqStorePtr); + + { const BYTE* codePtr = seqStorePtr->ofCode; + U32 u; + for (u=0; umlCode; + U32 u; + for (u=0; ullCode; + U32 u; + for (u=0; u= 2) { /* rep offsets */ + const seqDef* const seq = seqStorePtr->sequencesStart; + U32 offset1 = seq[0].offset - 3; + U32 offset2 = seq[1].offset - 3; + if (offset1 >= MAXREPOFFSET) offset1 = 0; + if (offset2 >= MAXREPOFFSET) offset2 = 0; + repOffsets[offset1] += 3; + repOffsets[offset2] += 1; + } } } +} + +static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles) +{ + size_t total=0; + unsigned u; + for (u=0; u0; u--) { + offsetCount_t tmp; + if (table[u-1].count >= table[u].count) break; + tmp = table[u-1]; + table[u-1] = table[u]; + table[u] = tmp; + } +} + +/* ZDICT_flatLit() : + * rewrite `countLit` to contain a mostly flat but still compressible distribution of literals. + * necessary to avoid generating a non-compressible distribution that HUF_writeCTable() cannot encode. 
+ */ +static void ZDICT_flatLit(unsigned* countLit) +{ + int u; + for (u=1; u<256; u++) countLit[u] = 2; + countLit[0] = 4; + countLit[253] = 1; + countLit[254] = 1; +} + +#define OFFCODE_MAX 30 /* only applicable to first block */ +static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, + int compressionLevel, + const void* srcBuffer, const size_t* fileSizes, unsigned nbFiles, + const void* dictBuffer, size_t dictBufferSize, + unsigned notificationLevel) +{ + unsigned countLit[256]; + HUF_CREATE_STATIC_CTABLE(hufTable, 255); + unsigned offcodeCount[OFFCODE_MAX+1]; + short offcodeNCount[OFFCODE_MAX+1]; + U32 offcodeMax = ZSTD_highbit32((U32)(dictBufferSize + 128 KB)); + unsigned matchLengthCount[MaxML+1]; + short matchLengthNCount[MaxML+1]; + unsigned litLengthCount[MaxLL+1]; + short litLengthNCount[MaxLL+1]; + U32 repOffset[MAXREPOFFSET]; + offsetCount_t bestRepOffset[ZSTD_REP_NUM+1]; + EStats_ress_t esr = { NULL, NULL, NULL }; + ZSTD_parameters params; + U32 u, huffLog = 11, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total; + size_t pos = 0, errorCode; + size_t eSize = 0; + size_t const totalSrcSize = ZDICT_totalSampleSize(fileSizes, nbFiles); + size_t const averageSampleSize = totalSrcSize / (nbFiles + !nbFiles); + BYTE* dstPtr = (BYTE*)dstBuffer; + + /* init */ + DEBUGLOG(4, "ZDICT_analyzeEntropy"); + if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionaryCreation_failed); goto _cleanup; } /* too large dictionary */ + for (u=0; u<256; u++) countLit[u] = 1; /* any character must be described */ + for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1; + for (u=0; u<=MaxML; u++) matchLengthCount[u] = 1; + for (u=0; u<=MaxLL; u++) litLengthCount[u] = 1; + memset(repOffset, 0, sizeof(repOffset)); + repOffset[1] = repOffset[4] = repOffset[8] = 1; + memset(bestRepOffset, 0, sizeof(bestRepOffset)); + if (compressionLevel==0) compressionLevel = ZSTD_CLEVEL_DEFAULT; + params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize); + + esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem); + esr.zc = ZSTD_createCCtx(); + esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX); + if (!esr.dict || !esr.zc || !esr.workPlace) { + eSize = ERROR(memory_allocation); + DISPLAYLEVEL(1, "Not enough memory \n"); + goto _cleanup; + } + + /* collect stats on all samples */ + for (u=0; u dictBufferCapacity) dictContentSize = dictBufferCapacity - hSize; + { size_t const dictSize = hSize + dictContentSize; + char* dictEnd = (char*)dictBuffer + dictSize; + memmove(dictEnd - dictContentSize, customDictContent, dictContentSize); + memcpy(dictBuffer, header, hSize); + return dictSize; + } +} + + +static size_t ZDICT_addEntropyTablesFromBuffer_advanced( + void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_params_t params) +{ + int const compressionLevel = (params.compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : params.compressionLevel; + U32 const notificationLevel = params.notificationLevel; + size_t hSize = 8; + + /* calculate entropy tables */ + DISPLAYLEVEL(2, "\r%70s\r", ""); /* clean display line */ + DISPLAYLEVEL(2, "statistics ... 
\n"); + { size_t const eSize = ZDICT_analyzeEntropy((char*)dictBuffer+hSize, dictBufferCapacity-hSize, + compressionLevel, + samplesBuffer, samplesSizes, nbSamples, + (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, + notificationLevel); + if (ZDICT_isError(eSize)) return eSize; + hSize += eSize; + } + + /* add dictionary header (after entropy tables) */ + MEM_writeLE32(dictBuffer, ZSTD_MAGIC_DICTIONARY); + { U64 const randomID = XXH64((char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize, 0); + U32 const compliantID = (randomID % ((1U<<31)-32768)) + 32768; + U32 const dictID = params.dictID ? params.dictID : compliantID; + MEM_writeLE32((char*)dictBuffer+4, dictID); + } + + if (hSize + dictContentSize < dictBufferCapacity) + memmove((char*)dictBuffer + hSize, (char*)dictBuffer + dictBufferCapacity - dictContentSize, dictContentSize); + return MIN(dictBufferCapacity, hSize+dictContentSize); +} + +/*! ZDICT_trainFromBuffer_unsafe_legacy() : +* Warning : `samplesBuffer` must be followed by noisy guard band !!! +* @return : size of dictionary, or an error code which can be tested with ZDICT_isError() +*/ +static size_t ZDICT_trainFromBuffer_unsafe_legacy( + void* dictBuffer, size_t maxDictSize, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_legacy_params_t params) +{ + U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16)); + dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList)); + unsigned const selectivity = params.selectivityLevel == 0 ? g_selectivity_default : params.selectivityLevel; + unsigned const minRep = (selectivity > 30) ? MINRATIO : nbSamples >> selectivity; + size_t const targetDictSize = maxDictSize; + size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples); + size_t dictSize = 0; + U32 const notificationLevel = params.zParams.notificationLevel; + + /* checks */ + if (!dictList) return ERROR(memory_allocation); + if (maxDictSize < ZDICT_DICTSIZE_MIN) { free(dictList); return ERROR(dstSize_tooSmall); } /* requested dictionary size is too small */ + if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return ERROR(dictionaryCreation_failed); } /* not enough source to create dictionary */ + + /* init */ + ZDICT_initDictItem(dictList); + + /* build dictionary */ + ZDICT_trainBuffer_legacy(dictList, dictListSize, + samplesBuffer, samplesBuffSize, + samplesSizes, nbSamples, + minRep, notificationLevel); + + /* display best matches */ + if (params.zParams.notificationLevel>= 3) { + unsigned const nb = MIN(25, dictList[0].pos); + unsigned const dictContentSize = ZDICT_dictSize(dictList); + unsigned u; + DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", (unsigned)dictList[0].pos-1, dictContentSize); + DISPLAYLEVEL(3, "list %u best segments \n", nb-1); + for (u=1; u samplesBuffSize) || ((pos + length) > samplesBuffSize)) { + free(dictList); + return ERROR(GENERIC); /* should never happen */ + } + DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |", + u, length, pos, (unsigned)dictList[u].savings); + ZDICT_printHex((const char*)samplesBuffer+pos, printedLength); + DISPLAYLEVEL(3, "| \n"); + } } + + + /* create dictionary */ + { unsigned dictContentSize = ZDICT_dictSize(dictList); + if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); } /* dictionary content too small */ + if (dictContentSize < targetDictSize/4) { + DISPLAYLEVEL(2, "! 
warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (unsigned)maxDictSize); + if (samplesBuffSize < 10 * targetDictSize) + DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (unsigned)(samplesBuffSize>>20)); + if (minRep > MINRATIO) { + DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1); + DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n"); + } + } + + if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) { + unsigned proposedSelectivity = selectivity-1; + while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; } + DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (unsigned)maxDictSize); + DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity); + DISPLAYLEVEL(2, "! always test dictionary efficiency on real samples \n"); + } + + /* limit dictionary size */ + { U32 const max = dictList->pos; /* convention : nb of useful elts within dictList */ + U32 currentSize = 0; + U32 n; for (n=1; n targetDictSize) { currentSize -= dictList[n].length; break; } + } + dictList->pos = n; + dictContentSize = currentSize; + } + + /* build dict content */ + { U32 u; + BYTE* ptr = (BYTE*)dictBuffer + maxDictSize; + for (u=1; upos; u++) { + U32 l = dictList[u].length; + ptr -= l; + if (ptr<(BYTE*)dictBuffer) { free(dictList); return ERROR(GENERIC); } /* should not happen */ + memcpy(ptr, (const char*)samplesBuffer+dictList[u].pos, l); + } } + + dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize, + samplesBuffer, samplesSizes, nbSamples, + params.zParams); + } + + /* clean up */ + free(dictList); + return dictSize; +} + + +/* ZDICT_trainFromBuffer_legacy() : + * issue : samplesBuffer need to be followed by a noisy guard band. 
+ * work around : duplicate the buffer, and add the noise */ +size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_legacy_params_t params) +{ + size_t result; + void* newBuff; + size_t const sBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples); + if (sBuffSize < ZDICT_MIN_SAMPLES_SIZE) return 0; /* not enough content => no dictionary */ + + newBuff = malloc(sBuffSize + NOISELENGTH); + if (!newBuff) return ERROR(memory_allocation); + + memcpy(newBuff, samplesBuffer, sBuffSize); + ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */ + + result = + ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, dictBufferCapacity, newBuff, + samplesSizes, nbSamples, params); + free(newBuff); + return result; +} + + +size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples) +{ + ZDICT_fastCover_params_t params; + DEBUGLOG(3, "ZDICT_trainFromBuffer"); + memset(¶ms, 0, sizeof(params)); + params.d = 8; + params.steps = 4; + /* Use default level since no compression level information is available */ + params.zParams.compressionLevel = ZSTD_CLEVEL_DEFAULT; +#if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1) + params.zParams.notificationLevel = DEBUGLEVEL; +#endif + return ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, dictBufferCapacity, + samplesBuffer, samplesSizes, nbSamples, + ¶ms); +} + +size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples) +{ + ZDICT_params_t params; + memset(¶ms, 0, sizeof(params)); + return ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, dictBufferCapacity, + samplesBuffer, samplesSizes, nbSamples, + params); +} +/**** ended inlining dictBuilder/zdict.c ****/ diff --git a/libkram/zstd/zstd.h b/libkram/zstd/zstd.h new file mode 100644 index 00000000..4651e6c4 --- /dev/null +++ b/libkram/zstd/zstd.h @@ -0,0 +1,2532 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef ZSTD_H_235446 +#define ZSTD_H_235446 + +/* ====== Dependency ======*/ +#include /* INT_MAX */ +#include /* size_t */ + + +/* ===== ZSTDLIB_API : control library symbols visibility ===== */ +#ifndef ZSTDLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZSTDLIB_VISIBILITY +# endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define ZSTDLIB_API ZSTDLIB_VISIBILITY +#endif + + +/******************************************************************************* + Introduction + + zstd, short for Zstandard, is a fast lossless compression algorithm, targeting + real-time compression scenarios at zlib-level and better compression ratios. + The zstd compression library provides in-memory compression and decompression + functions. + + The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), + which is currently 22. Levels >= 20, labeled `--ultra`, should be used with + caution, as they require more memory. The library also offers negative + compression levels, which extend the range of speed vs. ratio preferences. + The lower the level, the faster the speed (at the cost of compression). + + Compression can be done in: + - a single step (described as Simple API) + - a single step, reusing a context (described as Explicit context) + - unbounded multiple steps (described as Streaming compression) + + The compression ratio achievable on small data can be highly improved using + a dictionary. Dictionary compression can be performed in: + - a single step (described as Simple dictionary API) + - a single step, reusing a dictionary (described as Bulk-processing + dictionary API) + + Advanced experimental functions can be accessed using + `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. + + Advanced experimental APIs should never be used with a dynamically-linked + library. They are not "stable"; their definitions or signatures may change in + the future. Only static linking is allowed. +*******************************************************************************/ + +/*------ Version ------*/ +#define ZSTD_VERSION_MAJOR 1 +#define ZSTD_VERSION_MINOR 5 +#define ZSTD_VERSION_RELEASE 0 +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) + +/*! ZSTD_versionNumber() : + * Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */ +ZSTDLIB_API unsigned ZSTD_versionNumber(void); + +#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE +#define ZSTD_QUOTE(str) #str +#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) +#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) + +/*! ZSTD_versionString() : + * Return runtime library version, like "1.4.5". Requires v1.3.0+. 
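*
* A minimal version-check sketch (not from the zstd sources; the warning text is a placeholder),
* showing how the runtime value above can be compared against the compile-time macros:
*
*     #include <stdio.h>
*     static void checkZstdVersion(void)
*     {
*         if (ZSTD_versionNumber() != ZSTD_VERSION_NUMBER) {
*             printf("zstd header %s vs linked library %s\n",
*                    ZSTD_VERSION_STRING, ZSTD_versionString());
*         }
*     }
*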
*/ +ZSTDLIB_API const char* ZSTD_versionString(void); + +/* ************************************* + * Default constant + ***************************************/ +#ifndef ZSTD_CLEVEL_DEFAULT +# define ZSTD_CLEVEL_DEFAULT 3 +#endif + +/* ************************************* + * Constants + ***************************************/ + +/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ +#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ +#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ +#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ +#define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 + +#define ZSTD_BLOCKSIZELOG_MAX 17 +#define ZSTD_BLOCKSIZE_MAX (1<= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*! ZSTD_decompress() : + * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. + * `dstCapacity` is an upper bound of originalSize to regenerate. + * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. + * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/*! ZSTD_getFrameContentSize() : requires v1.3.0+ + * `src` should point to the start of a ZSTD encoded frame. + * `srcSize` must be at least as large as the frame header. + * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. + * @return : - decompressed size of `src` frame content, if known + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) + * note 1 : a 0 return value means the frame is valid but "empty". + * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * Optionally, application can rely on some implicit limit, + * as ZSTD_decompress() only needs an upper bound of decompressed size. + * (For example, data could be necessarily cut into blocks <= 16 KB). + * note 3 : decompressed size is always present when compression is completed using single-pass functions, + * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). + * note 4 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure return value fits within application's authorized limits. + * Each application can set its own limits. 
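*
* A minimal bounded one-shot decompression sketch (not from the zstd sources; `cSrc`,
* `cSrcSize` and the 64 MB cap are placeholders), following notes 2 and 5 above:
*
*     unsigned long long const rSize = ZSTD_getFrameContentSize(cSrc, cSrcSize);
*     if (rSize == ZSTD_CONTENTSIZE_ERROR) return;      /* not a zstd frame */
*     if (rSize == ZSTD_CONTENTSIZE_UNKNOWN) return;    /* use the streaming API instead */
*     if (rSize > (64ULL << 20)) return;                /* enforce application's own limit */
*     {   void* const rBuff = malloc((size_t)rSize);
*         size_t const dSize = ZSTD_decompress(rBuff, (size_t)rSize, cSrc, cSrcSize);
*         if (ZSTD_isError(dSize) || dSize != rSize) { /* handle error */ }
*         free(rBuff);
*     }
*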
+ * note 6 : This function replaces ZSTD_getDecompressedSize() */ +#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) +ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); + +/*! ZSTD_getDecompressedSize() : + * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). + * Both functions work the same way, but ZSTD_getDecompressedSize() blends + * "empty", "unknown" and "error" results to the same return value (0), + * while ZSTD_getFrameContentSize() gives them separate return values. + * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ +ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_findFrameCompressedSize() : Requires v1.4.0+ + * `src` should point to the start of a ZSTD frame or skippable frame. + * `srcSize` must be >= first frame size + * @return : the compressed size of the first frame starting at `src`, + * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, + * or an error code if input is invalid */ +ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); + + +/*====== Helper functions ======*/ +#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ +ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ +ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ +ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */ +ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ +ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */ + + +/*************************************** +* Explicit context +***************************************/ +/*= Compression context + * When compressing many times, + * it is recommended to allocate a context just once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Note : re-using context is just a speed / resource optimization. + * It doesn't change the compression ratio, which remains identical. + * Note 2 : In multi-threaded environments, + * use one different context per thread for parallel execution. + */ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); +ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer */ + +/*! ZSTD_compressCCtx() : + * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. + * Important : in order to behave similarly to `ZSTD_compress()`, + * this function compresses at requested compression level, + * __ignoring any other parameter__ . + * If any advanced parameter was set using the advanced API, + * they will all be reset. Only `compressionLevel` remains. 
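*
* A minimal context-reuse sketch (not from the zstd sources; `buffers`, `sizes`, `count`,
* `dst` and `dstCapacity` are placeholders), compressing many buffers with one CCtx:
*
*     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
*     size_t i;
*     for (i = 0; i < count; i++) {
*         size_t const cSize = ZSTD_compressCCtx(cctx, dst, dstCapacity,
*                                                buffers[i], sizes[i], 3);
*         if (ZSTD_isError(cSize)) { /* see ZSTD_getErrorName(cSize) */ }
*         /* ... store or write out cSize bytes of dst ... */
*     }
*     ZSTD_freeCCtx(cctx);
*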
+ */ +ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*= Decompression context + * When decompressing many times, + * it is recommended to allocate a context only once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Use one context per thread for parallel execution. */ +typedef struct ZSTD_DCtx_s ZSTD_DCtx; +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); +ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /* accept NULL pointer */ + +/*! ZSTD_decompressDCtx() : + * Same as ZSTD_decompress(), + * requires an allocated ZSTD_DCtx. + * Compatible with sticky parameters. + */ +ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/********************************************* +* Advanced compression API (Requires v1.4.0+) +**********************************************/ + +/* API design : + * Parameters are pushed one by one into an existing context, + * using ZSTD_CCtx_set*() functions. + * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. + * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! + * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . + * + * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). + * + * This API supercedes all other "advanced" API entry points in the experimental section. + * In the future, we expect to remove from experimental API entry points which are redundant with this API. + */ + + +/* Compression strategies, listed from fastest to strongest */ +typedef enum { ZSTD_fast=1, + ZSTD_dfast=2, + ZSTD_greedy=3, + ZSTD_lazy=4, + ZSTD_lazy2=5, + ZSTD_btlazy2=6, + ZSTD_btopt=7, + ZSTD_btultra=8, + ZSTD_btultra2=9 + /* note : new strategies _might_ be added in the future. + Only the order (from fast to strong) is guaranteed */ +} ZSTD_strategy; + +typedef enum { + + /* compression parameters + * Note: When compressing with a ZSTD_CDict these parameters are superseded + * by the parameters used to construct the ZSTD_CDict. + * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ + ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table. + * Note that exact compression parameters are dynamically determined, + * depending on both compression level and srcSize (when known). + * Default level is ZSTD_CLEVEL_DEFAULT==3. + * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. + * Note 1 : it's possible to pass a negative compression level. + * Note 2 : setting a level does not automatically set all other compression parameters + * to default. Setting this will however eventually dynamically impact the compression + * parameters which have not been manually set. The manually set + * ones will 'stick'. */ + /* Advanced compression parameters : + * It's possible to pin down compression parameters to some specific values. + * In which case, these values are no longer dynamically selected by the compressor */ + ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. + * This will set a memory budget for streaming decompression, + * with larger values requiring more memory + * and typically compressing more. + * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. 
+ * Special: value 0 means "use default windowLog". + * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT + * requires explicitly allowing such size at streaming decompression stage. */ + ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. + * Resulting memory usage is (1 << (hashLog+2)). + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. + * Larger tables improve compression ratio of strategies <= dFast, + * and improve speed of strategies > dFast. + * Special: value 0 means "use default hashLog". */ + ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2. + * Resulting memory usage is (1 << (chainLog+2)). + * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. + * Larger tables result in better and slower compression. + * This parameter is useless for "fast" strategy. + * It's still useful when using "dfast" strategy, + * in which case it defines a secondary probe table. + * Special: value 0 means "use default chainLog". */ + ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2. + * More attempts result in better and slower compression. + * This parameter is useless for "fast" and "dFast" strategies. + * Special: value 0 means "use default searchLog". */ + ZSTD_c_minMatch=105, /* Minimum size of searched matches. + * Note that Zstandard can still find matches of smaller size, + * it just tweaks its search algorithm to look for this size and larger. + * Larger values increase compression and decompression speed, but decrease ratio. + * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX. + * Note that currently, for all strategies < btopt, effective minimum is 4. + * , for all strategies > fast, effective maximum is 6. + * Special: value 0 means "use default minMatchLength". */ + ZSTD_c_targetLength=106, /* Impact of this field depends on strategy. + * For strategies btopt, btultra & btultra2: + * Length of Match considered "good enough" to stop search. + * Larger values make compression stronger, and slower. + * For strategy fast: + * Distance between match sampling. + * Larger values make compression faster, and weaker. + * Special: value 0 means "use default targetLength". */ + ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition. + * The higher the value of selected strategy, the more complex it is, + * resulting in stronger and slower compression. + * Special: value 0 means "use default strategy". */ + /* LDM mode parameters */ + ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. + * This parameter is designed to improve compression ratio + * for large inputs, by finding large matches at long distance. + * It increases memory usage and window size. + * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB + * except when expressly set to a different value. + * Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and + * compression strategy >= ZSTD_btopt (== compression level 16+) */ + ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2. + * Larger values increase memory usage and compression ratio, + * but decrease compression speed. + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX + * default: windowlog - 7. + * Special: value 0 means "automatically determine hashlog". */ + ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher. + * Larger/too small values usually decrease compression ratio. 
+ * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. + * Special: value 0 means "use default value" (default: 64). */ + ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution. + * Larger values improve collision resolution but decrease compression speed. + * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX. + * Special: value 0 means "use default value" (default: 3). */ + ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table. + * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). + * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage. + * Larger values improve compression speed. + * Deviating far from default value will likely result in a compression ratio decrease. + * Special: value 0 means "automatically determine hashRateLog". */ + + /* frame parameters */ + ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) + * Content size must be known at the beginning of compression. + * This is automatically the case when using ZSTD_compress2(), + * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ + ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ + ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ + + /* multi-threading parameters */ + /* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). + * Otherwise, trying to set any other value than default (0) will be a no-op and return an error. + * In a situation where it's unknown if the linked library supports multi-threading or not, + * setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property. + */ + ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. + * When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() : + * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, + * while compression is performed in parallel, within worker thread(s). + * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : + * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). + * More workers improve speed, but also increase memory usage. + * Default value is `0`, aka "single-threaded mode" : no worker is spawned, + * compression is performed inside Caller's thread, and all invocations are blocking */ + ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. + * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. + * 0 means default, which is dynamically determined based on compression parameters. + * Job size must be a minimum of overlap size, or ZSTDMT_JOBSIZE_MIN (= 512 KB), whichever is largest. + * The minimum size is automatically and transparently enforced. */ + ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. + * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. + * It helps preserve compression ratio, while each job is compressed in parallel. 
+ * This value is enforced only when nbWorkers >= 1. + * Larger values increase compression ratio, but decrease speed. + * Possible values range from 0 to 9 : + * - 0 means "default" : value will be determined by the library, depending on strategy + * - 1 means "no overlap" + * - 9 means "full overlap", using a full window size. + * Each intermediate rank increases/decreases load size by a factor 2 : + * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default + * default value varies between 6 and 9, depending on strategy */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_c_rsyncable + * ZSTD_c_format + * ZSTD_c_forceMaxWindow + * ZSTD_c_forceAttachDict + * ZSTD_c_literalCompressionMode + * ZSTD_c_targetCBlockSize + * ZSTD_c_srcSizeHint + * ZSTD_c_enableDedicatedDictSearch + * ZSTD_c_stableInBuffer + * ZSTD_c_stableOutBuffer + * ZSTD_c_blockDelimiters + * ZSTD_c_validateSequences + * ZSTD_c_splitBlocks + * ZSTD_c_useRowMatchFinder + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly; + * also, the enums values themselves are unstable and can still change. + */ + ZSTD_c_experimentalParam1=500, + ZSTD_c_experimentalParam2=10, + ZSTD_c_experimentalParam3=1000, + ZSTD_c_experimentalParam4=1001, + ZSTD_c_experimentalParam5=1002, + ZSTD_c_experimentalParam6=1003, + ZSTD_c_experimentalParam7=1004, + ZSTD_c_experimentalParam8=1005, + ZSTD_c_experimentalParam9=1006, + ZSTD_c_experimentalParam10=1007, + ZSTD_c_experimentalParam11=1008, + ZSTD_c_experimentalParam12=1009, + ZSTD_c_experimentalParam13=1010, + ZSTD_c_experimentalParam14=1011, + ZSTD_c_experimentalParam15=1012 +} ZSTD_cParameter; + +typedef struct { + size_t error; + int lowerBound; + int upperBound; +} ZSTD_bounds; + +/*! ZSTD_cParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - lower and upper bounds, both inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam); + +/*! ZSTD_CCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_cParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is generally only possible during frame initialization (before starting compression). + * Exception : when using multi-threading mode (nbWorkers >= 1), + * the following parameters can be updated _during_ compression (within same frame): + * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. + * new parameters will be active for next job only (after a flush()). + * @return : an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtx_setPledgedSrcSize() : + * Total input data size to be compressed as a single frame. + * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag. 
+ * This value will also be controlled at end of frame, and trigger an error if not respected. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame. + * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. + * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame. + * Note 2 : pledgedSrcSize is only valid once, for the next frame. + * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN. + * Note 3 : Whenever all input data is provided and consumed in a single round, + * for example with ZSTD_compress2(), + * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), + * this value is automatically overridden by srcSize instead. + */ +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); + +typedef enum { + ZSTD_reset_session_only = 1, + ZSTD_reset_parameters = 2, + ZSTD_reset_session_and_parameters = 3 +} ZSTD_ResetDirective; + +/*! ZSTD_CCtx_reset() : + * There are 2 different things that can be reset, independently or jointly : + * - The session : will stop compressing current frame, and make CCtx ready to start a new one. + * Useful after an error, or to interrupt any ongoing compression. + * Any internal data not yet flushed is cancelled. + * Compression parameters and dictionary remain unchanged. + * They will be used to compress next frame. + * Resetting session never fails. + * - The parameters : changes all parameters back to "default". + * This removes any reference to any dictionary too. + * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) + * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) + * - Both : similar to resetting the session, followed by resetting parameters. + */ +ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); + +/*! ZSTD_compress2() : + * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. + * ZSTD_compress2() always starts a new frame. + * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - The function is always blocking, returns when compression is completed. + * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/*********************************************** +* Advanced decompression API (Requires v1.4.0+) +************************************************/ + +/* The advanced API pushes parameters one by one into an existing DCtx context. + * Parameters are sticky, and remain valid for all following frames + * using the same DCtx context. + * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). + * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). + * Therefore, no new decompression function is necessary. 
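*
* A minimal advanced-API sketch (not from the zstd sources; buffer names, the level,
* worker count and window limit are placeholders), setting sticky parameters once and
* then calling the one-shot entry points:
*
*     ZSTD_CCtx* const cctx = ZSTD_createCCtx();
*     ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
*     ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
*     ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 4);    /* errors on single-threaded builds; check return */
*     size_t const cSize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
*
*     ZSTD_DCtx* const dctx = ZSTD_createDCtx();
*     ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 27); /* refuse frames with larger windows */
*     size_t const rSize = ZSTD_decompressDCtx(dctx, rBuff, rCapacity, dst, cSize);
*
*     ZSTD_freeCCtx(cctx); ZSTD_freeDCtx(dctx);
*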
+ */ + +typedef enum { + + ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which + * the streaming API will refuse to allocate memory buffer + * in order to protect the host from unreasonable memory requirements. + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). + * Special: value 0 means "use default maximum windowLog". */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_d_format + * ZSTD_d_stableOutBuffer + * ZSTD_d_forceIgnoreChecksum + * ZSTD_d_refMultipleDDicts + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly + */ + ZSTD_d_experimentalParam1=1000, + ZSTD_d_experimentalParam2=1001, + ZSTD_d_experimentalParam3=1002, + ZSTD_d_experimentalParam4=1003 + +} ZSTD_dParameter; + +/*! ZSTD_dParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - both lower and upper bounds, inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); + +/*! ZSTD_DCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_dParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is only possible during frame initialization (before starting decompression). + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); + +/*! ZSTD_DCtx_reset() : + * Return a DCtx to clean state. + * Session and parameters can be reset jointly or separately. + * Parameters can only be reset when no active frame is being decompressed. + * @return : 0, or an error code, which can be tested with ZSTD_isError() + */ +ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); + + +/**************************** +* Streaming +****************************/ + +typedef struct ZSTD_inBuffer_s { + const void* src; /**< start of input buffer */ + size_t size; /**< size of input buffer */ + size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_inBuffer; + +typedef struct ZSTD_outBuffer_s { + void* dst; /**< start of output buffer */ + size_t size; /**< size of output buffer */ + size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_outBuffer; + + + +/*-*********************************************************************** +* Streaming compression - HowTo +* +* A ZSTD_CStream object is required to track streaming operation. +* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. +* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. +* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. 
+* +* For parallel execution, use one separate ZSTD_CStream per thread. +* +* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. +* +* Parameters are sticky : when starting a new compression on the same context, +* it will re-use the same sticky parameters as previous compression session. +* When in doubt, it's recommended to fully initialize the context before usage. +* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), +* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to +* set more specific parameters, the pledged source size, or load a dictionary. +* +* Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to +* consume input stream. The function will automatically update both `pos` +* fields within `input` and `output`. +* Note that the function may not consume the entire input, for example, because +* the output buffer is already full, in which case `input.pos < input.size`. +* The caller must check if input has been entirely consumed. +* If not, the caller must make some room to receive more compressed data, +* and then present again remaining input data. +* note: ZSTD_e_continue is guaranteed to make some forward progress when called, +* but doesn't guarantee maximal forward progress. This is especially relevant +* when compressing with multiple threads. The call won't block if it can +* consume some input, but if it can't it will wait for some, but not all, +* output to be flushed. +* @return : provides a minimum amount of data remaining to be flushed from internal buffers +* or an error code, which can be tested using ZSTD_isError(). +* +* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, +* using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. +* Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). +* In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the +* operation. +* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if internal buffers are entirely flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). +* +* Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* The epilogue is required for decoders to consider a frame completed. +* flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to +* start a new frame. +* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if frame fully completed and fully flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). 
+*
+* *******************************************************************/
+
+typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */
+ /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */
+/*===== ZSTD_CStream management functions =====*/
+ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void);
+ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); /* accept NULL pointer */
+
+/*===== Streaming compression functions =====*/
+typedef enum {
+ ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */
+ ZSTD_e_flush=1, /* flush any data provided so far,
+ * it creates (at least) one new block, that can be decoded immediately on reception;
+ * frame will continue: any future data can still reference previously compressed data, improving compression.
+ * note : multithreaded compression will block to flush as much output as possible. */
+ ZSTD_e_end=2 /* flush any remaining data _and_ close current frame.
+ * note that frame is only closed after compressed data is fully flushed (return value == 0).
+ * After that point, any additional data starts a new frame.
+ * note : each frame is independent (does not reference any content from previous frame).
+ * note : multithreaded compression will block to flush as much output as possible. */
+} ZSTD_EndDirective;
+
+/*! ZSTD_compressStream2() : Requires v1.4.0+
+ * Behaves about the same as ZSTD_compressStream, with additional control on end directive.
+ * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
+ * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode)
+ * - output->pos must be <= dstCapacity, input->pos must be <= srcSize
+ * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit.
+ * - endOp must be a valid directive
+ * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller.
+ * - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flushes to output whatever is available,
+ * and then immediately returns, just indicating that there is some data remaining to be flushed.
+ * The function nonetheless guarantees forward progress : it will return only after it reads or writes at least 1+ byte.
+ * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking.
+ * - @return provides a minimum amount of data remaining to be flushed from internal buffers
+ * or an error code, which can be tested using ZSTD_isError().
+ * if @return != 0, flush is not fully completed, there is still some data left within internal buffers.
+ * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers.
+ * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed.
+ * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0),
+ * only ZSTD_e_end or ZSTD_e_flush operations are allowed.
+ * Before starting a new compression job, or changing compression parameters,
+ * it is required to fully flush internal buffers.
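+ *
+ * As a small illustration of the "push parameters first" rule above (editor's sketch,
+ * not part of the upstream documentation):
+ *
+ *     ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 4);   // only effective when built with ZSTD_MULTITHREAD
+ *     // ... then drive ZSTD_compressStream2() with ZSTD_e_continue / ZSTD_e_end as described above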
+ */ +ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); + + +/* These buffer sizes are softly recommended. + * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output. + * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(), + * reducing the amount of memory shuffling and buffering, resulting in minor performance savings. + * + * However, note that these recommendations are from the perspective of a C caller program. + * If the streaming interface is invoked from some other language, + * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo, + * a major performance rule is to reduce crossing such interface to an absolute minimum. + * It's not rare that performance ends being spent more into the interface, rather than compression itself. + * In which cases, prefer using large buffers, as large as practical, + * for both input and output, to reduce the nb of roundtrips. + */ +ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */ + + +/* ***************************************************************************** + * This following is a legacy streaming API, available since v1.0+ . + * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). + * It is redundant, but remains fully supported. + * Streaming in combination with advanced parameters and dictionary compression + * can only be used through the new API. + ******************************************************************************/ + +/*! + * Equivalent to: + * + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + */ +ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); +/*! + * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). + * NOTE: The return value is different. ZSTD_compressStream() returns a hint for + * the next read size (if non-zero and not an error). ZSTD_compressStream2() + * returns the minimum nb of bytes left to flush (if non-zero and not an error). + */ +ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ +ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ +ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); + + +/*-*************************************************************************** +* Streaming decompression - HowTo +* +* A ZSTD_DStream object is required to track streaming operations. +* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. +* ZSTD_DStream objects can be re-used multiple times. +* +* Use ZSTD_initDStream() to start a new decompression operation. +* @return : recommended first input size +* Alternatively, use advanced API to set specific properties. +* +* Use ZSTD_decompressStream() repetitively to consume your input. +* The function will update both `pos` fields. 
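+*
+* A minimal consumption loop might look like this (editor's sketch, not part of the
+* upstream documentation; `dctx`, `inBuf`/`inSize` and `outBuf`/`outCapacity` are assumed
+* to be provided by the caller, and `consume()` is a hypothetical sink for decoded bytes):
+*
+*     ZSTD_inBuffer input = { inBuf, inSize, 0 };
+*     while (input.pos < input.size) {
+*         ZSTD_outBuffer output = { outBuf, outCapacity, 0 };
+*         size_t const ret = ZSTD_decompressStream(dctx, &output, &input);
+*         if (ZSTD_isError(ret)) return ret;   // handle error
+*         consume(outBuf, output.pos);
+*     }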
+* If `input.pos < input.size`, some input has not been consumed. +* It's up to the caller to present again remaining data. +* The function tries to flush all data decoded immediately, respecting output buffer size. +* If `output.pos < output.size`, decoder has flushed everything it could. +* But if `output.pos == output.size`, there might be some data left within internal buffers., +* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. +* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. +* @return : 0 when a frame is completely decoded and fully flushed, +* or an error code, which can be tested using ZSTD_isError(), +* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : +* the return value is a suggested next input size (just a hint for better latency) +* that will never request more than the remaining frame size. +* *******************************************************************************/ + +typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ + /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ +/*===== ZSTD_DStream management functions =====*/ +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); +ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); /* accept NULL pointer */ + +/*===== Streaming decompression functions =====*/ + +/* This function is redundant with the advanced API and equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, NULL); + */ +ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); + +ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); + +ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ + + +/************************** +* Simple dictionary API +***************************/ +/*! ZSTD_compress_usingDict() : + * Compression at an explicit compression level using a Dictionary. + * A dictionary can be any arbitrary data segment (also called a prefix), + * or a buffer with specified information (see zdict.h). + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + int compressionLevel); + +/*! ZSTD_decompress_usingDict() : + * Decompression using a known Dictionary. + * Dictionary must be identical to the one used during compression. + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*********************************** + * Bulk processing dictionary API + **********************************/ +typedef struct ZSTD_CDict_s ZSTD_CDict; + +/*! 
ZSTD_createCDict() : + * When compressing multiple messages or blocks using the same dictionary, + * it's recommended to digest the dictionary only once, since it's a costly operation. + * ZSTD_createCDict() will create a state from digesting a dictionary. + * The resulting state can be used for future compression operations with very limited startup cost. + * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. + * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. + * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. + * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, + * in which case the only thing that it transports is the @compressionLevel. + * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, + * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, + int compressionLevel); + +/*! ZSTD_freeCDict() : + * Function frees memory allocated by ZSTD_createCDict(). + * If a NULL pointer is passed, no operation is performed. */ +ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); + +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. + * Note : compression level is _decided at dictionary creation time_, + * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict); + + +typedef struct ZSTD_DDict_s ZSTD_DDict; + +/*! ZSTD_createDDict() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_freeDDict() : + * Function frees memory allocated with ZSTD_createDDict() + * If a NULL pointer is passed, no operation is performed. */ +ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); + +/*! ZSTD_decompress_usingDDict() : + * Decompression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict); + + +/******************************** + * Dictionary helper functions + *******************************/ + +/*! ZSTD_getDictID_fromDict() : Requires v1.4.0+ + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); + +/*! ZSTD_getDictID_fromCDict() : Requires v1.5.0+ + * Provides the dictID of the dictionary loaded into `cdict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict); + +/*! 
 ZSTD_getDictID_fromDDict() : Requires v1.4.0+
+ * Provides the dictID of the dictionary loaded into `ddict`.
+ * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
+
+/*! ZSTD_getDictID_fromFrame() : Requires v1.4.0+
+ * Provides the dictID required to decompress the frame stored within `src`.
+ * If @return == 0, the dictID could not be decoded.
+ * This could be for one of the following reasons :
+ * - The frame does not require a dictionary to be decoded (most common case).
+ * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is hidden information.
+ * Note : this use case also happens when using a non-conformant dictionary.
+ * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`).
+ * - This is not a Zstandard frame.
+ * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
+
+
+/*******************************************************************************
+ * Advanced dictionary and prefix API (Requires v1.4.0+)
+ *
+ * This API allows dictionaries to be used with ZSTD_compress2(),
+ * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and
+ * only reset when the context is reset with ZSTD_reset_parameters or
+ * ZSTD_reset_session_and_parameters. Prefixes are single-use.
+ ******************************************************************************/
+
+
+/*! ZSTD_CCtx_loadDictionary() : Requires v1.4.0+
+ * Create an internal CDict from `dict` buffer.
+ * Decompression will have to use same dictionary.
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()).
+ * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary,
+ * meaning "return to no-dictionary mode".
+ * Note 1 : Dictionary is sticky, it will be used for all future compressed frames.
+ * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters).
+ * Note 2 : Loading a dictionary involves building tables.
+ * It's also a CPU consuming operation, with non-negligible impact on latency.
+ * Tables are dependent on compression parameters, and for this reason,
+ * compression parameters can no longer be changed after loading a dictionary.
+ * Note 3 : `dict` content will be copied internally.
+ * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead.
+ * In such a case, dictionary buffer must outlive its users.
+ * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced()
+ * to precisely select how dictionary content must be interpreted. */
+ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
+
+/*! ZSTD_CCtx_refCDict() : Requires v1.4.0+
+ * Reference a prepared dictionary, to be used for all next compressed frames.
+ * Note that compression parameters are enforced from within CDict,
+ * and supersede any compression parameter previously set within CCtx.
+ * The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs.
+ * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode.
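+ *
+ *  (Editor's sketch, for illustration only and not part of the upstream documentation)
+ *  the usual digest-once pattern looks like:
+ *
+ *      ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuf, dictSize, level);   // digest once
+ *      ...
+ *      ZSTD_CCtx_refCDict(cctx, cdict);
+ *      size_t const csize = ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
+ *      ...
+ *      ZSTD_freeCDict(cdict);                                                  // at shutdown
+ *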
+ * The dictionary will remain valid for future compressed frames using same CCtx. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Referencing a NULL CDict means "return to no-dictionary mode". + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */ +ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); + +/*! ZSTD_CCtx_refPrefix() : Requires v1.4.0+ + * Reference a prefix (single-usage dictionary) for next compressed frame. + * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). + * Decompression will need same prefix to properly regenerate data. + * Compressing with a prefix is similar in outcome as performing a diff and compressing it, + * but performs much faster, especially during decompression (compression speed is tunable with compression level). + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary + * Note 1 : Prefix buffer is referenced. It **must** outlive compression. + * Its content must remain unmodified during compression. + * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, + * ensure that the window size is large enough to contain the entire source. + * See ZSTD_c_windowLog. + * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. + * It's a CPU consuming operation, with non-negligible impact on latency. + * If there is a need to use the same prefix multiple times, consider loadDictionary instead. + * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent). + * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, + const void* prefix, size_t prefixSize); + +/*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+ + * Create an internal DDict from dict buffer, + * to be used to decompress next frames. + * The dictionary remains valid for all future frames, until explicitly invalidated. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Loading a dictionary involves building tables, + * which has a non-negligible impact on CPU usage and latency. + * It's recommended to "load once, use many times", to amortize the cost + * Note 2 :`dict` content will be copied internally, so `dict` can be released after loading. + * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead. + * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of + * how dictionary content is loaded and interpreted. + */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_refDDict() : Requires v1.4.0+ + * Reference a prepared dictionary, to be used to decompress next frames. + * The dictionary remains active for decompression of future frames using same DCtx. 
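+ *
+ *  (Editor's sketch, for illustration only and not part of the upstream documentation)
+ *  the decompression-side counterpart:
+ *
+ *      ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuf, dictSize);   // digest once
+ *      ZSTD_DCtx_refDDict(dctx, ddict);
+ *      size_t const dsize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);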
+ * + * If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function + * will store the DDict references in a table, and the DDict used for decompression + * will be determined at decompression time, as per the dict ID in the frame. + * The memory for the table is allocated on the first call to refDDict, and can be + * freed with ZSTD_freeDCtx(). + * + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Special: referencing a NULL DDict means "return to no-dictionary mode". + * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +/*! ZSTD_DCtx_refPrefix() : Requires v1.4.0+ + * Reference a prefix (single-usage dictionary) to decompress next frame. + * This is the reverse operation of ZSTD_CCtx_refPrefix(), + * and must use the same prefix as the one used during compression. + * Prefix is **only used once**. Reference is discarded at end of frame. + * End of frame is reached when ZSTD_decompressStream() returns 0. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary + * Note 2 : Prefix buffer is referenced. It **must** outlive decompression. + * Prefix buffer must remain unmodified up to the end of frame, + * reached when ZSTD_decompressStream() returns 0. + * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent). + * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section) + * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. + * A full dictionary is more costly, as it requires building tables. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, + const void* prefix, size_t prefixSize); + +/* === Memory management === */ + +/*! ZSTD_sizeof_*() : Requires v1.4.0+ + * These functions give the _current_ memory usage of selected object. + * Note that object memory usage can evolve (increase or decrease) over time. */ +ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); +ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); +ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); +ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); + +#endif /* ZSTD_H_235446 */ + + +/* ************************************************************************************** + * ADVANCED AND EXPERIMENTAL FUNCTIONS + **************************************************************************************** + * The definitions in the following section are considered experimental. + * They are provided for advanced scenarios. + * They should never be used with a dynamic library, as prototypes may change in the future. + * Use them only in association with static linking. 
 + * ***************************************************************************************/
+
+#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
+#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
+
+/* Deprecation warnings :
+ * Should these warnings be a problem, it is generally possible to disable them,
+ * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual.
+ * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS.
+ */
+#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS
+# define ZSTD_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */
+#else
+# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */
+# define ZSTD_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API
+# elif (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5))) || defined(__clang__)
+# define ZSTD_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message)))
+# elif defined(__GNUC__) && (__GNUC__ >= 3)
+# define ZSTD_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated))
+# elif defined(_MSC_VER)
+# define ZSTD_DEPRECATED(message) ZSTDLIB_API __declspec(deprecated(message))
+# else
+# pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler")
+# define ZSTD_DEPRECATED(message) ZSTDLIB_API
+# endif
+#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */
+
+/****************************************************************************************
+ * experimental API (static linking only)
+ ****************************************************************************************
+ * The following symbols and constants
+ * are not planned to join "stable API" status in the near future.
+ * They can still change in future versions.
+ * Some of them are planned to remain in the static_only section indefinitely.
+ * Some of them might be removed in the future (especially when redundant with existing stable functions)
+ * ***************************************************************************************/
+
+#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */
+#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2)
+#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */
+#define ZSTD_SKIPPABLEHEADERSIZE 8
+
+/* compression parameter bounds */
+#define ZSTD_WINDOWLOG_MAX_32 30
+#define ZSTD_WINDOWLOG_MAX_64 31
+#define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64))
+#define ZSTD_WINDOWLOG_MIN 10
+#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30)
+#define ZSTD_HASHLOG_MIN 6
+#define ZSTD_CHAINLOG_MAX_32 29
+#define ZSTD_CHAINLOG_MAX_64 30
+#define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ?
 ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64))
+#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN
+#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1)
+#define ZSTD_SEARCHLOG_MIN 1
+#define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */
+#define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */
+#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX
+#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */
+#define ZSTD_STRATEGY_MIN ZSTD_fast
+#define ZSTD_STRATEGY_MAX ZSTD_btultra2
+
+
+#define ZSTD_OVERLAPLOG_MIN 0
+#define ZSTD_OVERLAPLOG_MAX 9
+
+#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame
+ * requiring larger than (1<<ZSTD_WINDOWLOG_LIMIT_DEFAULT) window size,
+ * to preserve host's memory from unreasonable requirements.
+ * This limit can be overridden using ZSTD_DCtx_setParameter(,ZSTD_d_windowLogMax,).
+ * The limit does not apply for one-pass decoders (such as ZSTD_decompress()), since no additional memory is allocated */
+
+/* LDM parameter bounds */
+#define ZSTD_LDM_HASHLOG_MIN ZSTD_HASHLOG_MIN
+#define ZSTD_LDM_HASHLOG_MAX ZSTD_HASHLOG_MAX
+#define ZSTD_LDM_MINMATCH_MIN 4
+#define ZSTD_LDM_MINMATCH_MAX 4096
+#define ZSTD_LDM_BUCKETSIZELOG_MIN 1
+#define ZSTD_LDM_BUCKETSIZELOG_MAX 8
+#define ZSTD_LDM_HASHRATELOG_MIN 0
+#define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
+
+/* Advanced parameter bounds */
+#define ZSTD_TARGETCBLOCKSIZE_MIN 64
+#define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX
+#define ZSTD_SRCSIZEHINT_MIN 0
+#define ZSTD_SRCSIZEHINT_MAX INT_MAX
+
+/* internal */
+#define ZSTD_HASHLOG3_MAX 17
+
+
+/* --- Advanced types --- */
+
+typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params;
+
+typedef struct {
+ unsigned int offset; /* The offset of the match. (NOT the same as the offset code)
+ * If offset == 0 and matchLength == 0, this sequence represents the last
+ * literals in the block of litLength size.
+ */
+
+ unsigned int litLength; /* Literal length of the sequence. */
+ unsigned int matchLength; /* Match length of the sequence. */
+
+ /* Note: Users of this API may provide a sequence with matchLength == litLength == offset == 0.
+ * In this case, we will treat the sequence as a marker for a block boundary.
+ */
+
+ unsigned int rep; /* Represents which repeat offset is represented by the field 'offset'.
+ * Ranges from [0, 3].
+ *
+ * Repeat offsets are essentially previous offsets from previous sequences sorted in
+ * recency order. For more detail, see doc/zstd_compression_format.md
+ *
+ * If rep == 0, then 'offset' does not contain a repeat offset.
+ * If rep > 0:
+ * If litLength != 0:
+ * rep == 1 --> offset == repeat_offset_1
+ * rep == 2 --> offset == repeat_offset_2
+ * rep == 3 --> offset == repeat_offset_3
+ * If litLength == 0:
+ * rep == 1 --> offset == repeat_offset_2
+ * rep == 2 --> offset == repeat_offset_3
+ * rep == 3 --> offset == repeat_offset_1 - 1
+ *
+ * Note: This field is optional. ZSTD_generateSequences() will calculate the value of
+ * 'rep', but repeat offsets do not necessarily need to be calculated from an external
+ * sequence provider's perspective. For example, ZSTD_compressSequences() does not
+ * use this 'rep' field at all (as of now).
+ */
+} ZSTD_Sequence;
+
+typedef struct {
+ unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */
+ unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */
+ unsigned hashLog; /**< dispatch table : larger == faster, more memory */
+ unsigned searchLog; /**< nb of searches : larger == more compression, slower */
+ unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */
+ unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */
+ ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */
+} ZSTD_compressionParameters;
+
+typedef struct {
+ int contentSizeFlag; /**< 1: content size will be in frame header (when known) */
+ int checksumFlag; /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */
+ int noDictIDFlag; /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */
+} ZSTD_frameParameters;
+
+typedef struct {
+ ZSTD_compressionParameters cParams;
+ ZSTD_frameParameters fParams;
+} ZSTD_parameters;
+
+typedef enum {
+ ZSTD_dct_auto = 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */
+ ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */
+ ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */
+} ZSTD_dictContentType_e;
+
+typedef enum {
+ ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */
+ ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. */
+} ZSTD_dictLoadMethod_e;
+
+typedef enum {
+ ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */
+ ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number.
+ * Useful to save 4 bytes per generated frame. + * Decoder cannot recognise automatically this format, requiring this instruction. */ +} ZSTD_format_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_forceIgnoreChecksum */ + ZSTD_d_validateChecksum = 0, + ZSTD_d_ignoreChecksum = 1 +} ZSTD_forceIgnoreChecksum_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_refMultipleDDicts */ + ZSTD_rmd_refSingleDDict = 0, + ZSTD_rmd_refMultipleDDicts = 1 +} ZSTD_refMultipleDDicts_e; + +typedef enum { + /* Note: this enum and the behavior it controls are effectively internal + * implementation details of the compressor. They are expected to continue + * to evolve and should be considered only in the context of extremely + * advanced performance tuning. + * + * Zstd currently supports the use of a CDict in three ways: + * + * - The contents of the CDict can be copied into the working context. This + * means that the compression can search both the dictionary and input + * while operating on a single set of internal tables. This makes + * the compression faster per-byte of input. However, the initial copy of + * the CDict's tables incurs a fixed cost at the beginning of the + * compression. For small compressions (< 8 KB), that copy can dominate + * the cost of the compression. + * + * - The CDict's tables can be used in-place. In this model, compression is + * slower per input byte, because the compressor has to search two sets of + * tables. However, this model incurs no start-up cost (as long as the + * working context's tables can be reused). For small inputs, this can be + * faster than copying the CDict's tables. + * + * - The CDict's tables are not used at all, and instead we use the working + * context alone to reload the dictionary and use params based on the source + * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). + * This method is effective when the dictionary sizes are very small relative + * to the input size, and the input size is fairly large to begin with. + * + * Zstd has a simple internal heuristic that selects which strategy to use + * at the beginning of a compression. However, if experimentation shows that + * Zstd is making poor choices, it is possible to override that choice with + * this enum. + */ + ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ + ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ + ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ + ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ +} ZSTD_dictAttachPref_e; + +typedef enum { + ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level. + * Negative compression levels will be uncompressed, and positive compression + * levels will be compressed. */ + ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be + * emitted if Huffman compression is not profitable. */ + ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */ +} ZSTD_literalCompressionMode_e; + +typedef enum { + ZSTD_urm_auto = 0, /* Automatically determine whether or not we use row matchfinder */ + ZSTD_urm_disableRowMatchFinder = 1, /* Never use row matchfinder */ + ZSTD_urm_enableRowMatchFinder = 2 /* Always use row matchfinder when applicable */ +} ZSTD_useRowMatchFinderMode_e; + +/*************************************** +* Frame size functions +***************************************/ + +/*! 
ZSTD_findDecompressedSize() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. there should be a frame boundary at `src + srcSize`) + * @return : - decompressed size of all data in all successive frames + * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * note 2 : decompressed size is always present when compression is done with ZSTD_compress() + * note 3 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure result fits within application's authorized limits. + * Each application can set its own limits. + * note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to + * read each contained frame header. This is fast as most of the data is skipped, + * however it does mean that all frame data must be present and valid. */ +ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_decompressBound() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. there should be a frame boundary at `src + srcSize`) + * @return : - upper-bound for the decompressed size of all data in all successive frames + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. + * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`. + * in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value. + * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by: + * upper-bound = # blocks * min(128 KB, Window_Size) + */ +ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); + +/*! ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ +ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); + +typedef enum { + ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */ + ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */ +} ZSTD_sequenceFormat_e; + +/*! ZSTD_generateSequences() : + * Generate sequences using ZSTD_compress2, given a source buffer. + * + * Each block will end with a dummy sequence + * with offset == 0, matchLength == 0, and litLength == length of last literals. + * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0) + * simply acts as a block delimiter. + * + * zc can be used to insert custom compression params. 
 + * This function invokes ZSTD_compress2.
+ *
+ * The output of this function can be fed into ZSTD_compressSequences() with CCtx
+ * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
+ * @return : number of sequences generated
+ */
+
+ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+ size_t outSeqsSize, const void* src, size_t srcSize);
+
+/*! ZSTD_mergeBlockDelimiters() :
+ * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
+ * by merging them into the literals of the next sequence.
+ *
+ * As such, the final generated result has no explicit representation of block boundaries,
+ * and the final last literals segment is not represented in the sequences.
+ *
+ * The output of this function can be fed into ZSTD_compressSequences() with CCtx
+ * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters
+ * @return : number of sequences left after merging
+ */
+ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
+
+/*! ZSTD_compressSequences() :
+ * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst.
+ * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.)
+ * The entire source is compressed into a single frame.
+ *
+ * The compression behavior changes based on cctx params. In particular:
+ * If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain
+ * no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on
+ * the block size derived from the cctx, and sequences may be split. This is the default setting.
+ *
+ * If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain
+ * block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided.
+ *
+ * If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined
+ * behavior. If ZSTD_c_validateSequences == 1, and a sequence is invalid (see doc/zstd_compression_format.md for
+ * specifics regarding offset/matchlength requirements), then the function will bail out and return an error.
+ *
+ * In addition to the two adjustable experimental params, there are other important cctx params.
+ * - ZSTD_c_minMatch MUST be set to a value less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN.
+ * - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression.
+ * - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset
+ * is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md
+ *
+ * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused.
+ * Note 2: Once we integrate the ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly,
+ * and cannot emit an RLE block that disagrees with the repcode history.
+ * @return : final compressed size or a ZSTD error.
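+ *
+ * (Editor's sketch, for illustration only and not part of the upstream documentation)
+ * a round trip through the sequence API, with error checks omitted and `seqs`/`seqsCapacity`
+ * assumed to be sized by the caller:
+ *
+ *     size_t const nbSeqs = ZSTD_generateSequences(cctx, seqs, seqsCapacity, src, srcSize);
+ *     ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters);
+ *     size_t const csize = ZSTD_compressSequences(cctx, dst, dstCapacity, seqs, nbSeqs, src, srcSize);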
+ */ +ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize); + + +/*! ZSTD_writeSkippableFrame() : + * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer. + * + * Skippable frames begin with a a 4-byte magic number. There are 16 possible choices of magic number, + * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15. + * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so + * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. + * + * Returns an error if destination buffer is not large enough, if the source size is not representable + * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid). + * + * @return : number of bytes written or a ZSTD error. + */ +ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, unsigned magicVariant); + + +/*************************************** +* Memory management +***************************************/ + +/*! ZSTD_estimate*() : + * These functions make it possible to estimate memory usage + * of a future {D,C}Ctx, before its creation. + * + * ZSTD_estimateCCtxSize() will provide a memory budget large enough + * for any compression level up to selected one. + * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate + * does not include space for a window buffer. + * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. + * The estimate will assume the input may be arbitrarily large, + * which is the worst case. + * + * When srcSize can be bound by a known and rather "small" value, + * this fact can be used to provide a tighter estimation + * because the CCtx compression context will need less memory. + * This tighter estimation can be provided by more advanced functions + * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), + * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). + * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. + * + * Note 2 : only single-threaded compression is supported. + * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. + */ +ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); + +/*! ZSTD_estimateCStreamSize() : + * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. + * It will also consider src size to be arbitrarily "large", which is worst case. + * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. + * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. + * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. 
 + * Note : CStream size estimation is only correct for single-threaded compression.
+ * ZSTD_DStream memory budget depends on window size.
+ * This information can be passed manually, using ZSTD_estimateDStreamSize,
+ * or deduced from a valid frame header, using ZSTD_estimateDStreamSize_fromFrame();
+ * Note : if streaming is initialized with ZSTD_init?Stream_usingDict(),
+ * an internal ?Dict will be created, whose additional size is not estimated here.
+ * In this case, get total size by adding ZSTD_estimate?DictSize */
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
+ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params);
+ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize);
+ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
+
+/*! ZSTD_estimate?DictSize() :
+ * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict().
+ * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced().
+ * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller.
+ */
+ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel);
+ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod);
+ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod);
+
+/*! ZSTD_initStatic*() :
+ * Initialize an object using a pre-allocated fixed-size buffer.
+ * workspace: The memory area to emplace the object into.
+ * Provided pointer *must be 8-byte aligned*.
+ * Buffer must outlive object.
+ * workspaceSize: Use ZSTD_estimate*Size() to determine
+ * how large workspace must be to support target scenario.
+ * @return : pointer to object (same address as workspace, just different type),
+ * or NULL if error (size too small, incorrect alignment, etc.)
+ * Note : zstd will never resize nor malloc() when using a static buffer.
+ * If the object requires more memory than available,
+ * zstd will just error out (typically ZSTD_error_memory_allocation).
+ * Note 2 : there is no corresponding "free" function.
+ * Since workspace is allocated externally, it must be freed externally too.
+ * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level
+ * into its associated cParams.
+ * Limitation 1 : currently not compatible with internal dictionary creation, triggered by
+ * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict().
+ * Limitation 2 : static cctx currently not compatible with multi-threading.
+ * Limitation 3 : static dctx is incompatible with legacy support.
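+ *
+ *  (Editor's sketch, for illustration only and not part of the upstream documentation)
+ *  pairing an estimate with a static init:
+ *
+ *      size_t const wkspSize = ZSTD_estimateCCtxSize(compressionLevel);
+ *      void* const wksp = malloc(wkspSize);                  // any 8-byte aligned arena works
+ *      ZSTD_CCtx* const cctx = ZSTD_initStaticCCtx(wksp, wkspSize);
+ *      if (cctx == NULL) { ... }                             // size too small, bad alignment, ...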
+ */ +ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */ + +ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */ + +ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams); + +ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType); + + +/*! Custom memory allocation : + * These prototypes make it possible to pass your own allocation/free functions. + * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. + * All allocation/free operations will be completed using these custom variants instead of regular ones. + */ +typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); +typedef void (*ZSTD_freeFunction) (void* opaque, void* address); +typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; +static +#ifdef __GNUC__ +__attribute__((__unused__)) +#endif +ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */ + +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); + +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, + ZSTD_customMem customMem); + +/* ! Thread pool : + * These prototypes make it possible to share a thread pool among multiple compression contexts. + * This can limit resources for applications with multiple threads where each one uses + * a threaded compression mode (via ZSTD_c_nbWorkers parameter). + * ZSTD_createThreadPool creates a new thread pool with a given number of threads. + * Note that the lifetime of such pool must exist while being used. + * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value + * to use an internal thread pool). + * ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer. + */ +typedef struct POOL_ctx_s ZSTD_threadPool; +ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads); +ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); /* accept NULL pointer */ +ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool); + + +/* + * This API is temporary and is expected to change or disappear in the future! 
+ */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CCtx_params* cctxParams, + ZSTD_customMem customMem); + +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem); + + +/*************************************** +* Advanced compression functions +***************************************/ + +/*! ZSTD_createCDict_byReference() : + * Create a digested dictionary for compression + * Dictionary content is just referenced, not duplicated. + * As a consequence, `dictBuffer` **must** outlive CDict, + * and its content must remain unmodified throughout the lifetime of CDict. + * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); + +/*! ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. + * `estimatedSrcSize` value is optional, select 0 if not known */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_getParams() : + * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. + * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ +ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_checkCParams() : + * Ensure param values remain within authorized range. + * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); + +/*! ZSTD_adjustCParams() : + * optimize params for a given `srcSize` and `dictSize`. + * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. + * `dictSize` must be `0` when there is no dictionary. + * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. + * This function never fails (wide contract) */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); + +/*! ZSTD_compress_advanced() : + * Note : this function is now DEPRECATED. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. + * This prototype will generate compilation warnings. */ +ZSTD_DEPRECATED("use ZSTD_compress2") +size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); + +/*! ZSTD_compress_usingCDict_advanced() : + * Note : this function is now DEPRECATED. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. + * This prototype will generate compilation warnings. */ +ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary") +size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams); + + +/*! 
ZSTD_CCtx_loadDictionary_byReference() : + * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. + * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_loadDictionary_advanced() : + * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_CCtx_refPrefix_advanced() : + * Same as ZSTD_CCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/* === experimental parameters === */ +/* these parameters can be used with ZSTD_setParameter() + * they are not guaranteed to remain supported in the future */ + + /* Enables rsyncable mode, + * which makes compressed files more rsync friendly + * by adding periodic synchronization points to the compressed data. + * The target average block size is ZSTD_c_jobSize / 2. + * It's possible to modify the job size to increase or decrease + * the granularity of the synchronization point. + * Once the jobSize is smaller than the window size, + * it will result in compression ratio degradation. + * NOTE 1: rsyncable mode only works when multithreading is enabled. + * NOTE 2: rsyncable performs poorly in combination with long range mode, + * since it will decrease the effectiveness of synchronization points, + * though mileage may vary. + * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s. + * If the selected compression level is already running significantly slower, + * the overall speed won't be significantly impacted. + */ + #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1 + +/* Select a compression format. + * The value must be of type ZSTD_format_e. + * See ZSTD_format_e enum definition for details */ +#define ZSTD_c_format ZSTD_c_experimentalParam2 + +/* Force back-reference distances to remain < windowSize, + * even when referencing into Dictionary content (default:0) */ +#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3 + +/* Controls whether the contents of a CDict + * are used in place, or copied into the working context. + * Accepts values from the ZSTD_dictAttachPref_e enum. + * See the comments on that enum for an explanation of the feature. */ +#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 + +/* Controls how the literals are compressed (default is auto). + * The value must be of type ZSTD_literalCompressionMode_e. + * See ZSTD_literalCompressionMode_e enum definition for details. + */ +#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 + +/* Tries to fit compressed block size to be around targetCBlockSize. + * No target when targetCBlockSize == 0. + * There is no guarantee on compressed block size (default:0) */ +#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 + +/* User's best guess of source size. + * Hint is not valid when srcSizeHint == 0. 
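+ * (Editor's note, for illustration only: a caller might set it as
+ *  ZSTD_CCtx_setParameter(cctx, ZSTD_c_srcSizeHint, (int)expectedSrcSize); where
+ *  `expectedSrcSize` is a hypothetical estimate held by the caller.)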
+ * There is no guarantee that hint is close to actual source size, + * but compression ratio may regress significantly if guess considerably underestimates */ +#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 + +/* Controls whether the new and experimental "dedicated dictionary search + * structure" can be used. This feature is still rough around the edges, be + * prepared for surprising behavior! + * + * How to use it: + * + * When using a CDict, whether to use this feature or not is controlled at + * CDict creation, and it must be set in a CCtxParams set passed into that + * construction (via ZSTD_createCDict_advanced2()). A compression will then + * use the feature or not based on how the CDict was constructed; the value of + * this param, set in the CCtx, will have no effect. + * + * However, when a dictionary buffer is passed into a CCtx, such as via + * ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control + * whether the CDict that is created internally can use the feature or not. + * + * What it does: + * + * Normally, the internal data structures of the CDict are analogous to what + * would be stored in a CCtx after compressing the contents of a dictionary. + * To an approximation, a compression using a dictionary can then use those + * data structures to simply continue what is effectively a streaming + * compression where the simulated compression of the dictionary left off. + * Which is to say, the search structures in the CDict are normally the same + * format as in the CCtx. + * + * It is possible to do better, since the CDict is not like a CCtx: the search + * structures are written once during CDict creation, and then are only read + * after that, while the search structures in the CCtx are both read and + * written as the compression goes along. This means we can choose a search + * structure for the dictionary that is read-optimized. + * + * This feature enables the use of that different structure. + * + * Note that some of the members of the ZSTD_compressionParameters struct have + * different semantics and constraints in the dedicated search structure. It is + * highly recommended that you simply set a compression level in the CCtxParams + * you pass into the CDict creation call, and avoid messing with the cParams + * directly. + * + * Effects: + * + * This will only have any effect when the selected ZSTD_strategy + * implementation supports this feature. Currently, that's limited to + * ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2. + * + * Note that this means that the CDict tables can no longer be copied into the + * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be + * useable. The dictionary can only be attached or reloaded. + * + * In general, you should expect compression to be faster--sometimes very much + * so--and CDict creation to be slightly slower. Eventually, we will probably + * make this mode the default. + */ +#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8 + +/* ZSTD_c_stableInBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the compressor, and + * compression will fail if it ever changes. This means the only flush + * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end + * is not used. 
The data in the ZSTD_inBuffer in the range [src, src + pos)
+ * MUST not be modified during compression or you will get data corruption.
+ *
+ * When this flag is enabled zstd won't allocate an input window buffer,
+ * because the user guarantees it can reference the ZSTD_inBuffer until
+ * the frame is complete. But, it will still allocate an output buffer
+ * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also
+ * avoid the memcpy() from the input buffer to the input window buffer.
+ *
+ * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used.
+ * That means this flag cannot be used with ZSTD_compressStream().
+ *
+ * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using
+ * this flag is ALWAYS memory safe, and will never access out-of-bounds
+ * memory. However, compression WILL fail if you violate the preconditions.
+ *
+ * WARNING: The data in the ZSTD_inBuffer in the range [src, src + pos) MUST
+ * not be modified during compression or you will get data corruption. This
+ * is because zstd needs to reference data in the ZSTD_inBuffer to find
+ * matches. Normally zstd maintains its own window buffer for this purpose,
+ * but passing this flag tells zstd to use the user provided buffer.
+ */
+#define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9
+
+/* ZSTD_c_stableOutBuffer
+ * Experimental parameter.
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Tells the compressor that the ZSTD_outBuffer will not be resized between
+ * calls. Specifically: (out.size - out.pos) will never grow. This gives the
+ * compressor the freedom to say: If the compressed data doesn't fit in the
+ * output buffer then return ZSTD_error_dstSizeTooSmall. This allows us to
+ * always compress directly into the output buffer, instead of compressing
+ * into an internal buffer and copying to the output buffer.
+ *
+ * When this flag is enabled zstd won't allocate an output buffer, because
+ * it can write directly to the ZSTD_outBuffer. It will still allocate the
+ * input window buffer (see ZSTD_c_stableInBuffer).
+ *
+ * Zstd will check that (out.size - out.pos) never grows and return an error
+ * if it does. While not strictly necessary, this should prevent surprises.
+ */
+#define ZSTD_c_stableOutBuffer ZSTD_c_experimentalParam10
+
+/* ZSTD_c_blockDelimiters
+ * Default is 0 == ZSTD_sf_noBlockDelimiters.
+ *
+ * For use with sequence compression API: ZSTD_compressSequences().
+ *
+ * Designates whether or not the given array of ZSTD_Sequence contains block delimiters
+ * and last literals, which are defined as sequences with offset == 0 and matchLength == 0.
+ * See the definition of ZSTD_Sequence for more specifics.
+ */
+#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11
+
+/* ZSTD_c_validateSequences
+ * Default is 0 == disabled. Set to 1 to enable sequence validation.
+ *
+ * For use with sequence compression API: ZSTD_compressSequences().
+ * Designates whether or not we validate sequences provided to ZSTD_compressSequences()
+ * during function execution.
+ *
+ * Without validation, providing a sequence that does not conform to the zstd spec will cause
+ * undefined behavior, and may produce a corrupted block.
+ *
+ * With validation enabled, if a sequence is invalid (see doc/zstd_compression_format.md for
+ * specifics regarding offset/matchlength requirements) then the function will bail out and
+ * return an error.
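+ *
+ * A minimal illustrative sketch (error handling omitted; the context name
+ * `cctx` is just a placeholder): validation is enabled with a single
+ * parameter set on the compression context before ZSTD_compressSequences()
+ * is called.
+ *
+ *   ZSTD_CCtx* const cctx = ZSTD_createCCtx();
+ *   ZSTD_CCtx_setParameter(cctx, ZSTD_c_validateSequences, 1);
+ *   // ZSTD_compressSequences(cctx, ...) now fails with an error on a
+ *   // malformed sequence instead of emitting a corrupted block.
+ *   ZSTD_freeCCtx(cctx);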
+ * + */ +#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12 + +/* ZSTD_c_splitBlocks + * Default is 0 == disabled. Set to 1 to enable block splitting. + * + * Will attempt to split blocks in order to improve compression ratio at the cost of speed. + */ +#define ZSTD_c_splitBlocks ZSTD_c_experimentalParam13 + +/* ZSTD_c_useRowMatchFinder + * Default is ZSTD_urm_auto. + * Controlled with ZSTD_useRowMatchFinderMode_e enum. + * + * By default, in ZSTD_urm_auto, when finalizing the compression parameters, the library + * will decide at runtime whether to use the row-based matchfinder based on support for SIMD + * instructions as well as the windowLog. + * + * Set to ZSTD_urm_disableRowMatchFinder to never use row-based matchfinder. + * Set to ZSTD_urm_enableRowMatchFinder to force usage of row-based matchfinder. + */ +#define ZSTD_c_useRowMatchFinder ZSTD_c_experimentalParam14 + +/* ZSTD_c_deterministicRefPrefix + * Default is 0 == disabled. Set to 1 to enable. + * + * Zstd produces different results for prefix compression when the prefix is + * directly adjacent to the data about to be compressed vs. when it isn't. + * This is because zstd detects that the two buffers are contiguous and it can + * use a more efficient match finding algorithm. However, this produces different + * results than when the two buffers are non-contiguous. This flag forces zstd + * to always load the prefix in non-contiguous mode, even if it happens to be + * adjacent to the data, to guarantee determinism. + * + * If you really care about determinism when using a dictionary or prefix, + * like when doing delta compression, you should select this option. It comes + * at a speed penalty of about ~2.5% if the dictionary and data happened to be + * contiguous, and is free if they weren't contiguous. We don't expect that + * intentionally making the dictionary and data contiguous will be worth the + * cost to memcpy() the data. + */ +#define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15 + +/*! ZSTD_CCtx_getParameter() : + * Get the requested compression parameter value, selected by enum ZSTD_cParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); + + +/*! ZSTD_CCtx_params : + * Quick howto : + * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure + * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into + * an existing ZSTD_CCtx_params structure. + * This is similar to + * ZSTD_CCtx_setParameter(). + * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to + * an existing CCtx. + * These parameters will be applied to + * all subsequent frames. + * - ZSTD_compressStream2() : Do compression using the CCtx. + * - ZSTD_freeCCtxParams() : Free the memory, accept NULL pointer. + * + * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() + * for static allocation of CCtx for single-threaded compression. + */ +ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); +ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); /* accept NULL pointer */ + +/*! ZSTD_CCtxParams_reset() : + * Reset params to default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); + +/*! ZSTD_CCtxParams_init() : + * Initializes the compression parameters of cctxParams according to + * compression level. All other parameters are reset to their default values. 
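+ *
+ * A minimal sketch of the quick howto above (illustrative only; `cctx` is
+ * assumed to be an existing ZSTD_CCtx*, and error checks are omitted):
+ *
+ *   ZSTD_CCtx_params* const params = ZSTD_createCCtxParams();
+ *   ZSTD_CCtxParams_init(params, 3);                        // level-3 defaults
+ *   ZSTD_CCtxParams_setParameter(params, ZSTD_c_checksumFlag, 1);
+ *   ZSTD_CCtx_setParametersUsingCCtxParams(cctx, params);   // applies to subsequent frames
+ *   // ... compress with ZSTD_compressStream2(cctx, ...) ...
+ *   ZSTD_freeCCtxParams(params);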
+ */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); + +/*! ZSTD_CCtxParams_init_advanced() : + * Initializes the compression and frame parameters of cctxParams according to + * params. All other parameters are reset to their default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); + +/*! ZSTD_CCtxParams_setParameter() : Requires v1.4.0+ + * Similar to ZSTD_CCtx_setParameter. + * Set one compression parameter, selected by enum ZSTD_cParameter. + * Parameters must be applied to a ZSTD_CCtx using + * ZSTD_CCtx_setParametersUsingCCtxParams(). + * @result : a code representing success or failure (which can be tested with + * ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtxParams_getParameter() : + * Similar to ZSTD_CCtx_getParameter. + * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); + +/*! ZSTD_CCtx_setParametersUsingCCtxParams() : + * Apply a set of ZSTD_CCtx_params to the compression context. + * This can be done even after compression is started, + * if nbWorkers==0, this will have no impact until a new compression is started. + * if nbWorkers>=1, new parameters will be picked up at next job, + * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); + +/*! ZSTD_compressStream2_simpleArgs() : + * Same as ZSTD_compressStream2(), + * but using only integral types as arguments. + * This variant might be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp); + + +/*************************************** +* Advanced decompression functions +***************************************/ + +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. */ +ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * Dictionary content is referenced, and therefore stays in dictBuffer. + * It is important that dictBuffer outlives DDict, + * it must remain read accessible throughout the lifetime of DDict */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_byReference() : + * Same as ZSTD_DCtx_loadDictionary(), + * but references `dict` content instead of copying it into `dctx`. 
+ * This saves memory if `dict` remains around., + * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_advanced() : + * Same as ZSTD_DCtx_loadDictionary(), + * but gives direct control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?). */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_refPrefix_advanced() : + * Same as ZSTD_DCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_setMaxWindowSize() : + * Refuses allocating internal buffers for frames requiring a window size larger than provided limit. + * This protects a decoder context from reserving too much memory for itself (potential attack scenario). + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); + +/*! ZSTD_DCtx_getParameter() : + * Get the requested decompression parameter value, selected by enum ZSTD_dParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value); + +/* ZSTD_d_format + * experimental parameter, + * allowing selection between ZSTD_format_e input compression formats + */ +#define ZSTD_d_format ZSTD_d_experimentalParam1 +/* ZSTD_d_stableOutBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the decompressor, and + * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer + * MUST be large enough to fit the entire decompressed frame. This will be + * checked when the frame content size is known. The data in the ZSTD_outBuffer + * in the range [dst, dst + pos) MUST not be modified during decompression + * or you will get data corruption. + * + * When this flags is enabled zstd won't allocate an output buffer, because + * it can write directly to the ZSTD_outBuffer, but it will still allocate + * an input buffer large enough to fit any compressed block. This will also + * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer. + * If you need to avoid the input buffer allocation use the buffer-less + * streaming API. + * + * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using + * this flag is ALWAYS memory safe, and will never access out-of-bounds + * memory. However, decompression WILL fail if you violate the preconditions. 
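+ *
+ * A rough usage sketch (illustrative; `dctx`, `dstBuffer`, `dstCapacity`,
+ * `srcBuffer` and `srcSize` are assumed to exist, and error checks are
+ * omitted). The caller commits to a single output buffer large enough for
+ * the whole frame and never moves or resizes it:
+ *
+ *   ZSTD_DCtx_setParameter(dctx, ZSTD_d_stableOutBuffer, 1);
+ *   ZSTD_outBuffer out = { dstBuffer, dstCapacity, 0 };  // reused for every call
+ *   ZSTD_inBuffer  in  = { srcBuffer, srcSize, 0 };
+ *   while (in.pos < in.size) { ZSTD_decompressStream(dctx, &out, &in); }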
+ * + * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST + * not be modified during decompression or you will get data corruption. This + * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate + * matches. Normally zstd maintains its own buffer for this purpose, but passing + * this flag tells zstd to use the user provided buffer. + */ +#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2 + +/* ZSTD_d_forceIgnoreChecksum + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * Tells the decompressor to skip checksum validation during decompression, regardless + * of whether checksumming was specified during compression. This offers some + * slight performance benefits, and may be useful for debugging. + * Param has values of type ZSTD_forceIgnoreChecksum_e + */ +#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3 + +/* ZSTD_d_refMultipleDDicts + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * If enabled and dctx is allocated on the heap, then additional memory will be allocated + * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_refDDict() + * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead + * store all references. At decompression time, the appropriate dictID is selected + * from the set of DDicts based on the dictID in the frame. + * + * Usage is simply calling ZSTD_refDDict() on multiple dict buffers. + * + * Param has values of byte ZSTD_refMultipleDDicts_e + * + * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory + * allocation for the hash table. ZSTD_freeDCtx() also frees this memory. + * Memory is allocated as per ZSTD_DCtx::customMem. + * + * Although this function allocates memory for the table, the user is still responsible for + * memory management of the underlying ZSTD_DDict* themselves. + */ +#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4 + + +/*! ZSTD_DCtx_setFormat() : + * This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter(). + * Instruct the decoder context about what kind of data to decode next. + * This instruction is mandatory to decode data without a fully-formed header, + * such ZSTD_f_zstd1_magicless for example. + * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ +ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead") +size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); + +/*! ZSTD_decompressStream_simpleArgs() : + * Same as ZSTD_decompressStream(), + * but using only integral types as arguments. + * This can be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos); + + +/******************************************************************** +* Advanced streaming functions +* Warning : most of these functions are now redundant with the Advanced API. +* Once Advanced API reaches "stable" status, +* redundant functions will be deprecated, and then at some point removed. +********************************************************************/ + +/*===== Advanced Streaming compression functions =====*/ + +/*! 
ZSTD_initCStream_srcSize() : + * This function is DEPRECATED, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * pledgedSrcSize must be correct. If it is not known at init time, use + * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, + * "0" also disables frame content size field. It may be enabled in the future. + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, + int compressionLevel, + unsigned long long pledgedSrcSize); + +/*! ZSTD_initCStream_usingDict() : + * This function is DEPRECATED, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * Creates of an internal CDict (incompatible with static CCtx), except if + * dict == NULL or dictSize < 8, in which case no dict is used. + * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if + * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + int compressionLevel); + +/*! ZSTD_initCStream_advanced() : + * This function is DEPRECATED, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd parameter and leave the rest as-is. + * for ((param, value) : params) { + * ZSTD_CCtx_setParameter(zcs, param, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. + * pledgedSrcSize must be correct. + * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, + unsigned long long pledgedSrcSize); + +/*! ZSTD_initCStream_usingCDict() : + * This function is DEPRECATED, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * note : cdict will just be referenced, and must outlive compression session + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); + +/*! ZSTD_initCStream_usingCDict_advanced() : + * This function is DEPRECATED, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. 
+ * for ((fParam, value) : fParams) { + * ZSTD_CCtx_setParameter(zcs, fParam, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. + * pledgedSrcSize must be correct. If srcSize is not known at init time, use + * value ZSTD_CONTENTSIZE_UNKNOWN. + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize); + +/*! ZSTD_resetCStream() : + * This function is DEPRECATED, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * Note: ZSTD_resetCStream() interprets pledgedSrcSize == 0 as ZSTD_CONTENTSIZE_UNKNOWN, but + * ZSTD_CCtx_setPledgedSrcSize() does not do the same, so ZSTD_CONTENTSIZE_UNKNOWN must be + * explicitly specified. + * + * start a new frame, using same parameters from previous frame. + * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. + * Note that zcs must be init at least once before using ZSTD_resetCStream(). + * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. + * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. + * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, + * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. + * @return : 0, or an error code (which can be tested using ZSTD_isError()) + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); + + +typedef struct { + unsigned long long ingested; /* nb input bytes read and buffered */ + unsigned long long consumed; /* nb input bytes actually compressed */ + unsigned long long produced; /* nb of compressed bytes generated and buffered */ + unsigned long long flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */ + unsigned currentJobID; /* MT only : latest started job nb */ + unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */ +} ZSTD_frameProgression; + +/* ZSTD_getFrameProgression() : + * tells how much data has been ingested (read from input) + * consumed (input actually compressed) and produced (output) for current frame. + * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed. + * Aggregates progression inside active worker threads. + */ +ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); + +/*! ZSTD_toFlushNow() : + * Tell how many bytes are ready to be flushed immediately. + * Useful for multithreading scenarios (nbWorkers >= 1). + * Probe the oldest active job, defined as oldest job not yet entirely flushed, + * and check its output buffer. + * @return : amount of data stored in oldest job and ready to be flushed immediately. 
+ * if @return == 0, it means either : + * + there is no active job (could be checked with ZSTD_frameProgression()), or + * + oldest job is still actively compressing data, + * but everything it has produced has also been flushed so far, + * therefore flush speed is limited by production speed of oldest job + * irrespective of the speed of concurrent (and newer) jobs. + */ +ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); + + +/*===== Advanced Streaming decompression functions =====*/ + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); + * + * note: no dictionary will be used if dict == NULL or dictSize < 8 + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, ddict); + * + * note : ddict is referenced, it must outlive decompression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * + * re-use decompression parameters from previous init; saves dictionary loading + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); + + +/********************************************************************* +* Buffer-less and synchronous inner streaming functions +* +* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. +* But it's also a complex one, with several restrictions, documented below. +* Prefer normal streaming API for an easier experience. +********************************************************************* */ + +/** + Buffer-less streaming compression (synchronous mode) + + A ZSTD_CCtx object is required to track streaming operations. + Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. + ZSTD_CCtx object can be re-used multiple times within successive compression operations. + + Start by initializing a context. + Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression. + It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() + + Then, consume your input using ZSTD_compressContinue(). + There are some important considerations to keep in mind when using this advanced function : + - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only. + - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks. + - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. + Worst case evaluation is provided by ZSTD_compressBound(). + ZSTD_compressContinue() doesn't guarantee recover after a failed compression. + - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog). 
+ It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks) + - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps. + In which case, it will "discard" the relevant memory section from its history. + + Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. + It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. + Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. + + `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. +*/ + +/*===== Buffer-less streaming compression functions =====*/ +ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */ +ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */ +ZSTD_DEPRECATED("use advanced API to access custom parameters") +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTD_DEPRECATED("use advanced API to access custom parameters") +size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ +/** + Buffer-less streaming decompression (synchronous mode) + + A ZSTD_DCtx object is required to track streaming operations. + Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. + A ZSTD_DCtx object can be re-used multiple times. + + First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). + Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. + Data fragment must be large enough to ensure successful decoding. + `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. + @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. + >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. + errorCode, which can be tested using ZSTD_isError(). + + It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, + such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). + Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. 
+ As a consequence, check that values remain within valid application range. + For example, do not allocate memory blindly, check that `windowSize` is within expectation. + Each application can set its own limits, depending on local restrictions. + For extended interoperability, it is recommended to support `windowSize` of at least 8 MB. + + ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes. + ZSTD_decompressContinue() is very sensitive to contiguity, + if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, + or that previous contiguous segment is large enough to properly handle maximum back-reference distance. + There are multiple ways to guarantee this condition. + + The most memory efficient way is to use a round buffer of sufficient size. + Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), + which can @return an error code if required value is too large for current system (in 32-bits mode). + In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, + up to the moment there is not enough room left in the buffer to guarantee decoding another full block, + which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. + At which point, decoding can resume from the beginning of the buffer. + Note that already decoded data stored in the buffer should be flushed before being overwritten. + + There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory. + + Finally, if you control the compression process, you can also ignore all buffer size rules, + as long as the encoder and decoder progress in "lock-step", + aka use exactly the same buffer sizes, break contiguity at the same place, etc. + + Once buffers are setup, start decompression, with ZSTD_decompressBegin(). + If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict(). + + Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. + ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. + + @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). + It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. + It can also be an error code, which can be tested with ZSTD_isError(). + + A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. + Context can then be reset to start a new decompression. + + Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). + This information is not required to properly decode a frame. + + == Special case : skippable frames == + + Skippable frames allow integration of user-defined data into a flow of concatenated frames. + Skippable frames will be ignored (skipped) by decompressor. + The format of skippable frames is as follows : + a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F + b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits + c) Frame Content - any content (User Data) of length equal to Frame Size + For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame. 
+ For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content. +*/ + +/*===== Buffer-less streaming decompression functions =====*/ +typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; +typedef struct { + unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ + unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ + unsigned blockSizeMax; + ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ + unsigned headerSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTD_frameHeader; + +/*! ZSTD_getFrameHeader() : + * decode Frame Header, or requires larger `srcSize`. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */ +/*! ZSTD_getFrameHeader_advanced() : + * same as ZSTD_getFrameHeader(), + * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ +ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); +ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* misc */ +ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); +typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; +ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); + + + + +/* ============================ */ +/** Block level API */ +/* ============================ */ + +/*! + Block functions produce and decode raw zstd blocks, without frame metadata. + Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). + But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. + + A few rules to respect : + - Compressing and decompressing require a context structure + + Use ZSTD_createCCtx() and ZSTD_createDCtx() + - It is necessary to init context before starting + + compression : any ZSTD_compressBegin*() variant, including with dictionary + + decompression : any ZSTD_decompressBegin*() variant, including with dictionary + + copyCCtx() and copyDCtx() can be used too + - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB + + If input is larger than a block size, it's necessary to split input data into multiple blocks + + For inputs larger than a single block, consider using regular ZSTD_compress() instead. 
+ Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. + - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! + ===> In which case, nothing is produced into `dst` ! + + User __must__ test for such outcome and deal directly with uncompressed data + + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. + Doing so would mess up with statistics history, leading to potential data corruption. + + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! + + In case of multiple successive blocks, should some of them be uncompressed, + decoder must be informed of their existence in order to follow proper history. + Use ZSTD_insertBlock() for such a case. +*/ + +/*===== Raw zstd block functions =====*/ +ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ + + +#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif diff --git a/libkram/zstd/zstddeclib.cpp b/libkram/zstd/zstddeclib.cpp index 7d8cf975..c4f292fb 100644 --- a/libkram/zstd/zstddeclib.cpp +++ b/libkram/zstd/zstddeclib.cpp @@ -1,47 +1,166 @@ /** * \file zstddeclib.c * Single-file Zstandard decompressor. - * + * * Generate using: * \code - * combine.sh -r ../../lib -r ../../lib/common -r ../../lib/decompress -o zstddeclib.c zstddeclib-in.c + * combine.sh -r ../../lib -o zstddeclib.c zstddeclib-in.c * \endcode */ -/* - * BSD License - * - * For Zstandard software - * - * Copyright 2016-present, Facebook, Inc. All rights reserved. - * SPDX-License-Identifier: BSD-3-Clause +/* + * Copyright (c) 2016-2021, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ /* * Settings to bake for the standalone decompressor. - * + * * Note: It's important that none of these affects 'zstd.h' (only the * implementation files we're amalgamating). - * + * * Note: MEM_MODULE stops xxhash redefining BYTE, U16, etc., which are also * defined in mem.h (breaking C99 compatibility). + * + * Note: the undefs for xxHash allow Zstd's implementation to coinside with with + * standalone xxHash usage (with global defines). */ #define DEBUGLEVEL 0 #define MEM_MODULE +#undef XXH_NAMESPACE #define XXH_NAMESPACE ZSTD_ +#undef XXH_PRIVATE_API #define XXH_PRIVATE_API +#undef XXH_INLINE_ALL #define XXH_INLINE_ALL #define ZSTD_LEGACY_SUPPORT 0 -#define ZSTD_LIB_COMPRESSION 0 -#define ZSTD_LIB_DEPRECATED 0 -#define ZSTD_NOBENCH #define ZSTD_STRIP_ERROR_STRINGS +#define ZSTD_TRACE 0 + +/* Include zstd_deps.h first with all the options we need enabled. */ +#define ZSTD_DEPS_NEED_MALLOC +/**** start inlining common/zstd_deps.h ****/ +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. 
+ * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* This file provides common libc dependencies that zstd requires. + * The purpose is to allow replacing this file with a custom implementation + * to compile zstd without libc support. + */ + +/* Need: + * NULL + * INT_MAX + * UINT_MAX + * ZSTD_memcpy() + * ZSTD_memset() + * ZSTD_memmove() + */ +#ifndef ZSTD_DEPS_COMMON +#define ZSTD_DEPS_COMMON + +#include +#include +#include + +#if defined(__GNUC__) && __GNUC__ >= 4 +# define ZSTD_memcpy(d,s,l) __builtin_memcpy((d),(s),(l)) +# define ZSTD_memmove(d,s,l) __builtin_memmove((d),(s),(l)) +# define ZSTD_memset(p,v,l) __builtin_memset((p),(v),(l)) +#else +# define ZSTD_memcpy(d,s,l) memcpy((d),(s),(l)) +# define ZSTD_memmove(d,s,l) memmove((d),(s),(l)) +# define ZSTD_memset(p,v,l) memset((p),(v),(l)) +#endif + +#endif /* ZSTD_DEPS_COMMON */ + +/* Need: + * ZSTD_malloc() + * ZSTD_free() + * ZSTD_calloc() + */ +#ifdef ZSTD_DEPS_NEED_MALLOC +#ifndef ZSTD_DEPS_MALLOC +#define ZSTD_DEPS_MALLOC + +#include + +#define ZSTD_malloc(s) malloc(s) +#define ZSTD_calloc(n,s) calloc((n), (s)) +#define ZSTD_free(p) free((p)) + +#endif /* ZSTD_DEPS_MALLOC */ +#endif /* ZSTD_DEPS_NEED_MALLOC */ + +/* + * Provides 64-bit math support. + * Need: + * U64 ZSTD_div64(U64 dividend, U32 divisor) + */ +#ifdef ZSTD_DEPS_NEED_MATH64 +#ifndef ZSTD_DEPS_MATH64 +#define ZSTD_DEPS_MATH64 + +#define ZSTD_div64(dividend, divisor) ((dividend) / (divisor)) + +#endif /* ZSTD_DEPS_MATH64 */ +#endif /* ZSTD_DEPS_NEED_MATH64 */ + +/* Need: + * assert() + */ +#ifdef ZSTD_DEPS_NEED_ASSERT +#ifndef ZSTD_DEPS_ASSERT +#define ZSTD_DEPS_ASSERT -/**** start inlining debug.c ****/ +#include + +#endif /* ZSTD_DEPS_ASSERT */ +#endif /* ZSTD_DEPS_NEED_ASSERT */ + +/* Need: + * ZSTD_DEBUG_PRINT() + */ +#ifdef ZSTD_DEPS_NEED_IO +#ifndef ZSTD_DEPS_IO +#define ZSTD_DEPS_IO + +#include +#define ZSTD_DEBUG_PRINT(...) fprintf(stderr, __VA_ARGS__) + +#endif /* ZSTD_DEPS_IO */ +#endif /* ZSTD_DEPS_NEED_IO */ + +/* Only requested when is known to be present. + * Need: + * intptr_t + */ +#ifdef ZSTD_DEPS_NEED_STDINT +#ifndef ZSTD_DEPS_STDINT +#define ZSTD_DEPS_STDINT + +#include + +#endif /* ZSTD_DEPS_STDINT */ +#endif /* ZSTD_DEPS_NEED_STDINT */ +/**** ended inlining common/zstd_deps.h ****/ + +/**** start inlining common/debug.c ****/ /* ****************************************************************** * debug * Part of FSE library - * - * Copyright 2013-2020, Yann Collet, Facebook, Inc. - * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-or-later + * Copyright (c) Yann Collet, Facebook, Inc. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -63,7 +182,7 @@ /* ****************************************************************** * debug * Part of FSE library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -113,15 +232,6 @@ extern "C" { #endif -/* DEBUGFILE can be defined externally, - * typically through compiler command line. - * note : currently useless. 
- * Value must be stderr or stdout */ -#ifndef DEBUGFILE -# define DEBUGFILE stderr -#endif - - /* recommended values for DEBUGLEVEL : * 0 : release mode, no debug, all run-time checks disabled * 1 : enables assert() only, no display @@ -138,7 +248,8 @@ extern "C" { */ #if (DEBUGLEVEL>=1) -# include +# define ZSTD_DEPS_NEED_ASSERT +/**** skipping file: zstd_deps.h ****/ #else # ifndef assert /* assert may be already defined, due to prior #include */ # define assert(condition) ((void)0) /* disable assert (default) */ @@ -146,7 +257,8 @@ extern "C" { #endif #if (DEBUGLEVEL>=2) -# include +# define ZSTD_DEPS_NEED_IO +/**** skipping file: zstd_deps.h ****/ extern int g_debuglevel; /* the variable is only declared, it actually lives in debug.c, and is shared by the whole process. @@ -154,14 +266,14 @@ extern int g_debuglevel; /* the variable is only declared, It's useful when enabling very verbose levels on selective conditions (such as position in src) */ -# define RAWLOG(l, ...) { \ - if (l<=g_debuglevel) { \ - fprintf(stderr, __VA_ARGS__); \ +# define RAWLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + ZSTD_DEBUG_PRINT(__VA_ARGS__); \ } } -# define DEBUGLOG(l, ...) { \ - if (l<=g_debuglevel) { \ - fprintf(stderr, __FILE__ ": " __VA_ARGS__); \ - fprintf(stderr, " \n"); \ +# define DEBUGLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \ + ZSTD_DEBUG_PRINT(" \n"); \ } } #else # define RAWLOG(l, ...) {} /* disabled */ @@ -177,11 +289,11 @@ extern int g_debuglevel; /* the variable is only declared, /**** ended inlining debug.h ****/ int g_debuglevel = DEBUGLEVEL; -/**** ended inlining debug.c ****/ -/**** start inlining entropy_common.c ****/ +/**** ended inlining common/debug.c ****/ +/**** start inlining common/entropy_common.c ****/ /* ****************************************************************** * Common functions of New Generation Entropy library - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * * You can contact the author at : * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -198,7 +310,7 @@ int g_debuglevel = DEBUGLEVEL; ***************************************/ /**** start inlining mem.h ****/ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -217,48 +329,233 @@ extern "C" { /*-**************************************** * Dependencies ******************************************/ -#include /* size_t, ptrdiff_t */ -#include /* memcpy */ +#include /* size_t, ptrdiff_t */ +/**** start inlining compiler.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ +#ifndef ZSTD_COMPILER_H +#define ZSTD_COMPILER_H -/*-**************************************** +/*-******************************************************* * Compiler specifics -******************************************/ -#if defined(_MSC_VER) /* Visual Studio */ -# include /* _byteswap_ulong */ -# include /* _byteswap_* */ +*********************************************************/ +/* force inlining */ + +#if !defined(ZSTD_NO_INLINE) +#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# define INLINE_KEYWORD inline +#else +# define INLINE_KEYWORD #endif -#if defined(__GNUC__) -# define MEM_STATIC static __inline __attribute__((unused)) -#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define MEM_STATIC static inline + +#if defined(__GNUC__) || defined(__ICCARM__) +# define FORCE_INLINE_ATTR __attribute__((always_inline)) #elif defined(_MSC_VER) -# define MEM_STATIC static __inline +# define FORCE_INLINE_ATTR __forceinline #else -# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +# define FORCE_INLINE_ATTR +#endif + +#else + +#define INLINE_KEYWORD +#define FORCE_INLINE_ATTR + +#endif + +/** + On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC). + This explictly marks such functions as __cdecl so that the code will still compile + if a CC other than __cdecl has been made the default. +*/ +#if defined(_MSC_VER) +# define WIN_CDECL __cdecl +#else +# define WIN_CDECL +#endif + +/** + * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant + * parameters. They must be inlined for the compiler to eliminate the constant + * branches. + */ +#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR +/** + * HINT_INLINE is used to help the compiler generate better code. It is *not* + * used for "templates", so it can be tweaked based on the compilers + * performance. + * + * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the + * always_inline attribute. + * + * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline + * attribute. + */ +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 +# define HINT_INLINE static INLINE_KEYWORD +#else +# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR +#endif + +/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ +#if defined(__GNUC__) +# define UNUSED_ATTR __attribute__((unused)) +#else +# define UNUSED_ATTR +#endif + +/* force no inlining */ +#ifdef _MSC_VER +# define FORCE_NOINLINE static __declspec(noinline) +#else +# if defined(__GNUC__) || defined(__ICCARM__) +# define FORCE_NOINLINE static __attribute__((__noinline__)) +# else +# define FORCE_NOINLINE static +# endif +#endif + + +/* target attribute */ +#ifndef __has_attribute + #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ +#endif +#if defined(__GNUC__) || defined(__ICCARM__) +# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) +#else +# define TARGET_ATTRIBUTE(target) +#endif + +/* Enable runtime BMI2 dispatch based on the CPU. + * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. 
+ */ +#ifndef DYNAMIC_BMI2 + #if ((defined(__clang__) && __has_attribute(__target__)) \ + || (defined(__GNUC__) \ + && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ + && (defined(__x86_64__) || defined(_M_X86)) \ + && !defined(__BMI2__) + # define DYNAMIC_BMI2 1 + #else + # define DYNAMIC_BMI2 0 + #endif +#endif + +/* prefetch + * can be disabled, by declaring NO_PREFETCH build macro */ +#if defined(NO_PREFETCH) +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ +#else +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ +# include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ +# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) +# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) +# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) +# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) +# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) +# elif defined(__aarch64__) +# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))) +# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))) +# else +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ +# endif +#endif /* NO_PREFETCH */ + +#define CACHELINE_SIZE 64 + +#define PREFETCH_AREA(p, s) { \ + const char* const _ptr = (const char*)(p); \ + size_t const _size = (size_t)(s); \ + size_t _pos; \ + for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ + PREFETCH_L2(_ptr + _pos); \ + } \ +} + +/* vectorization + * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */ +#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) +# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5) +# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) +# else +# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")") +# endif +#else +# define DONT_VECTORIZE +#endif + +/* Tell the compiler that a branch is likely or unlikely. + * Only use these macros if it causes the compiler to generate better code. + * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc + * and clang, please do. + */ +#if defined(__GNUC__) +#define LIKELY(x) (__builtin_expect((x), 1)) +#define UNLIKELY(x) (__builtin_expect((x), 0)) +#else +#define LIKELY(x) (x) +#define UNLIKELY(x) (x) +#endif + +/* disable warnings */ +#ifdef _MSC_VER /* Visual Studio */ +# include /* For Visual 2005 */ +# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ +# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ +# pragma warning(disable : 4324) /* disable: C4324: padded structure */ +#endif + +/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/ +#ifndef STATIC_BMI2 +# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) +# ifdef __AVX2__ //MSVC does not have a BMI2 specific flag, but every CPU that supports AVX2 also supports BMI2 +# define STATIC_BMI2 1 +# endif +# endif +#endif + +#ifndef STATIC_BMI2 + #define STATIC_BMI2 0 #endif +/* compat. 
with non-clang compilers */ #ifndef __has_builtin -# define __has_builtin(x) 0 /* compat. with non-clang compilers */ +# define __has_builtin(x) 0 #endif -/* code only tested on 32 and 64 bits systems */ -#define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; } -MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } +/* compat. with non-clang compilers */ +#ifndef __has_feature +# define __has_feature(x) 0 +#endif /* detects whether we are being compiled under msan */ -#if defined (__has_feature) +#ifndef ZSTD_MEMORY_SANITIZER # if __has_feature(memory_sanitizer) -# define MEMORY_SANITIZER 1 +# define ZSTD_MEMORY_SANITIZER 1 +# else +# define ZSTD_MEMORY_SANITIZER 0 # endif #endif -#if defined (MEMORY_SANITIZER) +#if ZSTD_MEMORY_SANITIZER /* Not all platforms that support msan provide sanitizers/msan_interface.h. * We therefore declare the functions we need ourselves, rather than trying to * include the header file... */ - -#include /* intptr_t */ +#include /* size_t */ +#define ZSTD_DEPS_NEED_STDINT +/**** skipping file: zstd_deps.h ****/ /* Make memory region fully initialized (without changing its contents). */ void __msan_unpoison(const volatile void *a, size_t size); @@ -274,18 +571,21 @@ intptr_t __msan_test_shadow(const volatile void *x, size_t size); #endif /* detects whether we are being compiled under asan */ -#if defined (__has_feature) +#ifndef ZSTD_ADDRESS_SANITIZER # if __has_feature(address_sanitizer) -# define ADDRESS_SANITIZER 1 +# define ZSTD_ADDRESS_SANITIZER 1 +# elif defined(__SANITIZE_ADDRESS__) +# define ZSTD_ADDRESS_SANITIZER 1 +# else +# define ZSTD_ADDRESS_SANITIZER 0 # endif -#elif defined(__SANITIZE_ADDRESS__) -# define ADDRESS_SANITIZER 1 #endif -#if defined (ADDRESS_SANITIZER) +#if ZSTD_ADDRESS_SANITIZER /* Not all platforms that support asan provide sanitizers/asan_interface.h. * We therefore declare the functions we need ourselves, rather than trying to * include the header file... */ +#include /* size_t */ /** * Marks a memory region ([addr, addr+size)) as unaddressable. 
@@ -319,12 +619,38 @@ void __asan_poison_memory_region(void const volatile *addr, size_t size); void __asan_unpoison_memory_region(void const volatile *addr, size_t size); #endif +#endif /* ZSTD_COMPILER_H */ +/**** ended inlining compiler.h ****/ +/**** skipping file: debug.h ****/ +/**** skipping file: zstd_deps.h ****/ -/*-************************************************************** + +/*-**************************************** +* Compiler specifics +******************************************/ +#if defined(_MSC_VER) /* Visual Studio */ +# include /* _byteswap_ulong */ +# include /* _byteswap_* */ +#endif +#if defined(__GNUC__) +# define MEM_STATIC static __inline __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define MEM_STATIC static inline +#elif defined(_MSC_VER) +# define MEM_STATIC static __inline +#else +# define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + +/*-************************************************************** * Basic Types *****************************************************************/ #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include +# if defined(_AIX) +# include +# else +# include /* intptr_t */ +# endif typedef uint8_t BYTE; typedef uint16_t U16; typedef int16_t S16; @@ -356,7 +682,53 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size); /*-************************************************************** -* Memory I/O +* Memory I/O API +*****************************************************************/ +/*=== Static platform detection ===*/ +MEM_STATIC unsigned MEM_32bits(void); +MEM_STATIC unsigned MEM_64bits(void); +MEM_STATIC unsigned MEM_isLittleEndian(void); + +/*=== Native unaligned read/write ===*/ +MEM_STATIC U16 MEM_read16(const void* memPtr); +MEM_STATIC U32 MEM_read32(const void* memPtr); +MEM_STATIC U64 MEM_read64(const void* memPtr); +MEM_STATIC size_t MEM_readST(const void* memPtr); + +MEM_STATIC void MEM_write16(void* memPtr, U16 value); +MEM_STATIC void MEM_write32(void* memPtr, U32 value); +MEM_STATIC void MEM_write64(void* memPtr, U64 value); + +/*=== Little endian unaligned read/write ===*/ +MEM_STATIC U16 MEM_readLE16(const void* memPtr); +MEM_STATIC U32 MEM_readLE24(const void* memPtr); +MEM_STATIC U32 MEM_readLE32(const void* memPtr); +MEM_STATIC U64 MEM_readLE64(const void* memPtr); +MEM_STATIC size_t MEM_readLEST(const void* memPtr); + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val); +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val); +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32); +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64); +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val); + +/*=== Big endian unaligned read/write ===*/ +MEM_STATIC U32 MEM_readBE32(const void* memPtr); +MEM_STATIC U64 MEM_readBE64(const void* memPtr); +MEM_STATIC size_t MEM_readBEST(const void* memPtr); + +MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32); +MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64); +MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val); + +/*=== Byteswap ===*/ +MEM_STATIC U32 MEM_swap32(U32 in); +MEM_STATIC U64 MEM_swap64(U64 in); +MEM_STATIC size_t MEM_swapST(size_t in); + + +/*-************************************************************** +* Memory I/O Implementation 
*****************************************************************/ /* MEM_FORCE_MEMORY_ACCESS : * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. @@ -372,9 +744,7 @@ void __asan_unpoison_memory_region(void const volatile *addr, size_t size); * Prefer these methods in priority order (0 > 1 > 2) */ #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) -# define MEM_FORCE_MEMORY_ACCESS 2 -# elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) +# if defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) # define MEM_FORCE_MEMORY_ACCESS 1 # endif #endif @@ -435,37 +805,37 @@ MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = MEM_STATIC U16 MEM_read16(const void* memPtr) { - U16 val; memcpy(&val, memPtr, sizeof(val)); return val; + U16 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; } MEM_STATIC U32 MEM_read32(const void* memPtr) { - U32 val; memcpy(&val, memPtr, sizeof(val)); return val; + U32 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; } MEM_STATIC U64 MEM_read64(const void* memPtr) { - U64 val; memcpy(&val, memPtr, sizeof(val)); return val; + U64 val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; } MEM_STATIC size_t MEM_readST(const void* memPtr) { - size_t val; memcpy(&val, memPtr, sizeof(val)); return val; + size_t val; ZSTD_memcpy(&val, memPtr, sizeof(val)); return val; } MEM_STATIC void MEM_write16(void* memPtr, U16 value) { - memcpy(memPtr, &value, sizeof(value)); + ZSTD_memcpy(memPtr, &value, sizeof(value)); } MEM_STATIC void MEM_write32(void* memPtr, U32 value) { - memcpy(memPtr, &value, sizeof(value)); + ZSTD_memcpy(memPtr, &value, sizeof(value)); } MEM_STATIC void MEM_write64(void* memPtr, U64 value) { - memcpy(memPtr, &value, sizeof(value)); + ZSTD_memcpy(memPtr, &value, sizeof(value)); } #endif /* MEM_FORCE_MEMORY_ACCESS */ @@ -537,7 +907,7 @@ MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val) MEM_STATIC U32 MEM_readLE24(const void* memPtr) { - return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16); + return (U32)MEM_readLE16(memPtr) + ((U32)(((const BYTE*)memPtr)[2]) << 16); } MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val) @@ -644,6 +1014,9 @@ MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val) MEM_writeBE64(memPtr, (U64)val); } +/* code only tested on 32 and 64 bits systems */ +MEM_STATIC void MEM_check(void) { DEBUG_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); } + #if defined (__cplusplus) } @@ -653,7 +1026,7 @@ MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val) /**** ended inlining mem.h ****/ /**** start inlining error_private.h ****/ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -675,10 +1048,9 @@ extern "C" { /* **************************************** * Dependencies ******************************************/ -#include /* size_t */ -/**** start inlining zstd_errors.h ****/ +/**** start inlining ../zstd_errors.h ****/ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -755,6 +1127,8 @@ typedef enum { /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ ZSTD_error_frameIndex_tooLarge = 100, ZSTD_error_seekableIO = 102, + ZSTD_error_dstBuffer_wrong = 104, + ZSTD_error_srcBuffer_wrong = 105, ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ } ZSTD_ErrorCode; @@ -770,7 +1144,8 @@ ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /**< Sa #endif #endif /* ZSTD_ERRORS_H_398273423 */ -/**** ended inlining zstd_errors.h ****/ +/**** ended inlining ../zstd_errors.h ****/ +/**** skipping file: zstd_deps.h ****/ /* **************************************** @@ -797,7 +1172,7 @@ typedef ZSTD_ErrorCode ERR_enum; /*-**************************************** * Error codes handling ******************************************/ -#undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */ +#undef ERROR /* already defined on Visual Studio */ #define ERROR(name) ZSTD_ERROR(name) #define ZSTD_ERROR(name) ((size_t)-PREFIX(name)) @@ -805,6 +1180,10 @@ ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } +/* check and forward error code */ +#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e +#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } + /*-**************************************** * Error Strings @@ -828,7 +1207,7 @@ ERR_STATIC const char* ERR_getErrorName(size_t code) /* ****************************************************************** * FSE : Finite State Entropy codec * Public Prototypes declaration - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -850,7 +1229,7 @@ extern "C" { /*-***************************************** * Dependencies ******************************************/ -#include /* size_t, ptrdiff_t */ +/**** skipping file: zstd_deps.h ****/ /*-***************************************** @@ -964,10 +1343,16 @@ FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize /*! FSE_normalizeCount(): normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). + useLowProbCount is a boolean parameter which trades off compressed size for + faster header decoding. When it is set to 1, the compressed data will be slightly + smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be + faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0 + is a good default, since header deserialization makes a big speed difference. + Otherwise, useLowProbCount=1 is a good default, since the speed difference is small. @return : tableLog, or an errorCode, which can be tested using FSE_isError() */ FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, - const unsigned* count, size_t srcSize, unsigned maxSymbolValue); + const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount); /*! FSE_NCountWriteBound(): Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. 
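The mem.h hunks earlier in this section switch MEM_read*/MEM_write* over to ZSTD_memcpy and add explicit U32 casts in MEM_readLE24. Below is a minimal, self-contained sketch of the same portable unaligned-access idiom, shown outside the amalgamation for clarity; the demo_* names are illustrative and not part of the patch.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Portable unaligned 32-bit read: memcpy is folded into a single load by
 * compilers that allow unaligned access, and stays well-defined elsewhere. */
static uint32_t demo_read32(const void* ptr)
{
    uint32_t v;
    memcpy(&v, ptr, sizeof(v));
    return v;
}

/* 24-bit little-endian read with explicit uint32_t casts, so the shift is
 * never performed on a promoted (possibly narrower) operand. */
static uint32_t demo_readLE24(const void* ptr)
{
    const uint8_t* p = (const uint8_t*)ptr;
    return (uint32_t)p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16);
}

int main(void)
{
    uint8_t buf[5] = { 0x01, 0x02, 0x03, 0x04, 0x05 };
    /* demo_read32 result depends on host byte order; demo_readLE24 does not */
    printf("%08x %06x\n", (unsigned)demo_read32(buf + 1), (unsigned)demo_readLE24(buf));
    return 0;
}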
@@ -1055,6 +1440,13 @@ FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize); +/*! FSE_readNCount_bmi2(): + * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise. + */ +FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize, int bmi2); + /*! Constructor and Destructor of FSE_DTable. Note that its size depends on 'tableLog' */ typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ @@ -1111,7 +1503,7 @@ If there is an error, the function will return an error code, which can be teste /* ****************************************************************** * bitstream * Part of FSE library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -1127,7 +1519,6 @@ If there is an error, the function will return an error code, which can be teste #if defined (__cplusplus) extern "C" { #endif - /* * This API consists of small unitary functions, which must be inlined for best performance. * Since link-time-optimization is not available for all compilers, @@ -1138,180 +1529,7 @@ extern "C" { * Dependencies ******************************************/ /**** skipping file: mem.h ****/ -/**** start inlining compiler.h ****/ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ - -#ifndef ZSTD_COMPILER_H -#define ZSTD_COMPILER_H - -/*-******************************************************* -* Compiler specifics -*********************************************************/ -/* force inlining */ - -#if !defined(ZSTD_NO_INLINE) -#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# define INLINE_KEYWORD inline -#else -# define INLINE_KEYWORD -#endif - -#if defined(__GNUC__) || defined(__ICCARM__) -# define FORCE_INLINE_ATTR __attribute__((always_inline)) -#elif defined(_MSC_VER) -# define FORCE_INLINE_ATTR __forceinline -#else -# define FORCE_INLINE_ATTR -#endif - -#else - -#define INLINE_KEYWORD -#define FORCE_INLINE_ATTR - -#endif - -/** - * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant - * parameters. They must be inlined for the compiler to eliminate the constant - * branches. - */ -#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR -/** - * HINT_INLINE is used to help the compiler generate better code. It is *not* - * used for "templates", so it can be tweaked based on the compilers - * performance. - * - * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the - * always_inline attribute. - * - * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline - * attribute. 
- */ -#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 -# define HINT_INLINE static INLINE_KEYWORD -#else -# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR -#endif - -/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ -#if defined(__GNUC__) -# define UNUSED_ATTR __attribute__((unused)) -#else -# define UNUSED_ATTR -#endif - -/* force no inlining */ -#ifdef _MSC_VER -# define FORCE_NOINLINE static __declspec(noinline) -#else -# if defined(__GNUC__) || defined(__ICCARM__) -# define FORCE_NOINLINE static __attribute__((__noinline__)) -# else -# define FORCE_NOINLINE static -# endif -#endif - -/* target attribute */ -#ifndef __has_attribute - #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ -#endif -#if defined(__GNUC__) || defined(__ICCARM__) -# define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) -#else -# define TARGET_ATTRIBUTE(target) -#endif - -/* Enable runtime BMI2 dispatch based on the CPU. - * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. - */ -#ifndef DYNAMIC_BMI2 - #if ((defined(__clang__) && __has_attribute(__target__)) \ - || (defined(__GNUC__) \ - && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ - && (defined(__x86_64__) || defined(_M_X86)) \ - && !defined(__BMI2__) - # define DYNAMIC_BMI2 1 - #else - # define DYNAMIC_BMI2 0 - #endif -#endif - -/* prefetch - * can be disabled, by declaring NO_PREFETCH build macro */ -#if defined(NO_PREFETCH) -# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ -# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ -#else -# if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */ -# include /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ -# define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0) -# define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1) -# elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) -# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) -# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) -# else -# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ -# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ -# endif -#endif /* NO_PREFETCH */ - -#define CACHELINE_SIZE 64 - -#define PREFETCH_AREA(p, s) { \ - const char* const _ptr = (const char*)(p); \ - size_t const _size = (size_t)(s); \ - size_t _pos; \ - for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ - PREFETCH_L2(_ptr + _pos); \ - } \ -} - -/* vectorization - * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */ -#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) -# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5) -# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) -# else -# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")") -# endif -#else -# define DONT_VECTORIZE -#endif - -/* Tell the compiler that a branch is likely or unlikely. - * Only use these macros if it causes the compiler to generate better code. - * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc - * and clang, please do. 
- */ -#if defined(__GNUC__) -#define LIKELY(x) (__builtin_expect((x), 1)) -#define UNLIKELY(x) (__builtin_expect((x), 0)) -#else -#define LIKELY(x) (x) -#define UNLIKELY(x) (x) -#endif - -/* disable warnings */ -#ifdef _MSC_VER /* Visual Studio */ -# include /* For Visual 2005 */ -# pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */ -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */ -# pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */ -# pragma warning(disable : 4324) /* disable: C4324: padded structure */ -#endif - -#endif /* ZSTD_COMPILER_H */ -/**** ended inlining compiler.h ****/ +/**** skipping file: compiler.h ****/ /**** skipping file: debug.h ****/ /**** skipping file: error_private.h ****/ @@ -1319,10 +1537,12 @@ extern "C" { /*========================================= * Target specific =========================================*/ -#if defined(__BMI__) && defined(__GNUC__) -# include /* support for bextr (experimental) */ -#elif defined(__ICCARM__) -# include +#ifndef ZSTD_NO_INTRINSICS +# if defined(__BMI__) && defined(__GNUC__) +# include /* support for bextr (experimental) */ +# elif defined(__ICCARM__) +# include +# endif #endif #define STREAM_ACCUMULATOR_MIN_32 25 @@ -1424,8 +1644,12 @@ MEM_STATIC unsigned BIT_highbit32 (U32 val) assert(val != 0); { # if defined(_MSC_VER) /* Visual */ - unsigned long r=0; - return _BitScanReverse ( &r, val ) ? (unsigned)r : 0; +# if STATIC_BMI2 == 1 + return _lzcnt_u32(val) ^ 31; +# else + unsigned long r = 0; + return _BitScanReverse(&r, val) ? (unsigned)r : 0; +# endif # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ return __builtin_clz (val) ^ 31; # elif defined(__ICCARM__) /* IAR Intrinsic */ @@ -1481,7 +1705,7 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits) { - MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32); + DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32); assert(nbBits < BIT_MASK_SIZE); assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; @@ -1554,7 +1778,7 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) */ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) { - if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } + if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } bitD->start = (const char*)srcBuffer; bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); @@ -1600,12 +1824,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si return srcSize; } -MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start) { return bitContainer >> start; } -MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) { U32 const regMask = sizeof(bitContainer)*8 - 1; /* if start > regMask, bitstream is corrupted, and result is undefined */ @@ -1613,10 +1837,14 @@ MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 co return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; } 
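The BIT_highbit32() hunk above adds a compile-time STATIC_BMI2 path that returns _lzcnt_u32(val) ^ 31 instead of calling _BitScanReverse. A small standalone sketch (assuming GCC/Clang builtins; names are illustrative, not from the patch) showing why a leading-zero count XOR 31 yields the same highest-set-bit index as a plain software scan:

#include <assert.h>
#include <stdint.h>

/* Software fallback: 0-based position of the highest set bit. */
static unsigned highbit32_soft(uint32_t val)
{
    unsigned r = 0;
    assert(val != 0);
    while (val >>= 1) r++;
    return r;
}

#if defined(__GNUC__)
/* __builtin_clz counts leading zeros; XOR 31 converts that count into the
 * bit index, which is the same trick the patch uses with _lzcnt_u32. */
static unsigned highbit32_clz(uint32_t val)
{
    assert(val != 0);
    return (unsigned)__builtin_clz(val) ^ 31;
}
#endif

int main(void)
{
#if defined(__GNUC__)
    uint32_t v;
    for (v = 1; v != 0; v <<= 1)
        assert(highbit32_soft(v) == highbit32_clz(v));
#endif
    return 0;
}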
-MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) { +#if defined(STATIC_BMI2) && STATIC_BMI2 == 1 + return _bzhi_u64(bitContainer, nbBits); +#else assert(nbBits < BIT_MASK_SIZE); return bitContainer & BIT_mask[nbBits]; +#endif } /*! BIT_lookBits() : @@ -1625,7 +1853,7 @@ MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) * On 32-bits, maxNbBits==24. * On 64-bits, maxNbBits==56. * @return : value extracted */ -MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) { /* arbitrate between double-shift and shift+mask */ #if 1 @@ -1648,7 +1876,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); } -MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) { bitD->bitsConsumed += nbBits; } @@ -1657,7 +1885,7 @@ MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) * Read (consume) next n bits from local register and update. * Pay attention to not read more than nbBits contained into local register. * @return : extracted value. */ -MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) { size_t const value = BIT_lookBits(bitD, nbBits); BIT_skipBits(bitD, nbBits); @@ -1743,12 +1971,12 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) *******************************************/ /* FSE buffer bounds */ #define FSE_NCOUNTBOUND 512 -#define FSE_BLOCKBOUND(size) (size + (size>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) +#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ -#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2)) -#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1< 12) ? (1 << (maxTableLog - 2)) : 1024) ) +#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) ) size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); @@ -1777,18 +2005,30 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); /* FSE_buildCTable_wksp() : * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). - * `wkspSize` must be >= `(1<= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`. 
*/ +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2))) +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)) size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); +#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8) +#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned)) +FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); +/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */ + size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits); /**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */ size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); /**< build a fake FSE_DTable, designed to always generate the same symbolValue */ -size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog); -/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */ +#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1) +#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned)) +size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize); +/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */ + +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2); +/**< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */ typedef enum { FSE_repeat_none, /**< Cannot use the previous table */ @@ -2099,6 +2339,9 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) #ifndef FSE_DEFAULT_MEMORY_USAGE # define FSE_DEFAULT_MEMORY_USAGE 13 #endif +#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE) +# error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE" +#endif /*!FSE_MAX_SYMBOL_VALUE : * Maximum symbol value authorized. 
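The fse.h additions above replace FSE_decompress_wksp()'s FSE_DTable parameter with a caller-provided workspace sized by FSE_DECOMPRESS_WKSP_SIZE_U32(), plus a _bmi2 variant. A hedged usage sketch of the new entry point, modeled on the patch's own FSE_decompress(); the "fse.h" include path assumes a non-amalgamated build and the wrapper name is hypothetical.

#define FSE_STATIC_LINKING_ONLY
#include "fse.h"   /* zstd-internal header, inlined elsewhere in this amalgamation */

/* Decompress one FSE-compressed block without heap allocation: the decode
 * table and scratch space live in a stack workspace whose size comes from
 * the largest table log and symbol value we are willing to accept. */
static size_t decompress_fse_block(void* dst, size_t dstCapacity,
                                   const void* src, size_t srcSize)
{
    unsigned wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
    /* last argument: pass 1 only when CPU detection says BMI2 is available */
    return FSE_decompress_wksp_bmi2(dst, dstCapacity, src, srcSize,
                                    FSE_MAX_TABLELOG, wksp, sizeof(wksp), /* bmi2 */ 0);
}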
@@ -2132,7 +2375,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) # error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" #endif -#define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3) +#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3) #endif /* FSE_STATIC_LINKING_ONLY */ @@ -2147,7 +2390,7 @@ MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) /* ****************************************************************** * huff0 huffman codec, * part of Finite State Entropy library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * * You can contact the author at : * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -2166,7 +2409,7 @@ extern "C" { #define HUF_H_298734234 /* *** Dependencies *** */ -#include /* size_t */ +/**** skipping file: zstd_deps.h ****/ /* *** library symbols visibility *** */ @@ -2236,7 +2479,7 @@ HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, /** HUF_compress4X_wksp() : * Same as HUF_compress2(), but uses externally allocated `workSpace`. * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */ -#define HUF_WORKSPACE_SIZE (6 << 10) +#define HUF_WORKSPACE_SIZE ((6 << 10) + 256) #define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, const void* src, size_t srcSize, @@ -2257,6 +2500,8 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, /* *** Dependencies *** */ /**** skipping file: mem.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: fse.h ****/ /* *** Constants *** */ @@ -2279,12 +2524,16 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, #define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ /* static allocation of HUF's Compression Table */ +/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */ +struct HUF_CElt_s { + U16 val; + BYTE nbBits; +}; /* typedef'd to HUF_CElt */ +typedef struct HUF_CElt_s HUF_CElt; /* consider it an incomplete type */ #define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */ #define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32)) #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ - U32 name##hb[HUF_CTABLE_SIZE_U32(maxSymbolValue)]; \ - void* name##hv = &(name##hb); \ - HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */ + HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */ /* static allocation of HUF's DTable */ typedef U32 HUF_DTable; @@ -2330,11 +2579,12 @@ size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, * or to save and regenerate 'CTable' using external methods. */ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); -typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. 
In which case, CTable will overwrite count content */ size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); +size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize); size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); +int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); typedef enum { HUF_repeat_none, /**< Cannot use the previous table */ @@ -2371,6 +2621,19 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, const void* src, size_t srcSize); +/*! HUF_readStats_wksp() : + * Same as HUF_readStats() but takes an external workspace which must be + * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. + */ +#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1) +#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workspace, size_t wkspSize, + int bmi2); + /** HUF_readCTable() : * Loading a CTable saved with HUF_writeCTable() */ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); @@ -2405,7 +2668,7 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); * a required workspace size greater than that specified in the following * macro. */ -#define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10) +#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9)) #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) #ifndef HUF_FORCE_DECOMPRESS_X2 @@ -2477,6 +2740,9 @@ size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstS #endif size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); +#endif #endif /* HUF_STATIC_LINKING_ONLY */ @@ -2501,8 +2767,31 @@ const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } /*-************************************************************** * FSE NCount encoding-decoding ****************************************************************/ -size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, - const void* headerBuffer, size_t hbSize) +static U32 FSE_ctz(U32 val) +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ + unsigned long r=0; + return _BitScanForward(&r, val) ? 
(unsigned)r : 0; +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ + return __builtin_ctz(val); +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return __CTZ(val); +# else /* Software version */ + U32 count = 0; + while ((val & 1) == 0) { + val >>= 1; + ++count; + } + return count; +# endif + } +} + +FORCE_INLINE_TEMPLATE +size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) { const BYTE* const istart = (const BYTE*) headerBuffer; const BYTE* const iend = istart + hbSize; @@ -2513,23 +2802,23 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t U32 bitStream; int bitCount; unsigned charnum = 0; + unsigned const maxSV1 = *maxSVPtr + 1; int previous0 = 0; - if (hbSize < 4) { - /* This function only works when hbSize >= 4 */ - char buffer[4]; - memset(buffer, 0, sizeof(buffer)); - memcpy(buffer, headerBuffer, hbSize); + if (hbSize < 8) { + /* This function only works when hbSize >= 8 */ + char buffer[8] = {0}; + ZSTD_memcpy(buffer, headerBuffer, hbSize); { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, buffer, sizeof(buffer)); if (FSE_isError(countSize)) return countSize; if (countSize > hbSize) return ERROR(corruption_detected); return countSize; } } - assert(hbSize >= 4); + assert(hbSize >= 8); /* init */ - memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ + ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ bitStream = MEM_readLE32(ip); nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); @@ -2540,36 +2829,58 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t threshold = 1<1) & (charnum<=*maxSVPtr)) { + for (;;) { if (previous0) { - unsigned n0 = charnum; - while ((bitStream & 0xFFFF) == 0xFFFF) { - n0 += 24; - if (ip < iend-5) { - ip += 2; - bitStream = MEM_readLE32(ip) >> bitCount; + /* Count the number of repeats. Each time the + * 2-bit repeat code is 0b11 there is another + * repeat. + * Avoid UB by setting the high bit to 1. + */ + int repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; + while (repeats >= 12) { + charnum += 3 * 12; + if (LIKELY(ip <= iend-7)) { + ip += 3; } else { - bitStream >>= 16; - bitCount += 16; - } } - while ((bitStream & 3) == 3) { - n0 += 3; - bitStream >>= 2; - bitCount += 2; + bitCount -= (int)(8 * (iend - 7 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; } - n0 += bitStream & 3; + charnum += 3 * repeats; + bitStream >>= 2 * repeats; + bitCount += 2 * repeats; + + /* Add the final repeat which isn't 0b11. */ + assert((bitStream & 3) < 3); + charnum += bitStream & 3; bitCount += 2; - if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall); - while (charnum < n0) normalizedCounter[charnum++] = 0; - if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + + /* This is an error, but break and return an error + * at the end, because returning out of a loop makes + * it harder for the compiler to optimize. + */ + if (charnum >= maxSV1) break; + + /* We don't need to set the normalized count to 0 + * because we already memset the whole buffer to 0. 
+ */ + + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { assert((bitCount >> 3) <= 3); /* For first condition to work */ ip += bitCount>>3; bitCount &= 7; - bitStream = MEM_readLE32(ip) >> bitCount; } else { - bitStream >>= 2; - } } - { int const max = (2*threshold-1) - remaining; + bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + } + { + int const max = (2*threshold-1) - remaining; int count; if ((bitStream & (threshold-1)) < (U32)max) { @@ -2582,24 +2893,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t } count--; /* extra accuracy */ - remaining -= count < 0 ? -count : count; /* -1 means +1 */ + /* When it matters (small blocks), this is a + * predictable branch, because we don't use -1. + */ + if (count >= 0) { + remaining -= count; + } else { + assert(count == -1); + remaining += count; + } normalizedCounter[charnum++] = (short)count; previous0 = !count; - while (remaining < threshold) { - nbBits--; - threshold >>= 1; + + assert(threshold > 1); + if (remaining < threshold) { + /* This branch can be folded into the + * threshold update condition because we + * know that threshold > 1. + */ + if (remaining <= 1) break; + nbBits = BIT_highbit32(remaining) + 1; + threshold = 1 << (nbBits - 1); } + if (charnum >= maxSV1) break; - if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { ip += bitCount>>3; bitCount &= 7; } else { bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; ip = iend - 4; } - bitStream = MEM_readLE32(ip) >> (bitCount & 31); - } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ + bitStream = MEM_readLE32(ip) >> bitCount; + } } if (remaining != 1) return ERROR(corruption_detected); + /* Only possible when there are too many zeros. */ + if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall); if (bitCount > 32) return ERROR(corruption_detected); *maxSVPtr = charnum-1; @@ -2607,6 +2937,43 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t return ip-istart; } +/* Avoids the FORCE_INLINE of the _body() function. */ +static size_t FSE_readNCount_body_default( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} +#endif + +size_t FSE_readNCount_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); + } +#endif + (void)bmi2; + return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +size_t FSE_readNCount( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0); +} + /*! HUF_readStats() : Read compact Huffman tree, saved by HUF_writeCTable(). 
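The rewritten FSE_readNCount_body() above replaces the old per-pair `while ((bitStream & 3) == 3)` scan with `FSE_ctz(~bitStream | 0x80000000) >> 1`. A standalone sketch (software ctz so it runs anywhere; names are illustrative) checking that the two ways of counting a leading run of 0b11 repeat codes agree:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Software count-trailing-zeros, mirroring the fallback in the patch. */
static unsigned ctz32(uint32_t val)
{
    unsigned n = 0;
    assert(val != 0);
    while ((val & 1) == 0) { val >>= 1; n++; }
    return n;
}

/* Reference: walk 2-bit codes from the bottom until one is not 0b11. */
static unsigned count_repeats_slow(uint32_t bits)
{
    unsigned n = 0;
    while ((bits & 3) == 3) { bits >>= 2; n++; }
    return n;
}

/* Fast path from the patch: complementing turns the run of 1-bits into a run
 * of 0-bits, the forced high bit guarantees ctz has something to find, and
 * >> 1 converts a bit count into a count of 2-bit repeat codes. */
static unsigned count_repeats_fast(uint32_t bits)
{
    return ctz32(~bits | 0x80000000u) >> 1;
}

int main(void)
{
    /* An all-ones container is left out on purpose: the forced high bit caps
     * the ctz result, and the real decoder refills before that matters. */
    uint32_t samples[] = { 0x0u, 0x3u, 0xFu, 0x3Fu, 0xFFFFu, 0xABCDFF0Fu };
    size_t i;
    for (i = 0; i < sizeof(samples)/sizeof(samples[0]); i++)
        assert(count_repeats_slow(samples[i]) == count_repeats_fast(samples[i]));
    return 0;
}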
@@ -2618,6 +2985,17 @@ size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* t size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, const void* src, size_t srcSize) +{ + U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0); +} + +FORCE_INLINE_TEMPLATE size_t +HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, + int bmi2) { U32 weightTotal; const BYTE* ip = (const BYTE*) src; @@ -2626,7 +3004,7 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, if (!srcSize) return ERROR(srcSize_wrong); iSize = ip[0]; - /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */ + /* ZSTD_memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */ if (iSize >= 128) { /* special header */ oSize = iSize - 127; @@ -2640,14 +3018,14 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, huffWeight[n+1] = ip[n/2] & 15; } } } else { /* header compressed with FSE (normal case) */ - FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)]; /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */ if (iSize+1 > srcSize) return ERROR(srcSize_wrong); - oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6); /* max (hwSize-1) values decoded, as last one is implied */ + /* max (hwSize-1) values decoded, as last one is implied */ + oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2); if (FSE_isError(oSize)) return oSize; } /* collect weight stats */ - memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); + ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); weightTotal = 0; { U32 n; for (n=0; n= HUF_TABLELOG_MAX) return ERROR(corruption_detected); @@ -2677,10 +3055,44 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, *nbSymbolsPtr = (U32)(oSize+1); return iSize+1; } -/**** ended inlining entropy_common.c ****/ -/**** start inlining error_private.c ****/ + +/* Avoids the FORCE_INLINE of the _body() function. 
*/ +static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0); +} + +#if DYNAMIC_BMI2 +static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1); +} +#endif + +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, + int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); + } +#endif + (void)bmi2; + return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); +} +/**** ended inlining common/entropy_common.c ****/ +/**** start inlining common/error_private.c ****/ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -2728,16 +3140,18 @@ const char* ERR_getErrorString(ERR_enum code) /* following error codes are not stable and may be removed or changed in a future version */ case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; + case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; + case PREFIX(srcBuffer_wrong): return "Source buffer is wrong"; case PREFIX(maxCode): default: return notErrorCode; } #endif } -/**** ended inlining error_private.c ****/ -/**** start inlining fse_decompress.c ****/ +/**** ended inlining common/error_private.c ****/ +/**** start inlining common/fse_decompress.c ****/ /* ****************************************************************** * FSE : Finite State Entropy decoder - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. 
* * You can contact the author at : * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy @@ -2753,13 +3167,14 @@ const char* ERR_getErrorString(ERR_enum code) /* ************************************************************** * Includes ****************************************************************/ -#include /* malloc, free, qsort */ -#include /* memcpy, memset */ +/**** skipping file: debug.h ****/ /**** skipping file: bitstream.h ****/ /**** skipping file: compiler.h ****/ #define FSE_STATIC_LINKING_ONLY /**** skipping file: fse.h ****/ /**** skipping file: error_private.h ****/ +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ /* ************************************************************** @@ -2768,11 +3183,6 @@ const char* ERR_getErrorString(ERR_enum code) #define FSE_isError ERR_isError #define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ -/* check and forward error code */ -#ifndef CHECK_F -#define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; } -#endif - /* ************************************************************** * Templates @@ -2801,25 +3211,27 @@ const char* ERR_getErrorString(ERR_enum code) FSE_DTable* FSE_createDTable (unsigned tableLog) { if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; - return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); + return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); } void FSE_freeDTable (FSE_DTable* dt) { - free(dt); + ZSTD_free(dt); } -size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) { void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); - U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1]; + U16* symbolNext = (U16*)workSpace; + BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1); U32 const maxSV1 = maxSymbolValue + 1; U32 const tableSize = 1 << tableLog; U32 highThreshold = tableSize-1; /* Sanity Checks */ + if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge); if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); @@ -2837,11 +3249,57 @@ size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0; symbolNext[s] = normalizedCounter[s]; } } } - memcpy(dt, &DTableH, sizeof(DTableH)); + ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); } /* Spread symbols */ - { U32 const tableMask = tableSize-1; + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. 
+ */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s= cSrcSize) return ERROR(srcSize_wrong); /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */ - if (tableLog > maxLog) return ERROR(tableLog_tooLarge); - ip += NCountLength; - cSrcSize -= NCountLength; + { + size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2); + if (FSE_isError(NCountLength)) return NCountLength; + if (tableLog > maxLog) return ERROR(tableLog_tooLarge); + assert(NCountLength <= cSrcSize); + ip += NCountLength; + cSrcSize -= NCountLength; + } + + if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge); + workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog); + wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); + + CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) ); + + { + const void* ptr = wksp->dtable; + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1); + return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0); + } +} + +/* Avoids the FORCE_INLINE of the _body() function. */ +static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0); +} - CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) ); +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1); +} +#endif - return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */ +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); + } +#endif + (void)bmi2; + return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); } typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) { + U32 wksp[FSE_BUILD_DTABLE_WKSP_SIZE_U32(FSE_TABLELOG_ABSOLUTE_MAX, FSE_MAX_SYMBOL_VALUE)]; + return FSE_buildDTable_wksp(dt, normalizedCounter, maxSymbolValue, tableLog, wksp, sizeof(wksp)); +} + size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize) { - DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ - return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG); + /* Static analyzer seems unable to understand this table will be properly initialized later */ + U32 
wksp[FSE_DECOMPRESS_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; + return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, FSE_MAX_TABLELOG, wksp, sizeof(wksp)); } - +#endif #endif /* FSE_COMMONDEFS_ONLY */ -/**** ended inlining fse_decompress.c ****/ -/**** start inlining xxhash.c ****/ +/**** ended inlining common/fse_decompress.c ****/ +/**** start inlining common/zstd_common.c ****/ /* - * xxHash - Fast Hash algorithm - * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. * - * You can contact the author at : - * - xxHash homepage: http://www.xxhash.com - * - xxHash source repository : https://github.com/Cyan4973/xxHash - * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses. -*/ - - -/* ************************************* -* Tuning parameters -***************************************/ -/*!XXH_FORCE_MEMORY_ACCESS : - * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. - * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. - * The below switch allow to select different access method for improved performance. - * Method 0 (default) : use `memcpy()`. Safe and portable. - * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). - * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. - * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. - * It can generate buggy code on targets which do not support unaligned memory accesses. - * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) - * See http://stackoverflow.com/a/32095106/646947 for details. - * Prefer these methods in priority order (0 > 1 > 2) */ -#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ -# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) -# define XXH_FORCE_MEMORY_ACCESS 2 -# elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ - (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \ - defined(__ICCARM__) -# define XXH_FORCE_MEMORY_ACCESS 1 -# endif -#endif -/*!XXH_ACCEPT_NULL_INPUT_POINTER : - * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. - * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. - * By default, this option is disabled. To enable it, uncomment below define : - */ -/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ -/*!XXH_FORCE_NATIVE_FORMAT : - * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. - * Results are therefore identical for little-endian and big-endian CPU. - * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. 
- * Should endian-independence be of no importance for your application, you may set the #define below to 1, - * to improve speed for Big-endian CPU. - * This option has no impact on Little_Endian CPU. - */ -#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ -# define XXH_FORCE_NATIVE_FORMAT 0 -#endif -/*!XXH_FORCE_ALIGN_CHECK : - * This is a minor performance trick, only useful with lots of very small keys. - * It means : check for aligned/unaligned input. - * The check costs one initial branch per hash; set to 0 when the input data - * is guaranteed to be aligned. +/*-************************************* +* Dependencies +***************************************/ +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ +/**** skipping file: error_private.h ****/ +/**** start inlining zstd_internal.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. */ -#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ -# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) -# define XXH_FORCE_ALIGN_CHECK 0 -# else -# define XXH_FORCE_ALIGN_CHECK 1 -# endif -#endif +#ifndef ZSTD_CCOMMON_H_MODULE +#define ZSTD_CCOMMON_H_MODULE -/* ************************************* -* Includes & Memory related functions -***************************************/ -/* Modify the local functions below should you wish to use some other memory routines */ -/* for malloc(), free() */ -#include -#include /* size_t */ -static void* XXH_malloc(size_t s) { return malloc(s); } -static void XXH_free (void* p) { free(p); } -/* for memcpy() */ -#include -static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); } +/* this module contains definitions which must be identical + * across compression, decompression and dictBuilder. + * It also contains a few functions useful to at least 2 of them + * and which benefit from being inlined */ -#ifndef XXH_STATIC_LINKING_ONLY -# define XXH_STATIC_LINKING_ONLY +/*-************************************* +* Dependencies +***************************************/ +#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON) +#include #endif -/**** start inlining xxhash.h ****/ +/**** skipping file: compiler.h ****/ +/**** skipping file: mem.h ****/ +/**** skipping file: debug.h ****/ +/**** skipping file: error_private.h ****/ +#define ZSTD_STATIC_LINKING_ONLY +/**** start inlining ../zstd.h ****/ /* - * xxHash - Extremely Fast Hash algorithm - * Header File - * Copyright (c) 2012-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. * - * You can contact the author at : - * - xxHash source repository : https://github.com/Cyan4973/xxHash - * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses. -*/ - -/* Notice extracted from xxHash homepage : - -xxHash is an extremely fast Hash algorithm, running at RAM speed limits. -It also successfully passes all tests from the SMHasher suite. 
- -Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) - -Name Speed Q.Score Author -xxHash 5.4 GB/s 10 -CrapWow 3.2 GB/s 2 Andrew -MumurHash 3a 2.7 GB/s 10 Austin Appleby -SpookyHash 2.0 GB/s 10 Bob Jenkins -SBox 1.4 GB/s 9 Bret Mulvey -Lookup3 1.2 GB/s 9 Bob Jenkins -SuperFastHash 1.2 GB/s 1 Paul Hsieh -CityHash64 1.05 GB/s 10 Pike & Alakuijala -FNV 0.55 GB/s 5 Fowler, Noll, Vo -CRC32 0.43 GB/s 9 -MD5-32 0.33 GB/s 10 Ronald L. Rivest -SHA1-32 0.28 GB/s 10 - -Q.Score is a measure of quality of the hash function. -It depends on successfully passing SMHasher test set. -10 is a perfect score. - -A 64-bits version, named XXH64, is available since r35. -It offers much better speed, but for 64-bits applications only. -Name Speed on 64 bits Speed on 32 bits -XXH64 13.8 GB/s 1.9 GB/s -XXH32 6.8 GB/s 6.0 GB/s -*/ - + */ #if defined (__cplusplus) extern "C" { #endif -#ifndef XXHASH_H_5627135585666179 -#define XXHASH_H_5627135585666179 1 - +#ifndef ZSTD_H_235446 +#define ZSTD_H_235446 -/* **************************** -* Definitions -******************************/ +/* ====== Dependency ======*/ +#include /* INT_MAX */ #include /* size_t */ -typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; -/* **************************** -* API modifier -******************************/ -/** XXH_PRIVATE_API -* This is useful if you want to include xxhash functions in `static` mode -* in order to inline them, and remove their symbol from the public list. -* Methodology : -* #define XXH_PRIVATE_API -* #include "xxhash.h" -* `xxhash.c` is automatically included. -* It's not useful to compile and link it as a separate module anymore. -*/ -#ifdef XXH_PRIVATE_API -# ifndef XXH_STATIC_LINKING_ONLY -# define XXH_STATIC_LINKING_ONLY -# endif -# if defined(__GNUC__) -# define XXH_PUBLIC_API static __inline __attribute__((unused)) -# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -# define XXH_PUBLIC_API static inline -# elif defined(_MSC_VER) -# define XXH_PUBLIC_API static __inline -# else -# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ +/* ===== ZSTDLIB_API : control library symbols visibility ===== */ +#ifndef ZSTDLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define ZSTDLIB_VISIBILITY # endif +#endif +#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) +# define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY +#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) +# define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ #else -# define XXH_PUBLIC_API /* do nothing */ -#endif /* XXH_PRIVATE_API */ - -/*!XXH_NAMESPACE, aka Namespace Emulation : - -If you want to include _and expose_ xxHash functions from within your own library, -but also want to avoid symbol collisions with another library which also includes xxHash, - -you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library -with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). - -Note that no change is required within the calling program as long as it includes `xxhash.h` : -regular symbol name will be automatically translated by this header. 
-*/ -#ifdef XXH_NAMESPACE -# define XXH_CAT(A,B) A##B -# define XXH_NAME2(A,B) XXH_CAT(A,B) -# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) -# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) -# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) -# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) -# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) -# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) -# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) -# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) -# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) -# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) -# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) -# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) -# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) -# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) -# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) -# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) -# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) -# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) -# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) +# define ZSTDLIB_API ZSTDLIB_VISIBILITY #endif -/* ************************************* -* Version -***************************************/ -#define XXH_VERSION_MAJOR 0 -#define XXH_VERSION_MINOR 6 -#define XXH_VERSION_RELEASE 2 -#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) -XXH_PUBLIC_API unsigned XXH_versionNumber (void); - +/******************************************************************************* + Introduction -/* **************************** -* Simple Hash Functions -******************************/ -typedef unsigned int XXH32_hash_t; -typedef unsigned long long XXH64_hash_t; + zstd, short for Zstandard, is a fast lossless compression algorithm, targeting + real-time compression scenarios at zlib-level and better compression ratios. + The zstd compression library provides in-memory compression and decompression + functions. -XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); -XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); + The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), + which is currently 22. Levels >= 20, labeled `--ultra`, should be used with + caution, as they require more memory. The library also offers negative + compression levels, which extend the range of speed vs. ratio preferences. + The lower the level, the faster the speed (at the cost of compression). -/*! -XXH32() : - Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". - The memory between input & input+length must be valid (allocated and read-accessible). - "seed" can be used to alter the result predictably. - Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s -XXH64() : - Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". - "seed" can be used to alter the result predictably. - This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark). 
-*/ + Compression can be done in: + - a single step (described as Simple API) + - a single step, reusing a context (described as Explicit context) + - unbounded multiple steps (described as Streaming compression) + The compression ratio achievable on small data can be highly improved using + a dictionary. Dictionary compression can be performed in: + - a single step (described as Simple dictionary API) + - a single step, reusing a dictionary (described as Bulk-processing + dictionary API) -/* **************************** -* Streaming Hash Functions -******************************/ -typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ -typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ + Advanced experimental functions can be accessed using + `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. -/*! State allocation, compatible with dynamic libraries */ + Advanced experimental APIs should never be used with a dynamically-linked + library. They are not "stable"; their definitions or signatures may change in + the future. Only static linking is allowed. +*******************************************************************************/ -XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); -XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); +/*------ Version ------*/ +#define ZSTD_VERSION_MAJOR 1 +#define ZSTD_VERSION_MINOR 5 +#define ZSTD_VERSION_RELEASE 0 +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) -XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); -XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); +/*! ZSTD_versionNumber() : + * Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */ +ZSTDLIB_API unsigned ZSTD_versionNumber(void); +#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE +#define ZSTD_QUOTE(str) #str +#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) +#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) -/* hash streaming */ +/*! ZSTD_versionString() : + * Return runtime library version, like "1.4.5". Requires v1.3.0+. */ +ZSTDLIB_API const char* ZSTD_versionString(void); -XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); -XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); -XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); +/* ************************************* + * Default constant + ***************************************/ +#ifndef ZSTD_CLEVEL_DEFAULT +# define ZSTD_CLEVEL_DEFAULT 3 +#endif -XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); -XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); -XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); +/* ************************************* + * Constants + ***************************************/ -/* -These functions generate the xxHash of an input provided in multiple segments. -Note that, for small input, they are slower than single-call functions, due to state management. -For small input, prefer `XXH32()` and `XXH64()` . 
+/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ +#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ +#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ +#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ +#define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 -XXH state must first be allocated, using XXH*_createState() . +#define ZSTD_BLOCKSIZELOG_MAX 17 +#define ZSTD_BLOCKSIZE_MAX (1<= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); -Finally, a hash value can be produced anytime, by using XXH*_digest(). -This function returns the nn-bits hash as an int or long long. +/*! ZSTD_decompress() : + * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. + * `dstCapacity` is an upper bound of originalSize to regenerate. + * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. + * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); -It's still possible to continue inserting input into the hash state after a digest, -and generate some new hashes later on, by calling again XXH*_digest(). +/*! ZSTD_getFrameContentSize() : requires v1.3.0+ + * `src` should point to the start of a ZSTD encoded frame. + * `srcSize` must be at least as large as the frame header. + * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. + * @return : - decompressed size of `src` frame content, if known + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) + * note 1 : a 0 return value means the frame is valid but "empty". + * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * Optionally, application can rely on some implicit limit, + * as ZSTD_decompress() only needs an upper bound of decompressed size. + * (For example, data could be necessarily cut into blocks <= 16 KB). + * note 3 : decompressed size is always present when compression is completed using single-pass functions, + * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). + * note 4 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure return value fits within application's authorized limits. + * Each application can set its own limits. 
+ * note 6 : This function replaces ZSTD_getDecompressedSize() */ +#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) +ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); -When done, free XXH state space if it was allocated dynamically. -*/ +/*! ZSTD_getDecompressedSize() : + * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). + * Both functions work the same way, but ZSTD_getDecompressedSize() blends + * "empty", "unknown" and "error" results to the same return value (0), + * while ZSTD_getFrameContentSize() gives them separate return values. + * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ +ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); +/*! ZSTD_findFrameCompressedSize() : Requires v1.4.0+ + * `src` should point to the start of a ZSTD frame or skippable frame. + * `srcSize` must be >= first frame size + * @return : the compressed size of the first frame starting at `src`, + * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, + * or an error code if input is invalid */ +ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); -/* ************************** -* Utils -****************************/ -#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */ -# define restrict /* disable restrict */ -#endif -XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state); -XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state); +/*====== Helper functions ======*/ +#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ +ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ +ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ +ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed, requires v1.4.0+ */ +ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ +ZSTDLIB_API int ZSTD_defaultCLevel(void); /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */ -/* ************************** -* Canonical representation -****************************/ -/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. -* The canonical representation uses human-readable write convention, aka big-endian (large digits first). -* These functions allow transformation of hash result into and from its canonical format. -* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. -*/ -typedef struct { unsigned char digest[4]; } XXH32_canonical_t; -typedef struct { unsigned char digest[8]; } XXH64_canonical_t; +/*************************************** +* Explicit context +***************************************/ +/*= Compression context + * When compressing many times, + * it is recommended to allocate a context just once, + * and re-use it for each successive compression operation. 
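For reference, a minimal sketch of the one-shot round trip documented above: size the destination with ZSTD_compressBound(), compress with ZSTD_compress(), recover the original size with ZSTD_getFrameContentSize(), then ZSTD_decompress(). The buffer names are hypothetical, zstd.h is assumed to be on the include path, and error handling is kept terse.

    #include <stdlib.h>
    #include <string.h>
    #include "zstd.h"

    /* Round-trip one buffer through the one-shot API; returns 0 on success. */
    static int roundTripOneShot(const void* src, size_t srcSize)
    {
        size_t const bound = ZSTD_compressBound(srcSize);
        void* const cBuf = malloc(bound);
        if (cBuf == NULL) return -1;

        size_t const cSize = ZSTD_compress(cBuf, bound, src, srcSize, ZSTD_CLEVEL_DEFAULT);
        if (ZSTD_isError(cSize)) { free(cBuf); return -1; }  /* see ZSTD_getErrorName(cSize) */

        /* single-pass compression always records the content size in the frame header (note 3 above) */
        unsigned long long const rSize = ZSTD_getFrameContentSize(cBuf, cSize);
        if (rSize == ZSTD_CONTENTSIZE_ERROR || rSize == ZSTD_CONTENTSIZE_UNKNOWN) { free(cBuf); return -1; }

        void* const dBuf = malloc(rSize ? (size_t)rSize : 1);
        if (dBuf == NULL) { free(cBuf); return -1; }
        size_t const dSize = ZSTD_decompress(dBuf, (size_t)rSize, cBuf, cSize);

        int const ok = !ZSTD_isError(dSize) && dSize == srcSize && memcmp(dBuf, src, srcSize) == 0;
        free(dBuf);
        free(cBuf);
        return ok ? 0 : -1;
    }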
+ * This will make workload friendlier for system's memory. + * Note : re-using context is just a speed / resource optimization. + * It doesn't change the compression ratio, which remains identical. + * Note 2 : In multi-threaded environments, + * use one different context per thread for parallel execution. + */ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); +ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer */ -XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); -XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); +/*! ZSTD_compressCCtx() : + * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. + * Important : in order to behave similarly to `ZSTD_compress()`, + * this function compresses at requested compression level, + * __ignoring any other parameter__ . + * If any advanced parameter was set using the advanced API, + * they will all be reset. Only `compressionLevel` remains. + */ +ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); -XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); -XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); +/*= Decompression context + * When decompressing many times, + * it is recommended to allocate a context only once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Use one context per thread for parallel execution. */ +typedef struct ZSTD_DCtx_s ZSTD_DCtx; +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); +ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /* accept NULL pointer */ -#endif /* XXHASH_H_5627135585666179 */ +/*! ZSTD_decompressDCtx() : + * Same as ZSTD_decompress(), + * requires an allocated ZSTD_DCtx. + * Compatible with sticky parameters. + */ +ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); +/********************************************* +* Advanced compression API (Requires v1.4.0+) +**********************************************/ -/* ================================================================================================ - This section contains definitions which are not guaranteed to remain stable. - They may change in future versions, becoming incompatible with a different version of the library. - They shall only be used with static linking. - Never use these definitions in association with dynamic linking ! -=================================================================================================== */ -#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345) -#define XXH_STATIC_H_3543687687345 +/* API design : + * Parameters are pushed one by one into an existing context, + * using ZSTD_CCtx_set*() functions. + * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. + * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! + * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . + * + * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). + * + * This API supercedes all other "advanced" API entry points in the experimental section. 
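A minimal sketch of the context-reuse pattern described above: allocate one ZSTD_CCtx, reuse it via ZSTD_compressCCtx() for several independent inputs, and free it once. The buffers/sizes arrays are hypothetical caller data.

    #include <stdlib.h>
    #include "zstd.h"

    /* Compress `count` independent buffers, reusing a single compression context. */
    static int compressMany(const void* const* buffers, const size_t* sizes, size_t count, int level)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        if (cctx == NULL) return -1;

        int result = 0;
        for (size_t i = 0; i < count && result == 0; ++i) {
            size_t const bound = ZSTD_compressBound(sizes[i]);
            void* const dst = malloc(bound);
            if (dst == NULL) { result = -1; break; }

            /* behaves like ZSTD_compress(), but reuses cctx's internal workspace between calls */
            size_t const cSize = ZSTD_compressCCtx(cctx, dst, bound, buffers[i], sizes[i], level);
            if (ZSTD_isError(cSize)) result = -1;
            /* ... hand `dst`/`cSize` to the caller here ... */
            free(dst);
        }
        ZSTD_freeCCtx(cctx);  /* accepts NULL, frees all internal state */
        return result;
    }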
+ * In the future, we expect to remove from experimental API entry points which are redundant with this API. + */ -/* These definitions are only meant to allow allocation of XXH state - statically, on stack, or in a struct for example. - Do not use members directly. */ - struct XXH32_state_s { - unsigned total_len_32; - unsigned large_len; - unsigned v1; - unsigned v2; - unsigned v3; - unsigned v4; - unsigned mem32[4]; /* buffer defined as U32 for alignment */ - unsigned memsize; - unsigned reserved; /* never read nor write, will be removed in a future version */ - }; /* typedef'd to XXH32_state_t */ +/* Compression strategies, listed from fastest to strongest */ +typedef enum { ZSTD_fast=1, + ZSTD_dfast=2, + ZSTD_greedy=3, + ZSTD_lazy=4, + ZSTD_lazy2=5, + ZSTD_btlazy2=6, + ZSTD_btopt=7, + ZSTD_btultra=8, + ZSTD_btultra2=9 + /* note : new strategies _might_ be added in the future. + Only the order (from fast to strong) is guaranteed */ +} ZSTD_strategy; - struct XXH64_state_s { - unsigned long long total_len; - unsigned long long v1; - unsigned long long v2; - unsigned long long v3; - unsigned long long v4; - unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ - unsigned memsize; - unsigned reserved[2]; /* never read nor write, will be removed in a future version */ - }; /* typedef'd to XXH64_state_t */ +typedef enum { + /* compression parameters + * Note: When compressing with a ZSTD_CDict these parameters are superseded + * by the parameters used to construct the ZSTD_CDict. + * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ + ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table. + * Note that exact compression parameters are dynamically determined, + * depending on both compression level and srcSize (when known). + * Default level is ZSTD_CLEVEL_DEFAULT==3. + * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. + * Note 1 : it's possible to pass a negative compression level. + * Note 2 : setting a level does not automatically set all other compression parameters + * to default. Setting this will however eventually dynamically impact the compression + * parameters which have not been manually set. The manually set + * ones will 'stick'. */ + /* Advanced compression parameters : + * It's possible to pin down compression parameters to some specific values. + * In which case, these values are no longer dynamically selected by the compressor */ + ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. + * This will set a memory budget for streaming decompression, + * with larger values requiring more memory + * and typically compressing more. + * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. + * Special: value 0 means "use default windowLog". + * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT + * requires explicitly allowing such size at streaming decompression stage. */ + ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. + * Resulting memory usage is (1 << (hashLog+2)). + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. + * Larger tables improve compression ratio of strategies <= dFast, + * and improve speed of strategies > dFast. + * Special: value 0 means "use default hashLog". */ + ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2. + * Resulting memory usage is (1 << (chainLog+2)). 
+ * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. + * Larger tables result in better and slower compression. + * This parameter is useless for "fast" strategy. + * It's still useful when using "dfast" strategy, + * in which case it defines a secondary probe table. + * Special: value 0 means "use default chainLog". */ + ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2. + * More attempts result in better and slower compression. + * This parameter is useless for "fast" and "dFast" strategies. + * Special: value 0 means "use default searchLog". */ + ZSTD_c_minMatch=105, /* Minimum size of searched matches. + * Note that Zstandard can still find matches of smaller size, + * it just tweaks its search algorithm to look for this size and larger. + * Larger values increase compression and decompression speed, but decrease ratio. + * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX. + * Note that currently, for all strategies < btopt, effective minimum is 4. + * , for all strategies > fast, effective maximum is 6. + * Special: value 0 means "use default minMatchLength". */ + ZSTD_c_targetLength=106, /* Impact of this field depends on strategy. + * For strategies btopt, btultra & btultra2: + * Length of Match considered "good enough" to stop search. + * Larger values make compression stronger, and slower. + * For strategy fast: + * Distance between match sampling. + * Larger values make compression faster, and weaker. + * Special: value 0 means "use default targetLength". */ + ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition. + * The higher the value of selected strategy, the more complex it is, + * resulting in stronger and slower compression. + * Special: value 0 means "use default strategy". */ + /* LDM mode parameters */ + ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. + * This parameter is designed to improve compression ratio + * for large inputs, by finding large matches at long distance. + * It increases memory usage and window size. + * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB + * except when expressly set to a different value. + * Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and + * compression strategy >= ZSTD_btopt (== compression level 16+) */ + ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2. + * Larger values increase memory usage and compression ratio, + * but decrease compression speed. + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX + * default: windowlog - 7. + * Special: value 0 means "automatically determine hashlog". */ + ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher. + * Larger/too small values usually decrease compression ratio. + * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. + * Special: value 0 means "use default value" (default: 64). */ + ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution. + * Larger values improve collision resolution but decrease compression speed. + * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX. + * Special: value 0 means "use default value" (default: 3). */ + ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table. + * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). + * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage. + * Larger values improve compression speed. 
+ * Deviating far from default value will likely result in a compression ratio decrease. + * Special: value 0 means "automatically determine hashRateLog". */ -# ifdef XXH_PRIVATE_API -/**** skipping file: xxhash.c ****/ -# endif + /* frame parameters */ + ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) + * Content size must be known at the beginning of compression. + * This is automatically the case when using ZSTD_compress2(), + * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ + ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ + ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ -#endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */ + /* multi-threading parameters */ + /* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). + * Otherwise, trying to set any other value than default (0) will be a no-op and return an error. + * In a situation where it's unknown if the linked library supports multi-threading or not, + * setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property. + */ + ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. + * When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() : + * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, + * while compression is performed in parallel, within worker thread(s). + * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : + * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). + * More workers improve speed, but also increase memory usage. + * Default value is `0`, aka "single-threaded mode" : no worker is spawned, + * compression is performed inside Caller's thread, and all invocations are blocking */ + ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. + * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. + * 0 means default, which is dynamically determined based on compression parameters. + * Job size must be a minimum of overlap size, or ZSTDMT_JOBSIZE_MIN (= 512 KB), whichever is largest. + * The minimum size is automatically and transparently enforced. */ + ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. + * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. + * It helps preserve compression ratio, while each job is compressed in parallel. + * This value is enforced only when nbWorkers >= 1. + * Larger values increase compression ratio, but decrease speed. + * Possible values range from 0 to 9 : + * - 0 means "default" : value will be determined by the library, depending on strategy + * - 1 means "no overlap" + * - 9 means "full overlap", using a full window size. 
+ * Each intermediate rank increases/decreases load size by a factor 2 : + * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default + * default value varies between 6 and 9, depending on strategy */ + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_c_rsyncable + * ZSTD_c_format + * ZSTD_c_forceMaxWindow + * ZSTD_c_forceAttachDict + * ZSTD_c_literalCompressionMode + * ZSTD_c_targetCBlockSize + * ZSTD_c_srcSizeHint + * ZSTD_c_enableDedicatedDictSearch + * ZSTD_c_stableInBuffer + * ZSTD_c_stableOutBuffer + * ZSTD_c_blockDelimiters + * ZSTD_c_validateSequences + * ZSTD_c_splitBlocks + * ZSTD_c_useRowMatchFinder + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly; + * also, the enums values themselves are unstable and can still change. + */ + ZSTD_c_experimentalParam1=500, + ZSTD_c_experimentalParam2=10, + ZSTD_c_experimentalParam3=1000, + ZSTD_c_experimentalParam4=1001, + ZSTD_c_experimentalParam5=1002, + ZSTD_c_experimentalParam6=1003, + ZSTD_c_experimentalParam7=1004, + ZSTD_c_experimentalParam8=1005, + ZSTD_c_experimentalParam9=1006, + ZSTD_c_experimentalParam10=1007, + ZSTD_c_experimentalParam11=1008, + ZSTD_c_experimentalParam12=1009, + ZSTD_c_experimentalParam13=1010, + ZSTD_c_experimentalParam14=1011, + ZSTD_c_experimentalParam15=1012 +} ZSTD_cParameter; -#if defined (__cplusplus) -} -#endif -/**** ended inlining xxhash.h ****/ +typedef struct { + size_t error; + int lowerBound; + int upperBound; +} ZSTD_bounds; +/*! ZSTD_cParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - lower and upper bounds, both inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam); -/* ************************************* -* Compiler Specific Options -***************************************/ -#if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ -# define INLINE_KEYWORD inline -#else -# define INLINE_KEYWORD -#endif - -#if defined(__GNUC__) || defined(__ICCARM__) -# define FORCE_INLINE_ATTR __attribute__((always_inline)) -#elif defined(_MSC_VER) -# define FORCE_INLINE_ATTR __forceinline -#else -# define FORCE_INLINE_ATTR -#endif - -#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR - - -#ifdef _MSC_VER -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -#endif - - -/* ************************************* -* Basic Types -***************************************/ -#ifndef MEM_MODULE -# define MEM_MODULE -# if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; -# else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. 
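A small sketch of querying parameter bounds with ZSTD_cParam_getBounds(), declared above, and clamping a caller-supplied compression level into the advertised range; the `requested` value is a hypothetical input.

    /* Clamp a requested compression level to the range advertised by the library. */
    static int clampCompressionLevel(int requested)
    {
        ZSTD_bounds const bounds = ZSTD_cParam_getBounds(ZSTD_c_compressionLevel);
        if (ZSTD_isError(bounds.error)) return ZSTD_CLEVEL_DEFAULT;  /* fall back to the default level */
        if (requested < bounds.lowerBound) return bounds.lowerBound; /* bounds are inclusive */
        if (requested > bounds.upperBound) return bounds.upperBound;
        return requested;
    }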
*/ -# endif -#endif - - -#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) - -/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */ -static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } -static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } - -#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) - -/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ -/* currently only defined for gcc and icc */ -typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign; - -static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } -static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } - -#else +/*! ZSTD_CCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_cParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is generally only possible during frame initialization (before starting compression). + * Exception : when using multi-threading mode (nbWorkers >= 1), + * the following parameters can be updated _during_ compression (within same frame): + * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. + * new parameters will be active for next job only (after a flush()). + * @return : an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value); -/* portable and safe solution. Generally efficient. - * see : http://stackoverflow.com/a/32095106/646947 +/*! ZSTD_CCtx_setPledgedSrcSize() : + * Total input data size to be compressed as a single frame. + * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag. + * This value will also be controlled at end of frame, and trigger an error if not respected. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame. + * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. + * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame. + * Note 2 : pledgedSrcSize is only valid once, for the next frame. + * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN. + * Note 3 : Whenever all input data is provided and consumed in a single round, + * for example with ZSTD_compress2(), + * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), + * this value is automatically overridden by srcSize instead. */ +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); -static U32 XXH_read32(const void* memPtr) -{ - U32 val; - memcpy(&val, memPtr, sizeof(val)); - return val; -} +typedef enum { + ZSTD_reset_session_only = 1, + ZSTD_reset_parameters = 2, + ZSTD_reset_session_and_parameters = 3 +} ZSTD_ResetDirective; -static U64 XXH_read64(const void* memPtr) -{ - U64 val; - memcpy(&val, memPtr, sizeof(val)); - return val; -} +/*! ZSTD_CCtx_reset() : + * There are 2 different things that can be reset, independently or jointly : + * - The session : will stop compressing current frame, and make CCtx ready to start a new one. 
+ * Useful after an error, or to interrupt any ongoing compression. + * Any internal data not yet flushed is cancelled. + * Compression parameters and dictionary remain unchanged. + * They will be used to compress next frame. + * Resetting session never fails. + * - The parameters : changes all parameters back to "default". + * This removes any reference to any dictionary too. + * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) + * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) + * - Both : similar to resetting the session, followed by resetting parameters. + */ +ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); -#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ +/*! ZSTD_compress2() : + * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. + * ZSTD_compress2() always starts a new frame. + * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - The function is always blocking, returns when compression is completed. + * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); -/* **************************************** -* Compiler-specific Functions and Macros -******************************************/ -#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +/*********************************************** +* Advanced decompression API (Requires v1.4.0+) +************************************************/ -/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ -#if defined(_MSC_VER) -# define XXH_rotl32(x,r) _rotl(x,r) -# define XXH_rotl64(x,r) _rotl64(x,r) -#else -#if defined(__ICCARM__) -# include -# define XXH_rotl32(x,r) __ROR(x,(32 - r)) -#else -# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) -#endif -# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) -#endif +/* The advanced API pushes parameters one by one into an existing DCtx context. + * Parameters are sticky, and remain valid for all following frames + * using the same DCtx context. + * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). + * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). + * Therefore, no new decompression function is necessary. 
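A hedged sketch of the advanced compression flow above: reset the context, push sticky parameters with ZSTD_CCtx_setParameter(), then compress a whole frame with ZSTD_compress2(). Parameter values are arbitrary examples, and the setParameter return codes are elided for brevity (check them with ZSTD_isError() in real code).

    #include "zstd.h"

    /* Compress with sticky parameters via the advanced API; returns compressed size or an error code. */
    static size_t compressAdvanced(ZSTD_CCtx* cctx,
                                   void* dst, size_t dstCapacity,
                                   const void* src, size_t srcSize)
    {
        /* start from a clean slate: drop any sticky parameters left over from earlier frames */
        ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);

        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1);  /* append a 32-bit content checksum */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 4);     /* no-op/error unless built with ZSTD_MULTITHREAD */

        /* one blocking call; the parameters set above apply to this frame */
        return ZSTD_compress2(cctx, dst, dstCapacity, src, srcSize);
    }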
+ */ -#if defined(_MSC_VER) /* Visual Studio */ -# define XXH_swap32 _byteswap_ulong -# define XXH_swap64 _byteswap_uint64 -#elif GCC_VERSION >= 403 -# define XXH_swap32 __builtin_bswap32 -# define XXH_swap64 __builtin_bswap64 -#else -static U32 XXH_swap32 (U32 x) -{ - return ((x << 24) & 0xff000000 ) | - ((x << 8) & 0x00ff0000 ) | - ((x >> 8) & 0x0000ff00 ) | - ((x >> 24) & 0x000000ff ); -} -static U64 XXH_swap64 (U64 x) -{ - return ((x << 56) & 0xff00000000000000ULL) | - ((x << 40) & 0x00ff000000000000ULL) | - ((x << 24) & 0x0000ff0000000000ULL) | - ((x << 8) & 0x000000ff00000000ULL) | - ((x >> 8) & 0x00000000ff000000ULL) | - ((x >> 24) & 0x0000000000ff0000ULL) | - ((x >> 40) & 0x000000000000ff00ULL) | - ((x >> 56) & 0x00000000000000ffULL); -} -#endif +typedef enum { + ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which + * the streaming API will refuse to allocate memory buffer + * in order to protect the host from unreasonable memory requirements. + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). + * Special: value 0 means "use default maximum windowLog". */ -/* ************************************* -* Architecture Macros -***************************************/ -typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_d_format + * ZSTD_d_stableOutBuffer + * ZSTD_d_forceIgnoreChecksum + * ZSTD_d_refMultipleDDicts + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly + */ + ZSTD_d_experimentalParam1=1000, + ZSTD_d_experimentalParam2=1001, + ZSTD_d_experimentalParam3=1002, + ZSTD_d_experimentalParam4=1003 -/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ -#ifndef XXH_CPU_LITTLE_ENDIAN - static const int g_one = 1; -# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one)) -#endif +} ZSTD_dParameter; +/*! ZSTD_dParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - both lower and upper bounds, inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); -/* *************************** -* Memory reads -*****************************/ -typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; +/*! ZSTD_DCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_dParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is only possible during frame initialization (before starting decompression). + * @return : 0, or an error code (which can be tested using ZSTD_isError()). 
+ */ +ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); -FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) -{ - if (align==XXH_unaligned) - return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); - else - return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); -} +/*! ZSTD_DCtx_reset() : + * Return a DCtx to clean state. + * Session and parameters can be reset jointly or separately. + * Parameters can only be reset when no active frame is being decompressed. + * @return : 0, or an error code, which can be tested with ZSTD_isError() + */ +ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); -FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) -{ - return XXH_readLE32_align(ptr, endian, XXH_unaligned); -} -static U32 XXH_readBE32(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); -} +/**************************** +* Streaming +****************************/ -FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) -{ - if (align==XXH_unaligned) - return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); - else - return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); -} +typedef struct ZSTD_inBuffer_s { + const void* src; /**< start of input buffer */ + size_t size; /**< size of input buffer */ + size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_inBuffer; -FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) -{ - return XXH_readLE64_align(ptr, endian, XXH_unaligned); -} +typedef struct ZSTD_outBuffer_s { + void* dst; /**< start of output buffer */ + size_t size; /**< size of output buffer */ + size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_outBuffer; -static U64 XXH_readBE64(const void* ptr) -{ - return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); -} -/* ************************************* -* Macros -***************************************/ -#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ - - -/* ************************************* -* Constants -***************************************/ -static const U32 PRIME32_1 = 2654435761U; -static const U32 PRIME32_2 = 2246822519U; -static const U32 PRIME32_3 = 3266489917U; -static const U32 PRIME32_4 = 668265263U; -static const U32 PRIME32_5 = 374761393U; +/*-*********************************************************************** +* Streaming compression - HowTo +* +* A ZSTD_CStream object is required to track streaming operation. +* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. +* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. +* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. +* +* For parallel execution, use one separate ZSTD_CStream per thread. +* +* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. +* +* Parameters are sticky : when starting a new compression on the same context, +* it will re-use the same sticky parameters as previous compression session. 
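A hedged sketch of the decompression-side parameter API above: reset a reusable ZSTD_DCtx, cap the accepted window with ZSTD_d_windowLogMax, then decompress with ZSTD_decompressDCtx() as declared earlier. The value 27 (a 128 MB window) is an arbitrary example, and per the note above the cap primarily protects the streaming path.

    /* Decompress with a capped window size; returns decompressed size or an error code. */
    static size_t decompressCapped(ZSTD_DCtx* dctx,
                                   void* dst, size_t dstCapacity,
                                   const void* src, size_t srcSize)
    {
        ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters);
        /* refuse frames requiring more than a 2^27-byte window (enforced by the streaming decoder) */
        ZSTD_DCtx_setParameter(dctx, ZSTD_d_windowLogMax, 27);
        return ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
    }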
+* When in doubt, it's recommended to fully initialize the context before usage. +* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), +* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to +* set more specific parameters, the pledged source size, or load a dictionary. +* +* Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to +* consume input stream. The function will automatically update both `pos` +* fields within `input` and `output`. +* Note that the function may not consume the entire input, for example, because +* the output buffer is already full, in which case `input.pos < input.size`. +* The caller must check if input has been entirely consumed. +* If not, the caller must make some room to receive more compressed data, +* and then present again remaining input data. +* note: ZSTD_e_continue is guaranteed to make some forward progress when called, +* but doesn't guarantee maximal forward progress. This is especially relevant +* when compressing with multiple threads. The call won't block if it can +* consume some input, but if it can't it will wait for some, but not all, +* output to be flushed. +* @return : provides a minimum amount of data remaining to be flushed from internal buffers +* or an error code, which can be tested using ZSTD_isError(). +* +* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, +* using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. +* Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). +* In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the +* operation. +* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if internal buffers are entirely flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). +* +* Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* The epilogue is required for decoders to consider a frame completed. +* flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to +* start a new frame. +* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if frame fully completed and fully flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). 
+* +* *******************************************************************/ -static const U64 PRIME64_1 = 11400714785074694791ULL; -static const U64 PRIME64_2 = 14029467366897019727ULL; -static const U64 PRIME64_3 = 1609587929392839161ULL; -static const U64 PRIME64_4 = 9650029242287828579ULL; -static const U64 PRIME64_5 = 2870177450012600261ULL; +typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */ + /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ +/*===== ZSTD_CStream management functions =====*/ +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); +ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); /* accept NULL pointer */ -XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } +/*===== Streaming compression functions =====*/ +typedef enum { + ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ + ZSTD_e_flush=1, /* flush any data provided so far, + * it creates (at least) one new block, that can be decoded immediately on reception; + * frame will continue: any future data can still reference previously compressed data, improving compression. + * note : multithreaded compression will block to flush as much output as possible. */ + ZSTD_e_end=2 /* flush any remaining data _and_ close current frame. + * note that frame is only closed after compressed data is fully flushed (return value == 0). + * After that point, any additional data starts a new frame. + * note : each frame is independent (does not reference any content from previous frame). + : note : multithreaded compression will block to flush as much output as possible. */ +} ZSTD_EndDirective; +/*! ZSTD_compressStream2() : Requires v1.4.0+ + * Behaves about the same as ZSTD_compressStream, with additional control on end directive. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) + * - output->pos must be <= dstCapacity, input->pos must be <= srcSize + * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. + * - endOp must be a valid directive + * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. + * - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flush to output whatever is available, + * and then immediately returns, just indicating that there is some data remaining to be flushed. + * The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte. + * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. + * - @return provides a minimum amount of data remaining to be flushed from internal buffers + * or an error code, which can be tested using ZSTD_isError(). + * if @return != 0, flush is not fully completed, there is still some data left within internal buffers. + * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers. + * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed. 
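A condensed, hedged sketch of the streaming loop described in the HowTo above: feed input with ZSTD_e_continue, then drive ZSTD_e_end until it returns 0, using the recommended buffer sizes. `fin`/`fout` are hypothetical open FILE pointers and cleanup on the error paths is elided.

    #include <stdio.h>
    #include <stdlib.h>
    #include "zstd.h"

    /* Stream-compress fin to fout; returns 0 on success. */
    static int streamCompress(FILE* fin, FILE* fout, int level)
    {
        size_t const inSize  = ZSTD_CStreamInSize();
        size_t const outSize = ZSTD_CStreamOutSize();
        void* const inBuf  = malloc(inSize);
        void* const outBuf = malloc(outSize);
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        if (!inBuf || !outBuf || !cctx) { /* cleanup elided */ return -1; }

        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);

        for (;;) {
            size_t const readBytes = fread(inBuf, 1, inSize, fin);
            int const lastChunk = (readBytes < inSize);
            ZSTD_EndDirective const mode = lastChunk ? ZSTD_e_end : ZSTD_e_continue;
            ZSTD_inBuffer input = { inBuf, readBytes, 0 };
            int finished;
            do {
                ZSTD_outBuffer output = { outBuf, outSize, 0 };
                size_t const remaining = ZSTD_compressStream2(cctx, &output, &input, mode);
                if (ZSTD_isError(remaining)) { /* cleanup elided */ return -1; }
                fwrite(outBuf, 1, output.pos, fout);
                /* ZSTD_e_end: loop until the frame is fully flushed (remaining == 0);
                 * ZSTD_e_continue: loop until this chunk of input has been fully consumed. */
                finished = lastChunk ? (remaining == 0) : (input.pos == input.size);
            } while (!finished);
            if (lastChunk) break;
        }

        ZSTD_freeCCtx(cctx);
        free(inBuf); free(outBuf);
        return 0;
    }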
+ * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), + * only ZSTD_e_end or ZSTD_e_flush operations are allowed. + * Before starting a new compression job, or changing compression parameters, + * it is required to fully flush internal buffers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); -/* ************************** -* Utils -****************************/ -XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState) -{ - memcpy(dstState, srcState, sizeof(*dstState)); -} -XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState) -{ - memcpy(dstState, srcState, sizeof(*dstState)); -} +/* These buffer sizes are softly recommended. + * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output. + * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(), + * reducing the amount of memory shuffling and buffering, resulting in minor performance savings. + * + * However, note that these recommendations are from the perspective of a C caller program. + * If the streaming interface is invoked from some other language, + * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo, + * a major performance rule is to reduce crossing such interface to an absolute minimum. + * It's not rare that performance ends being spent more into the interface, rather than compression itself. + * In which cases, prefer using large buffers, as large as practical, + * for both input and output, to reduce the nb of roundtrips. + */ +ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */ -/* *************************** -* Simple Hash Functions -*****************************/ +/* ***************************************************************************** + * This following is a legacy streaming API, available since v1.0+ . + * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). + * It is redundant, but remains fully supported. + * Streaming in combination with advanced parameters and dictionary compression + * can only be used through the new API. + ******************************************************************************/ -static U32 XXH32_round(U32 seed, U32 input) -{ - seed += input * PRIME32_2; - seed = XXH_rotl32(seed, 13); - seed *= PRIME32_1; - return seed; -} +/*! + * Equivalent to: + * + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + */ +ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); +/*! + * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). + * NOTE: The return value is different. ZSTD_compressStream() returns a hint for + * the next read size (if non-zero and not an error). ZSTD_compressStream2() + * returns the minimum nb of bytes left to flush (if non-zero and not an error). + */ +ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +/*! 
Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ +ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ +ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); -FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* bEnd = p + len; - U32 h32; -#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (p==NULL) { - len=0; - bEnd=p=(const BYTE*)(size_t)16; - } -#endif +/*-*************************************************************************** +* Streaming decompression - HowTo +* +* A ZSTD_DStream object is required to track streaming operations. +* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. +* ZSTD_DStream objects can be re-used multiple times. +* +* Use ZSTD_initDStream() to start a new decompression operation. +* @return : recommended first input size +* Alternatively, use advanced API to set specific properties. +* +* Use ZSTD_decompressStream() repetitively to consume your input. +* The function will update both `pos` fields. +* If `input.pos < input.size`, some input has not been consumed. +* It's up to the caller to present again remaining data. +* The function tries to flush all data decoded immediately, respecting output buffer size. +* If `output.pos < output.size`, decoder has flushed everything it could. +* But if `output.pos == output.size`, there might be some data left within internal buffers., +* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. +* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. +* @return : 0 when a frame is completely decoded and fully flushed, +* or an error code, which can be tested using ZSTD_isError(), +* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : +* the return value is a suggested next input size (just a hint for better latency) +* that will never request more than the remaining frame size. +* *******************************************************************************/ - if (len>=16) { - const BYTE* const limit = bEnd - 16; - U32 v1 = seed + PRIME32_1 + PRIME32_2; - U32 v2 = seed + PRIME32_2; - U32 v3 = seed + 0; - U32 v4 = seed - PRIME32_1; +typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ + /* For compatibility with versions <= v1.2.0, prefer differentiating them. 
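For comparison, a sketch of the legacy streaming calls above (ZSTD_compressStream / ZSTD_endStream). It assumes dst is large enough for the whole frame (for example ZSTD_compressBound(srcSize)), so a single pass suffices; otherwise loop exactly as in the ZSTD_compressStream2() example. Function and variable names are illustrative.

    #include <zstd.h>

    /* Legacy interface: same result as the ZSTD_compressStream2() loop,
     * but configuration is limited to a compression level via ZSTD_initCStream(). */
    static size_t compress_buffer_legacy(ZSTD_CStream* zcs,
                                         void* dst, size_t dstCapacity,
                                         const void* src, size_t srcSize)
    {
        ZSTD_outBuffer output = { dst, dstCapacity, 0 };
        ZSTD_inBuffer  input  = { src, srcSize, 0 };

        /* push input; the return value (if not an error) is a hint for the next read size */
        size_t const hint = ZSTD_compressStream(zcs, &output, &input);
        if (ZSTD_isError(hint)) return hint;

        /* close the frame; with a small dstCapacity this must be repeated until it returns 0 */
        size_t const remaining = ZSTD_endStream(zcs, &output);
        if (ZSTD_isError(remaining)) return remaining;

        return output.pos;   /* number of compressed bytes written into dst */
    }

Before calling, create and initialize the stream once: ZSTD_CStream* zcs = ZSTD_createCStream(); ZSTD_initCStream(zcs, level); and release it with ZSTD_freeCStream(zcs) when done.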
*/ +/*===== ZSTD_DStream management functions =====*/ +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); +ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); /* accept NULL pointer */ - do { - v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; - v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; - v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; - v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; - } while (p<=limit); +/*===== Streaming decompression functions =====*/ - h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); - } else { - h32 = seed + PRIME32_5; - } +/* This function is redundant with the advanced API and equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, NULL); + */ +ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); - h32 += (U32) len; +ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); - while (p+4<=bEnd) { - h32 += XXH_get32bits(p) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; - p+=4; - } +ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ - while (p> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - - return h32; -} - - -XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) -{ -#if 0 - /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ - XXH32_CREATESTATE_STATIC(state); - XXH32_reset(state, seed); - XXH32_update(state, input, len); - return XXH32_digest(state); -#else - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - - if (XXH_FORCE_ALIGN_CHECK) { - if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); - else - return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); - } } +/************************** +* Simple dictionary API +***************************/ +/*! ZSTD_compress_usingDict() : + * Compression at an explicit compression level using a Dictionary. + * A dictionary can be any arbitrary data segment (also called a prefix), + * or a buffer with specified information (see zdict.h). + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + int compressionLevel); - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); - else - return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); -#endif -} +/*! ZSTD_decompress_usingDict() : + * Decompression using a known Dictionary. + * Dictionary must be identical to the one used during compression. + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note : When `dict == NULL || dictSize < 8` no dictionary is used. 
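The decompression side mirrors the compression loop. A minimal sketch using the functions declared above, again assuming stdio file I/O; decompress_file and the terse error handling are illustrative.

    #include <stdio.h>
    #include <stdlib.h>
    #include <zstd.h>

    /* Decompress a zstd stream from fin to fout, one chunk at a time. */
    static int decompress_file(FILE* fin, FILE* fout)
    {
        ZSTD_DStream* const dstream = ZSTD_createDStream();
        if (dstream == NULL) return 1;
        ZSTD_initDStream(dstream);   /* equivalent to resetting the session, per the note above */

        size_t const inSize  = ZSTD_DStreamInSize();
        size_t const outSize = ZSTD_DStreamOutSize();
        void* const inBuf  = malloc(inSize);
        void* const outBuf = malloc(outSize);

        size_t read;
        size_t lastRet = 0;
        while ((read = fread(inBuf, 1, inSize, fin)) != 0) {
            ZSTD_inBuffer input = { inBuf, read, 0 };
            while (input.pos < input.size) {
                ZSTD_outBuffer output = { outBuf, outSize, 0 };
                size_t const ret = ZSTD_decompressStream(dstream, &output, &input);
                if (ZSTD_isError(ret)) return 1;     /* error handling abbreviated */
                fwrite(outBuf, 1, output.pos, fout);
                lastRet = ret;   /* 0 means a frame was completely decoded and flushed */
            }
        }

        free(inBuf); free(outBuf);
        ZSTD_freeDStream(dstream);
        return (lastRet == 0) ? 0 : 1;   /* non-zero: input ended in the middle of a frame */
    }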
*/ +ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); -static U64 XXH64_round(U64 acc, U64 input) -{ - acc += input * PRIME64_2; - acc = XXH_rotl64(acc, 31); - acc *= PRIME64_1; - return acc; -} +/*********************************** + * Bulk processing dictionary API + **********************************/ +typedef struct ZSTD_CDict_s ZSTD_CDict; -static U64 XXH64_mergeRound(U64 acc, U64 val) -{ - val = XXH64_round(0, val); - acc ^= val; - acc = acc * PRIME64_1 + PRIME64_4; - return acc; -} +/*! ZSTD_createCDict() : + * When compressing multiple messages or blocks using the same dictionary, + * it's recommended to digest the dictionary only once, since it's a costly operation. + * ZSTD_createCDict() will create a state from digesting a dictionary. + * The resulting state can be used for future compression operations with very limited startup cost. + * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. + * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. + * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. + * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, + * in which case the only thing that it transports is the @compressionLevel. + * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, + * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, + int compressionLevel); -FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; - U64 h64; -#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) +/*! ZSTD_freeCDict() : + * Function frees memory allocated by ZSTD_createCDict(). + * If a NULL pointer is passed, no operation is performed. */ +ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (p==NULL) { - len=0; - bEnd=p=(const BYTE*)(size_t)32; - } -#endif +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. + * Note : compression level is _decided at dictionary creation time_, + * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict); - if (len>=32) { - const BYTE* const limit = bEnd - 32; - U64 v1 = seed + PRIME64_1 + PRIME64_2; - U64 v2 = seed + PRIME64_2; - U64 v3 = seed + 0; - U64 v4 = seed - PRIME64_1; - do { - v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; - v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; - v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; - v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; - } while (p<=limit); +typedef struct ZSTD_DDict_s ZSTD_DDict; - h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); - h64 = XXH64_mergeRound(h64, v1); - h64 = XXH64_mergeRound(h64, v2); - h64 = XXH64_mergeRound(h64, v3); - h64 = XXH64_mergeRound(h64, v4); +/*! 
ZSTD_createDDict() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); - } else { - h64 = seed + PRIME64_5; - } +/*! ZSTD_freeDDict() : + * Function frees memory allocated with ZSTD_createDDict() + * If a NULL pointer is passed, no operation is performed. */ +ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); - h64 += (U64) len; +/*! ZSTD_decompress_usingDDict() : + * Decompression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict); - while (p+8<=bEnd) { - U64 const k1 = XXH64_round(0, XXH_get64bits(p)); - h64 ^= k1; - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; - p+=8; - } - if (p+4<=bEnd) { - h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; - h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; - p+=4; - } +/******************************** + * Dictionary helper functions + *******************************/ - while (p> 33; - h64 *= PRIME64_2; - h64 ^= h64 >> 29; - h64 *= PRIME64_3; - h64 ^= h64 >> 32; +/*! ZSTD_getDictID_fromCDict() : Requires v1.5.0+ + * Provides the dictID of the dictionary loaded into `cdict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict); - return h64; -} +/*! ZSTD_getDictID_fromDDict() : Requires v1.4.0+ + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); +/*! ZSTD_getDictID_fromFrame() : Requires v1.4.0+ + * Provides the dictID required to decompressed the frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This could for one of the following reasons : + * - The frame does not require a dictionary to be decoded (most common case). + * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). + * - This is not a Zstandard frame. + * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. 
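A small sketch of the bulk-processing dictionary API above: digest the dictionary once into a CDict/DDict, then reuse it for many messages. The wrapper names are invented; in real code the CDict and DDict would be kept alive and shared across calls (and threads) instead of being created and freed each time.

    #include <zstd.h>

    static size_t compress_with_cdict(void* dst, size_t dstCapacity,
                                      const void* src, size_t srcSize,
                                      const void* dictBuf, size_t dictSize, int level)
    {
        ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuf, dictSize, level);
        ZSTD_CCtx*  const cctx  = ZSTD_createCCtx();
        size_t const cSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity, src, srcSize, cdict);
        ZSTD_freeCCtx(cctx);
        ZSTD_freeCDict(cdict);   /* keep it around in real code: creation is the costly part */
        return cSize;            /* compressed size, or an error code (test with ZSTD_isError) */
    }

    static size_t decompress_with_ddict(void* dst, size_t dstCapacity,
                                        const void* src, size_t srcSize,
                                        const void* dictBuf, size_t dictSize)
    {
        ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuf, dictSize);
        ZSTD_DCtx*  const dctx  = ZSTD_createDCtx();
        size_t const dSize = ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ddict);
        ZSTD_freeDCtx(dctx);
        ZSTD_freeDDict(ddict);
        return dSize;
    }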
*/ +ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); -XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) -{ -#if 0 - /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ - XXH64_CREATESTATE_STATIC(state); - XXH64_reset(state, seed); - XXH64_update(state, input, len); - return XXH64_digest(state); -#else - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; - if (XXH_FORCE_ALIGN_CHECK) { - if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); - else - return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); - } } +/******************************************************************************* + * Advanced dictionary and prefix API (Requires v1.4.0+) + * + * This API allows dictionaries to be used with ZSTD_compress2(), + * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and + * only reset with the context is reset with ZSTD_reset_parameters or + * ZSTD_reset_session_and_parameters. Prefixes are single-use. + ******************************************************************************/ - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); - else - return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); -#endif -} +/*! ZSTD_CCtx_loadDictionary() : Requires v1.4.0+ + * Create an internal CDict from `dict` buffer. + * Decompression will have to use same dictionary. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. + * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). + * Note 2 : Loading a dictionary involves building tables. + * It's also a CPU consuming operation, with non-negligible impact on latency. + * Tables are dependent on compression parameters, and for this reason, + * compression parameters can no longer be changed after loading a dictionary. + * Note 3 :`dict` content will be copied internally. + * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. + * In such a case, dictionary buffer must outlive its users. + * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() + * to precisely select how dictionary content must be interpreted. */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); -/* ************************************************** -* Advanced Hash Functions -****************************************************/ +/*! ZSTD_CCtx_refCDict() : Requires v1.4.0+ + * Reference a prepared dictionary, to be used for all next compressed frames. + * Note that compression parameters are enforced from within CDict, + * and supersede any compression parameter previously set within CCtx. + * The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. + * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. + * The dictionary will remain valid for future compressed frames using same CCtx. 
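A sketch of the sticky-dictionary behaviour described for ZSTD_CCtx_loadDictionary(): parameters are set first (they cannot change after the dictionary is loaded), the dictionary is copied internally, and every following frame uses it until the context is reset. Function name and the fixed level 19 are illustrative.

    #include <zstd.h>

    static size_t compress_two_frames_with_dict(ZSTD_CCtx* cctx,
                                                void* dst1, size_t cap1, const void* src1, size_t size1,
                                                void* dst2, size_t cap2, const void* src2, size_t size2,
                                                const void* dict, size_t dictSize)
    {
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
        ZSTD_CCtx_loadDictionary(cctx, dict, dictSize);   /* copied internally, dict may be freed */

        size_t const c1 = ZSTD_compress2(cctx, dst1, cap1, src1, size1);   /* uses the dictionary */
        size_t const c2 = ZSTD_compress2(cctx, dst2, cap2, src2, size2);   /* still uses it (sticky) */
        return ZSTD_isError(c1) ? c1 : c2;
    }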
+ * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Referencing a NULL CDict means "return to no-dictionary mode". + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */ +ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); -XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) -{ - return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); -} -XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) -{ - XXH_free(statePtr); - return XXH_OK; -} - -XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) -{ - return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); -} -XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) -{ - XXH_free(statePtr); - return XXH_OK; -} +/*! ZSTD_CCtx_refPrefix() : Requires v1.4.0+ + * Reference a prefix (single-usage dictionary) for next compressed frame. + * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). + * Decompression will need same prefix to properly regenerate data. + * Compressing with a prefix is similar in outcome as performing a diff and compressing it, + * but performs much faster, especially during decompression (compression speed is tunable with compression level). + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary + * Note 1 : Prefix buffer is referenced. It **must** outlive compression. + * Its content must remain unmodified during compression. + * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, + * ensure that the window size is large enough to contain the entire source. + * See ZSTD_c_windowLog. + * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. + * It's a CPU consuming operation, with non-negligible impact on latency. + * If there is a need to use the same prefix multiple times, consider loadDictionary instead. + * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent). + * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, + const void* prefix, size_t prefixSize); +/*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+ + * Create an internal DDict from dict buffer, + * to be used to decompress next frames. + * The dictionary remains valid for all future frames, until explicitly invalidated. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Loading a dictionary involves building tables, + * which has a non-negligible impact on CPU usage and latency. + * It's recommended to "load once, use many times", to amortize the cost + * Note 2 :`dict` content will be copied internally, so `dict` can be released after loading. + * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead. + * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of + * how dictionary content is loaded and interpreted. 
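A sketch of the prefix ("diff-like") usage described for ZSTD_CCtx_refPrefix(): an older version of a blob is referenced as a single-use prefix, and the decompression side must supply the same prefix via ZSTD_DCtx_refPrefix() (documented just below). ZSTD_decompressDCtx() is the single-shot decompression entry point from the stable API; the wrapper names are invented, and the prefix buffers must outlive the calls.

    #include <zstd.h>

    static size_t compress_against_prefix(ZSTD_CCtx* cctx,
                                          void* dst, size_t dstCapacity,
                                          const void* newVersion, size_t newSize,
                                          const void* oldVersion, size_t oldSize)
    {
        /* the window (ZSTD_c_windowLog) must be large enough to cover prefix + input */
        ZSTD_CCtx_refPrefix(cctx, oldVersion, oldSize);   /* referenced, not copied */
        return ZSTD_compress2(cctx, dst, dstCapacity, newVersion, newSize);
    }

    static size_t decompress_against_prefix(ZSTD_DCtx* dctx,
                                            void* dst, size_t dstCapacity,
                                            const void* cSrc, size_t cSize,
                                            const void* oldVersion, size_t oldSize)
    {
        ZSTD_DCtx_refPrefix(dctx, oldVersion, oldSize);   /* same prefix as on the compression side */
        return ZSTD_decompressDCtx(dctx, dst, dstCapacity, cSrc, cSize);
    }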
+ */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -/*** Hash feed ***/ +/*! ZSTD_DCtx_refDDict() : Requires v1.4.0+ + * Reference a prepared dictionary, to be used to decompress next frames. + * The dictionary remains active for decompression of future frames using same DCtx. + * + * If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function + * will store the DDict references in a table, and the DDict used for decompression + * will be determined at decompression time, as per the dict ID in the frame. + * The memory for the table is allocated on the first call to refDDict, and can be + * freed with ZSTD_freeDCtx(). + * + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Special: referencing a NULL DDict means "return to no-dictionary mode". + * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); -XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) -{ - XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ - memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */ - state.v1 = seed + PRIME32_1 + PRIME32_2; - state.v2 = seed + PRIME32_2; - state.v3 = seed + 0; - state.v4 = seed - PRIME32_1; - memcpy(statePtr, &state, sizeof(state)); - return XXH_OK; -} +/*! ZSTD_DCtx_refPrefix() : Requires v1.4.0+ + * Reference a prefix (single-usage dictionary) to decompress next frame. + * This is the reverse operation of ZSTD_CCtx_refPrefix(), + * and must use the same prefix as the one used during compression. + * Prefix is **only used once**. Reference is discarded at end of frame. + * End of frame is reached when ZSTD_decompressStream() returns 0. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary + * Note 2 : Prefix buffer is referenced. It **must** outlive decompression. + * Prefix buffer must remain unmodified up to the end of frame, + * reached when ZSTD_decompressStream() returns 0. + * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent). + * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section) + * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. + * A full dictionary is more costly, as it requires building tables. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, + const void* prefix, size_t prefixSize); +/* === Memory management === */ -XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) -{ - XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ - memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */ - state.v1 = seed + PRIME64_1 + PRIME64_2; - state.v2 = seed + PRIME64_2; - state.v3 = seed + 0; - state.v4 = seed - PRIME64_1; - memcpy(statePtr, &state, sizeof(state)); - return XXH_OK; -} +/*! ZSTD_sizeof_*() : Requires v1.4.0+ + * These functions give the _current_ memory usage of selected object. + * Note that object memory usage can evolve (increase or decrease) over time. 
*/ +ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); +ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); +ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); +ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); +#endif /* ZSTD_H_235446 */ -FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (input==NULL) return XXH_ERROR; -#endif +/* ************************************************************************************** + * ADVANCED AND EXPERIMENTAL FUNCTIONS + **************************************************************************************** + * The definitions in the following section are considered experimental. + * They are provided for advanced scenarios. + * They should never be used with a dynamic library, as prototypes may change in the future. + * Use them only in association with static linking. + * ***************************************************************************************/ - state->total_len_32 += (unsigned)len; - state->large_len |= (len>=16) | (state->total_len_32>=16); +#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) +#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY - if (state->memsize + len < 16) { /* fill in tmp buffer */ - XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); - state->memsize += (unsigned)len; - return XXH_OK; - } +/* Deprecation warnings : + * Should these warnings be a problem, it is generally possible to disable them, + * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual. + * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS. 
+ */ +#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS +# define ZSTD_DEPRECATED(message) ZSTDLIB_API /* disable deprecation warnings */ +#else +# if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ +# define ZSTD_DEPRECATED(message) [[deprecated(message)]] ZSTDLIB_API +# elif (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__) +# define ZSTD_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated(message))) +# elif defined(__GNUC__) && (__GNUC__ >= 3) +# define ZSTD_DEPRECATED(message) ZSTDLIB_API __attribute__((deprecated)) +# elif defined(_MSC_VER) +# define ZSTD_DEPRECATED(message) ZSTDLIB_API __declspec(deprecated(message)) +# else +# pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler") +# define ZSTD_DEPRECATED(message) ZSTDLIB_API +# endif +#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */ - if (state->memsize) { /* some data left from previous update */ - XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); - { const U32* p32 = state->mem32; - state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; - state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; - state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; - state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++; - } - p += 16-state->memsize; - state->memsize = 0; - } +/**************************************************************************************** + * experimental API (static linking only) + **************************************************************************************** + * The following symbols and constants + * are not planned to join "stable API" status in the near future. + * They can still change in future versions. + * Some of them are planned to remain in the static_only section indefinitely. + * Some of them might be removed in the future (especially when redundant with existing stable functions) + * ***************************************************************************************/ - if (p <= bEnd-16) { - const BYTE* const limit = bEnd - 16; - U32 v1 = state->v1; - U32 v2 = state->v2; - U32 v3 = state->v3; - U32 v4 = state->v4; +#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */ +#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2) +#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */ +#define ZSTD_SKIPPABLEHEADERSIZE 8 - do { - v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; - v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; - v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; - v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; - } while (p<=limit); +/* compression parameter bounds */ +#define ZSTD_WINDOWLOG_MAX_32 30 +#define ZSTD_WINDOWLOG_MAX_64 31 +#define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) +#define ZSTD_WINDOWLOG_MIN 10 +#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30) +#define ZSTD_HASHLOG_MIN 6 +#define ZSTD_CHAINLOG_MAX_32 29 +#define ZSTD_CHAINLOG_MAX_64 30 +#define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? 
ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64)) +#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN +#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) +#define ZSTD_SEARCHLOG_MIN 1 +#define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ +#define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */ +#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX +#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */ +#define ZSTD_STRATEGY_MIN ZSTD_fast +#define ZSTD_STRATEGY_MAX ZSTD_btultra2 - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } - if (p < bEnd) { - XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); - state->memsize = (unsigned)(bEnd-p); - } +#define ZSTD_OVERLAPLOG_MIN 0 +#define ZSTD_OVERLAPLOG_MAX 9 - return XXH_OK; -} +#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame + * requiring larger than (1<mem32; - const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; - U32 h32; - if (state->large_len) { - h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); - } else { - h32 = state->v3 /* == seed */ + PRIME32_5; - } +/* --- Advanced types --- */ - h32 += state->total_len_32; +typedef struct ZSTD_CCtx_params_s ZSTD_CCtx_params; - while (p+4<=bEnd) { - h32 += XXH_readLE32(p, endian) * PRIME32_3; - h32 = XXH_rotl32(h32, 17) * PRIME32_4; - p+=4; - } +typedef struct { + unsigned int offset; /* The offset of the match. (NOT the same as the offset code) + * If offset == 0 and matchLength == 0, this sequence represents the last + * literals in the block of litLength size. + */ + + unsigned int litLength; /* Literal length of the sequence. */ + unsigned int matchLength; /* Match length of the sequence. */ + + /* Note: Users of this API may provide a sequence with matchLength == litLength == offset == 0. + * In this case, we will treat the sequence as a marker for a block boundary. + */ + + unsigned int rep; /* Represents which repeat offset is represented by the field 'offset'. + * Ranges from [0, 3]. + * + * Repeat offsets are essentially previous offsets from previous sequences sorted in + * recency order. For more detail, see doc/zstd_compression_format.md + * + * If rep == 0, then 'offset' does not contain a repeat offset. + * If rep > 0: + * If litLength != 0: + * rep == 1 --> offset == repeat_offset_1 + * rep == 2 --> offset == repeat_offset_2 + * rep == 3 --> offset == repeat_offset_3 + * If litLength == 0: + * rep == 1 --> offset == repeat_offset_2 + * rep == 2 --> offset == repeat_offset_3 + * rep == 3 --> offset == repeat_offset_1 - 1 + * + * Note: This field is optional. ZSTD_generateSequences() will calculate the value of + * 'rep', but repeat offsets do not necessarily need to be calculated from an external + * sequence provider's perspective. For example, ZSTD_compressSequences() does not + * use this 'rep' field at all (as of now). 
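The rep field of ZSTD_Sequence encodes the repeat-offset table spelled out in the comment above. The tiny helper below only illustrates that mapping; the names resolve_rep_offset and repeat_offsets are invented, and, as the note says, ZSTD_compressSequences() does not currently consume this field at all.

    #include <assert.h>

    /* repeat_offsets[0..2] are the three most recent offsets, most recent first. */
    static unsigned resolve_rep_offset(unsigned rep, unsigned litLength,
                                       const unsigned repeat_offsets[3], unsigned rawOffset)
    {
        if (rep == 0) return rawOffset;        /* 'offset' already holds a real offset */
        assert(rep <= 3);
        if (litLength != 0) {
            return repeat_offsets[rep - 1];    /* rep 1/2/3 -> repeat_offset_1/2/3 */
        } else {
            /* with no literals the mapping shifts, and rep==3 means repeat_offset_1 - 1 */
            if (rep == 1) return repeat_offsets[1];
            if (rep == 2) return repeat_offsets[2];
            return repeat_offsets[0] - 1;
        }
    }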
+ */ +} ZSTD_Sequence; - while (p> 15; - h32 *= PRIME32_2; - h32 ^= h32 >> 13; - h32 *= PRIME32_3; - h32 ^= h32 >> 16; - - return h32; -} +typedef struct { + unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ + unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ + unsigned hashLog; /**< dispatch table : larger == faster, more memory */ + unsigned searchLog; /**< nb of searches : larger == more compression, slower */ + unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */ + unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */ + ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */ +} ZSTD_compressionParameters; +typedef struct { + int contentSizeFlag; /**< 1: content size will be in frame header (when known) */ + int checksumFlag; /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */ + int noDictIDFlag; /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */ +} ZSTD_frameParameters; -XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; +typedef struct { + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; +} ZSTD_parameters; - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH32_digest_endian(state_in, XXH_littleEndian); - else - return XXH32_digest_endian(state_in, XXH_bigEndian); -} +typedef enum { + ZSTD_dct_auto = 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */ + ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */ + ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */ +} ZSTD_dictContentType_e; +typedef enum { + ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */ + ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. */ +} ZSTD_dictLoadMethod_e; +typedef enum { + ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */ + ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number. + * Useful to save 4 bytes per generated frame. + * Decoder cannot recognise automatically this format, requiring this instruction. */ +} ZSTD_format_e; -/* **** XXH64 **** */ +typedef enum { + /* Note: this enum controls ZSTD_d_forceIgnoreChecksum */ + ZSTD_d_validateChecksum = 0, + ZSTD_d_ignoreChecksum = 1 +} ZSTD_forceIgnoreChecksum_e; -FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) -{ - const BYTE* p = (const BYTE*)input; - const BYTE* const bEnd = p + len; +typedef enum { + /* Note: this enum controls ZSTD_d_refMultipleDDicts */ + ZSTD_rmd_refSingleDDict = 0, + ZSTD_rmd_refMultipleDDicts = 1 +} ZSTD_refMultipleDDicts_e; -#ifdef XXH_ACCEPT_NULL_INPUT_POINTER - if (input==NULL) return XXH_ERROR; -#endif +typedef enum { + /* Note: this enum and the behavior it controls are effectively internal + * implementation details of the compressor. 
They are expected to continue + * to evolve and should be considered only in the context of extremely + * advanced performance tuning. + * + * Zstd currently supports the use of a CDict in three ways: + * + * - The contents of the CDict can be copied into the working context. This + * means that the compression can search both the dictionary and input + * while operating on a single set of internal tables. This makes + * the compression faster per-byte of input. However, the initial copy of + * the CDict's tables incurs a fixed cost at the beginning of the + * compression. For small compressions (< 8 KB), that copy can dominate + * the cost of the compression. + * + * - The CDict's tables can be used in-place. In this model, compression is + * slower per input byte, because the compressor has to search two sets of + * tables. However, this model incurs no start-up cost (as long as the + * working context's tables can be reused). For small inputs, this can be + * faster than copying the CDict's tables. + * + * - The CDict's tables are not used at all, and instead we use the working + * context alone to reload the dictionary and use params based on the source + * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). + * This method is effective when the dictionary sizes are very small relative + * to the input size, and the input size is fairly large to begin with. + * + * Zstd has a simple internal heuristic that selects which strategy to use + * at the beginning of a compression. However, if experimentation shows that + * Zstd is making poor choices, it is possible to override that choice with + * this enum. + */ + ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ + ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ + ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ + ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ +} ZSTD_dictAttachPref_e; - state->total_len += len; +typedef enum { + ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level. + * Negative compression levels will be uncompressed, and positive compression + * levels will be compressed. */ + ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be + * emitted if Huffman compression is not profitable. */ + ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. 
*/ +} ZSTD_literalCompressionMode_e; - if (state->memsize + len < 32) { /* fill in tmp buffer */ - XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); - state->memsize += (U32)len; - return XXH_OK; - } +typedef enum { + ZSTD_urm_auto = 0, /* Automatically determine whether or not we use row matchfinder */ + ZSTD_urm_disableRowMatchFinder = 1, /* Never use row matchfinder */ + ZSTD_urm_enableRowMatchFinder = 2 /* Always use row matchfinder when applicable */ +} ZSTD_useRowMatchFinderMode_e; - if (state->memsize) { /* tmp buffer is full */ - XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); - state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); - state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); - state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); - state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); - p += 32-state->memsize; - state->memsize = 0; - } +/*************************************** +* Frame size functions +***************************************/ - if (p+32 <= bEnd) { - const BYTE* const limit = bEnd - 32; - U64 v1 = state->v1; - U64 v2 = state->v2; - U64 v3 = state->v3; - U64 v4 = state->v4; +/*! ZSTD_findDecompressedSize() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. there should be a frame boundary at `src + srcSize`) + * @return : - decompressed size of all data in all successive frames + * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * note 2 : decompressed size is always present when compression is done with ZSTD_compress() + * note 3 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure result fits within application's authorized limits. + * Each application can set its own limits. + * note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to + * read each contained frame header. This is fast as most of the data is skipped, + * however it does mean that all frame data must be present and valid. */ +ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); - do { - v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; - v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; - v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; - v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; - } while (p<=limit); +/*! ZSTD_decompressBound() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. 
there should be a frame boundary at `src + srcSize`) + * @return : - upper-bound for the decompressed size of all data in all successive frames + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. + * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`. + * in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value. + * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by: + * upper-bound = # blocks * min(128 KB, Window_Size) + */ +ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); - state->v1 = v1; - state->v2 = v2; - state->v3 = v3; - state->v4 = v4; - } +/*! ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ +ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); - if (p < bEnd) { - XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); - state->memsize = (unsigned)(bEnd-p); - } +typedef enum { + ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */ + ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */ +} ZSTD_sequenceFormat_e; - return XXH_OK; -} +/*! ZSTD_generateSequences() : + * Generate sequences using ZSTD_compress2, given a source buffer. + * + * Each block will end with a dummy sequence + * with offset == 0, matchLength == 0, and litLength == length of last literals. + * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0) + * simply acts as a block delimiter. + * + * zc can be used to insert custom compression params. + * This function invokes ZSTD_compress2 + * + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters + * @return : number of sequences generated + */ -XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; +ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize); - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_update_endian(state_in, input, len, XXH_littleEndian); - else - return XXH64_update_endian(state_in, input, len, XXH_bigEndian); -} +/*! ZSTD_mergeBlockDelimiters() : + * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals + * by merging them into into the literals of the next sequence. + * + * As such, the final generated result has no explicit representation of block boundaries, + * and the final last literals segment is not represented in the sequences. + * + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters + * @return : number of sequences left after merging + */ +ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); +/*! ZSTD_compressSequences() : + * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst. + * If a dictionary is included, then the cctx should reference the dict. 
(see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.) + * The entire source is compressed into a single frame. + * + * The compression behavior changes based on cctx params. In particular: + * If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on + * the block size derived from the cctx, and sequences may be split. This is the default setting. + * + * If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. + * + * If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined + * behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) then the function will bail out and return an error. + * + * In addition to the two adjustable experimental params, there are other important cctx params. + * - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN. + * - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression. + * - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset + * is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md + * + * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused. + * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly, + * and cannot emit an RLE block that disagrees with the repcode history + * @return : final compressed size or a ZSTD error. + */ +ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize); -FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) -{ - const BYTE * p = (const BYTE*)state->mem64; - const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; - U64 h64; +/*! ZSTD_writeSkippableFrame() : + * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer. + * + * Skippable frames begin with a a 4-byte magic number. There are 16 possible choices of magic number, + * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15. + * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so + * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. + * + * Returns an error if destination buffer is not large enough, if the source size is not representable + * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid). + * + * @return : number of bytes written or a ZSTD error. 
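A sketch of the pipeline described above: extract sequences with ZSTD_generateSequences(), then feed them back to ZSTD_compressSequences() with explicit block delimiters. The experimental symbols require ZSTD_STATIC_LINKING_ONLY; the function name and the deliberately generous maxSeqs bound are illustrative assumptions.

    #define ZSTD_STATIC_LINKING_ONLY
    #include <stdlib.h>
    #include <zstd.h>

    static size_t recompress_via_sequences(void* dst, size_t dstCapacity,
                                           const void* src, size_t srcSize)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        size_t const maxSeqs = srcSize + 1;   /* loose upper bound, fine for a sketch */
        ZSTD_Sequence* const seqs = (ZSTD_Sequence*)malloc(maxSeqs * sizeof(ZSTD_Sequence));

        size_t const nbSeqs = ZSTD_generateSequences(cctx, seqs, maxSeqs, src, srcSize);
        if (ZSTD_isError(nbSeqs)) { free(seqs); ZSTD_freeCCtx(cctx); return nbSeqs; }

        /* generateSequences emits block delimiters, so tell compressSequences to expect them */
        ZSTD_CCtx_setParameter(cctx, ZSTD_c_blockDelimiters, ZSTD_sf_explicitBlockDelimiters);
        size_t const cSize = ZSTD_compressSequences(cctx, dst, dstCapacity,
                                                    seqs, nbSeqs, src, srcSize);
        free(seqs);
        ZSTD_freeCCtx(cctx);
        return cSize;   /* final compressed size, or an error code */
    }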
+ */ +ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, unsigned magicVariant); - if (state->total_len >= 32) { - U64 const v1 = state->v1; - U64 const v2 = state->v2; - U64 const v3 = state->v3; - U64 const v4 = state->v4; - h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); - h64 = XXH64_mergeRound(h64, v1); - h64 = XXH64_mergeRound(h64, v2); - h64 = XXH64_mergeRound(h64, v3); - h64 = XXH64_mergeRound(h64, v4); - } else { - h64 = state->v3 + PRIME64_5; - } +/*************************************** +* Memory management +***************************************/ - h64 += (U64) state->total_len; +/*! ZSTD_estimate*() : + * These functions make it possible to estimate memory usage + * of a future {D,C}Ctx, before its creation. + * + * ZSTD_estimateCCtxSize() will provide a memory budget large enough + * for any compression level up to selected one. + * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate + * does not include space for a window buffer. + * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. + * The estimate will assume the input may be arbitrarily large, + * which is the worst case. + * + * When srcSize can be bound by a known and rather "small" value, + * this fact can be used to provide a tighter estimation + * because the CCtx compression context will need less memory. + * This tighter estimation can be provided by more advanced functions + * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), + * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). + * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. + * + * Note 2 : only single-threaded compression is supported. + * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. + */ +ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); - while (p+8<=bEnd) { - U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian)); - h64 ^= k1; - h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; - p+=8; - } - - if (p+4<=bEnd) { - h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1; - h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; - p+=4; - } - - while (p= 1. + * Note : CStream size estimation is only correct for single-threaded compression. + * ZSTD_DStream memory budget depends on window Size. + * This information can be passed manually, using ZSTD_estimateDStreamSize, + * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); + * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), + * an internal ?Dict will be created, which additional size is not estimated here. 
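A minimal sketch of ZSTD_writeSkippableFrame(): wrap arbitrary metadata in a frame that conforming decoders simply skip. The wrapper name and the choice of magicVariant 0 are illustrative; this symbol lives in the static-linking-only section.

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>

    static size_t write_metadata_frame(void* dst, size_t dstCapacity,
                                       const void* metadata, size_t metadataSize)
    {
        unsigned const magicVariant = 0;   /* frame magic = ZSTD_MAGIC_SKIPPABLE_START + 0 */
        /* returns metadataSize + ZSTD_SKIPPABLEHEADERSIZE bytes written, or an error code */
        return ZSTD_writeSkippableFrame(dst, dstCapacity, metadata, metadataSize, magicVariant);
    }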
+ * In this case, get total size by adding ZSTD_estimate?DictSize */ +ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); - h64 ^= h64 >> 33; - h64 *= PRIME64_2; - h64 ^= h64 >> 29; - h64 *= PRIME64_3; - h64 ^= h64 >> 32; +/*! ZSTD_estimate?DictSize() : + * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict(). + * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced(). + * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller. + */ +ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); +ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); - return h64; -} +/*! ZSTD_initStatic*() : + * Initialize an object using a pre-allocated fixed-size buffer. + * workspace: The memory area to emplace the object into. + * Provided pointer *must be 8-bytes aligned*. + * Buffer must outlive object. + * workspaceSize: Use ZSTD_estimate*Size() to determine + * how large workspace must be to support target scenario. + * @return : pointer to object (same address as workspace, just different type), + * or NULL if error (size too small, incorrect alignment, etc.) + * Note : zstd will never resize nor malloc() when using a static buffer. + * If the object requires more memory than available, + * zstd will just error out (typically ZSTD_error_memory_allocation). + * Note 2 : there is no corresponding "free" function. + * Since workspace is allocated externally, it must be freed externally too. + * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level + * into its associated cParams. + * Limitation 1 : currently not compatible with internal dictionary creation, triggered by + * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict(). + * Limitation 2 : static cctx currently not compatible with multi-threading. + * Limitation 3 : static dctx is incompatible with legacy support. 
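A sketch combining ZSTD_estimateCCtxSize() with ZSTD_initStaticCCtx(): the context is emplaced in a caller-provided workspace and zstd performs no further allocation. The workspace here comes from malloc only for brevity; any 8-byte aligned buffer works, and since the estimate only covers single-shot compression, the sketch sticks to ZSTD_compressCCtx().

    #define ZSTD_STATIC_LINKING_ONLY
    #include <stdlib.h>
    #include <zstd.h>

    static size_t compress_in_static_workspace(void* dst, size_t dstCapacity,
                                               const void* src, size_t srcSize, int level)
    {
        size_t const workspaceSize = ZSTD_estimateCCtxSize(level);
        void* const workspace = malloc(workspaceSize);
        ZSTD_CCtx* const cctx = ZSTD_initStaticCCtx(workspace, workspaceSize);
        if (cctx == NULL) { free(workspace); return (size_t)-1; }   /* too small or misaligned */

        size_t const cSize = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, level);

        /* no ZSTD_freeCCtx() here: the object lives inside 'workspace', release that instead */
        free(workspace);
        return cSize;
    }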
+ */ +ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */ +ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */ -XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in) -{ - XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; +ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams); - if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) - return XXH64_digest_endian(state_in, XXH_littleEndian); - else - return XXH64_digest_endian(state_in, XXH_bigEndian); -} +ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType); -/* ************************** -* Canonical representation -****************************/ +/*! Custom memory allocation : + * These prototypes make it possible to pass your own allocation/free functions. + * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. + * All allocation/free operations will be completed using these custom variants instead of regular ones. + */ +typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); +typedef void (*ZSTD_freeFunction) (void* opaque, void* address); +typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; +static +#ifdef __GNUC__ +__attribute__((__unused__)) +#endif +ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */ -/*! Default XXH result types are basic unsigned 32 and 64 bits. -* The canonical representation follows human-readable write convention, aka big-endian (large digits first). -* These functions allow transformation of hash result into and from its canonical format. -* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs. -*/ +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); -XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) -{ - XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); - if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); - memcpy(dst, &hash, sizeof(*dst)); -} +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, + ZSTD_customMem customMem); -XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) -{ - XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); - if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); - memcpy(dst, &hash, sizeof(*dst)); -} +/* ! 
Thread pool : + * These prototypes make it possible to share a thread pool among multiple compression contexts. + * This can limit resources for applications with multiple threads where each one uses + * a threaded compression mode (via ZSTD_c_nbWorkers parameter). + * ZSTD_createThreadPool creates a new thread pool with a given number of threads. + * Note that the lifetime of such pool must exist while being used. + * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value + * to use an internal thread pool). + * ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer. + */ +typedef struct POOL_ctx_s ZSTD_threadPool; +ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads); +ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); /* accept NULL pointer */ +ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool); -XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) -{ - return XXH_readBE32(src); -} -XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) -{ - return XXH_readBE64(src); -} -/**** ended inlining xxhash.c ****/ -/**** start inlining zstd_common.c ****/ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. + * This API is temporary and is expected to change or disappear in the future! */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CCtx_params* cctxParams, + ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem); -/*-************************************* -* Dependencies +/*************************************** +* Advanced compression functions ***************************************/ -#include /* malloc, calloc, free */ -#include /* memset */ -/**** skipping file: error_private.h ****/ -/**** start inlining zstd_internal.h ****/ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ -#ifndef ZSTD_CCOMMON_H_MODULE -#define ZSTD_CCOMMON_H_MODULE +/*! ZSTD_createCDict_byReference() : + * Create a digested dictionary for compression + * Dictionary content is just referenced, not duplicated. + * As a consequence, `dictBuffer` **must** outlive CDict, + * and its content must remain unmodified throughout the lifetime of CDict. + * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); -/* this module contains definitions which must be identical - * across compression, decompression and dictBuilder. 
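A hedged sketch of sharing one thread pool between two compression contexts, as described in the thread-pool note above; the worker count and helper name are assumptions, and this only has an effect in a ZSTD_MULTITHREAD build:

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

static int setupSharedPool(ZSTD_CCtx* cctxA, ZSTD_CCtx* cctxB, ZSTD_threadPool** poolOut)
{
    ZSTD_threadPool* pool = ZSTD_createThreadPool(4);     /* 4 worker threads shared by both contexts */
    if (pool == NULL) return -1;
    /* each context still needs nbWorkers >= 1 to submit jobs to the pool */
    ZSTD_CCtx_setParameter(cctxA, ZSTD_c_nbWorkers, 4);
    ZSTD_CCtx_setParameter(cctxB, ZSTD_c_nbWorkers, 4);
    if (ZSTD_isError(ZSTD_CCtx_refThreadPool(cctxA, pool))
     || ZSTD_isError(ZSTD_CCtx_refThreadPool(cctxB, pool))) {
        ZSTD_freeThreadPool(pool);
        return -1;
    }
    *poolOut = pool;   /* the pool must outlive both contexts' use; free with ZSTD_freeThreadPool() */
    return 0;
}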
- * It also contains a few functions useful to at least 2 of them - * and which benefit from being inlined */ +/*! ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. + * `estimatedSrcSize` value is optional, select 0 if not known */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); -/*-************************************* -* Dependencies -***************************************/ -/**** skipping file: compiler.h ****/ -/**** skipping file: mem.h ****/ -/**** skipping file: debug.h ****/ -/**** skipping file: error_private.h ****/ -#define ZSTD_STATIC_LINKING_ONLY -/**** start inlining zstd.h ****/ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. - */ -#if defined (__cplusplus) -extern "C" { -#endif +/*! ZSTD_getParams() : + * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. + * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ +ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); -#ifndef ZSTD_H_235446 -#define ZSTD_H_235446 +/*! ZSTD_checkCParams() : + * Ensure param values remain within authorized range. + * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); -/* ====== Dependency ======*/ -#include /* INT_MAX */ -#include /* size_t */ +/*! ZSTD_adjustCParams() : + * optimize params for a given `srcSize` and `dictSize`. + * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. + * `dictSize` must be `0` when there is no dictionary. + * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. + * This function never fails (wide contract) */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); +/*! ZSTD_compress_advanced() : + * Note : this function is now DEPRECATED. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. + * This prototype will generate compilation warnings. 
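One way the ZSTD_getCParams()/ZSTD_adjustCParams() pair above can feed an advanced CDict; the helper and the byCopy/auto choices are illustrative assumptions, not mandated by the header:

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

static ZSTD_CDict* makeCDictForLevel(const void* dict, size_t dictSize,
                                     int level, unsigned long long typicalSrcSize)
{
    /* derive parameters for the expected input size, then clamp them into valid range */
    ZSTD_compressionParameters cParams = ZSTD_getCParams(level, typicalSrcSize, dictSize);
    cParams = ZSTD_adjustCParams(cParams, typicalSrcSize, dictSize);   /* never fails */
    return ZSTD_createCDict_advanced(dict, dictSize,
                                     ZSTD_dlm_byCopy, ZSTD_dct_auto,
                                     cParams, ZSTD_defaultCMem);
}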
*/ +ZSTD_DEPRECATED("use ZSTD_compress2") +size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); -/* ===== ZSTDLIB_API : control library symbols visibility ===== */ -#ifndef ZSTDLIB_VISIBILITY -# if defined(__GNUC__) && (__GNUC__ >= 4) -# define ZSTDLIB_VISIBILITY __attribute__ ((visibility ("default"))) -# else -# define ZSTDLIB_VISIBILITY -# endif -#endif -#if defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) -# define ZSTDLIB_API __declspec(dllexport) ZSTDLIB_VISIBILITY -#elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) -# define ZSTDLIB_API __declspec(dllimport) ZSTDLIB_VISIBILITY /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ -#else -# define ZSTDLIB_API ZSTDLIB_VISIBILITY -#endif +/*! ZSTD_compress_usingCDict_advanced() : + * Note : this function is now DEPRECATED. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. + * This prototype will generate compilation warnings. */ +ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary") +size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams); -/******************************************************************************* - Introduction +/*! ZSTD_CCtx_loadDictionary_byReference() : + * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. + * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); - zstd, short for Zstandard, is a fast lossless compression algorithm, targeting - real-time compression scenarios at zlib-level and better compression ratios. - The zstd compression library provides in-memory compression and decompression - functions. +/*! ZSTD_CCtx_loadDictionary_advanced() : + * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); - The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), - which is currently 22. Levels >= 20, labeled `--ultra`, should be used with - caution, as they require more memory. The library also offers negative - compression levels, which extend the range of speed vs. ratio preferences. - The lower the level, the faster the speed (at the cost of compression). - - Compression can be done in: - - a single step (described as Simple API) - - a single step, reusing a context (described as Explicit context) - - unbounded multiple steps (described as Streaming compression) +/*! ZSTD_CCtx_refPrefix_advanced() : + * Same as ZSTD_CCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) 
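A minimal sketch of dictionary loading by reference, per the note above that `dict` must outlive its use inside the cctx; the compression level and helper name are assumptions:

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

static size_t compressWithRefDict(ZSTD_CCtx* cctx,
                                  void* dst, size_t dstCap,
                                  const void* src, size_t srcSize,
                                  const void* dictBuf, size_t dictSize)
{
    ZSTD_CCtx_reset(cctx, ZSTD_reset_session_and_parameters);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 19);
    /* the dictionary content is only referenced, so dictBuf must stay valid and unmodified */
    size_t const err = ZSTD_CCtx_loadDictionary_byReference(cctx, dictBuf, dictSize);
    if (ZSTD_isError(err)) return err;
    return ZSTD_compress2(cctx, dst, dstCap, src, srcSize);
}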
*/ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); - The compression ratio achievable on small data can be highly improved using - a dictionary. Dictionary compression can be performed in: - - a single step (described as Simple dictionary API) - - a single step, reusing a dictionary (described as Bulk-processing - dictionary API) +/* === experimental parameters === */ +/* these parameters can be used with ZSTD_setParameter() + * they are not guaranteed to remain supported in the future */ - Advanced experimental functions can be accessed using - `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. + /* Enables rsyncable mode, + * which makes compressed files more rsync friendly + * by adding periodic synchronization points to the compressed data. + * The target average block size is ZSTD_c_jobSize / 2. + * It's possible to modify the job size to increase or decrease + * the granularity of the synchronization point. + * Once the jobSize is smaller than the window size, + * it will result in compression ratio degradation. + * NOTE 1: rsyncable mode only works when multithreading is enabled. + * NOTE 2: rsyncable performs poorly in combination with long range mode, + * since it will decrease the effectiveness of synchronization points, + * though mileage may vary. + * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s. + * If the selected compression level is already running significantly slower, + * the overall speed won't be significantly impacted. + */ + #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1 - Advanced experimental APIs should never be used with a dynamically-linked - library. They are not "stable"; their definitions or signatures may change in - the future. Only static linking is allowed. -*******************************************************************************/ +/* Select a compression format. + * The value must be of type ZSTD_format_e. + * See ZSTD_format_e enum definition for details */ +#define ZSTD_c_format ZSTD_c_experimentalParam2 -/*------ Version ------*/ -#define ZSTD_VERSION_MAJOR 1 -#define ZSTD_VERSION_MINOR 4 -#define ZSTD_VERSION_RELEASE 5 +/* Force back-reference distances to remain < windowSize, + * even when referencing into Dictionary content (default:0) */ +#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3 -#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) -ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */ +/* Controls whether the contents of a CDict + * are used in place, or copied into the working context. + * Accepts values from the ZSTD_dictAttachPref_e enum. + * See the comments on that enum for an explanation of the feature. */ +#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 -#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE -#define ZSTD_QUOTE(str) #str -#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) -#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) -ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */ +/* Controls how the literals are compressed (default is auto). + * The value must be of type ZSTD_literalCompressionMode_e. + * See ZSTD_literalCompressionMode_e enum definition for details. 
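A short sketch of driving the experimental parameters above through the regular setter; the level and worker count are assumptions, and ZSTD_c_rsyncable only takes effect with multithreading enabled (ZSTD_MULTITHREAD build, nbWorkers >= 1):

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

static void configureRsyncable(ZSTD_CCtx* cctx)
{
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_nbWorkers, 2);
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 3);
    /* experimental parameters are ordinary ZSTD_cParameter values behind
       ZSTD_STATIC_LINKING_ONLY, so they go through the same setter */
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_rsyncable, 1);
}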
+ */ +#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 -/* ************************************* - * Default constant - ***************************************/ -#ifndef ZSTD_CLEVEL_DEFAULT -# define ZSTD_CLEVEL_DEFAULT 3 -#endif +/* Tries to fit compressed block size to be around targetCBlockSize. + * No target when targetCBlockSize == 0. + * There is no guarantee on compressed block size (default:0) */ +#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 -/* ************************************* - * Constants - ***************************************/ +/* User's best guess of source size. + * Hint is not valid when srcSizeHint == 0. + * There is no guarantee that hint is close to actual source size, + * but compression ratio may regress significantly if guess considerably underestimates */ +#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 -/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ -#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ -#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ -#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ -#define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 +/* Controls whether the new and experimental "dedicated dictionary search + * structure" can be used. This feature is still rough around the edges, be + * prepared for surprising behavior! + * + * How to use it: + * + * When using a CDict, whether to use this feature or not is controlled at + * CDict creation, and it must be set in a CCtxParams set passed into that + * construction (via ZSTD_createCDict_advanced2()). A compression will then + * use the feature or not based on how the CDict was constructed; the value of + * this param, set in the CCtx, will have no effect. + * + * However, when a dictionary buffer is passed into a CCtx, such as via + * ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control + * whether the CDict that is created internally can use the feature or not. + * + * What it does: + * + * Normally, the internal data structures of the CDict are analogous to what + * would be stored in a CCtx after compressing the contents of a dictionary. + * To an approximation, a compression using a dictionary can then use those + * data structures to simply continue what is effectively a streaming + * compression where the simulated compression of the dictionary left off. + * Which is to say, the search structures in the CDict are normally the same + * format as in the CCtx. + * + * It is possible to do better, since the CDict is not like a CCtx: the search + * structures are written once during CDict creation, and then are only read + * after that, while the search structures in the CCtx are both read and + * written as the compression goes along. This means we can choose a search + * structure for the dictionary that is read-optimized. + * + * This feature enables the use of that different structure. + * + * Note that some of the members of the ZSTD_compressionParameters struct have + * different semantics and constraints in the dedicated search structure. It is + * highly recommended that you simply set a compression level in the CCtxParams + * you pass into the CDict creation call, and avoid messing with the cParams + * directly. + * + * Effects: + * + * This will only have any effect when the selected ZSTD_strategy + * implementation supports this feature. 
Currently, that's limited to + * ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2. + * + * Note that this means that the CDict tables can no longer be copied into the + * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be + * useable. The dictionary can only be attached or reloaded. + * + * In general, you should expect compression to be faster--sometimes very much + * so--and CDict creation to be slightly slower. Eventually, we will probably + * make this mode the default. + */ +#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8 -#define ZSTD_BLOCKSIZELOG_MAX 17 -#define ZSTD_BLOCKSIZE_MAX (1<= `ZSTD_compressBound(srcSize)`. - * @return : compressed size written into `dst` (<= `dstCapacity), - * or an error code if it fails (which can be tested using ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - int compressionLevel); +/* ZSTD_c_validateSequences + * Default is 0 == disabled. Set to 1 to enable sequence validation. + * + * For use with sequence compression API: ZSTD_compressSequences(). + * Designates whether or not we validate sequences provided to ZSTD_compressSequences() + * during function execution. + * + * Without validation, providing a sequence that does not conform to the zstd spec will cause + * undefined behavior, and may produce a corrupted block. + * + * With validation enabled, a if sequence is invalid (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) then the function will bail out and + * return an error. + * + */ +#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12 -/*! ZSTD_decompress() : - * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. - * `dstCapacity` is an upper bound of originalSize to regenerate. - * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. - * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), - * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, - const void* src, size_t compressedSize); +/* ZSTD_c_splitBlocks + * Default is 0 == disabled. Set to 1 to enable block splitting. + * + * Will attempt to split blocks in order to improve compression ratio at the cost of speed. + */ +#define ZSTD_c_splitBlocks ZSTD_c_experimentalParam13 -/*! ZSTD_getFrameContentSize() : requires v1.3.0+ - * `src` should point to the start of a ZSTD encoded frame. - * `srcSize` must be at least as large as the frame header. - * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. - * @return : - decompressed size of `src` frame content, if known - * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined - * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) - * note 1 : a 0 return value means the frame is valid but "empty". - * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. - * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. - * In which case, it's necessary to use streaming mode to decompress data. - * Optionally, application can rely on some implicit limit, - * as ZSTD_decompress() only needs an upper bound of decompressed size. - * (For example, data could be necessarily cut into blocks <= 16 KB). 
- * note 3 : decompressed size is always present when compression is completed using single-pass functions, - * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). - * note 4 : decompressed size can be very large (64-bits value), - * potentially larger than what local system can handle as a single memory segment. - * In which case, it's necessary to use streaming mode to decompress data. - * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. - * Always ensure return value fits within application's authorized limits. - * Each application can set its own limits. - * note 6 : This function replaces ZSTD_getDecompressedSize() */ -#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) -#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) -ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); +/* ZSTD_c_useRowMatchFinder + * Default is ZSTD_urm_auto. + * Controlled with ZSTD_useRowMatchFinderMode_e enum. + * + * By default, in ZSTD_urm_auto, when finalizing the compression parameters, the library + * will decide at runtime whether to use the row-based matchfinder based on support for SIMD + * instructions as well as the windowLog. + * + * Set to ZSTD_urm_disableRowMatchFinder to never use row-based matchfinder. + * Set to ZSTD_urm_enableRowMatchFinder to force usage of row-based matchfinder. + */ +#define ZSTD_c_useRowMatchFinder ZSTD_c_experimentalParam14 -/*! ZSTD_getDecompressedSize() : - * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). - * Both functions work the same way, but ZSTD_getDecompressedSize() blends - * "empty", "unknown" and "error" results to the same return value (0), - * while ZSTD_getFrameContentSize() gives them separate return values. - * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ -ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); +/* ZSTD_c_deterministicRefPrefix + * Default is 0 == disabled. Set to 1 to enable. + * + * Zstd produces different results for prefix compression when the prefix is + * directly adjacent to the data about to be compressed vs. when it isn't. + * This is because zstd detects that the two buffers are contiguous and it can + * use a more efficient match finding algorithm. However, this produces different + * results than when the two buffers are non-contiguous. This flag forces zstd + * to always load the prefix in non-contiguous mode, even if it happens to be + * adjacent to the data, to guarantee determinism. + * + * If you really care about determinism when using a dictionary or prefix, + * like when doing delta compression, you should select this option. It comes + * at a speed penalty of about ~2.5% if the dictionary and data happened to be + * contiguous, and is free if they weren't contiguous. We don't expect that + * intentionally making the dictionary and data contiguous will be worth the + * cost to memcpy() the data. + */ +#define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15 -/*! ZSTD_findFrameCompressedSize() : - * `src` should point to the start of a ZSTD frame or skippable frame. - * `srcSize` must be >= first frame size - * @return : the compressed size of the first frame starting at `src`, - * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, - * or an error code if input is invalid */ -ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); +/*! 
ZSTD_CCtx_getParameter() : + * Get the requested compression parameter value, selected by enum ZSTD_cParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); -/*====== Helper functions ======*/ -#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ -ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ -ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ -ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ -ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ -ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ +/*! ZSTD_CCtx_params : + * Quick howto : + * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure + * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into + * an existing ZSTD_CCtx_params structure. + * This is similar to + * ZSTD_CCtx_setParameter(). + * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to + * an existing CCtx. + * These parameters will be applied to + * all subsequent frames. + * - ZSTD_compressStream2() : Do compression using the CCtx. + * - ZSTD_freeCCtxParams() : Free the memory, accept NULL pointer. + * + * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() + * for static allocation of CCtx for single-threaded compression. + */ +ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); +ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); /* accept NULL pointer */ +/*! ZSTD_CCtxParams_reset() : + * Reset params to default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); -/*************************************** -* Explicit context -***************************************/ -/*= Compression context - * When compressing many times, - * it is recommended to allocate a context just once, - * and re-use it for each successive compression operation. - * This will make workload friendlier for system's memory. - * Note : re-using context is just a speed / resource optimization. - * It doesn't change the compression ratio, which remains identical. - * Note 2 : In multi-threaded environments, - * use one different context per thread for parallel execution. +/*! ZSTD_CCtxParams_init() : + * Initializes the compression parameters of cctxParams according to + * compression level. All other parameters are reset to their default values. */ -typedef struct ZSTD_CCtx_s ZSTD_CCtx; -ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); -ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); -/*! ZSTD_compressCCtx() : - * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. - * Important : in order to behave similarly to `ZSTD_compress()`, - * this function compresses at requested compression level, - * __ignoring any other parameter__ . - * If any advanced parameter was set using the advanced API, - * they will all be reset. Only `compressionLevel` remains. +/*! 
ZSTD_CCtxParams_init_advanced() : + * Initializes the compression and frame parameters of cctxParams according to + * params. All other parameters are reset to their default values. */ -ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - int compressionLevel); +ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); -/*= Decompression context - * When decompressing many times, - * it is recommended to allocate a context only once, - * and re-use it for each successive compression operation. - * This will make workload friendlier for system's memory. - * Use one context per thread for parallel execution. */ -typedef struct ZSTD_DCtx_s ZSTD_DCtx; -ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); -ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); +/*! ZSTD_CCtxParams_setParameter() : Requires v1.4.0+ + * Similar to ZSTD_CCtx_setParameter. + * Set one compression parameter, selected by enum ZSTD_cParameter. + * Parameters must be applied to a ZSTD_CCtx using + * ZSTD_CCtx_setParametersUsingCCtxParams(). + * @result : a code representing success or failure (which can be tested with + * ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); -/*! ZSTD_decompressDCtx() : - * Same as ZSTD_decompress(), - * requires an allocated ZSTD_DCtx. - * Compatible with sticky parameters. +/*! ZSTD_CCtxParams_getParameter() : + * Similar to ZSTD_CCtx_getParameter. + * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); + +/*! ZSTD_CCtx_setParametersUsingCCtxParams() : + * Apply a set of ZSTD_CCtx_params to the compression context. + * This can be done even after compression is started, + * if nbWorkers==0, this will have no impact until a new compression is started. + * if nbWorkers>=1, new parameters will be picked up at next job, + * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); + +/*! ZSTD_compressStream2_simpleArgs() : + * Same as ZSTD_compressStream2(), + * but using only integral types as arguments. + * This variant might be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp); /*************************************** -* Advanced compression API +* Advanced decompression functions ***************************************/ -/* API design : - * Parameters are pushed one by one into an existing context, - * using ZSTD_CCtx_set*() functions. - * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. - * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! - * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . +/*! 
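A possible rendering of the ZSTD_CCtx_params "quick howto" above: build a parameter set once, apply it to a context, then free the set (level and parameter choices are illustrative assumptions):

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

static size_t applySharedParams(ZSTD_CCtx* cctx)
{
    ZSTD_CCtx_params* params = ZSTD_createCCtxParams();
    if (params == NULL) return (size_t)-1;
    ZSTD_CCtxParams_init(params, 6);                                 /* level-6 defaults */
    ZSTD_CCtxParams_setParameter(params, ZSTD_c_checksumFlag, 1);
    ZSTD_CCtxParams_setParameter(params, ZSTD_c_windowLog, 24);
    size_t const err = ZSTD_CCtx_setParametersUsingCCtxParams(cctx, params);
    ZSTD_freeCCtxParams(params);   /* the cctx keeps its own copy of the requested values */
    return err;                    /* subsequent ZSTD_compressStream2() calls use these parameters */
}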
ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. */ +ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * Dictionary content is referenced, and therefore stays in dictBuffer. + * It is important that dictBuffer outlives DDict, + * it must remain read accessible throughout the lifetime of DDict */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_byReference() : + * Same as ZSTD_DCtx_loadDictionary(), + * but references `dict` content instead of copying it into `dctx`. + * This saves memory if `dict` remains around., + * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_advanced() : + * Same as ZSTD_DCtx_loadDictionary(), + * but gives direct control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?). */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_refPrefix_advanced() : + * Same as ZSTD_DCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_setMaxWindowSize() : + * Refuses allocating internal buffers for frames requiring a window size larger than provided limit. + * This protects a decoder context from reserving too much memory for itself (potential attack scenario). + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); + +/*! ZSTD_DCtx_getParameter() : + * Get the requested decompression parameter value, selected by enum ZSTD_dParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value); + +/* ZSTD_d_format + * experimental parameter, + * allowing selection between ZSTD_format_e input compression formats + */ +#define ZSTD_d_format ZSTD_d_experimentalParam1 +/* ZSTD_d_stableOutBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. * - * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). 
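A small sketch of capping the decoder's window, following the ZSTD_DCtx_setMaxWindowSize() note above; the 16 MB limit and helper name are assumptions:

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

static ZSTD_DCtx* createBoundedDCtx(void)
{
    ZSTD_DCtx* dctx = ZSTD_createDCtx();
    if (dctx != NULL) {
        /* refuse frames whose window would exceed 16 MB (streaming mode only) */
        ZSTD_DCtx_setMaxWindowSize(dctx, (size_t)16 << 20);
    }
    return dctx;
}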
+ * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the decompressor, and + * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer + * MUST be large enough to fit the entire decompressed frame. This will be + * checked when the frame content size is known. The data in the ZSTD_outBuffer + * in the range [dst, dst + pos) MUST not be modified during decompression + * or you will get data corruption. * - * This API supercedes all other "advanced" API entry points in the experimental section. - * In the future, we expect to remove from experimental API entry points which are redundant with this API. + * When this flags is enabled zstd won't allocate an output buffer, because + * it can write directly to the ZSTD_outBuffer, but it will still allocate + * an input buffer large enough to fit any compressed block. This will also + * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer. + * If you need to avoid the input buffer allocation use the buffer-less + * streaming API. + * + * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using + * this flag is ALWAYS memory safe, and will never access out-of-bounds + * memory. However, decompression WILL fail if you violate the preconditions. + * + * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST + * not be modified during decompression or you will get data corruption. This + * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate + * matches. Normally zstd maintains its own buffer for this purpose, but passing + * this flag tells zstd to use the user provided buffer. */ +#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2 +/* ZSTD_d_forceIgnoreChecksum + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * Tells the decompressor to skip checksum validation during decompression, regardless + * of whether checksumming was specified during compression. This offers some + * slight performance benefits, and may be useful for debugging. + * Param has values of type ZSTD_forceIgnoreChecksum_e + */ +#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3 -/* Compression strategies, listed from fastest to strongest */ -typedef enum { ZSTD_fast=1, - ZSTD_dfast=2, - ZSTD_greedy=3, - ZSTD_lazy=4, - ZSTD_lazy2=5, - ZSTD_btlazy2=6, - ZSTD_btopt=7, - ZSTD_btultra=8, - ZSTD_btultra2=9 - /* note : new strategies _might_ be added in the future. - Only the order (from fast to strong) is guaranteed */ -} ZSTD_strategy; +/* ZSTD_d_refMultipleDDicts + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * If enabled and dctx is allocated on the heap, then additional memory will be allocated + * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_refDDict() + * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead + * store all references. At decompression time, the appropriate dictID is selected + * from the set of DDicts based on the dictID in the frame. + * + * Usage is simply calling ZSTD_refDDict() on multiple dict buffers. + * + * Param has values of byte ZSTD_refMultipleDDicts_e + * + * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory + * allocation for the hash table. ZSTD_freeDCtx() also frees this memory. 
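A hedged sketch of the stable-output-buffer mode described above, streaming a single frame into one fixed destination buffer that must already be large enough for the whole decompressed frame:

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

static size_t decompressStable(ZSTD_DCtx* dctx,
                               void* dst, size_t dstCapacity,
                               const void* src, size_t srcSize)
{
    ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only);
    ZSTD_DCtx_setParameter(dctx, ZSTD_d_stableOutBuffer, 1);
    ZSTD_outBuffer out = { dst, dstCapacity, 0 };   /* the same buffer for every call */
    ZSTD_inBuffer  in  = { src, srcSize, 0 };
    while (in.pos < in.size) {
        size_t const ret = ZSTD_decompressStream(dctx, &out, &in);
        if (ZSTD_isError(ret)) return ret;
        if (ret == 0) break;    /* frame fully decoded */
    }
    return out.pos;             /* bytes written to dst */
}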
+ * Memory is allocated as per ZSTD_DCtx::customMem. + * + * Although this function allocates memory for the table, the user is still responsible for + * memory management of the underlying ZSTD_DDict* themselves. + */ +#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4 -typedef enum { +/*! ZSTD_DCtx_setFormat() : + * This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter(). + * Instruct the decoder context about what kind of data to decode next. + * This instruction is mandatory to decode data without a fully-formed header, + * such ZSTD_f_zstd1_magicless for example. + * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ +ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead") +size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); - /* compression parameters - * Note: When compressing with a ZSTD_CDict these parameters are superseded - * by the parameters used to construct the ZSTD_CDict. - * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ - ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table. - * Note that exact compression parameters are dynamically determined, - * depending on both compression level and srcSize (when known). - * Default level is ZSTD_CLEVEL_DEFAULT==3. - * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. - * Note 1 : it's possible to pass a negative compression level. - * Note 2 : setting a level does not automatically set all other compression parameters - * to default. Setting this will however eventually dynamically impact the compression - * parameters which have not been manually set. The manually set - * ones will 'stick'. */ - /* Advanced compression parameters : - * It's possible to pin down compression parameters to some specific values. - * In which case, these values are no longer dynamically selected by the compressor */ - ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. - * This will set a memory budget for streaming decompression, - * with larger values requiring more memory - * and typically compressing more. - * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. - * Special: value 0 means "use default windowLog". - * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT - * requires explicitly allowing such size at streaming decompression stage. */ - ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. - * Resulting memory usage is (1 << (hashLog+2)). - * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. - * Larger tables improve compression ratio of strategies <= dFast, - * and improve speed of strategies > dFast. - * Special: value 0 means "use default hashLog". */ - ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2. - * Resulting memory usage is (1 << (chainLog+2)). - * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. - * Larger tables result in better and slower compression. - * This parameter is useless for "fast" strategy. - * It's still useful when using "dfast" strategy, - * in which case it defines a secondary probe table. - * Special: value 0 means "use default chainLog". */ - ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2. - * More attempts result in better and slower compression. - * This parameter is useless for "fast" and "dFast" strategies. - * Special: value 0 means "use default searchLog". 
*/ - ZSTD_c_minMatch=105, /* Minimum size of searched matches. - * Note that Zstandard can still find matches of smaller size, - * it just tweaks its search algorithm to look for this size and larger. - * Larger values increase compression and decompression speed, but decrease ratio. - * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX. - * Note that currently, for all strategies < btopt, effective minimum is 4. - * , for all strategies > fast, effective maximum is 6. - * Special: value 0 means "use default minMatchLength". */ - ZSTD_c_targetLength=106, /* Impact of this field depends on strategy. - * For strategies btopt, btultra & btultra2: - * Length of Match considered "good enough" to stop search. - * Larger values make compression stronger, and slower. - * For strategy fast: - * Distance between match sampling. - * Larger values make compression faster, and weaker. - * Special: value 0 means "use default targetLength". */ - ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition. - * The higher the value of selected strategy, the more complex it is, - * resulting in stronger and slower compression. - * Special: value 0 means "use default strategy". */ +/*! ZSTD_decompressStream_simpleArgs() : + * Same as ZSTD_decompressStream(), + * but using only integral types as arguments. + * This can be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos); - /* LDM mode parameters */ - ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. - * This parameter is designed to improve compression ratio - * for large inputs, by finding large matches at long distance. - * It increases memory usage and window size. - * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB - * except when expressly set to a different value. */ - ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2. - * Larger values increase memory usage and compression ratio, - * but decrease compression speed. - * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX - * default: windowlog - 7. - * Special: value 0 means "automatically determine hashlog". */ - ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher. - * Larger/too small values usually decrease compression ratio. - * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. - * Special: value 0 means "use default value" (default: 64). */ - ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution. - * Larger values improve collision resolution but decrease compression speed. - * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX. - * Special: value 0 means "use default value" (default: 3). */ - ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table. - * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). - * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage. - * Larger values improve compression speed. - * Deviating far from default value will likely result in a compression ratio decrease. - * Special: value 0 means "automatically determine hashRateLog". 
*/ - /* frame parameters */ - ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) - * Content size must be known at the beginning of compression. - * This is automatically the case when using ZSTD_compress2(), - * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ - ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ - ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ +/******************************************************************** +* Advanced streaming functions +* Warning : most of these functions are now redundant with the Advanced API. +* Once Advanced API reaches "stable" status, +* redundant functions will be deprecated, and then at some point removed. +********************************************************************/ - /* multi-threading parameters */ - /* These parameters are only useful if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). - * They return an error otherwise. */ - ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. - * When nbWorkers >= 1, triggers asynchronous mode when used with ZSTD_compressStream*() : - * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, - * while compression work is performed in parallel, within worker threads. - * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : - * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). - * More workers improve speed, but also increase memory usage. - * Default value is `0`, aka "single-threaded mode" : no worker is spawned, compression is performed inside Caller's thread, all invocations are blocking */ - ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. - * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. - * 0 means default, which is dynamically determined based on compression parameters. - * Job size must be a minimum of overlap size, or 1 MB, whichever is largest. - * The minimum size is automatically and transparently enforced. */ - ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. - * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. - * It helps preserve compression ratio, while each job is compressed in parallel. - * This value is enforced only when nbWorkers >= 1. - * Larger values increase compression ratio, but decrease speed. - * Possible values range from 0 to 9 : - * - 0 means "default" : value will be determined by the library, depending on strategy - * - 1 means "no overlap" - * - 9 means "full overlap", using a full window size. - * Each intermediate rank increases/decreases load size by a factor 2 : - * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default - * default value varies between 6 and 9, depending on strategy */ +/*===== Advanced Streaming compression functions =====*/ - /* note : additional experimental parameters are also available - * within the experimental section of the API. 
- * At the time of this writing, they include : - * ZSTD_c_rsyncable - * ZSTD_c_format - * ZSTD_c_forceMaxWindow - * ZSTD_c_forceAttachDict - * ZSTD_c_literalCompressionMode - * ZSTD_c_targetCBlockSize - * ZSTD_c_srcSizeHint - * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. - * note : never ever use experimentalParam? names directly; - * also, the enums values themselves are unstable and can still change. - */ - ZSTD_c_experimentalParam1=500, - ZSTD_c_experimentalParam2=10, - ZSTD_c_experimentalParam3=1000, - ZSTD_c_experimentalParam4=1001, - ZSTD_c_experimentalParam5=1002, - ZSTD_c_experimentalParam6=1003, - ZSTD_c_experimentalParam7=1004 -} ZSTD_cParameter; +/*! ZSTD_initCStream_srcSize() : + * This function is DEPRECATED, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * pledgedSrcSize must be correct. If it is not known at init time, use + * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, + * "0" also disables frame content size field. It may be enabled in the future. + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, + int compressionLevel, + unsigned long long pledgedSrcSize); -typedef struct { - size_t error; - int lowerBound; - int upperBound; -} ZSTD_bounds; +/*! ZSTD_initCStream_usingDict() : + * This function is DEPRECATED, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * Creates of an internal CDict (incompatible with static CCtx), except if + * dict == NULL or dictSize < 8, in which case no dict is used. + * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if + * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + int compressionLevel); -/*! ZSTD_cParam_getBounds() : - * All parameters must belong to an interval with lower and upper bounds, - * otherwise they will either trigger an error or be automatically clamped. - * @return : a structure, ZSTD_bounds, which contains - * - an error status field, which must be tested using ZSTD_isError() - * - lower and upper bounds, both inclusive +/*! ZSTD_initCStream_advanced() : + * This function is DEPRECATED, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd parameter and leave the rest as-is. + * for ((param, value) : params) { + * ZSTD_CCtx_setParameter(zcs, param, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. + * pledgedSrcSize must be correct. + * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * This prototype will generate compilation warnings. 
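The deprecation note for ZSTD_initCStream_usingDict() above spells out its replacement; a minimal transcription of that sequence (helper name is an assumption, ZSTD_CStream is the same type as ZSTD_CCtx):

#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

static size_t initStreamWithDict(ZSTD_CStream* zcs,
                                 const void* dict, size_t dictSize, int level)
{
    size_t err = ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
    if (ZSTD_isError(err)) return err;
    err = ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, level);
    if (ZSTD_isError(err)) return err;
    /* dict == NULL or dictSize < 8 simply means "no dictionary" */
    return ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
}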
*/ -ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam); +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, + unsigned long long pledgedSrcSize); -/*! ZSTD_CCtx_setParameter() : - * Set one compression parameter, selected by enum ZSTD_cParameter. - * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds(). - * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). - * Setting a parameter is generally only possible during frame initialization (before starting compression). - * Exception : when using multi-threading mode (nbWorkers >= 1), - * the following parameters can be updated _during_ compression (within same frame): - * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. - * new parameters will be active for next job only (after a flush()). - * @return : an error code (which can be tested using ZSTD_isError()). +/*! ZSTD_initCStream_usingCDict() : + * This function is DEPRECATED, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * note : cdict will just be referenced, and must outlive compression session + * This prototype will generate compilation warnings. */ -ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value); +ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); -/*! ZSTD_CCtx_setPledgedSrcSize() : - * Total input data size to be compressed as a single frame. - * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag. - * This value will also be controlled at end of frame, and trigger an error if not respected. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame. - * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. - * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame. - * Note 2 : pledgedSrcSize is only valid once, for the next frame. - * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN. - * Note 3 : Whenever all input data is provided and consumed in a single round, - * for example with ZSTD_compress2(), - * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), - * this value is automatically overridden by srcSize instead. +/*! ZSTD_initCStream_usingCDict_advanced() : + * This function is DEPRECATED, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. + * for ((fParam, value) : fParams) { + * ZSTD_CCtx_setParameter(zcs, fParam, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. + * pledgedSrcSize must be correct. If srcSize is not known at init time, use + * value ZSTD_CONTENTSIZE_UNKNOWN. + * This prototype will generate compilation warnings. 
*/ -ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); +ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions") +size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize); -typedef enum { - ZSTD_reset_session_only = 1, - ZSTD_reset_parameters = 2, - ZSTD_reset_session_and_parameters = 3 -} ZSTD_ResetDirective; +/*! ZSTD_resetCStream() : + * This function is DEPRECATED, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * Note: ZSTD_resetCStream() interprets pledgedSrcSize == 0 as ZSTD_CONTENTSIZE_UNKNOWN, but + * ZSTD_CCtx_setPledgedSrcSize() does not do the same, so ZSTD_CONTENTSIZE_UNKNOWN must be + * explicitly specified. + * + * start a new frame, using same parameters from previous frame. + * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. + * Note that zcs must be init at least once before using ZSTD_resetCStream(). + * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. + * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. + * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, + * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. + * @return : 0, or an error code (which can be tested using ZSTD_isError()) + * This prototype will generate compilation warnings. + */ +ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions") +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); -/*! ZSTD_CCtx_reset() : - * There are 2 different things that can be reset, independently or jointly : - * - The session : will stop compressing current frame, and make CCtx ready to start a new one. - * Useful after an error, or to interrupt any ongoing compression. - * Any internal data not yet flushed is cancelled. - * Compression parameters and dictionary remain unchanged. - * They will be used to compress next frame. - * Resetting session never fails. - * - The parameters : changes all parameters back to "default". - * This removes any reference to any dictionary too. - * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) - * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) - * - Both : similar to resetting the session, followed by resetting parameters. + +typedef struct { + unsigned long long ingested; /* nb input bytes read and buffered */ + unsigned long long consumed; /* nb input bytes actually compressed */ + unsigned long long produced; /* nb of compressed bytes generated and buffered */ + unsigned long long flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */ + unsigned currentJobID; /* MT only : latest started job nb */ + unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */ +} ZSTD_frameProgression; + +/* ZSTD_getFrameProgression() : + * tells how much data has been ingested (read from input) + * consumed (input actually compressed) and produced (output) for current frame. + * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed. 
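A small sketch of polling the frame-progression counters documented above from the producing thread during a multithreaded compression; the reporting helper is illustrative:

#include <stdio.h>
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

static void reportProgress(const ZSTD_CCtx* cctx)
{
    ZSTD_frameProgression const fp = ZSTD_getFrameProgression(cctx);
    fprintf(stderr, "ingested=%llu consumed=%llu produced=%llu (active workers: %u)\n",
            fp.ingested, fp.consumed, fp.produced, fp.nbActiveWorkers);
    /* (ingested - consumed) is the amount of input buffered but not yet compressed */
}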
+ * Aggregates progression inside active worker threads. */ -ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); +ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); -/*! ZSTD_compress2() : - * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. - * ZSTD_compress2() always starts a new frame. - * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. - * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() - * - The function is always blocking, returns when compression is completed. - * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. - * @return : compressed size written into `dst` (<= `dstCapacity), - * or an error code if it fails (which can be tested using ZSTD_isError()). +/*! ZSTD_toFlushNow() : + * Tell how many bytes are ready to be flushed immediately. + * Useful for multithreading scenarios (nbWorkers >= 1). + * Probe the oldest active job, defined as oldest job not yet entirely flushed, + * and check its output buffer. + * @return : amount of data stored in oldest job and ready to be flushed immediately. + * if @return == 0, it means either : + * + there is no active job (could be checked with ZSTD_frameProgression()), or + * + oldest job is still actively compressing data, + * but everything it has produced has also been flushed so far, + * therefore flush speed is limited by production speed of oldest job + * irrespective of the speed of concurrent (and newer) jobs. */ -ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); -/*************************************** -* Advanced decompression API -***************************************/ +/*===== Advanced Streaming decompression functions =====*/ -/* The advanced API pushes parameters one by one into an existing DCtx context. - * Parameters are sticky, and remain valid for all following frames - * using the same DCtx context. - * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). - * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). - * Therefore, no new decompression function is necessary. +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); + * + * note: no dictionary will be used if dict == NULL or dictSize < 8 + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, ddict); + * + * note : ddict is referenced, it must outlive decompression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); + +/*! 
+ * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * + * re-use decompression parameters from previous init; saves dictionary loading + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x */ +ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); + + +/********************************************************************* +* Buffer-less and synchronous inner streaming functions +* +* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. +* But it's also a complex one, with several restrictions, documented below. +* Prefer normal streaming API for an easier experience. +********************************************************************* */ + +/** + Buffer-less streaming compression (synchronous mode) + + A ZSTD_CCtx object is required to track streaming operations. + Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. + ZSTD_CCtx object can be re-used multiple times within successive compression operations. + + Start by initializing a context. + Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression. + It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() + + Then, consume your input using ZSTD_compressContinue(). + There are some important considerations to keep in mind when using this advanced function : + - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only. + - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks. + - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. + Worst case evaluation is provided by ZSTD_compressBound(). + ZSTD_compressContinue() doesn't guarantee recover after a failed compression. + - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog). + It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks) + - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps. + In which case, it will "discard" the relevant memory section from its history. + + Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. + It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. + Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. + + `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. 
+*/ + +/*===== Buffer-less streaming compression functions =====*/ +ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */ +ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */ +ZSTD_DEPRECATED("use advanced API to access custom parameters") +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTD_DEPRECATED("use advanced API to access custom parameters") +size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ +/** + Buffer-less streaming decompression (synchronous mode) + + A ZSTD_DCtx object is required to track streaming operations. + Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. + A ZSTD_DCtx object can be re-used multiple times. + + First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). + Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. + Data fragment must be large enough to ensure successful decoding. + `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. + @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. + >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. + errorCode, which can be tested using ZSTD_isError(). + + It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, + such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). + Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. + As a consequence, check that values remain within valid application range. + For example, do not allocate memory blindly, check that `windowSize` is within expectation. + Each application can set its own limits, depending on local restrictions. + For extended interoperability, it is recommended to support `windowSize` of at least 8 MB. + + ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes. 
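A minimal sketch (not from this patch) of the buffer-less compression entry points declared just above, assuming the whole input fits in one segment so the ZSTD_compressContinue() chunking loop is elided; the helper name is illustrative:

#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>

/* Single-segment use of the buffer-less compression calls above.
 * dstCapacity is assumed to be >= ZSTD_compressBound(srcSize). */
static size_t compress_bufferless(ZSTD_CCtx* cctx,
                                  void* dst, size_t dstCapacity,
                                  const void* src, size_t srcSize,
                                  int level)
{
    size_t const initRet = ZSTD_compressBegin(cctx, level);
    if (ZSTD_isError(initRet)) return initRet;
    /* With one segment, ZSTD_compressEnd() consumes the input and writes the
     * last block(s) plus the frame epilogue in one call. Multi-segment input
     * would go through ZSTD_compressContinue() per segment first. */
    return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
}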
+ ZSTD_decompressContinue() is very sensitive to contiguity, + if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, + or that previous contiguous segment is large enough to properly handle maximum back-reference distance. + There are multiple ways to guarantee this condition. + + The most memory efficient way is to use a round buffer of sufficient size. + Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), + which can @return an error code if required value is too large for current system (in 32-bits mode). + In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, + up to the moment there is not enough room left in the buffer to guarantee decoding another full block, + which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. + At which point, decoding can resume from the beginning of the buffer. + Note that already decoded data stored in the buffer should be flushed before being overwritten. + + There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory. + + Finally, if you control the compression process, you can also ignore all buffer size rules, + as long as the encoder and decoder progress in "lock-step", + aka use exactly the same buffer sizes, break contiguity at the same place, etc. + + Once buffers are setup, start decompression, with ZSTD_decompressBegin(). + If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict(). + + Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. + ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. + + @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). + It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. + It can also be an error code, which can be tested with ZSTD_isError(). + + A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. + Context can then be reset to start a new decompression. + + Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). + This information is not required to properly decode a frame. + + == Special case : skippable frames == + + Skippable frames allow integration of user-defined data into a flow of concatenated frames. + Skippable frames will be ignored (skipped) by decompressor. + The format of skippable frames is as follows : + a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F + b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits + c) Frame Content - any content (User Data) of length equal to Frame Size + For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame. + For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content. +*/ + +/*===== Buffer-less streaming decompression functions =====*/ +typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; +typedef struct { + unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 
0 means "empty" */ + unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ + unsigned blockSizeMax; + ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ + unsigned headerSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTD_frameHeader; + +/*! ZSTD_getFrameHeader() : + * decode Frame Header, or requires larger `srcSize`. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */ +/*! ZSTD_getFrameHeader_advanced() : + * same as ZSTD_getFrameHeader(), + * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ +ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); +ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* misc */ +ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); +typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; +ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); + + + + +/* ============================ */ +/** Block level API */ +/* ============================ */ + +/*! + Block functions produce and decode raw zstd blocks, without frame metadata. + Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). + But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. + + A few rules to respect : + - Compressing and decompressing require a context structure + + Use ZSTD_createCCtx() and ZSTD_createDCtx() + - It is necessary to init context before starting + + compression : any ZSTD_compressBegin*() variant, including with dictionary + + decompression : any ZSTD_decompressBegin*() variant, including with dictionary + + copyCCtx() and copyDCtx() can be used too + - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB + + If input is larger than a block size, it's necessary to split input data into multiple blocks + + For inputs larger than a single block, consider using regular ZSTD_compress() instead. + Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. + - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! + ===> In which case, nothing is produced into `dst` ! + + User __must__ test for such outcome and deal directly with uncompressed data + + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. 
+ Doing so would mess up with statistics history, leading to potential data corruption. + + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! + + In case of multiple successive blocks, should some of them be uncompressed, + decoder must be informed of their existence in order to follow proper history. + Use ZSTD_insertBlock() for such a case. +*/ + +/*===== Raw zstd block functions =====*/ +ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ + + +#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ + +#if defined (__cplusplus) +} +#endif +/**** ended inlining ../zstd.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: huf.h ****/ +#ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */ +#endif +/**** start inlining xxhash.h ****/ +/* + * xxHash - Extremely Fast Hash algorithm + * Header File + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - xxHash source repository : https://github.com/Cyan4973/xxHash + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +*/ + +/* Notice extracted from xxHash homepage : + +xxHash is an extremely fast Hash algorithm, running at RAM speed limits. +It also successfully passes all tests from the SMHasher suite. + +Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) + +Name Speed Q.Score Author +xxHash 5.4 GB/s 10 +CrapWow 3.2 GB/s 2 Andrew +MumurHash 3a 2.7 GB/s 10 Austin Appleby +SpookyHash 2.0 GB/s 10 Bob Jenkins +SBox 1.4 GB/s 9 Bret Mulvey +Lookup3 1.2 GB/s 9 Bob Jenkins +SuperFastHash 1.2 GB/s 1 Paul Hsieh +CityHash64 1.05 GB/s 10 Pike & Alakuijala +FNV 0.55 GB/s 5 Fowler, Noll, Vo +CRC32 0.43 GB/s 9 +MD5-32 0.33 GB/s 10 Ronald L. Rivest +SHA1-32 0.28 GB/s 10 + +Q.Score is a measure of quality of the hash function. +It depends on successfully passing SMHasher test set. +10 is a perfect score. + +A 64-bits version, named XXH64, is available since r35. +It offers much better speed, but for 64-bits applications only. +Name Speed on 64 bits Speed on 32 bits +XXH64 13.8 GB/s 1.9 GB/s +XXH32 6.8 GB/s 6.0 GB/s +*/ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef XXHASH_H_5627135585666179 +#define XXHASH_H_5627135585666179 1 + + +/* **************************** +* Definitions +******************************/ +/**** skipping file: zstd_deps.h ****/ +typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode; + + +/* **************************** +* API modifier +******************************/ +/** XXH_PRIVATE_API +* This is useful if you want to include xxhash functions in `static` mode +* in order to inline them, and remove their symbol from the public list. +* Methodology : +* #define XXH_PRIVATE_API +* #include "xxhash.h" +* `xxhash.c` is automatically included. 
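A minimal sketch (not from this patch) of the raw block rules documented above, placed here just before the inlined xxhash.h; the helper name, compression level, and buffer sizing are illustrative:

#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>
#include <string.h>

/* Compress one block; a 0 return means "not compressible", in which case the
 * raw bytes are kept as-is and the decoder is informed via ZSTD_insertBlock().
 * srcSize must be <= ZSTD_getBlockSize(cctx); assumes dstCapacity >= srcSize. */
static size_t roundtrip_one_block(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx,
                                  const void* src, size_t srcSize,
                                  void* scratch, size_t scratchSize,
                                  void* dst, size_t dstCapacity)
{
    size_t cSize;
    size_t ret = ZSTD_compressBegin(cctx, 3);        /* contexts must be initialized first */
    if (ZSTD_isError(ret)) return ret;
    ret = ZSTD_decompressBegin(dctx);
    if (ZSTD_isError(ret)) return ret;

    cSize = ZSTD_compressBlock(cctx, scratch, scratchSize, src, srcSize);
    if (ZSTD_isError(cSize)) return cSize;

    if (cSize == 0) {                                /* incompressible block */
        memcpy(dst, src, srcSize);                   /* store/transmit raw bytes */
        return ZSTD_insertBlock(dctx, dst, srcSize); /* keep decoder history consistent */
    }
    return ZSTD_decompressBlock(dctx, dst, dstCapacity, scratch, cSize);
}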
+* It's not useful to compile and link it as a separate module anymore. +*/ +#ifdef XXH_PRIVATE_API +# ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +# endif +# if defined(__GNUC__) +# define XXH_PUBLIC_API static __inline __attribute__((unused)) +# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define XXH_PUBLIC_API static inline +# elif defined(_MSC_VER) +# define XXH_PUBLIC_API static __inline +# else +# define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */ +# endif +#else +# define XXH_PUBLIC_API /* do nothing */ +#endif /* XXH_PRIVATE_API */ + +/*!XXH_NAMESPACE, aka Namespace Emulation : + +If you want to include _and expose_ xxHash functions from within your own library, +but also want to avoid symbol collisions with another library which also includes xxHash, + +you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library +with the value of XXH_NAMESPACE (so avoid to keep it NULL and avoid numeric values). + +Note that no change is required within the calling program as long as it includes `xxhash.h` : +regular symbol name will be automatically translated by this header. +*/ +#ifdef XXH_NAMESPACE +# define XXH_CAT(A,B) A##B +# define XXH_NAME2(A,B) XXH_CAT(A,B) +# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32) +# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64) +# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber) +# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState) +# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState) +# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState) +# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState) +# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset) +# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset) +# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update) +# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update) +# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest) +# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest) +# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState) +# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState) +# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash) +# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash) +# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical) +# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical) +#endif + + +/* ************************************* +* Version +***************************************/ +#define XXH_VERSION_MAJOR 0 +#define XXH_VERSION_MINOR 6 +#define XXH_VERSION_RELEASE 2 +#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE) +XXH_PUBLIC_API unsigned XXH_versionNumber (void); + + +/* **************************** +* Simple Hash Functions +******************************/ +typedef unsigned int XXH32_hash_t; +typedef unsigned long long XXH64_hash_t; + +XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed); +XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed); + +/*! +XXH32() : + Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input". 
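A minimal sketch (not from this patch) of the one-shot hash entry points declared above; the message and seed are illustrative, and a standalone program would include "xxhash.h" rather than rely on this inlined copy:

#include <stdio.h>
#include <string.h>

static void hash_one_shot(void)
{
    const char msg[] = "hello xxhash";
    XXH32_hash_t const h32 = XXH32(msg, strlen(msg), 0 /* seed */);
    XXH64_hash_t const h64 = XXH64(msg, strlen(msg), 0 /* seed */);
    printf("XXH32 = %08x, XXH64 = %016llx\n", h32, h64);
}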
+ The memory between input & input+length must be valid (allocated and read-accessible). + "seed" can be used to alter the result predictably. + Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s +XXH64() : + Calculate the 64-bits hash of sequence of length "len" stored at memory address "input". + "seed" can be used to alter the result predictably. + This function runs 2x faster on 64-bits systems, but slower on 32-bits systems (see benchmark). +*/ + + +/* **************************** +* Streaming Hash Functions +******************************/ +typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */ +typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */ + +/*! State allocation, compatible with dynamic libraries */ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void); +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr); + + +/* hash streaming */ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed); +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr); + +XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed); +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length); +XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr); + +/* +These functions generate the xxHash of an input provided in multiple segments. +Note that, for small input, they are slower than single-call functions, due to state management. +For small input, prefer `XXH32()` and `XXH64()` . + +XXH state must first be allocated, using XXH*_createState() . + +Start a new hash by initializing state with a seed, using XXH*_reset(). + +Then, feed the hash state by calling XXH*_update() as many times as necessary. +Obviously, input must be allocated and read accessible. +The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. + +Finally, a hash value can be produced anytime, by using XXH*_digest(). +This function returns the nn-bits hash as an int or long long. + +It's still possible to continue inserting input into the hash state after a digest, +and generate some new hashes later on, by calling again XXH*_digest(). + +When done, free XXH state space if it was allocated dynamically. +*/ + + +/* ************************** +* Utils +****************************/ +#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */ +# define restrict /* disable restrict */ +#endif + +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state); +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state); + + +/* ************************** +* Canonical representation +****************************/ +/* Default result type for XXH functions are primitive unsigned 32 and 64 bits. +* The canonical representation uses human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs. 
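A minimal sketch (not from this patch) of the streaming and canonical-representation flow described above, shown for XXH64; the helper name and error handling are illustrative:

#include <stddef.h>

/* Feed two segments, take the digest, then serialize it in the portable
 * big-endian canonical form. Returns 0 on success, -1 on failure. */
static int hash_streaming(const void* part1, size_t len1,
                          const void* part2, size_t len2,
                          XXH64_canonical_t* out)
{
    XXH64_state_t* const state = XXH64_createState();
    if (state == NULL) return -1;
    if (XXH64_reset(state, 0 /* seed */) != XXH_OK
     || XXH64_update(state, part1, len1) != XXH_OK
     || XXH64_update(state, part2, len2) != XXH_OK) {
        XXH64_freeState(state);
        return -1;
    }
    {   XXH64_hash_t const h = XXH64_digest(state);
        XXH64_canonicalFromHash(out, h);   /* big-endian, comparable across systems */
    }
    XXH64_freeState(state);
    return 0;
}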
+*/ +typedef struct { unsigned char digest[4]; } XXH32_canonical_t; +typedef struct { unsigned char digest[8]; } XXH64_canonical_t; + +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash); +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash); + +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src); +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src); + +#endif /* XXHASH_H_5627135585666179 */ + + + +/* ================================================================================================ + This section contains definitions which are not guaranteed to remain stable. + They may change in future versions, becoming incompatible with a different version of the library. + They shall only be used with static linking. + Never use these definitions in association with dynamic linking ! +=================================================================================================== */ +#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXH_STATIC_H_3543687687345) +#define XXH_STATIC_H_3543687687345 + +/* These definitions are only meant to allow allocation of XXH state + statically, on stack, or in a struct for example. + Do not use members directly. */ + + struct XXH32_state_s { + unsigned total_len_32; + unsigned large_len; + unsigned v1; + unsigned v2; + unsigned v3; + unsigned v4; + unsigned mem32[4]; /* buffer defined as U32 for alignment */ + unsigned memsize; + unsigned reserved; /* never read nor write, will be removed in a future version */ + }; /* typedef'd to XXH32_state_t */ + + struct XXH64_state_s { + unsigned long long total_len; + unsigned long long v1; + unsigned long long v2; + unsigned long long v3; + unsigned long long v4; + unsigned long long mem64[4]; /* buffer defined as U64 for alignment */ + unsigned memsize; + unsigned reserved[2]; /* never read nor write, will be removed in a future version */ + }; /* typedef'd to XXH64_state_t */ + + +# ifdef XXH_PRIVATE_API +/**** start inlining xxhash.c ****/ +/* + * xxHash - Fast Hash algorithm + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - xxHash homepage: http://www.xxhash.com + * - xxHash source repository : https://github.com/Cyan4973/xxHash + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +*/ + + +/* ************************************* +* Tuning parameters +***************************************/ +/*!XXH_FORCE_MEMORY_ACCESS : + * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable. + * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal. + * The below switch allow to select different access method for improved performance. + * Method 0 (default) : use `memcpy()`. Safe and portable. + * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable). + * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`. + * Method 2 : direct access. This method doesn't depend on compiler but violate C standard. + * It can generate buggy code on targets which do not support unaligned memory accesses. 
+ * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6) + * See http://stackoverflow.com/a/32095106/646947 for details. + * Prefer these methods in priority order (0 > 1 > 2) + */ +#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ +# if (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ + (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \ + defined(__ICCARM__) +# define XXH_FORCE_MEMORY_ACCESS 1 +# endif +#endif + +/*!XXH_ACCEPT_NULL_INPUT_POINTER : + * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer. + * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input. + * By default, this option is disabled. To enable it, uncomment below define : + */ +/* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ + +/*!XXH_FORCE_NATIVE_FORMAT : + * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. + * Results are therefore identical for little-endian and big-endian CPU. + * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. + * Should endian-independence be of no importance for your application, you may set the #define below to 1, + * to improve speed for Big-endian CPU. + * This option has no impact on Little_Endian CPU. + */ +#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */ +# define XXH_FORCE_NATIVE_FORMAT 0 +#endif + +/*!XXH_FORCE_ALIGN_CHECK : + * This is a minor performance trick, only useful with lots of very small keys. + * It means : check for aligned/unaligned input. + * The check costs one initial branch per hash; set to 0 when the input data + * is guaranteed to be aligned. + */ +#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */ +# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64) +# define XXH_FORCE_ALIGN_CHECK 0 +# else +# define XXH_FORCE_ALIGN_CHECK 1 +# endif +#endif + + +/* ************************************* +* Includes & Memory related functions +***************************************/ +/* Modify the local functions below should you wish to use some other memory routines */ +/* for ZSTD_malloc(), ZSTD_free() */ +#define ZSTD_DEPS_NEED_MALLOC +/**** skipping file: zstd_deps.h ****/ +static void* XXH_malloc(size_t s) { return ZSTD_malloc(s); } +static void XXH_free (void* p) { ZSTD_free(p); } +static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_memcpy(dest,src,size); } + +#ifndef XXH_STATIC_LINKING_ONLY +# define XXH_STATIC_LINKING_ONLY +#endif +/**** skipping file: xxhash.h ****/ + + +/* ************************************* +* Compiler Specific Options +***************************************/ +/**** skipping file: compiler.h ****/ + + +/* ************************************* +* Basic Types +***************************************/ +/**** skipping file: mem.h ****/ + +#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2)) + +/* Force direct memory access. 
Only works on CPU which support unaligned memory access in hardware */ +static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; } +static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; } + +#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1)) + +/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */ +/* currently only defined for gcc and icc */ +typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign; + +static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; } +static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; } + +#else + +/* portable and safe solution. Generally efficient. + * see : http://stackoverflow.com/a/32095106/646947 + */ + +static U32 XXH_read32(const void* memPtr) +{ + U32 val; + ZSTD_memcpy(&val, memPtr, sizeof(val)); + return val; +} + +static U64 XXH_read64(const void* memPtr) +{ + U64 val; + ZSTD_memcpy(&val, memPtr, sizeof(val)); + return val; +} + +#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */ + + +/* **************************************** +* Compiler-specific Functions and Macros +******************************************/ +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */ +#if defined(_MSC_VER) +# define XXH_rotl32(x,r) _rotl(x,r) +# define XXH_rotl64(x,r) _rotl64(x,r) +#else +#if defined(__ICCARM__) +# include +# define XXH_rotl32(x,r) __ROR(x,(32 - r)) +#else +# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) +#endif +# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) +#endif + +#if defined(_MSC_VER) /* Visual Studio */ +# define XXH_swap32 _byteswap_ulong +# define XXH_swap64 _byteswap_uint64 +#elif GCC_VERSION >= 403 +# define XXH_swap32 __builtin_bswap32 +# define XXH_swap64 __builtin_bswap64 +#else +static U32 XXH_swap32 (U32 x) +{ + return ((x << 24) & 0xff000000 ) | + ((x << 8) & 0x00ff0000 ) | + ((x >> 8) & 0x0000ff00 ) | + ((x >> 24) & 0x000000ff ); +} +static U64 XXH_swap64 (U64 x) +{ + return ((x << 56) & 0xff00000000000000ULL) | + ((x << 40) & 0x00ff000000000000ULL) | + ((x << 24) & 0x0000ff0000000000ULL) | + ((x << 8) & 0x000000ff00000000ULL) | + ((x >> 8) & 0x00000000ff000000ULL) | + ((x >> 24) & 0x0000000000ff0000ULL) | + ((x >> 40) & 0x000000000000ff00ULL) | + ((x >> 56) & 0x00000000000000ffULL); +} +#endif + + +/* ************************************* +* Architecture Macros +***************************************/ +typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess; + +/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */ +#ifndef XXH_CPU_LITTLE_ENDIAN + static const int g_one = 1; +# define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one)) +#endif + + +/* *************************** +* Memory reads +*****************************/ +typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment; + +FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr)); + else + return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); +} + +FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE32_align(ptr, endian, XXH_unaligned); +} + +static U32 XXH_readBE32(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? 
XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr); +} + +FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align) +{ + if (align==XXH_unaligned) + return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr)); + else + return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); +} + +FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian) +{ + return XXH_readLE64_align(ptr, endian, XXH_unaligned); +} + +static U64 XXH_readBE64(const void* ptr) +{ + return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr); +} + + +/* ************************************* +* Macros +***************************************/ +#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ + + +/* ************************************* +* Constants +***************************************/ +static const U32 PRIME32_1 = 2654435761U; +static const U32 PRIME32_2 = 2246822519U; +static const U32 PRIME32_3 = 3266489917U; +static const U32 PRIME32_4 = 668265263U; +static const U32 PRIME32_5 = 374761393U; + +static const U64 PRIME64_1 = 11400714785074694791ULL; +static const U64 PRIME64_2 = 14029467366897019727ULL; +static const U64 PRIME64_3 = 1609587929392839161ULL; +static const U64 PRIME64_4 = 9650029242287828579ULL; +static const U64 PRIME64_5 = 2870177450012600261ULL; + +XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; } + + +/* ************************** +* Utils +****************************/ +XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState) +{ + ZSTD_memcpy(dstState, srcState, sizeof(*dstState)); +} + +XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState) +{ + ZSTD_memcpy(dstState, srcState, sizeof(*dstState)); +} + + +/* *************************** +* Simple Hash Functions +*****************************/ + +static U32 XXH32_round(U32 seed, U32 input) +{ + seed += input * PRIME32_2; + seed = XXH_rotl32(seed, 13); + seed *= PRIME32_1; + return seed; +} + +FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* bEnd = p + len; + U32 h32; +#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)16; + } +#endif + + if (len>=16) { + const BYTE* const limit = bEnd - 16; + U32 v1 = seed + PRIME32_1 + PRIME32_2; + U32 v2 = seed + PRIME32_2; + U32 v3 = seed + 0; + U32 v4 = seed - PRIME32_1; + + do { + v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4; + v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4; + v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4; + v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4; + } while (p<=limit); + + h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); + } else { + h32 = seed + PRIME32_5; + } + + h32 += (U32) len; + + while (p+4<=bEnd) { + h32 += XXH_get32bits(p) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; + p+=4; + } + + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; + + return h32; +} + + +XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small 
inputs */ + XXH32_CREATESTATE_STATIC(state); + XXH32_reset(state, seed); + XXH32_update(state, input, len); + return XXH32_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + +static U64 XXH64_round(U64 acc, U64 input) +{ + acc += input * PRIME64_2; + acc = XXH_rotl64(acc, 31); + acc *= PRIME64_1; + return acc; +} + +static U64 XXH64_mergeRound(U64 acc, U64 val) +{ + val = XXH64_round(0, val); + acc ^= val; + acc = acc * PRIME64_1 + PRIME64_4; + return acc; +} + +FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + U64 h64; +#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align) + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (p==NULL) { + len=0; + bEnd=p=(const BYTE*)(size_t)32; + } +#endif + + if (len>=32) { + const BYTE* const limit = bEnd - 32; + U64 v1 = seed + PRIME64_1 + PRIME64_2; + U64 v2 = seed + PRIME64_2; + U64 v3 = seed + 0; + U64 v4 = seed - PRIME64_1; + + do { + v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8; + v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8; + v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8; + v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8; + } while (p<=limit); + + h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18); + h64 = XXH64_mergeRound(h64, v1); + h64 = XXH64_mergeRound(h64, v2); + h64 = XXH64_mergeRound(h64, v3); + h64 = XXH64_mergeRound(h64, v4); + + } else { + h64 = seed + PRIME64_5; + } + + h64 += (U64) len; + + while (p+8<=bEnd) { + U64 const k1 = XXH64_round(0, XXH_get64bits(p)); + h64 ^= k1; + h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; + p+=8; + } + + if (p+4<=bEnd) { + h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; + h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3; + p+=4; + } + + while (p> 33; + h64 *= PRIME64_2; + h64 ^= h64 >> 29; + h64 *= PRIME64_3; + h64 ^= h64 >> 32; + + return h64; +} + + +XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed) +{ +#if 0 + /* Simple version, good for code maintenance, but unfortunately slow for small inputs */ + XXH64_CREATESTATE_STATIC(state); + XXH64_reset(state, seed); + XXH64_update(state, input, len); + return XXH64_digest(state); +#else + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if (XXH_FORCE_ALIGN_CHECK) { + if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */ + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned); + else + return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned); + } } + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned); + else + return 
XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned); +#endif +} + + +/* ************************************************** +* Advanced Hash Functions +****************************************************/ + +XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void) +{ + return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + +XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void) +{ + return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t)); +} +XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) +{ + XXH_free(statePtr); + return XXH_OK; +} + + +/*** Hash feed ***/ + +XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed) +{ + XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + ZSTD_memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */ + state.v1 = seed + PRIME32_1 + PRIME32_2; + state.v2 = seed + PRIME32_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME32_1; + ZSTD_memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; +} + + +XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed) +{ + XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */ + ZSTD_memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */ + state.v1 = seed + PRIME64_1 + PRIME64_2; + state.v2 = seed + PRIME64_2; + state.v3 = seed + 0; + state.v4 = seed - PRIME64_1; + ZSTD_memcpy(statePtr, &state, sizeof(state)); + return XXH_OK; +} + + +FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; + +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + + state->total_len_32 += (unsigned)len; + state->large_len |= (len>=16) | (state->total_len_32>=16); + + if (state->memsize + len < 16) { /* fill in tmp buffer */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len); + state->memsize += (unsigned)len; + return XXH_OK; + } + + if (state->memsize) { /* some data left from previous update */ + XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize); + { const U32* p32 = state->mem32; + state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++; + state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++; + state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++; + state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++; + } + p += 16-state->memsize; + state->memsize = 0; + } + + if (p <= bEnd-16) { + const BYTE* const limit = bEnd - 16; + U32 v1 = state->v1; + U32 v2 = state->v2; + U32 v3 = state->v3; + U32 v4 = state->v4; + + do { + v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4; + v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4; + v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4; + v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4; + } while (p<=limit); + + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } + + if (p < bEnd) { + XXH_memcpy(state->mem32, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } + + return XXH_OK; +} + +XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess 
endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH32_update_endian(state_in, input, len, XXH_bigEndian); +} -typedef enum { - ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which - * the streaming API will refuse to allocate memory buffer - * in order to protect the host from unreasonable memory requirements. - * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. - * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). - * Special: value 0 means "use default maximum windowLog". */ - /* note : additional experimental parameters are also available - * within the experimental section of the API. - * At the time of this writing, they include : - * ZSTD_c_format - * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. - * note : never ever use experimentalParam? names directly - */ - ZSTD_d_experimentalParam1=1000 +FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem32; + const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize; + U32 h32; -} ZSTD_dParameter; + if (state->large_len) { + h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); + } else { + h32 = state->v3 /* == seed */ + PRIME32_5; + } -/*! ZSTD_dParam_getBounds() : - * All parameters must belong to an interval with lower and upper bounds, - * otherwise they will either trigger an error or be automatically clamped. - * @return : a structure, ZSTD_bounds, which contains - * - an error status field, which must be tested using ZSTD_isError() - * - both lower and upper bounds, inclusive - */ -ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); + h32 += state->total_len_32; -/*! ZSTD_DCtx_setParameter() : - * Set one compression parameter, selected by enum ZSTD_dParameter. - * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). - * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). - * Setting a parameter is only possible during frame initialization (before starting decompression). - * @return : 0, or an error code (which can be tested using ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); + while (p+4<=bEnd) { + h32 += XXH_readLE32(p, endian) * PRIME32_3; + h32 = XXH_rotl32(h32, 17) * PRIME32_4; + p+=4; + } -/*! ZSTD_DCtx_reset() : - * Return a DCtx to clean state. - * Session and parameters can be reset jointly or separately. - * Parameters can only be reset when no active frame is being decompressed. - * @return : 0, or an error code, which can be tested with ZSTD_isError() - */ -ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); + while (p> 15; + h32 *= PRIME32_2; + h32 ^= h32 >> 13; + h32 *= PRIME32_3; + h32 ^= h32 >> 16; -/**************************** -* Streaming -****************************/ + return h32; +} -typedef struct ZSTD_inBuffer_s { - const void* src; /**< start of input buffer */ - size_t size; /**< size of input buffer */ - size_t pos; /**< position where reading stopped. Will be updated. 
Necessarily 0 <= pos <= size */ -} ZSTD_inBuffer; -typedef struct ZSTD_outBuffer_s { - void* dst; /**< start of output buffer */ - size_t size; /**< size of output buffer */ - size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ -} ZSTD_outBuffer; +XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH32_digest_endian(state_in, XXH_littleEndian); + else + return XXH32_digest_endian(state_in, XXH_bigEndian); +} -/*-*********************************************************************** -* Streaming compression - HowTo -* -* A ZSTD_CStream object is required to track streaming operation. -* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. -* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. -* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. -* -* For parallel execution, use one separate ZSTD_CStream per thread. -* -* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. -* -* Parameters are sticky : when starting a new compression on the same context, -* it will re-use the same sticky parameters as previous compression session. -* When in doubt, it's recommended to fully initialize the context before usage. -* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), -* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to -* set more specific parameters, the pledged source size, or load a dictionary. -* -* Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to -* consume input stream. The function will automatically update both `pos` -* fields within `input` and `output`. -* Note that the function may not consume the entire input, for example, because -* the output buffer is already full, in which case `input.pos < input.size`. -* The caller must check if input has been entirely consumed. -* If not, the caller must make some room to receive more compressed data, -* and then present again remaining input data. -* note: ZSTD_e_continue is guaranteed to make some forward progress when called, -* but doesn't guarantee maximal forward progress. This is especially relevant -* when compressing with multiple threads. The call won't block if it can -* consume some input, but if it can't it will wait for some, but not all, -* output to be flushed. -* @return : provides a minimum amount of data remaining to be flushed from internal buffers -* or an error code, which can be tested using ZSTD_isError(). -* -* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, -* using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. -* Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). -* In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. -* You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the -* operation. -* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will -* block until the flush is complete or the output buffer is full. 
-* @return : 0 if internal buffers are entirely flushed, -* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), -* or an error code, which can be tested using ZSTD_isError(). -* -* Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. -* It will perform a flush and write frame epilogue. -* The epilogue is required for decoders to consider a frame completed. -* flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. -* You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to -* start a new frame. -* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will -* block until the flush is complete or the output buffer is full. -* @return : 0 if frame fully completed and fully flushed, -* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), -* or an error code, which can be tested using ZSTD_isError(). -* -* *******************************************************************/ -typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */ - /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ -/*===== ZSTD_CStream management functions =====*/ -ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); -ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); +/* **** XXH64 **** */ -/*===== Streaming compression functions =====*/ -typedef enum { - ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ - ZSTD_e_flush=1, /* flush any data provided so far, - * it creates (at least) one new block, that can be decoded immediately on reception; - * frame will continue: any future data can still reference previously compressed data, improving compression. - * note : multithreaded compression will block to flush as much output as possible. */ - ZSTD_e_end=2 /* flush any remaining data _and_ close current frame. - * note that frame is only closed after compressed data is fully flushed (return value == 0). - * After that point, any additional data starts a new frame. - * note : each frame is independent (does not reference any content from previous frame). - : note : multithreaded compression will block to flush as much output as possible. */ -} ZSTD_EndDirective; +FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian) +{ + const BYTE* p = (const BYTE*)input; + const BYTE* const bEnd = p + len; -/*! ZSTD_compressStream2() : - * Behaves about the same as ZSTD_compressStream, with additional control on end directive. - * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() - * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) - * - output->pos must be <= dstCapacity, input->pos must be <= srcSize - * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. - * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. 
- * - When nbWorkers>=1, function is non-blocking : it just acquires a copy of input, and distributes jobs to internal worker threads, flush whatever is available, - * and then immediately returns, just indicating that there is some data remaining to be flushed. - * The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte. - * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. - * - @return provides a minimum amount of data remaining to be flushed from internal buffers - * or an error code, which can be tested using ZSTD_isError(). - * if @return != 0, flush is not fully completed, there is still some data left within internal buffers. - * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers. - * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed. - * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), - * only ZSTD_e_end or ZSTD_e_flush operations are allowed. - * Before starting a new compression job, or changing compression parameters, - * it is required to fully flush internal buffers. - */ -ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, - ZSTD_outBuffer* output, - ZSTD_inBuffer* input, - ZSTD_EndDirective endOp); +#ifdef XXH_ACCEPT_NULL_INPUT_POINTER + if (input==NULL) return XXH_ERROR; +#endif + state->total_len += len; -/* These buffer sizes are softly recommended. - * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output. - * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(), - * reducing the amount of memory shuffling and buffering, resulting in minor performance savings. - * - * However, note that these recommendations are from the perspective of a C caller program. - * If the streaming interface is invoked from some other language, - * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo, - * a major performance rule is to reduce crossing such interface to an absolute minimum. - * It's not rare that performance ends being spent more into the interface, rather than compression itself. - * In which cases, prefer using large buffers, as large as practical, - * for both input and output, to reduce the nb of roundtrips. - */ -ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ -ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. 
*/ + if (state->memsize + len < 32) { /* fill in tmp buffer */ + if (input != NULL) { + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len); + } + state->memsize += (U32)len; + return XXH_OK; + } + if (state->memsize) { /* tmp buffer is full */ + XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize); + state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian)); + state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian)); + state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian)); + state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian)); + p += 32-state->memsize; + state->memsize = 0; + } -/* ***************************************************************************** - * This following is a legacy streaming API. - * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). - * It is redundant, but remains fully supported. - * Advanced parameters and dictionary compression can only be used through the - * new API. - ******************************************************************************/ + if (p+32 <= bEnd) { + const BYTE* const limit = bEnd - 32; + U64 v1 = state->v1; + U64 v2 = state->v2; + U64 v3 = state->v3; + U64 v4 = state->v4; -/*! - * Equivalent to: - * - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) - * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); - */ -ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); -/*! - * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). - * NOTE: The return value is different. ZSTD_compressStream() returns a hint for - * the next read size (if non-zero and not an error). ZSTD_compressStream2() - * returns the minimum nb of bytes left to flush (if non-zero and not an error). - */ -ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); -/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ -ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); -/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ -ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); + do { + v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8; + v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8; + v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8; + v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8; + } while (p<=limit); + state->v1 = v1; + state->v2 = v2; + state->v3 = v3; + state->v4 = v4; + } -/*-*************************************************************************** -* Streaming decompression - HowTo -* -* A ZSTD_DStream object is required to track streaming operations. -* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. -* ZSTD_DStream objects can be re-used multiple times. -* -* Use ZSTD_initDStream() to start a new decompression operation. -* @return : recommended first input size -* Alternatively, use advanced API to set specific properties. -* -* Use ZSTD_decompressStream() repetitively to consume your input. -* The function will update both `pos` fields. -* If `input.pos < input.size`, some input has not been consumed. -* It's up to the caller to present again remaining data. -* The function tries to flush all data decoded immediately, respecting output buffer size. 
-* If `output.pos < output.size`, decoder has flushed everything it could. -* But if `output.pos == output.size`, there might be some data left within internal buffers., -* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. -* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. -* @return : 0 when a frame is completely decoded and fully flushed, -* or an error code, which can be tested using ZSTD_isError(), -* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : -* the return value is a suggested next input size (just a hint for better latency) -* that will never request more than the remaining frame size. -* *******************************************************************************/ + if (p < bEnd) { + XXH_memcpy(state->mem64, p, (size_t)(bEnd-p)); + state->memsize = (unsigned)(bEnd-p); + } -typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ - /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ -/*===== ZSTD_DStream management functions =====*/ -ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); -ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); + return XXH_OK; +} -/*===== Streaming decompression functions =====*/ +XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len) +{ + XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN; -/* This function is redundant with the advanced API and equivalent to: - * - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * ZSTD_DCtx_refDDict(zds, NULL); - */ -ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); + if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT) + return XXH64_update_endian(state_in, input, len, XXH_littleEndian); + else + return XXH64_update_endian(state_in, input, len, XXH_bigEndian); +} -ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); -ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ -ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ +FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian) +{ + const BYTE * p = (const BYTE*)state->mem64; + const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize; + U64 h64; -/************************** -* Simple dictionary API -***************************/ -/*! ZSTD_compress_usingDict() : - * Compression at an explicit compression level using a Dictionary. - * A dictionary can be any arbitrary data segment (also called a prefix), - * or a buffer with specified information (see dictBuilder/zdict.h). - * Note : This function loads the dictionary, resulting in significant startup delay. - * It's intended for a dictionary used only once. - * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ -ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - int compressionLevel); + if (state->total_len >= 32) { + U64 const v1 = state->v1; + U64 const v2 = state->v2; + U64 const v3 = state->v3; + U64 const v4 = state->v4; -/*! ZSTD_decompress_usingDict() : - * Decompression using a known Dictionary. 
- * Dictionary must be identical to the one used during compression.
- * Note : This function loads the dictionary, resulting in significant startup delay.
- * It's intended for a dictionary used only once.
- * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */
-ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx,
-                                             void* dst, size_t dstCapacity,
-                                             const void* src, size_t srcSize,
-                                             const void* dict,size_t dictSize);
+    h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+    h64 = XXH64_mergeRound(h64, v1);
+    h64 = XXH64_mergeRound(h64, v2);
+    h64 = XXH64_mergeRound(h64, v3);
+    h64 = XXH64_mergeRound(h64, v4);
+    } else {
+        h64 = state->v3 + PRIME64_5;
+    }
+    h64 += (U64) state->total_len;
-/***********************************
- * Bulk processing dictionary API
- **********************************/
-typedef struct ZSTD_CDict_s ZSTD_CDict;
+    while (p+8<=bEnd) {
+        U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian));
+        h64 ^= k1;
+        h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
+        p+=8;
+    }
-/*! ZSTD_createCDict() :
- * When compressing multiple messages or blocks using the same dictionary,
- * it's recommended to digest the dictionary only once, since it's a costly operation.
- * ZSTD_createCDict() will create a state from digesting a dictionary.
- * The resulting state can be used for future compression operations with very limited startup cost.
- * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
- * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict.
- * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content.
- * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer,
- * in which case the only thing that it transports is the @compressionLevel.
- * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively,
- * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */
-ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
-                                         int compressionLevel);
+    if (p+4<=bEnd) {
+        h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
+        h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+        p+=4;
+    }
-/*! ZSTD_freeCDict() :
- * Function frees memory allocated by ZSTD_createCDict(). */
-ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict);
+    while (p<bEnd) {
+        h64 ^= (*p) * PRIME64_5;
+        h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+        p++;
+    }
-/*! ZSTD_compress_usingCDict() :
- * Compression using a digested Dictionary.
- * Recommended when same dictionary is to be used multiple times.
- * Note : compression level is _decided at dictionary creation time_,
- * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */
-ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
-                                            void* dst, size_t dstCapacity,
-                                            const void* src, size_t srcSize,
-                                            const ZSTD_CDict* cdict);
+    h64 ^= h64 >> 33;
+    h64 *= PRIME64_2;
+    h64 ^= h64 >> 29;
+    h64 *= PRIME64_3;
+    h64 ^= h64 >> 32;
+    return h64;
+}
-typedef struct ZSTD_DDict_s ZSTD_DDict;
-/*! ZSTD_createDDict() :
- * Create a digested dictionary, ready to start decompression operation without startup delay.
- * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */
-ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize);
+XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)
+{
+    XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
-/*! ZSTD_freeDDict() :
- * Function frees memory allocated with ZSTD_createDDict() */
-ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict);
+    if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+        return XXH64_digest_endian(state_in, XXH_littleEndian);
+    else
+        return XXH64_digest_endian(state_in, XXH_bigEndian);
+}
-/*! ZSTD_decompress_usingDDict() :
- * Decompression using a digested Dictionary.
- * Recommended when same dictionary is used multiple times. */ -ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_DDict* ddict); +/* ************************** +* Canonical representation +****************************/ -/******************************** - * Dictionary helper functions - *******************************/ +/*! Default XXH result types are basic unsigned 32 and 64 bits. +* The canonical representation follows human-readable write convention, aka big-endian (large digits first). +* These functions allow transformation of hash result into and from its canonical format. +* This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs. +*/ -/*! ZSTD_getDictID_fromDict() : - * Provides the dictID stored within dictionary. - * if @return == 0, the dictionary is not conformant with Zstandard specification. - * It can still be loaded, but as a content-only dictionary. */ -ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); +XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash); + ZSTD_memcpy(dst, &hash, sizeof(*dst)); +} -/*! ZSTD_getDictID_fromDDict() : - * Provides the dictID of the dictionary loaded into `ddict`. - * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. - * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ -ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); +XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash) +{ + XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); + if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash); + ZSTD_memcpy(dst, &hash, sizeof(*dst)); +} -/*! ZSTD_getDictID_fromFrame() : - * Provides the dictID required to decompressed the frame stored within `src`. - * If @return == 0, the dictID could not be decoded. - * This could for one of the following reasons : - * - The frame does not require a dictionary to be decoded (most common case). - * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. - * Note : this use case also happens when using a non-conformant dictionary. - * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). - * - This is not a Zstandard frame. - * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */ -ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); +XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src) +{ + return XXH_readBE32(src); +} + +XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src) +{ + return XXH_readBE64(src); +} +/**** ended inlining xxhash.c ****/ +# endif +#endif /* XXH_STATIC_LINKING_ONLY && XXH_STATIC_H_3543687687345 */ -/******************************************************************************* - * Advanced dictionary and prefix API + +#if defined (__cplusplus) +} +#endif +/**** ended inlining xxhash.h ****/ +#ifndef ZSTD_NO_TRACE +/**** start inlining zstd_trace.h ****/ +/* + * Copyright (c) Facebook, Inc. 
+ * All rights reserved. * - * This API allows dictionaries to be used with ZSTD_compress2(), - * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and - * only reset with the context is reset with ZSTD_reset_parameters or - * ZSTD_reset_session_and_parameters. Prefixes are single-use. - ******************************************************************************/ + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_TRACE_H +#define ZSTD_TRACE_H + +#if defined (__cplusplus) +extern "C" { +#endif +#include -/*! ZSTD_CCtx_loadDictionary() : - * Create an internal CDict from `dict` buffer. - * Decompression will have to use same dictionary. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, - * meaning "return to no-dictionary mode". - * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. - * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). - * Note 2 : Loading a dictionary involves building tables. - * It's also a CPU consuming operation, with non-negligible impact on latency. - * Tables are dependent on compression parameters, and for this reason, - * compression parameters can no longer be changed after loading a dictionary. - * Note 3 :`dict` content will be copied internally. - * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. - * In such a case, dictionary buffer must outlive its users. - * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() - * to precisely select how dictionary content must be interpreted. */ -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); +/* weak symbol support */ +#if !defined(ZSTD_HAVE_WEAK_SYMBOLS) && defined(__GNUC__) && \ + !defined(__APPLE__) && !defined(_WIN32) && !defined(__MINGW32__) && \ + !defined(__CYGWIN__) +# define ZSTD_HAVE_WEAK_SYMBOLS 1 +#else +# define ZSTD_HAVE_WEAK_SYMBOLS 0 +#endif +#if ZSTD_HAVE_WEAK_SYMBOLS +# define ZSTD_WEAK_ATTR __attribute__((__weak__)) +#else +# define ZSTD_WEAK_ATTR +#endif -/*! ZSTD_CCtx_refCDict() : - * Reference a prepared dictionary, to be used for all next compressed frames. - * Note that compression parameters are enforced from within CDict, - * and supersede any compression parameter previously set within CCtx. - * The parameters ignored are labled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. - * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. - * The dictionary will remain valid for future compressed frames using same CCtx. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special : Referencing a NULL CDict means "return to no-dictionary mode". - * Note 1 : Currently, only one dictionary can be managed. - * Referencing a new dictionary effectively "discards" any previous one. - * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */ -ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); +/* Only enable tracing when weak symbols are available. */ +#ifndef ZSTD_TRACE +# define ZSTD_TRACE ZSTD_HAVE_WEAK_SYMBOLS +#endif -/*! 
ZSTD_CCtx_refPrefix() : - * Reference a prefix (single-usage dictionary) for next compressed frame. - * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). - * Decompression will need same prefix to properly regenerate data. - * Compressing with a prefix is similar in outcome as performing a diff and compressing it, - * but performs much faster, especially during decompression (compression speed is tunable with compression level). - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary - * Note 1 : Prefix buffer is referenced. It **must** outlive compression. - * Its content must remain unmodified during compression. - * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, - * ensure that the window size is large enough to contain the entire source. - * See ZSTD_c_windowLog. - * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. - * It's a CPU consuming operation, with non-negligible impact on latency. - * If there is a need to use the same prefix multiple times, consider loadDictionary instead. - * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent). - * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */ -ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, - const void* prefix, size_t prefixSize); +#if ZSTD_TRACE -/*! ZSTD_DCtx_loadDictionary() : - * Create an internal DDict from dict buffer, - * to be used to decompress next frames. - * The dictionary remains valid for all future frames, until explicitly invalidated. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, - * meaning "return to no-dictionary mode". - * Note 1 : Loading a dictionary involves building tables, - * which has a non-negligible impact on CPU usage and latency. - * It's recommended to "load once, use many times", to amortize the cost - * Note 2 :`dict` content will be copied internally, so `dict` can be released after loading. - * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead. - * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of - * how dictionary content is loaded and interpreted. - */ -ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +struct ZSTD_CCtx_s; +struct ZSTD_DCtx_s; +struct ZSTD_CCtx_params_s; -/*! ZSTD_DCtx_refDDict() : - * Reference a prepared dictionary, to be used to decompress next frames. - * The dictionary remains active for decompression of future frames using same DCtx. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Note 1 : Currently, only one dictionary can be managed. - * Referencing a new dictionary effectively "discards" any previous one. - * Special: referencing a NULL DDict means "return to no-dictionary mode". - * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. +typedef struct { + /** + * ZSTD_VERSION_NUMBER + * + * This is guaranteed to be the first member of ZSTD_trace. + * Otherwise, this struct is not stable between versions. If + * the version number does not match your expectation, you + * should not interpret the rest of the struct. 
+ */ + unsigned version; + /** + * Non-zero if streaming (de)compression is used. + */ + unsigned streaming; + /** + * The dictionary ID. + */ + unsigned dictionaryID; + /** + * Is the dictionary cold? + * Only set on decompression. + */ + unsigned dictionaryIsCold; + /** + * The dictionary size or zero if no dictionary. + */ + size_t dictionarySize; + /** + * The uncompressed size of the data. + */ + size_t uncompressedSize; + /** + * The compressed size of the data. + */ + size_t compressedSize; + /** + * The fully resolved CCtx parameters (NULL on decompression). + */ + struct ZSTD_CCtx_params_s const* params; + /** + * The ZSTD_CCtx pointer (NULL on decompression). + */ + struct ZSTD_CCtx_s const* cctx; + /** + * The ZSTD_DCtx pointer (NULL on compression). + */ + struct ZSTD_DCtx_s const* dctx; +} ZSTD_Trace; + +/** + * A tracing context. It must be 0 when tracing is disabled. + * Otherwise, any non-zero value returned by a tracing begin() + * function is presented to any subsequent calls to end(). + * + * Any non-zero value is treated as tracing is enabled and not + * interpreted by the library. + * + * Two possible uses are: + * * A timestamp for when the begin() function was called. + * * A unique key identifying the (de)compression, like the + * address of the [dc]ctx pointer if you need to track + * more information than just a timestamp. */ -ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); +typedef unsigned long long ZSTD_TraceCtx; -/*! ZSTD_DCtx_refPrefix() : - * Reference a prefix (single-usage dictionary) to decompress next frame. - * This is the reverse operation of ZSTD_CCtx_refPrefix(), - * and must use the same prefix as the one used during compression. - * Prefix is **only used once**. Reference is discarded at end of frame. - * End of frame is reached when ZSTD_decompressStream() returns 0. - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary - * Note 2 : Prefix buffer is referenced. It **must** outlive decompression. - * Prefix buffer must remain unmodified up to the end of frame, - * reached when ZSTD_decompressStream() returns 0. - * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent). - * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section) - * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. - * A full dictionary is more costly, as it requires building tables. +/** + * Trace the beginning of a compression call. + * @param cctx The dctx pointer for the compression. + * It can be used as a key to map begin() to end(). + * @returns Non-zero if tracing is enabled. The return value is + * passed to ZSTD_trace_compress_end(). */ -ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, - const void* prefix, size_t prefixSize); +ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_compress_begin( + struct ZSTD_CCtx_s const* cctx); -/* === Memory management === */ +/** + * Trace the end of a compression call. + * @param ctx The return value of ZSTD_trace_compress_begin(). + * @param trace The zstd tracing info. + */ +ZSTD_WEAK_ATTR void ZSTD_trace_compress_end( + ZSTD_TraceCtx ctx, + ZSTD_Trace const* trace); -/*! ZSTD_sizeof_*() : - * These functions give the _current_ memory usage of selected object. - * Note that object memory usage can evolve (increase or decrease) over time. 
*/ -ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); -ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); -ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); -ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); -ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); +/** + * Trace the beginning of a decompression call. + * @param dctx The dctx pointer for the decompression. + * It can be used as a key to map begin() to end(). + * @returns Non-zero if tracing is enabled. The return value is + * passed to ZSTD_trace_compress_end(). + */ +ZSTD_WEAK_ATTR ZSTD_TraceCtx ZSTD_trace_decompress_begin( + struct ZSTD_DCtx_s const* dctx); -#endif /* ZSTD_H_235446 */ +/** + * Trace the end of a decompression call. + * @param ctx The return value of ZSTD_trace_decompress_begin(). + * @param trace The zstd tracing info. + */ +ZSTD_WEAK_ATTR void ZSTD_trace_decompress_end( + ZSTD_TraceCtx ctx, + ZSTD_Trace const* trace); +#endif /* ZSTD_TRACE */ -/* ************************************************************************************** - * ADVANCED AND EXPERIMENTAL FUNCTIONS - **************************************************************************************** - * The definitions in the following section are considered experimental. - * They are provided for advanced scenarios. - * They should never be used with a dynamic library, as prototypes may change in the future. - * Use them only in association with static linking. - * ***************************************************************************************/ +#if defined (__cplusplus) +} +#endif -#if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) -#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY +#endif /* ZSTD_TRACE_H */ +/**** ended inlining zstd_trace.h ****/ +#else +# define ZSTD_TRACE 0 +#endif -/**************************************************************************************** - * experimental API (static linking only) - **************************************************************************************** - * The following symbols and constants - * are not planned to join "stable API" status in the near future. - * They can still change in future versions. - * Some of them are planned to remain in the static_only section indefinitely. - * Some of them might be removed in the future (especially when redundant with existing stable functions) - * ***************************************************************************************/ +#if defined (__cplusplus) +extern "C" { +#endif -#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */ -#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2) -#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */ -#define ZSTD_SKIPPABLEHEADERSIZE 8 +/* ---- static assert (debug) --- */ +#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) +#define ZSTD_isError ERR_isError /* for inlining */ +#define FSE_isError ERR_isError +#define HUF_isError ERR_isError -/* compression parameter bounds */ -#define ZSTD_WINDOWLOG_MAX_32 30 -#define ZSTD_WINDOWLOG_MAX_64 31 -#define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) -#define ZSTD_WINDOWLOG_MIN 10 -#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? 
ZSTD_WINDOWLOG_MAX : 30) -#define ZSTD_HASHLOG_MIN 6 -#define ZSTD_CHAINLOG_MAX_32 29 -#define ZSTD_CHAINLOG_MAX_64 30 -#define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64)) -#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN -#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) -#define ZSTD_SEARCHLOG_MIN 1 -#define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ -#define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */ -#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX -#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */ -#define ZSTD_STRATEGY_MIN ZSTD_fast -#define ZSTD_STRATEGY_MAX ZSTD_btultra2 +/*-************************************* +* shared macros +***************************************/ +#undef MIN +#undef MAX +#define MIN(a,b) ((a)<(b) ? (a) : (b)) +#define MAX(a,b) ((a)>(b) ? (a) : (b)) -#define ZSTD_OVERLAPLOG_MIN 0 -#define ZSTD_OVERLAPLOG_MAX 9 +/** + * Ignore: this is an internal helper. + * + * This is a helper function to help force C99-correctness during compilation. + * Under strict compilation modes, variadic macro arguments can't be empty. + * However, variadic function arguments can be. Using a function therefore lets + * us statically check that at least one (string) argument was passed, + * independent of the compilation flags. + */ +static INLINE_KEYWORD UNUSED_ATTR +void _force_has_format_string(const char *format, ...) { + (void)format; +} -#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame - * requiring larger than (1< 3, then this is seqDef.offset - 3 - * If seqDef.offset < 3, then this is the corresponding repeat offset - * But if seqDef.offset < 3 and litLength == 0, this is the - * repeat offset before the corresponding repeat offset - * And if seqDef.offset == 3 and litLength == 0, this is the - * most recent repeat offset - 1 - */ - unsigned int offset; - unsigned int litLength; /* Literal length */ - unsigned int matchLength; /* Match length */ - /* 0 when seq not rep and seqDef.offset otherwise - * when litLength == 0 this will be <= 4, otherwise <= 3 like normal - */ - unsigned int rep; -} ZSTD_Sequence; +#define BIT7 128 +#define BIT6 64 +#define BIT5 32 +#define BIT4 16 +#define BIT1 2 +#define BIT0 1 -typedef struct { - unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ - unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ - unsigned hashLog; /**< dispatch table : larger == faster, more memory */ - unsigned searchLog; /**< nb of searches : larger == more compression, slower */ - unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */ - unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */ - ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */ -} ZSTD_compressionParameters; +#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 +static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; +static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; -typedef struct { - int contentSizeFlag; /**< 1: content size will be in frame header (when known) */ - int checksumFlag; /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */ - int 
noDictIDFlag; /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */ -} ZSTD_frameParameters; +#define ZSTD_FRAMEIDSIZE 4 /* magic number size */ -typedef struct { - ZSTD_compressionParameters cParams; - ZSTD_frameParameters fParams; -} ZSTD_parameters; +#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ +static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; +typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; -typedef enum { - ZSTD_dct_auto = 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */ - ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */ - ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */ -} ZSTD_dictContentType_e; +#define ZSTD_FRAMECHECKSUMSIZE 4 -typedef enum { - ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */ - ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. */ -} ZSTD_dictLoadMethod_e; +#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ +#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ -typedef enum { - ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */ - ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number. - * Useful to save 4 bytes per generated frame. - * Decoder cannot recognise automatically this format, requiring this instruction. */ -} ZSTD_format_e; +#define HufLog 12 +typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; -typedef enum { - /* Note: this enum and the behavior it controls are effectively internal - * implementation details of the compressor. They are expected to continue - * to evolve and should be considered only in the context of extremely - * advanced performance tuning. - * - * Zstd currently supports the use of a CDict in three ways: - * - * - The contents of the CDict can be copied into the working context. This - * means that the compression can search both the dictionary and input - * while operating on a single set of internal tables. This makes - * the compression faster per-byte of input. However, the initial copy of - * the CDict's tables incurs a fixed cost at the beginning of the - * compression. For small compressions (< 8 KB), that copy can dominate - * the cost of the compression. - * - * - The CDict's tables can be used in-place. In this model, compression is - * slower per input byte, because the compressor has to search two sets of - * tables. However, this model incurs no start-up cost (as long as the - * working context's tables can be reused). For small inputs, this can be - * faster than copying the CDict's tables. - * - * - The CDict's tables are not used at all, and instead we use the working - * context alone to reload the dictionary and use params based on the source - * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). - * This method is effective when the dictionary sizes are very small relative - * to the input size, and the input size is fairly large to begin with. 
- * - * Zstd has a simple internal heuristic that selects which strategy to use - * at the beginning of a compression. However, if experimentation shows that - * Zstd is making poor choices, it is possible to override that choice with - * this enum. - */ - ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ - ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ - ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ - ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ -} ZSTD_dictAttachPref_e; +#define LONGNBSEQ 0x7F00 -typedef enum { - ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level. - * Negative compression levels will be uncompressed, and positive compression - * levels will be compressed. */ - ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be - * emitted if Huffman compression is not profitable. */ - ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */ -} ZSTD_literalCompressionMode_e; +#define MINMATCH 3 +#define Litbits 8 +#define MaxLit ((1<= ZSTD_FRAMEHEADERSIZE_PREFIX. - * @return : size of the Frame Header, - * or an error code (if srcSize is too small) */ -ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); -/*! ZSTD_getSequences() : - * Extract sequences from the sequence store - * zc can be used to insert custom compression params. - * This function invokes ZSTD_compress2 - * @return : number of sequences extracted - */ -ZSTDLIB_API size_t ZSTD_getSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, - size_t outSeqsSize, const void* src, size_t srcSize); +/*-******************************************* +* Shared functions to include for inlining +*********************************************/ +static void ZSTD_copy8(void* dst, const void* src) { +#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON) + vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src)); +#else + ZSTD_memcpy(dst, src, 8); +#endif +} +#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } +static void ZSTD_copy16(void* dst, const void* src) { +#if !defined(ZSTD_NO_INTRINSICS) && defined(__ARM_NEON) + vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src)); +#else + ZSTD_memcpy(dst, src, 16); +#endif +} +#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } -/*************************************** -* Memory management -***************************************/ +#define WILDCOPY_OVERLENGTH 32 +#define WILDCOPY_VECLEN 16 -/*! ZSTD_estimate*() : - * These functions make it possible to estimate memory usage of a future - * {D,C}Ctx, before its creation. - * - * ZSTD_estimateCCtxSize() will provide a budget large enough for any - * compression level up to selected one. Unlike ZSTD_estimateCStreamSize*(), - * this estimate does not include space for a window buffer, so this estimate - * is guaranteed to be enough for single-shot compressions, but not streaming - * compressions. It will however assume the input may be arbitrarily large, - * which is the worst case. If srcSize is known to always be small, - * ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation. - * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with - * ZSTD_getCParams() to create cParams from compressionLevel. - * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with - * ZSTD_CCtxParams_setParameter(). - * - * Note: only single-threaded compression is supported. This function will - * return an error code if ZSTD_c_nbWorkers is >= 1. 
*/ -ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); -ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); -ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); -ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); +typedef enum { + ZSTD_no_overlap, + ZSTD_overlap_src_before_dst + /* ZSTD_overlap_dst_before_src, */ +} ZSTD_overlap_e; -/*! ZSTD_estimateCStreamSize() : - * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. - * It will also consider src size to be arbitrarily "large", which is worst case. - * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. - * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. - * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. - * Note : CStream size estimation is only correct for single-threaded compression. - * ZSTD_DStream memory budget depends on window Size. - * This information can be passed manually, using ZSTD_estimateDStreamSize, - * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); - * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), - * an internal ?Dict will be created, which additional size is not estimated here. - * In this case, get total size by adding ZSTD_estimate?DictSize */ -ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel); -ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); -ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); -ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize); -ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); +/*! ZSTD_wildcopy() : + * Custom version of ZSTD_memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0) + * @param ovtype controls the overlap detection + * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. + * - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart. + * The src buffer must be before the dst buffer. + */ +MEM_STATIC FORCE_INLINE_ATTR +void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype) +{ + ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + BYTE* const oend = op + length; + + assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); + + if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { + /* Handle short offset copies. */ + do { + COPY8(op, ip) + } while (op < oend); + } else { + assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); + /* Separate out the first COPY16() call because the copy length is + * almost certain to be short, so the branches have different + * probabilities. Since it is almost certain to be short, only do + * one COPY16() in the first call. Then, do two calls per loop since + * at that point it is more likely to have a high trip count. 
+ */ +#ifdef __aarch64__ + do { + COPY16(op, ip); + } + while (op < oend); +#else + ZSTD_copy16(op, ip); + if (16 >= length) return; + op += 16; + ip += 16; + do { + COPY16(op, ip); + COPY16(op, ip); + } + while (op < oend); +#endif + } +} -/*! ZSTD_estimate?DictSize() : - * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict(). - * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced(). - * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller. - */ -ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); -ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); -ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); +MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const length = MIN(dstCapacity, srcSize); + if (length > 0) { + ZSTD_memcpy(dst, src, length); + } + return length; +} -/*! ZSTD_initStatic*() : - * Initialize an object using a pre-allocated fixed-size buffer. - * workspace: The memory area to emplace the object into. - * Provided pointer *must be 8-bytes aligned*. - * Buffer must outlive object. - * workspaceSize: Use ZSTD_estimate*Size() to determine - * how large workspace must be to support target scenario. - * @return : pointer to object (same address as workspace, just different type), - * or NULL if error (size too small, incorrect alignment, etc.) - * Note : zstd will never resize nor malloc() when using a static buffer. - * If the object requires more memory than available, - * zstd will just error out (typically ZSTD_error_memory_allocation). - * Note 2 : there is no corresponding "free" function. - * Since workspace is allocated externally, it must be freed externally too. - * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level - * into its associated cParams. - * Limitation 1 : currently not compatible with internal dictionary creation, triggered by - * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict(). - * Limitation 2 : static cctx currently not compatible with multi-threading. - * Limitation 3 : static dctx is incompatible with legacy support. - */ -ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); -ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */ +/* define "workspace is too large" as this number of times larger than needed */ +#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 -ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); -ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */ +/* when workspace is continuously too large + * during at least this number of times, + * context's memory usage is considered wasteful, + * because it's sized to handle a worst case scenario which rarely happens. 
+ * In which case, resize it down to free some memory */ +#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 -ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict( - void* workspace, size_t workspaceSize, - const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters cParams); +/* Controls whether the input/output buffer is buffered or stable. */ +typedef enum { + ZSTD_bm_buffered = 0, /* Buffer the input/output */ + ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */ +} ZSTD_bufferMode_e; -ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict( - void* workspace, size_t workspaceSize, - const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType); +/*-******************************************* +* Private declarations +*********************************************/ +typedef struct seqDef_s { + U32 offset; /* offset == rawOffset + ZSTD_REP_NUM, or equivalently, offCode + 1 */ + U16 litLength; + U16 matchLength; +} seqDef; -/*! Custom memory allocation : - * These prototypes make it possible to pass your own allocation/free functions. - * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. - * All allocation/free operations will be completed using these custom variants instead of regular ones. - */ -typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); -typedef void (*ZSTD_freeFunction) (void* opaque, void* address); -typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; -static ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */ +/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */ +typedef enum { + ZSTD_llt_none = 0, /* no longLengthType */ + ZSTD_llt_literalLength = 1, /* represents a long literal */ + ZSTD_llt_matchLength = 2 /* represents a long match */ +} ZSTD_longLengthType_e; -ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); -ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); +typedef struct { + seqDef* sequencesStart; + seqDef* sequences; /* ptr to end of sequences */ + BYTE* litStart; + BYTE* lit; /* ptr to end of literals */ + BYTE* llCode; + BYTE* mlCode; + BYTE* ofCode; + size_t maxNbSeq; + size_t maxNbLit; -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_compressionParameters cParams, - ZSTD_customMem customMem); + /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength + * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment + * the existing value of the litLength or matchLength by 0x10000. 
+ */ + ZSTD_longLengthType_e longLengthType; + U32 longLengthPos; /* Index of the sequence to apply long length modification to */ +} seqStore_t; -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, - ZSTD_dictLoadMethod_e dictLoadMethod, - ZSTD_dictContentType_e dictContentType, - ZSTD_customMem customMem); +typedef struct { + U32 litLength; + U32 matchLength; +} ZSTD_sequenceLength; +/** + * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences + * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength. + */ +MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) +{ + ZSTD_sequenceLength seqLen; + seqLen.litLength = seq->litLength; + seqLen.matchLength = seq->matchLength + MINMATCH; + if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { + if (seqStore->longLengthType == ZSTD_llt_literalLength) { + seqLen.litLength += 0xFFFF; + } + if (seqStore->longLengthType == ZSTD_llt_matchLength) { + seqLen.matchLength += 0xFFFF; + } + } + return seqLen; +} +/** + * Contains the compressed frame size and an upper-bound for the decompressed frame size. + * Note: before using `compressedSize`, check for errors using ZSTD_isError(). + * similarly, before using `decompressedBound`, check for errors using: + * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` + */ +typedef struct { + size_t compressedSize; + unsigned long long decompressedBound; +} ZSTD_frameSizeInfo; /* decompress & legacy */ -/*************************************** -* Advanced compression functions -***************************************/ +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ -/*! ZSTD_createCDict_byReference() : - * Create a digested dictionary for compression - * Dictionary content is just referenced, not duplicated. - * As a consequence, `dictBuffer` **must** outlive CDict, - * and its content must remain unmodified throughout the lifetime of CDict. - * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ -ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); +/* custom memory allocation functions */ +void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem); +void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem); +void ZSTD_customFree(void* ptr, ZSTD_customMem customMem); -/*! ZSTD_getCParams() : - * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. - * `estimatedSrcSize` value is optional, select 0 if not known */ -ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); -/*! ZSTD_getParams() : - * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. 
- * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ -ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); +MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ +{ + assert(val != 0); + { +# if defined(_MSC_VER) /* Visual */ +# if STATIC_BMI2 == 1 + return _lzcnt_u32(val)^31; +# else + unsigned long r=0; + return _BitScanReverse(&r, val) ? (unsigned)r : 0; +# endif +# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ + return __builtin_clz (val) ^ 31; +# elif defined(__ICCARM__) /* IAR Intrinsic */ + return 31 - __CLZ(val); +# else /* Software version */ + static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; +# endif + } +} -/*! ZSTD_checkCParams() : - * Ensure param values remain within authorized range. - * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ -ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); -/*! ZSTD_adjustCParams() : - * optimize params for a given `srcSize` and `dictSize`. - * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. - * `dictSize` must be `0` when there is no dictionary. - * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. - * This function never fails (wide contract) */ -ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! */ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */ -/*! ZSTD_compress_advanced() : - * Note : this function is now DEPRECATED. - * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. - * This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */ -ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const void* dict,size_t dictSize, - ZSTD_parameters params); -/*! ZSTD_compress_usingCDict_advanced() : - * Note : this function is now REDUNDANT. - * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. - * This prototype will be marked as deprecated and generate compilation warning in some future version */ -ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize, - const ZSTD_CDict* cdict, - ZSTD_frameParameters fParams); +typedef struct { + blockType_e blockType; + U32 lastBlock; + U32 origSize; +} blockProperties_t; /* declared here for decompress and fullbench */ + +/*! ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr); +/*! 
ZSTD_decodeSeqHeaders() : + * decode sequence header from src */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize); -/*! ZSTD_CCtx_loadDictionary_byReference() : - * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. - * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */ -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); -/*! ZSTD_CCtx_loadDictionary_advanced() : - * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over - * how to load the dictionary (by copy ? by reference ?) - * and how to interpret it (automatic ? force raw mode ? full mode only ?) */ -ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); +#if defined (__cplusplus) +} +#endif -/*! ZSTD_CCtx_refPrefix_advanced() : - * Same as ZSTD_CCtx_refPrefix(), but gives finer control over - * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ -ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); +#endif /* ZSTD_CCOMMON_H_MODULE */ +/**** ended inlining zstd_internal.h ****/ -/* === experimental parameters === */ -/* these parameters can be used with ZSTD_setParameter() - * they are not guaranteed to remain supported in the future */ - /* Enables rsyncable mode, - * which makes compressed files more rsync friendly - * by adding periodic synchronization points to the compressed data. - * The target average block size is ZSTD_c_jobSize / 2. - * It's possible to modify the job size to increase or decrease - * the granularity of the synchronization point. - * Once the jobSize is smaller than the window size, - * it will result in compression ratio degradation. - * NOTE 1: rsyncable mode only works when multithreading is enabled. - * NOTE 2: rsyncable performs poorly in combination with long range mode, - * since it will decrease the effectiveness of synchronization points, - * though mileage may vary. - * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s. - * If the selected compression level is already running significantly slower, - * the overall speed won't be significantly impacted. - */ - #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1 +/*-**************************************** +* Version +******************************************/ +unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; } -/* Select a compression format. - * The value must be of type ZSTD_format_e. - * See ZSTD_format_e enum definition for details */ -#define ZSTD_c_format ZSTD_c_experimentalParam2 +const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } + + +/*-**************************************** +* ZSTD Error Management +******************************************/ +#undef ZSTD_isError /* defined within zstd_internal.h */ +/*! 
ZSTD_isError() : + * tells if a return value is an error code + * symbol is required for external callers */ +unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } -/* Force back-reference distances to remain < windowSize, - * even when referencing into Dictionary content (default:0) */ -#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3 +/*! ZSTD_getErrorName() : + * provides error code string from function result (useful for debugging) */ +const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } -/* Controls whether the contents of a CDict - * are used in place, or copied into the working context. - * Accepts values from the ZSTD_dictAttachPref_e enum. - * See the comments on that enum for an explanation of the feature. */ -#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 +/*! ZSTD_getError() : + * convert a `size_t` function result into a proper ZSTD_errorCode enum */ +ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } -/* Controls how the literals are compressed (default is auto). - * The value must be of type ZSTD_literalCompressionMode_e. - * See ZSTD_literalCompressionMode_t enum definition for details. - */ -#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 +/*! ZSTD_getErrorString() : + * provides error code string from enum */ +const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } -/* Tries to fit compressed block size to be around targetCBlockSize. - * No target when targetCBlockSize == 0. - * There is no guarantee on compressed block size (default:0) */ -#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 -/* User's best guess of source size. - * Hint is not valid when srcSizeHint == 0. - * There is no guarantee that hint is close to actual source size, - * but compression ratio may regress significantly if guess considerably underestimates */ -#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 -/*! ZSTD_CCtx_getParameter() : - * Get the requested compression parameter value, selected by enum ZSTD_cParameter, - * and store it into int* value. - * @return : 0, or an error code (which can be tested with ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); +/*=************************************************************** +* Custom allocator +****************************************************************/ +void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) + return customMem.customAlloc(customMem.opaque, size); + return ZSTD_malloc(size); +} +void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) { + /* calloc implemented as malloc+memset; + * not as efficient as calloc, but next best guess for custom malloc */ + void* const ptr = customMem.customAlloc(customMem.opaque, size); + ZSTD_memset(ptr, 0, size); + return ptr; + } + return ZSTD_calloc(1, size); +} -/*! ZSTD_CCtx_params : - * Quick howto : - * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure - * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into - * an existing ZSTD_CCtx_params structure. - * This is similar to - * ZSTD_CCtx_setParameter(). - * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to - * an existing CCtx. - * These parameters will be applied to - * all subsequent frames. - * - ZSTD_compressStream2() : Do compression using the CCtx. - * - ZSTD_freeCCtxParams() : Free the memory. 
+void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) +{ + if (ptr!=NULL) { + if (customMem.customFree) + customMem.customFree(customMem.opaque, ptr); + else + ZSTD_free(ptr); + } +} +/**** ended inlining common/zstd_common.c ****/ + +/**** start inlining decompress/huf_decompress.c ****/ +/* ****************************************************************** + * huff0 huffman decoder, + * part of Finite State Entropy library + * Copyright (c) Yann Collet, Facebook, Inc. * - * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() - * for static allocation of CCtx for single-threaded compression. - */ -ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); -ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ -/*! ZSTD_CCtxParams_reset() : - * Reset params to default values. - */ -ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); +/* ************************************************************** +* Dependencies +****************************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/compiler.h ****/ +/**** skipping file: ../common/bitstream.h ****/ +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/error_private.h ****/ -/*! ZSTD_CCtxParams_init() : - * Initializes the compression parameters of cctxParams according to - * compression level. All other parameters are reset to their default values. - */ -ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); +/* ************************************************************** +* Macros +****************************************************************/ -/*! ZSTD_CCtxParams_init_advanced() : - * Initializes the compression and frame parameters of cctxParams according to - * params. All other parameters are reset to their default values. +/* These two optional macros force the use one way or another of the two + * Huffman decompression implementations. You can't force in both directions + * at the same time. */ -ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); +#if defined(HUF_FORCE_DECOMPRESS_X1) && \ + defined(HUF_FORCE_DECOMPRESS_X2) +#error "Cannot force the use of the X1 and X2 decoders at the same time!" +#endif -/*! ZSTD_CCtxParams_setParameter() : - * Similar to ZSTD_CCtx_setParameter. - * Set one compression parameter, selected by enum ZSTD_cParameter. - * Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams(). - * @result : 0, or an error code (which can be tested with ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); -/*! ZSTD_CCtxParams_getParameter() : - * Similar to ZSTD_CCtx_getParameter. - * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. 
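As the comment above notes, HUF_FORCE_DECOMPRESS_X1 and HUF_FORCE_DECOMPRESS_X2 are mutually exclusive compile-time switches: defining exactly one of them restricts huf_decompress to that single Huffman decoder variant (the other variant's section is excluded by the matching #ifndef guards further below), and defining both trips the #error. A minimal sketch of selecting the single-symbol decoder; the define below is illustrative only and is not part of the vendored file — it could equally be passed as -DHUF_FORCE_DECOMPRESS_X1 by the build system:

/* Sketch only: keep the single-symbol (X1) Huffman decoder and compile out X2.
   Defining HUF_FORCE_DECOMPRESS_X2 as well would trigger the #error above. */
#define HUF_FORCE_DECOMPRESS_X1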
- * @result : 0, or an error code (which can be tested with ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_isError ERR_isError -/*! ZSTD_CCtx_setParametersUsingCCtxParams() : - * Apply a set of ZSTD_CCtx_params to the compression context. - * This can be done even after compression is started, - * if nbWorkers==0, this will have no impact until a new compression is started. - * if nbWorkers>=1, new parameters will be picked up at next job, - * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated). - */ -ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( - ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); -/*! ZSTD_compressStream2_simpleArgs() : - * Same as ZSTD_compressStream2(), - * but using only integral types as arguments. - * This variant might be helpful for binders from dynamic languages - * which have troubles handling structures containing memory pointers. - */ -ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( - ZSTD_CCtx* cctx, - void* dst, size_t dstCapacity, size_t* dstPos, - const void* src, size_t srcSize, size_t* srcPos, - ZSTD_EndDirective endOp); +/* ************************************************************** +* Byte alignment for workSpace management +****************************************************************/ +#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) +#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) -/*************************************** -* Advanced decompression functions -***************************************/ +/* ************************************************************** +* BMI2 Variant Wrappers +****************************************************************/ +#if DYNAMIC_BMI2 -/*! ZSTD_isFrame() : - * Tells if the content of `buffer` starts with a valid Frame Identifier. - * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. - * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. - * Note 3 : Skippable Frame Identifiers are considered valid. */ -ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); +#define HUF_DGEN(fn) \ + \ + static size_t fn##_default( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + if (bmi2) { \ + return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ + } -/*! ZSTD_createDDict_byReference() : - * Create a digested dictionary, ready to start decompression operation without startup delay. - * Dictionary content is referenced, and therefore stays in dictBuffer. 
- * It is important that dictBuffer outlives DDict, - * it must remain read accessible throughout the lifetime of DDict */ -ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); +#else -/*! ZSTD_DCtx_loadDictionary_byReference() : - * Same as ZSTD_DCtx_loadDictionary(), - * but references `dict` content instead of copying it into `dctx`. - * This saves memory if `dict` remains around., - * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */ -ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +#define HUF_DGEN(fn) \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + (void)bmi2; \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } -/*! ZSTD_DCtx_loadDictionary_advanced() : - * Same as ZSTD_DCtx_loadDictionary(), - * but gives direct control over - * how to load the dictionary (by copy ? by reference ?) - * and how to interpret it (automatic ? force raw mode ? full mode only ?). */ -ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); +#endif -/*! ZSTD_DCtx_refPrefix_advanced() : - * Same as ZSTD_DCtx_refPrefix(), but gives finer control over - * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ -ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); -/*! ZSTD_DCtx_setMaxWindowSize() : - * Refuses allocating internal buffers for frames requiring a window size larger than provided limit. - * This protects a decoder context from reserving too much memory for itself (potential attack scenario). - * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. - * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) - * @return : 0, or an error code (which can be tested using ZSTD_isError()). - */ -ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); +/*-***************************/ +/* generic DTableDesc */ +/*-***************************/ +typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; -/* ZSTD_d_format - * experimental parameter, - * allowing selection between ZSTD_format_e input compression formats +static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) +{ + DTableDesc dtd; + ZSTD_memcpy(&dtd, table, sizeof(dtd)); + return dtd; +} + + +#ifndef HUF_FORCE_DECOMPRESS_X2 + +/*-***************************/ +/* single-symbol decoding */ +/*-***************************/ +typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ + +/** + * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at + * a time. */ -#define ZSTD_d_format ZSTD_d_experimentalParam1 +static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) { + U64 D4; + if (MEM_isLittleEndian()) { + D4 = symbol + (nbBits << 8); + } else { + D4 = (symbol << 8) + nbBits; + } + D4 *= 0x0001000100010001ULL; + return D4; +} -/*! ZSTD_DCtx_setFormat() : - * Instruct the decoder context about what kind of data to decode next. 
- * This instruction is mandatory to decode data without a fully-formed header, - * such ZSTD_f_zstd1_magicless for example. - * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ -ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); +typedef struct { + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + BYTE symbols[HUF_SYMBOLVALUE_MAX + 1]; + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; +} HUF_ReadDTableX1_Workspace; -/*! ZSTD_decompressStream_simpleArgs() : - * Same as ZSTD_decompressStream(), - * but using only integral types as arguments. - * This can be helpful for binders from dynamic languages - * which have troubles handling structures containing memory pointers. - */ -ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( - ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, size_t* dstPos, - const void* src, size_t srcSize, size_t* srcPos); +size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) +{ + return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); +} -/******************************************************************** -* Advanced streaming functions -* Warning : most of these functions are now redundant with the Advanced API. -* Once Advanced API reaches "stable" status, -* redundant functions will be deprecated, and then at some point removed. -********************************************************************/ +size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + U32 tableLog = 0; + U32 nbSymbols = 0; + size_t iSize; + void* const dtPtr = DTable + 1; + HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; + HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace; -/*===== Advanced Streaming compression functions =====*/ -/**! ZSTD_initCStream_srcSize() : - * This function is deprecated, and equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) - * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * - * pledgedSrcSize must be correct. If it is not known at init time, use - * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, - * "0" also disables frame content size field. It may be enabled in the future. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t -ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, - int compressionLevel, - unsigned long long pledgedSrcSize); + DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp)); + if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge); -/**! ZSTD_initCStream_usingDict() : - * This function is deprecated, and is equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); - * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); - * - * Creates of an internal CDict (incompatible with static CCtx), except if - * dict == NULL or dictSize < 8, in which case no dict is used. - * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if - * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. 
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t -ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - int compressionLevel); + DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); + /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ -/**! ZSTD_initCStream_advanced() : - * This function is deprecated, and is approximately equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * // Pseudocode: Set each zstd parameter and leave the rest as-is. - * for ((param, value) : params) { - * ZSTD_CCtx_setParameter(zcs, param, value); - * } - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); - * - * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. - * pledgedSrcSize must be correct. - * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t -ZSTD_initCStream_advanced(ZSTD_CStream* zcs, - const void* dict, size_t dictSize, - ZSTD_parameters params, - unsigned long long pledgedSrcSize); + iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2); + if (HUF_isError(iSize)) return iSize; -/**! ZSTD_initCStream_usingCDict() : - * This function is deprecated, and equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_refCDict(zcs, cdict); - * - * note : cdict will just be referenced, and must outlive compression session - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); + /* Table header */ + { DTableDesc dtd = HUF_getDTableDesc(DTable); + if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ + dtd.tableType = 0; + dtd.tableLog = (BYTE)tableLog; + ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); + } -/**! ZSTD_initCStream_usingCDict_advanced() : - * This function is DEPRECATED, and is approximately equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. - * for ((fParam, value) : fParams) { - * ZSTD_CCtx_setParameter(zcs, fParam, value); - * } - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * ZSTD_CCtx_refCDict(zcs, cdict); - * - * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. - * pledgedSrcSize must be correct. If srcSize is not known at init time, use - * value ZSTD_CONTENTSIZE_UNKNOWN. - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t -ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, - const ZSTD_CDict* cdict, - ZSTD_frameParameters fParams, - unsigned long long pledgedSrcSize); + /* Compute symbols and rankStart given rankVal: + * + * rankVal already contains the number of values of each weight. + * + * symbols contains the symbols ordered by weight. First are the rankVal[0] + * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on. + * symbols[0] is filled (but unused) to avoid a branch. 
+ * + * rankStart contains the offset where each rank belongs in the DTable. + * rankStart[0] is not filled because there are no entries in the table for + * weight 0. + */ + { + int n; + int nextRankStart = 0; + int const unroll = 4; + int const nLimit = (int)nbSymbols - unroll + 1; + for (n=0; n<(int)tableLog+1; n++) { + U32 const curr = nextRankStart; + nextRankStart += wksp->rankVal[n]; + wksp->rankStart[n] = curr; + } + for (n=0; n < nLimit; n += unroll) { + int u; + for (u=0; u < unroll; ++u) { + size_t const w = wksp->huffWeight[n+u]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u); + } + } + for (; n < (int)nbSymbols; ++n) { + size_t const w = wksp->huffWeight[n]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)n; + } + } -/*! ZSTD_resetCStream() : - * This function is deprecated, and is equivalent to: - * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); - * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); - * - * start a new frame, using same parameters from previous frame. - * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. - * Note that zcs must be init at least once before using ZSTD_resetCStream(). - * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. - * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. - * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, - * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. - * @return : 0, or an error code (which can be tested using ZSTD_isError()) - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); + /* fill DTable + * We fill all entries of each weight in order. + * That way length is a constant for each iteration of the outter loop. + * We can switch based on the length to a different inner loop which is + * optimized for that particular case. 
+     */
+    {
+        U32 w;
+        int symbol=wksp->rankVal[0];
+        int rankStart=0;
+        for (w=1; w<tableLog+1; ++w) {
+            int const symbolCount = wksp->rankVal[w];
+            int const length = (1 << w) >> 1;
+            int uStart = rankStart;
+            BYTE const nbBits = (BYTE)(tableLog + 1 - w);
+            int s;
+            int u;
+            switch (length) {
+            case 1:
+                for (s=0; s<symbolCount; ++s) {
+                    HUF_DEltX1 D;
+                    D.byte = wksp->symbols[symbol + s];
+                    D.nbBits = nbBits;
+                    dt[uStart] = D;
+                    uStart += 1;
+                }
+                break;
+            case 2:
+                for (s=0; s<symbolCount; ++s) {
+                    HUF_DEltX1 D;
+                    D.byte = wksp->symbols[symbol + s];
+                    D.nbBits = nbBits;
+                    dt[uStart+0] = D;
+                    dt[uStart+1] = D;
+                    uStart += 2;
+                }
+                break;
+            case 4:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    MEM_write64(dt + uStart, D4);
+                    uStart += 4;
+                }
+                break;
+            case 8:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    MEM_write64(dt + uStart, D4);
+                    MEM_write64(dt + uStart + 4, D4);
+                    uStart += 8;
+                }
+                break;
+            default:
+                for (s=0; s<symbolCount; ++s) {
+                    U64 const D4 = HUF_DEltX1_set4(wksp->symbols[symbol + s], nbBits);
+                    for (u=0; u < length; u += 16) {
+                        MEM_write64(dt + uStart + u + 0, D4);
+                        MEM_write64(dt + uStart + u + 4, D4);
+                        MEM_write64(dt + uStart + u + 8, D4);
+                        MEM_write64(dt + uStart + u + 12, D4);
+                    }
+                    assert(u == length);
+                    uStart += length;
+                }
+                break;
+            }
+            symbol += symbolCount;
+            rankStart += symbolCount * length;
+        }
+    }
+    return iSize;
+}
+FORCE_INLINE_TEMPLATE BYTE
+HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
+{
+    size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
+    BYTE const c = dt[val].byte;
+    BIT_skipBits(Dstream, dt[val].nbBits);
+    return c;
+}
-typedef struct {
-    unsigned long long ingested;   /* nb input bytes read and buffered */
-    unsigned long long consumed;   /* nb input bytes actually compressed */
-    unsigned long long produced;   /* nb of compressed bytes generated and buffered */
-    unsigned long long flushed;    /* nb of compressed bytes flushed : not provided; can be tracked from caller side */
-    unsigned currentJobID;         /* MT only : latest started job nb */
-    unsigned nbActiveWorkers;      /* MT only : nb of workers actively compressing at probe time */
-} ZSTD_frameProgression;
+#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
+    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
-/* ZSTD_getFrameProgression() :
- * tells how much data has been ingested (read from input)
- * consumed (input actually compressed) and produced (output) for current frame.
- * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed.
- * Aggregates progression inside active worker threads.
- */
-ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx);
+#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
+    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
-/*! ZSTD_toFlushNow() :
- *  Tell how many bytes are ready to be flushed immediately.
- *  Useful for multithreading scenarios (nbWorkers >= 1).
- *  Probe the oldest active job, defined as oldest job not yet entirely flushed,
- *  and check its output buffer.
- *  @return : amount of data stored in oldest job and ready to be flushed immediately.
- *  if @return == 0, it means either :
- *  + there is no active job (could be checked with ZSTD_frameProgression()), or
- *  + oldest job is still actively compressing data,
- *    but everything it has produced has also been flushed so far,
- *    therefore flush speed is limited by production speed of oldest job
- *    irrespective of the speed of concurrent (and newer) jobs.
- */ -ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); +#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) +HINT_INLINE size_t +HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; -/*===== Advanced Streaming decompression functions =====*/ -/** - * This function is deprecated, and is equivalent to: - * - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); - * - * note: no dictionary will be used if dict == NULL or dictSize < 8 - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); + /* up to 4 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_1(p, bitDPtr); + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + } -/** - * This function is deprecated, and is equivalent to: - * - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * ZSTD_DCtx_refDDict(zds, ddict); - * - * note : ddict is referenced, it must outlive decompression session - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); + /* [0-3] symbols remaining */ + if (MEM_32bits()) + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd)) + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); -/** - * This function is deprecated, and is equivalent to: - * - * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); - * - * re-use decompression parameters from previous init; saves dictionary loading - * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x - */ -ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); + /* no more data to retrieve from bitstream, no need to reload */ + while (p < pEnd) + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + + return pEnd-pStart; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X1_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + BYTE* op = (BYTE*)dst; + BYTE* const oend = op + dstSize; + const void* dtPtr = DTable + 1; + const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; + BIT_DStream_t bitD; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); -/********************************************************************* -* Buffer-less and synchronous inner streaming functions -* -* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. -* But it's also a complex one, with several restrictions, documented below. -* Prefer normal streaming API for an easier experience. -********************************************************************* */ + HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog); -/** - Buffer-less streaming compression (synchronous mode) + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); - A ZSTD_CCtx object is required to track streaming operations. - Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. 
- ZSTD_CCtx object can be re-used multiple times within successive compression operations. + return dstSize; +} - Start by initializing a context. - Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression, - or ZSTD_compressBegin_advanced(), for finer parameter control. - It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() +FORCE_INLINE_TEMPLATE size_t +HUF_decompress4X1_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + /* Check */ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ - Then, consume your input using ZSTD_compressContinue(). - There are some important considerations to keep in mind when using this advanced function : - - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only. - - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks. - - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. - Worst case evaluation is provided by ZSTD_compressBound(). - ZSTD_compressContinue() doesn't guarantee recover after a failed compression. - - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog). - It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks) - - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps. - In which case, it will "discard" the relevant memory section from its history. + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - 3; + const void* const dtPtr = DTable + 1; + const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; - Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. - It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. - Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + U32 endSignal = 1; - `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. 
-*/ + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); -/*===== Buffer-less streaming compression functions =====*/ -ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); -ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ -ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */ -ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ -ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ + /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ + for ( ; (endSignal) & (op4 < olimit) ; ) { + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_1(op1, &bitD1); + HUF_DECODE_SYMBOLX1_1(op2, &bitD2); + HUF_DECODE_SYMBOLX1_1(op3, &bitD3); + HUF_DECODE_SYMBOLX1_1(op4, &bitD4); + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_0(op1, &bitD1); + HUF_DECODE_SYMBOLX1_0(op2, &bitD2); + HUF_DECODE_SYMBOLX1_0(op3, &bitD3); + HUF_DECODE_SYMBOLX1_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; + } -ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + /* check corruption */ + /* note : should not be necessary : op# advance in lock step, and we control op4. 
+ * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + /* finish bitStreams one by one */ + HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog); -/*- - Buffer-less streaming decompression (synchronous mode) + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } - A ZSTD_DCtx object is required to track streaming operations. - Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. - A ZSTD_DCtx object can be re-used multiple times. + /* decoded size */ + return dstSize; + } +} - First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). - Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. - Data fragment must be large enough to ensure successful decoding. - `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. - @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. - >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. - errorCode, which can be tested using ZSTD_isError(). - It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, - such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). - Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. - As a consequence, check that values remain within valid application range. - For example, do not allocate memory blindly, check that `windowSize` is within expectation. - Each application can set its own limits, depending on local restrictions. - For extended interoperability, it is recommended to support `windowSize` of at least 8 MB. +typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, + const void *cSrc, + size_t cSrcSize, + const HUF_DTable *DTable); - ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes. - ZSTD_decompressContinue() is very sensitive to contiguity, - if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, - or that previous contiguous segment is large enough to properly handle maximum back-reference distance. - There are multiple ways to guarantee this condition. +HUF_DGEN(HUF_decompress1X1_usingDTable_internal) +HUF_DGEN(HUF_decompress4X1_usingDTable_internal) - The most memory efficient way is to use a round buffer of sufficient size. - Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), - which can @return an error code if required value is too large for current system (in 32-bits mode). 
- In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, - up to the moment there is not enough room left in the buffer to guarantee decoding another full block, - which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. - At which point, decoding can resume from the beginning of the buffer. - Note that already decoded data stored in the buffer should be flushed before being overwritten. - There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory. - Finally, if you control the compression process, you can also ignore all buffer size rules, - as long as the encoder and decoder progress in "lock-step", - aka use exactly the same buffer sizes, break contiguity at the same place, etc. +size_t HUF_decompress1X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} - Once buffers are setup, start decompression, with ZSTD_decompressBegin(). - If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict(). +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; - Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. - ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). - ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. + size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; - @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). - It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. - It can also be an error code, which can be tested with ZSTD_isError(). + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} - A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. - Context can then be reset to start a new decompression. - Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). - This information is not required to properly decode a frame. +size_t HUF_decompress4X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} - == Special case : skippable frames == +static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; - Skippable frames allow integration of user-defined data into a flow of concatenated frames. - Skippable frames will be ignored (skipped) by decompressor. 
- The format of skippable frames is as follows : - a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F - b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits - c) Frame Content - any content (User Data) of length equal to Frame Size - For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame. - For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content. -*/ + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; -/*===== Buffer-less streaming decompression functions =====*/ -typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; -typedef struct { - unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ - unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ - unsigned blockSizeMax; - ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ - unsigned headerSize; - unsigned dictID; - unsigned checksumFlag; -} ZSTD_frameHeader; + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} -/*! ZSTD_getFrameHeader() : - * decode Frame Header, or requires larger `srcSize`. - * @return : 0, `zfhPtr` is correctly filled, - * >0, `srcSize` is too small, value is wanted `srcSize` amount, - * or an error code, which can be tested using ZSTD_isError() */ -ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */ -/*! 
ZSTD_getFrameHeader_advanced() : - * same as ZSTD_getFrameHeader(), - * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ -ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); -ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); +} -ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); -ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); -ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); -ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +#endif /* HUF_FORCE_DECOMPRESS_X2 */ -/* misc */ -ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); -typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; -ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); +#ifndef HUF_FORCE_DECOMPRESS_X1 +/* *************************/ +/* double-symbols decoding */ +/* *************************/ +typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ +typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; +typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; +typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; -/* ============================ */ -/** Block level API */ -/* ============================ */ -/*! - Block functions produce and decode raw zstd blocks, without frame metadata. - Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). - But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. +/* HUF_fillDTableX2Level2() : + * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ +static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed, + const U32* rankValOrigin, const int minWeight, + const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, + U32 nbBitsBaseline, U16 baseSeq, U32* wksp, size_t wkspSize) +{ + HUF_DEltX2 DElt; + U32* rankVal = wksp; - A few rules to respect : - - Compressing and decompressing require a context structure - + Use ZSTD_createCCtx() and ZSTD_createDCtx() - - It is necessary to init context before starting - + compression : any ZSTD_compressBegin*() variant, including with dictionary - + decompression : any ZSTD_decompressBegin*() variant, including with dictionary - + copyCCtx() and copyDCtx() can be used too - - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB - + If input is larger than a block size, it's necessary to split input data into multiple blocks - + For inputs larger than a single block, consider using regular ZSTD_compress() instead. 
-      Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block.
-    - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) !
-      ===> In which case, nothing is produced into `dst` !
-      + User __must__ test for such outcome and deal directly with uncompressed data
-      + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0.
-        Doing so would mess up with statistics history, leading to potential data corruption.
-      + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !!
-      + In case of multiple successive blocks, should some of them be uncompressed,
-        decoder must be informed of their existence in order to follow proper history.
-        Use ZSTD_insertBlock() for such a case.
-*/
+    assert(wkspSize >= HUF_TABLELOG_MAX + 1);
+    (void)wkspSize;
+    /* get pre-calculated rankVal */
+    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
-/*===== Raw zstd block functions =====*/
-ZSTDLIB_API size_t ZSTD_getBlockSize   (const ZSTD_CCtx* cctx);
-ZSTDLIB_API size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-ZSTDLIB_API size_t ZSTD_insertBlock    (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
+    /* fill skipped values */
+    if (minWeight>1) {
+        U32 i, skipSize = rankVal[minWeight];
+        MEM_writeLE16(&(DElt.sequence), baseSeq);
+        DElt.nbBits   = (BYTE)(consumed);
+        DElt.length   = 1;
+        for (i = 0; i < skipSize; i++)
+            DTable[i] = DElt;
+    }
+    /* fill DTable */
+    { U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
+        const U32 symbol = sortedSymbols[s].symbol;
+        const U32 weight = sortedSymbols[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 length = 1 << (sizeLog-nbBits);
+        const U32 start = rankVal[weight];
+        U32 i = start;
+        const U32 end = start + length;
+        MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
+        DElt.nbBits   = (BYTE)(nbBits + consumed);
+        DElt.length   = 2;
+        do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
-#if defined (__cplusplus)
+        rankVal[weight] += length;
+    }   }   }
-#endif
-/**** ended inlining zstd.h ****/
-#define FSE_STATIC_LINKING_ONLY
-/**** skipping file: fse.h ****/
-#define HUF_STATIC_LINKING_ONLY
-/**** skipping file: huf.h ****/
-#ifndef XXH_STATIC_LINKING_ONLY
-#  define XXH_STATIC_LINKING_ONLY   /* XXH64_state_t */
-#endif
-/**** skipping file: xxhash.h ****/
-#if defined (__cplusplus)
-extern "C" {
-#endif
-/* ---- static assert (debug) --- */
-#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
-#define ZSTD_isError ERR_isError /* for inlining */
-#define FSE_isError ERR_isError
-#define HUF_isError ERR_isError
+static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
+                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32 nbBitsBaseline, U32* wksp, size_t wkspSize)
+{
+    U32* rankVal = wksp;
+    const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
+    const U32 minBits  = nbBitsBaseline - maxWeight;
+    U32 s;
+    assert(wkspSize >= HUF_TABLELOG_MAX + 1);
+    wksp += HUF_TABLELOG_MAX + 1;
+    wkspSize -= HUF_TABLELOG_MAX + 1;
-/*-*************************************
-*  shared macros
-***************************************/
-#undef MIN
-#undef MAX
-#define MIN(a,b) ((a)<(b) ? (a) : (b))
-#define MAX(a,b) ((a)>(b) ? (a) : (b))
+    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
-/**
- * Return the specified error if the condition evaluates to true.
- *
- * In debug modes, prints additional information.
- * In order to do that (particularly, printing the conditional that failed),
- * this can't just wrap RETURN_ERROR().
- */
-#define RETURN_ERROR_IF(cond, err, ...) \
-  if (cond) { \
-    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
-    RAWLOG(3, ": " __VA_ARGS__); \
-    RAWLOG(3, "\n"); \
-    return ERROR(err); \
-  }
+    /* fill DTable */
+    for (s=0; s<sortedListSize; s++) {
+        const U16 symbol = sortedList[s].symbol;
+        const U32 weight = sortedList[s].weight;
+        const U32 nbBits = nbBitsBaseline - weight;
+        const U32 start = rankVal[weight];
+        const U32 length = 1 << (targetLog-nbBits);
+        if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
+            U32 sortedRank;
+            int minWeight = nbBits + scaleLog;
+            if (minWeight < 1) minWeight = 1;
+            sortedRank = rankStart[minWeight];
+            HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
+                           rankValOrigin[nbBits], minWeight,
+                           sortedList+sortedRank, sortedListSize-sortedRank,
+                           nbBitsBaseline, symbol, wksp, wkspSize);
+        } else {
+            HUF_DEltX2 DElt;
+            MEM_writeLE16(&(DElt.sequence), symbol);
+            DElt.nbBits = (BYTE)(nbBits);
+            DElt.length = 1;
+            { U32 const end = start + length;
+                U32 u;
+                for (u = start; u < end; u++) DTable[u] = DElt;
+        }   }
+        rankVal[weight] += length;
+    }
+}
-/**
- * If the provided expression evaluates to an error code, returns that error code.
- *
- * In debug modes, prints additional information.
- */
-#define FORWARD_IF_ERROR(err, ...) \
-  do { \
-    size_t const err_code = (err); \
-    if (ERR_isError(err_code)) { \
-      RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
-      RAWLOG(3, ": " __VA_ARGS__); \
-      RAWLOG(3, "\n"); \
-      return err_code; \
-    } \
-  } while(0);
+typedef struct {
+    rankValCol_t rankVal[HUF_TABLELOG_MAX];
+    U32 rankStats[HUF_TABLELOG_MAX + 1];
+    U32 rankStart0[HUF_TABLELOG_MAX + 2];
+    sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
+    BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
+    U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
+} HUF_ReadDTableX2_Workspace;
+size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
+                       const void* src, size_t srcSize,
+                       void* workSpace, size_t wkspSize)
+{
+    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    DTableDesc dtd = HUF_getDTableDesc(DTable);
+    U32 const maxTableLog = dtd.maxTableLog;
+    size_t iSize;
+    void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
+    HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
+    U32 *rankStart;
+    HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace;
-/*-*************************************
-*  Common constants
-***************************************/
-#define ZSTD_OPT_NUM    (1<<12)
-#define ZSTD_REP_NUM      3                 /* number of repcodes */
-#define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
-static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
+    if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC);
-#define KB *(1 <<10)
-#define MB *(1 <<20)
-#define GB *(1U<<30)
+    rankStart = wksp->rankStart0 + 1;
+    ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats));
+    ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0));
+    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable));   /* if compiler fails here, assertion is wrong */
+    if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
+    /* ZSTD_memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
-#define BIT7 128
-#define BIT6 64
-#define BIT5 32
-#define BIT4 16
-#define BIT1 2
-#define BIT0 1
+    iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), /* bmi2 */ 0);
+    if (HUF_isError(iSize)) return iSize;
-#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
-static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
-static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
+    /* check result */
+    if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
-#define ZSTD_FRAMEIDSIZE 4   /* magic number size */
+    /* find maxWeight */
+    for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
-#define ZSTD_BLOCKHEADERSIZE 3   /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
-static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
-typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
+    /* Get start index of each weight */
+    { U32 w, nextRankStart = 0;
+        for (w=1; w<maxW+1; w++) {
+            U32 curr = nextRankStart;
+            nextRankStart += wksp->rankStats[w];
+            rankStart[w] = curr;
+        }
+        rankStart[0] = nextRankStart;  /* put all 0w symbols at the end of sorted list*/
+        sizeOfSort = nextRankStart;
+    }
-#define ZSTD_FRAMECHECKSUMSIZE 4
+    /* sort symbols by weight */
+    { U32 s;
+        for (s=0; s<nbSymbols; s++) {
+            U32 const w = wksp->weightList[s];
+            U32 const r = rankStart[w]++;
+            wksp->sortedSymbol[r].symbol = (BYTE)s;
+            wksp->sortedSymbol[r].weight = (BYTE)w;
+        }
+        rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
+    }
+    /* Build rankVal */
+    { U32* const rankVal0 = wksp->rankVal[0];
+        { int const rescale = (maxTableLog-tableLog) - 1;   /* tableLog <= maxTableLog */
+            U32 nextRankVal = 0;
+            U32 w;
+            for (w=1; w<maxW+1; w++) {
+                U32 curr = nextRankVal;
+                nextRankVal += wksp->rankStats[w] << (w+rescale);
+                rankVal0[w] = curr;
+        }   }
+        { U32 const minBits = tableLog+1 - maxW;
+            U32 consumed;
+            for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
+                U32* const rankValPtr = wksp->rankVal[consumed];
+                U32 w;
+                for (w = 1; w < maxW+1; w++) {
+                    rankValPtr[w] = rankVal0[w] >> consumed;
+        }   }   }   }
+    HUF_fillDTableX2(dt, maxTableLog,
+                   wksp->sortedSymbol, sizeOfSort,
+                   wksp->rankStart0, wksp->rankVal, maxW,
+                   tableLog+1,
+                   wksp->calleeWksp, sizeof(wksp->calleeWksp) / sizeof(U32));
+    dtd.tableLog = (BYTE)maxTableLog;
+    dtd.tableType = 1;
+    ZSTD_memcpy(DTable, &dtd, sizeof(dtd));
+    return iSize;
+}
-#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
-#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */
-#define HufLog 12
-typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    ZSTD_memcpy(op, dt+val, 2);
+    BIT_skipBits(DStream, dt[val].nbBits);
+    return dt[val].length;
+}
-#define LONGNBSEQ 0x7F00
+FORCE_INLINE_TEMPLATE U32
+HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
+{
+    size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
+    ZSTD_memcpy(op, dt+val, 1);
+    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
+    else {
+        if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
+            BIT_skipBits(DStream, dt[val].nbBits);
+            if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
+                /* ugly hack; 
works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */ + DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); + } } + return 1; +} -#define MINMATCH 3 +#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) -#define Litbits 8 -#define MaxLit ((1<bitContainer)-1))) { + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_1(p, bitDPtr); + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } -/*-******************************************* -* Shared functions to include for inlining -*********************************************/ -static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); } + /* closer to end : up to 2 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); -#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } -static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); } -#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } + while (p <= pEnd-2) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ -#define WILDCOPY_OVERLENGTH 32 -#define WILDCOPY_VECLEN 16 + if (p < pEnd) + p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); -typedef enum { - ZSTD_no_overlap, - ZSTD_overlap_src_before_dst - /* ZSTD_overlap_dst_before_src, */ -} ZSTD_overlap_e; + return p-pStart; +} -/*! ZSTD_wildcopy() : - * Custom version of memcpy(), can over read/write up to WILDCOPY_OVERLENGTH bytes (if length==0) - * @param ovtype controls the overlap detection - * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. - * - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart. - * The src buffer must be before the dst buffer. - */ -MEM_STATIC FORCE_INLINE_ATTR -void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype) +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) { - ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; - const BYTE* ip = (const BYTE*)src; - BYTE* op = (BYTE*)dst; - BYTE* const oend = op + length; + BIT_DStream_t bitD; - assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); + /* Init */ + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); - if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { - /* Handle short offset copies. */ - do { - COPY8(op, ip) - } while (op < oend); - } else { - assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); - /* Separate out the first COPY16() call because the copy length is - * almost certain to be short, so the branches have different - * probabilities. Since it is almost certain to be short, only do - * one COPY16() in the first call. Then, do two calls per loop since - * at that point it is more likely to have a high trip count. 
- */ - COPY16(op, ip); - if (op >= oend) return; - do { - COPY16(op, ip); - COPY16(op, ip); - } - while (op < oend); + /* decode */ + { BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog); } + + /* check */ + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; } +FORCE_INLINE_TEMPLATE size_t +HUF_decompress4X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ -/*-******************************************* -* Private declarations -*********************************************/ -typedef struct seqDef_s { - U32 offset; - U16 litLength; - U16 matchLength; -} seqDef; + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - (sizeof(size_t)-1); + const void* const dtPtr = DTable+1; + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; -typedef struct { - seqDef* sequencesStart; - seqDef* sequences; - BYTE* litStart; - BYTE* lit; - BYTE* llCode; - BYTE* mlCode; - BYTE* ofCode; - size_t maxNbSeq; - size_t maxNbLit; - U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */ - U32 longLengthPos; -} seqStore_t; + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + size_t const segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal = 1; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; -/** - * Contains the compressed frame size and an upper-bound for the decompressed frame size. - * Note: before using `compressedSize`, check for errors using ZSTD_isError(). 
- * similarly, before using `decompressedBound`, check for errors using: - * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` - */ -typedef struct { - size_t compressedSize; - unsigned long long decompressedBound; -} ZSTD_frameSizeInfo; /* decompress & legacy */ + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); -const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ -void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ + /* 16-32 symbols per loop (4-8 symbols per stream) */ + for ( ; (endSignal) & (op4 < olimit); ) { +#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; +#else + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal = (U32)LIKELY( + (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); +#endif + } -/* custom memory allocation functions */ -void* ZSTD_malloc(size_t size, ZSTD_customMem customMem); -void* ZSTD_calloc(size_t size, ZSTD_customMem customMem); -void ZSTD_free(void* ptr, ZSTD_customMem customMem); + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & 
BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } -MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ -{ - assert(val != 0); - { -# if defined(_MSC_VER) /* Visual */ - unsigned long r=0; - return _BitScanReverse(&r, val) ? (unsigned)r : 0; -# elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ - return __builtin_clz (val) ^ 31; -# elif defined(__ICCARM__) /* IAR Intrinsic */ - return 31 - __CLZ(val); -# else /* Software version */ - static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; - U32 v = val; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; -# endif + /* decoded size */ + return dstSize; } } +HUF_DGEN(HUF_decompress1X2_usingDTable_internal) +HUF_DGEN(HUF_decompress4X2_usingDTable_internal) -/* ZSTD_invalidateRepCodes() : - * ensures next compression will not use repcodes from previous block. - * Note : only works with regular variant; - * do not use with extDict variant ! */ -void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */ - - -typedef struct { - blockType_e blockType; - U32 lastBlock; - U32 origSize; -} blockProperties_t; /* declared here for decompress and fullbench */ - -/*! ZSTD_getcBlockSize() : - * Provides the size of compressed block from block header `src` */ -/* Used by: decompress, fullbench (does not get its definition from here) */ -size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, - blockProperties_t* bpPtr); +size_t HUF_decompress1X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} -/*! 
ZSTD_decodeSeqHeaders() : - * decode sequence header from src */ -/* Used by: decompress, fullbench (does not get its definition from here) */ -size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, - const void* src, size_t srcSize); +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; -#if defined (__cplusplus) + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); } -#endif - -#endif /* ZSTD_CCOMMON_H_MODULE */ -/**** ended inlining zstd_internal.h ****/ - -/*-**************************************** -* Version -******************************************/ -unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; } -const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } +size_t HUF_decompress4X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} +static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; -/*-**************************************** -* ZSTD Error Management -******************************************/ -#undef ZSTD_isError /* defined within zstd_internal.h */ -/*! ZSTD_isError() : - * tells if a return value is an error code - * symbol is required for external callers */ -unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } + size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; -/*! ZSTD_getErrorName() : - * provides error code string from function result (useful for debugging) */ -const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} -/*! ZSTD_getError() : - * convert a `size_t` function result into a proper ZSTD_errorCode enum */ -ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); +} -/*! 
ZSTD_getErrorString() : - * provides error code string from enum */ -const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } +#endif /* HUF_FORCE_DECOMPRESS_X1 */ -/*=************************************************************** -* Custom allocator -****************************************************************/ -void* ZSTD_malloc(size_t size, ZSTD_customMem customMem) -{ - if (customMem.customAlloc) - return customMem.customAlloc(customMem.opaque, size); - return malloc(size); -} +/* ***********************************/ +/* Universal decompression selectors */ +/* ***********************************/ -void* ZSTD_calloc(size_t size, ZSTD_customMem customMem) +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) { - if (customMem.customAlloc) { - /* calloc implemented as malloc+memset; - * not as efficient as calloc, but next best guess for custom malloc */ - void* const ptr = customMem.customAlloc(customMem.opaque, size); - memset(ptr, 0, size); - return ptr; - } - return calloc(1, size); + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else + return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif } -void ZSTD_free(void* ptr, ZSTD_customMem customMem) +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) { - if (ptr!=NULL) { - if (customMem.customFree) - customMem.customFree(customMem.opaque, ptr); - else - free(ptr); - } + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else + return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif } -/**** ended inlining zstd_common.c ****/ -/**** start inlining huf_decompress.c ****/ -/* ****************************************************************** - * huff0 huffman decoder, - * part of Finite State Entropy library - * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc. - * - * You can contact the author at : - * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
-****************************************************************** */ - -/* ************************************************************** -* Dependencies -****************************************************************/ -#include /* memcpy, memset */ -/**** skipping file: compiler.h ****/ -/**** skipping file: bitstream.h ****/ -/**** skipping file: fse.h ****/ -#define HUF_STATIC_LINKING_ONLY -/**** skipping file: huf.h ****/ -/**** skipping file: error_private.h ****/ -/* ************************************************************** -* Macros -****************************************************************/ -/* These two optional macros force the use one way or another of the two - * Huffman decompression implementations. You can't force in both directions - * at the same time. - */ -#if defined(HUF_FORCE_DECOMPRESS_X1) && \ - defined(HUF_FORCE_DECOMPRESS_X2) -#error "Cannot force the use of the X1 and X2 decoders at the same time!" +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) +typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; +static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = +{ + /* single, double, quad */ + {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ + {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ + {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ + {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ + {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ + {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ + {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ + {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ + {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ + {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ + {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ + {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ + {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ + {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ + {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ + {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ +}; #endif - -/* ************************************************************** -* Error Management -****************************************************************/ -#define HUF_isError ERR_isError -#ifndef CHECK_F -#define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; } +/** HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) +{ + assert(dstSize > 0); + assert(dstSize <= 128*1024); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dstSize; + (void)cSrcSize; + return 0; +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dstSize; + (void)cSrcSize; + return 1; +#else + /* decoder timing evaluation */ + { U32 const Q = (cSrcSize >= dstSize) ? 
15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ + U32 const D256 = (U32)(dstSize >> 8); + U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); + U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); + DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ + return DTime1 < DTime0; + } #endif +} -/* ************************************************************** -* Byte alignment for workSpace management -****************************************************************/ -#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) -#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) - - -/* ************************************************************** -* BMI2 Variant Wrappers -****************************************************************/ -#if DYNAMIC_BMI2 - -#define HUF_DGEN(fn) \ - \ - static size_t fn##_default( \ - void* dst, size_t dstSize, \ - const void* cSrc, size_t cSrcSize, \ - const HUF_DTable* DTable) \ - { \ - return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ - } \ - \ - static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ - void* dst, size_t dstSize, \ - const void* cSrc, size_t cSrcSize, \ - const HUF_DTable* DTable) \ - { \ - return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ - } \ - \ - static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ - size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ - { \ - if (bmi2) { \ - return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ - } \ - return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ - } +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, + size_t dstSize, const void* cSrc, + size_t cSrcSize, void* workSpace, + size_t wkspSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); #else - -#define HUF_DGEN(fn) \ - static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ - size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ - { \ - (void)bmi2; \ - return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + return algoNb ? 
HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#endif } +} -#endif +size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#else + return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#endif + } +} -/*-***************************/ -/* generic DTableDesc */ -/*-***************************/ -typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; -static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) +size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) { - DTableDesc dtd; - memcpy(&dtd, table, sizeof(dtd)); - return dtd; + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else + return dtd.tableType ? 
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#endif } - #ifndef HUF_FORCE_DECOMPRESS_X2 - -/*-***************************/ -/* single-symbol decoding */ -/*-***************************/ -typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ - -size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) +size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) { - U32 tableLog = 0; - U32 nbSymbols = 0; - size_t iSize; - void* const dtPtr = DTable + 1; - HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; - - U32* rankVal; - BYTE* huffWeight; - size_t spaceUsed32 = 0; + const BYTE* ip = (const BYTE*) cSrc; - rankVal = (U32 *)workSpace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1; - huffWeight = (BYTE *)((U32 *)workSpace + spaceUsed32); - spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; - if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} +#endif - DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); - /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ +size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else + return dtd.tableType ? 
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#endif +} - iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize); - if (HUF_isError(iSize)) return iSize; +size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); - /* Table header */ - { DTableDesc dtd = HUF_getDTableDesc(DTable); - if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ - dtd.tableType = 0; - dtd.tableLog = (BYTE)tableLog; - memcpy(DTable, &dtd, sizeof(dtd)); + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#else + return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : + HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#endif } - - /* Calculate starting value for each rank */ - { U32 n, nextRankStart = 0; - for (n=1; n> 1; - size_t const uStart = rankVal[w]; - size_t const uEnd = uStart + length; - size_t u; - HUF_DEltX1 D; - D.byte = (BYTE)n; - D.nbBits = (BYTE)(tableLog + 1 - w); - rankVal[w] = (U32)uEnd; - if (length < 4) { - /* Use length in the loop bound so the compiler knows it is short. */ - for (u = 0; u < length; ++u) - dt[uStart + u] = D; - } else { - /* Unroll the loop 4 times, we know it is a power of 2. 
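 *                  length equals (1 << w) >> 1 here, so whenever it is >= 4 it is
 *                  also a multiple of 4 and stepping u by 4 lands exactly on uEnd.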
*/ - for (u = uStart; u < uEnd; u += 4) { - dt[u + 0] = D; - dt[u + 1] = D; - dt[u + 2] = D; - dt[u + 3] = D; - } } } } - return iSize; } +#ifndef ZSTD_NO_UNUSED_FUNCTIONS +#ifndef HUF_FORCE_DECOMPRESS_X2 size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) { U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; @@ -6863,1065 +9145,1108 @@ size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize) workSpace, sizeof(workSpace)); } -FORCE_INLINE_TEMPLATE BYTE -HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog) +size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) { - size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ - BYTE const c = dt[val].byte; - BIT_skipBits(Dstream, dt[val].nbBits); - return c; + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); } -#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \ - *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog) +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif -#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ - if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ - HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_readDTableX2_wksp(DTable, src, srcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} + +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); + return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif -#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ - if (MEM_64bits()) \ - HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} -HINT_INLINE size_t -HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog) +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { - BYTE* const pStart = p; + HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); + return 
HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +} +#endif - /* up to 4 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { - HUF_DECODE_SYMBOLX1_2(p, bitDPtr); - HUF_DECODE_SYMBOLX1_1(p, bitDPtr); - HUF_DECODE_SYMBOLX1_2(p, bitDPtr); - HUF_DECODE_SYMBOLX1_0(p, bitDPtr); - } +typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); - /* [0-3] symbols remaining */ - if (MEM_32bits()) - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd)) - HUF_DECODE_SYMBOLX1_0(p, bitDPtr); +size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) + static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; +#endif - /* no more data to retrieve from bitstream, no need to reload */ - while (p < pEnd) - HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ - return pEnd-pStart; + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); +#else + return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); +#endif + } } -FORCE_INLINE_TEMPLATE size_t -HUF_decompress1X1_usingDTable_internal_body( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) { - BYTE* op = (BYTE*)dst; - BYTE* const oend = op + dstSize; - const void* dtPtr = DTable + 1; - const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; - BIT_DStream_t bitD; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; - - CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); - - HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog); + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ - if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); +#else + return algoNb ? 
HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : + HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; +#endif + } +} - return dstSize; +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) +{ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); } -FORCE_INLINE_TEMPLATE size_t -HUF_decompress4X1_usingDTable_internal_body( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) +size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize) { - /* Check */ - if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; + return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, + workSpace, sizeof(workSpace)); +} +#endif +/**** ended inlining decompress/huf_decompress.c ****/ +/**** start inlining decompress/zstd_ddict.c ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ - { const BYTE* const istart = (const BYTE*) cSrc; - BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - BYTE* const olimit = oend - 3; - const void* const dtPtr = DTable + 1; - const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; +/* zstd_ddict.c : + * concentrates all logic that needs to know the internals of ZSTD_DDict object */ - /* Init */ - BIT_DStream_t bitD1; - BIT_DStream_t bitD2; - BIT_DStream_t bitD3; - BIT_DStream_t bitD4; - size_t const length1 = MEM_readLE16(istart); - size_t const length2 = MEM_readLE16(istart+2); - size_t const length3 = MEM_readLE16(istart+4); - size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); - const BYTE* const istart1 = istart + 6; /* jumpTable */ - const BYTE* const istart2 = istart1 + length1; - const BYTE* const istart3 = istart2 + length2; - const BYTE* const istart4 = istart3 + length3; - const size_t segmentSize = (dstSize+3) / 4; - BYTE* const opStart2 = ostart + segmentSize; - BYTE* const opStart3 = opStart2 + segmentSize; - BYTE* const opStart4 = opStart3 + segmentSize; - BYTE* op1 = ostart; - BYTE* op2 = opStart2; - BYTE* op3 = opStart3; - BYTE* op4 = opStart4; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; - U32 endSignal = 1; +/*-******************************************************* +* Dependencies +*********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** start inlining ../common/cpu.h ****/ +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ - if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ - CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); - CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); - CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); - CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); +#ifndef ZSTD_COMMON_CPU_H +#define ZSTD_COMMON_CPU_H - /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ - for ( ; (endSignal) & (op4 < olimit) ; ) { - HUF_DECODE_SYMBOLX1_2(op1, &bitD1); - HUF_DECODE_SYMBOLX1_2(op2, &bitD2); - HUF_DECODE_SYMBOLX1_2(op3, &bitD3); - HUF_DECODE_SYMBOLX1_2(op4, &bitD4); - HUF_DECODE_SYMBOLX1_1(op1, &bitD1); - HUF_DECODE_SYMBOLX1_1(op2, &bitD2); - HUF_DECODE_SYMBOLX1_1(op3, &bitD3); - HUF_DECODE_SYMBOLX1_1(op4, &bitD4); - HUF_DECODE_SYMBOLX1_2(op1, &bitD1); - HUF_DECODE_SYMBOLX1_2(op2, &bitD2); - HUF_DECODE_SYMBOLX1_2(op3, &bitD3); - HUF_DECODE_SYMBOLX1_2(op4, &bitD4); - HUF_DECODE_SYMBOLX1_0(op1, &bitD1); - HUF_DECODE_SYMBOLX1_0(op2, &bitD2); - HUF_DECODE_SYMBOLX1_0(op3, &bitD3); - HUF_DECODE_SYMBOLX1_0(op4, &bitD4); - endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; - } +/** + * Implementation taken from folly/CpuId.h + * https://github.com/facebook/folly/blob/master/folly/CpuId.h + */ - /* check corruption */ - /* note : should not be necessary : op# advance in lock step, and we control op4. - * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ - if (op1 > opStart2) return ERROR(corruption_detected); - if (op2 > opStart3) return ERROR(corruption_detected); - if (op3 > opStart4) return ERROR(corruption_detected); - /* note : op4 supposed already verified within main loop */ +/**** skipping file: mem.h ****/ - /* finish bitStreams one by one */ - HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog); - HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog); - HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog); - HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog); +#ifdef _MSC_VER +#include +#endif - /* check */ - { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); - if (!endCheck) return ERROR(corruption_detected); } +typedef struct { + U32 f1c; + U32 f1d; + U32 f7b; + U32 f7c; +} ZSTD_cpuid_t; - /* decoded size */ - return dstSize; +MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { + U32 f1c = 0; + U32 f1d = 0; + U32 f7b = 0; + U32 f7c = 0; +#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) + int reg[4]; + __cpuid((int*)reg, 0); + { + int const n = reg[0]; + if (n >= 1) { + __cpuid((int*)reg, 1); + f1c = (U32)reg[2]; + f1d = (U32)reg[3]; + } + if (n >= 7) { + __cpuidex((int*)reg, 7, 0); + f7b = (U32)reg[1]; + f7c = (U32)reg[2]; + } + } +#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__) + /* The following block like the normal cpuid branch below, but gcc + * reserves ebx for use of its pic register so we must specially + * handle the save and restore to avoid clobbering the register + */ + U32 n; + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "popl %%ebx\n\t" + : "=a"(n) + : "a"(0) + : "ecx", "edx"); + if (n >= 1) { + U32 f1a; + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "popl %%ebx\n\t" + : "=a"(f1a), "=c"(f1c), "=d"(f1d) + : "a"(1)); + 
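        /* note: leaf 1 only needs the ECX/EDX outputs, so its EBX result is simply
         * discarded; the leaf-7 query below instead moves EBX into EAX before
         * restoring the PIC register. */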
} + if (n >= 7) { + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "movl %%ebx, %%eax\n\t" + "popl %%ebx" + : "=a"(f7b), "=c"(f7c) + : "a"(7), "c"(0) + : "edx"); + } +#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__) + U32 n; + __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx"); + if (n >= 1) { + U32 f1a; + __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx"); + } + if (n >= 7) { + U32 f7a; + __asm__("cpuid" + : "=a"(f7a), "=b"(f7b), "=c"(f7c) + : "a"(7), "c"(0) + : "edx"); + } +#endif + { + ZSTD_cpuid_t cpuid; + cpuid.f1c = f1c; + cpuid.f1d = f1d; + cpuid.f7b = f7b; + cpuid.f7c = f7c; + return cpuid; } } +#define X(name, r, bit) \ + MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \ + return ((cpuid.r) & (1U << bit)) != 0; \ + } -typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, - const void *cSrc, - size_t cSrcSize, - const HUF_DTable *DTable); +/* cpuid(1): Processor Info and Feature Bits. */ +#define C(name, bit) X(name, f1c, bit) + C(sse3, 0) + C(pclmuldq, 1) + C(dtes64, 2) + C(monitor, 3) + C(dscpl, 4) + C(vmx, 5) + C(smx, 6) + C(eist, 7) + C(tm2, 8) + C(ssse3, 9) + C(cnxtid, 10) + C(fma, 12) + C(cx16, 13) + C(xtpr, 14) + C(pdcm, 15) + C(pcid, 17) + C(dca, 18) + C(sse41, 19) + C(sse42, 20) + C(x2apic, 21) + C(movbe, 22) + C(popcnt, 23) + C(tscdeadline, 24) + C(aes, 25) + C(xsave, 26) + C(osxsave, 27) + C(avx, 28) + C(f16c, 29) + C(rdrand, 30) +#undef C +#define D(name, bit) X(name, f1d, bit) + D(fpu, 0) + D(vme, 1) + D(de, 2) + D(pse, 3) + D(tsc, 4) + D(msr, 5) + D(pae, 6) + D(mce, 7) + D(cx8, 8) + D(apic, 9) + D(sep, 11) + D(mtrr, 12) + D(pge, 13) + D(mca, 14) + D(cmov, 15) + D(pat, 16) + D(pse36, 17) + D(psn, 18) + D(clfsh, 19) + D(ds, 21) + D(acpi, 22) + D(mmx, 23) + D(fxsr, 24) + D(sse, 25) + D(sse2, 26) + D(ss, 27) + D(htt, 28) + D(tm, 29) + D(pbe, 31) +#undef D -HUF_DGEN(HUF_decompress1X1_usingDTable_internal) -HUF_DGEN(HUF_decompress4X1_usingDTable_internal) +/* cpuid(7): Extended Features. */ +#define B(name, bit) X(name, f7b, bit) + B(bmi1, 3) + B(hle, 4) + B(avx2, 5) + B(smep, 7) + B(bmi2, 8) + B(erms, 9) + B(invpcid, 10) + B(rtm, 11) + B(mpx, 14) + B(avx512f, 16) + B(avx512dq, 17) + B(rdseed, 18) + B(adx, 19) + B(smap, 20) + B(avx512ifma, 21) + B(pcommit, 22) + B(clflushopt, 23) + B(clwb, 24) + B(avx512pf, 26) + B(avx512er, 27) + B(avx512cd, 28) + B(sha, 29) + B(avx512bw, 30) + B(avx512vl, 31) +#undef B +#define C(name, bit) X(name, f7c, bit) + C(prefetchwt1, 0) + C(avx512vbmi, 1) +#undef C +#undef X +#endif /* ZSTD_COMMON_CPU_H */ +/**** ended inlining ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ +#define FSE_STATIC_LINKING_ONLY +/**** skipping file: ../common/fse.h ****/ +#define HUF_STATIC_LINKING_ONLY +/**** skipping file: ../common/huf.h ****/ +/**** start inlining zstd_decompress_internal.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ -size_t HUF_decompress1X1_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 0) return ERROR(GENERIC); - return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -} -size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - const BYTE* ip = (const BYTE*) cSrc; +/* zstd_decompress_internal: + * objects and definitions shared within lib/decompress modules */ - size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; + #ifndef ZSTD_DECOMPRESS_INTERNAL_H + #define ZSTD_DECOMPRESS_INTERNAL_H - return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); -} +/*-******************************************************* + * Dependencies + *********************************************************/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ -size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} -size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); - return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize); -} -size_t HUF_decompress4X1_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 0) return ERROR(GENERIC); - return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -} +/*-******************************************************* + * Constants + *********************************************************/ +static UNUSED_ATTR const U32 LL_base[MaxLL+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 28, 32, 40, + 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, + 0x2000, 0x4000, 0x8000, 0x10000 }; -static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize, int bmi2) -{ - const BYTE* ip = (const BYTE*) cSrc; +static UNUSED_ATTR const U32 OF_base[MaxOff+1] = { + 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, + 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, + 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, + 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; - size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize, - workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; +static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 }; - return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); -} +static UNUSED_ATTR const U32 ML_base[MaxML+1] = { + 3, 4, 5, 
6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 37, 39, 41, 43, 47, 51, 59, + 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, + 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; -size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); -} +/*-******************************************************* + * Decompression types + *********************************************************/ + typedef struct { + U32 fastMode; + U32 tableLog; + } ZSTD_seqSymbol_header; + + typedef struct { + U16 nextState; + BYTE nbAdditionalBits; + BYTE nbBits; + U32 baseValue; + } ZSTD_seqSymbol; -size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} -size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX); - return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); -} + #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) -#endif /* HUF_FORCE_DECOMPRESS_X2 */ +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32)) +typedef struct { + ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ + ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ + ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ + HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ + U32 rep[ZSTD_REP_NUM]; + U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32]; +} ZSTD_entropyDTables_t; -#ifndef HUF_FORCE_DECOMPRESS_X1 +typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, + ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, + ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, + ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; -/* *************************/ -/* double-symbols decoding */ -/* *************************/ +typedef enum { zdss_init=0, zdss_loadHeader, + zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; -typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ -typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; -typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; -typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; +typedef enum { + ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */ + ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */ + ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ +} ZSTD_dictUses_e; +/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */ +typedef struct { + const ZSTD_DDict** ddictPtrTable; + size_t ddictPtrTableSize; + size_t ddictPtrCount; +} ZSTD_DDictHashSet; -/* HUF_fillDTableX2Level2() : - * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ -static void 
HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed, - const U32* rankValOrigin, const int minWeight, - const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, - U32 nbBitsBaseline, U16 baseSeq) +struct ZSTD_DCtx_s { - HUF_DEltX2 DElt; - U32 rankVal[HUF_TABLELOG_MAX + 1]; + const ZSTD_seqSymbol* LLTptr; + const ZSTD_seqSymbol* MLTptr; + const ZSTD_seqSymbol* OFTptr; + const HUF_DTable* HUFptr; + ZSTD_entropyDTables_t entropy; + U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */ + const void* previousDstEnd; /* detect continuity */ + const void* prefixStart; /* start of current segment */ + const void* virtualStart; /* virtual start of previous segment if it was just before current one */ + const void* dictEnd; /* end of previous segment */ + size_t expected; + ZSTD_frameHeader fParams; + U64 processedCSize; + U64 decodedSize; + blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ + ZSTD_dStage stage; + U32 litEntropy; + U32 fseEntropy; + XXH64_state_t xxhState; + size_t headerSize; + ZSTD_format_e format; + ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */ + U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */ + const BYTE* litPtr; + ZSTD_customMem customMem; + size_t litSize; + size_t rleSize; + size_t staticSize; + int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ - /* get pre-calculated rankVal */ - memcpy(rankVal, rankValOrigin, sizeof(rankVal)); + /* dictionary */ + ZSTD_DDict* ddictLocal; + const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ + U32 dictID; + int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ + ZSTD_dictUses_e dictUses; + ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ + ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. 
Default == 0 (disabled) */ - /* fill skipped values */ - if (minWeight>1) { - U32 i, skipSize = rankVal[minWeight]; - MEM_writeLE16(&(DElt.sequence), baseSeq); - DElt.nbBits = (BYTE)(consumed); - DElt.length = 1; - for (i = 0; i < skipSize; i++) - DTable[i] = DElt; - } + /* streaming */ + ZSTD_dStreamStage streamStage; + char* inBuff; + size_t inBuffSize; + size_t inPos; + size_t maxWindowSize; + char* outBuff; + size_t outBuffSize; + size_t outStart; + size_t outEnd; + size_t lhSize; + void* legacyContext; + U32 previousLegacyVersion; + U32 legacyVersion; + U32 hostageByte; + int noForwardProgress; + ZSTD_bufferMode_e outBufferMode; + ZSTD_outBuffer expectedOutBuffer; - /* fill DTable */ - { U32 s; for (s=0; s= 1 */ + size_t oversizedDuration; - rankVal[weight] += length; - } } -} +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + void const* dictContentBeginForFuzzing; + void const* dictContentEndForFuzzing; +#endif + /* Tracing */ +#if ZSTD_TRACE + ZSTD_TraceCtx traceCtx; +#endif +}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ -static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, - const sortedSymbol_t* sortedList, const U32 sortedListSize, - const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, - const U32 nbBitsBaseline) -{ - U32 rankVal[HUF_TABLELOG_MAX + 1]; - const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ - const U32 minBits = nbBitsBaseline - maxWeight; - U32 s; - memcpy(rankVal, rankValOrigin, sizeof(rankVal)); +/*-******************************************************* + * Shared internal functions + *********************************************************/ - /* fill DTable */ - for (s=0; s= minBits) { /* enough room for a second symbol */ - U32 sortedRank; - int minWeight = nbBits + scaleLog; - if (minWeight < 1) minWeight = 1; - sortedRank = rankStart[minWeight]; - HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits, - rankValOrigin[nbBits], minWeight, - sortedList+sortedRank, sortedListSize-sortedRank, - nbBitsBaseline, symbol); - } else { - HUF_DEltX2 DElt; - MEM_writeLE16(&(DElt.sequence), symbol); - DElt.nbBits = (BYTE)(nbBits); - DElt.length = 1; - { U32 const end = start + length; - U32 u; - for (u = start; u < end; u++) DTable[u] = DElt; - } } - rankVal[weight] += length; - } -} +/*! ZSTD_checkContinuity() : + * check if next `dst` follows previous position, where decompression ended. + * If yes, do nothing (continue on current segment). + * If not, classify previous segment as "external dictionary", and start a new segment. + * This function cannot fail. 
*/ +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize); -size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, - const void* src, size_t srcSize, - void* workSpace, size_t wkspSize) -{ - U32 tableLog, maxW, sizeOfSort, nbSymbols; - DTableDesc dtd = HUF_getDTableDesc(DTable); - U32 const maxTableLog = dtd.maxTableLog; - size_t iSize; - void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */ - HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; - U32 *rankStart; - rankValCol_t* rankVal; - U32* rankStats; - U32* rankStart0; - sortedSymbol_t* sortedSymbol; - BYTE* weightList; - size_t spaceUsed32 = 0; - - rankVal = (rankValCol_t *)((U32 *)workSpace + spaceUsed32); - spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2; - rankStats = (U32 *)workSpace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_MAX + 1; - rankStart0 = (U32 *)workSpace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_MAX + 2; - sortedSymbol = (sortedSymbol_t *)workSpace + (spaceUsed32 * sizeof(U32)) / sizeof(sortedSymbol_t); - spaceUsed32 += HUF_ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2; - weightList = (BYTE *)((U32 *)workSpace + spaceUsed32); - spaceUsed32 += HUF_ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge); - - rankStart = rankStart0 + 1; - memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); +#endif /* ZSTD_DECOMPRESS_INTERNAL_H */ +/**** ended inlining zstd_decompress_internal.h ****/ +/**** start inlining zstd_ddict.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + - DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ - if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); - /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... 
*/ +#ifndef ZSTD_DDICT_H +#define ZSTD_DDICT_H - iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); - if (HUF_isError(iSize)) return iSize; +/*-******************************************************* + * Dependencies + *********************************************************/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../zstd.h ****/ - /* check result */ - if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ - /* find maxWeight */ - for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ +/*-******************************************************* + * Interface + *********************************************************/ - /* Get start index of each weight */ - { U32 w, nextRankStart = 0; - for (w=1; w> consumed; - } } } } +void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); - HUF_fillDTableX2(dt, maxTableLog, - sortedSymbol, sizeOfSort, - rankStart0, rankVal, maxW, - tableLog+1); - dtd.tableLog = (BYTE)maxTableLog; - dtd.tableType = 1; - memcpy(DTable, &dtd, sizeof(dtd)); - return iSize; -} -size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_readDTableX2_wksp(DTable, src, srcSize, - workSpace, sizeof(workSpace)); -} +#endif /* ZSTD_DDICT_H */ +/**** ended inlining zstd_ddict.h ****/ +#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) +/**** start inlining ../legacy/zstd_legacy.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ -FORCE_INLINE_TEMPLATE U32 -HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) -{ - size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - memcpy(op, dt+val, 2); - BIT_skipBits(DStream, dt[val].nbBits); - return dt[val].length; -} +#ifndef ZSTD_LEGACY_H +#define ZSTD_LEGACY_H -FORCE_INLINE_TEMPLATE U32 -HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) -{ - size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - memcpy(op, dt+val, 1); - if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); - else { - if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { - BIT_skipBits(DStream, dt[val].nbBits); - if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) - /* ugly hack; works only because it's the last symbol. 
Note : can't easily extract nbBits from just this symbol */ - DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); - } } - return 1; -} +#if defined (__cplusplus) +extern "C" { +#endif -#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ - ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) +/* ************************************* +* Includes +***************************************/ +/**** skipping file: ../common/mem.h ****/ +/**** skipping file: ../common/error_private.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ -#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ - if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ - ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) +#if !defined (ZSTD_LEGACY_SUPPORT) || (ZSTD_LEGACY_SUPPORT == 0) +# undef ZSTD_LEGACY_SUPPORT +# define ZSTD_LEGACY_SUPPORT 8 +#endif -#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ - if (MEM_64bits()) \ - ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) +#if (ZSTD_LEGACY_SUPPORT <= 1) +/**** start inlining zstd_v01.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ -HINT_INLINE size_t -HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, - const HUF_DEltX2* const dt, const U32 dtLog) -{ - BYTE* const pStart = p; +#ifndef ZSTD_V01_H_28739879432 +#define ZSTD_V01_H_28739879432 - /* up to 8 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_1(p, bitDPtr); - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); - } +#if defined (__cplusplus) +extern "C" { +#endif - /* closer to end : up to 2 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ - while (p <= pEnd-2) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ - if (p < pEnd) - p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv01_decompress() : decompress ZSTD frames compliant with v0.1.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. 
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv01_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv01_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.1.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ +void ZSTDv01_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); - return p-pStart; -} +/** +ZSTDv01_isError() : tells if the result of ZSTDv01_decompress() is an error +*/ +unsigned ZSTDv01_isError(size_t code); -FORCE_INLINE_TEMPLATE size_t -HUF_decompress1X2_usingDTable_internal_body( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - BIT_DStream_t bitD; - /* Init */ - CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv01_Dctx_s ZSTDv01_Dctx; +ZSTDv01_Dctx* ZSTDv01_createDCtx(void); +size_t ZSTDv01_freeDCtx(ZSTDv01_Dctx* dctx); - /* decode */ - { BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ - const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog); - } +size_t ZSTDv01_decompressDCtx(void* ctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); - /* check */ - if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); +/* ************************************* +* Streaming functions +***************************************/ +size_t ZSTDv01_resetDCtx(ZSTDv01_Dctx* dctx); - /* decoded size */ - return dstSize; -} +size_t ZSTDv01_nextSrcSizeToDecompress(ZSTDv01_Dctx* dctx); +size_t ZSTDv01_decompressContinue(ZSTDv01_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. 
+*/ -FORCE_INLINE_TEMPLATE size_t -HUF_decompress4X2_usingDTable_internal_body( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv01_magicNumber 0xFD2FB51E /* Big Endian version */ +#define ZSTDv01_magicNumberLE 0x1EB52FFD /* Little Endian version */ - { const BYTE* const istart = (const BYTE*) cSrc; - BYTE* const ostart = (BYTE*) dst; - BYTE* const oend = ostart + dstSize; - BYTE* const olimit = oend - (sizeof(size_t)-1); - const void* const dtPtr = DTable+1; - const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; - /* Init */ - BIT_DStream_t bitD1; - BIT_DStream_t bitD2; - BIT_DStream_t bitD3; - BIT_DStream_t bitD4; - size_t const length1 = MEM_readLE16(istart); - size_t const length2 = MEM_readLE16(istart+2); - size_t const length3 = MEM_readLE16(istart+4); - size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); - const BYTE* const istart1 = istart + 6; /* jumpTable */ - const BYTE* const istart2 = istart1 + length1; - const BYTE* const istart3 = istart2 + length2; - const BYTE* const istart4 = istart3 + length3; - size_t const segmentSize = (dstSize+3) / 4; - BYTE* const opStart2 = ostart + segmentSize; - BYTE* const opStart3 = opStart2 + segmentSize; - BYTE* const opStart4 = opStart3 + segmentSize; - BYTE* op1 = ostart; - BYTE* op2 = opStart2; - BYTE* op3 = opStart3; - BYTE* op4 = opStart4; - U32 endSignal = 1; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; +#if defined (__cplusplus) +} +#endif - if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ - CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); - CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); - CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); - CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); +#endif /* ZSTD_V01_H_28739879432 */ +/**** ended inlining zstd_v01.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) +/**** start inlining zstd_v02.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ - /* 16-32 symbols per loop (4-8 symbols per stream) */ - for ( ; (endSignal) & (op4 < olimit); ) { -#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_1(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_0(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_1(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_0(op2, &bitD2); - endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_1(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_0(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_1(op4, &bitD4); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_0(op4, &bitD4); - endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; - endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; -#else - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_1(op1, &bitD1); - HUF_DECODE_SYMBOLX2_1(op2, &bitD2); - HUF_DECODE_SYMBOLX2_1(op3, &bitD3); - HUF_DECODE_SYMBOLX2_1(op4, &bitD4); - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_0(op1, &bitD1); - HUF_DECODE_SYMBOLX2_0(op2, &bitD2); - HUF_DECODE_SYMBOLX2_0(op3, &bitD3); - HUF_DECODE_SYMBOLX2_0(op4, &bitD4); - endSignal = LIKELY( - (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) - & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) - & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) - & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); +#ifndef ZSTD_V02_H_4174539423 +#define ZSTD_V02_H_4174539423 + +#if defined (__cplusplus) +extern "C" { #endif - } - /* check corruption */ - if (op1 > opStart2) return ERROR(corruption_detected); - if (op2 > opStart3) return ERROR(corruption_detected); - if (op3 > opStart4) return ERROR(corruption_detected); - /* note : op4 already verified within main loop */ +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ - /* finish bitStreams one by one */ - HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); - HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); - HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); - HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); - /* check */ - { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); - if (!endCheck) return ERROR(corruption_detected); } +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv02_decompress() : decompress ZSTD frames compliant with v0.2.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. 
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv02_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv02_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.2.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ +void ZSTDv02_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); - /* decoded size */ - return dstSize; - } -} +/** +ZSTDv02_isError() : tells if the result of ZSTDv02_decompress() is an error +*/ +unsigned ZSTDv02_isError(size_t code); -HUF_DGEN(HUF_decompress1X2_usingDTable_internal) -HUF_DGEN(HUF_decompress4X2_usingDTable_internal) -size_t HUF_decompress1X2_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 1) return ERROR(GENERIC); - return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -} +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv02_Dctx_s ZSTDv02_Dctx; +ZSTDv02_Dctx* ZSTDv02_createDCtx(void); +size_t ZSTDv02_freeDCtx(ZSTDv02_Dctx* dctx); -size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - const BYTE* ip = (const BYTE*) cSrc; +size_t ZSTDv02_decompressDCtx(void* ctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); - size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, - workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; +/* ************************************* +* Streaming functions +***************************************/ +size_t ZSTDv02_resetDCtx(ZSTDv02_Dctx* dctx); - return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); -} +size_t ZSTDv02_nextSrcSizeToDecompress(ZSTDv02_Dctx* dctx); +size_t ZSTDv02_decompressContinue(ZSTDv02_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. 
+*/ +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv02_magicNumber 0xFD2FB522 /* v0.2 */ -size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} -size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); - return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); +#if defined (__cplusplus) } +#endif -size_t HUF_decompress4X2_usingDTable( - void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 1) return ERROR(GENERIC); - return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -} +#endif /* ZSTD_V02_H_4174539423 */ +/**** ended inlining zstd_v02.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) +/**** start inlining zstd_v03.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ -static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize, int bmi2) -{ - const BYTE* ip = (const BYTE*) cSrc; +#ifndef ZSTD_V03_H_298734209782 +#define ZSTD_V03_H_298734209782 - size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, - workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; +#if defined (__cplusplus) +extern "C" { +#endif - return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); -} +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ -size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); -} +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv03_decompress() : decompress ZSTD frames compliant with v0.3.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. 
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv03_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv03_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.3.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ + void ZSTDv03_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); -size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} + /** +ZSTDv03_isError() : tells if the result of ZSTDv03_decompress() is an error +*/ +unsigned ZSTDv03_isError(size_t code); -size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX); - return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize); -} -#endif /* HUF_FORCE_DECOMPRESS_X1 */ +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv03_Dctx_s ZSTDv03_Dctx; +ZSTDv03_Dctx* ZSTDv03_createDCtx(void); +size_t ZSTDv03_freeDCtx(ZSTDv03_Dctx* dctx); +size_t ZSTDv03_decompressDCtx(void* ctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); -/* ***********************************/ -/* Universal decompression selectors */ -/* ***********************************/ +/* ************************************* +* Streaming functions +***************************************/ +size_t ZSTDv03_resetDCtx(ZSTDv03_Dctx* dctx); -size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc const dtd = HUF_getDTableDesc(DTable); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); - return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); - return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#else - return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : - HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#endif -} +size_t ZSTDv03_nextSrcSizeToDecompress(ZSTDv03_Dctx* dctx); +size_t ZSTDv03_decompressContinue(ZSTDv03_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. 
+ Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. +*/ -size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, - const void* cSrc, size_t cSrcSize, - const HUF_DTable* DTable) -{ - DTableDesc const dtd = HUF_getDTableDesc(DTable); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); - return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); - return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#else - return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : - HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); -#endif -} +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv03_magicNumber 0xFD2FB523 /* v0.3 */ -#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) -typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; -static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = -{ - /* single, double, quad */ - {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ - {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ - {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ - {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ - {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ - {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ - {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ - {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ - {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ - {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ - {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ - {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ - {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ - {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ - {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ - {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ -}; +#if defined (__cplusplus) +} #endif -/** HUF_selectDecoder() : - * Tells which decoder is likely to decode faster, - * based on a set of pre-computed metrics. - * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . - * Assumption : 0 < dstSize <= 128 KB */ -U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) -{ - assert(dstSize > 0); - assert(dstSize <= 128*1024); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dstSize; - (void)cSrcSize; - return 0; -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dstSize; - (void)cSrcSize; - return 1; -#else - /* decoder timing evaluation */ - { U32 const Q = (cSrcSize >= dstSize) ? 
15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ - U32 const D256 = (U32)(dstSize >> 8); - U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); - U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); - DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ - return DTime1 < DTime0; - } +#endif /* ZSTD_V03_H_298734209782 */ +/**** ended inlining zstd_v03.h ****/ #endif -} - +#if (ZSTD_LEGACY_SUPPORT <= 4) +/**** start inlining zstd_v04.h ****/ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ -typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); +#ifndef ZSTD_V04_H_91868324769238 +#define ZSTD_V04_H_91868324769238 -size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ -#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) - static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 }; +#if defined (__cplusplus) +extern "C" { #endif - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ - if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ +/* ************************************* +* Includes +***************************************/ +#include /* size_t */ - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress4X1(dst, dstSize, cSrc, cSrcSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2(dst, dstSize, cSrc, cSrcSize); -#else - return decompress[algoNb](dst, dstSize, cSrc, cSrcSize); -#endif - } -} -size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ - if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ +/* ************************************* +* Simple one-step function +***************************************/ +/** +ZSTDv04_decompress() : decompress ZSTD frames compliant with v0.4.x format + compressedSize : is the exact source size + maxOriginalSize : is the size of the 'dst' buffer, which must be already allocated. + It must be equal or larger than originalSize, otherwise decompression will fail. 
+ return : the number of bytes decompressed into destination buffer (originalSize) + or an errorCode if it fails (which can be tested using ZSTDv01_isError()) +*/ +size_t ZSTDv04_decompress( void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); + + /** + ZSTDv04_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.4.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ + void ZSTDv04_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/** +ZSTDv04_isError() : tells if the result of ZSTDv04_decompress() is an error +*/ +unsigned ZSTDv04_isError(size_t code); + - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize); -#else - return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) : - HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ; -#endif - } -} +/* ************************************* +* Advanced functions +***************************************/ +typedef struct ZSTDv04_Dctx_s ZSTDv04_Dctx; +ZSTDv04_Dctx* ZSTDv04_createDCtx(void); +size_t ZSTDv04_freeDCtx(ZSTDv04_Dctx* dctx); -size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress4X_hufOnly_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} +size_t ZSTDv04_decompressDCtx(ZSTDv04_Dctx* dctx, + void* dst, size_t maxOriginalSize, + const void* src, size_t compressedSize); -size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, - size_t dstSize, const void* cSrc, - size_t cSrcSize, void* workSpace, - size_t wkspSize) -{ - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize == 0) return ERROR(corruption_detected); +/* ************************************* +* Direct Streaming +***************************************/ +size_t ZSTDv04_resetDCtx(ZSTDv04_Dctx* dctx); - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); -#else - return algoNb ? 
HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize): - HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); -#endif - } -} +size_t ZSTDv04_nextSrcSizeToDecompress(ZSTDv04_Dctx* dctx); +size_t ZSTDv04_decompressContinue(ZSTDv04_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize); +/** + Use above functions alternatively. + ZSTD_nextSrcSizeToDecompress() tells how much bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() will use previous data blocks to improve compression if they are located prior to current block. + Result is the number of bytes regenerated within 'dst'. + It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header. +*/ -size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize, - void* workSpace, size_t wkspSize) -{ - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ - if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize); -#else - return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize): - HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, - cSrcSize, workSpace, wkspSize); -#endif - } -} +/* ************************************* +* Buffered Streaming +***************************************/ +typedef struct ZBUFFv04_DCtx_s ZBUFFv04_DCtx; +ZBUFFv04_DCtx* ZBUFFv04_createDCtx(void); +size_t ZBUFFv04_freeDCtx(ZBUFFv04_DCtx* dctx); -size_t HUF_decompress1X_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, - const void* cSrc, size_t cSrcSize) -{ - U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; - return HUF_decompress1X_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, - workSpace, sizeof(workSpace)); -} +size_t ZBUFFv04_decompressInit(ZBUFFv04_DCtx* dctx); +size_t ZBUFFv04_decompressWithDictionary(ZBUFFv04_DCtx* dctx, const void* dict, size_t dictSize); +size_t ZBUFFv04_decompressContinue(ZBUFFv04_DCtx* dctx, void* dst, size_t* maxDstSizePtr, const void* src, size_t* srcSizePtr); -size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) -{ - DTableDesc const dtd = HUF_getDTableDesc(DTable); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); - return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); - return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#else - return dtd.tableType ? 
HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : - HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#endif -} +/** ************************************************ +* Streaming decompression +* +* A ZBUFF_DCtx object is required to track streaming operation. +* Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources. +* Use ZBUFF_decompressInit() to start a new decompression operation. +* ZBUFF_DCtx objects can be reused multiple times. +* +* Optionally, a reference to a static dictionary can be set, using ZBUFF_decompressWithDictionary() +* It must be the same content as the one set during compression phase. +* Dictionary content must remain accessible during the decompression process. +* +* Use ZBUFF_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *maxDstSizePtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *maxDstSizePtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of dst will be overwritten (up to *maxDstSizePtr) at each function call, so save its content if it matters or change dst. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency) +* or 0 when a frame is completely decoded +* or an error code, which can be tested using ZBUFF_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize / ZBUFF_recommendedDOutSize +* output : ZBUFF_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when it's decoded. +* input : ZBUFF_recommendedDInSize==128Kb+3; just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* **************************************************/ +unsigned ZBUFFv04_isError(size_t errorCode); +const char* ZBUFFv04_getErrorName(size_t errorCode); -#ifndef HUF_FORCE_DECOMPRESS_X2 -size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) -{ - const BYTE* ip = (const BYTE*) cSrc; - size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize); - if (HUF_isError(hSize)) return hSize; - if (hSize >= cSrcSize) return ERROR(srcSize_wrong); - ip += hSize; cSrcSize -= hSize; +/** The below functions provide recommended buffer sizes for Compression or Decompression operations. +* These sizes are not compulsory, they just tend to offer better latency */ +size_t ZBUFFv04_recommendedDInSize(void); +size_t ZBUFFv04_recommendedDOutSize(void); - return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); -} -#endif -size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) -{ - DTableDesc const dtd = HUF_getDTableDesc(DTable); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)dtd; - assert(dtd.tableType == 0); - return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)dtd; - assert(dtd.tableType == 1); - return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#else - return dtd.tableType ? 
HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : - HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); -#endif -} +/* ************************************* +* Prefix - version detection +***************************************/ +#define ZSTDv04_magicNumber 0xFD2FB524 /* v0.4 */ -size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) -{ - /* validation checks */ - if (dstSize == 0) return ERROR(dstSize_tooSmall); - if (cSrcSize == 0) return ERROR(corruption_detected); - { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); -#if defined(HUF_FORCE_DECOMPRESS_X1) - (void)algoNb; - assert(algoNb == 0); - return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -#elif defined(HUF_FORCE_DECOMPRESS_X2) - (void)algoNb; - assert(algoNb == 1); - return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -#else - return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : - HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); -#endif - } +#if defined (__cplusplus) } -/**** ended inlining huf_decompress.c ****/ -/**** start inlining zstd_ddict.c ****/ +#endif + +#endif /* ZSTD_V04_H_91868324769238 */ +/**** ended inlining zstd_v04.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) +/**** start inlining zstd_v05.h ****/ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -7930,16 +10255,164 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds * You may select, at your option, one of the above-listed licenses. */ -/* zstd_ddict.c : - * concentrates all logic that needs to know the internals of ZSTD_DDict object */ +#ifndef ZSTDv05_H +#define ZSTDv05_H -/*-******************************************************* +#if defined (__cplusplus) +extern "C" { +#endif + +/*-************************************* * Dependencies -*********************************************************/ -#include /* memcpy, memmove, memset */ -/**** start inlining cpu.h ****/ +***************************************/ +#include /* size_t */ +/**** skipping file: ../common/mem.h ****/ + + +/* ************************************* +* Simple functions +***************************************/ +/*! ZSTDv05_decompress() : + `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail. + `dstCapacity` must be large enough, equal or larger than originalSize. 
+ @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + or an errorCode if it fails (which can be tested using ZSTDv05_isError()) */ +size_t ZSTDv05_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + + /** + ZSTDv05_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.5.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. + */ +void ZSTDv05_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/* ************************************* +* Helper functions +***************************************/ +/* Error Management */ +unsigned ZSTDv05_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +const char* ZSTDv05_getErrorName(size_t code); /*!< provides readable string for an error code */ + + +/* ************************************* +* Explicit memory management +***************************************/ +/** Decompression context */ +typedef struct ZSTDv05_DCtx_s ZSTDv05_DCtx; +ZSTDv05_DCtx* ZSTDv05_createDCtx(void); +size_t ZSTDv05_freeDCtx(ZSTDv05_DCtx* dctx); /*!< @return : errorCode */ + +/** ZSTDv05_decompressDCtx() : +* Same as ZSTDv05_decompress(), but requires an already allocated ZSTDv05_DCtx (see ZSTDv05_createDCtx()) */ +size_t ZSTDv05_decompressDCtx(ZSTDv05_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-*********************** +* Simple Dictionary API +*************************/ +/*! ZSTDv05_decompress_usingDict() : +* Decompression using a pre-defined Dictionary content (see dictBuilder). +* Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted. 
+* Note : dict can be NULL, in which case, it's equivalent to ZSTDv05_decompressDCtx() */ +size_t ZSTDv05_decompress_usingDict(ZSTDv05_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + +/*-************************ +* Advanced Streaming API +***************************/ +typedef enum { ZSTDv05_fast, ZSTDv05_greedy, ZSTDv05_lazy, ZSTDv05_lazy2, ZSTDv05_btlazy2, ZSTDv05_opt, ZSTDv05_btopt } ZSTDv05_strategy; +typedef struct { + U64 srcSize; + U32 windowLog; /* the only useful information to retrieve */ + U32 contentLog; U32 hashLog; U32 searchLog; U32 searchLength; U32 targetLength; ZSTDv05_strategy strategy; +} ZSTDv05_parameters; +size_t ZSTDv05_getFrameParams(ZSTDv05_parameters* params, const void* src, size_t srcSize); + +size_t ZSTDv05_decompressBegin_usingDict(ZSTDv05_DCtx* dctx, const void* dict, size_t dictSize); +void ZSTDv05_copyDCtx(ZSTDv05_DCtx* dstDCtx, const ZSTDv05_DCtx* srcDCtx); +size_t ZSTDv05_nextSrcSizeToDecompress(ZSTDv05_DCtx* dctx); +size_t ZSTDv05_decompressContinue(ZSTDv05_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-*********************** +* ZBUFF API +*************************/ +typedef struct ZBUFFv05_DCtx_s ZBUFFv05_DCtx; +ZBUFFv05_DCtx* ZBUFFv05_createDCtx(void); +size_t ZBUFFv05_freeDCtx(ZBUFFv05_DCtx* dctx); + +size_t ZBUFFv05_decompressInit(ZBUFFv05_DCtx* dctx); +size_t ZBUFFv05_decompressInitDictionary(ZBUFFv05_DCtx* dctx, const void* dict, size_t dictSize); + +size_t ZBUFFv05_decompressContinue(ZBUFFv05_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); + +/*-*************************************************************************** +* Streaming decompression +* +* A ZBUFFv05_DCtx object is required to track streaming operations. +* Use ZBUFFv05_createDCtx() and ZBUFFv05_freeDCtx() to create/release resources. +* Use ZBUFFv05_decompressInit() to start a new decompression operation, +* or ZBUFFv05_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFFv05_DCtx objects can be reused multiple times. +* +* Use ZBUFFv05_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of @dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters or change @dst. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency) +* or 0 when a frame is completely decoded +* or an error code, which can be tested using ZBUFFv05_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFFv05_recommendedDInSize() / ZBUFFv05_recommendedDOutSize() +* output : ZBUFFv05_recommendedDOutSize==128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFFv05_recommendedDInSize==128Kb+3; just follow indications from ZBUFFv05_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . 
+* *******************************************************************************/ + + +/* ************************************* +* Tool functions +***************************************/ +unsigned ZBUFFv05_isError(size_t errorCode); +const char* ZBUFFv05_getErrorName(size_t errorCode); + +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. +* These sizes are just hints, and tend to offer better latency */ +size_t ZBUFFv05_recommendedDInSize(void); +size_t ZBUFFv05_recommendedDOutSize(void); + + + +/*-************************************* +* Constants +***************************************/ +#define ZSTDv05_MAGICNUMBER 0xFD2FB525 /* v0.5 */ + + + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDv0505_H */ +/**** ended inlining zstd_v05.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) +/**** start inlining zstd_v06.h ****/ /* - * Copyright (c) 2018-2020, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -7948,220 +10421,174 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds * You may select, at your option, one of the above-listed licenses. */ -#ifndef ZSTD_COMMON_CPU_H -#define ZSTD_COMMON_CPU_H +#ifndef ZSTDv06_H +#define ZSTDv06_H + +#if defined (__cplusplus) +extern "C" { +#endif + +/*====== Dependency ======*/ +#include /* size_t */ + + +/*====== Export for Windows ======*/ +/*! +* ZSTDv06_DLL_EXPORT : +* Enable exporting of functions when building a Windows DLL +*/ +#if defined(_WIN32) && defined(ZSTDv06_DLL_EXPORT) && (ZSTDv06_DLL_EXPORT==1) +# define ZSTDLIBv06_API __declspec(dllexport) +#else +# define ZSTDLIBv06_API +#endif + + +/* ************************************* +* Simple functions +***************************************/ +/*! ZSTDv06_decompress() : + `compressedSize` : is the _exact_ size of the compressed blob, otherwise decompression will fail. + `dstCapacity` must be large enough, equal or larger than originalSize. + @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + or an errorCode if it fails (which can be tested using ZSTDv06_isError()) */ +ZSTDLIBv06_API size_t ZSTDv06_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/** +ZSTDv06_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.6.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. 
+*/ +void ZSTDv06_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/* ************************************* +* Helper functions +***************************************/ +ZSTDLIBv06_API size_t ZSTDv06_compressBound(size_t srcSize); /*!< maximum compressed size (worst case scenario) */ + +/* Error Management */ +ZSTDLIBv06_API unsigned ZSTDv06_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIBv06_API const char* ZSTDv06_getErrorName(size_t code); /*!< provides readable string for an error code */ + + +/* ************************************* +* Explicit memory management +***************************************/ +/** Decompression context */ +typedef struct ZSTDv06_DCtx_s ZSTDv06_DCtx; +ZSTDLIBv06_API ZSTDv06_DCtx* ZSTDv06_createDCtx(void); +ZSTDLIBv06_API size_t ZSTDv06_freeDCtx(ZSTDv06_DCtx* dctx); /*!< @return : errorCode */ + +/** ZSTDv06_decompressDCtx() : +* Same as ZSTDv06_decompress(), but requires an already allocated ZSTDv06_DCtx (see ZSTDv06_createDCtx()) */ +ZSTDLIBv06_API size_t ZSTDv06_decompressDCtx(ZSTDv06_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/*-*********************** +* Dictionary API +*************************/ +/*! ZSTDv06_decompress_usingDict() : +* Decompression using a pre-defined Dictionary content (see dictBuilder). +* Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted. +* Note : dict can be NULL, in which case, it's equivalent to ZSTDv06_decompressDCtx() */ +ZSTDLIBv06_API size_t ZSTDv06_decompress_usingDict(ZSTDv06_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*-************************ +* Advanced Streaming API +***************************/ +struct ZSTDv06_frameParams_s { unsigned long long frameContentSize; unsigned windowLog; }; +typedef struct ZSTDv06_frameParams_s ZSTDv06_frameParams; + +ZSTDLIBv06_API size_t ZSTDv06_getFrameParams(ZSTDv06_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input */ +ZSTDLIBv06_API size_t ZSTDv06_decompressBegin_usingDict(ZSTDv06_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIBv06_API void ZSTDv06_copyDCtx(ZSTDv06_DCtx* dctx, const ZSTDv06_DCtx* preparedDCtx); + +ZSTDLIBv06_API size_t ZSTDv06_nextSrcSizeToDecompress(ZSTDv06_DCtx* dctx); +ZSTDLIBv06_API size_t ZSTDv06_decompressContinue(ZSTDv06_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + -/** - * Implementation taken from folly/CpuId.h - * https://github.com/facebook/folly/blob/master/folly/CpuId.h - */ +/* ************************************* +* ZBUFF API +***************************************/ -#include +typedef struct ZBUFFv06_DCtx_s ZBUFFv06_DCtx; +ZSTDLIBv06_API ZBUFFv06_DCtx* ZBUFFv06_createDCtx(void); +ZSTDLIBv06_API size_t ZBUFFv06_freeDCtx(ZBUFFv06_DCtx* dctx); -/**** skipping file: mem.h ****/ +ZSTDLIBv06_API size_t ZBUFFv06_decompressInit(ZBUFFv06_DCtx* dctx); +ZSTDLIBv06_API size_t ZBUFFv06_decompressInitDictionary(ZBUFFv06_DCtx* dctx, const void* dict, size_t dictSize); -#ifdef _MSC_VER -#include -#endif +ZSTDLIBv06_API size_t ZBUFFv06_decompressContinue(ZBUFFv06_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); -typedef struct { - U32 f1c; - U32 f1d; - U32 f7b; - U32 f7c; -} ZSTD_cpuid_t; +/*-*************************************************************************** +* 
Streaming decompression howto +* +* A ZBUFFv06_DCtx object is required to track streaming operations. +* Use ZBUFFv06_createDCtx() and ZBUFFv06_freeDCtx() to create/release resources. +* Use ZBUFFv06_decompressInit() to start a new decompression operation, +* or ZBUFFv06_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFFv06_DCtx objects can be re-init multiple times. +* +* Use ZBUFFv06_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. +* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency), +* or 0 when a frame is completely decoded, +* or an error code, which can be tested using ZBUFFv06_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFFv06_recommendedDInSize() and ZBUFFv06_recommendedDOutSize() +* output : ZBUFFv06_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFFv06_recommendedDInSize == 128KB + 3; +* just follow indications from ZBUFFv06_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* *******************************************************************************/ -MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { - U32 f1c = 0; - U32 f1d = 0; - U32 f7b = 0; - U32 f7c = 0; -#if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) - int reg[4]; - __cpuid((int*)reg, 0); - { - int const n = reg[0]; - if (n >= 1) { - __cpuid((int*)reg, 1); - f1c = (U32)reg[2]; - f1d = (U32)reg[3]; - } - if (n >= 7) { - __cpuidex((int*)reg, 7, 0); - f7b = (U32)reg[1]; - f7c = (U32)reg[2]; - } - } -#elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__) - /* The following block like the normal cpuid branch below, but gcc - * reserves ebx for use of its pic register so we must specially - * handle the save and restore to avoid clobbering the register - */ - U32 n; - __asm__( - "pushl %%ebx\n\t" - "cpuid\n\t" - "popl %%ebx\n\t" - : "=a"(n) - : "a"(0) - : "ecx", "edx"); - if (n >= 1) { - U32 f1a; - __asm__( - "pushl %%ebx\n\t" - "cpuid\n\t" - "popl %%ebx\n\t" - : "=a"(f1a), "=c"(f1c), "=d"(f1d) - : "a"(1)); - } - if (n >= 7) { - __asm__( - "pushl %%ebx\n\t" - "cpuid\n\t" - "movl %%ebx, %%eax\n\t" - "popl %%ebx" - : "=a"(f7b), "=c"(f7c) - : "a"(7), "c"(0) - : "edx"); - } -#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__) - U32 n; - __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx"); - if (n >= 1) { - U32 f1a; - __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx"); - } - if (n >= 7) { - U32 f7a; - __asm__("cpuid" - : "=a"(f7a), "=b"(f7b), "=c"(f7c) - : "a"(7), "c"(0) - : "edx"); - } -#endif - { - ZSTD_cpuid_t cpuid; - cpuid.f1c = f1c; - cpuid.f1d = f1d; - cpuid.f7b = f7b; - cpuid.f7c = f7c; - return cpuid; - } -} -#define X(name, r, bit) \ - MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \ - return ((cpuid.r) & (1U << bit)) != 0; \ - } +/* ************************************* +* Tool functions +***************************************/ +ZSTDLIBv06_API unsigned 
ZBUFFv06_isError(size_t errorCode); +ZSTDLIBv06_API const char* ZBUFFv06_getErrorName(size_t errorCode); -/* cpuid(1): Processor Info and Feature Bits. */ -#define C(name, bit) X(name, f1c, bit) - C(sse3, 0) - C(pclmuldq, 1) - C(dtes64, 2) - C(monitor, 3) - C(dscpl, 4) - C(vmx, 5) - C(smx, 6) - C(eist, 7) - C(tm2, 8) - C(ssse3, 9) - C(cnxtid, 10) - C(fma, 12) - C(cx16, 13) - C(xtpr, 14) - C(pdcm, 15) - C(pcid, 17) - C(dca, 18) - C(sse41, 19) - C(sse42, 20) - C(x2apic, 21) - C(movbe, 22) - C(popcnt, 23) - C(tscdeadline, 24) - C(aes, 25) - C(xsave, 26) - C(osxsave, 27) - C(avx, 28) - C(f16c, 29) - C(rdrand, 30) -#undef C -#define D(name, bit) X(name, f1d, bit) - D(fpu, 0) - D(vme, 1) - D(de, 2) - D(pse, 3) - D(tsc, 4) - D(msr, 5) - D(pae, 6) - D(mce, 7) - D(cx8, 8) - D(apic, 9) - D(sep, 11) - D(mtrr, 12) - D(pge, 13) - D(mca, 14) - D(cmov, 15) - D(pat, 16) - D(pse36, 17) - D(psn, 18) - D(clfsh, 19) - D(ds, 21) - D(acpi, 22) - D(mmx, 23) - D(fxsr, 24) - D(sse, 25) - D(sse2, 26) - D(ss, 27) - D(htt, 28) - D(tm, 29) - D(pbe, 31) -#undef D +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. +* These sizes are just hints, they tend to offer better latency */ +ZSTDLIBv06_API size_t ZBUFFv06_recommendedDInSize(void); +ZSTDLIBv06_API size_t ZBUFFv06_recommendedDOutSize(void); -/* cpuid(7): Extended Features. */ -#define B(name, bit) X(name, f7b, bit) - B(bmi1, 3) - B(hle, 4) - B(avx2, 5) - B(smep, 7) - B(bmi2, 8) - B(erms, 9) - B(invpcid, 10) - B(rtm, 11) - B(mpx, 14) - B(avx512f, 16) - B(avx512dq, 17) - B(rdseed, 18) - B(adx, 19) - B(smap, 20) - B(avx512ifma, 21) - B(pcommit, 22) - B(clflushopt, 23) - B(clwb, 24) - B(avx512pf, 26) - B(avx512er, 27) - B(avx512cd, 28) - B(sha, 29) - B(avx512bw, 30) - B(avx512vl, 31) -#undef B -#define C(name, bit) X(name, f7c, bit) - C(prefetchwt1, 0) - C(avx512vbmi, 1) -#undef C -#undef X +/*-************************************* +* Constants +***************************************/ +#define ZSTDv06_MAGICNUMBER 0xFD2FB526 /* v0.6 */ -#endif /* ZSTD_COMMON_CPU_H */ -/**** ended inlining cpu.h ****/ -/**** skipping file: mem.h ****/ -#define FSE_STATIC_LINKING_ONLY -/**** skipping file: fse.h ****/ -#define HUF_STATIC_LINKING_ONLY -/**** skipping file: huf.h ****/ -/**** start inlining zstd_decompress_internal.h ****/ + + +#if defined (__cplusplus) +} +#endif + +#endif /* ZSTDv06_BUFFERED_H */ +/**** ended inlining zstd_v06.h ****/ +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) +/**** start inlining zstd_v07.h ****/ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -8170,223 +10597,551 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { * You may select, at your option, one of the above-listed licenses. */ +#ifndef ZSTDv07_H_235446 +#define ZSTDv07_H_235446 -/* zstd_decompress_internal: - * objects and definitions shared within lib/decompress modules */ +#if defined (__cplusplus) +extern "C" { +#endif - #ifndef ZSTD_DECOMPRESS_INTERNAL_H - #define ZSTD_DECOMPRESS_INTERNAL_H +/*====== Dependency ======*/ +#include /* size_t */ -/*-******************************************************* - * Dependencies - *********************************************************/ -/**** skipping file: mem.h ****/ -/**** skipping file: zstd_internal.h ****/ +/*====== Export for Windows ======*/ +/*! 
+* ZSTDv07_DLL_EXPORT : +* Enable exporting of functions when building a Windows DLL +*/ +#if defined(_WIN32) && defined(ZSTDv07_DLL_EXPORT) && (ZSTDv07_DLL_EXPORT==1) +# define ZSTDLIBv07_API __declspec(dllexport) +#else +# define ZSTDLIBv07_API +#endif +/* ************************************* +* Simple API +***************************************/ +/*! ZSTDv07_getDecompressedSize() : +* @return : decompressed size if known, 0 otherwise. + note 1 : if `0`, follow up with ZSTDv07_getFrameParams() to know precise failure cause. + note 2 : decompressed size could be wrong or intentionally modified ! + always ensure results fit within application's authorized limits */ +unsigned long long ZSTDv07_getDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTDv07_decompress() : + `compressedSize` : must be _exact_ size of compressed input, otherwise decompression will fail. + `dstCapacity` must be equal or larger than originalSize. + @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + or an errorCode if it fails (which can be tested using ZSTDv07_isError()) */ +ZSTDLIBv07_API size_t ZSTDv07_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); -/*-******************************************************* - * Constants - *********************************************************/ -static const U32 LL_base[MaxLL+1] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 18, 20, 22, 24, 28, 32, 40, - 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, - 0x2000, 0x4000, 0x8000, 0x10000 }; +/** +ZSTDv07_findFrameSizeInfoLegacy() : get the source length and decompressed bound of a ZSTD frame compliant with v0.7.x format + srcSize : The size of the 'src' buffer, at least as large as the frame pointed to by 'src' + cSize (output parameter) : the number of bytes that would be read to decompress this frame + or an error code if it fails (which can be tested using ZSTDv01_isError()) + dBound (output parameter) : an upper-bound for the decompressed size of the data in the frame + or ZSTD_CONTENTSIZE_ERROR if an error occurs + + note : assumes `cSize` and `dBound` are _not_ NULL. 
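Putting the two simple v0.7 entry points together, one plausible one-shot path looks like the sketch below: query the recorded content size, cap it against an application limit as note 2 warns, allocate, then decompress. The heap allocation, the size cap, and the helper name are this example's choices, not part of the library.

    #include <stdlib.h>

    /* Sketch only: returns a malloc'd buffer with the decompressed data, or NULL. */
    static void* decompressV07OneShot(const void* src, size_t compressedSize,
                                      size_t maxAllowedSize, size_t* decompressedSize)
    {
        unsigned long long const rSize = ZSTDv07_getDecompressedSize(src, compressedSize);
        void* dst;
        if (rSize == 0 || rSize > maxAllowedSize) return NULL;  /* unknown size, or over limit */
        dst = malloc((size_t)rSize);
        if (dst == NULL) return NULL;
        {   size_t const dSize = ZSTDv07_decompress(dst, (size_t)rSize, src, compressedSize);
            if (ZSTDv07_isError(dSize)) { free(dst); return NULL; }
            *decompressedSize = dSize;
        }
        return dst;
    }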
+*/ +void ZSTDv07_findFrameSizeInfoLegacy(const void *src, size_t srcSize, + size_t* cSize, unsigned long long* dBound); + +/*====== Helper functions ======*/ +ZSTDLIBv07_API unsigned ZSTDv07_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIBv07_API const char* ZSTDv07_getErrorName(size_t code); /*!< provides readable string from an error code */ + + +/*-************************************* +* Explicit memory management +***************************************/ +/** Decompression context */ +typedef struct ZSTDv07_DCtx_s ZSTDv07_DCtx; +ZSTDLIBv07_API ZSTDv07_DCtx* ZSTDv07_createDCtx(void); +ZSTDLIBv07_API size_t ZSTDv07_freeDCtx(ZSTDv07_DCtx* dctx); /*!< @return : errorCode */ + +/** ZSTDv07_decompressDCtx() : +* Same as ZSTDv07_decompress(), requires an allocated ZSTDv07_DCtx (see ZSTDv07_createDCtx()) */ +ZSTDLIBv07_API size_t ZSTDv07_decompressDCtx(ZSTDv07_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); -static const U32 OF_base[MaxOff+1] = { - 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, - 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, - 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, - 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; -static const U32 OF_bits[MaxOff+1] = { - 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31 }; +/*-************************ +* Simple dictionary API +***************************/ +/*! ZSTDv07_decompress_usingDict() : +* Decompression using a pre-defined Dictionary content (see dictBuilder). +* Dictionary must be identical to the one used during compression. +* Note : This function load the dictionary, resulting in a significant startup time */ +ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDict(ZSTDv07_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*-************************** +* Advanced Dictionary API +****************************/ +/*! ZSTDv07_createDDict() : +* Create a digested dictionary, ready to start decompression operation without startup delay. +* `dict` can be released after creation */ +typedef struct ZSTDv07_DDict_s ZSTDv07_DDict; +ZSTDLIBv07_API ZSTDv07_DDict* ZSTDv07_createDDict(const void* dict, size_t dictSize); +ZSTDLIBv07_API size_t ZSTDv07_freeDDict(ZSTDv07_DDict* ddict); + +/*! ZSTDv07_decompress_usingDDict() : +* Decompression using a pre-digested Dictionary +* Faster startup than ZSTDv07_decompress_usingDict(), recommended when same dictionary is used multiple times. 
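A sketch of the "digest the dictionary once, reuse it for many frames" pattern this comment recommends; the frame array, the shared scratch output buffer, and the helper name are illustrative assumptions of this example.

    /* Sketch only: decode frameCount frames, all compressed with the same dictionary,
     * reusing one digested DDict. Each frame is decoded into the same scratch buffer. */
    static size_t decompressManyV07(const void* dict, size_t dictSize,
                                    const void* const* frames, const size_t* frameSizes,
                                    size_t frameCount,
                                    void* dst, size_t dstCapacity)
    {
        ZSTDv07_DCtx*  const dctx  = ZSTDv07_createDCtx();
        ZSTDv07_DDict* const ddict = ZSTDv07_createDDict(dict, dictSize);  /* digested once */
        size_t result = 0;
        size_t n;
        if (dctx != NULL && ddict != NULL) {
            for (n = 0; n < frameCount; n++) {
                result = ZSTDv07_decompress_usingDDict(dctx, dst, dstCapacity,
                                                       frames[n], frameSizes[n], ddict);
                if (ZSTDv07_isError(result)) break;   /* stop on the first bad frame */
            }
        }
        if (ddict != NULL) ZSTDv07_freeDDict(ddict);
        if (dctx  != NULL) ZSTDv07_freeDCtx(dctx);
        return result;   /* size of the last decoded frame, or an error code */
    }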
*/ +ZSTDLIBv07_API size_t ZSTDv07_decompress_usingDDict(ZSTDv07_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTDv07_DDict* ddict); -static const U32 ML_base[MaxML+1] = { - 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, - 27, 28, 29, 30, 31, 32, 33, 34, - 35, 37, 39, 41, 43, 47, 51, 59, - 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, - 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; +typedef struct { + unsigned long long frameContentSize; + unsigned windowSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTDv07_frameParams; +ZSTDLIBv07_API size_t ZSTDv07_getFrameParams(ZSTDv07_frameParams* fparamsPtr, const void* src, size_t srcSize); /**< doesn't consume input */ -/*-******************************************************* - * Decompression types - *********************************************************/ - typedef struct { - U32 fastMode; - U32 tableLog; - } ZSTD_seqSymbol_header; - typedef struct { - U16 nextState; - BYTE nbAdditionalBits; - BYTE nbBits; - U32 baseValue; - } ZSTD_seqSymbol; - #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) -typedef struct { - ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ - ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ - ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ - HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ - U32 rep[ZSTD_REP_NUM]; -} ZSTD_entropyDTables_t; +/* ************************************* +* Streaming functions +***************************************/ +typedef struct ZBUFFv07_DCtx_s ZBUFFv07_DCtx; +ZSTDLIBv07_API ZBUFFv07_DCtx* ZBUFFv07_createDCtx(void); +ZSTDLIBv07_API size_t ZBUFFv07_freeDCtx(ZBUFFv07_DCtx* dctx); -typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, - ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, - ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, - ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; +ZSTDLIBv07_API size_t ZBUFFv07_decompressInit(ZBUFFv07_DCtx* dctx); +ZSTDLIBv07_API size_t ZBUFFv07_decompressInitDictionary(ZBUFFv07_DCtx* dctx, const void* dict, size_t dictSize); -typedef enum { zdss_init=0, zdss_loadHeader, - zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; +ZSTDLIBv07_API size_t ZBUFFv07_decompressContinue(ZBUFFv07_DCtx* dctx, + void* dst, size_t* dstCapacityPtr, + const void* src, size_t* srcSizePtr); -typedef enum { - ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */ - ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */ - ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ -} ZSTD_dictUses_e; +/*-*************************************************************************** +* Streaming decompression howto +* +* A ZBUFFv07_DCtx object is required to track streaming operations. +* Use ZBUFFv07_createDCtx() and ZBUFFv07_freeDCtx() to create/release resources. +* Use ZBUFFv07_decompressInit() to start a new decompression operation, +* or ZBUFFv07_decompressInitDictionary() if decompression requires a dictionary. +* Note that ZBUFFv07_DCtx objects can be re-init multiple times. +* +* Use ZBUFFv07_decompressContinue() repetitively to consume your input. +* *srcSizePtr and *dstCapacityPtr can be any size. 
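Since ZSTDv07_getFrameParams() doesn't consume input, it can be used to peek at a frame header, for example to learn which dictID a frame expects before choosing a dictionary. A hedged sketch follows; the helper name is ours, and the return-value convention (0 meaning the struct was filled) is assumed from the upstream API.

    /* Sketch only: report the dictID recorded in a v0.7 frame header, 0 if unavailable. */
    static unsigned v07FrameDictID(const void* src, size_t srcSize)
    {
        ZSTDv07_frameParams fp;
        size_t const ret = ZSTDv07_getFrameParams(&fp, src, srcSize);
        if (ret != 0) return 0;   /* header incomplete, or not a v0.7 frame */
        /* fp.frameContentSize, fp.windowSize and fp.checksumFlag are filled as well */
        return fp.dictID;         /* 0 when no dictionary ID was recorded */
    }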
+* The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr. +* Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again. +* The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`. +* @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to help latency), +* or 0 when a frame is completely decoded, +* or an error code, which can be tested using ZBUFFv07_isError(). +* +* Hint : recommended buffer sizes (not compulsory) : ZBUFFv07_recommendedDInSize() and ZBUFFv07_recommendedDOutSize() +* output : ZBUFFv07_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded. +* input : ZBUFFv07_recommendedDInSize == 128KB + 3; +* just follow indications from ZBUFFv07_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 . +* *******************************************************************************/ -struct ZSTD_DCtx_s -{ - const ZSTD_seqSymbol* LLTptr; - const ZSTD_seqSymbol* MLTptr; - const ZSTD_seqSymbol* OFTptr; - const HUF_DTable* HUFptr; - ZSTD_entropyDTables_t entropy; - U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */ - const void* previousDstEnd; /* detect continuity */ - const void* prefixStart; /* start of current segment */ - const void* virtualStart; /* virtual start of previous segment if it was just before current one */ - const void* dictEnd; /* end of previous segment */ - size_t expected; - ZSTD_frameHeader fParams; - U64 decodedSize; - blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ - ZSTD_dStage stage; - U32 litEntropy; - U32 fseEntropy; - XXH64_state_t xxhState; - size_t headerSize; - ZSTD_format_e format; - const BYTE* litPtr; - ZSTD_customMem customMem; - size_t litSize; - size_t rleSize; - size_t staticSize; - int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ - /* dictionary */ - ZSTD_DDict* ddictLocal; - const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ - U32 dictID; - int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ - ZSTD_dictUses_e dictUses; +/* ************************************* +* Tool functions +***************************************/ +ZSTDLIBv07_API unsigned ZBUFFv07_isError(size_t errorCode); +ZSTDLIBv07_API const char* ZBUFFv07_getErrorName(size_t errorCode); - /* streaming */ - ZSTD_dStreamStage streamStage; - char* inBuff; - size_t inBuffSize; - size_t inPos; - size_t maxWindowSize; - char* outBuff; - size_t outBuffSize; - size_t outStart; - size_t outEnd; - size_t lhSize; - void* legacyContext; - U32 previousLegacyVersion; - U32 legacyVersion; - U32 hostageByte; - int noForwardProgress; +/** Functions below provide recommended buffer sizes for Compression or Decompression operations. 
+* These sizes are just hints, they tend to offer better latency */ +ZSTDLIBv07_API size_t ZBUFFv07_recommendedDInSize(void); +ZSTDLIBv07_API size_t ZBUFFv07_recommendedDOutSize(void); - /* workspace */ - BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; - BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; -}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ +/*-************************************* +* Constants +***************************************/ +#define ZSTDv07_MAGICNUMBER 0xFD2FB527 /* v0.7 */ -/*-******************************************************* - * Shared internal functions - *********************************************************/ -/*! ZSTD_loadDEntropy() : - * dict : must point at beginning of a valid zstd dictionary. - * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */ -size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, - const void* const dict, size_t const dictSize); +#if defined (__cplusplus) +} +#endif -/*! ZSTD_checkContinuity() : - * check if next `dst` follows previous position, where decompression ended. - * If yes, do nothing (continue on current segment). - * If not, classify previous segment as "external dictionary", and start a new segment. - * This function cannot fail. */ -void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst); +#endif /* ZSTDv07_H_235446 */ +/**** ended inlining zstd_v07.h ****/ +#endif +/** ZSTD_isLegacy() : + @return : > 0 if supported by legacy decoder. 0 otherwise. + return value is the version. +*/ +MEM_STATIC unsigned ZSTD_isLegacy(const void* src, size_t srcSize) +{ + U32 magicNumberLE; + if (srcSize<4) return 0; + magicNumberLE = MEM_readLE32(src); + switch(magicNumberLE) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case ZSTDv01_magicNumberLE:return 1; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case ZSTDv02_magicNumber : return 2; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case ZSTDv03_magicNumber : return 3; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case ZSTDv04_magicNumber : return 4; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case ZSTDv05_MAGICNUMBER : return 5; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case ZSTDv06_MAGICNUMBER : return 6; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case ZSTDv07_MAGICNUMBER : return 7; +#endif + default : return 0; + } +} -#endif /* ZSTD_DECOMPRESS_INTERNAL_H */ -/**** ended inlining zstd_decompress_internal.h ****/ -/**** start inlining zstd_ddict.h ****/ -/* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under both the BSD-style license (found in the - * LICENSE file in the root directory of this source tree) and the GPLv2 (found - * in the COPYING file in the root directory of this source tree). - * You may select, at your option, one of the above-listed licenses. 
- */ +MEM_STATIC unsigned long long ZSTD_getDecompressedSize_legacy(const void* src, size_t srcSize) +{ + U32 const version = ZSTD_isLegacy(src, srcSize); + if (version < 5) return 0; /* no decompressed size in frame header, or not a legacy format */ +#if (ZSTD_LEGACY_SUPPORT <= 5) + if (version==5) { + ZSTDv05_parameters fParams; + size_t const frResult = ZSTDv05_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.srcSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + if (version==6) { + ZSTDv06_frameParams fParams; + size_t const frResult = ZSTDv06_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.frameContentSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + if (version==7) { + ZSTDv07_frameParams fParams; + size_t const frResult = ZSTDv07_getFrameParams(&fParams, src, srcSize); + if (frResult != 0) return 0; + return fParams.frameContentSize; + } +#endif + return 0; /* should not be possible */ +} -#ifndef ZSTD_DDICT_H -#define ZSTD_DDICT_H -/*-******************************************************* - * Dependencies - *********************************************************/ -#include /* size_t */ -/**** skipping file: zstd.h ****/ +MEM_STATIC size_t ZSTD_decompressLegacy( + void* dst, size_t dstCapacity, + const void* src, size_t compressedSize, + const void* dict,size_t dictSize) +{ + U32 const version = ZSTD_isLegacy(src, compressedSize); + (void)dst; (void)dstCapacity; (void)dict; (void)dictSize; /* unused when ZSTD_LEGACY_SUPPORT >= 8 */ + switch(version) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case 1 : + return ZSTDv01_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case 2 : + return ZSTDv02_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case 3 : + return ZSTDv03_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + return ZSTDv04_decompress(dst, dstCapacity, src, compressedSize); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { size_t result; + ZSTDv05_DCtx* const zd = ZSTDv05_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv05_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv05_freeDCtx(zd); + return result; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { size_t result; + ZSTDv06_DCtx* const zd = ZSTDv06_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv06_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv06_freeDCtx(zd); + return result; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { size_t result; + ZSTDv07_DCtx* const zd = ZSTDv07_createDCtx(); + if (zd==NULL) return ERROR(memory_allocation); + result = ZSTDv07_decompress_usingDict(zd, dst, dstCapacity, src, compressedSize, dict, dictSize); + ZSTDv07_freeDCtx(zd); + return result; + } +#endif + default : + return ERROR(prefix_unknown); + } +} + +MEM_STATIC ZSTD_frameSizeInfo ZSTD_findFrameSizeInfoLegacy(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo; + U32 const version = ZSTD_isLegacy(src, srcSize); + switch(version) + { +#if (ZSTD_LEGACY_SUPPORT <= 1) + case 1 : + ZSTDv01_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 2) + case 2 : + ZSTDv02_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + 
&frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 3) + case 3 : + ZSTDv03_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + ZSTDv04_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + ZSTDv05_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + ZSTDv06_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + ZSTDv07_findFrameSizeInfoLegacy(src, srcSize, + &frameSizeInfo.compressedSize, + &frameSizeInfo.decompressedBound); + break; +#endif + default : + frameSizeInfo.compressedSize = ERROR(prefix_unknown); + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + break; + } + if (!ZSTD_isError(frameSizeInfo.compressedSize) && frameSizeInfo.compressedSize > srcSize) { + frameSizeInfo.compressedSize = ERROR(srcSize_wrong); + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + } + return frameSizeInfo; +} +MEM_STATIC size_t ZSTD_findFrameCompressedSizeLegacy(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo = ZSTD_findFrameSizeInfoLegacy(src, srcSize); + return frameSizeInfo.compressedSize; +} -/*-******************************************************* - * Interface - *********************************************************/ +MEM_STATIC size_t ZSTD_freeLegacyStreamContext(void* legacyContext, U32 version) +{ + switch(version) + { + default : + case 1 : + case 2 : + case 3 : + (void)legacyContext; + return ERROR(version_unsupported); +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : return ZBUFFv04_freeDCtx((ZBUFFv04_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : return ZBUFFv05_freeDCtx((ZBUFFv05_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : return ZBUFFv06_freeDCtx((ZBUFFv06_DCtx*)legacyContext); +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : return ZBUFFv07_freeDCtx((ZBUFFv07_DCtx*)legacyContext); +#endif + } +} -/* note: several prototypes are already published in `zstd.h` : - * ZSTD_createDDict() - * ZSTD_createDDict_byReference() - * ZSTD_createDDict_advanced() - * ZSTD_freeDDict() - * ZSTD_initStaticDDict() - * ZSTD_sizeof_DDict() - * ZSTD_estimateDDictSize() - * ZSTD_getDictID_fromDict() - */ -const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict); -size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict); +MEM_STATIC size_t ZSTD_initLegacyStream(void** legacyContext, U32 prevVersion, U32 newVersion, + const void* dict, size_t dictSize) +{ + DEBUGLOG(5, "ZSTD_initLegacyStream for v0.%u", newVersion); + if (prevVersion != newVersion) ZSTD_freeLegacyStreamContext(*legacyContext, prevVersion); + switch(newVersion) + { + default : + case 1 : + case 2 : + case 3 : + (void)dict; (void)dictSize; + return 0; +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + { + ZBUFFv04_DCtx* dctx = (prevVersion != newVersion) ? 
ZBUFFv04_createDCtx() : (ZBUFFv04_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv04_decompressInit(dctx); + ZBUFFv04_decompressWithDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { + ZBUFFv05_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv05_createDCtx() : (ZBUFFv05_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv05_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { + ZBUFFv06_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv06_createDCtx() : (ZBUFFv06_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv06_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { + ZBUFFv07_DCtx* dctx = (prevVersion != newVersion) ? ZBUFFv07_createDCtx() : (ZBUFFv07_DCtx*)*legacyContext; + if (dctx==NULL) return ERROR(memory_allocation); + ZBUFFv07_decompressInitDictionary(dctx, dict, dictSize); + *legacyContext = dctx; + return 0; + } +#endif + } +} + -void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); +MEM_STATIC size_t ZSTD_decompressLegacyStream(void* legacyContext, U32 version, + ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + DEBUGLOG(5, "ZSTD_decompressLegacyStream for v0.%u", version); + switch(version) + { + default : + case 1 : + case 2 : + case 3 : + (void)legacyContext; (void)output; (void)input; + return ERROR(version_unsupported); +#if (ZSTD_LEGACY_SUPPORT <= 4) + case 4 : + { + ZBUFFv04_DCtx* dctx = (ZBUFFv04_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv04_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 5) + case 5 : + { + ZBUFFv05_DCtx* dctx = (ZBUFFv05_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv05_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 6) + case 6 : + { + ZBUFFv06_DCtx* dctx = (ZBUFFv06_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv06_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; + return hintSize; + } +#endif +#if (ZSTD_LEGACY_SUPPORT <= 7) + case 7 : + { + ZBUFFv07_DCtx* dctx = (ZBUFFv07_DCtx*) legacyContext; + const void* src = (const char*)input->src + input->pos; + size_t readSize = input->size - input->pos; + void* dst = (char*)output->dst + output->pos; + size_t decodedSize = output->size - output->pos; + size_t const hintSize = ZBUFFv07_decompressContinue(dctx, dst, &decodedSize, src, &readSize); + output->pos += decodedSize; + input->pos += readSize; 
+ return hintSize; + } +#endif + } +} -#endif /* ZSTD_DDICT_H */ -/**** ended inlining zstd_ddict.h ****/ +#if defined (__cplusplus) +} +#endif -#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) -/**** start inlining zstd_legacy.h ****/ -#error Unable to find "zstd_legacy.h" -/**** ended inlining zstd_legacy.h ****/ +#endif /* ZSTD_LEGACY_H */ +/**** ended inlining ../legacy/zstd_legacy.h ****/ #endif @@ -8426,6 +11181,10 @@ void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) dctx->virtualStart = ddict->dictContent; dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; dctx->previousDstEnd = dctx->dictEnd; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif if (ddict->entropyPresent) { dctx->litEntropy = 1; dctx->fseEntropy = 1; @@ -8468,7 +11227,7 @@ ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, /* load entropy tables */ RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( &ddict->entropy, ddict->dictContent, ddict->dictSize)), - dictionary_corrupted); + dictionary_corrupted, ""); ddict->entropyPresent = 1; return 0; } @@ -8484,17 +11243,17 @@ static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, ddict->dictContent = dict; if (!dict) dictSize = 0; } else { - void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem); + void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem); ddict->dictBuffer = internalBuffer; ddict->dictContent = internalBuffer; if (!internalBuffer) return ERROR(memory_allocation); - memcpy(internalBuffer, dict, dictSize); + ZSTD_memcpy(internalBuffer, dict, dictSize); } ddict->dictSize = dictSize; ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ /* parse dictionary content */ - FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) ); + FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); return 0; } @@ -8504,9 +11263,9 @@ ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_dictContentType_e dictContentType, ZSTD_customMem customMem) { - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; - { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem); + { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem); if (ddict == NULL) return NULL; ddict->cMem = customMem; { size_t const initResult = ZSTD_initDDict_internal(ddict, @@ -8555,7 +11314,7 @@ const ZSTD_DDict* ZSTD_initStaticDDict( if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ if (sBufferSize < neededSpace) return NULL; if (dictLoadMethod == ZSTD_dlm_byCopy) { - memcpy(ddict+1, dict, dictSize); /* local copy */ + ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */ dict = ddict+1; } if (ZSTD_isError( ZSTD_initDDict_internal(ddict, @@ -8570,8 +11329,8 @@ size_t ZSTD_freeDDict(ZSTD_DDict* ddict) { if (ddict==NULL) return 0; /* support free on NULL */ { ZSTD_customMem const cMem = ddict->cMem; - ZSTD_free(ddict->dictBuffer, cMem); - ZSTD_free(ddict, cMem); + ZSTD_customFree(ddict->dictBuffer, cMem); + ZSTD_customFree(ddict, cMem); return 0; } } @@ -8599,10 +11358,10 @@ unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) if (ddict==NULL) return 0; return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); } -/**** ended inlining zstd_ddict.c ****/ -/**** start 
inlining zstd_decompress.c ****/ +/**** ended inlining decompress/zstd_ddict.c ****/ +/**** start inlining decompress/zstd_decompress.c ****/ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -8658,19 +11417,20 @@ unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) /*-******************************************************* * Dependencies *********************************************************/ -#include /* memcpy, memmove, memset */ -/**** skipping file: cpu.h ****/ -/**** skipping file: mem.h ****/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ #define FSE_STATIC_LINKING_ONLY -/**** skipping file: fse.h ****/ +/**** skipping file: ../common/fse.h ****/ #define HUF_STATIC_LINKING_ONLY -/**** skipping file: huf.h ****/ -/**** skipping file: zstd_internal.h ****/ +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/xxhash.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ /**** skipping file: zstd_decompress_internal.h ****/ /**** skipping file: zstd_ddict.h ****/ /**** start inlining zstd_decompress_block.h ****/ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the @@ -8686,9 +11446,9 @@ unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) /*-******************************************************* * Dependencies *********************************************************/ -#include /* size_t */ -/**** skipping file: zstd.h ****/ -/**** skipping file: zstd_internal.h ****/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../zstd.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ /**** skipping file: zstd_decompress_internal.h ****/ @@ -8719,22 +11479,163 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, * this function must be called with valid parameters only * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.) * in which case it cannot fail. + * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is + * defined in zstd_decompress_internal.h. * Internal use only. */ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, const U32* baseValue, const U32* nbAdditionalBits, - unsigned tableLog); + unsigned tableLog, void* wksp, size_t wkspSize, + int bmi2); #endif /* ZSTD_DEC_BLOCK_H */ /**** ended inlining zstd_decompress_block.h ****/ #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) -/**** skipping file: zstd_legacy.h ****/ +/**** skipping file: ../legacy/zstd_legacy.h ****/ #endif + +/************************************* + * Multiple DDicts Hashset internals * + *************************************/ + +#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4 +#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float. + * Currently, that means a 0.75 load factor. + * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded + * the load factor of the ddict hash set. 
+ */ + +#define DDICT_HASHSET_TABLE_BASE_SIZE 64 +#define DDICT_HASHSET_RESIZE_FACTOR 2 + +/* Hash function to determine starting position of dict insertion within the table + * Returns an index between [0, hashSet->ddictPtrTableSize] + */ +static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) { + const U64 hash = XXH64(&dictID, sizeof(U32), 0); + /* DDict ptr table size is a multiple of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */ + return hash & (hashSet->ddictPtrTableSize - 1); +} + +/* Adds DDict to a hashset without resizing it. + * If inserting a DDict with a dictID that already exists in the set, replaces the one in the set. + * Returns 0 if successful, or a zstd error code if something went wrong. + */ +static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict) { + const U32 dictID = ZSTD_getDictID_fromDDict(ddict); + size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID); + const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1; + RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!"); + DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx); + while (hashSet->ddictPtrTable[idx] != NULL) { + /* Replace existing ddict if inserting ddict with same dictID */ + if (ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]) == dictID) { + DEBUGLOG(4, "DictID already exists, replacing rather than adding"); + hashSet->ddictPtrTable[idx] = ddict; + return 0; + } + idx &= idxRangeMask; + idx++; + } + DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx); + hashSet->ddictPtrTable[idx] = ddict; + hashSet->ddictPtrCount++; + return 0; +} + +/* Expands hash table by factor of DDICT_HASHSET_RESIZE_FACTOR and + * rehashes all values, allocates new table, frees old table. + * Returns 0 on success, otherwise a zstd error code. + */ +static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) { + size_t newTableSize = hashSet->ddictPtrTableSize * DDICT_HASHSET_RESIZE_FACTOR; + const ZSTD_DDict** newTable = (const ZSTD_DDict**)ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem); + const ZSTD_DDict** oldTable = hashSet->ddictPtrTable; + size_t oldTableSize = hashSet->ddictPtrTableSize; + size_t i; + + DEBUGLOG(4, "Expanding DDict hash table! Old size: %zu new size: %zu", oldTableSize, newTableSize); + RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!"); + hashSet->ddictPtrTable = newTable; + hashSet->ddictPtrTableSize = newTableSize; + hashSet->ddictPtrCount = 0; + for (i = 0; i < oldTableSize; ++i) { + if (oldTable[i] != NULL) { + FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, oldTable[i]), ""); + } + } + ZSTD_customFree((void*)oldTable, customMem); + DEBUGLOG(4, "Finished re-hash"); + return 0; +} + +/* Fetches a DDict with the given dictID + * Returns the ZSTD_DDict* with the requested dictID. If it doesn't exist, then returns NULL. 
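The hash-set helpers here rely on a classic open-addressing scheme: a power-of-two table, an XXH64 hash masked into it, and linear probing until the wanted dictID or an empty slot (dictID 0) is found. Below is a generic, self-contained sketch of that probing idea; the types and names are illustrative and this is not the library's exact code.

    #include <stddef.h>

    typedef struct { unsigned key; const void* value; } Slot;   /* key 0 marks an empty slot */

    /* Sketch only: look up `key` in a power-of-two open-addressing table. */
    static const void* probeLookup(const Slot* table, size_t tableSize /* power of 2 */,
                                   size_t hash, unsigned key)
    {
        size_t const mask = tableSize - 1;
        size_t idx = hash & mask;
        for (;;) {
            if (table[idx].key == key) return table[idx].value;  /* found */
            if (table[idx].key == 0)   return NULL;              /* empty slot: absent */
            idx = (idx + 1) & mask;                              /* next slot, wrap at the end */
        }
    }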
+ */ +static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, U32 dictID) { + size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID); + const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1; + DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx); + for (;;) { + size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]); + if (currDictID == dictID || currDictID == 0) { + /* currDictID == 0 implies a NULL ddict entry */ + break; + } else { + idx &= idxRangeMask; /* Goes to start of table when we reach the end */ + idx++; + } + } + DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx); + return hashSet->ddictPtrTable[idx]; +} + +/* Allocates space for and returns a ddict hash set + * The hash set's ZSTD_DDict* table has all values automatically set to NULL to begin with. + * Returns NULL if allocation failed. + */ +static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) { + ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem); + DEBUGLOG(4, "Allocating new hash set"); + ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem); + ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE; + ret->ddictPtrCount = 0; + if (!ret || !ret->ddictPtrTable) { + return NULL; + } + return ret; +} + +/* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself. + * Note: The ZSTD_DDict* within the table are NOT freed. + */ +static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) { + DEBUGLOG(4, "Freeing ddict hash set"); + if (hashSet && hashSet->ddictPtrTable) { + ZSTD_customFree((void*)hashSet->ddictPtrTable, customMem); + } + if (hashSet) { + ZSTD_customFree(hashSet, customMem); + } +} + +/* Public function: Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set. + * Returns 0 on success, or a ZSTD error. 
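The SIZE_MULT/COUNT_MULT constants express a 3/4 load-factor threshold without floating point: count/size >= 3/4 is equivalent to count * 4 >= size * 3 after cross-multiplying. A tiny illustrative check in that spirit; the function name is this sketch's, not the library's.

    /* Sketch only: decide whether an open-addressing table should grow. */
    static int ddictSetNeedsResize(size_t ddictCount, size_t tableSize)
    {
        /* grow once the table is at least three-quarters full */
        return (ddictCount * 4) >= (tableSize * 3);
    }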
+ */ +static size_t ZSTD_DDictHashSet_addDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, ZSTD_customMem customMem) { + DEBUGLOG(4, "Adding dict ID: %u to hashset with - Count: %zu Tablesize: %zu", ZSTD_getDictID_fromDDict(ddict), hashSet->ddictPtrCount, hashSet->ddictPtrTableSize); + if (hashSet->ddictPtrCount * DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT / hashSet->ddictPtrTableSize * DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT != 0) { + FORWARD_IF_ERROR(ZSTD_DDictHashSet_expand(hashSet, customMem), ""); + } + FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, ddict), ""); + return 0; +} + /*-************************************************************* * Context management ***************************************************************/ @@ -8757,11 +11658,19 @@ static size_t ZSTD_startingInputLength(ZSTD_format_e format) return startingInputLength; } +static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx) +{ + assert(dctx->streamStage == zdss_init); + dctx->format = ZSTD_f_zstd1; + dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; + dctx->outBufferMode = ZSTD_bm_buffered; + dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; + dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict; +} + static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) { - dctx->format = ZSTD_f_zstd1; /* ZSTD_decompressBegin() invokes ZSTD_startingInputLength() with argument dctx->format */ dctx->staticSize = 0; - dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; dctx->ddict = NULL; dctx->ddictLocal = NULL; dctx->dictEnd = NULL; @@ -8774,7 +11683,13 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) dctx->legacyContext = NULL; dctx->previousLegacyVersion = 0; dctx->noForwardProgress = 0; + dctx->oversizedDuration = 0; dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + dctx->ddictSet = NULL; + ZSTD_DCtx_resetParameters(dctx); +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentEndForFuzzing = NULL; +#endif } ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) @@ -8792,9 +11707,9 @@ ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) { - if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; - { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(*dctx), customMem); + { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem); if (!dctx) return NULL; dctx->customMem = customMem; ZSTD_initDCtx_internal(dctx); @@ -8822,13 +11737,17 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx"); { ZSTD_customMem const cMem = dctx->customMem; ZSTD_clearDict(dctx); - ZSTD_free(dctx->inBuff, cMem); + ZSTD_customFree(dctx->inBuff, cMem); dctx->inBuff = NULL; #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) if (dctx->legacyContext) ZSTD_freeLegacyStreamContext(dctx->legacyContext, dctx->previousLegacyVersion); #endif - ZSTD_free(dctx, cMem); + if (dctx->ddictSet) { + ZSTD_freeDDictHashSet(dctx->ddictSet, cMem); + dctx->ddictSet = NULL; + } + ZSTD_customFree(dctx, cMem); return 0; } } @@ -8837,7 +11756,30 @@ size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) { size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx); - memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ + ZSTD_memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ +} + 
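ZSTD_createDCtx_advanced() takes a ZSTD_customMem, and the XOR check above requires customAlloc and customFree to be supplied together (or both left NULL for the defaults). Below is a hedged sketch wiring the standard allocator through that interface; the ZSTD_customMem field order (customAlloc, customFree, opaque) is the one published in zstd.h and is stated here as an assumption of this example.

    #include <stdlib.h>

    /* Sketch only: trivial callbacks that ignore the opaque pointer. */
    static void* exampleAlloc(void* opaque, size_t size) { (void)opaque; return malloc(size); }
    static void  exampleFree (void* opaque, void* addr)  { (void)opaque; free(addr); }

    static ZSTD_DCtx* createDCtxWithCustomMem(void)
    {
        ZSTD_customMem const cmem = { exampleAlloc, exampleFree, NULL /* opaque */ };
        return ZSTD_createDCtx_advanced(cmem);   /* NULL if the allocation itself fails */
    }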
+/* Given a dctx with a digested frame params, re-selects the correct ZSTD_DDict based on + * the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then + * accordingly sets the ddict to be used to decompress the frame. + * + * If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is. + * + * ZSTD_d_refMultipleDDicts must be enabled for this function to be called. + */ +static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) { + assert(dctx->refMultipleDDicts && dctx->ddictSet); + DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame"); + if (dctx->ddict) { + const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID); + if (frameDDict) { + DEBUGLOG(4, "DDict found!"); + ZSTD_clearDict(dctx); + dctx->dictID = dctx->fParams.dictID; + dctx->ddict = frameDDict; + dctx->dictUses = ZSTD_use_indefinitely; + } + } } @@ -8871,7 +11813,7 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size) static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) { size_t const minInputSize = ZSTD_startingInputLength(format); - RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong); + RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, ""); { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; U32 const dictID= fhd & 3; @@ -8904,7 +11846,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s const BYTE* ip = (const BYTE*)src; size_t const minInputSize = ZSTD_startingInputLength(format); - memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ if (srcSize < minInputSize) return minInputSize; RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter"); @@ -8914,12 +11856,12 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s /* skippable frame */ if (srcSize < ZSTD_SKIPPABLEHEADERSIZE) return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */ - memset(zfhPtr, 0, sizeof(*zfhPtr)); + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE); zfhPtr->frameType = ZSTD_skippableFrame; return 0; } - RETURN_ERROR(prefix_unknown); + RETURN_ERROR(prefix_unknown, ""); } /* ensure there is enough `srcSize` to fully read/decode frame header */ @@ -8943,7 +11885,7 @@ size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, s if (!singleSegment) { BYTE const wlByte = ip[pos++]; U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; - RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge); + RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, ""); windowSize = (1ULL << windowLog); windowSize += (windowSize >> 3) * (wlByte&7); } @@ -9015,14 +11957,14 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize) size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE; U32 sizeU32; - RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong); + RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, ""); sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); 
RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, - frameParameter_unsupported); + frameParameter_unsupported, ""); { size_t const skippableSize = skippableHeaderSize + sizeU32; - RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong); + RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, ""); return skippableSize; } } @@ -9091,20 +12033,29 @@ unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) /** ZSTD_decodeFrameHeader() : * `headerSize` must be the size provided by ZSTD_frameHeaderSize(). + * If multiple DDict references are enabled, also will choose the correct DDict to use. * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) { size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format); if (ZSTD_isError(result)) return result; /* invalid header */ RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small"); + + /* Reference DDict requested by frame if dctx references multiple ddicts */ + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) { + ZSTD_DCtx_selectFrameDDict(dctx); + } + #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION /* Skip the dictID check in fuzzing mode, because it makes the search * harder. */ RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), - dictionary_wrong); + dictionary_wrong, ""); #endif - if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); + dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0; + if (dctx->validateChecksum) XXH64_reset(&dctx->xxhState, 0); + dctx->processedCSize += headerSize; return 0; } @@ -9119,7 +12070,7 @@ static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret) static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) { ZSTD_frameSizeInfo frameSizeInfo; - memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); + ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) if (ZSTD_isLegacy(src, srcSize)) @@ -9174,7 +12125,7 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize ip += 4; } - frameSizeInfo.compressedSize = ip - ipstart; + frameSizeInfo.compressedSize = (size_t)(ip - ipstart); frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) ? 
zfh.frameContentSize : nbBlocks * zfh.blockSizeMax; @@ -9227,7 +12178,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) { DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize); - ZSTD_checkContinuity(dctx, blockStart); + ZSTD_checkContinuity(dctx, blockStart, blockSize); dctx->previousDstEnd = (const char*)blockStart + blockSize; return blockSize; } @@ -9237,12 +12188,12 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, const void* src, size_t srcSize) { DEBUGLOG(5, "ZSTD_copyRawBlock"); + RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); if (dst == NULL) { if (srcSize == 0) return 0; - RETURN_ERROR(dstBuffer_null); + RETURN_ERROR(dstBuffer_null, ""); } - RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall); - memcpy(dst, src, srcSize); + ZSTD_memcpy(dst, src, srcSize); return srcSize; } @@ -9250,15 +12201,41 @@ static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, BYTE b, size_t regenSize) { + RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, ""); if (dst == NULL) { if (regenSize == 0) return 0; - RETURN_ERROR(dstBuffer_null); + RETURN_ERROR(dstBuffer_null, ""); } - RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall); - memset(dst, b, regenSize); + ZSTD_memset(dst, b, regenSize); return regenSize; } +static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming) +{ +#if ZSTD_TRACE + if (dctx->traceCtx && ZSTD_trace_decompress_end != NULL) { + ZSTD_Trace trace; + ZSTD_memset(&trace, 0, sizeof(trace)); + trace.version = ZSTD_VERSION_NUMBER; + trace.streaming = streaming; + if (dctx->ddict) { + trace.dictionaryID = ZSTD_getDictID_fromDDict(dctx->ddict); + trace.dictionarySize = ZSTD_DDict_dictSize(dctx->ddict); + trace.dictionaryIsCold = dctx->ddictIsCold; + } + trace.uncompressedSize = (size_t)uncompressedSize; + trace.compressedSize = (size_t)compressedSize; + trace.dctx = dctx; + ZSTD_trace_decompress_end(dctx->traceCtx, &trace); + } +#else + (void)dctx; + (void)uncompressedSize; + (void)compressedSize; + (void)streaming; +#endif +} + /*! ZSTD_decompressFrame() : * @dctx must be properly initialized @@ -9268,8 +12245,9 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void** srcPtr, size_t *srcSizePtr) { - const BYTE* ip = (const BYTE*)(*srcPtr); - BYTE* const ostart = (BYTE* const)dst; + const BYTE* const istart = (const BYTE*)(*srcPtr); + const BYTE* ip = istart; + BYTE* const ostart = (BYTE*)dst; BYTE* const oend = dstCapacity != 0 ? 
ostart + dstCapacity : ostart; BYTE* op = ostart; size_t remainingSrcSize = *srcSizePtr; @@ -9279,15 +12257,15 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, /* check */ RETURN_ERROR_IF( remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize, - srcSize_wrong); + srcSize_wrong, ""); /* Frame Header */ { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal( ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format); if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize, - srcSize_wrong); - FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) ); + srcSize_wrong, ""); + FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , ""); ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; } @@ -9300,26 +12278,26 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, ip += ZSTD_blockHeaderSize; remainingSrcSize -= ZSTD_blockHeaderSize; - RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong); + RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, ""); switch(blockProperties.blockType) { case bt_compressed: - decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend-op, ip, cBlockSize, /* frame */ 1); + decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1); break; case bt_raw : - decodedSize = ZSTD_copyRawBlock(op, oend-op, ip, cBlockSize); + decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize); break; case bt_rle : - decodedSize = ZSTD_setRleBlock(op, oend-op, *ip, blockProperties.origSize); + decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize); break; case bt_reserved : default: - RETURN_ERROR(corruption_detected); + RETURN_ERROR(corruption_detected, "invalid block type"); } if (ZSTD_isError(decodedSize)) return decodedSize; - if (dctx->fParams.checksumFlag) + if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, op, decodedSize); if (decodedSize != 0) op += decodedSize; @@ -9331,22 +12309,24 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize, - corruption_detected); + corruption_detected, ""); } if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ - U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); - U32 checkRead; - RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong); - checkRead = MEM_readLE32(ip); - RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong); + RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, ""); + if (!dctx->forceIgnoreChecksum) { + U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); + U32 checkRead; + checkRead = MEM_readLE32(ip); + RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, ""); + } ip += 4; remainingSrcSize -= 4; } - + ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0); /* Allow caller to get size read */ *srcPtr = ip; *srcSizePtr = remainingSrcSize; - return op-ostart; + return (size_t)(op-ostart); } static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, @@ -9379,7 +12359,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); if (ZSTD_isError(decodedSize)) return decodedSize; - assert(decodedSize <=- dstCapacity); + assert(decodedSize <= dstCapacity); dst = (BYTE*)dst + decodedSize; dstCapacity 
-= decodedSize; @@ -9395,7 +12375,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, (unsigned)magicNumber, ZSTD_MAGICNUMBER); if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { size_t const skippableSize = readSkippableFrameSize(src, srcSize); - FORWARD_IF_ERROR(skippableSize); + FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed"); assert(skippableSize <= srcSize); src = (const BYTE *)src + skippableSize; @@ -9405,13 +12385,13 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, if (ddict) { /* we were called from ZSTD_decompress_usingDDict */ - FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict)); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), ""); } else { /* this will initialize correctly with no dict if dict == NULL, so * use this in all cases but ddict */ - FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize)); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), ""); } - ZSTD_checkContinuity(dctx, dst); + ZSTD_checkContinuity(dctx, dst, dstCapacity); { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, &src, &srcSize); @@ -9419,15 +12399,13 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) && (moreThan1Frame==1), srcSize_wrong, - "at least one frame successfully completed, but following " - "bytes are garbage: it's more likely to be a srcSize error, " - "specifying more bytes than compressed size of frame(s). This " - "error message replaces ERROR(prefix_unknown), which would be " - "confusing, as the first header is actually correct. Note that " - "one could be unlucky, it might be a corruption error instead, " - "happening right at the place where we expect zstd magic " - "bytes. But this is _much_ less likely than a srcSize field " - "error."); + "At least one frame successfully completed, " + "but following bytes are garbage: " + "it's more likely to be a srcSize error, " + "specifying more input bytes than size of frame(s). " + "Note: one could be unlucky, it might be a corruption error instead, " + "happening right at the place where we expect zstd magic bytes. 
" + "But this is _much_ less likely than a srcSize field error."); if (ZSTD_isError(res)) return res; assert(res <= dstCapacity); if (res != 0) @@ -9439,7 +12417,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed"); - return (BYTE*)dst - (BYTE*)dststart; + return (size_t)((BYTE*)dst - (BYTE*)dststart); } size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, @@ -9479,7 +12457,7 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr #if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) size_t regenSize; ZSTD_DCtx* const dctx = ZSTD_createDCtx(); - RETURN_ERROR_IF(dctx==NULL, memory_allocation); + RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!"); regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); ZSTD_freeDCtx(dctx); return regenSize; @@ -9548,7 +12526,9 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize); /* Sanity check */ RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed"); - if (dstCapacity) ZSTD_checkContinuity(dctx, dst); + ZSTD_checkContinuity(dctx, dst, dstCapacity); + + dctx->processedCSize += srcSize; switch (dctx->stage) { @@ -9557,22 +12537,22 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c if (dctx->format == ZSTD_f_zstd1) { /* allows header */ assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */ if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ - memcpy(dctx->headerBuffer, src, srcSize); + ZSTD_memcpy(dctx->headerBuffer, src, srcSize); dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize; /* remaining to load to get full skippable frame header */ dctx->stage = ZSTDds_decodeSkippableHeader; return 0; } } dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format); if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize; - memcpy(dctx->headerBuffer, src, srcSize); + ZSTD_memcpy(dctx->headerBuffer, src, srcSize); dctx->expected = dctx->headerSize - srcSize; dctx->stage = ZSTDds_decodeFrameHeader; return 0; case ZSTDds_decodeFrameHeader: assert(src != NULL); - memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); - FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize)); + ZSTD_memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), ""); dctx->expected = ZSTD_blockHeaderSize; dctx->stage = ZSTDds_decodeBlockHeader; return 0; @@ -9619,7 +12599,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c case bt_raw : assert(srcSize <= dctx->expected); rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); - FORWARD_IF_ERROR(rSize); + FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed"); assert(rSize == srcSize); dctx->expected -= rSize; break; @@ -9629,13 +12609,13 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c break; case bt_reserved : /* should never happen */ default: - RETURN_ERROR(corruption_detected); + RETURN_ERROR(corruption_detected, "invalid block type"); } - FORWARD_IF_ERROR(rSize); + FORWARD_IF_ERROR(rSize, ""); RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); 
DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); dctx->decodedSize += rSize; - if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize); + if (dctx->validateChecksum) XXH64_update(&dctx->xxhState, dst, rSize); dctx->previousDstEnd = (char*)dst + rSize; /* Stay on the same stage until we are finished streaming the block. */ @@ -9648,11 +12628,12 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c RETURN_ERROR_IF( dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN && dctx->decodedSize != dctx->fParams.frameContentSize, - corruption_detected); + corruption_detected, ""); if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ dctx->expected = 4; dctx->stage = ZSTDds_checkChecksum; } else { + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); dctx->expected = 0; /* ends here */ dctx->stage = ZSTDds_getFrameHeaderSize; } @@ -9665,10 +12646,14 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c case ZSTDds_checkChecksum: assert(srcSize == 4); /* guaranteed by dctx->expected */ - { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); - U32 const check32 = MEM_readLE32(src); - DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); - RETURN_ERROR_IF(check32 != h32, checksum_wrong); + { + if (dctx->validateChecksum) { + U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); + U32 const check32 = MEM_readLE32(src); + DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); + RETURN_ERROR_IF(check32 != h32, checksum_wrong, ""); + } + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); dctx->expected = 0; dctx->stage = ZSTDds_getFrameHeaderSize; return 0; @@ -9677,7 +12662,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c case ZSTDds_decodeSkippableHeader: assert(src != NULL); assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); - memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ + ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ dctx->stage = ZSTDds_skipFrame; return 0; @@ -9689,7 +12674,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c default: assert(0); /* impossible */ - RETURN_ERROR(GENERIC); /* some compiler require default to do something */ + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ } } @@ -9700,6 +12685,10 @@ static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dict dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); dctx->prefixStart = dict; dctx->previousDstEnd = (const char*)dict + dictSize; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif return 0; } @@ -9713,7 +12702,7 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, const BYTE* dictPtr = (const BYTE*)dict; const BYTE* const dictEnd = dictPtr + dictSize; - RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted); + 
RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small"); assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */ dictPtr += 8; /* skip header = magic + dictID */ @@ -9729,63 +12718,69 @@ ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, workspace, workspaceSize); #else size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, - dictPtr, dictEnd - dictPtr, + dictPtr, (size_t)(dictEnd - dictPtr), workspace, workspaceSize); #endif - RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted); + RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, ""); dictPtr += hSize; } { short offcodeNCount[MaxOff+1]; unsigned offcodeMaxValue = MaxOff, offcodeLog; - size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted); - RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted); - RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted); + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); ZSTD_buildFSETable( entropy->OFTable, offcodeNCount, offcodeMaxValue, OF_base, OF_bits, - offcodeLog); + offcodeLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */0); dictPtr += offcodeHeaderSize; } { short matchlengthNCount[MaxML+1]; unsigned matchlengthMaxValue = MaxML, matchlengthLog; - size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted); - RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted); - RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted); + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); ZSTD_buildFSETable( entropy->MLTable, matchlengthNCount, matchlengthMaxValue, ML_base, ML_bits, - matchlengthLog); + matchlengthLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); dictPtr += matchlengthHeaderSize; } { short litlengthNCount[MaxLL+1]; unsigned litlengthMaxValue = MaxLL, litlengthLog; - size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); - RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted); - RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted); - RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted); + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); ZSTD_buildFSETable( entropy->LLTable, litlengthNCount, litlengthMaxValue, LL_base, LL_bits, - litlengthLog); + 
litlengthLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); dictPtr += litlengthHeaderSize; } - RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted); + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); { int i; size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); for (i=0; i<3; i++) { U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; RETURN_ERROR_IF(rep==0 || rep > dictContentSize, - dictionary_corrupted); + dictionary_corrupted, ""); entropy->rep[i] = rep; } } - return dictPtr - (const BYTE*)dict; + return (size_t)(dictPtr - (const BYTE*)dict); } static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) @@ -9799,7 +12794,7 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict /* load entropy tables */ { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize); - RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted); + RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, ""); dict = (const char*)dict + eSize; dictSize -= eSize; } @@ -9812,8 +12807,12 @@ static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) { assert(dctx != NULL); +#if ZSTD_TRACE + dctx->traceCtx = (ZSTD_trace_decompress_begin != NULL) ? ZSTD_trace_decompress_begin(dctx) : 0; +#endif dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */ dctx->stage = ZSTDds_getFrameHeaderSize; + dctx->processedCSize = 0; dctx->decodedSize = 0; dctx->previousDstEnd = NULL; dctx->prefixStart = NULL; @@ -9824,7 +12823,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) dctx->dictID = 0; dctx->bType = bt_reserved; ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); - memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ + ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ dctx->LLTptr = dctx->entropy.LLTable; dctx->MLTptr = dctx->entropy.MLTable; dctx->OFTptr = dctx->entropy.OFTable; @@ -9834,11 +12833,11 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) { - FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) ); + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); if (dict && dictSize) RETURN_ERROR_IF( ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)), - dictionary_corrupted); + dictionary_corrupted, ""); return 0; } @@ -9857,7 +12856,7 @@ size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) DEBUGLOG(4, "DDict is %s", dctx->ddictIsCold ? 
"~cold~" : "hot!"); } - FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) ); + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); if (ddict) { /* NULL ddict is equivalent to no dictionary */ ZSTD_copyDDictParameters(dctx, ddict); } @@ -9948,11 +12947,11 @@ size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) { - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); ZSTD_clearDict(dctx); if (dict && dictSize != 0) { dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); - RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation); + RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!"); dctx->ddict = dctx->ddictLocal; dctx->dictUses = ZSTD_use_indefinitely; } @@ -9971,7 +12970,7 @@ size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSi size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) { - FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType)); + FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), ""); dctx->dictUses = ZSTD_use_once; return 0; } @@ -9988,8 +12987,8 @@ size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSiz size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) { DEBUGLOG(4, "ZSTD_initDStream_usingDict"); - FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) ); + FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , ""); return ZSTD_startingInputLength(zds->format); } @@ -10005,8 +13004,8 @@ size_t ZSTD_initDStream(ZSTD_DStream* zds) * this function cannot fail */ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) { - FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) ); - FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) ); + FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , ""); return ZSTD_startingInputLength(dctx->format); } @@ -10015,18 +13014,28 @@ size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) * this function cannot fail */ size_t ZSTD_resetDStream(ZSTD_DStream* dctx) { - FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only)); + FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), ""); return ZSTD_startingInputLength(dctx->format); } size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) { - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); ZSTD_clearDict(dctx); if (ddict) { dctx->ddict = ddict; dctx->dictUses = ZSTD_use_indefinitely; + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) { + if (dctx->ddictSet == NULL) { + dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem); + if (!dctx->ddictSet) { + RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!"); + } + } + assert(!dctx->staticSize); /* Impossible: ddictSet cannot have been allocated if static dctx */ + 
FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict, dctx->customMem), ""); + } } return 0; } @@ -10039,16 +13048,16 @@ size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax); size_t const min = (size_t)1 << bounds.lowerBound; size_t const max = (size_t)1 << bounds.upperBound; - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); - RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound); - RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, ""); + RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, ""); dctx->maxWindowSize = maxWindowSize; return 0; } size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) { - return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, format); + return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (int)format); } ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) @@ -10064,6 +13073,18 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) bounds.upperBound = (int)ZSTD_f_zstd1_magicless; ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); return bounds; + case ZSTD_d_stableOutBuffer: + bounds.lowerBound = (int)ZSTD_bm_buffered; + bounds.upperBound = (int)ZSTD_bm_stable; + return bounds; + case ZSTD_d_forceIgnoreChecksum: + bounds.lowerBound = (int)ZSTD_d_validateChecksum; + bounds.upperBound = (int)ZSTD_d_ignoreChecksum; + return bounds; + case ZSTD_d_refMultipleDDicts: + bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict; + bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts; + return bounds; default:; } bounds.error = ERROR(parameter_unsupported); @@ -10083,12 +13104,35 @@ static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value) } #define CHECK_DBOUNDS(p,v) { \ - RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound); \ + RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \ +} + +size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value) +{ + switch (param) { + case ZSTD_d_windowLogMax: + *value = (int)ZSTD_highbit32((U32)dctx->maxWindowSize); + return 0; + case ZSTD_d_format: + *value = (int)dctx->format; + return 0; + case ZSTD_d_stableOutBuffer: + *value = (int)dctx->outBufferMode; + return 0; + case ZSTD_d_forceIgnoreChecksum: + *value = (int)dctx->forceIgnoreChecksum; + return 0; + case ZSTD_d_refMultipleDDicts: + *value = (int)dctx->refMultipleDDicts; + return 0; + default:; + } + RETURN_ERROR(parameter_unsupported, ""); } size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value) { - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); switch(dParam) { case ZSTD_d_windowLogMax: if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT; @@ -10099,9 +13143,24 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value CHECK_DBOUNDS(ZSTD_d_format, value); dctx->format = (ZSTD_format_e)value; return 0; + case ZSTD_d_stableOutBuffer: + CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value); + dctx->outBufferMode = (ZSTD_bufferMode_e)value; + return 0; + case ZSTD_d_forceIgnoreChecksum: + CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value); + dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value; + return 0; + case ZSTD_d_refMultipleDDicts: + CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value); + if 
(dctx->staticSize != 0) { + RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!"); + } + dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value; + return 0; default:; } - RETURN_ERROR(parameter_unsupported); + RETURN_ERROR(parameter_unsupported, ""); } size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) @@ -10113,10 +13172,9 @@ size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) } if ( (reset == ZSTD_reset_parameters) || (reset == ZSTD_reset_session_and_parameters) ) { - RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); ZSTD_clearDict(dctx); - dctx->format = ZSTD_f_zstd1; - dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; + ZSTD_DCtx_resetParameters(dctx); } return 0; } @@ -10134,7 +13192,7 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); size_t const minRBSize = (size_t) neededSize; RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, - frameParameter_windowTooLarge); + frameParameter_windowTooLarge, ""); return minRBSize; } @@ -10152,24 +13210,84 @@ size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) ZSTD_frameHeader zfh; size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); if (ZSTD_isError(err)) return err; - RETURN_ERROR_IF(err>0, srcSize_wrong); + RETURN_ERROR_IF(err>0, srcSize_wrong, ""); RETURN_ERROR_IF(zfh.windowSize > windowSizeMax, - frameParameter_windowTooLarge); + frameParameter_windowTooLarge, ""); return ZSTD_estimateDStreamSize((size_t)zfh.windowSize); } /* ***** Decompression ***** */ -MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) { - size_t const length = MIN(dstCapacity, srcSize); - if (length > 0) { - memcpy(dst, src, length); - } - return length; + return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR; +} + +static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) +{ + if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize)) + zds->oversizedDuration++; + else + zds->oversizedDuration = 0; +} + +static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds) +{ + return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION; +} + +/* Checks that the output buffer hasn't changed if ZSTD_obm_stable is used. */ +static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output) +{ + ZSTD_outBuffer const expect = zds->expectedOutBuffer; + /* No requirement when ZSTD_obm_stable is not enabled. */ + if (zds->outBufferMode != ZSTD_bm_stable) + return 0; + /* Any buffer is allowed in zdss_init, this must be the same for every other call until + * the context is reset. + */ + if (zds->streamStage == zdss_init) + return 0; + /* The buffer must match our expectation exactly. */ + if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size) + return 0; + RETURN_ERROR(dstBuffer_wrong, "ZSTD_d_stableOutBuffer enabled but output differs!"); } +/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream() + * and updates the stage and the output buffer state. 
This call is extracted so it can be + * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode. + * NOTE: You must break after calling this function since the streamStage is modified. + */ +static size_t ZSTD_decompressContinueStream( + ZSTD_DStream* zds, char** op, char* oend, + void const* src, size_t srcSize) { + int const isSkipFrame = ZSTD_isSkipFrame(zds); + if (zds->outBufferMode == ZSTD_bm_buffered) { + size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart; + size_t const decodedSize = ZSTD_decompressContinue(zds, + zds->outBuff + zds->outStart, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + if (!decodedSize && !isSkipFrame) { + zds->streamStage = zdss_read; + } else { + zds->outEnd = zds->outStart + decodedSize; + zds->streamStage = zdss_flush; + } + } else { + /* Write directly into the output buffer */ + size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op); + size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + *op += decodedSize; + /* Flushing is not needed. */ + zds->streamStage = zdss_read; + assert(*op <= oend); + assert(zds->outBufferMode == ZSTD_bm_stable); + } + return 0; +} size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input) { @@ -10195,6 +13313,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB "forbidden. out: pos: %u vs size: %u", (U32)output->pos, (U32)output->size); DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); + FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), ""); while (someMoreWork) { switch(zds->streamStage) @@ -10205,6 +13324,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; zds->legacyVersion = 0; zds->hostageByte = 0; + zds->expectedOutBuffer = *output; /* fall-through */ case zdss_loadHeader : @@ -10219,6 +13339,9 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB } } #endif { size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format); + if (zds->refMultipleDDicts && zds->ddictSet) { + ZSTD_DCtx_selectFrameDDict(zds); + } DEBUGLOG(5, "header size : %u", (U32)hSize); if (ZSTD_isError(hSize)) { #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) @@ -10232,7 +13355,7 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB "legacy support is incompatible with static dctx"); FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext, zds->previousLegacyVersion, legacyVersion, - dict, dictSize)); + dict, dictSize), ""); zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, legacyVersion, output, input); if (hint==0) zds->streamStage = zdss_init; /* or stay in stage zdss_loadHeader */ @@ -10247,24 +13370,25 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB assert(iend >= ip); if (toLoad > remainingInput) { /* not enough input to load full header */ if (remainingInput > 0) { - memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); zds->lhSize += remainingInput; } input->pos = input->size; return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block 
header */ } assert(ip != NULL); - memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; break; } } /* check for single-pass mode opportunity */ - if (zds->fParams.frameContentSize && zds->fParams.windowSize /* skippable frame if == 0 */ + if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && zds->fParams.frameType != ZSTD_skippableFrame && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { - size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart); + size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart)); if (cSize <= (size_t)(iend-istart)) { /* shortcut : using single-pass mode */ - size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, ZSTD_getDDict(zds)); + size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds)); if (ZSTD_isError(decompressedSize)) return decompressedSize; DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") ip = istart + cSize; @@ -10275,15 +13399,23 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB break; } } + /* Check output buffer is large enough for ZSTD_odm_stable. */ + if (zds->outBufferMode == ZSTD_bm_stable + && zds->fParams.frameType != ZSTD_skippableFrame + && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) { + RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small"); + } + /* Consume header (see ZSTDds_decodeFrameHeader) */ DEBUGLOG(4, "Consume header"); - FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds))); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); zds->stage = ZSTDds_skipFrame; } else { - FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize)); + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), ""); zds->expected = ZSTD_blockHeaderSize; zds->stage = ZSTDds_decodeBlockHeader; } @@ -10294,40 +13426,48 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB (U32)(zds->maxWindowSize >> 10) ); zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, - frameParameter_windowTooLarge); + frameParameter_windowTooLarge, ""); /* Adapt buffer sizes to frame header instructions */ { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); - size_t const neededOutBuffSize = ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize); - if ((zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize)) { - size_t const bufferSize = neededInBuffSize + neededOutBuffSize; - DEBUGLOG(4, "inBuff : from %u to %u", - (U32)zds->inBuffSize, (U32)neededInBuffSize); - DEBUGLOG(4, "outBuff : from %u to %u", - (U32)zds->outBuffSize, (U32)neededOutBuffSize); - if (zds->staticSize) { /* static DCtx */ - DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); - assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ - RETURN_ERROR_IF( - bufferSize 
> zds->staticSize - sizeof(ZSTD_DCtx), - memory_allocation); - } else { - ZSTD_free(zds->inBuff, zds->customMem); - zds->inBuffSize = 0; - zds->outBuffSize = 0; - zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem); - RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation); - } - zds->inBuffSize = neededInBuffSize; - zds->outBuff = zds->inBuff + zds->inBuffSize; - zds->outBuffSize = neededOutBuffSize; - } } + size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered + ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize) + : 0; + + ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize); + + { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize); + int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds); + + if (tooSmall || tooLarge) { + size_t const bufferSize = neededInBuffSize + neededOutBuffSize; + DEBUGLOG(4, "inBuff : from %u to %u", + (U32)zds->inBuffSize, (U32)neededInBuffSize); + DEBUGLOG(4, "outBuff : from %u to %u", + (U32)zds->outBuffSize, (U32)neededOutBuffSize); + if (zds->staticSize) { /* static DCtx */ + DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); + assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ + RETURN_ERROR_IF( + bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), + memory_allocation, ""); + } else { + ZSTD_customFree(zds->inBuff, zds->customMem); + zds->inBuffSize = 0; + zds->outBuffSize = 0; + zds->inBuff = (char*)ZSTD_customMalloc(bufferSize, zds->customMem); + RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, ""); + } + zds->inBuffSize = neededInBuffSize; + zds->outBuff = zds->inBuff + zds->inBuffSize; + zds->outBuffSize = neededOutBuffSize; + } } } zds->streamStage = zdss_read; /* fall-through */ case zdss_read: DEBUGLOG(5, "stage zdss_read"); - { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip); + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip)); DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize); if (neededInSize==0) { /* end of frame */ zds->streamStage = zdss_init; @@ -10335,15 +13475,9 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB break; } if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ - int const isSkipFrame = ZSTD_isSkipFrame(zds); - size_t const decodedSize = ZSTD_decompressContinue(zds, - zds->outBuff + zds->outStart, (isSkipFrame ? 
0 : zds->outBuffSize - zds->outStart), - ip, neededInSize); - if (ZSTD_isError(decodedSize)) return decodedSize; + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), ""); ip += neededInSize; - if (!decodedSize && !isSkipFrame) break; /* this was just a header */ - zds->outEnd = zds->outStart + decodedSize; - zds->streamStage = zdss_flush; + /* Function modifies the stage so we must break */ break; } } if (ip==iend) { someMoreWork = 0; break; } /* no more input */ @@ -10363,27 +13497,21 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos, corruption_detected, "should never happen"); - loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip); + loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip)); } ip += loadedSize; zds->inPos += loadedSize; if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ /* decode loaded input */ - { size_t const decodedSize = ZSTD_decompressContinue(zds, - zds->outBuff + zds->outStart, zds->outBuffSize - zds->outStart, - zds->inBuff, neededInSize); - if (ZSTD_isError(decodedSize)) return decodedSize; - zds->inPos = 0; /* input is consumed */ - if (!decodedSize && !isSkipFrame) { zds->streamStage = zdss_read; break; } /* this was just a header */ - zds->outEnd = zds->outStart + decodedSize; - } } - zds->streamStage = zdss_flush; - /* fall-through */ - + zds->inPos = 0; /* input is consumed */ + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), ""); + /* Function modifies the stage so we must break */ + break; + } case zdss_flush: { size_t const toFlushSize = zds->outEnd - zds->outStart; - size_t const flushedSize = ZSTD_limitCopy(op, oend-op, zds->outBuff + zds->outStart, toFlushSize); + size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize); op += flushedSize; zds->outStart += flushedSize; if (flushedSize == toFlushSize) { /* flush completed */ @@ -10403,17 +13531,21 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB default: assert(0); /* impossible */ - RETURN_ERROR(GENERIC); /* some compiler require default to do something */ + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ } } /* result */ input->pos = (size_t)(ip - (const char*)(input->src)); output->pos = (size_t)(op - (char*)(output->dst)); + + /* Update the expected output buffer for ZSTD_obm_stable. */ + zds->expectedOutBuffer = *output; + if ((ip==istart) && (op==ostart)) { /* no forward progress */ zds->noForwardProgress ++; if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { - RETURN_ERROR_IF(op==oend, dstSize_tooSmall); - RETURN_ERROR_IF(ip==iend, srcSize_wrong); + RETURN_ERROR_IF(op==oend, dstSize_tooSmall, ""); + RETURN_ERROR_IF(ip==iend, srcSize_wrong, ""); assert(0); } } else { @@ -10458,10 +13590,10 @@ size_t ZSTD_decompressStream_simpleArgs ( *srcPos = input.pos; return cErr; } -/**** ended inlining zstd_decompress.c ****/ -/**** start inlining zstd_decompress_block.c ****/ +/**** ended inlining decompress/zstd_decompress.c ****/ +/**** start inlining decompress/zstd_decompress_block.c ****/ /* - * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. 
* * This source code is licensed under both the BSD-style license (found in the @@ -10476,15 +13608,15 @@ size_t ZSTD_decompressStream_simpleArgs ( /*-******************************************************* * Dependencies *********************************************************/ -#include /* memcpy, memmove, memset */ -/**** skipping file: compiler.h ****/ -/**** skipping file: cpu.h ****/ -/**** skipping file: mem.h ****/ +/**** skipping file: ../common/zstd_deps.h ****/ +/**** skipping file: ../common/compiler.h ****/ +/**** skipping file: ../common/cpu.h ****/ +/**** skipping file: ../common/mem.h ****/ #define FSE_STATIC_LINKING_ONLY -/**** skipping file: fse.h ****/ +/**** skipping file: ../common/fse.h ****/ #define HUF_STATIC_LINKING_ONLY -/**** skipping file: huf.h ****/ -/**** skipping file: zstd_internal.h ****/ +/**** skipping file: ../common/huf.h ****/ +/**** skipping file: ../common/zstd_internal.h ****/ /**** skipping file: zstd_decompress_internal.h ****/ /**** skipping file: zstd_ddict.h ****/ /**** skipping file: zstd_decompress_block.h ****/ @@ -10506,7 +13638,7 @@ size_t ZSTD_decompressStream_simpleArgs ( /*_******************************************************* * Memory operations **********************************************************/ -static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } +static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); } /*-************************************************************* @@ -10518,7 +13650,7 @@ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); } size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr) { - RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong); + RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, ""); { U32 const cBlockHeader = MEM_readLE24(src); U32 const cSize = cBlockHeader >> 3; @@ -10526,7 +13658,7 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); bpPtr->origSize = cSize; /* only useful for RLE */ if (bpPtr->blockType == bt_rle) return 1; - RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected); + RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, ""); return cSize; } } @@ -10542,7 +13674,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ { DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); - RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected); + RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); { const BYTE* const istart = (const BYTE*) src; symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); @@ -10551,7 +13683,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, { case set_repeat: DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block"); - RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted); + RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, ""); /* fall-through */ case set_compressed: @@ -10583,8 +13715,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); break; } - RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected); - RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected); + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); /* prefetch huffman 
table if cold */ if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { @@ -10622,13 +13754,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, } } - RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected); + RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); dctx->litPtr = dctx->litBuffer; dctx->litSize = litSize; dctx->litEntropy = 1; if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable; - memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); return litCSize + lhSize; } @@ -10652,11 +13784,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, } if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ - RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected); - memcpy(dctx->litBuffer, istart+lhSize, litSize); + RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, ""); + ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize); dctx->litPtr = dctx->litBuffer; dctx->litSize = litSize; - memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); return lhSize+litSize; } /* direct reference into compressed stream */ @@ -10684,8 +13816,8 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); break; } - RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected); - memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); dctx->litPtr = dctx->litBuffer; dctx->litSize = litSize; return lhSize+1; @@ -10698,7 +13830,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, /* Default FSE distribution tables. 
* These are pre-calculated FSE decoding tables using default distributions as defined in specification : - * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions + * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions * They were generated programmatically with following method : * - start from default distributions, present in /lib/common/zstd_internal.h * - generate tables normally, using ZSTD_buildFSETable() @@ -10826,23 +13958,26 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB * generate FSE decoding table for one symbol (ll, ml or off) * cannot fail if input is valid => * all inputs are presumed validated at this stage */ -void -ZSTD_buildFSETable(ZSTD_seqSymbol* dt, +FORCE_INLINE_TEMPLATE +void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, const short* normalizedCounter, unsigned maxSymbolValue, const U32* baseValue, const U32* nbAdditionalBits, - unsigned tableLog) + unsigned tableLog, void* wksp, size_t wkspSize) { ZSTD_seqSymbol* const tableDecode = dt+1; - U16 symbolNext[MaxSeq+1]; - U32 const maxSV1 = maxSymbolValue + 1; U32 const tableSize = 1 << tableLog; - U32 highThreshold = tableSize-1; + + U16* symbolNext = (U16*)wksp; + BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1); + U32 highThreshold = tableSize - 1; + /* Sanity Checks */ assert(maxSymbolValue <= MaxSeq); assert(tableLog <= MaxFSELog); - + assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE); + (void)wkspSize; /* Init, lay down lowprob symbols */ { ZSTD_seqSymbol_header DTableH; DTableH.tableLog = tableLog; @@ -10858,16 +13993,69 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt, assert(normalizedCounter[s]>=0); symbolNext[s] = (U16)normalizedCounter[s]; } } } - memcpy(dt, &DTableH, sizeof(DTableH)); + ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); } /* Spread symbols */ - { U32 const tableMask = tableSize-1; + assert(tableSize <= 512); + /* Specialized symbol spreading for the case when there are + * no low probability (-1 count) symbols. When compressing + * small blocks we avoid low probability symbols to hit this + * case, since header decoding speed matters more. + */ + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. 
+ */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ @@ -10876,7 +14064,8 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt, } /* Build Decoding table */ - { U32 u; + { + U32 u; for (u=0; u max, corruption_detected); + RETURN_ERROR_IF(!srcSize, srcSize_wrong, ""); + RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, ""); { U32 const symbol = *(const BYTE*)src; U32 const baseline = baseValue[symbol]; U32 const nbBits = nbAdditionalBits[symbol]; @@ -10915,7 +14144,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb *DTablePtr = defaultTable; return 0; case set_repeat: - RETURN_ERROR_IF(!flagRepeatTable, corruption_detected); + RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, ""); /* prefetch FSE table if used */ if (ddictIsCold && (nbSeq > 24 /* heuristic */)) { const void* const pStart = *DTablePtr; @@ -10927,9 +14156,9 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb { unsigned tableLog; S16 norm[MaxSeq+1]; size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); - RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected); - RETURN_ERROR_IF(tableLog > maxLog, corruption_detected); - ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog); + RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, ""); + RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, ""); + ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2); *DTablePtr = DTableSpace; return headerSize; } @@ -10942,35 +14171,36 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, const void* src, size_t srcSize) { - const BYTE* const istart = (const BYTE* const)src; + const BYTE* const istart = (const BYTE*)src; const BYTE* const iend = istart + srcSize; const BYTE* ip = istart; int nbSeq; DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); /* check */ - RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong); + RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, ""); /* SeqHead */ nbSeq = *ip++; if (!nbSeq) { *nbSeqPtr=0; - RETURN_ERROR_IF(srcSize != 1, srcSize_wrong); + RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, ""); return 1; } if (nbSeq > 0x7F) { if (nbSeq == 0xFF) { - RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong); - nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; + RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); + nbSeq = MEM_readLE16(ip) + LONGNBSEQ; + ip+=2; } else { - RETURN_ERROR_IF(ip >= iend, srcSize_wrong); + RETURN_ERROR_IF(ip >= iend, srcSize_wrong, ""); nbSeq = ((nbSeq-0x80)<<8) + *ip++; } } *nbSeqPtr = nbSeq; /* FSE table descriptors */ - RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */ + RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */ { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); @@ -10982,8 +14212,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, ip, iend-ip, LL_base, LL_bits, LL_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); - RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected); + 
dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += llhSize; } @@ -10992,8 +14224,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, ip, iend-ip, OF_base, OF_bits, OF_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); - RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected); + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += ofhSize; } @@ -11002,8 +14236,10 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, ip, iend-ip, ML_base, ML_bits, ML_defaultDTable, dctx->fseEntropy, - dctx->ddictIsCold, nbSeq); - RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected); + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed"); ip += mlhSize; } } @@ -11016,7 +14252,6 @@ typedef struct { size_t litLength; size_t matchLength; size_t offset; - const BYTE* match; } seq_t; typedef struct { @@ -11030,9 +14265,6 @@ typedef struct { ZSTD_fseState stateOffb; ZSTD_fseState stateML; size_t prevOffset[ZSTD_REP_NUM]; - const BYTE* prefixStart; - const BYTE* dictEnd; - size_t pos; } seqState_t; /*! ZSTD_overlapCopy8() : @@ -11127,15 +14359,15 @@ size_t ZSTD_execSequenceEnd(BYTE* op, { BYTE* const oLitEnd = op + sequence.litLength; size_t const sequenceLength = sequence.litLength + sequence.matchLength; - BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ const BYTE* const iLitEnd = *litPtr + sequence.litLength; const BYTE* match = oLitEnd - sequence.offset; BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; - /* bounds checks */ - assert(oLitEnd < oMatchEnd); - RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must fit within dstBuffer"); - RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer"); + /* bounds checks : careful of address space overflow in 32-bit mode */ + RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer"); + RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer"); + assert(op < op + sequenceLength); + assert(oLitEnd < op + sequenceLength); /* copy literals */ ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap); @@ -11145,15 +14377,15 @@ size_t ZSTD_execSequenceEnd(BYTE* op, /* copy Match */ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { /* offset beyond prefix */ - RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected); + RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, ""); match = dictEnd - (prefixStart-match); if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); + ZSTD_memmove(oLitEnd, match, sequence.matchLength); return sequenceLength; } /* span extDict & currentPrefixSegment */ { size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); + ZSTD_memmove(oLitEnd, match, length1); op = oLitEnd + length1; sequence.matchLength -= length1; match = prefixStart; @@ -11171,16 +14403,27 @@ size_t ZSTD_execSequence(BYTE* op, BYTE* const oLitEnd = op + sequence.litLength; size_t const sequenceLength = 
sequence.litLength + sequence.matchLength; BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ - BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */ const BYTE* const iLitEnd = *litPtr + sequence.litLength; const BYTE* match = oLitEnd - sequence.offset; - /* Errors and uncommon cases handled here. */ - assert(oLitEnd < oMatchEnd); - if (UNLIKELY(iLitEnd > litLimit || oMatchEnd > oend_w)) + assert(op != NULL /* Precondition */); + assert(oend_w < oend /* No underflow */); + /* Handle edge cases in a slow path: + * - Read beyond end of literals + * - Match end is within WILDCOPY_OVERLIMIT of oend + * - 32-bit mode and the match length overflows + */ + if (UNLIKELY( + iLitEnd > litLimit || + oMatchEnd > oend_w || + (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH))) return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ + assert(op <= oLitEnd /* No overflow */); + assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */); + assert(oMatchEnd <= oend /* No underflow */); assert(iLitEnd <= litLimit /* Literal length is in bounds */); assert(oLitEnd <= oend_w /* Can wildcopy literals */); assert(oMatchEnd <= oend_w /* Can wildcopy matches */); @@ -11200,15 +14443,15 @@ size_t ZSTD_execSequence(BYTE* op, /* Copy Match */ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { /* offset beyond prefix -> go into extDict */ - RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected); + RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); match = dictEnd + (match - prefixStart); if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); + ZSTD_memmove(oLitEnd, match, sequence.matchLength); return sequenceLength; } /* span extDict & currentPrefixSegment */ { size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); + ZSTD_memmove(oLitEnd, match, length1); op = oLitEnd + length1; sequence.matchLength -= length1; match = prefixStart; @@ -11283,10 +14526,9 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD : 0) typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; -typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e; FORCE_INLINE_TEMPLATE seq_t -ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch) +ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets) { seq_t seq; ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state]; @@ -11361,14 +14603,6 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, c DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); - if (prefetch == ZSTD_p_prefetch) { - size_t const pos = seqState->pos + seq.litLength; - const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart; - seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. 
- * No consequence though : no memory access will occur, offset is only used for prefetching */ - seqState->pos = pos + seq.matchLength; - } - /* ANS state update * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo(). * clang-9.2.0 does 7% worse with ZSTD_updateFseState(). @@ -11398,17 +14632,64 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, c return seq; } +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) +{ + size_t const windowSize = dctx->fParams.windowSize; + /* No dictionary used. */ + if (dctx->dictContentEndForFuzzing == NULL) return 0; + /* Dictionary is our prefix. */ + if (prefixStart == dctx->dictContentBeginForFuzzing) return 1; + /* Dictionary is not our ext-dict. */ + if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0; + /* Dictionary is not within our window size. */ + if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0; + /* Dictionary is active. */ + return 1; +} + +MEM_STATIC void ZSTD_assertValidSequence( + ZSTD_DCtx const* dctx, + BYTE const* op, BYTE const* oend, + seq_t const seq, + BYTE const* prefixStart, BYTE const* virtualStart) +{ +#if DEBUGLEVEL >= 1 + size_t const windowSize = dctx->fParams.windowSize; + size_t const sequenceSize = seq.litLength + seq.matchLength; + BYTE const* const oLitEnd = op + seq.litLength; + DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + assert(op <= oend); + assert((size_t)(oend - op) >= sequenceSize); + assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX); + if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { + size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); + /* Offset must be within the dictionary. */ + assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); + assert(seq.offset <= windowSize + dictSize); + } else { + /* Offset must be within our window. 
*/ + assert(seq.offset <= windowSize); + } +#else + (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart; +#endif +} +#endif + #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG FORCE_INLINE_TEMPLATE size_t DONT_VECTORIZE ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE* const)dst; + BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + maxDstSize; BYTE* op = ostart; const BYTE* litPtr = dctx->litPtr; @@ -11417,19 +14698,20 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); DEBUGLOG(5, "ZSTD_decompressSequences_body"); + (void)frame; /* Regen sequences */ if (nbSeq) { seqState_t seqState; - size_t error = 0; dctx->fseEntropy = 1; { U32 i; for (i=0; ientropy.rep[i]; } RETURN_ERROR_IF( ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), - corruption_detected); + corruption_detected, ""); ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + assert(dst != NULL); ZSTD_STATIC_ASSERT( BIT_DStream_unfinished < BIT_DStream_completed && @@ -11454,13 +14736,14 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, * If you see most cycles served out of the DSB you've hit the good case. * If it is pretty even then you may be in an okay case. * - * I've been able to reproduce this issue on the following CPUs: + * This issue has been reproduced on the following CPUs: * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9 * Use Instruments->Counters to get DSB/MITE cycles. * I never got performance swings, but I was able to * go from the good case of mostly DSB to half of the * cycles served from MITE. * - Coffeelake: Intel i9-9900k + * - Coffeelake: Intel i7-9700k * * I haven't been able to reproduce the instability or DSB misses on any * of the following CPUS: @@ -11473,40 +14756,48 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, * * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4 */ + __asm__(".p2align 6"); + __asm__("nop"); __asm__(".p2align 5"); __asm__("nop"); +# if __GNUC__ >= 9 + /* better for gcc-9 and gcc-10, worse for clang and gcc-8 */ + __asm__(".p2align 3"); +# else __asm__(".p2align 4"); +# endif #endif for ( ; ; ) { - seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch); + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +#endif + if (UNLIKELY(ZSTD_isError(oneSeqSize))) + return oneSeqSize; DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + op += oneSeqSize; + if (UNLIKELY(!--nbSeq)) + break; BIT_reloadDStream(&(seqState.DStream)); - /* gcc and clang both don't like early returns in this loop. - * gcc doesn't like early breaks either. 
- * Instead save an error and report it at the end. - * When there is an error, don't increment op, so we don't - * overwrite. - */ - if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize; - else op += oneSeqSize; - if (UNLIKELY(!--nbSeq)) break; } /* check if reached exact end */ DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); - if (ZSTD_isError(error)) return error; - RETURN_ERROR_IF(nbSeq, corruption_detected); - RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected); + RETURN_ERROR_IF(nbSeq, corruption_detected, ""); + RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); /* save reps for next block */ { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } } /* last literal segment */ { size_t const lastLLSize = litEnd - litPtr; - RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall); - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } } return op-ostart; @@ -11516,23 +14807,43 @@ static size_t ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { - return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT + +FORCE_INLINE_TEMPLATE size_t +ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence, + const BYTE* const prefixStart, const BYTE* const dictEnd) +{ + prefetchPos += sequence.litLength; + { const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart; + const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. + * No consequence though : memory address is only used for prefetching, not for dereferencing */ + PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ + } + return prefetchPos + sequence.matchLength; +} + +/* This decoding function employs prefetching + * to reduce latency impact of cache misses. 
+ * It's generally employed when block contains a significant portion of long-distance matches + * or when coupled with a "cold" dictionary */ FORCE_INLINE_TEMPLATE size_t ZSTD_decompressSequencesLong_body( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { const BYTE* ip = (const BYTE*)seqStart; const BYTE* const iend = ip + seqSize; - BYTE* const ostart = (BYTE* const)dst; + BYTE* const ostart = (BYTE*)dst; BYTE* const oend = ostart + maxDstSize; BYTE* op = ostart; const BYTE* litPtr = dctx->litPtr; @@ -11540,51 +14851,62 @@ ZSTD_decompressSequencesLong_body( const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + (void)frame; /* Regen sequences */ if (nbSeq) { -#define STORED_SEQS 4 +#define STORED_SEQS 8 #define STORED_SEQS_MASK (STORED_SEQS-1) -#define ADVANCED_SEQS 4 +#define ADVANCED_SEQS STORED_SEQS seq_t sequences[STORED_SEQS]; int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); seqState_t seqState; int seqNb; + size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */ + dctx->fseEntropy = 1; { int i; for (i=0; ientropy.rep[i]; } - seqState.prefixStart = prefixStart; - seqState.pos = (size_t)(op-prefixStart); - seqState.dictEnd = dictEnd; + assert(dst != NULL); assert(iend >= ip); RETURN_ERROR_IF( ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), - corruption_detected); + corruption_detected, ""); ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); /* prepare in advance */ for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb (size_t)(oend-op), dstSize_tooSmall); - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } } return op-ostart; @@ -11607,9 +14931,10 @@ static size_t ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { - return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ @@ -11623,9 +14948,10 @@ DONT_VECTORIZE ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { - return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ @@ -11634,9 +14960,10 @@ static TARGET_ATTRIBUTE("bmi2") size_t ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e 
isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { - return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ @@ -11646,21 +14973,23 @@ typedef size_t (*ZSTD_decompressSequences_t)( ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset); + const ZSTD_longOffset_e isLongOffset, + const int frame); #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG static size_t ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { DEBUGLOG(5, "ZSTD_decompressSequences"); #if DYNAMIC_BMI2 if (dctx->bmi2) { - return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif - return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ @@ -11675,15 +15004,16 @@ static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, const void* seqStart, size_t seqSize, int nbSeq, - const ZSTD_longOffset_e isLongOffset) + const ZSTD_longOffset_e isLongOffset, + const int frame) { DEBUGLOG(5, "ZSTD_decompressSequencesLong"); #if DYNAMIC_BMI2 if (dctx->bmi2) { - return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif - return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset); + return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); } #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ @@ -11717,7 +15047,6 @@ ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable) } #endif - size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, @@ -11733,7 +15062,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); - RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong); + RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); /* Decode literals section */ { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); @@ -11759,6 +15088,8 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, ip += seqHSize; srcSize -= seqHSize; + RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); + #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) if ( !usePrefetchDecoder @@ -11777,20 +15108,20 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, if (usePrefetchDecoder) #endif #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT - return ZSTD_decompressSequencesLong(dctx, dst, 
dstCapacity, ip, srcSize, nbSeq, isLongOffset); + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); #endif #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG /* else */ - return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset); + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); #endif } } -void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize) { - if (dst != dctx->previousDstEnd) { /* not contiguous */ + if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */ dctx->dictEnd = dctx->previousDstEnd; dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); dctx->prefixStart = dst; @@ -11804,9 +15135,9 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, const void* src, size_t srcSize) { size_t dSize; - ZSTD_checkContinuity(dctx, dst); + ZSTD_checkContinuity(dctx, dst, dstCapacity); dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0); dctx->previousDstEnd = (char*)dst + dSize; return dSize; } -/**** ended inlining zstd_decompress_block.c ****/ +/**** ended inlining decompress/zstd_decompress_block.c ****/ From a56358b9fcc7a9f393c9eee065592cd2ed9c8ae4 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 16 May 2021 16:15:14 -0700 Subject: [PATCH 046/901] kramv - add uv coords to the hud --- kramv/KramViewerMain.mm | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 9f4fcdb6..f7e4a38c 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -824,12 +824,17 @@ - (void)updateEyedropper { // this will always be a linear color float4 c = _showSettings->textureResult; - int32_t x = _showSettings->textureResultX; int32_t y = _showSettings->textureResultY; + // show uv, so can relate to gpu coordinates stored in geometry and find atlas areas + append_sprintf(text, "uv:%0.3f %0.3f\n", + (float)x / _showSettings->imageBoundsX, + (float)y / _showSettings->imageBoundsY + ); + // pixel at top-level mip - sprintf(text, "px:%d %d\n", x, y); + append_sprintf(text, "px:%d %d\n", x, y); // show block num int mipLOD = _showSettings->mipLOD; From 28fb4ce4ed1c0a55c97255f65c60a67a633231b7 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 16 May 2021 16:16:58 -0700 Subject: [PATCH 047/901] kram - update mipDown logic to take depth Still doesn't mean kram supports 3d with mips. Just part of the journey. 
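A note on the new signature: the KramConfig.h hunk in this patch drops the ROUNDMIPSDOWN/round-up branch and adds the depth argument, but the added line reads "d = h / 2;", which looks like a slip for "d = d / 2;" (depth should halve independently, just like width and height). A minimal depth-aware round-down step, for reference only and assuming that intent:

inline void mipDown(int32_t& w, int32_t& h, int32_t& d)
{
    // GL/D3D/Metal round non-pow2 mips down; each dimension halves on its own
    // and clamps to 1, so a 2D texture (d == 1) stays 2D all the way down.
    w = w / 2;
    h = h / 2;
    d = d / 2;

    if (w < 1) w = 1;
    if (h < 1) h = 1;
    if (d < 1) d = 1;
}

Callers with purely 2D data, like the KramSDFMipper hunk below, can pass a local d = 1 and the clamp keeps it there.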
--- kramv/KramLoader.mm | 6 ++- libkram/kram/Kram.cpp | 86 +++++++++++++++++++++++----------- libkram/kram/KramConfig.h | 15 ++---- libkram/kram/KramImageInfo.cpp | 2 + libkram/kram/KramImageInfo.h | 4 ++ libkram/kram/KramMipper.cpp | 7 +-- libkram/kram/KramMipper.h | 3 +- libkram/kram/KramSDFMipper.cpp | 5 +- 8 files changed, 83 insertions(+), 45 deletions(-) diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 11421ae5..1fc8a651 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -343,6 +343,7 @@ - (void)setMipgenNeeded:(BOOL)enabled { int32_t w = image.width; int32_t h = image.height; + int32_t d = image.depth; int32_t numMips = MAX(1, image.header.numberOfMipmapLevels); int32_t numArrays = MAX(1, image.header.numberOfArrayElements); @@ -469,7 +470,7 @@ - (void)setMipgenNeeded:(BOOL)enabled { } } - mipDown(w, h); + mipDown(w, h, d); } return texture; @@ -578,6 +579,7 @@ - (nonnull instancetype)init { int32_t w = image.width; int32_t h = image.height; + int32_t d = image.depth; int32_t numMips = MAX(1, image.header.numberOfMipmapLevels); int32_t numArrays = MAX(1, image.header.numberOfArrayElements); @@ -687,7 +689,7 @@ - (nonnull instancetype)init { } } - mipDown(w, h); + mipDown(w, h, d); } // this only affect managed textures diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index a3c6db07..5284bea1 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -511,7 +511,7 @@ string formatInputAndOutput(int32_t testNumber, const char* srcFilename, MyMTLPi size_t extSeparator = dst.rfind('.'); assert(extSeparator != string::npos); dst.erase(extSeparator); - dst.append(".ktx"); + dst.append(".ktx"); // TODO: test ktx2 too cmd += dst; @@ -1386,7 +1386,7 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, numPixels *= (float)pixelMultiplier; if (srcImage.header.numberOfMipmapLevels > 1) { - numPixels *= 4.0 / 3.0f; // estimate for now + numPixels *= 4.0 / 3.0f; // TODO: estimate for now } numPixels /= (1000.0f * 1000.0f); @@ -1424,14 +1424,12 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, // print out the array if (srcImage.header.numberOfArrayElements > 1) { - sprintf(tmp, + append_sprintf(info, "arry: %d\n", srcImage.header.numberOfArrayElements); - - info += tmp; } - sprintf(tmp, + append_sprintf(info, "fmtk: %s\n" "fmtm: %s (%d)\n" "fmtv: %s (%d)\n" @@ -1440,13 +1438,10 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, metalTypeName(metalFormat), metalFormat, vulkanTypeName(metalFormat), vulkanType(metalFormat), glTypeName(metalFormat), glType(metalFormat)); - info += tmp; // report any props - string propText; for (const auto& prop : srcImage.props) { - sprintf(propText, "prop: %s %s\n", prop.first.c_str(), prop.second.c_str()); - info += propText; + append_sprintf(info, "prop: %s %s\n", prop.first.c_str(), prop.second.c_str()); } // TODO: handle zstd compressed KTX2 too, they have a length and compressed length field @@ -1457,19 +1452,51 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, int32_t mipLevel = 0; int32_t w = srcImage.width; int32_t h = srcImage.height; - + int32_t d = srcImage.depth; + + // num chunks + append_sprintf(info, + "chun: %d\n", + srcImage.totalChunks()); + for (const auto& mip : srcImage.mipLevels) { - sprintf(tmp, - "mipn: %d\n" - "mipd: %dx%d\n" - "mips: %" PRIu64 "\n" - "mipc: %dx\n" - "mipo: %" PRIu64 "\n", - w, h, mipLevel++, mip.length, srcImage.totalChunks(), mip.offset); - info += tmp; + 
switch (textureType) { + case MyMTLTextureType3D: + append_sprintf(info, + "mipl: %d %dx%dx%d ", + mipLevel++, + w, h, d); + break; + default: + append_sprintf(info, + "mipl: %d %dx%d ", + mipLevel++, + w, h); + break; + } + + if (mip.lengthCompressed != 0) { + size_t percent = (100 * mip.lengthCompressed) / mip.length; + + append_sprintf(info, + "%" PRIu64 ",%" PRIu64 ",%" PRIu64 " %d%%\n", + mip.offset, + mip.length, // only size of one mip right now, not mip * numChunks + mip.lengthCompressed, // TODO: preserve so can be displayed + (int)percent + ); + } + else { + append_sprintf(info, + "%" PRIu64 ",%" PRIu64 "\n", + mip.offset, + mip.length // only size of one mip right now, not mip * numChunks + ); + } + // drop a mip level - mipDown(w, h); + mipDown(w, h, d); } } @@ -1593,10 +1620,11 @@ static int32_t kramAppDecode(vector& args) error = true; } - isKTX = endsWith(dstFilename, ".ktx"); + bool isDstKTX = endsWith(dstFilename, ".ktx"); + bool isDstKTX2 = endsWith(dstFilename, ".ktx2"); - if (!isKTX) { - KLOGE("Kram", "decode only supports ktx output"); + if (!(isDstKTX || isDstKTX2)) { + KLOGE("Kram", "decode only supports ktx and ktx2 output"); error = true; } @@ -1620,7 +1648,7 @@ static int32_t kramAppDecode(vector& args) return -1; } - success = success && SetupTmpFile(tmpFileHelper, ".ktx"); + success = success && SetupTmpFile(tmpFileHelper, isDstKTX ? ".ktx" : ".ktx2"); if (success && isVerbose) { KLOGI("Kram", "Decoding %s to %s with %s\n", @@ -1984,10 +2012,12 @@ static int32_t kramAppEncode(vector& args) error = true; } + // allow ktx and ktx2 output bool isDstKTX = endsWith(dstFilename, ".ktx"); + bool isDstKTX2 = endsWith(dstFilename, ".ktx2"); - if (!isDstKTX) { - KLOGE("Kram", "encode only supports ktx output"); + if (!(isDstKTX || isDstKTX2)) { + KLOGE("Kram", "encode only supports ktx and ktx2 output"); error = true; } @@ -1996,6 +2026,8 @@ static int32_t kramAppEncode(vector& args) return -1; } + infoArgs.isKTX2 = isDstKTX2; + // Any new settings just go into this struct which is passed into enoder ImageInfo info; info.initWithArgs(infoArgs); @@ -2013,7 +2045,7 @@ static int32_t kramAppEncode(vector& args) srcFilename, srcImage, isPremulRgb); if (success) { - success = SetupTmpFile(tmpFileHelper, ".ktx"); + success = SetupTmpFile(tmpFileHelper, isDstKTX ? ".ktx" : ".ktx2"); if (!success) { KLOGE("Kram", "encode couldn't generate tmp file for output"); diff --git a/libkram/kram/KramConfig.h b/libkram/kram/KramConfig.h index cdf2c205..97fb39f6 100644 --- a/libkram/kram/KramConfig.h +++ b/libkram/kram/KramConfig.h @@ -378,28 +378,23 @@ inline half4 toHalf4(const float4& vv) //--------------------------------------- -#define ROUNDMIPSDOWN 1 -inline void mipDown(int32_t& w, int32_t& h) +inline void mipDown(int32_t& w, int32_t& h, int32_t& d) { // GL/D3D hobbled non-pow2 mips by only supporting round down, not round up - // And then Metal followd OpenGL since it's the same hw and drivers. + // And then Metal followed OpenGL since it's the same hw and drivers. // Round up adds an extra mip level to the chain, but results in much better filtering. 
// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_non_power_of_two.txt // http://download.nvidia.com/developer/Papers/2005/NP2_Mipmapping/NP2_Mipmap_Creation.pdf -#if ROUNDMIPSDOWN // round-down w = w / 2; h = h / 2; - + d = h / 2; + if (w < 1) w = 1; if (h < 1) h = 1; -#else - // round-up - w = (w + 1) / 2; - h = (h + 1) / 2; -#endif + if (d < 1) d = 1; } // Use this on vectors diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 2cab1300..7302bbb3 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -995,6 +995,8 @@ void ImageInfo::initWithArgs(const ImageInfoArgs& args) textureEncoder = args.textureEncoder; textureType = args.textureType; + isKTX2 = args.isKTX2; + isPrezero = args.isPrezero; isPremultiplied = args.isPremultiplied; if (isPremultiplied) diff --git a/libkram/kram/KramImageInfo.h b/libkram/kram/KramImageInfo.h index d4c9f862..54a343b1 100644 --- a/libkram/kram/KramImageInfo.h +++ b/libkram/kram/KramImageInfo.h @@ -51,6 +51,8 @@ class ImageInfoArgs { int32_t quality = 49; // may want float + bool isKTX2 = false; + //bool skipImageLength = false; bool doMipmaps = true; // default to mips on bool isVerbose = false; @@ -119,6 +121,8 @@ class ImageInfo { string averageChannels; string swizzleText; + bool isKTX2 = false; + // output image state // Note: difference between input srgb and output srgb, but it's mingled // here a bit diff --git a/libkram/kram/KramMipper.cpp b/libkram/kram/KramMipper.cpp index 50b5715c..1bd80432 100644 --- a/libkram/kram/KramMipper.cpp +++ b/libkram/kram/KramMipper.cpp @@ -333,8 +333,9 @@ void Mipper::mipmap(const ImageData& srcImage, ImageData& dstImage) const { dstImage.width = srcImage.width; dstImage.height = srcImage.height; - - mipDown(dstImage.width, dstImage.height); + dstImage.depth = srcImage.depth; + + mipDown(dstImage.width, dstImage.height, dstImage.depth); // this assumes that we can read mip-1 from srcImage mipmapLevel(srcImage, dstImage); @@ -344,7 +345,7 @@ void Mipper::mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) cons { int32_t width = srcImage.width; int32_t height = srcImage.height; - + // this can receive premul, srgb data // the mip chain is linear data only Color* cDstColor = dstImage.pixels; diff --git a/libkram/kram/KramMipper.h b/libkram/kram/KramMipper.h index 65013751..19bde640 100644 --- a/libkram/kram/KramMipper.h +++ b/libkram/kram/KramMipper.h @@ -51,7 +51,8 @@ class ImageData { int32_t width = 0; int32_t height = 0; - + int32_t depth = 0; + bool isSRGB = false; bool isHDR = false; // only updates pixelsFloat }; diff --git a/libkram/kram/KramSDFMipper.cpp b/libkram/kram/KramSDFMipper.cpp index 1d51e4d1..7bb6f71d 100644 --- a/libkram/kram/KramSDFMipper.cpp +++ b/libkram/kram/KramSDFMipper.cpp @@ -55,10 +55,11 @@ void SDFMipper::mipmap(ImageData& dstImage, int32_t mipLevel) { int32_t w = srcBitmapImage.width; int32_t h = srcBitmapImage.height; - + int32_t d = 1; + // can use shift with mip down, but this iterates for (int32_t i = 0; i < mipLevel; ++i) { - mipDown(w, h); + mipDown(w, h, d); } dstImage.width = w; From 55ddca6ba0e6166b2217d11c1d54d4414e708d88 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 16 May 2021 18:48:29 -0700 Subject: [PATCH 048/901] KTX2 - ability to save data to KTX2 format with zstd/zlib compression. This is a pretty big revamp to KramImage and KTXImage to allow it to support KTX2 format saves. Full DFD writing even though this is mostly redundant with VKFormat. 
Premul setting not specific to when premul occurs. Split initMipLevels and validateMipLevels. Moved some of KramImage to KTXImage. Broke up some KramImage monolithic functions. Remove "any" support from scripts for now. Will update that support later. Now use ktx2 in python scripts directly. Can bypass ktx2ktx2 and ktx2sc. ktx2 always sets -zstd but -zlib works. Made a single file version of zstd that has an encoder. Previously only had decoder. Use zstd.h header now to stay in sync with calls. zstd is updated to 1.5.0. Scoping operators for the contexts, so they are cleaned up. CLI now supports -zstd and -zlib args. --- libkram/kram/KTXImage.cpp | 345 +++++----- libkram/kram/KTXImage.h | 52 +- libkram/kram/Kram.cpp | 99 +-- libkram/kram/KramImage.cpp | 1098 ++++++++++++++++++++------------ libkram/kram/KramImage.h | 21 +- libkram/kram/KramImageInfo.cpp | 1 + libkram/kram/KramImageInfo.h | 6 + scripts/kramTests.sh | 4 +- scripts/kramTextures.py | 70 +- 9 files changed, 1060 insertions(+), 636 deletions(-) diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index c9f37333..12564de9 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -10,19 +10,10 @@ #include #include -extern "C" { - -// not using zstd.h, so pull this in directly from zstddeclib.c -bool FSE_isError(size_t size); - -typedef struct ZSTD_DCtx_s ZSTD_DCtx; -ZSTD_DCtx* ZSTD_createDCtx(void); -size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); - -size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize); -} +// for zlib decompress +#include "miniz.h" +// for zstd decompress +#include "zstd.h" namespace kram { @@ -942,7 +933,8 @@ bool KTXImage::open(const uint8_t* imageData, size_t imageDataLength) return false; } - return initMipLevels(true, sizeof(KTXHeader) + header.bytesOfKeyValueData); + initMipLevels(sizeof(KTXHeader) + header.bytesOfKeyValueData); + return validateMipLevels(); } void KTXImage::initProps(const uint8_t* propsData, size_t propDataSize) @@ -1095,7 +1087,85 @@ void KTXImage::toPropsData(vector& propsData) // TODO: this needs to pad to 16-bytes, so may need a prop for that } -bool KTXImage::initMipLevels(bool validateLevelSizeFromRead, size_t offsetToImageData) +void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize) +{ + // dst levels + int32_t w = width; + int32_t h = height; + int32_t d = depth; + + bool needsDownsample = (w > mipMaxSize || h > mipMaxSize); + + int32_t maxMipLevels = 16; // 64K x 64K + + // can still downsample src multiple times even with only 1 level exported + if ((!doMipmaps) && needsDownsample) { + maxMipLevels = 1; + } + + KTXImageLevel level; + level.offset = 0; // compute later, once know ktx vs. 
ktx2 + + mipLevels.clear(); + + if (doMipmaps || needsDownsample) { + bool keepMip = + (w >= mipMinSize && w <= mipMaxSize) && + (h >= mipMinSize && h <= mipMaxSize); + + if (keepMip) { + level.length = mipLevelSize(w, h); + level.lengthCompressed = 0; + + if (mipLevels.empty()) { + // adjust the top dimensions + width = w; + height = h; + depth = d; + } + mipLevels.push_back(level); + } + + do { + mipDown(w, h, d); + + keepMip = + (w >= mipMinSize && w <= mipMaxSize) && + (h >= mipMinSize && h <= mipMaxSize); + + if (keepMip && (mipLevels.size() < (size_t)maxMipLevels)) { + // length needs to be multiplied by chunk size before writing out + level.length = mipLevelSize(w, h); + level.lengthCompressed = 0; + + if (mipLevels.empty()) { + // adjust the top dimensions + width = w; + height = h; + depth = d; + } + + mipLevels.push_back(level); + } + + } while (w > 1 || h > 1 || d > 1); + } + else { + // length needs to be multiplied by chunk size before writing out + level.length = mipLevelSize(w, h); + level.lengthCompressed = 0; + + mipLevels.push_back(level); + } + + + header.numberOfMipmapLevels = mipLevels.size(); + + header.pixelWidth = width; + header.pixelHeight = height; +} + +void KTXImage::initMipLevels(size_t mipOffset) { // largest mips are first in file uint32_t numMips = max(1u, header.numberOfMipmapLevels); @@ -1105,117 +1175,62 @@ bool KTXImage::initMipLevels(bool validateLevelSizeFromRead, size_t offsetToImag mipLevels.reserve(numMips); mipLevels.clear(); - size_t totalDataSize = offsetToImageData; // sizeof(KTXHeader) + header.bytesOfKeyValueData; - //size_t blockSize = this->blockSize(); + size_t offset = mipOffset; int32_t w = width; int32_t h = height; - + int32_t d = depth; + for (uint32_t i = 0; i < numMips; ++i) { size_t dataSize = mipLevelSize(w, h); uint32_t levelSize = dataSize * numChunks; // compute dataSize from header data - if (!skipImageLength) { - // read data size - // 4-byte dataSize throws off alignment of mips to block size on most formats - // would need to pad after this by block size - - // validate that no weird size to image - if (validateLevelSizeFromRead) { - const uint8_t* levelSizeField = (const uint8_t*)fileData + totalDataSize; - - uint32_t levelSizeFromRead = *(const uint32_t*)levelSizeField; - // cube only stores size of one face, ugh - if (textureType == MyMTLTextureTypeCube) { - levelSizeFromRead *= 6; - } - - if (levelSizeFromRead != levelSize) { - KLOGE("kram", "mip %d levelSize mismatch %d %d", i, (int)levelSizeFromRead, (int)levelSize); - return false; - } - } - // advance past the length - totalDataSize += sizeof(uint32_t); + offset += sizeof(uint32_t); } - size_t offset = totalDataSize; - + // TODO: Here is where offset alignment to 4 bytes may be needed + // but that also needs to be accounted for in allocation + // level holds single texture size not level size, but offset reflects level start - KTXImageLevel level = {offset, dataSize}; + KTXImageLevel level = { offset, 0, dataSize }; mipLevels.push_back(level); - totalDataSize += levelSize; - - // TODO: remove code below, since padding really isn't used with 4-byte alignment of rowBytes in KTX1 - //mips += levelSize; - - // for (int array = 0; array < numArrays; ++array) { - // for (int face = 0; face < numFaces; ++face) { - // for (int slice = 0; slice < numSlices; ++slice) { - // const uint8_t* srcImageData = mips; - // mips += dataSize; - // totalDataSize += dataSize; - // - // // assumes all images are in same mmap file, so can just - // // alias the offset these offsets need to 
be at a multiple - // // of the block size - // size_t offset = srcImageData - fileData; - // KTXImageLevel level = {offset, dataSize}; - // mipLevels.push_back(level); - // - // if (skipImageLength) { - // if ((offset & (blockSize - 1)) != 0) { - // return false; - // } - // } - // - // // TODO: pad to 4 on 1/2/3 byte formats - // // but make sure if this is on every mip or not - // } - // - //// // cube padding to 4 byte alignment - //// if (textureType == MyMTLTextureTypeCube) { - //// size_t padding = - //// 3 - ((dataSize + 3) % 4); // 0, 1, 2, 3 -> 0, 3, 2, 1 - //// if (padding > 0) { - //// mips += padding; - //// totalDataSize += padding; - //// } - //// - //// if (skipImageLength) { - //// if (padding != 0) { - //// return false; - //// } - //// } - //// } - // } - // } + offset += levelSize; + + mipDown(w, h, d); + } +} - // // mip padding to 4 byte alignment - // size_t padding = - // 3 - ((totalDataSize + 3) % 4); // 0, 1, 2, 3 -> 0, 3, 2, 1 - // if (padding > 0) { - // mips += padding; - // totalDataSize += padding; - // } - // - // if (skipImageLength) { - // if (padding != 0) { - // return false; - // } - // } +bool KTXImage::validateMipLevels() const { + if (skipImageLength) + return true; + + bool isValid = true; - // https://computergraphics.stackexchange.com/questions/1441/how-does-mip-mapping-work-with-non-power-of-2-textures + // validate that no weird size to image + for (uint32_t i = 0; i < mipLevels.size(); ++i) { + auto& level = mipLevels[i]; + + const uint8_t* levelSizeField = (const uint8_t*)fileData + level.offset - sizeof(uint32_t); + uint32_t levelSizeFromRead = *(const uint32_t*)levelSizeField; + + // cube only stores size of one face, ugh + if (textureType == MyMTLTextureTypeCube) { + levelSizeFromRead *= 6; + } - mipDown(w, h); + if (levelSizeFromRead != level.length) { + KLOGE("kram", "mip %d levelSize mismatch %d %d", i, (int)levelSizeFromRead, (int)level.length); + isValid = false; + break; + } } - - return true; + + return isValid; } const char* textureTypeName(MyMTLTextureType textureType) @@ -1241,7 +1256,7 @@ const char* textureTypeName(MyMTLTextureType textureType) - +// KTX2 layout //// Data Format Descriptor //uint32_t dfdTotalSize = 0; //continue @@ -1272,6 +1287,17 @@ const char* textureTypeName(MyMTLTextureType textureType) // can use ktx2ktx2 and ktx2sc to supercompress, and kramv can use this to open and view data as a KTX1 file. // ignoring Basis and supercompression data, etc. +// wish C++ had a defer +struct ZSTDScope2 +{ + ZSTDScope2(ZSTD_DCtx* ctx_) : ctx(ctx_) {} + ~ZSTDScope2() { ZSTD_freeDCtx(ctx); } + +private: + ZSTD_DCtx* ctx = nullptr; +}; + + bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) { if ((size_t)imageDataLength < sizeof(KTX2Header)) { @@ -1290,22 +1316,16 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) // copy out the header, const KTX2Header& header2 = *(const KTX2Header*)imageData; - enum KTX2Supercompression { - KTX2SupercompressionNone = 0, - KTX2SupercompressionBasisLZ = 1, // can transcode, but can't gen from KTX file using ktxsc, uses sgdByteLength - KTX2SupercompressionZstd = 2, // faster deflate, ktxsc support - KTX2SupercompressionZlib = 3, // deflate, no ktxsc support (use miniz) - // TODO: Need LZFSE? 
- }; - - bool isLevelOfMipCompressed = header2.supercompressionScheme != KTX2SupercompressionNone; if (header2.supercompressionScheme != KTX2SupercompressionNone && - header2.supercompressionScheme != KTX2SupercompressionZstd) { + header2.supercompressionScheme != KTX2SupercompressionZstd && + header2.supercompressionScheme != KTX2SupercompressionZlib) { KLOGE("kram", "Unknown supercompression %d", header2.supercompressionScheme); return false; } + bool isCompressed = header2.supercompressionScheme != KTX2SupercompressionNone; + // This typically means UASTC encoding + zstd supercompression, and code doesn't handle that below yet if (header2.vkFormat == 0) { KLOGE("kram", "Basis encode not yet supported"); @@ -1329,12 +1349,13 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) int32_t numChunks = totalChunks(); - // need to copy out lengthCompressed here, since we can't determine that - vector levels; + vector levels; uint32_t levelOffset = sizeof(KTX2Header); for (uint32_t i = 0; i < header.numberOfMipmapLevels; ++i) { // ktx2 stores levels in same order as ktx1, but larger mips occur later in the file - auto level = *(const KTX2ImageLevel*)(imageData + levelOffset + sizeof(KTX2ImageLevel) * i); + // only KTX2 writes this array out due to lengthCompressed field. + + auto level = *(const KTXImageLevel*)(imageData + levelOffset + sizeof(KTXImageLevel) * i); assert(level.length % numChunks == 0); @@ -1356,32 +1377,27 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) return false; } - // Note: KTX2 also doesn't have the length field embedded the mipData - // so need to be able to set skipLength to unify the mipgen if aliasing the mip data - // Only reading this format, never writing it out. - skipImageLength = true; - // transfer key-value data pairs // bytesOfKeyValueData will be updated if props written out - // but probably want to leave this out of level offsets - header.bytesOfKeyValueData = 0; // header2.kvdByteLength; + header.bytesOfKeyValueData = 0; initProps(imageData + header2.kvdByteOffset, header2.kvdByteLength); - if (!isLevelOfMipCompressed) { + if (!isCompressed) { + // Note: this is aliasing the mips from a ktx2 file into a ktx1 KTXImage + // This is highly unsafe. + + // Note: KTX2 also doesn't have the length field embedded the mipData + // so need to be able to set skipLength to unify the mipgen if aliasing the mip data + // Only reading this format, never writing it out. 
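For orientation, the level index that the loop above reads sits immediately after the KTX2 header, one entry per mip, and its three 64-bit fields line up with this patch's KTXImageLevel (offset, lengthCompressed, length). A small sketch of pulling it out of a mapped file; the struct and helper names are local to this sketch and error handling is omitted:

#include <cstdint>
#include <cstring>
#include <vector>

// One entry of the KTX2 level index (field meaning per the KTX2 spec).
struct KTX2LevelEntry {
    uint64_t byteOffset;               // where this level's data starts in the file
    uint64_t byteLength;               // size on disk (compressed when supercompressed)
    uint64_t uncompressedByteLength;   // size once supercompression is undone
};

// Read numberOfMipmapLevels entries starting right after the KTX2 header.
// Entry 0 is mip 0 (the largest), but its data is stored last in the file.
static std::vector<KTX2LevelEntry> readLevelIndex(const uint8_t* fileData,
                                                  size_t headerSize,
                                                  uint32_t numMips)
{
    std::vector<KTX2LevelEntry> levels(numMips);
    std::memcpy(levels.data(), fileData + headerSize,
                numMips * sizeof(KTX2LevelEntry));
    return levels;
}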
+ skipImageLength = true; + fileData = imageData; fileDataLength = imageDataLength; - // might be able to just use header2.sgdByteOffset + header2.sgdByteLength - uint32_t offsetToImageData = //std::max(std::max( - //header2.dfdByteOffset + header2.dfdByteLength, - //header2.kvdByteOffset + header2.kvdByteLength), - header2.sgdByteOffset + header2.sgdByteLength; - - - if (!initMipLevels(false, offsetToImageData)) { - return false; - } + // these are mip offset for KTX2 file + size_t mipOffset = header2.sgdByteOffset + header2.sgdByteLength; + initMipLevels(mipOffset); // TODO: KTX1 packs rows to 4 bytes, but KTX2 packs tightly to 1 // for now just reverse the ktx2 mips back to ktx1, aliasing fileData @@ -1405,9 +1421,9 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) } } else { - if (!initMipLevels(false, sizeof(KTXHeader) + header.bytesOfKeyValueData)) { - return false; - } + // This is decompressing KTX2 into KTX1 + size_t mipOffset = sizeof(KTXHeader) + header.bytesOfKeyValueData; + initMipLevels(mipOffset); // compute the decompressed size // Note: initMipLevels computes but doesn't store this @@ -1421,6 +1437,7 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) bool isZstd = header2.supercompressionScheme == KTX2SupercompressionZstd; ZSTD_DCtx* dctx = nullptr; if (isZstd) dctx = ZSTD_createDCtx(); + ZSTDScope2 scope(dctx); // need to decompress mips here for (uint32_t i = 0; i < header.numberOfMipmapLevels; ++i) { @@ -1434,6 +1451,13 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) size_t dstDataSize = level1.length * numChunks; uint8_t* dstData = (uint8_t*)fileData + level1.offset; // can const_cast, since class owns data + // preserve lengthCompressed so kram info can display the value + // this field will need to be set to 0 + + // This does display in kram info, but it's confusing since image was converted to ktx1 + // and the offsets are largest first. So for now, don't copy this in. + // level1.lengthCompressed = level2.lengthCompressed; + // TODO: use basis transcoder (single file) for Basis UASTC here, then don't need libktx yet // wont work for BasisLZ (which is ETC1S). 
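The hunk just below switches the per-level decompression over to zstd.h and miniz calls with proper error checks; the KTX2 save path this commit adds needs the inverse. A minimal sketch of compressing one level's chunk data with zstd — the helper name and buffer handling are illustrative rather than kram's actual implementation, which per the commit message wraps its contexts in scope guards like the ZSTDScope2 above:

#include "zstd.h"
#include <cstdint>
#include <vector>

// Compress one level's worth of chunk data; on success dst holds the bytes
// whose size would land in KTXImageLevel::lengthCompressed.
static bool compressLevelZstd(const uint8_t* src, size_t srcSize,
                              std::vector<uint8_t>& dst, int level)
{
    dst.resize(ZSTD_compressBound(srcSize));    // worst-case output size
    size_t const written = ZSTD_compress(dst.data(), dst.size(),
                                         src, srcSize, level);
    if (ZSTD_isError(written))
        return false;
    dst.resize(written);
    return true;
}

The -zlib path would presumably lean on miniz's mz_compress in the same spot.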
@@ -1445,26 +1469,39 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) dstData, dstDataSize, srcData, srcDataSize); - if (FSE_isError(result)) { - ZSTD_freeDCtx(dctx); + if (ZSTD_isError(result)) { + KLOGE("kram", "decode mip zstd failed"); + return false; + } + if (level2.length * numChunks != result) { + KLOGE("kram", "decode mip zstd size not expected"); return false; } - assert(level2.length * numChunks == result); break; } - case KTX2SupercompressionBasisLZ: - // TODO: this one really needs KTX-software branch - // also loader has option to transcode to various formats - break; + + case KTX2SupercompressionZlib: { + // can use miniz or libCompression + mz_ulong dstDataSizeMZ = 0; + if (mz_uncompress(dstData, &dstDataSizeMZ, + srcData, srcDataSize) != MZ_OK) { + KLOGE("kram", "decode mip zlib failed"); + return false; + } + if (dstDataSizeMZ != dstDataSize) { + KLOGE("kram", "decode mip zlib size not expected"); + return false; + } - case KTX2SupercompressionZlib: - // TODO: can use miniz on this, or libCompression break; + } + + // already checked at top of function + default: { + return false; + } } } - - if (dctx) ZSTD_freeDCtx(dctx); - } return true; @@ -1476,6 +1513,8 @@ vector& KTXImage::imageData() { void KTXImage::reserveImageData() { int32_t numChunks = totalChunks(); + + // on KTX1 the last mip is the smallest and last in the file const auto& lastMip = mipLevels[header.numberOfMipmapLevels-1]; size_t totalKTXSize = lastMip.offset + lastMip.length * numChunks; diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index a5787056..2b0b54c3 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -181,11 +181,11 @@ class KTXHeader { // This is one entire level of mipLevels. // In KTX, the image levels are assumed from format and size since no compression applied. -class KTXImageLevel { -public: - uint64_t offset; // numChunks * length - uint64_t length; // size of a single mip -}; +//class KTXImageLevel { +//public: +// uint64_t offset; // numChunks * length +// uint64_t length; // size of a single mip +//}; //--------------------------------------------- @@ -225,18 +225,38 @@ class KTX2Header { uint64_t sgdByteLength = 0; // chunks hold levelCount of all mips of the same size - // KTX2ImageLevel* chunks; // [levelCount] + // KTXImageLevel* chunks; // [levelCount] }; -// Unlike KTX, KTX2 writes an array of level sizes since compression may e involved. -// These correspond to an entire compressed array of chunks. -// So often an entire level mus be decompressed before a chunk can be accessed. +// Unlike KTX, KTX2 writes an array of level sizes since level compression may be used. +// Level compression is an entire compressed array of chunks at a given mip dimension. +// So then the entire level must be decompressed before a chunk can be accessed. // This is one entire level of mipLevels. -class KTX2ImageLevel { +// +// Use this for KTX, but there length == lengthCompressed, and the array is just a temporary. +// and the offsts include a 4 byte length at the start of each level. 
+class KTXImageLevel { public: - uint64_t offset; // numChunks * length - uint64_t lengthCompressed; // can only be read in, can't compute this, but can compute upper bound from zstd - uint64_t length; // size of a single mip + uint64_t offset = 0; // differ in ordering - ktx largest first, ktx2 smallest first + uint64_t lengthCompressed = 0; // set to 0 if not compresseds + uint64_t length = 0; // numChunks * mipSize when written for non cube on KTX1 or all KTX2, internally only stores mipSize +}; + +enum KTX2Supercompression { + KTX2SupercompressionNone = 0, + KTX2SupercompressionBasisLZ = 1, // can transcode, but can't gen from KTX file using ktxsc, uses sgdByteLength + KTX2SupercompressionZstd = 2, // faster deflate, ktxsc support + KTX2SupercompressionZlib = 3, // deflate, no ktxsc support (use miniz) + // TODO: Need LZFSE? + // TODO: need Kraken for PS4 + // TODO: need Xbox format +}; + +struct KTX2Compressor { + KTX2Supercompression compressorType = KTX2SupercompressionNone; + float compressorLevel = 0.0f; // 0.0 default, 100.0 full compression + + bool isCompressed() const { return compressorType != KTX2SupercompressionNone; } }; //--------------------------------------------- @@ -251,8 +271,12 @@ class KTXImage { bool open(const uint8_t* imageData, size_t imageDataLength); void initProps(const uint8_t* propsData, size_t propDataSize); - bool initMipLevels(bool validateLevelSizeFromRead, size_t offsetToImageData); + + void initMipLevels(size_t mipOffset); + void initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize); + bool validateMipLevels() const; + // props handling void toPropsData(vector& propsData); diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 5284bea1..ec475eb4 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -893,7 +893,7 @@ void kramInfoUsage(bool showVersion = true) KLOGI("Kram", "%s\n" "Usage: kram info\n" - "\t -i/nput <.png | .ktx>\n" + "\t -i/nput <.png | .ktx | .ktx2>\n" "\t [-o/utput info.txt]\n" "\t [-v/erbose]\n" "\n", @@ -943,8 +943,8 @@ void kramEncodeUsage(bool showVersion = true) "Usage: kram encode\n" "\t -f/ormat (bc1 | astc4x4 | etc2rgba | rgba16f)\n" "\t [-srgb] [-signed] [-normal]\n" - "\t -i/nput \n" - "\t -o/utput \n" + "\t -i/nput \n" + "\t -o/utput \n" "\n" "\t [-type 2d|3d|..]\n" "\t [-e/ncoder (squish | ate | etcenc | bcenc | astcenc | explicit | ..)]\n" @@ -1059,6 +1059,12 @@ void kramEncodeUsage(bool showVersion = true) "\t-chunks 4x4" "\tSpecifies how many chunks to split up texture into 2darray\n" + // ktx2 specific settings + "\t-zstd" + "\tktx2 with zstd mip compressor\n" + "\t-zlib" + "\tktx2 with zlib mip compressor\n" + "\t-swizzle [rgba01 x4]" "\tSpecifies pre-encode swizzle pattern\n" "\t-avg [rgba]" @@ -1125,7 +1131,7 @@ static int32_t kramAppInfo(vector& args) } dstFilename = args[i]; - continue; + //continue; } else if (isStringEqual(word, "-input") || isStringEqual(word, "-i")) { @@ -1137,7 +1143,7 @@ static int32_t kramAppInfo(vector& args) } srcFilename = args[i]; - continue; + //continue; } else if (isStringEqual(word, "-v") || isStringEqual(word, "-verbose")) { @@ -1544,7 +1550,7 @@ static int32_t kramAppDecode(vector& args) // TODO: if args ends with /, then output to that dir dstFilename = args[i]; - continue; + //continue; } else if (isStringEqual(word, "-input") || isStringEqual(word, "-i")) { @@ -1556,7 +1562,7 @@ static int32_t kramAppDecode(vector& args) } srcFilename = args[i]; - continue; + //continue; } else if (isStringEqual(word, "-swizzle")) { @@ -1574,7 +1580,7 @@ 
static int32_t kramAppDecode(vector& args) break; } swizzleText = swizzleString; - continue; + //continue; } // this is really decoder, but keep same argument as encoder else if (isStringEqual(word, "-e") || @@ -1587,14 +1593,14 @@ static int32_t kramAppDecode(vector& args) } textureDecoder = parseEncoder(args[i]); - continue; + //continue; } // probably should be per-command and global verbose else if (isStringEqual(word, "-v") || isStringEqual(word, "-verbose")) { isVerbose = true; - continue; + //continue; } else { KLOGE("Kram", "unexpected argument \"%s\"\n", @@ -1698,11 +1704,11 @@ static int32_t kramAppEncode(vector& args) if (isStringEqual(word, "-sdf")) { infoArgs.doSDF = true; - continue; + //continue; } else if (isStringEqual(word, "-optopaque")) { infoArgs.optimizeFormatForOpaque = true; - continue; + //continue; } // mip setting @@ -1715,7 +1721,7 @@ static int32_t kramAppEncode(vector& args) } infoArgs.mipMaxSize = atoi(args[i]); - continue; + //continue; } else if (isStringEqual(word, "-mipmin")) { ++i; @@ -1726,12 +1732,12 @@ static int32_t kramAppEncode(vector& args) } infoArgs.mipMinSize = atoi(args[i]); - continue; + //continue; } else if (isStringEqual(word, "-mipnone")) { // disable mips even if pow2 infoArgs.doMipmaps = false; - continue; + //continue; } // else if (isStringEqual(word, "-mipalign")) { // // pad start of each mip to pixel/block size of format @@ -1755,17 +1761,17 @@ static int32_t kramAppEncode(vector& args) KLOGE("Kram", "heightScale arg cannot be 0"); error = true; } - continue; + //continue; } else if (isStringEqual(word, "-height")) { // converted to a normal map infoArgs.isHeight = true; - continue; + //continue; } else if (isStringEqual(word, "-wrap")) { // whether texture is clamp or wrap infoArgs.isWrap = true; - continue; + //continue; } @@ -1779,7 +1785,7 @@ static int32_t kramAppEncode(vector& args) } infoArgs.textureEncoder = parseEncoder(args[i]); - continue; + //continue; } else if (isStringEqual(word, "-swizzle")) { @@ -1797,7 +1803,7 @@ static int32_t kramAppEncode(vector& args) break; } infoArgs.swizzleText = swizzleString; - continue; + //continue; } else if (isStringEqual(word, "-chunks")) { @@ -1824,7 +1830,7 @@ static int32_t kramAppEncode(vector& args) infoArgs.chunksY = chunksY; infoArgs.chunksCount = chunksX * chunksY; - continue; + //continue; } else if (isStringEqual(word, "-avg")) { @@ -1836,7 +1842,7 @@ static int32_t kramAppEncode(vector& args) break; } infoArgs.averageChannels = channelString; - continue; + //continue; } else if (isStringEqual(word, "-type")) { ++i; @@ -1847,7 +1853,7 @@ static int32_t kramAppEncode(vector& args) } infoArgs.textureType = parseTextureType(args[i]); - continue; + //continue; } else if (isStringEqual(word, "-quality")) { ++i; @@ -1858,7 +1864,7 @@ static int32_t kramAppEncode(vector& args) } infoArgs.quality = atoi(args[i]); - continue; + //continue; } else if (isStringEqual(word, "-output") || @@ -1872,13 +1878,7 @@ static int32_t kramAppEncode(vector& args) // TODO: if args ends with /, then output to that dir dstFilename = args[i]; - -// // see if it's a ktxa file -// if (dstFilename.back() == 'a' || -// dstFilename.back() == 'A') { -// infoArgs.skipImageLength = true; -// } - continue; + //continue; } else if (isStringEqual(word, "-input") || isStringEqual(word, "-i")) { @@ -1890,29 +1890,29 @@ static int32_t kramAppEncode(vector& args) } srcFilename = args[i]; - continue; + //continue; } // these affect the format else if (isStringEqual(word, "-hdr")) { // not validating format for 
whether it's srgb or not infoArgs.isHDR = true; - continue; + //continue; } else if (isStringEqual(word, "-srgb")) { // not validating format for whether it's srgb or not infoArgs.isSRGB = true; - continue; + //continue; } else if (isStringEqual(word, "-signed")) { // not validating format for whether it's signed or not infoArgs.isSigned = true; - continue; + //continue; } else if (isStringEqual(word, "-normal")) { infoArgs.isNormal = true; - continue; + //continue; } else if (isStringEqual(word, "-resize")) { ++i; @@ -1923,7 +1923,7 @@ static int32_t kramAppEncode(vector& args) } resizeString = args[i]; - continue; + //continue; } // This means to post-multiply alpha after loading, not that incoming data in already premul @@ -1931,22 +1931,22 @@ static int32_t kramAppEncode(vector& args) // really would prefer to premul them when building the texture. else if (isStringEqual(word, "-premul")) { infoArgs.isPremultiplied = true; - continue; + //continue; } else if (isStringEqual(word, "-prezero")) { infoArgs.isPrezero = true; - continue; + //continue; } // this means premul the data at read from srgb, this it to match photoshop else if (isStringEqual(word, "-premulrgb")) { isPremulRgb = true; - continue; + //continue; } else if (isStringEqual(word, "-v") || isStringEqual(word, "-verbose")) { infoArgs.isVerbose = true; - continue; + //continue; } else if (isStringEqual(word, "-f") || isStringEqual(word, "-format")) { @@ -1958,7 +1958,18 @@ static int32_t kramAppEncode(vector& args) } infoArgs.formatString = args[i]; - continue; + //continue; + } + + // compressor for ktx2 mips + // TODO: need level control + else if (isStringEqual(word, "-zstd")) { + infoArgs.compressor.compressorType = KTX2SupercompressionZstd; + //continue; + } + else if (isStringEqual(word, "-zlib")) { + infoArgs.compressor.compressorType = KTX2SupercompressionZlib; + //continue; } else { KLOGE("Kram", "unexpected argument \"%s\"\n", @@ -2176,7 +2187,7 @@ int32_t kramAppScript(vector& args) } srcFilename = args[i]; - continue; + //continue; } else if (isStringEqual(word, "-jobs") || isStringEqual(word, "-j")) { @@ -2189,7 +2200,7 @@ int32_t kramAppScript(vector& args) } numJobs = atoi(args[i]); - continue; + //continue; } else if (isStringEqual(word, "-v") || isStringEqual(word, "-verbose")) { diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index c8ce1b0b..b0839191 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -48,6 +48,12 @@ #include #endif +// for zlib compress +#include "miniz.h" + +// for zstd compress +#include "zstd.h" + namespace kram { using namespace std; @@ -308,71 +314,7 @@ bool Image::loadImageFromPixels(const vector& pixels, int32_t width, return true; } -void Image::computeMipStorage(const KTXImage& image, int32_t w, int32_t h, - bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize, - int32_t& storageSize, int32_t& storageSizeTotal, - vector& mipStorageSizes, - int32_t& numDstMipLevels, int32_t& numMipLevels) const -{ - bool canMipmap = true; // isPow2(w) && isPow2(h); // DONE: removed pow2 requirement, mip gen handles non-pow2 - - bool needsDownsample = (w > mipMaxSize || h > mipMaxSize); - - int32_t maxMipLevels = 16; // 64K x 64K - if ((!doMipmaps) && needsDownsample) { - maxMipLevels = 1; - } - - if (canMipmap && (doMipmaps || needsDownsample)) { - numMipLevels++; - - bool keepMip = - (w >= mipMinSize && w <= mipMaxSize) && - (h >= mipMinSize && h <= mipMaxSize); - - if (keepMip) { - mipStorageSizes.push_back(storageSize); - numDstMipLevels++; - } - 
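// Editorial aside: the "miniz.h" and "zstd.h" includes added above back the new KTX2 mip
// supercompression. For reference, the one-shot form of the public zstd API (assumed usage here,
// not kram code) looks like the sketch below; the encoder later in this file instead keeps a
// ZSTD_CCtx alive and calls ZSTD_compress2 once per mip level.
static bool zstdCompressOnceSketch(const uint8_t* src, size_t srcSize,
                                   vector<uint8_t>& dst, int level /* 0 = zstd default */)
{
    dst.resize(ZSTD_compressBound(srcSize)); // worst-case compressed size
    size_t written = ZSTD_compress(dst.data(), dst.size(), src, srcSize, level);
    if (ZSTD_isError(written)) {
        return false;
    }
    dst.resize(written);
    return true;
}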
else { - mipStorageSizes.push_back(0); // 0 means skip storing this mip - } - - do { - mipDown(w, h); - - keepMip = - (w >= mipMinSize && w <= mipMaxSize) && - (h >= mipMinSize && h <= mipMaxSize); - - if (keepMip && (numDstMipLevels < maxMipLevels)) { - int32_t mipStorageSize = image.mipLevelSize(w, h); - mipStorageSizes.push_back(mipStorageSize); - storageSizeTotal += mipStorageSize; - numDstMipLevels++; - } - else { - mipStorageSizes.push_back(0); // - means skip storing this mip - } - // a count of how many mips exist from topmost - numMipLevels++; - } while (w > 1 || h > 1); - - // adjust the pixel storage area to the first/largest exported mip - for (auto mipStorageSize : mipStorageSizes) { - if (mipStorageSize != 0) { - storageSize = mipStorageSize; - break; - } - } - } - else { - mipStorageSizes.push_back(storageSize); - numDstMipLevels++; - numMipLevels++; - } -} // Can average any channels per block, this means they are constant across the // block and use endpoint storage but do not affect the endpoint fitting. @@ -524,10 +466,9 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma vector propsData; dstImage.toPropsData(propsData); - dstHeader.bytesOfKeyValueData = uint32_t(propsData.size()); - if (!dstImage.initMipLevels(false, sizeof(KTXHeader) + dstHeader.bytesOfKeyValueData)) { - return false; - } + dstHeader.bytesOfKeyValueData = (uint32_t)vsizeof(propsData); + size_t mipOffset = sizeof(KTXHeader) + dstHeader.bytesOfKeyValueData; + dstImage.initMipLevels(mipOffset); // allocate to hold props and entire image to write out if (!dstFile) { @@ -546,12 +487,12 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma // write the header out - if (!writeDataAtOffset((const uint8_t*)&headerCopy, sizeof(headerCopy), 0, dstFile, dstImage)) { + if (!writeDataAtOffset((const uint8_t*)&headerCopy, sizeof(KTXHeader), 0, dstFile, dstImage)) { return false; } // write out the props - if (!writeDataAtOffset(propsData.data(), propsData.size(), sizeof(KTXHeader), dstFile, dstImage)) { + if (!writeDataAtOffset(propsData.data(), vsizeof(propsData), sizeof(KTXHeader), dstFile, dstImage)) { return false; } @@ -580,12 +521,14 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma // DONE: walk chunks here and seek to src and dst offsets in conversion // make sure to walk chunks in the exact same order they are written, array then face, or slice - int32_t w = srcImage.width; - int32_t h = srcImage.height; - + int32_t w = 0; + int32_t h = 0; + int32_t d = 0; + for (int32_t chunk = 0; chunk < numChunks; ++chunk) { w = srcImage.width; h = srcImage.height; + d = srcImage.depth; for (int32_t i = 0; i < (int32_t)srcImage.header.numberOfMipmapLevels; ++i) { const KTXImageLevel& dstMipLevel = dstImage.mipLevels[i]; @@ -878,7 +821,7 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma } // next mip level - mipDown(w, h); + mipDown(w, h, d); } } @@ -935,39 +878,298 @@ bool Image::encode(ImageInfo& info, KTXImage& dstImage) const bool Image::encode(ImageInfo& info, FILE* dstFile) const { - // this will be throw out + // dstImage will be ignored KTXImage dstImage; + return encodeImpl(info, dstFile, dstImage); } +// Use this for in-place construction of mips +struct MipConstructData { + vector tmpImageData8; // for average channels per block -bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const -{ - //KTXImage image; - KTXHeader& header = dstImage.header; + // use this 
for complex texture types, copy data from vertical/horizotnal + // strip image into here to then gen mips + vector copyImage; + + // So can use simd ops to do conversions, use float4. + // using half4 for mips of ldr data to cut memory in half + // processing large textures nees lots of memory for src image + // 8k x 8k x 8b = 500 mb + // 8k x 8k x 16b = 1 gb + vector halfImage; + vector floatImage; vector chunkOffsets; +}; - int32_t w = _width; - int32_t h = _height; - if (!validateTextureType(info.textureType, w, h, chunkOffsets, header, - info.doMipmaps, - info.chunksX, info.chunksY, info.chunksCount)) - { - return false; - } - // cube and array this is the size of one face/slice - const int32_t modifiedWidth = w; - const int32_t modifiedHeight = h; +// See here: +// https://www.khronos.org/registry/DataFormat/specs/1.3/dataformat.1.3.html - // work out how much memory we need to load - header.initFormatGL(info.pixelFormat); +enum KHR_DF_MODEL { + KHR_DF_MODEL_RGBSDA = 1, + + KHR_DF_MODEL_BC1A = 128, + // KHR_DF_MODEL_BC2 = 129, + KHR_DF_MODEL_BC3 = 130, + KHR_DF_MODEL_BC4 = 131, + KHR_DF_MODEL_BC5 = 132, + KHR_DF_MODEL_BC6H = 133, + KHR_DF_MODEL_BC7 = 134, + + //KHR_DF_MODEL_ETC1 = 160, + KHR_DF_MODEL_ETC2 = 161, + + KHR_DF_MODEL_ASTC = 162, + + //KHR_DF_MODEL_ETC1S = 163, + +}; - dstImage.pixelFormat = info.pixelFormat; - dstImage.textureType = info.textureType; +enum KHR_DF_CHANNEL { + // guessing at these + KHR_DF_CHANNEL_RED = 0, + KHR_DF_CHANNEL_GREEN = 1, + KHR_DF_CHANNEL_BLUE = 2, + KHR_DF_CHANNEL_ALPHA = 15, + + // BC + //KHR_DF_CHANNEL_BC1A_COLOR = 0, + KHR_DF_CHANNEL_BC1A_ALPHA = 15, + + //KHR_DF_CHANNEL_BC2_COLOR = 0, + KHR_DF_CHANNEL_BC2_ALPHA = 15, + + //KHR_DF_CHANNEL_BC3_COLOR = 0, + KHR_DF_CHANNEL_BC3_ALPHA = 15, + + //KHR_DF_CHANNEL_BC4_DATA = 0, + + //KHR_DF_CHANNEL_BC5_RED = 0, + KHR_DF_CHANNEL_BC5_GREEN = 1, + + //KHR_DF_CHANNEL_BC6H_COLOR = 0, + //KHR_DF_CHANNEL_BC7_COLOR = 0, + + // ETC2 + //KHR_DF_CHANNEL_ETC2_RED = 0, + KHR_DF_CHANNEL_ETC2_GREEN = 1, + KHR_DF_CHANNEL_ETC2_COLOR = 2, // RGB + KHR_DF_CHANNEL_ETC2_ALPHA = 16, + + // ASTC + //KHR_DF_CHANNEL_ASTC_DATA = 0, +}; + + +enum KHR_DF_PRIMARIES { + KHR_DF_PRIMARIES_BT709 = 1 +}; +enum KHR_DF_TRANSFER { + KHR_DF_TRANSFER_LINEAR = 1, // ? 
+ KHR_DF_TRANSFER_SRGB = 2, +}; + +enum KHR_DF_ALPHA { + KHR_DF_FLAG_ALPHA_STRAIGHT = 0, + KHR_DF_FLAG_ALPHA_PREMULTIPLIED = 1, +}; + +// 16 bytes total +struct KTX2DescriptorChannelBlock { + + // 32-bits + uint16_t bitOffset = 0; + uint8_t bitLength = 0; + uint8_t channelType : 4; // RED, GREEN, BLUE, RRR, GGG + uint8_t FSEL : 4; // l is low bit + + // 32-bits + uint8_t samplePositions[4] = {0}; + + uint32_t sampleLower = 0; + uint32_t sampleUpper = UINT32_MAX; +}; + +// This can be up to 7 x 4 = 24 + 16 x channels in size +struct KTX2DescriptorFileBlock { + KTX2DescriptorFileBlock(MyMTLPixelFormat format, bool isPremul, bool isCompressed); + + uint32_t totalSize = 0; // descriptorBlockSize + 4 + + uint32_t vendorID : 18; + uint32_t descriptorType : 14; + uint16_t versionNumber = 2; + uint16_t descriptorBlockSize = 0; // 24B + channels (doesn't include totalSize) + + uint8_t colorModel = 0; + uint8_t colorPrimaries = 0; + uint8_t transferFunction = 0; + uint8_t flags = 0; + + uint8_t textureBlockDimensions[4] = {0}; + uint8_t bytesPlane[8] = {0}; + + // now 16 bytes for each channel present + KTX2DescriptorChannelBlock channels[4]; // max channels +}; + +KTX2DescriptorFileBlock::KTX2DescriptorFileBlock(MyMTLPixelFormat format, bool isPremul, bool isCompressed) { + uint32_t numChannels = numChannelsOfFormat(format); + Int2 blockDims = blockDimsOfFormat(format); + bool isSrgb = isSrgbFormat(format); + uint32_t blockSize = blockSizeOfFormat(format); + bool isFloat = isFloatFormat(format); + bool isSigned = isSignedFormat(format); + + totalSize = sizeof(KTX2DescriptorFileBlock) - + (4 - numChannels) * sizeof(KTX2DescriptorChannelBlock); + descriptorBlockSize = totalSize - 4; + + // ugly that these are all -1, can't simply read them in debugger + textureBlockDimensions[0] = blockDims.x - 1; + textureBlockDimensions[1] = blockDims.y - 1; + + vendorID = 0; + descriptorType = 0; + + // these formats are all single-planes + // some indication this should be 0 if zstd applied + if (!isCompressed) { + bytesPlane[0] = blockSize; + } + + for (uint32_t i = 0; i < numChannels; ++i) { + auto& c = channels[i]; + + c.FSEL = 0; + if (isSigned) + c.FSEL |= 0x4; + if (isFloat) + c.FSEL |= 0x8; + + // TODO: what are E & L, nothing in docs about these ? + // no examples of use of these either + + c.channelType = 0; + + if (isFloat) { + // This is for BC6H, TODO: might be half only so test for isHalf? 
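// Editorial size check for the descriptor structs above (assuming they pack with no padding,
// which the declared field layout allows): each KTX2DescriptorChannelBlock is 16 bytes and the
// fixed fields after totalSize add up to 24 bytes, so descriptorBlockSize = 24 + 16 * numChannels
// and totalSize = descriptorBlockSize + 4. For a two-channel format such as BC5 that gives
// totalSize = 92 - (4 - 2) * 16 = 60 and descriptorBlockSize = 56, where 92 is
// sizeof(KTX2DescriptorFileBlock) with its full channels[4] array.
static_assert(sizeof(KTX2DescriptorChannelBlock) == 16, "DFD channel block expected to be 16 bytes");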
+ if (isSigned) { + c.sampleLower = 0xBF800000U; // -1.0f; + c.sampleUpper = 0x7F800000U; //  1.0f; + } + else { + c.sampleLower = 0xBF800000U; //  -1.0f; + c.sampleUpper = 0x7F800000U; //   1.0f; + } + } + else if (isSigned) { + c.sampleLower = INT32_MIN; + c.sampleUpper = INT32_MAX; + } + } + + // set this since it applies to so many block formats + channels[0].bitOffset = 0; + channels[0].bitLength = blockSize * 8 - 1; // needs to be split of channel bits + + + switch(format) { + case MyMTLPixelFormatBC1_RGBA: + case MyMTLPixelFormatBC1_RGBA_sRGB: + // if ever do punchthrough-alpha + //channels[1].channelType = KHR_DF_CHANNEL_BC1A_ALPHA; + break; + + case MyMTLPixelFormatBC3_RGBA: + case MyMTLPixelFormatBC3_RGBA_sRGB: + // alpha is first + channels[0].channelType = KHR_DF_CHANNEL_BC3_ALPHA; + + channels[0].bitOffset = 0; + channels[0].bitLength = 64 - 1; + + channels[1].bitOffset = 64; + channels[1].bitLength = 64 - 1; + + break; + + case MyMTLPixelFormatBC5_RGUnorm: + case MyMTLPixelFormatBC5_RGSnorm: + channels[1].channelType = KHR_DF_CHANNEL_BC3_ALPHA; + + channels[0].bitOffset = 0; + channels[0].bitLength = 64 - 1; + + channels[1].bitOffset = 64; + channels[1].bitLength = 64 - 1; + + break; + + // TODO: fix bc6h sampleLower/Upper + + // TODO: handle etc2 + case MyMTLPixelFormatEAC_RG11Unorm: + case MyMTLPixelFormatEAC_RG11Snorm: + channels[1].channelType = KHR_DF_CHANNEL_ETC2_GREEN; + + channels[0].bitOffset = 0; + channels[0].bitLength = 64 - 1; + + channels[1].bitOffset = 64; + channels[1].bitLength = 64 - 1; + break; + + case MyMTLPixelFormatETC2_RGB8: + case MyMTLPixelFormatETC2_RGB8_sRGB: + channels[0].channelType = KHR_DF_CHANNEL_ETC2_COLOR; + break; + + + case MyMTLPixelFormatEAC_RGBA8: + case MyMTLPixelFormatEAC_RGBA8_sRGB: + channels[0].channelType = KHR_DF_CHANNEL_ETC2_ALPHA; + channels[1].channelType = KHR_DF_CHANNEL_ETC2_COLOR; + + channels[0].bitOffset = 0; + channels[0].bitLength = 64 - 1; + + channels[1].bitOffset = 64; + channels[1].bitLength = 64 - 1; + break; + + + // NOTE: astc is all the same, and can already use defaults + + default: { + uint32_t numChannelBits = (blockSize * 8) / numChannels; + // handle uniform explcit types with offset per channel + uint32_t lastBitOffset = 0; + for (uint32_t i = 0; i < numChannels; ++i) { + auto& c = channels[i]; + c.channelType = KHR_DF_CHANNEL_RED + i; + c.bitOffset = lastBitOffset; + c.bitLength = numChannelBits - 1; + + lastBitOffset += numChannelBits; + } + + colorModel = KHR_DF_MODEL_RGBSDA; + break; + } + } + + colorPrimaries = KHR_DF_PRIMARIES_BT709; + transferFunction = isSrgb ? KHR_DF_TRANSFER_SRGB : KHR_DF_TRANSFER_LINEAR; + flags = isPremul ? KHR_DF_FLAG_ALPHA_PREMULTIPLIED : KHR_DF_FLAG_ALPHA_STRAIGHT; +} + +void Image::addBaseProps(const ImageInfo& info, KTXImage& dstImage) const +{ dstImage.addFormatProps(); // TODO: caller should really set post swizzle @@ -1010,137 +1212,414 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const else { dstImage.addAddressProps("Rep,Rep,X"); } - + if (info.doMipmaps) { dstImage.addFilterProps("Lin,Lin,Lin"); // min,mag,mip } else { dstImage.addFilterProps("Lin,Lin,X"); // min,mag,mip } - + // This is hash of source png/ktx file (use xxhash32 or crc32) // can quickly check header if multiple copies of same source w/diff names. 
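// (Editorial note, continuing the thought above: with miniz now included in this file, a crc32 of
//  the raw source file bytes, e.g. mz_crc32(0, fileBytes, fileSize), stored in a prop would be
//  enough to spot renamed copies of the same source; wiring that up is still the
//  addSourceHashProps TODO below.)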
// May also need to store command line args in a prop to reject duplicate processing // TODO: ktxImage.addSourceHashProps(0); +} - // convert props into a data blob that can be written out - vector propsData; - dstImage.toPropsData(propsData); - header.bytesOfKeyValueData = (uint32_t)propsData.size(); +// wish C++ had a defer +struct ZSTDScope +{ + ZSTDScope(ZSTD_CCtx* ctx_) : ctx(ctx_) {} + ~ZSTDScope() { ZSTD_freeCCtx(ctx); } + +private: + ZSTD_CCtx* ctx = nullptr; +}; - //ktxImage.bytesPerBlock = header.blockSize(); - //ktxImage.blockDims = header.blockDims(); - int32_t storageSize = dstImage.mipLevelSize(w, h); +bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const +{ + KTXHeader& header = dstImage.header; + MipConstructData mipConstructData; + + vector& chunkOffsets = mipConstructData.chunkOffsets; - // how much to store to store biggest level of ktx (will in-place mip to - // this) - int32_t storageSizeTotal = storageSize; + int32_t w = _width; + int32_t h = _height; - vector mipOffsets; - vector mipStorageSizes; - int32_t numDstMipLevels = 0; - int32_t numMipLevels = 0; + // compute chunks, and adjust w/h based on that + // the code allows a vertical or horizontal strip or grid of chunks + if (!validateTextureType(info.textureType, w, h, chunkOffsets, header, + info.doMipmaps, + info.chunksX, info.chunksY, info.chunksCount)) + { + return false; + } - // header only holds pixelFormat, but can generate block info from that - computeMipStorage(dstImage, w, h, // pixelFormat, - info.doMipmaps, info.mipMinSize, info.mipMaxSize, - storageSize, storageSizeTotal, mipStorageSizes, - numDstMipLevels, numMipLevels); + // work out how much memory we need to load + header.initFormatGL(info.pixelFormat); - // now compute the mip base offsets - int32_t mipOffset = sizeof(KTXHeader) + header.bytesOfKeyValueData; + dstImage.pixelFormat = info.pixelFormat; + dstImage.textureType = info.textureType; - for (int32_t i = 0; i < numMipLevels; ++i) { - int32_t mipStorageSize = mipStorageSizes[i]; - if (mipStorageSize == 0) { - mipOffsets.push_back(0); - continue; + // whd might be changed by initMipLevels based on min/max mip size + dstImage.width = w; + dstImage.height = h; + dstImage.depth = header.pixelDepth; // from validate above + + dstImage.initMipLevels(info.doMipmaps, info.mipMinSize, info.mipMaxSize); + + // ---------------------------------------------------- + + int32_t numChunks = (int32_t)chunkOffsets.size(); + + //--------------- + // props + + addBaseProps(info, dstImage); + + // convert props into a data blob that can be written out + vector propsData; + dstImage.toPropsData(propsData); + header.bytesOfKeyValueData = (uint32_t)vsizeof(propsData); + + // ---------------------------------------------------- + + // can go out to KTX2 here instead + // It has two different blocks, supercompression for BasisLZ + // and a DFD block which details the block content. + // And mips are reversed. + + // dstImage case - in memory version will always be KTX1 format for now + // this even gens a KTX1 dstImage, and then just compresses the mip levels + + if (info.isKTX2 && dstFile) + { + // generate KTX1 file with uncompressed mips first + // a big memory hit here, since all mips stored in memory despite built in-place + // could build and compress and entire level at a time, but can't write any of it + // out until smallest mips are constructed. Only then are offsets resolved. 
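// Editorial summary of the KTX2 layout that the branch below emits (offsets exactly as computed
// there):
//   KTX2Header
//   level index             levelCount * sizeof(KTXImageLevel), offsets patched in afterwards
//   DFD block               dfdByteOffset = sizeof(KTX2Header) + levelByteLength
//   key/value props         kvdByteOffset = dfdByteOffset + dfdData.totalSize
//   supercompression data   sgdByteOffset = kvdByteOffset + kvdByteLength (empty unless BasisLZ)
//   mip levels              smallest mip at the lowest offset, each start aligned to 4 bytes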
+ + if (!writeKTX1FileOrImage(info, mipConstructData, propsData, nullptr, dstImage)) { + return false; + } + + // now convert from ktx1 to ktx2 + + KTX2Header header2; + + header2.vkFormat = vulkanType(info.pixelFormat); + // header2.typeSize = 1; // skip + + header2.pixelWidth = header.pixelWidth; + header2.pixelHeight = header.pixelHeight; + header2.pixelDepth = header.pixelDepth; + + header2.layerCount = header.numberOfArrayElements; + header2.faceCount = header.numberOfFaces; + header2.levelCount = header.numberOfMipmapLevels; + + header2.supercompressionScheme = info.compressor.compressorType; + + // compute the dfd + KTX2DescriptorFileBlock dfdData(info.pixelFormat, info.hasAlpha && info.isPremultiplied, info.compressor.isCompressed()); + + // TODO: sgdData only used for BasisLZ, UASTC + zstd don't use this + vector sgdData; + + size_t levelByteLength = header2.levelCount * sizeof(KTXImageLevel); + size_t levelByteOffset = sizeof(KTX2Header); + + // compute offsets and lengts of data blocks + header2.dfdByteOffset = levelByteOffset + levelByteLength; + header2.kvdByteOffset = header2.dfdByteOffset + dfdData.totalSize; + header2.sgdByteOffset = header2.kvdByteOffset + vsizeof(propsData); + + header2.dfdByteLength = dfdData.totalSize; + header2.kvdByteLength = vsizeof(propsData); + header2.sgdByteLength = vsizeof(sgdData); + + // write the header + if (!writeDataAtOffset((const uint8_t*)&header2, sizeof(KTX2Header), 0, dstFile, dstImage)) { + return false; + } + + // next are levels, but those are written out later + + // write the dfd + if (!writeDataAtOffset((const uint8_t*)&dfdData, dfdData.totalSize, header2.dfdByteOffset, dstFile, dstImage)) { + return false; + } + + // write the props + if (!writeDataAtOffset(propsData.data(), vsizeof(propsData), header2.kvdByteOffset, dstFile, dstImage)) { + return false; + } + + // skip supercompression block + if (!sgdData.empty()) { + // TODO: align(8) sgdPadding + if (!writeDataAtOffset(sgdData.data(), vsizeof(sgdData), header2.sgdByteOffset, dstFile, dstImage)) { + return false; + } } + + // offsets will be largest last unlike KTX + // data is packed without any length or alignment unllike in KTX + // reverse the mip levels offsets (but not the order) for KTX2 + + size_t imageByteOffset = header2.sgdByteOffset + header2.sgdByteLength; + + size_t lastImageByteOffset = imageByteOffset; + + vector ktx2Levels(dstImage.mipLevels); + for (int32_t i = ktx2Levels.size() - 1; i >= 0; --i) { + + // align the offset to leastCommonMultiple(4, texel_block_size); + if (lastImageByteOffset & 0x3) { + lastImageByteOffset += 4 - (lastImageByteOffset & 0x3); + } + + auto& level = ktx2Levels[i]; + level.length *= numChunks; + level.lengthCompressed = level.length; + level.offset = lastImageByteOffset; + + lastImageByteOffset = level.offset + level.length; + } + + if (!info.compressor.isCompressed()) { + if (!writeDataAtOffset((const uint8_t*)ktx2Levels.data(), vsizeof(ktx2Levels), levelByteOffset, dstFile, dstImage)) { + return false; + } + + // write the levels out + for (int32_t i = 0; i < (int32_t)ktx2Levels.size(); ++i) { + auto& level2 = ktx2Levels[i]; + auto& level1 = dstImage.mipLevels[i]; + + if (!writeDataAtOffset(dstImage.fileData + level1.offset, level2.length, level2.offset, dstFile, dstImage)) { + return false; + } + } + } + else { - // 4 byte length of mip level is written out, this totally throws off block alignment - // this is size of one mip not the array of mips of that size - //if (!info.skipImageLength) { - int32_t levelSizeOf = 
sizeof(uint32_t); - mipOffset += levelSizeOf; - //} - - // start of the mips - mipOffsets.push_back(mipOffset); - - // ktx requires 4 byte alignment to rows of pixels (affext r8, rg8, r16f) - // it's not enough to fix alignment below, so this needs fixed in mipStorage calc. -// int32_t numPadding = 3 - ((mipStorageSize + 3) % 4); -// if (numPadding != 0) { -// // TODO: add error, need to pad rows not just stick pad at end -// // this can happen on mips with formats below that don't align to 4 byte boundaries -// // rgb8/16f also have this, but not supporting those formats currently. -// return false; -// } - - // next row of mips are offset - mipOffset += mipStorageSize * header.totalChunks(); + // start compression with the smallest mips first, then can write out data as we go through it all + + // update the offsets and compressed sizes + lastImageByteOffset = imageByteOffset; + + // allocate big enough to hold entire uncompressed level + vector compressedData; + compressedData.resize(mz_compressBound(ktx2Levels.front().length)); // largest mip + size_t compressedDataSize = 0; + + // reuse a context here + ZSTD_CCtx* cctx = nullptr; + int zlibLevel = MZ_DEFAULT_COMPRESSION; + + if (info.compressor.compressorType == KTX2SupercompressionZstd) { + cctx = ZSTD_createCCtx(); + if (!cctx) { + return false; + } + + if (info.compressor.compressorLevel > 0.0) { + int zstdLevel = (int)round(info.compressor.compressorLevel * 100.0); + if (zstdLevel > 100) { + zstdLevel = 100; + } + + ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, zstdLevel); + + // may need to reset the compressor context, but says call starts a new frame + } + } + else if (info.compressor.compressorType == KTX2SupercompressionZlib) { + // set the level up + if (info.compressor.compressorLevel > 0.0) { + zlibLevel = (int)round(info.compressor.compressorLevel * 10.0); + if (zlibLevel > 10) { + zlibLevel = 10; + } + } + } + + ZSTDScope scope(cctx); + + for (int32_t i = (int32_t)ktx2Levels.size() - 1; i >= 0; --i) { + + auto& level2 = ktx2Levels[i]; + auto& level1 = dstImage.mipLevels[i]; + + const uint8_t* levelData = dstImage.fileData + level1.offset; + + // compress each mip + switch(info.compressor.compressorType) { + case KTX2SupercompressionZstd: { + // this resets the frame on each call + compressedDataSize = ZSTD_compress2(cctx, compressedData.data(), compressedData.size(), levelData, level2.length); + + if (ZSTD_isError(compressedDataSize)) { + KLOGE("kram", "encode mip zstd failed"); + return false; + } + break; + } + case KTX2SupercompressionZlib: { + mz_ulong dstSize = compressedData.size(); + if (mz_compress2(compressedData.data(), &dstSize, levelData, level2.length, zlibLevel) != MZ_OK) + { + KLOGE("kram", "encode mip zlib failed"); + return false; + } + compressedDataSize = dstSize; + + break; + } + default: + // should never get here + return false; + } + + // also need for compressed levels? 
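// Editorial aside: the loader side is outside this patch, but reading one of these levels back is
// essentially the inverse of the compression switch above. A sketch, assuming the KTX2 level
// table has already been parsed into KTXImageLevel entries:
static bool decompressLevelSketch(KTX2Supercompression scheme,
                                  const uint8_t* src, size_t srcSize, // level.lengthCompressed
                                  uint8_t* dst, size_t dstSize)       // level.length
{
    switch (scheme) {
        case KTX2SupercompressionZstd: {
            size_t result = ZSTD_decompress(dst, dstSize, src, srcSize);
            return !ZSTD_isError(result) && result == dstSize;
        }
        case KTX2SupercompressionZlib: {
            mz_ulong outSize = (mz_ulong)dstSize;
            return mz_uncompress(dst, &outSize, src, (mz_ulong)srcSize) == MZ_OK && outSize == dstSize;
        }
        case KTX2SupercompressionNone:
            memcpy(dst, src, dstSize);
            return srcSize == dstSize;
        default:
            return false; // BasisLZ transcoding is not covered by this sketch
    }
}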
+ // align the offset to leastCommonMultiple(4, texel_block_size); + if (lastImageByteOffset & 0x3) { + lastImageByteOffset += 4 - (lastImageByteOffset & 0x3); + } + + level2.lengthCompressed = compressedDataSize; + level2.offset = lastImageByteOffset; + + lastImageByteOffset = level2.offset + level2.lengthCompressed; + + // write the mip + if (!writeDataAtOffset(compressedData.data(), compressedDataSize, level2.offset, dstFile, dstImage)) { + return false; + } + } + + // write out mip level size/offsets + if (!writeDataAtOffset((const uint8_t*)ktx2Levels.data(), vsizeof(ktx2Levels), levelByteOffset, dstFile, dstImage)) { + return false; + } + } } + else { + // this is purely ktx1 output path + if (!writeKTX1FileOrImage(info, mipConstructData, propsData, dstFile, dstImage)) { + return false; + } + } + + return true; +} - //---------------------------------------------- - - header.numberOfMipmapLevels = numDstMipLevels; +bool Image::writeKTX1FileOrImage( + ImageInfo& info, + MipConstructData& mipConstructData, + const vector& propsData, + FILE* dstFile, KTXImage& dstImage) const +{ + // recompute, it's had mips added into it above + size_t mipOffset = sizeof(KTXHeader) + dstImage.header.bytesOfKeyValueData; - // store the largest mip size that isn't skipped - for (auto mipStorageSize : mipStorageSizes) { - if (mipStorageSize != 0) { - header.pixelWidth = w; - header.pixelHeight = h; - break; + // allocate to hold props and entire image to write out + if (!dstFile) { + dstImage.initMipLevels(mipOffset); + + dstImage.reserveImageData(); + } + else { + int32_t numChunks = (int32_t)mipConstructData.chunkOffsets.size(); + + // set offsets up for ktx1 + size_t lastMipOffset = mipOffset; + + for (int32_t i = 0; i < (int32_t)dstImage.mipLevels.size(); ++i) { + auto& level = dstImage.mipLevels[i]; + level.offset = lastMipOffset + 4; // offset by length + + lastMipOffset = level.offset + level.lengthCompressed * numChunks; } + } + + // write the header out + KTXHeader headerCopy = dstImage.header; + + // fix header for 1d array + // TODO: move to initMipLevels, and just use the header + if (dstImage.textureType == MyMTLTextureType1DArray) { + headerCopy.pixelHeight = 0; + headerCopy.pixelDepth = 0; + } + + if (!writeDataAtOffset((const uint8_t*)&headerCopy, sizeof(headerCopy), 0, dstFile, dstImage)) { + return false; + } - mipDown(w, h); + // write out the props + if (!writeDataAtOffset(propsData.data(), vsizeof(propsData), sizeof(KTXHeader), dstFile, dstImage)) { + return false; } - // update image to match - dstImage.width = header.pixelWidth; - dstImage.height = header.pixelHeight; - dstImage.depth = header.pixelDepth; + // build and weite out the mip data + if (!createMipsFromChunks(info, mipConstructData, dstFile, dstImage)) { + return false; + } + + return true; +} + - // ---------------------------------------------------- +bool Image::createMipsFromChunks( + ImageInfo& info, + MipConstructData& data, + FILE* dstFile, + KTXImage& dstImage +) const +{ + // ---------------------------------------------------- + // set the structure fields and allocate it, only need enough to hold single // mip (reuses mem) also because mips are written out to file after // generated. 
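// For contrast with the KTX2 layout above, the KTX1 path in writeKTX1FileOrImage lays the file
// out as (editorial summary of the offsets it computes):
//   KTXHeader
//   key/value props (bytesOfKeyValueData bytes, starting at sizeof(KTXHeader))
//   per mip level, largest first: a uint32_t imageSize field, then numChunks * mip bytes,
//   so each level.offset sits 4 bytes past the end of the previous level's data.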
TextureData outputTexture; - outputTexture.width = w; - outputTexture.height = h; - outputTexture.data.resize(storageSize); - - // restore full src size to build the mips - w = modifiedWidth; - h = modifiedHeight; + outputTexture.width = dstImage.width; + outputTexture.height = dstImage.height; + outputTexture.data.resize(dstImage.mipLevels[0].length); // allocate to size of largest mip // This is for 8-bit data (pixelsFloat used for in-place mipgen) ImageData srcImage; - srcImage.width = w; - srcImage.height = h; + srcImage.width = _width; + srcImage.height = _height; + + // KramMipper uses these srcImage.isSRGB = info.isSRGB; srcImage.isHDR = info.isHDR; + int32_t w = srcImage.width; + int32_t h = srcImage.width; + // ---------------------------------------------------- - - vector tmpImageData8; // for average channels per block - + // use this for complex texture types, copy data from vertical/horizotnal // strip image into here to then gen mips - vector copyImage; + vector& copyImage = data.copyImage; // So can use simd ops to do conversions, use float4. // using half4 for mips of ldr data to cut memory in half // processing large textures nees lots of memory for src image // 8k x 8k x 8b = 500 mb // 8k x 8k x 16b = 1 gb - vector halfImage; - vector floatImage; + vector& halfImage = data.halfImage; + vector& floatImage = data.floatImage; + int32_t numChunks = (int32_t)data.chunkOffsets.size(); bool doPremultiply = info.hasAlpha && (info.isPremultiplied || info.isPrezero); - bool isMultichunk = chunkOffsets.size() > 1; + bool isMultichunk = numChunks > 1; if (info.isHDR) { // here the source is float @@ -1207,183 +1686,21 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const } } - int32_t numChunks = (int32_t)chunkOffsets.size(); - - // allocate to hold props and entire image to write out - if (!dstFile) { - // recompute, it's had mips added into it above - mipOffset = sizeof(KTXHeader) + header.bytesOfKeyValueData; - - dstImage.initMipLevels(false, mipOffset); - - dstImage.reserveImageData(); - } - - // ---------------------------------------------------- - Mipper mipper; SDFMipper sdfMipper; -#if 0 - // TODO: can go out to KTX2 here instead - // It has two different blocks, supercompression for BasisLZ - // and a DFD block which details the block content. - // And mips are reversed. 
- bool doWriteKTX2 = false; - if (doWriteKTX2 && dstFile) // in memory version will always be KTX1 format for nwo - { - KTX2Header header2; - - header2.vkFormat = vulkanType(info.pixelFormat); - // header2.typeSize = 1; // skip - - header2.pixelWidth = header.pixelWidth; - header2.pixelHeight = header.pixelHeight; - header2.pixelDepth = header.pixelDepth; - - if (dstImage.textureType == MyMTLTextureType1DArray) { - header2.pixelHeight = 0; - header2.pixelDepth = 0; - } - - header2.layerCount = header.numberOfArrayElements; - header2.faceCount = header.numberOfFaces; - header2.levelCount = numDstMipLevels; // header.numberOfMipmapLevels; - - // compute size of dfd - vector dfdData; - - // compute offsets and lengts of data blocks - header2.dfdByteOffset = sizeof(header2); - header2.kvdByteOffset = header2.dfdByteOffset + dfdData.size(); - header2.sgdByteOffset = header2.kvdByteOffset + propsData.size(); - - header2.dfdByteLength = dfdData.size(); - header2.kvdByteLength = propsData.size(); - header2.sgdByteLength = 0; - - // TODO: figure out dfd here - - // write the header - if (!writeDataAtOffset((const uint8_t*)&header2, sizeof(header2), 0, dstFile, dstImage)) { - return false; - } - - // write the dfd - if (!writeDataAtOffset(dfdData.data(), dfdData.size(), header2.dfdByteOffset, dstFile, dstImage)) { - return false; - } - - // write the props - if (!writeDataAtOffset(propsData.data(), propsData.size(), header2.kvdByteOffset, dstFile, dstImage)) { - return false; - } - - // skip supercompression block - - // TODO: this either writes to file or to dstImage (in-memory KTX file) - - // TODO: also need to support a few compressions - // zstd and zlib, does dfd contain the offsets of each chunk - // and the compressed sizes of mips. Know format and sizes uncompressed. - // but need to fill out the compressed size field. - - vector levels; - levels.resize(numDstMipLevels); - - size_t levelListStartOffset = header2.sgdByteOffset + header2.sgdByteLength; - size_t levelStartOffset = levelListStartOffset + levels.size() * sizeof(KTX2ImageLevel); - - size_t lastLevelOffset = levelStartOffset; - for (int32_t i = 0; i < numDstMipLevels; ++i) { - levels[i].length = numChunks * numDstMipLevels; - levels[i].lengthCompressed = levels[i].length; - levels[i].offset = lastLevelOffset + levels[i].lengthCompressed; - lastLevelOffset = levels[i].offset; - } - // TODO: compress to a seperate zstd stream for each level - // then can continue to do mips in place, and just append the bytes to that level - // after compression. If not compressed, then code from KTX1 can be used. - bool isCompressed = false; - - if (!isCompressed) { - if (!writeDataAtOffset(levels.data(), levels.size(), levelListStartOffset, dstFile, dstImage)) { - return false; - } - } - - // TODO: here allocate a zstd encoder for each level - vector< vector > compressedLevels; - if (isCompressed) { - compressedLevels.resize(numDstMipLevels); - } - - // write the chunks of mips see code below, seeks are important since - // it's building mips on the fly. 
- for (int32_t chunk = 0; chunk < numChunks; ++chunk) { - // TODO: actually build the mip (reuse code below for KTX) - - if (!isCompressed) - continue; - - // handle zstd compression here, and add to end of existing encoder for level - zstd_compress(level); - - // append the compressed bytes to each strea - levels[mipLevel].append(data); - } - - if (isCompressed) { - - // update the offsets and compressed sizes - lastLevelOffset = levelStartOffset; - for (int32_t i = 0; i < numDstMipLevels; ++i) { - levels[i].lengthCompressed = compressedLevels[i].size(); - levels[i].offset = lastLevelOffset + levels[i].lengthCompressed; - lastLevelOffset = levels[i].offset; - } - - // write out sizes - if (!writeDataAtOffset(levels.data(), levels.size(), levelListStartOffset, dstFile, dstImage)) { - return false; - } - - // and now seek and write out each compressed level - for (int32_t i = 0; i < numDstMipLevels; ++i) { - if (!writeDataAtOffset(compressedLevels[i].data(), compressedLevels[i].size(), levels[i].offset, dstFile, dstImage)) { - return false; - } - } - } - - return true; - } -#endif + vector& dstMipLevels = dstImage.mipLevels; + + int32_t srcTopMipWidth = srcImage.width; + int32_t srcTopMipHeight = srcImage.height; - // ---------------------------------------------------- - - // write the header out - KTXHeader headerCopy = header; - if (dstImage.textureType == MyMTLTextureType1DArray) { - headerCopy.pixelHeight = 0; - headerCopy.pixelDepth = 0; - } - if (!writeDataAtOffset((const uint8_t*)&headerCopy, sizeof(headerCopy), 0, dstFile, dstImage)) { - return false; - } - - // write out the props - if (!writeDataAtOffset(propsData.data(), propsData.size(), sizeof(KTXHeader), dstFile, dstImage)) { - return false; - } - for (int32_t chunk = 0; chunk < numChunks; ++chunk) { // this needs to append before chunkOffset copy below - w = modifiedWidth; - h = modifiedHeight; + w = srcTopMipWidth; + h = srcTopMipHeight; // copy a chunk at a time, mip that if needed, and then move to next chunk - Int2 chunkOffset = chunkOffsets[chunk]; + Int2 chunkOffset = data.chunkOffsets[chunk]; // reset these dimensions, or the mip mapping drops them to 1x1 srcImage.width = w; @@ -1400,7 +1717,7 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const for (int32_t x = 0; x < w; ++x) { float4 c0 = srcPixels[yOffset + x]; - float4& d0 = floatImage[y0 + x]; + float4& d0 = data.floatImage[y0 + x]; d0 = c0; } } @@ -1417,7 +1734,7 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const for (int32_t x = 0; x < w; ++x) { Color c0 = srcPixels[yOffset + x]; - Color& d0 = copyImage[y0 + x]; + Color& d0 = data.copyImage[y0 + x]; d0 = c0; } } @@ -1430,8 +1747,10 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const // copy and convert to half4 or float4 image // srcImage already points to float data, so could modify that // only need doPremultiply at the top mip - mipper.initPixelsHalfIfNeeded(srcImage, doPremultiply && !info.isPrezero, info.isPrezero, - halfImage); + mipper.initPixelsHalfIfNeeded(srcImage, + doPremultiply && info.isPremultiplied, + doPremultiply && info.isPrezero, + data.halfImage); } } @@ -1443,24 +1762,29 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const // build mips for the chunk, dropping mips as needed, but downsampling // from available image - int32_t numDstMipLevelsWritten = 0; - for (int32_t mipLevel = 0; mipLevel < numMipLevels; ++mipLevel) { - // no need to mip futher - if 
(numDstMipLevelsWritten >= numDstMipLevels) { - break; - } + int32_t numSkippedMips = 0; // TODO: data.numSkippedMips; + + for (int32_t mipLevel = 0; mipLevel < (int32_t)dstMipLevels.size(); ++mipLevel) { - bool skipMip = false; - uint32_t mipStorageSize = mipStorageSizes[mipLevel]; - if (mipStorageSize == 0) { - skipMip = true; - } + if (mipLevel == 0 && !info.doSDF) + { + if (numSkippedMips > 0) { + // this does in-place mipmap to dstImage (also updates floatPixels if used) + for (int32_t i = 0; i < numSkippedMips; ++i) { + // have to build the submips even with skipMip + mipper.mipmap(srcImage, dstImageData); - // this does in-place mipmap to dstImage (also updates floatPixels - // if used) - if (info.doSDF) { - // have to process all images to SDF - if (!skipMip) { + // dst becomes src for next in-place mipmap + srcImage = dstImageData; + + w = dstImageData.width; + h = dstImageData.height; + } + } + } + else { + if (info.doSDF) { + // have to process all images to SDF // sdf mipper has to build from origin sourceImage // but it can in-place write to the same dstImage sdfMipper.mipmap(dstImageData, mipLevel); @@ -1468,10 +1792,7 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const w = dstImageData.width; h = dstImageData.height; } - } - else { - // can export existing image for mip 0 - if (mipLevel > 0) { + else { // have to build the submips even with skipMip mipper.mipmap(srcImage, dstImageData); @@ -1482,16 +1803,13 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const h = dstImageData.height; } } - - // only write out mip if non-zero storage - if (skipMip) { - continue; - } - - // mipOffsets are start of first chunk of a given mip size - mipOffset = mipOffsets[mipLevel] + chunk * mipStorageSize; - numDstMipLevelsWritten++; - + + // mipOffset are start of first chunk of a given mip size + size_t mipStorageSize = dstMipLevels[mipLevel].length; // / numChunks; + + // offset only valid for KTX and KTX2 w/o isCompressed + size_t mipOffset = dstMipLevels[mipLevel].offset + chunk * mipStorageSize; + // just to check that each mip has a unique offset //KLOGI("Image", "chunk:%d %d\n", chunk, mipOffset); @@ -1501,12 +1819,13 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const if (!info.averageChannels.empty()) { // this isn't applied to srgb data (what about premul?) averageChannelsInBlock(info.averageChannels.c_str(), dstImage, - mipImage, tmpImageData8); + mipImage, data.tmpImageData8); - mipImage.pixels = tmpImageData8.data(); + mipImage.pixels = data.tmpImageData8.data(); mipImage.pixelsFloat = nullptr; } + Timer timer; bool success = compressMipLevel(info, dstImage, @@ -1519,33 +1838,30 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const } } - // Write out the mip size on chunk0, all other mips are this size since not supercompressed. - // This throws off block alignment so have option to skip for ktxa files. I guess 3d textures + // Write out the mip size on chunk 0, all other mips are this size since not supercompressed. + // This throws off block alignment and gpu loading of ktx files from mmap. I guess 3d textures // and arrays can then load entire level in a single call. 
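// (Example of the rule below: for a 2d array of 6 slices whose per-chunk mip is 1024 bytes,
//  imageSize is written as 6 * 1024; for a non-array cube the KTX1 convention is the single
//  face size, 1024, even though 6 faces of data follow.)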
- if (chunk == 0) { + if ((!info.isKTX2) && chunk == 0) { // some clarification on what imageSize means, but best to look at ktx codebase itself // https://github.com/BinomialLLC/basis_universal/issues/40 // this contains all bytes at a mipLOD but not any padding - uint32_t levelSize = (int32_t)chunkOffsets.size() * mipStorageSize; + uint32_t levelSize = (uint32_t)dstMipLevels[mipLevel].length; // this is size of one face for non-array cubes - if (info.textureType == MyMTLTextureTypeCube) { - levelSize = mipStorageSize; + // but for everything else, it's the numChunks * mipStorageSize + if (info.textureType != MyMTLTextureTypeCube) { + levelSize *= numChunks; } int32_t levelSizeOf = sizeof(levelSize); assert(levelSizeOf == 4); - //fseek(dstFile, mipOffset - levelSizeOf, SEEK_SET); // from begin - if (!writeDataAtOffset((const uint8_t*)&levelSize, levelSizeOf, mipOffset - levelSizeOf, dstFile, dstImage)) { return false; } } - - //fseek(dstFile, mipOffset, SEEK_SET); // from begin - + // Note that default ktx alignment is 4, so r8u, r16f mips need to be padded out to 4 bytes // may need to write these out row by row, and let fseek pad the rows to 4. diff --git a/libkram/kram/KramImage.h b/libkram/kram/KramImage.h index 3fc97346..22b57578 100644 --- a/libkram/kram/KramImage.h +++ b/libkram/kram/KramImage.h @@ -29,6 +29,8 @@ enum ImageResizeFilter { //--------------------------- +struct MipConstructData; + // TODO: this can only holds one level of mips, so custom mips aren't possible. // Mipmap generation is all in-place to this storage. class Image { @@ -66,14 +68,13 @@ class Image { private: bool encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const; + bool decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstImage, TexEncoder decoder, bool isVerbose, const string& swizzleText) const; // compute how big mips will be - void computeMipStorage(const KTXImage& image, int32_t w, int32_t h, + void computeMipStorage(const KTXImage& image, int32_t& w, int32_t& h, int32_t& numSkippedMips, bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize, - int32_t& storageSize, int32_t& storageSizeTotal, - vector& mipStorageSizes, - int32_t& numDstMipLevels, int32_t& numMipLevels) const; + vector& dstMipLevels) const; // ugh, reduce the params into this bool compressMipLevel(const ImageInfo& info, KTXImage& image, @@ -85,7 +86,17 @@ class Image { const KTXImage& image, ImageData& srcImage, vector& tmpImage) const; - + bool createMipsFromChunks(ImageInfo& info, MipConstructData& data, + FILE* dstFile, KTXImage& dstImage) const; + + bool writeKTX1FileOrImage( + ImageInfo& info, + MipConstructData& mipConstructData, + const vector& propsData, + FILE* dstFile, KTXImage& dstImage) const; + + void addBaseProps(const ImageInfo& info, KTXImage& dstImage) const; + private: // pixel size of image int32_t _width = 0; diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 7302bbb3..78987728 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -996,6 +996,7 @@ void ImageInfo::initWithArgs(const ImageInfoArgs& args) textureType = args.textureType; isKTX2 = args.isKTX2; + compressor = args.compressor; isPrezero = args.isPrezero; isPremultiplied = args.isPremultiplied; diff --git a/libkram/kram/KramImageInfo.h b/libkram/kram/KramImageInfo.h index 54a343b1..86a42488 100644 --- a/libkram/kram/KramImageInfo.h +++ b/libkram/kram/KramImageInfo.h @@ -51,6 +51,8 @@ class ImageInfoArgs { int32_t quality = 49; // may want float + // ktx2 has a 
compression type and level + KTX2Compressor compressor; bool isKTX2 = false; //bool skipImageLength = false; @@ -121,6 +123,8 @@ class ImageInfo { string averageChannels; string swizzleText; + // ktx2 has a compression type and level + KTX2Compressor compressor; bool isKTX2 = false; // output image state @@ -172,6 +176,8 @@ class ImageInfo { int32_t chunksX = 0; int32_t chunksY = 0; int32_t chunksCount = 0; + + }; bool isSwizzleValid(const char* swizzle); diff --git a/scripts/kramTests.sh b/scripts/kramTests.sh index 70119096..d9bbf60b 100755 --- a/scripts/kramTests.sh +++ b/scripts/kramTests.sh @@ -10,7 +10,7 @@ ../scripts/kramTextures.py -p android --ktx2 --bundle ../scripts/kramTextures.py -p android --bundle -# this only has ktx2 form -../scripts/kramTextures.py -p any --ktx2 --bundle +# this only has ktx2 form, tests uastc which kram doesn't open/save yet +#../scripts/kramTextures.py -p any --ktx2 --bundle diff --git a/scripts/kramTextures.py b/scripts/kramTextures.py index 183644c0..78ec37e9 100755 --- a/scripts/kramTextures.py +++ b/scripts/kramTextures.py @@ -52,7 +52,8 @@ class TextureProcessor: appKtx2sc = "" appKtx2check = "" doUastc = False - + doKTX2 = False + # preset formats for a given platform textureFormats = [] @@ -168,8 +169,9 @@ def processTextureKram(self, srcPath, dstDir, srcModstamp): srcFilename = os.path.basename(srcRoot) # just the name no ext - # this only exports to ktx, post process will convert to ktx2 ext = ".ktx" + if self.doKTX2: + ext = ".ktx2" dstName = srcFilename # replace -h with -n, since it will be converted to a normal @@ -216,6 +218,11 @@ def processTextureKram(self, srcPath, dstDir, srcModstamp): } typeText = switcher.get(texType, " -type 2d") + # choice of none, zlib, or zstd + compressorText = "" + if self.doKTX2: + compressorText = " -zstd" + # this could work on 3d and cubearray textures, but for now only use on 2D textures chunksText = "" if texType == TextureType.Tex2DArray: @@ -223,7 +230,7 @@ def processTextureKram(self, srcPath, dstDir, srcModstamp): if chunksX > 0 and chunksY > 0: chunksText = " -chunks {0}x{1}".format(chunksX, chunksY) - cmd = "encode" + fmt + typeText + chunksText + " -i " + srcPath + " -o " + dstFile + cmd = "encode" + fmt + typeText + chunksText + compressorText + " -i " + srcPath + " -o " + dstFile # can print out commands to script and then process that all in C++ if self.doScript: @@ -234,6 +241,7 @@ def processTextureKram(self, srcPath, dstDir, srcModstamp): else: timer = -time.perf_counter() + # kram can't compress to uastc ktx2, but this script can via ktx2sc from original file result = self.spawn(self.appKram + " " + cmd) # report slow textures @@ -242,36 +250,38 @@ def processTextureKram(self, srcPath, dstDir, srcModstamp): if timer > slowTextureTime: print("perf: encode {0} took {1:.3f}s".format(dstName, timer)) + # TODO: split this off into another modstamp testing pass, and only do work if ktx is older than ktx2 # convert ktx -> ktx2, and zstd supercompress the mips, kram can read these and decompress # for now, this is only possible when not scripted # could read these in kram, and then execute them, or write these to another file # and then execute that if script file suceeds - if self.appKtx2: - ktx2Filename = dstFile + "2" + # if self.appKtx2: + # ktx2Filename = dstFile + "2" - # create the ktx2 - result = self.spawn(self.appKtx2 + " -f -o " + ktx2Filename + " " + dstFile) + # # create the ktx2 + # result = self.spawn(self.appKtx2 + " -f -o " + ktx2Filename + " " + dstFile) - # too bad this can't 
check ktx1... - if self.appKtx2check != "" and result == 0: - result = self.spawn(self.appKtx2check + " -q " + ktx2Filename) - - # can only zstd compress block encoded files, but can do BasisLZ on - # explicit files. - - # overwrite it with supercompressed version - # basis uastc supercompress - only if content isn't already block encoded, TODO: kramv and loader cannot read this - # zstd supercompress - works on everything, kramv and loader can read this - if self.appKtx2sc != "" and result == 0: - if self.doUastc: - result = self.spawn(self.appKtx2sc + " --uastc 2 --uastc_rdo_q 1.0 --zcmp 3 --threads 1 " + ktx2Filename) - else: - result = self.spawn(self.appKtx2sc + " --zcmp 3 --threads 1 " + ktx2Filename) - - # double check supercompressed version, may not be necessary - if self.appKtx2check != "" and result == 0: - result = self.spawn(self.appKtx2check + " -q " + ktx2Filename) + # # too bad this can't check ktx1... + # if self.appKtx2check != "" and result == 0: + # result = self.spawn(self.appKtx2check + " -q " + ktx2Filename) + + # # can only zstd compress block encoded files, but can do BasisLZ on + # # explicit files. + + # # overwrite it with supercompressed version + # # basis uastc supercompress - only if content isn't already block encoded, TODO: kramv and loader cannot read this + # # zstd supercompress - works on everything, kramv and loader can read this + # if self.appKtx2sc != "" and result == 0: + # if self.doUastc: + # result = self.spawn(self.appKtx2sc + " --uastc 2 --uastc_rdo_q 1.0 --zcmp 3 --threads 1 " + ktx2Filename) + # else: + # result = self.spawn(self.appKtx2sc + " --zcmp 3 --threads 1 " + ktx2Filename) + + # double check supercompressed version, may not be necessary + if self.appKtx2check != "" and result == 0: + result = self.spawn(self.appKtx2check + " -q " + ktx2Filename) + return result @@ -526,11 +536,17 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, processor = TextureProcessor(platform, appKram, maxCores, force, script, scriptFile, formats) if ktx2: + processor.doKTX2 = ktx2 + + # used to need all of these apps to gen ktx2, but can gen directly from kram now + # leaving these to test aastc case processor.appKtx2 = appKtx2 processor.appKtx2sc = appKtx2sc - processor.appKtx2check = appKtx2check processor.doUastc = doUastc + # check app still useful + processor.appKtx2check = appKtx2check + for srcDir in srcDirs: dstDir = dstDirForPlatform + srcDir os.makedirs(dstDir, exist_ok = True) From e72d0d3036a09164c45feb4c407284ceba95e5d1 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 16 May 2021 19:23:09 -0700 Subject: [PATCH 049/901] Kram - fix numSkippedMips support --- libkram/kram/KTXImage.cpp | 20 +++++++++++++++----- libkram/kram/KTXImage.h | 2 +- libkram/kram/KramImage.cpp | 18 +++++++++++++----- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 12564de9..f5311974 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1087,13 +1087,15 @@ void KTXImage::toPropsData(vector& propsData) // TODO: this needs to pad to 16-bytes, so may need a prop for that } -void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize) +void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize, uint32_t& numSkippedMips) { // dst levels int32_t w = width; int32_t h = height; int32_t d = depth; + numSkippedMips = 0; + bool needsDownsample = (w > mipMaxSize || h > mipMaxSize); int32_t maxMipLevels = 
16; // 64K x 64K @@ -1104,7 +1106,8 @@ void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxS } KTXImageLevel level; - level.offset = 0; // compute later, once know ktx vs. ktx2 + //level.offset = 0; // compute later, once know ktx vs. ktx2 + //level.lengthCompressed = 0; mipLevels.clear(); @@ -1115,7 +1118,6 @@ void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxS if (keepMip) { level.length = mipLevelSize(w, h); - level.lengthCompressed = 0; if (mipLevels.empty()) { // adjust the top dimensions @@ -1125,6 +1127,11 @@ void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxS } mipLevels.push_back(level); } + else { + if (mipLevels.empty()) { + numSkippedMips++; + } + } do { mipDown(w, h, d); @@ -1136,7 +1143,6 @@ void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxS if (keepMip && (mipLevels.size() < (size_t)maxMipLevels)) { // length needs to be multiplied by chunk size before writing out level.length = mipLevelSize(w, h); - level.lengthCompressed = 0; if (mipLevels.empty()) { // adjust the top dimensions @@ -1147,13 +1153,17 @@ void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxS mipLevels.push_back(level); } + else { + if (mipLevels.empty()) { + numSkippedMips++; + } + } } while (w > 1 || h > 1 || d > 1); } else { // length needs to be multiplied by chunk size before writing out level.length = mipLevelSize(w, h); - level.lengthCompressed = 0; mipLevels.push_back(level); } diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index 2b0b54c3..2dace691 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -273,7 +273,7 @@ class KTXImage { void initProps(const uint8_t* propsData, size_t propDataSize); void initMipLevels(size_t mipOffset); - void initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize); + void initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize, uint32_t& numSkippedMips); bool validateMipLevels() const; diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index b0839191..9e1ed3c1 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -118,11 +118,13 @@ bool Image::loadImageFromKTX(const KTXImage& image) return false; } - // TODO: handle custom mips, this will currently box filter to build + // TODO: handle loading custom mips. Save will currently box filter to build // remaining mips but for SDF or coverage scaled alpha test, need to - // preserve original data. + // preserve original data. Problem is that Image save to KTX/2 always does in-place + // mipgen. + if (image.header.numberOfMipmapLevels > 1) { - KLOGW("Image", "Skipping custom mip levels"); + KLOGW("Image", "Skipping custom mip levels from KTX load"); } // so can call through to blockSize @@ -467,6 +469,7 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma vector propsData; dstImage.toPropsData(propsData); dstHeader.bytesOfKeyValueData = (uint32_t)vsizeof(propsData); + size_t mipOffset = sizeof(KTXHeader) + dstHeader.bytesOfKeyValueData; dstImage.initMipLevels(mipOffset); @@ -900,7 +903,12 @@ struct MipConstructData { vector halfImage; vector floatImage; + // Subdividing strips of larger images into cube/atlas/etc. + // These offsets are where to find each chunk in that larger image vector chunkOffsets; + + // Can skip the larger and smaller mips. This is the larger mips skipped. 
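// (Example of the bookkeeping: a 4096x4096 source encoded with -mipmax 1024 drops the 4096 and
//  2048 levels, so numSkippedMips = 2 and the stored top mip is 1024x1024; createMipsFromChunks
//  still runs the in-place mip generation for those skipped levels before the first level is
//  written out.)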
+ uint32_t numSkippedMips = 0; }; @@ -1267,7 +1275,7 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const dstImage.height = h; dstImage.depth = header.pixelDepth; // from validate above - dstImage.initMipLevels(info.doMipmaps, info.mipMinSize, info.mipMaxSize); + dstImage.initMipLevels(info.doMipmaps, info.mipMinSize, info.mipMaxSize, mipConstructData.numSkippedMips); // ---------------------------------------------------- @@ -1762,7 +1770,7 @@ bool Image::createMipsFromChunks( // build mips for the chunk, dropping mips as needed, but downsampling // from available image - int32_t numSkippedMips = 0; // TODO: data.numSkippedMips; + int32_t numSkippedMips = data.numSkippedMips; for (int32_t mipLevel = 0; mipLevel < (int32_t)dstMipLevels.size(); ++mipLevel) { From 43a50b9f9a51a8cb2e3bc6acab271659ed851c73 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 16 May 2021 22:42:53 -0700 Subject: [PATCH 050/901] kram - add compression level support to -zstd/-zlib, fix width/height issues on chunked images, update test scripts --- libkram/kram/KTXImage.h | 2 +- libkram/kram/Kram.cpp | 27 +++++++++++++++++++++------ libkram/kram/KramImage.cpp | 23 ++++++++++++++++------- scripts/kramTests.sh | 16 +++++++++------- scripts/kramTextures.py | 29 ++++++++++++++++++++--------- 5 files changed, 67 insertions(+), 30 deletions(-) diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index 2dace691..71c426e6 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -254,7 +254,7 @@ enum KTX2Supercompression { struct KTX2Compressor { KTX2Supercompression compressorType = KTX2SupercompressionNone; - float compressorLevel = 0.0f; // 0.0 default, 100.0 full compression + float compressorLevel = 0.0f; // 0.0 is default bool isCompressed() const { return compressorType != KTX2SupercompressionNone; } }; diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index ec475eb4..720691ec 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -1060,10 +1060,10 @@ void kramEncodeUsage(bool showVersion = true) "\tSpecifies how many chunks to split up texture into 2darray\n" // ktx2 specific settings - "\t-zstd" - "\tktx2 with zstd mip compressor\n" - "\t-zlib" - "\tktx2 with zlib mip compressor\n" + "\t-zstd level" + "\tktx2 with zstd mip compressor, 0 for default\n" + "\t-zlib level" + "\tktx2 with zlib mip compressor, 0 for defauult\n" "\t-swizzle [rgba01 x4]" "\tSpecifies pre-encode swizzle pattern\n" @@ -1654,7 +1654,7 @@ static int32_t kramAppDecode(vector& args) return -1; } - success = success && SetupTmpFile(tmpFileHelper, isDstKTX ? ".ktx" : ".ktx2"); + success = success && SetupTmpFile(tmpFileHelper, isDstKTX2 ? 
".ktx2" : ".ktx"); if (success && isVerbose) { KLOGI("Kram", "Decoding %s to %s with %s\n", @@ -1965,10 +1965,25 @@ static int32_t kramAppEncode(vector& args) // TODO: need level control else if (isStringEqual(word, "-zstd")) { infoArgs.compressor.compressorType = KTX2SupercompressionZstd; + ++i; + if (i >= argc) { + KLOGE("Kram", "zstd level arg invalid"); + error = true; + break; + } + infoArgs.compressor.compressorLevel = atoi(args[i]); + //continue; } else if (isStringEqual(word, "-zlib")) { infoArgs.compressor.compressorType = KTX2SupercompressionZlib; + ++i; + if (i >= argc) { + KLOGE("Kram", "zlib level arg invalid"); + error = true; + break; + } + infoArgs.compressor.compressorLevel = atoi(args[i]); //continue; } else { @@ -2056,7 +2071,7 @@ static int32_t kramAppEncode(vector& args) srcFilename, srcImage, isPremulRgb); if (success) { - success = SetupTmpFile(tmpFileHelper, isDstKTX ? ".ktx" : ".ktx2"); + success = SetupTmpFile(tmpFileHelper, isDstKTX2 ? ".ktx2" : ".ktx"); if (!success) { KLOGE("Kram", "encode couldn't generate tmp file for output"); diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 9e1ed3c1..a6831194 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -909,6 +909,10 @@ struct MipConstructData { // Can skip the larger and smaller mips. This is the larger mips skipped. uint32_t numSkippedMips = 0; + + // this is size of 2d image src after accounting for chunks for a strip of array/cube data + uint32_t modifiedWidth = 0; + uint32_t modifiedHeight = 0; }; @@ -1264,6 +1268,11 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const return false; } + // This is wxh of source in case it has chunks + // dstImage will start at this, but may mip down smaller base on mipMaxSize + mipConstructData.modifiedWidth = w; + mipConstructData.modifiedHeight = h; + // work out how much memory we need to load header.initFormatGL(info.pixelFormat); @@ -1433,8 +1442,8 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const return false; } - if (info.compressor.compressorLevel > 0.0) { - int zstdLevel = (int)round(info.compressor.compressorLevel * 100.0); + if (info.compressor.compressorLevel > 0.0f) { + int zstdLevel = (int)round(info.compressor.compressorLevel); if (zstdLevel > 100) { zstdLevel = 100; } @@ -1446,8 +1455,8 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const } else if (info.compressor.compressorType == KTX2SupercompressionZlib) { // set the level up - if (info.compressor.compressorLevel > 0.0) { - zlibLevel = (int)round(info.compressor.compressorLevel * 10.0); + if (info.compressor.compressorLevel > 0.0f) { + zlibLevel = (int)round(info.compressor.compressorLevel); if (zlibLevel > 10) { zlibLevel = 10; } @@ -1601,15 +1610,15 @@ bool Image::createMipsFromChunks( // This is for 8-bit data (pixelsFloat used for in-place mipgen) ImageData srcImage; - srcImage.width = _width; - srcImage.height = _height; + srcImage.width = data.modifiedWidth; + srcImage.height = data.modifiedHeight; // KramMipper uses these srcImage.isSRGB = info.isSRGB; srcImage.isHDR = info.isHDR; int32_t w = srcImage.width; - int32_t h = srcImage.width; + int32_t h = srcImage.height; // ---------------------------------------------------- diff --git a/scripts/kramTests.sh b/scripts/kramTests.sh index d9bbf60b..93ca1d91 100755 --- a/scripts/kramTests.sh +++ b/scripts/kramTests.sh @@ -1,16 +1,18 @@ #/bin/zsh -../scripts/kramTextures.py -p mac --ktx2 --bundle 
-../scripts/kramTextures.py -p mac --bundle +args=$1 -../scripts/kramTextures.py -p ios --ktx2 --bundle -../scripts/kramTextures.py -p ios --bundle +../scripts/kramTextures.py -p mac --bundle ${args} +#../scripts/kramTextures.py -p mac -c ktx --bundle ${args} + +../scripts/kramTextures.py -p ios --bundle ${args} +#../scripts/kramTextures.py -p ios -c ktx --bundle ${args} # this takes 15s+ with ETC2comp -../scripts/kramTextures.py -p android --ktx2 --bundle -../scripts/kramTextures.py -p android --bundle +../scripts/kramTextures.py -p android --bundle ${args} +#../scripts/kramTextures.py -p -c ktx android --bundle ${args} # this only has ktx2 form, tests uastc which kram doesn't open/save yet -#../scripts/kramTextures.py -p any --ktx2 --bundle +#../scripts/kramTextures.py -p any --bundle ${args} diff --git a/scripts/kramTextures.py b/scripts/kramTextures.py index 78ec37e9..c8b02769 100755 --- a/scripts/kramTextures.py +++ b/scripts/kramTextures.py @@ -221,7 +221,7 @@ def processTextureKram(self, srcPath, dstDir, srcModstamp): # choice of none, zlib, or zstd compressorText = "" if self.doKTX2: - compressorText = " -zstd" + compressorText = " -zstd 0" # this could work on 3d and cubearray textures, but for now only use on 2D textures chunksText = "" @@ -278,9 +278,12 @@ def processTextureKram(self, srcPath, dstDir, srcModstamp): # else: # result = self.spawn(self.appKtx2sc + " --zcmp 3 --threads 1 " + ktx2Filename) - # double check supercompressed version, may not be necessary - if self.appKtx2check != "" and result == 0: - result = self.spawn(self.appKtx2check + " -q " + ktx2Filename) + if self.doKTX2: + ktx2Filename = dstFile + + # double check supercompressed version, may not be necessary + if self.appKtx2check != "" and result == 0: + result = self.spawn(self.appKtx2check + " -q " + ktx2Filename) return result @@ -370,16 +373,15 @@ def runMapInParallel(args): @click.command() @click.option('-p', '--platform', type=click.Choice(['ios', 'mac', 'win', 'android', 'any']), required=True, help="build platform") -@click.option('-c', '--container', type=click.Choice(['ktx', 'ktxa']), default="ktx", help="container type") +@click.option('-c', '--container', type=click.Choice(['ktx', 'ktx2']), default="ktx2", help="container type") @click.option('-v', '--verbose', is_flag=True, help="verbose output") @click.option('-q', '--quality', default=49, type=click.IntRange(0, 100), help="quality affects encode speed") @click.option('-j', '--jobs', default=64, help="max physical cores to use") @click.option('--force', is_flag=True, help="force rebuild ignoring modstamps") @click.option('--script', is_flag=True, help="generate kram script and execute that") -@click.option('--ktx2', is_flag=True, help="generate ktx2 files from ktx output") -@click.option('--check', is_flag=True, help="check ktx2 files as generated") +@click.option('--check', is_flag=True, help="check ktx2 files when generated") @click.option('--bundle', is_flag=True, help="bundle files by updating a zip file") -def processTextures(platform, container, verbose, quality, jobs, force, script, ktx2, check, bundle): +def processTextures(platform, container, verbose, quality, jobs, force, script, check, bundle): # output to multiple dirs by type # eventually pass these in as strings, so script is generic @@ -392,6 +394,10 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, appKtx2check = "" doUastc = False + ktx2 = True + if container == "ktx": + ktx2 = False + # can convert ktx -> ktx2 files with zstd and Basis 
supercompression # caller must have ktx2ktx2 and ktx2sc in path build from https://github.com/KhronosGroup/KTX-Software if platform == "any": @@ -399,11 +405,16 @@ def processTextures(platform, container, verbose, quality, jobs, force, script, doUastc = True if ktx2: - script = False + # have to run check script after generating, or have to convert ktx to ktx2 + # so that's why these disable scripting + if doUastc or check: + script = False + # these were for converting ktx output from kram to ktx2, and for uastc from original png appKtx2 = "ktx2ktx2" appKtx2sc ="ktxsc" + # this is a validator app if check: appKtx2check = "ktx2check" From 43be2410b77d4bb892114145a484f0facaa62dc7 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 18 May 2021 22:12:04 -0700 Subject: [PATCH 051/901] Kram/kramv - verbose info, -mipskip n, and kramv buttons, fast info load path kram add faster load path for kram info that doesn't decode mips and can return ktx2 data intact hook up verbose info add -mipskip 1 support to CLI to skip the topmost n mips. Sometimes easier than specifying a pixel size. kramv dynamically add buttons and menu items, menu items not yet hooked to an Edit menu add verbose info, flip buttons, transparent debug, fix non-zero debug mode for snorm, fix debug hdr, improve the display of info data. Has % compression for image and mip levels. --- kramv/KramRenderer.mm | 11 +- kramv/KramShaders.h | 15 +- kramv/KramShaders.metal | 35 ++++- kramv/KramViewerBase.h | 19 ++- kramv/KramViewerMain.mm | 278 +++++++++++++++++++++++++++++++-- libkram/kram/KTXImage.cpp | 50 ++++-- libkram/kram/KTXImage.h | 6 +- libkram/kram/Kram.cpp | 100 +++++++++--- libkram/kram/KramImage.cpp | 23 ++- libkram/kram/KramImageInfo.cpp | 3 +- libkram/kram/KramImageInfo.h | 6 +- 11 files changed, 459 insertions(+), 87 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 1f44314e..1b206419 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -421,9 +421,10 @@ - (BOOL)loadTextureFromData:(const string&)fullFilename timestamp:(double)timest if (!sourceImage.open(imageData,imageDataLength)) { return NO; } - bool isVerbose = false; - _showSettings->imageInfo = kramInfoKTXToString(fullFilename, sourceImage, isVerbose); - + + _showSettings->imageInfo = kramInfoKTXToString(fullFilename, sourceImage, false); + _showSettings->imageInfoVerbose = kramInfoKTXToString(fullFilename, sourceImage, true); + _showSettings->originalFormat = (MyMTLPixelFormat)originalFormatMTL; _showSettings->lastFilename = fullFilename; @@ -462,8 +463,8 @@ - (BOOL)loadTexture:(nonnull NSURL *)url return NO; } - bool isVerbose = false; - _showSettings->imageInfo = kramInfoToString(fullFilename, isVerbose); + _showSettings->imageInfo = kramInfoToString(fullFilename, false); + _showSettings->imageInfoVerbose = kramInfoToString(fullFilename, true); _showSettings->originalFormat = (MyMTLPixelFormat)originalFormatMTL; _showSettings->decodedFormat = (MyMTLPixelFormat)texture.pixelFormat; diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index 2799faa4..5169213c 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -73,14 +73,15 @@ typedef NS_ENUM(int32_t, ShaderTextureChannels) typedef NS_ENUM(int32_t, ShaderDebugMode) { ShDebugModeNone = 0, - ShDebugModeTransparent = 1, - ShDebugModeColor = 2, - ShDebugModeGray = 3, - ShDebugModeHDR = 4, + ShDebugModeTransparent, // alpha < 255 + ShDebugModeNonZero, // any(rgba) > 0 + ShDebugModeColor, + ShDebugModeGray, + ShDebugModeHDR, - ShDebugModePosX = 5, - ShDebugModePosY = 
6, - ShDebugModeCircleXY = 7, + ShDebugModePosX, + ShDebugModePosY, + ShDebugModeCircleXY, ShDebugModeCount }; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 54832b83..9363a5b2 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -444,6 +444,8 @@ float4 DrawPixels( float2 textureSize ) { + float4 sc = c; + bool isPreview = uniforms.isPreview; if (isPreview) { @@ -553,6 +555,7 @@ float4 DrawPixels( c.rgb = toNormal(c.rgb); // from signed, to match other editors that don't display signed data + sc = c; c.xyz = toUnorm(c.xyz); // can sample from this // view data as abs magnitude @@ -566,13 +569,21 @@ float4 DrawPixels( // signed 1/2 channel formats return sr,0,0, and sr,sg,0 for rgb? // May want to display those as 0 not 0.5. if (uniforms.isSigned) { + // Note: premul on signed should occur while still signed, since it's a pull to zoer + // to premul, but also need to see without premul + if (uniforms.isPremul) { + c.xyz *= c.a; + } + + sc = c; c.xyz = toUnorm(c.xyz); } - - // to premul, but also need to see without premul - if (uniforms.isPremul) { - c.xyz *= c.a; + else { + if (uniforms.isPremul) { + c.xyz *= c.a; + } } + } } @@ -623,6 +634,20 @@ float4 DrawPixels( isHighlighted = true; } } + else if (uniforms.debugMode == ShDebugModeNonZero) { + // want to compare so snorm 0 on signed data + // TODO: unorm formats don't store exact 0, so may need toleranc + if (uniforms.isSigned) { + if (any(sc != 0.0)) { + isHighlighted = true; + } + } + else { + if (any(c != 0.0)) { + isHighlighted = true; + } + } + } else if (uniforms.debugMode == ShDebugModeColor) { // with 565 formats, all pixels with light up if (c.r != c.g || c.r != c.b) { @@ -636,7 +661,7 @@ float4 DrawPixels( } } else if (uniforms.debugMode == ShDebugModeHDR) { - if (any(c.rgb < float3(0.0)) || any(c.rgb < float3(0.0)) ) { + if (any(c.rgb < float3(0.0)) || any(c.rgb > float3(1.0)) ) { isHighlighted = true; } } diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 3f706a06..7159d256 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -30,17 +30,19 @@ enum TextureChannels ModeAAA1 = 8, }; +// Must line up with ShDebugMode enum DebugMode { DebugModeNone = 0, - DebugModeTransparent = 1, - DebugModeColor = 2, - DebugModeGray = 3, - DebugModeHDR = 4, + DebugModeTransparent, + DebugModeNonZero, + DebugModeColor, + DebugModeGray, + DebugModeHDR, - DebugModePosX = 5, - DebugModePosY = 6, - DebugModeCircleXY = 7, + DebugModePosX, + DebugModePosY, + DebugModeCircleXY, DebugModeCount }; @@ -145,7 +147,8 @@ class ShowSettings { // cached on load, raw info about the texture from libkram string imageInfo; - + string imageInfoVerbose; + // format before any transcode to supported formats MyMTLPixelFormat originalFormat; MyMTLPixelFormat decodedFormat; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index f7e4a38c..ca2f977f 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -271,7 +271,12 @@ - (IBAction)showAboutDialog:(id)sender { M = 0x2E, // https://eastmanreference.com/complete-list-of-applescript-key-codes - Zero = 0x1D, + Num1 = 0x12, + Num2 = 0x13, + Num3 = 0x14, + Num4 = 0x15, + // ... 
+ Num0 = 0x1D, LeftBrace = 0x21, RightBrace = 0x1E, @@ -395,8 +400,10 @@ void encodeSrcForEncodeComparisons(bool increment) { @implementation MyMTKView { + NSStackView* _buttonStack; NSTextField* _hudLabel; NSTextField* _hudLabel2; + vector _textSlots; ShowSettings* _showSettings; @@ -404,6 +411,7 @@ @implementation MyMTKView ZipHelper _zip; MmapHelper _zipMmap; int32_t _fileIndex; + BOOL _noImageLoaded; } - (void)awakeFromNib @@ -456,6 +464,12 @@ - (instancetype)initWithCoder:(NSCoder*)coder { _zoomGesture = [[NSMagnificationGestureRecognizer alloc] initWithTarget:self action:@selector(handleGesture:)]; [self addGestureRecognizer:_zoomGesture]; + _buttonStack = [self _addButtons]; + + // hide until image loaded + _buttonStack.hidden = YES; + _noImageLoaded = YES; + _hudLabel2 = [self _addHud:YES]; _hudLabel = [self _addHud:NO]; [self setHudText:""]; @@ -467,10 +481,125 @@ - (nonnull ShowSettings*)showSettings { return _showSettings; } +- (NSStackView*)_addButtons { + const int32_t numButtons = 25; // 13; + const char* names[numButtons*2] = { + + "?", "Help", + "I", "Info", + "H", "Hud", + "S", "Show All", + + "O", "Preview", + "W", "Repeat", + "P", "Premul", + "N", "Signed", + + "-", "", + + "E", "Debug", + "D", "Grid", + "C", "Checker", + "U", "Toggle UI", + + "-", "", + + "M", "Mip", + "F", "Face", + "Y", "Array", + "J", "Next", + "L", "Reload", + "0", "Fit", + + // TODO: need to shift hud over a little + // "UI", - add to show/hide buttons + + "-", "", + + // make these individual toggles and exclusive toggle off shift + "R", "Red", + "G", "Green", + "B", "Blue", + "A", "Alpha", + }; + + NSRect rect = NSMakeRect(0,10,30,30); + + //#define ArrayCount(x) ((x) / sizeof(x[0])) + + NSMutableArray* buttons = [[NSMutableArray alloc] init]; + + for (int32_t i = 0; i < numButtons; ++i) { + const char* icon = names[2*i+0]; + const char* tip = names[2*i+1]; + + NSString* name = [NSString stringWithUTF8String:icon]; + NSString* toolTip = [NSString stringWithUTF8String:tip]; + + NSButton* button = nil; + + button = [NSButton buttonWithTitle:name target:self action:@selector(handleAction:)]; + [button setToolTip:toolTip]; + button.hidden = NO; + + // turn off rounded bezel + button.bordered = NO; + + [button setFrame:rect]; + + // stackView seems to disperse the items evenly across the area, so this doesn't work + if (icon[0] == '-') { + //rect.origin.y += 11; + button.enabled = NO; + } + else { + //sKrect.origin.y += 25; + } + + [buttons addObject:button]; + } + + NSStackView* stackView = [NSStackView stackViewWithViews:buttons]; + stackView.orientation = NSUserInterfaceLayoutOrientationVertical; + [self addSubview: stackView]; + +#if 0 + // Want menus, so user can define their own shortcuts to commands + // Also need to enable/disable this via validateUserInterfaceItem + NSApplication* app = [NSApplication sharedApplication]; + + // TODO: add an edit menu in the storyboard + NSMenu* menu = app.windowsMenu; + [menu addItem:[NSMenuItem separatorItem]]; + + for (int32_t i = 0; i < numButtons; ++i) { + const char* icon = names[2*i+0]; + const char* tip = names[2*i+1]; + + NSString* shortcut = [NSString stringWithUTF8String:icon]; + NSString* name = [NSString stringWithUTF8String:tip]; + shortcut = @""; // for now, or AppKit turns key int cmd+shift+key + + if (icon[0] == '-') { + [menu addItem:[NSMenuItem separatorItem]]; + } + else { + NSMenuItem* menuItem = [[NSMenuItem alloc] initWithTitle:name action:@selector(handleAction) keyEquivalent:shortcut]; + [menu addItem: menuItem]; + } + } +#endif + + 
return stackView; +} + - (NSTextField*)_addHud:(BOOL)isShadow { + // TODO: This text field is clamping to the height, so have it set to 1200. + // really want field to expand to fill the window height for large output + // add a label for the hud - NSTextField *label = [[NSTextField alloc] initWithFrame:NSMakeRect(isShadow ? 11 : 10, isShadow ? 11 : 10, 800, 300)]; + NSTextField *label = [[NSTextField alloc] initWithFrame:NSMakeRect(isShadow ? 21 : 20, isShadow ? 21 : 20, 800, 1200)]; label.drawsBackground = NO; label.textColor = !isShadow ? [NSColor colorWithSRGBRed:0 green:1 blue:0 alpha:1] : @@ -479,7 +608,8 @@ - (NSTextField*)_addHud:(BOOL)isShadow label.editable = NO; label.selectable = NO; label.lineBreakMode = NSLineBreakByClipping; - + label.maximumNumberOfLines = 0; // fill to height + label.cell.scrollable = NO; label.cell.wraps = NO; @@ -489,13 +619,19 @@ - (NSTextField*)_addHud:(BOOL)isShadow // UILabel has shadowColor/shadowOffset but NSTextField doesn't [self addSubview: label]; + + // add vertical constrains to have it fill window, but keep 800 width + label.preferredMaxLayoutWidth = 800; + + NSDictionary* views = @{ @"label" : label }; + [self addConstraints:[NSLayoutConstraint constraintsWithVisualFormat:@"H:|-[label]" options:0 metrics:nil views:views]]; + [self addConstraints:[NSLayoutConstraint constraintsWithVisualFormat:@"V:|-[label]" options:0 metrics:nil views:views]]; + return label; } - (void)doZoomMath:(float)newZoom newPan:(float2&)newPan { -// transform the cursor to texture coordinate, or clamped version if outside - - + // transform the cursor to texture coordinate, or clamped version if outside Renderer* renderer = (Renderer*)self.delegate; float4x4 projectionViewModelMatrix = [renderer computeImageTransform:_showSettings->panX panY:_showSettings->panY zoom:_showSettings->zoom]; @@ -1071,7 +1207,80 @@ - (void)scrollWheel:(NSEvent *)event // TODO: convert to C++ actions, and then call into Base holding all this // move pan/zoom logic too. Then use that as start of Win32 kramv. 
+- (IBAction)handleAction:(id)sender { + // sender is the UI element/NSButton + // if (sender == ) + NSButton* button = (NSButton*)sender; + + NSEvent* theEvent = [NSApp currentEvent]; + bool isShiftKeyDown = (theEvent.modifierFlags & NSEventModifierFlagShift); + + string title = [button.title UTF8String]; + int32_t keyCode = -1; + + if (title == "?") + keyCode = Key::Slash; // help + else if (title == "I") + keyCode = Key::I; + else if (title == "H") + keyCode = Key::H; + + else if (title == "S") + keyCode = Key::S; + else if (title == "O") + keyCode = Key::O; + else if (title == "W") + keyCode = Key::W; + else if (title == "P") + keyCode = Key::P; + else if (title == "N") + keyCode = Key::N; + + else if (title == "E") + keyCode = Key::E; + else if (title == "D") + keyCode = Key::D; + else if (title == "C") + keyCode = Key::C; + else if (title == "U") + keyCode = Key::U; + + else if (title == "M") + keyCode = Key::M; + else if (title == "F") + keyCode = Key::F; + else if (title == "Y") + keyCode = Key::Y; + else if (title == "J") + keyCode = Key::J; + else if (title == "L") + keyCode = Key::L; + else if (title == "0") + keyCode = Key::Num0; + + else if (title == "R") + keyCode = Key::R; + else if (title == "G") + keyCode = Key::G; + else if (title == "B") + keyCode = Key::B; + else if (title == "A") + keyCode = Key::A; + + + if (keyCode >= 0) + [self handleKey:keyCode isShiftKeyDown:isShiftKeyDown]; +} + - (void)keyDown:(NSEvent *)theEvent +{ + bool isShiftKeyDown = theEvent.modifierFlags & NSEventModifierFlagShift; + uint32_t keyCode = theEvent.keyCode; + + [self handleKey:keyCode isShiftKeyDown:isShiftKeyDown]; +} + +- (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown { // Some data depends on the texture data (isSigned, isNormal, ..) TextureChannels& channels = _showSettings->channels; @@ -1079,11 +1288,29 @@ - (void)keyDown:(NSEvent *)theEvent // TODO: fix isChanged to only be set when value changes // f.e. clamped values don't need to re-render - bool isShiftKeyDown = theEvent.modifierFlags & NSEventModifierFlagShift; string text; - switch(theEvent.keyCode) { + switch(keyCode) { + case Key::V: { + bool isVertical = _buttonStack.orientation == NSUserInterfaceLayoutOrientationVertical; + isVertical = !isVertical; + + _buttonStack.orientation = isVertical ? NSUserInterfaceLayoutOrientationVertical : NSUserInterfaceLayoutOrientationHorizontal; + text = isVertical ? "Vert UI" : "Horiz UI"; + break; + } + case Key::U: + // this means no image loaded yet + if (_noImageLoaded) { + return; + } + + _buttonStack.hidden = !_buttonStack.hidden; + text = _buttonStack.hidden ? 
"Hide UI" : "Show UI"; + break; + // rgba channels + case Key::Num1: case Key::R: if (channels == TextureChannels::ModeRRR1 || channels == TextureChannels::ModeR001) { channels = TextureChannels::ModeRGBA; @@ -1096,6 +1323,8 @@ - (void)keyDown:(NSEvent *)theEvent isChanged = true; break; + + case Key::Num2: case Key::G: if (channels == TextureChannels::ModeGGG1 || channels == TextureChannels::Mode0G01) { channels = TextureChannels::ModeRGBA; @@ -1107,6 +1336,8 @@ - (void)keyDown:(NSEvent *)theEvent } isChanged = true; break; + + case Key::Num3: case Key::B: if (channels == TextureChannels::ModeBBB1 || channels == TextureChannels::Mode00B1) { channels = TextureChannels::ModeRGBA; @@ -1118,6 +1349,8 @@ - (void)keyDown:(NSEvent *)theEvent } isChanged = true; break; + + case Key::Num4: case Key::A: if (channels == TextureChannels::ModeAAA1) { channels = TextureChannels::ModeRGBA; @@ -1136,6 +1369,7 @@ - (void)keyDown:(NSEvent *)theEvent switch(_showSettings->debugMode) { case DebugModeNone: text = "Debug Off"; break; case DebugModeTransparent: text = "Debug Transparent"; break; + case DebugModeNonZero: text = "Debug NonZero"; break; case DebugModeColor: text = "Debug Color"; break; case DebugModeGray: text = "Debug Gray"; break; case DebugModeHDR: text = "Debug HDR"; break; @@ -1157,7 +1391,7 @@ - (void)keyDown:(NSEvent *)theEvent "⇧J-next bundle image\n"; break; - case Key::Zero: { // scale and reset pan + case Key::Num0: { // scale and reset pan float zoom; // fit image or mip if (isShiftKeyDown) { @@ -1315,7 +1549,7 @@ - (void)keyDown:(NSEvent *)theEvent // info on the texture, could request info from lib, but would want to cache that info case Key::I: if (_showSettings->isHudShown) { - sprintf(text, "%s", _showSettings->imageInfo.c_str()); + sprintf(text, "%s", isShiftKeyDown ? _showSettings->imageInfoVerbose.c_str() : _showSettings->imageInfo.c_str()); } break; @@ -1472,7 +1706,7 @@ - (BOOL)performDragOperation:(id)sender { if ([self loadTextureFromURL:url]) { [self setHudText:""]; - + return YES; } } @@ -1561,12 +1795,20 @@ - (BOOL)loadTextureFromArchive:(const char*)filename timestamp:(double)timestamp // was using subtitle, but that's macOS 11.0 feature. string title = "kramv - "; + title += formatTypeName(_showSettings->originalFormat); + title += " - "; title += filenameShort; self.window.title = [NSString stringWithUTF8String: title.c_str()]; // doesn't set imageURL or update the recent document menu + // show the controls + if (_noImageLoaded) { + _buttonStack.hidden = NO; // show controls + _noImageLoaded = NO; + } + self.needsDisplay = YES; return YES; } @@ -1626,6 +1868,8 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { // was using subtitle, but that's macOS 11.0 feature. string title = "kramv - "; + title += formatTypeName(_showSettings->originalFormat); + title += " - "; title += filenameShort; self.window.title = [NSString stringWithUTF8String: title.c_str()]; @@ -1639,6 +1883,12 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { self.imageURL = url; + // show the controls + if (_noImageLoaded) { + _buttonStack.hidden = NO; // show controls + _noImageLoaded = NO; + } + self.needsDisplay = YES; return YES; } @@ -1733,9 +1983,13 @@ - (void)viewDidLoad options: (NSTrackingMouseEnteredAndExited | NSTrackingMouseMoved | NSTrackingActiveInKeyWindow ) owner:_view userInfo:nil]; [_view addTrackingArea:_trackingArea]; - + + // programmatically add some buttons + // think limited to 11 viewws before they must be wrapepd in a container. That's how SwiftUI was. 
+ } + @end diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index f5311974..1a49b928 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -877,7 +877,7 @@ MyMTLTextureType KTXHeader::metalTextureType() const //--------------------------------------------------- -bool KTXImage::open(const uint8_t* imageData, size_t imageDataLength) +bool KTXImage::open(const uint8_t* imageData, size_t imageDataLength, bool isInfoOnly) { // Note: never trust the extension, always load based on the identifier if ((size_t)imageDataLength < sizeof(kKTX2Identifier)) { @@ -886,7 +886,7 @@ bool KTXImage::open(const uint8_t* imageData, size_t imageDataLength) // check for ktx2 if (memcmp(imageData, kKTX2Identifier, sizeof(kKTX2Identifier)) == 0) { - return openKTX2(imageData, imageDataLength); + return openKTX2(imageData, imageDataLength, isInfoOnly); } // check for ktx1 @@ -1087,16 +1087,16 @@ void KTXImage::toPropsData(vector& propsData) // TODO: this needs to pad to 16-bytes, so may need a prop for that } -void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize, uint32_t& numSkippedMips) +void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize, int32_t mipSkip, uint32_t& numSkippedMips) { // dst levels int32_t w = width; int32_t h = height; int32_t d = depth; - numSkippedMips = 0; + numSkippedMips = mipSkip; - bool needsDownsample = (w > mipMaxSize || h > mipMaxSize); + bool needsDownsample = (numSkippedMips > 0) || (w > mipMaxSize || h > mipMaxSize); int32_t maxMipLevels = 16; // 64K x 64K @@ -1113,8 +1113,9 @@ void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxS if (doMipmaps || needsDownsample) { bool keepMip = - (w >= mipMinSize && w <= mipMaxSize) && - (h >= mipMinSize && h <= mipMaxSize); + (numSkippedMips >= (uint32_t)mipSkip) || + ((w >= mipMinSize && w <= mipMaxSize) && + (h >= mipMinSize && h <= mipMaxSize)); if (keepMip) { level.length = mipLevelSize(w, h); @@ -1137,8 +1138,9 @@ void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxS mipDown(w, h, d); keepMip = - (w >= mipMinSize && w <= mipMaxSize) && - (h >= mipMinSize && h <= mipMaxSize); + (numSkippedMips >= (uint32_t)mipSkip) || + ((w >= mipMinSize && w <= mipMaxSize) && + (h >= mipMinSize && h <= mipMaxSize)); if (keepMip && (mipLevels.size() < (size_t)maxMipLevels)) { // length needs to be multiplied by chunk size before writing out @@ -1308,7 +1310,7 @@ struct ZSTDScope2 }; -bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) +bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool isInfoOnly) { if ((size_t)imageDataLength < sizeof(KTX2Header)) { return false; @@ -1392,10 +1394,34 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) header.bytesOfKeyValueData = 0; initProps(imageData + header2.kvdByteOffset, header2.kvdByteLength); + // skip parsing th elevels + if (isInfoOnly) { + skipImageLength = true; + fileData = imageData; + fileDataLength = imageDataLength; + + // copy these over from ktx2 + mipLevels = levels; + + // copy the original ktx2 levels, this includes mip compression + bool isCompressed = + (mipLevels[0].lengthCompressed > 0) && + (mipLevels[0].length != mipLevels[0].lengthCompressed); + + for (auto& level : mipLevels) { + level.length /= numChunks; + + // this indicates not compressed + if (!isCompressed) { + level.lengthCompressed = 0; + } + } + return true; + } if (!isCompressed) { // Note: this is aliasing 
the mips from a ktx2 file into a ktx1 KTXImage - // This is highly unsafe. + // This is highly unsafe but mostly works for input. // Note: KTX2 also doesn't have the length field embedded the mipData // so need to be able to set skipLength to unify the mipgen if aliasing the mip data @@ -1443,6 +1469,8 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength) imageDataFromKTX2.resize(fileDataLength, 0); fileData = imageDataFromKTX2.data(); + // TODO: may need to fill out length field in fileData + // Note: specific to zstd bool isZstd = header2.supercompressionScheme == KTX2SupercompressionZstd; ZSTD_DCtx* dctx = nullptr; diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index 71c426e6..b82f9299 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -268,12 +268,12 @@ struct KTX2Compressor { class KTXImage { public: // this calls init calls - bool open(const uint8_t* imageData, size_t imageDataLength); + bool open(const uint8_t* imageData, size_t imageDataLength, bool isInfoOnly = false); void initProps(const uint8_t* propsData, size_t propDataSize); void initMipLevels(size_t mipOffset); - void initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize, uint32_t& numSkippedMips); + void initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxSize, int32_t mipSkip, uint32_t& numSkippedMips); bool validateMipLevels() const; @@ -305,7 +305,7 @@ class KTXImage { vector& imageData(); private: - bool openKTX2(const uint8_t* imageData, size_t imageDataLength); + bool openKTX2(const uint8_t* imageData, size_t imageDataLength, bool isInfoOnly); // ktx2 mips are uncompressed to convert back to ktx1, but without the image offset vector imageDataFromKTX2; diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 720691ec..04f46e73 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -203,7 +203,7 @@ bool SetupSourceImage(MmapHelper& mmapHelper, FileHelper& fileHelper, // decoding reads a ktx file into KTXImage (not Image) bool SetupSourceKTX(MmapHelper& mmapHelper, FileHelper& fileHelper, vector& fileBuffer, - const string& srcFilename, KTXImage& sourceImage) + const string& srcFilename, KTXImage& sourceImage, bool isInfoOnly = false) { // first try mmap, and then use file -> buffer bool useMmap = true; @@ -213,7 +213,7 @@ bool SetupSourceKTX(MmapHelper& mmapHelper, FileHelper& fileHelper, } if (useMmap) { - if (!sourceImage.open(mmapHelper.data(), mmapHelper.dataLength())) { + if (!sourceImage.open(mmapHelper.data(), mmapHelper.dataLength(), isInfoOnly)) { return false; } } @@ -231,7 +231,7 @@ bool SetupSourceKTX(MmapHelper& mmapHelper, FileHelper& fileHelper, return false; } - if (!sourceImage.open(fileBuffer.data(), (int32_t)fileBuffer.size())) { + if (!sourceImage.open(fileBuffer.data(), (int32_t)fileBuffer.size(), isInfoOnly)) { return false; } } @@ -952,7 +952,7 @@ void kramEncodeUsage(bool showVersion = true) "\n" //"\t [-mipalign]\n" "\t [-mipnone]\n" - "\t [-mipmin size] [-mipmax size]\n" + "\t [-mipmin size] [-mipmax size] [-mipskip count]\n" "\n" "\t [-chunks 4x4]\n" "\t [-swizzle rg01]\n" @@ -1254,9 +1254,13 @@ string kramInfoToString(const string& srcFilename, bool isVerbose) else if (isKTX) { KTXImage srcImage; + // This means don't convert to KTX1, keep original data/offsets + // and also skip decompressing the mips + bool isInfoOnly = true; + // Note: could change to not read any mips bool success = SetupSourceKTX(srcMmapHelper, srcFileHelper, srcFileBuffer, - srcFilename, srcImage); + srcFilename, 
srcImage, isInfoOnly); if (!success) { KLOGE("Kram", "info couldn't open ktx file"); return ""; @@ -1373,10 +1377,10 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, MyMTLPixelFormat metalFormat = srcImage.pixelFormat; int32_t dataSize = srcImage.fileDataLength; - - string tmp; + + //string tmp; bool isMB = (dataSize > (512 * 1024)); - sprintf(tmp, + append_sprintf(info, "file: %s\n" "size: %d\n" "sizm: %0.3f %s\n", @@ -1384,17 +1388,42 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, dataSize, isMB ? dataSize / (1024.0f * 1024.0f) : dataSize / 1024.0f, isMB ? "MB" : "KB"); - info += tmp; + + int32_t numChunks = srcImage.totalChunks(); + + // add up lengtha and lengthCompressed + if (srcImage.mipLevels[0].lengthCompressed > 0) { + uint64_t length = 0; + uint64_t lengthCompressed = 0; - int32_t pixelMultiplier = srcImage.totalChunks(); + for (const auto& level : srcImage.mipLevels) { + length += level.length; + lengthCompressed += level.lengthCompressed; + } + + length *= numChunks; + uint64_t percent = (100 * lengthCompressed) / length; + + isMB = (lengthCompressed > (512 * 1024)); + double lengthF = isMB ? length / (1024.0f * 1024.0f) : length / 1024.0f; + double lengthCompressedF = isMB ? lengthCompressed / (1024.0f * 1024.0f) : lengthCompressed / 1024.0f; + + append_sprintf(info, + "sizc: %0.3f,%0.3f %s %d%%\n", + lengthF, lengthCompressedF, + isMB ? "MB" : "KB", + (int)percent); + } + float numPixels = srcImage.width * srcImage.height; - numPixels *= (float)pixelMultiplier; + numPixels *= (float)numChunks; if (srcImage.header.numberOfMipmapLevels > 1) { numPixels *= 4.0 / 3.0f; // TODO: estimate for now } + // to megapixels numPixels /= (1000.0f * 1000.0f); auto textureType = srcImage.header.metalTextureType(); @@ -1404,7 +1433,7 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, case MyMTLTextureTypeCube: case MyMTLTextureTypeCubeArray: case MyMTLTextureType2DArray: - sprintf(tmp, + append_sprintf(info, "type: %s\n" "dims: %dx%d\n" "dimm: %0.3f MP\n" @@ -1415,7 +1444,7 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, srcImage.header.numberOfMipmapLevels); break; case MyMTLTextureType3D: - sprintf(tmp, + append_sprintf(info, "type: %s\n" "dims: %dx%dx%d\n" "dimm: %0.3f MP\n" @@ -1426,8 +1455,7 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, srcImage.header.numberOfMipmapLevels); break; } - info += tmp; - + // print out the array if (srcImage.header.numberOfArrayElements > 1) { append_sprintf(info, @@ -1449,9 +1477,6 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, for (const auto& prop : srcImage.props) { append_sprintf(info, "prop: %s %s\n", prop.first.c_str(), prop.second.c_str()); } - - // TODO: handle zstd compressed KTX2 too, they have a length and compressed length field - // also Basis + zstd if (isVerbose) { // dump mips/dims, but this can be a lot of data on arrays @@ -1461,9 +1486,7 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, int32_t d = srcImage.depth; // num chunks - append_sprintf(info, - "chun: %d\n", - srcImage.totalChunks()); + append_sprintf(info, "chun: %d\n", numChunks); for (const auto& mip : srcImage.mipLevels) { @@ -1483,13 +1506,14 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, } if (mip.lengthCompressed != 0) { - size_t percent = (100 * mip.lengthCompressed) / mip.length; + uint64_t 
levelSize = mip.length * numChunks; + uint64_t percent = (100 * mip.lengthCompressed) / levelSize; append_sprintf(info, "%" PRIu64 ",%" PRIu64 ",%" PRIu64 " %d%%\n", mip.offset, - mip.length, // only size of one mip right now, not mip * numChunks - mip.lengthCompressed, // TODO: preserve so can be displayed + levelSize, + mip.lengthCompressed, (int)percent ); } @@ -1721,6 +1745,12 @@ static int32_t kramAppEncode(vector& args) } infoArgs.mipMaxSize = atoi(args[i]); + if (infoArgs.mipMaxSize < 1 || infoArgs.mipMaxSize > 65536) { + KLOGE("Kram", "mipmax arg invalid"); + error = true; + break; + } + //continue; } else if (isStringEqual(word, "-mipmin")) { @@ -1732,6 +1762,28 @@ static int32_t kramAppEncode(vector& args) } infoArgs.mipMinSize = atoi(args[i]); + if (infoArgs.mipMinSize < 1 || infoArgs.mipMinSize > 65536) { + KLOGE("Kram", "mipmin arg invalid"); + error = true; + break; + } + //continue; + } + else if (isStringEqual(word, "-mipskip")) { + ++i; + if (i >= argc) { + KLOGE("Kram", "mipskip arg invalid"); + error = true; + break; + } + + infoArgs.mipSkip = atoi(args[i]); + if (infoArgs.mipSkip < 0 || infoArgs.mipSkip > 16) { + KLOGE("Kram", "mipskip arg invalid"); + error = true; + break; + } + //continue; } else if (isStringEqual(word, "-mipnone")) { diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index a6831194..e7b4017a 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -910,9 +910,9 @@ struct MipConstructData { // Can skip the larger and smaller mips. This is the larger mips skipped. uint32_t numSkippedMips = 0; - // this is size of 2d image src after accounting for chunks for a strip of array/cube data - uint32_t modifiedWidth = 0; - uint32_t modifiedHeight = 0; + // 2d image src after accounting for chunks for a strip of array/cube data + uint32_t chunkWidth = 0; + uint32_t chunkHeight = 0; }; @@ -1270,8 +1270,8 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const // This is wxh of source in case it has chunks // dstImage will start at this, but may mip down smaller base on mipMaxSize - mipConstructData.modifiedWidth = w; - mipConstructData.modifiedHeight = h; + mipConstructData.chunkWidth = w; + mipConstructData.chunkHeight = h; // work out how much memory we need to load header.initFormatGL(info.pixelFormat); @@ -1284,8 +1284,12 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const dstImage.height = h; dstImage.depth = header.pixelDepth; // from validate above - dstImage.initMipLevels(info.doMipmaps, info.mipMinSize, info.mipMaxSize, mipConstructData.numSkippedMips); + dstImage.initMipLevels(info.doMipmaps, info.mipMinSize, info.mipMaxSize, info.mipSkip, mipConstructData.numSkippedMips); + if (dstImage.mipLevels.empty()) { + KLOGE("kram", "skipped all mips"); + return false; + } // ---------------------------------------------------- int32_t numChunks = (int32_t)chunkOffsets.size(); @@ -1317,6 +1321,9 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const // could build and compress and entire level at a time, but can't write any of it // out until smallest mips are constructed. Only then are offsets resolved. + // A better way would be to do mips in-place, but in-order, and compressing the large + // to small mips into an array of open compressor streams. Then only need one mip instead of + // all levels in memory. 
if (!writeKTX1FileOrImage(info, mipConstructData, propsData, nullptr, dstImage)) { return false; } @@ -1610,8 +1617,8 @@ bool Image::createMipsFromChunks( // This is for 8-bit data (pixelsFloat used for in-place mipgen) ImageData srcImage; - srcImage.width = data.modifiedWidth; - srcImage.height = data.modifiedHeight; + srcImage.width = data.chunkWidth; + srcImage.height = data.chunkHeight; // KramMipper uses these srcImage.isSRGB = info.isSRGB; diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 78987728..3a324a3d 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -1012,7 +1012,8 @@ void ImageInfo::initWithArgs(const ImageInfoArgs& args) doMipmaps = args.doMipmaps; mipMinSize = args.mipMinSize; mipMaxSize = args.mipMaxSize; - + mipSkip = args.mipSkip; + swizzleText = args.swizzleText; averageChannels = args.averageChannels; diff --git a/libkram/kram/KramImageInfo.h b/libkram/kram/KramImageInfo.h index 86a42488..1e34ae68 100644 --- a/libkram/kram/KramImageInfo.h +++ b/libkram/kram/KramImageInfo.h @@ -48,7 +48,8 @@ class ImageInfoArgs { int32_t mipMinSize = 1; int32_t mipMaxSize = 32 * 1024; - + int32_t mipSkip = 0; + int32_t quality = 49; // may want float // ktx2 has a compression type and level @@ -172,12 +173,11 @@ class ImageInfo { int32_t mipMinSize = 1; int32_t mipMaxSize = 32 * 1024; + int32_t mipSkip = 0; // count of large mips to skip int32_t chunksX = 0; int32_t chunksY = 0; int32_t chunksCount = 0; - - }; bool isSwizzleValid(const char* swizzle); From 3af15a2af3d25f3e67ddb6dab9268bc6ab44acbf Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 18 May 2021 22:52:34 -0700 Subject: [PATCH 052/901] kramv - add mode to capture any loge messages This is wrap around loadTex and loadArchive calls, so that error messages are reported to the kramv hud. 
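For reference, a minimal sketch of the intended call pattern, assuming only the capture functions added here (setErrorLogCapture/getErrorLogCaptureText from KramLog.h); loadTextureFile is a hypothetical stand-in for the viewer's loadTexture/loadArchive paths:

    #include <string>
    #include "KramLog.h"  // setErrorLogCapture, getErrorLogCaptureText (added in this patch)

    // hypothetical stand-in for the viewer's loadTexture/loadArchive call
    bool loadTextureFile(const char* filename);

    bool loadWithHudErrors(const char* filename, std::string& hudText)
    {
        kram::setErrorLogCapture(true);

        bool success = loadTextureFile(filename);
        if (!success) {
            // grab any KLOGE output emitted during the load, before capture is turned off
            kram::getErrorLogCaptureText(hudText);
        }

        kram::setErrorLogCapture(false);
        return success;
    }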
--- kramv/KramViewerMain.mm | 24 ++++++++++++++++++++++-- libkram/kram/KTXImage.cpp | 7 ++++++- libkram/kram/KramLog.cpp | 34 +++++++++++++++++++++++++++++++++- libkram/kram/KramLog.h | 7 +++++++ 4 files changed, 68 insertions(+), 4 deletions(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index ca2f977f..02177e97 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1842,7 +1842,18 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { const char* filename = entry.filename; double timestamp = entry.modificationDate; - return [self loadTextureFromArchive:filename timestamp:timestamp]; + setErrorLogCapture(true); + + BOOL success = [self loadTextureFromArchive:filename timestamp:timestamp]; + + if (!success) { + string errorText; + getErrorLogCaptureText(errorText); + [self setHudText: errorText.c_str()]; + } + + setErrorLogCapture(false); + return success; } if (!(endsWithExtension(filename, ".png") || @@ -1853,9 +1864,18 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { } Renderer* renderer = (Renderer*)self.delegate; - if (![renderer loadTexture:url]) { + setErrorLogCapture(true); + + BOOL success = [renderer loadTexture:url]; + + if (!success) { + string errorText; + getErrorLogCaptureText(errorText); + [self setHudText: errorText.c_str()]; + setErrorLogCapture(false); return NO; } + setErrorLogCapture(false); // set title to filename, chop this to just file+ext, not directory const char* filenameShort = strrchr(filename, '/'); diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 1a49b928..6fd52b24 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1328,6 +1328,11 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i // copy out the header, const KTX2Header& header2 = *(const KTX2Header*)imageData; + + if (header2.supercompressionScheme != KTX2SupercompressionBasisLZ) { + KLOGE("kram", "BasisLZ supercompression not yet supported"); + return false; + } if (header2.supercompressionScheme != KTX2SupercompressionNone && header2.supercompressionScheme != KTX2SupercompressionZstd && @@ -1340,7 +1345,7 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i // This typically means UASTC encoding + zstd supercompression, and code doesn't handle that below yet if (header2.vkFormat == 0) { - KLOGE("kram", "Basis encode not yet supported"); + KLOGE("kram", "Basis decode not yet supported"); return false; } diff --git a/libkram/kram/KramLog.cpp b/libkram/kram/KramLog.cpp index 89090557..4045ad15 100644 --- a/libkram/kram/KramLog.cpp +++ b/libkram/kram/KramLog.cpp @@ -14,6 +14,31 @@ namespace kram { using namespace std; +static mutex gLogLock; +static string gErrorLogCaptureText; +static bool gIsErrorLogCapture = false; +void setErrorLogCapture(bool enable) { + gIsErrorLogCapture = enable; + if (enable) { + unique_lock lock(gLogLock); + gErrorLogCaptureText.clear(); + } +} +bool isErrorLogCapture() { return gIsErrorLogCapture; } + + +// return the text +void getErrorLogCaptureText(string& text) { + if (gIsErrorLogCapture) { + unique_lock lock(gLogLock); + text = gErrorLogCaptureText; + } + else { + text.clear(); + } +} + + // TODO: install assert handler to intercept, and also add a verify (assert that leaves source in) //void __assert(const char *expression, const char *file, int32_t line) { // @@ -200,9 +225,16 @@ extern int32_t logMessage(const char* group, int32_t logLevel, } // stdout isn't thread safe, so to prevent mixed output put this under mutex - static mutex gLogLock; 
unique_lock lock(gLogLock); + // this means caller needs to know all errors to display in the hud + if (gIsErrorLogCapture && logLevel == LogLevelError) { + gErrorLogCaptureText += msg; + if (needsNewline) { + gErrorLogCaptureText += "\n"; + } + } + fprintf(fp, "%s%s%s%s%s%s", tag, groupString, space, msg, needsNewline ? "\n" : "", fileLineFunc.c_str()); return 0; // reserved for later diff --git a/libkram/kram/KramLog.h b/libkram/kram/KramLog.h index 0b24d871..38f48e2c 100644 --- a/libkram/kram/KramLog.h +++ b/libkram/kram/KramLog.h @@ -48,6 +48,12 @@ extern int32_t logMessage(const char* group, int32_t logLevel, // TODO: move to Strings.h using namespace std; +// when set true, the internal string is cleared +void setErrorLogCapture(bool enable); +bool isErrorLogCapture(); +// return the text +void getErrorLogCaptureText(string& text); + // returns length of string, -1 if failure int32_t sprintf(string& str, const char* format, ...) __printflike(2, 3); @@ -60,4 +66,5 @@ bool endsWithExtension(const char* str, const string& substring); // https://stackoverflow.com/questions/874134/find-out-if-string-ends-with-another-string-in-c bool endsWith(const string& value, const string& ending); + } // namespace kram From 62d7a0de19812c53dd62ac7727c55ce940e5caa0 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 18 May 2021 22:55:14 -0700 Subject: [PATCH 053/901] kram - fix BasisLZ test --- libkram/kram/KTXImage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 6fd52b24..ce43b75c 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1329,7 +1329,7 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i const KTX2Header& header2 = *(const KTX2Header*)imageData; - if (header2.supercompressionScheme != KTX2SupercompressionBasisLZ) { + if (header2.supercompressionScheme == KTX2SupercompressionBasisLZ) { KLOGE("kram", "BasisLZ supercompression not yet supported"); return false; } From 88203135cd0766e7c8dc1bd5ff27977bb740f00a Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 18 May 2021 23:01:20 -0700 Subject: [PATCH 054/901] kram - fix ValidateMipLevels for KTX files length is internally stored not multiplied by numChunks, so that needed to be done in this call. 
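A small illustrative sketch of the size relationship this check now enforces, using simplified stand-in types rather than the real KTXImage/KTXImageLevel:

    #include <cstdint>

    // simplified stand-in: like KTXImageLevel, length is stored per chunk
    struct LevelSketch {
        uint64_t length;  // bytes of one chunk (face/slice) at this mip
    };

    // a whole level is numChunks copies of the per-chunk length,
    // e.g. 6 for a cube map, numberOfArrayElements for a 2D array
    inline uint64_t expectedLevelSize(const LevelSketch& level, uint32_t numChunks)
    {
        return level.length * numChunks;
    }

    // the size computed from width/height/format must match the per-chunk
    // length scaled by the chunk count, not the per-chunk length alone
    inline bool levelSizeMatches(uint64_t levelSizeFromRead, const LevelSketch& level, uint32_t numChunks)
    {
        return levelSizeFromRead == expectedLevelSize(level, numChunks);
    }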
--- libkram/kram/KTXImage.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index ce43b75c..46aff718 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1223,6 +1223,8 @@ bool KTXImage::validateMipLevels() const { bool isValid = true; + int numChunks = header.totalChunks(); + // validate that no weird size to image for (uint32_t i = 0; i < mipLevels.size(); ++i) { auto& level = mipLevels[i]; @@ -1235,7 +1237,7 @@ bool KTXImage::validateMipLevels() const { levelSizeFromRead *= 6; } - if (levelSizeFromRead != level.length) { + if (levelSizeFromRead != level.length * numChunks) { KLOGE("kram", "mip %d levelSize mismatch %d %d", i, (int)levelSizeFromRead, (int)level.length); isValid = false; break; From e90c71f0d9450a46e6acffaad2e5e71a206cde3d Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 18 May 2021 23:18:40 -0700 Subject: [PATCH 055/901] kramv - prepend the filename when failure occurs --- kramv/KramViewerMain.mm | 27 +++++++++++++++++++++++++-- libkram/kram/KTXImage.cpp | 4 ++-- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 02177e97..47f04c8c 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1849,7 +1849,15 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { if (!success) { string errorText; getErrorLogCaptureText(errorText); - [self setHudText: errorText.c_str()]; + setErrorLogCapture(false); + + // prepend filename + string finalErrorText; + append_sprintf(finalErrorText, + "Could not load from archive:\n %s\n", filename); + finalErrorText += errorText; + + [self setHudText: finalErrorText.c_str()]; } setErrorLogCapture(false); @@ -1860,6 +1868,14 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { endsWithExtension(filename, ".ktx") || endsWithExtension(filename, ".ktx2")) ) { + string errorText = "Unsupported file extension, must be .zip, .png, .ktx, ktx2\n"; + + string finalErrorText; + append_sprintf(finalErrorText, + "Could not load from archive:\n %s\n", filename); + finalErrorText += errorText; + + [self setHudText: finalErrorText.c_str()]; return NO; } @@ -1871,8 +1887,15 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { if (!success) { string errorText; getErrorLogCaptureText(errorText); - [self setHudText: errorText.c_str()]; setErrorLogCapture(false); + + // prepend filename + string finalErrorText; + append_sprintf(finalErrorText, + "Could not load from file\n %s\n", filename); + finalErrorText += errorText; + + [self setHudText: finalErrorText.c_str()]; return NO; } setErrorLogCapture(false); diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 46aff718..d00c9c2f 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1332,7 +1332,7 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i if (header2.supercompressionScheme == KTX2SupercompressionBasisLZ) { - KLOGE("kram", "BasisLZ supercompression not yet supported"); + KLOGE("kram", "Basis decode not yet supported"); return false; } @@ -1347,7 +1347,7 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i // This typically means UASTC encoding + zstd supercompression, and code doesn't handle that below yet if (header2.vkFormat == 0) { - KLOGE("kram", "Basis decode not yet supported"); + KLOGE("kram", "UASTC and vkFormat of 0 decode not yet supported"); return false; } From 04aefc8e29b7710a0c7165060da1372e06ccf71c Mon Sep 17 00:00:00 2001 From: Alec Miller 
Date: Tue, 18 May 2021 23:39:16 -0700 Subject: [PATCH 056/901] Update README.md --- README.md | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 91e4ab4e..22dfb9cf 100644 --- a/README.md +++ b/README.md @@ -100,9 +100,9 @@ KTX - breaks loads of mips with 4 byte length offset at the start of each level metadata/props aren't standardized and only ascii prop support so easy to dump out KTX2 - works in kram and viewer, has aligned compressed levels of mips, - libkram only supports None/Zstd supercompression, only read no write support, - write by converting ktx -> ktx2 with ktx2ktx2 + ktxsc (see kramTexture.py --ktx2 option) - + libkram supports None/Zlib/Zstd supercompression for read/write + doesn't support UASTC or BasisLZ yet + ``` ### An example pipeline @@ -223,10 +223,15 @@ cd build ./Release/kram -testall ./Release/kram -test 1002 +# for ktx ./Release/kram encode -f astc4x4 -srgb -premul -quality 49 -mipmax 1024 -type 2d -i ../tests/src/ColorMap-a.png -o ../tests/out/ios/ColorMap-a.ktx ./Release/kram encode -f etc2rg -signed -normal -quality 49 -mipmax 1024 -type 2d -i ../tests/src/collectorbarrel-n.png -o ../tests/out/ios/collectorbarrel-n.ktx ./Release/kram encode -f etc2r -signed -sdf -quality 49 -mipmax 1024 -type 2d -i ../kram/tests/src/flipper-sdf.png -o ../tests/out/ios/flipper-sdf.ktx +# for ktx (without and with zstd compression) +./Release/kram encode -f astc4x4 -srgb -premul -quality 49 -mipmax 1024 -type 2d -i ../tests/src/ColorMap-a.png -o ../tests/out/ios/ColorMap-a.ktx2 +./Release/kram encode -f astc4x4 -srgb -premul -quality 49 -mipmax 1024 -type 2d -zstd 0 -i ../tests/src/ColorMap-a.png -o ../tests/out/ios/ColorMap-a.ktx2 + ``` ### Open Source Encoder Usage @@ -257,7 +262,7 @@ Squish Simplified to single folder. Replaced sse vector with float4/a for ARM/Neon support. -Astcenc v2.1 +Astcenc v2.5 (current is v3.0) Provide rgba8u source pixels. Converted to 32f at tile level. Improved 1 and 2 channel format encoding (not transfered to v2.1). Avoid reading off end of arrays with padding. @@ -585,7 +590,7 @@ Visually validating and previewing the results is complicated. KTX/2 have few v kram adds props to KTX/2 file to store data. Currently props store Metal and Vulkan formats. This is important since GL's ASTC LDR and HDR formats are the same constant. Also props are saved for channel content and post-swizzle. Loaders, viewers, and shaders can utilize this metadata. -KTX can be converted to KTX2 and each mip supercompressed via ktx2ktx2 and ktxsc. KTX2 reverses mip ordering smallest to largest, so that streamed textures can display smaller mips before they finish fully streaming. KTX2 can also supercompress each mip with zstd and Basis for transcode. I suppose this could then be unpacked to tiles for sparse texturing. KTX2 does not store a length field inside the mip data which keeps consistent alignment. +Kram now supports KTX2 export. But KTX can also be converted to KTX2 and each mip supercompressed via ktx2ktx2 and ktxsc. KTX2 reverses mip ordering smallest to largest, so that streamed textures can display smaller mips before they finish fully streaming. KTX2 can also supercompress each mip with zstd and Basis for transcode. I suppose this could then be unpacked to tiles for sparse texturing. KTX2 does not store a length field inside the mip data which keeps consistent alignment. Metal cannot load mmap mip data that isn't aligned to a multiple of the block size (8 or 16 bytes for BC/ASTC/ETC). 
KTX adds a 4 byte length into the mip data that breaks alignment, but KTX2 fortunately skips that. But KTX2 typically compresses the levels and needs decode/transcode to send to the GPU.

From 600577d272ad46f0eb44d099e81d5ae7c7aa425c Mon Sep 17 00:00:00 2001
From: Alec Miller
Date: Fri, 21 May 2021 15:32:21 -0700
Subject: [PATCH 057/901] kramv - fix up debug modes, add srgb savvy isGray to png loader

NonZero is rgb != 0 now, ignoring alpha since many images are a=1
Gray also ignores when c.r is 0, since on premul images 000 is common.
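A standalone sketch of the sRGB-aware grayscale conversion described above; the real loader goes through the Mipper helpers, so the transfer functions here are simplified stand-ins, with only the Rec.709 weights taken from the patch:

    #include <cmath>
    #include <cstdint>

    // simplified sRGB transfer functions (stand-ins for the Mipper helpers)
    static float srgbToLinear(float s)
    {
        return (s <= 0.04045f) ? s / 12.92f : std::pow((s + 0.055f) / 1.055f, 2.4f);
    }

    static float linearToSrgb(float lin)
    {
        return (lin <= 0.0031308f) ? lin * 12.92f : 1.055f * std::pow(lin, 1.0f / 2.4f) - 0.055f;
    }

    struct Color8 { uint8_t r, g, b, a; };

    // Rec.709 luma computed in linear space, then converted back to the sRGB primary
    static Color8 toGraySketch(Color8 c)
    {
        float rl = srgbToLinear(c.r / 255.0f);
        float gl = srgbToLinear(c.g / 255.0f);
        float bl = srgbToLinear(c.b / 255.0f);

        float luminance = 0.2126f * rl + 0.7152f * gl + 0.0722f * bl;

        uint8_t gray = (uint8_t)(linearToSrgb(luminance) * 255.0f + 0.5f);
        c.r = c.g = c.b = gray;  // alpha is left untouched
        return c;
    }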
(Color*)pixels.data(); + for (int32_t i = 0, iEnd = width*height; i < iEnd; ++i) { + colors[i] = toGrayscaleRec709(colors[i], mipper); + } + + hasColor = false; + } + // apply premul srgb right away, don't use with -premul or alpha is applied twice // this may throw off the props. Note this ignores srgb conversion. // This is hack to look like Photoshop and Apple Preview, where they process srgb wrong @@ -136,7 +170,8 @@ bool SetupTmpFile(FileHelper& tmpFileHelper, const char* suffix) bool SetupSourceImage(MmapHelper& mmapHelper, FileHelper& fileHelper, vector& fileBuffer, - const string& srcFilename, Image& sourceImage, bool isPremulSrgb = false) + const string& srcFilename, Image& sourceImage, + bool isPremulSrgb = false, bool isGray = false) { bool isKTX = endsWith(srcFilename, ".ktx") || endsWith(srcFilename, ".ktx2"); bool isPNG = endsWith(srcFilename, ".png"); @@ -162,7 +197,7 @@ bool SetupSourceImage(MmapHelper& mmapHelper, FileHelper& fileHelper, } } else if (isPNG) { - if (!LoadPng(mmapHelper.data(), mmapHelper.dataLength(), isPremulSrgb, + if (!LoadPng(mmapHelper.data(), mmapHelper.dataLength(), isPremulSrgb, isGray, sourceImage)) { return false; // error } @@ -190,7 +225,7 @@ bool SetupSourceImage(MmapHelper& mmapHelper, FileHelper& fileHelper, } } else if (isPNG) { - if (!LoadPng(fileBuffer.data(), fileHelper.size(), isPremulSrgb, + if (!LoadPng(fileBuffer.data(), fileHelper.size(), isPremulSrgb, isGray, sourceImage)) { return false; // error } @@ -941,7 +976,8 @@ void kramEncodeUsage(bool showVersion = true) KLOGI("Kram", "%s\n" "Usage: kram encode\n" - "\t -f/ormat (bc1 | astc4x4 | etc2rgba | rgba16f)\n" + "\t -f/ormat (bc1 | astc4x4 | etc2rgba | rgba16f) [-quality 0-100]\n" + "\t [-zstd 0] or [-zlib 0] (for .ktx2 output)\n" "\t [-srgb] [-signed] [-normal]\n" "\t -i/nput \n" "\t -o/utput \n" @@ -950,7 +986,6 @@ void kramEncodeUsage(bool showVersion = true) "\t [-e/ncoder (squish | ate | etcenc | bcenc | astcenc | explicit | ..)]\n" "\t [-resize (16x32 | pow2)]\n" "\n" - //"\t [-mipalign]\n" "\t [-mipnone]\n" "\t [-mipmin size] [-mipmax size] [-mipskip count]\n" "\n" @@ -958,9 +993,8 @@ void kramEncodeUsage(bool showVersion = true) "\t [-swizzle rg01]\n" "\t [-avg rxbx]\n" "\t [-sdf]\n" - "\t [-premul]\n" - "\t [-prezero]\n" - "\t [-quality 0-100]\n" + "\t [-premul] [-prezero] [-premulrgb]\n" + "\t [-gray]\n" "\t [-optopaque]\n" "\t [-v]\n" "\n" @@ -1007,17 +1041,20 @@ void kramEncodeUsage(bool showVersion = true) "\tr|rg|rgba[8|16f|32f]\n" "\n" - "\t-mipalign" - "\tAlign mip levels with .ktxa output \n" + // Mips "\t-mipnone" "\tDon't build mips even if pow2 dimensions\n" "\t-mipmin size" "\tOnly output mips >= size px\n" + "\t-mipmax size" "\tOnly output mips <= size px\n" + + "\t-mipskip count" + "\tOnly output largest mips >= count, similar to mipmax but with count instead of size px\n" "\n" - + // tex to normal "\t-height" "\tConvert height.x to normal.xy\n" @@ -1035,21 +1072,23 @@ void kramEncodeUsage(bool showVersion = true) "\tNormal map rg storage signed for etc/bc (rg01), only unsigned astc L+A (gggr).\n" "\t-sdf" "\tGenerate single-channel SDF from a bitmap, can mip and drop large mips. Encode to r8, bc4, etc2r, astc4x4 (Unorm LLL1) to encode\n" - + + "\t-gray" + "\tConvert to grayscale before premul\n" + // premul is not on by default, but really should be or textures aren't sampled correctly // but this really only applies to color channel textures, so off by default. 
"\t-premul" "\tPremultiplied alpha to src pixels before output\n" - "\n" - + // This is meant to work with shaders that (incorrectly) premul after sampling. // limits the rgb bleed in regions that should not display colors. Can stil have black color halos. "\t-prezero" "\tPremultiplied alpha to src pixels before output but only where a=0\n" - "\n" + // This emulates Photoshop premul only on png files. Multiplies srgbColor.rgb * a. "\t-premulrgb" - "\tPremultiplied alpha to src pixels at load to emulate Photoshop, don't use with -premul\n" + "\tPremultiplied alpha to src pixels at load to emulate Photoshop srgbColor.rgb * a, don't use with -premul\n" "\n" "\t-optopaque" @@ -1060,10 +1099,11 @@ void kramEncodeUsage(bool showVersion = true) "\tSpecifies how many chunks to split up texture into 2darray\n" // ktx2 specific settings - "\t-zstd level" - "\tktx2 with zstd mip compressor, 0 for default\n" - "\t-zlib level" - "\tktx2 with zlib mip compressor, 0 for defauult\n" + "\tktx2 mip compression, if not present then no compresion used\n" + "\t-zstd 0" + "\tktx2 with zstd mip compressor, 0 for default, 0 to 100\n" + "\t-zlib 0" + "\tktx2 with zlib mip compressor, 0 for default, 0 to 11\n" "\t-swizzle [rgba01 x4]" "\tSpecifies pre-encode swizzle pattern\n" @@ -1714,6 +1754,7 @@ static int32_t kramAppEncode(vector& args) ImageInfoArgs infoArgs; bool isPremulRgb = false; + bool isGray = false; bool error = false; for (int32_t i = 0; i < argc; ++i) { @@ -1734,7 +1775,11 @@ static int32_t kramAppEncode(vector& args) infoArgs.optimizeFormatForOpaque = true; //continue; } - + else if (isStringEqual(word, "-gray")) { + isGray = true; + //continue; + } + // mip setting else if (isStringEqual(word, "-mipmax")) { ++i; @@ -1791,11 +1836,6 @@ static int32_t kramAppEncode(vector& args) infoArgs.doMipmaps = false; //continue; } -// else if (isStringEqual(word, "-mipalign")) { -// // pad start of each mip to pixel/block size of format -// infoArgs.skipImageLength = true; -// continue; -// } else if (isStringEqual(word, "-heightScale")) { ++i; @@ -2120,7 +2160,7 @@ static int32_t kramAppEncode(vector& args) vector srcFileBuffer; bool success = SetupSourceImage(srcMmapHelper, srcFileHelper, srcFileBuffer, - srcFilename, srcImage, isPremulRgb); + srcFilename, srcImage, isPremulRgb, isGray); if (success) { success = SetupTmpFile(tmpFileHelper, isDstKTX2 ? ".ktx2" : ".ktx"); diff --git a/libkram/kram/Kram.h b/libkram/kram/Kram.h index 9a52a695..5d715e97 100644 --- a/libkram/kram/Kram.h +++ b/libkram/kram/Kram.h @@ -14,7 +14,7 @@ class KTXImage; // helpers to source from a png or single level of a ktx bool LoadKtx(const uint8_t* data, size_t dataSize, Image& sourceImage); -bool LoadPng(const uint8_t* data, size_t dataSize, bool isPremulSrgb, Image& sourceImage); +bool LoadPng(const uint8_t* data, size_t dataSize, bool isPremulSrgb, bool isGray, Image& sourceImage); // can call these with data instead of needing a file string kramInfoPNGToString(const string& srcFilename, const uint8_t* data, uint64_t dataSize, bool isVerbose); diff --git a/libkram/kram/KramMipper.cpp b/libkram/kram/KramMipper.cpp index 1bd80432..dbdd80f7 100644 --- a/libkram/kram/KramMipper.cpp +++ b/libkram/kram/KramMipper.cpp @@ -59,7 +59,7 @@ inline Color Snormfloat4ToColor(float4 value) return c; } -inline float linearToSRGBFunc(float lin) +float linearToSRGBFunc(float lin) { assert(lin >= 0.0f && lin <= 1.0f); return (lin < 0.00313066844250063f) ? 
(lin * 12.92f) diff --git a/libkram/kram/KramMipper.h b/libkram/kram/KramMipper.h index 19bde640..36329eb6 100644 --- a/libkram/kram/KramMipper.h +++ b/libkram/kram/KramMipper.h @@ -41,6 +41,9 @@ void remapToSignedBCEndpoint88(uint16_t &endpoint); float4 linearToSRGB(float4 lin); +// return srgb from a linear intesnity +float linearToSRGBFunc(float lin); + class ImageData { public: // data can be mipped as 8u, 16f, or 32f. Prefer smallest size. @@ -71,9 +74,12 @@ class Mipper { void initPixelsHalfIfNeeded(ImageData &srcImage, bool doPremultiply, bool doPrezero, vector &halfImage) const; + // these use table lookups, so need to be class members float toLinear(uint8_t srgb) const { return srgbToLinear[srgb]; } float toAlphaFloat(uint8_t alpha) const { return alphaToFloat[alpha]; } + float4 toLinear(const Color& c) const { return float4m(toLinear(c.r), toLinear(c.g), toLinear(c.b), toAlphaFloat(c.a)); } + uint8_t toPremul(uint8_t channelIntensity, uint8_t alpha) const { return ((uint32_t)channelIntensity * (uint32_t)alpha) / 255; } private: From 0b387f1204997250cf8640beb3de69de0cff387f Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Fri, 21 May 2021 15:58:16 -0700 Subject: [PATCH 058/901] kram - use rounding in conversions to 8-bit --- libkram/kram/Kram.cpp | 6 +++--- libkram/kram/KramImageInfo.cpp | 3 ++- libkram/kram/KramMipper.cpp | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 815d64f0..e6f79b93 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -66,7 +66,7 @@ inline Color toGrayscaleRec709(Color c, const Mipper& mipper) { float4 clin = mipper.toLinear(c); float luminance = dot(clin, kRec709Conversion); - c.r = (uint8_t)(linearToSRGBFunc(luminance) * 255.1f); + c.r = (uint8_t)(roundf(linearToSRGBFunc(luminance) * 255.0f)); // can just copy into the other 3 terms c.g = c.b = c.r; @@ -2530,7 +2530,7 @@ void PSTest() { for (int32_t i = 0; i < 256; ++i) { float value = mipper.toLinear(values1[i]); - values2[i] = uint8_t(value * 255.1); + values2[i] = uint8_t(roundf(value * 255.0f)); //KLOGI("srgb", "[%d] = %g\n", i, value); } @@ -2540,7 +2540,7 @@ void PSTest() { float value = mipper.toLinear(i); value *= alphaF; - values3[i] = uint8_t(value * 255.1); + values3[i] = uint8_t(roundf(value * 255.0)); } // log them side-by-side for comparison diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 3a324a3d..1a0ce2de 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -1309,6 +1309,7 @@ void ImageInfo::heightToNormals(int32_t w, int32_t h, normal = normalize(normal); // convert to unorm + // TODO: may need to do around unorm8 offset of unorm 255/127 and + 128/127 normal = normal * 0.5 + 0.5f; // write out the result @@ -1343,7 +1344,7 @@ void ImageInfo::heightToNormals(int32_t w, int32_t h, normal = normalize(normal); // convert to unorm - normal = normal * 127.0f + 128.0f; + normal = round(normal * 127.0f) + 128.0f; Color& dstPixel8 = dstPixels8[y0 + x]; diff --git a/libkram/kram/KramMipper.cpp b/libkram/kram/KramMipper.cpp index dbdd80f7..84a5f4b3 100644 --- a/libkram/kram/KramMipper.cpp +++ b/libkram/kram/KramMipper.cpp @@ -34,13 +34,13 @@ int32_t nextPow2(int32_t num) inline uint8_t floatToUint8(float value) { - return (uint8_t)roundf(value * 255.1f); + return (uint8_t)roundf(value * 255.0f); } inline Color Unormfloat4ToColor(float4 value) { Color c; - value = round(value * 255.1f); + value = round(value * 255.0f); c.r = (uint8_t)value.x; c.g = 
(uint8_t)value.y; c.b = (uint8_t)value.z; From 9c2182ddd9725accf2ffe04005fac98fb4b2d51b Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Fri, 21 May 2021 19:32:56 -0700 Subject: [PATCH 059/901] Kram - fix ktx1 mip offset. Was setting lengthCompressed to length, but switch to setting to 0 on ktx1 files to avoid confusion. So don't use that in the offset calcs. --- libkram/kram/KramImage.cpp | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index e7b4017a..cb1a6e44 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -804,9 +804,9 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma // write the mips out to the file, and code above can then decode into the same buffer // This isn't correct for cubes, arrays, and other types. The mip length is only written out once for all mips. - int32_t dstMipOffset = dstMipLevel.offset + chunk * dstMipLevel.length; if (chunk == 0 && !dstImage.skipImageLength) { + // sie of one mip uint32_t levelSize = dstMipLevel.length; // cubes write the face size, not the levels size, ugh @@ -814,11 +814,15 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma levelSize *= numChunks; } - if (!writeDataAtOffset((const uint8_t*)&levelSize, sizeof(levelSize), dstMipOffset - sizeof(levelSize), dstFile, dstImage)) { + if (!writeDataAtOffset((const uint8_t*)&levelSize, sizeof(levelSize), dstMipLevel.offset - sizeof(levelSize), dstFile, dstImage)) { return false; } } + // only writing one mip at a time in the level here + // so written bytes are only length and not numChunks * length + int32_t dstMipOffset = dstMipLevel.offset + chunk * dstMipLevel.length; + if (!writeDataAtOffset(outputTexture.data(), dstMipLevel.length, dstMipOffset, dstFile, dstImage)) { return false; } @@ -1565,7 +1569,7 @@ bool Image::writeKTX1FileOrImage( auto& level = dstImage.mipLevels[i]; level.offset = lastMipOffset + 4; // offset by length - lastMipOffset = level.offset + level.lengthCompressed * numChunks; + lastMipOffset = level.offset + level.length * numChunks; } } @@ -1789,7 +1793,8 @@ bool Image::createMipsFromChunks( int32_t numSkippedMips = data.numSkippedMips; for (int32_t mipLevel = 0; mipLevel < (int32_t)dstMipLevels.size(); ++mipLevel) { - + const auto& dstMipLevel = dstMipLevels[mipLevel]; + if (mipLevel == 0 && !info.doSDF) { if (numSkippedMips > 0) { @@ -1827,12 +1832,12 @@ bool Image::createMipsFromChunks( h = dstImageData.height; } } - - // mipOffset are start of first chunk of a given mip size - size_t mipStorageSize = dstMipLevels[mipLevel].length; // / numChunks; + + // size of one mip, not levelSize = numChunks * mipStorageSize + size_t mipStorageSize = dstMipLevel.length; // offset only valid for KTX and KTX2 w/o isCompressed - size_t mipOffset = dstMipLevels[mipLevel].offset + chunk * mipStorageSize; + size_t mipChunkOffset = dstMipLevel.offset + chunk * mipStorageSize; // just to check that each mip has a unique offset //KLOGI("Image", "chunk:%d %d\n", chunk, mipOffset); @@ -1870,7 +1875,7 @@ bool Image::createMipsFromChunks( // https://github.com/BinomialLLC/basis_universal/issues/40 // this contains all bytes at a mipLOD but not any padding - uint32_t levelSize = (uint32_t)dstMipLevels[mipLevel].length; + uint32_t levelSize = (uint32_t)mipStorageSize; // this is size of one face for non-array cubes // but for everything else, it's the numChunks * mipStorageSize @@ -1881,7 +1886,7 @@ bool 
Image::createMipsFromChunks( int32_t levelSizeOf = sizeof(levelSize); assert(levelSizeOf == 4); - if (!writeDataAtOffset((const uint8_t*)&levelSize, levelSizeOf, mipOffset - levelSizeOf, dstFile, dstImage)) { + if (!writeDataAtOffset((const uint8_t*)&levelSize, levelSizeOf, dstMipLevel.offset - levelSizeOf, dstFile, dstImage)) { return false; } } @@ -1889,7 +1894,7 @@ bool Image::createMipsFromChunks( // Note that default ktx alignment is 4, so r8u, r16f mips need to be padded out to 4 bytes // may need to write these out row by row, and let fseek pad the rows to 4. - if (!writeDataAtOffset(outputTexture.data.data(), mipStorageSize, mipOffset, dstFile, dstImage)) { + if (!writeDataAtOffset(outputTexture.data.data(), mipStorageSize, mipChunkOffset, dstFile, dstImage)) { return false; } } From b6d5b0f739ef32a513bc4d0b069c6242105a13bc Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 22 May 2021 13:20:30 -0700 Subject: [PATCH 060/901] Kram - normalize weights and add some round/snapping to 255 on non-pow2 mipgen. Otherwise, mipgen was pulling down the alpha from 255 to 254. This only affects non-pow2 mips, since they weight in x and y. --- libkram/kram/KTXImage.cpp | 12 ++++ libkram/kram/KTXImage.h | 3 +- libkram/kram/KramImage.cpp | 111 +++++++++++++++++++++++++++++++++++- libkram/kram/KramMipper.cpp | 20 ++++++- 4 files changed, 143 insertions(+), 3 deletions(-) diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index d00c9c2f..8d328a2e 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -759,6 +759,18 @@ uint32_t KTXImage::mipLevelSize(uint32_t width_, uint32_t height_) const return count * size; } +uint32_t KTXImage::blockCountRows(uint32_t width_) const +{ + assert(width_ >= 1); + + Int2 dims = blockDims(); + + width_ = (width_ + dims.x - 1) / dims.x; + + return width_; +} + + uint32_t KTXImage::blockCount(uint32_t width_, uint32_t height_) const { assert(width_ >= 1 && height_ >= 1); diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index b82f9299..b7971682 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -294,7 +294,8 @@ class KTXImage { uint32_t blockSize() const; Int2 blockDims() const; uint32_t blockCount(uint32_t width_, uint32_t height_) const; - + uint32_t blockCountRows(uint32_t width_) const; + // mip data depends on format uint32_t mipLevelSize(uint32_t width_, uint32_t height_) const; //int totalMipLevels() const; diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index cb1a6e44..514dc646 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -87,6 +87,25 @@ class TextureData { vector data; }; +// return the block mode of a bc7 block, or -1 if finvalid +int32_t decodeBC7BlockMode(const void *pBlock) +{ + const uint32_t first_byte = static_cast(pBlock)[0]; + + for (uint32_t mode = 0; mode <= 7; mode++) + { + // bit followed by zeros, mask out upper + uint8_t bits = (1U << mode); + + if ((first_byte & bits) == bits) + { + return mode; + } + } + + return -1; +} + Image::Image() : _width(0), _height(0), _hasColor(false), _hasAlpha(false) { } @@ -1600,7 +1619,73 @@ bool Image::writeKTX1FileOrImage( return true; } - +void printBCBlock(const uint8_t* bcBlock, MyMTLPixelFormat format) { + // https://docs.microsoft.com/en-us/windows/win32/direct3d11/bc7-format-mode-reference#mode-6 + if (!(format == MyMTLPixelFormatBC7_RGBAUnorm || format == MyMTLPixelFormatBC7_RGBAUnorm_sRGB)) { + return; + } + + uint32_t mode = decodeBC7BlockMode(bcBlock); + + switch(mode) { + case 6: { 
+ const uint64_t* block = (const uint64_t*)bcBlock; + // 6 bits of signature - LSB 000001 + // 7 bits R0, 7 bits R1 + // 7 bits G0, 7 bits G1 + // 7 bits B0, 7 bits B1 + // 7 bits A0, 7 bits A1 + + // 1 bit P0, 1 bit P1 + // 63 bits of index data, how dos that work? + + uint32_t R0 = (uint32_t)((block[0] >> uint64_t(7*1)) & uint64_t(0b1111111)); + uint32_t R1 = (uint32_t)((block[0] >> uint64_t(7*2)) & uint64_t(0b1111111)); + + uint32_t G0 = (uint32_t)((block[0] >> uint64_t(7*3)) & uint64_t(0b1111111)); + uint32_t G1 = (uint32_t)((block[0] >> uint64_t(7*4)) & uint64_t(0b1111111)); + + uint32_t B0 = (uint32_t)((block[0] >> uint64_t(7*5)) & uint64_t(0b1111111)); + uint32_t B1 = (uint32_t)((block[0] >> uint64_t(7*6)) & uint64_t(0b1111111)); + + uint32_t A0 = (uint32_t)((block[0] >> uint64_t(7*7)) & uint64_t(0b1111111)); + uint32_t A1 = (uint32_t)((block[0] >> uint64_t(7*8)) & uint64_t(0b1111111)); + + uint32_t P0 = (uint32_t)((block[0] >> uint64_t(7*9)) & uint64_t(0b1)); + uint32_t P1 = (uint32_t)((block[1] >> uint64_t(0)) & uint64_t(0b1)); + + // r,g,b,a to be or-ed with the pbit to get tha actual value of the endpoints + + KLOGI("kram", + "R0=%d, R1=%d\n" + "G0=%d, G1=%d\n" + "B0=%d, B1=%d\n" + "A0=%d, A1=%d\n" + "P0=%d, P1=%d\n", + R0, R1, + G0, G1, + B0, B1, + A0, A1, + P0, P1); + + break; + } + } + + // Have a block debug mode that hud's the mode pixel values + // over the hovered block. + uint32_t pixels[4*4]; + if (!unpack_bc7(bcBlock, (bc7decomp::color_rgba*)pixels)) { + return; + } + + for (uint32_t y = 0; y < 4; ++y) { + KLOGI("kram", + "[%u] = %08X %08X %08X %08X\n", + y, pixels[4*y + 0], pixels[4*y + 1], pixels[4*y + 2], pixels[4*y + 3] + ); + } +} bool Image::createMipsFromChunks( ImageInfo& info, @@ -2184,6 +2269,9 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, int32_t blockSize = image.blockSize(); for (int32_t y = 0; y < h; y += blockDim) { for (int32_t x = 0; x < w; x += blockDim) { + + + // Have to copy to temp block, since encode doesn't test w/h edges // copy src to 4x4 clamping the edge pixels // TODO: do clamped edge pixels get weighted more then on non-multiple of 4 images ? @@ -2210,6 +2298,17 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, int32_t b0 = by * blocks_x + bx; uint8_t* dstBlock = &dstData[b0 * blockSize]; + // bc7enc is not setting pbit on bc7 mode6 and doesn's support opaque mode3 yet + // , so opaque textures repro as 254 alpha on Toof-a.png. + // ate sets pbits on mode 6 for same block. Also fixed mip weights in non-pow2 mipper. 
+ +// bool doPrintBlock = false; +// if (bx == 8 && by == 1) { +// int32_t bp = 0; +// bp = bp; +// doPrintBlock = true; +// } + switch (info.pixelFormat) { case MyMTLPixelFormatBC1_RGBA: case MyMTLPixelFormatBC1_RGBA_sRGB: { @@ -2239,6 +2338,10 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, case MyMTLPixelFormatBC7_RGBAUnorm: case MyMTLPixelFormatBC7_RGBAUnorm_sRGB: { bc7enc_compress_block(dstBlock, srcPixelCopy, &bc7params); + + if (doPrintBlock) { + printBCBlock(dstBlock, info.pixelFormat); + } break; } default: { @@ -2281,6 +2384,12 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, if (info.isSigned) { doRemapSnormEndpoints = true; } + + + // find the 8,1 block and print it +// uint32_t numRowBlocks = image.blockCountRows(w); +// const uint8_t* block = outputTexture.data.data() + (numRowBlocks * 1 + 8) * image.blockSize(); +// printBCBlock(block, pixelFormatRemap); } #endif #if COMPILE_SQUISH diff --git a/libkram/kram/KramMipper.cpp b/libkram/kram/KramMipper.cpp index 84a5f4b3..e3068624 100644 --- a/libkram/kram/KramMipper.cpp +++ b/libkram/kram/KramMipper.cpp @@ -34,7 +34,7 @@ int32_t nextPow2(int32_t num) inline uint8_t floatToUint8(float value) { - return (uint8_t)roundf(value * 255.0f); + return (uint8_t)roundf(value * 255.0f); // or use 255.1f ? } inline Color Unormfloat4ToColor(float4 value) @@ -390,6 +390,12 @@ void Mipper::mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) cons y1w = 0.5f; } + // normalize weights + float totalY = ymw + y0w + y1w; + ymw /= totalY; + y0w /= totalY; + y1w /= totalY; +// ym *= width; y0 *= width; y1 *= width; @@ -414,6 +420,12 @@ void Mipper::mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) cons x1w = 0.5f; } + // this mipgen is pulling down alpha of 255 to 241 and smaller over the course of the whole mip chain + float totalX = xmw + x0w + x1w; + xmw /= totalX; + x0w /= totalX; + x1w /= totalX; + // we have 3x2, 2x3 or 3x3 pattern to weight // now lookup the 9 values from the buffer @@ -487,6 +499,9 @@ void Mipper::mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) cons if (!srcImage.isHDR) { // convert back to srgb for encode if (srcImage.isSRGB) { + // round to 8-bits before conversion, and then back + cFloat = round(cFloat * 255.0f) / 255.0f; + cFloat.x = linearToSRGBFunc(cFloat.x); cFloat.y = linearToSRGBFunc(cFloat.y); cFloat.z = linearToSRGBFunc(cFloat.z); @@ -508,6 +523,9 @@ void Mipper::mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) cons if (!srcImage.isHDR) { // convert back to srgb for encode if (srcImage.isSRGB) { + // round to 8-bits before conversion, and then back + cFloat = round(cFloat * 255.0f) / 255.0f; + cFloat.x = linearToSRGBFunc(cFloat.x); cFloat.y = linearToSRGBFunc(cFloat.y); cFloat.z = linearToSRGBFunc(cFloat.z); From 6526b1c985fe6b375fbf7f45e37f6698ada5fcb6 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 22 May 2021 13:23:49 -0700 Subject: [PATCH 061/901] kram - add back bool doPrintBlock for now. --- libkram/kram/KramImage.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 514dc646..9ccee08d 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -2302,7 +2302,7 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, // , so opaque textures repro as 254 alpha on Toof-a.png. // ate sets pbits on mode 6 for same block. Also fixed mip weights in non-pow2 mipper. 
-// bool doPrintBlock = false; + bool doPrintBlock = false; // if (bx == 8 && by == 1) { // int32_t bp = 0; // bp = bp; From 6f85d7c46f9e9fe514ab221157d642f7e8260346 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 22 May 2021 14:04:25 -0700 Subject: [PATCH 062/901] kram - sat don't snap/round before linearToSrgb conversion with normalized weights got some values 1.0002 and that asserted in linearToSrgb. But shouldn't snap to 255 before that. --- libkram/kram/KramMipper.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libkram/kram/KramMipper.cpp b/libkram/kram/KramMipper.cpp index e3068624..f875127c 100644 --- a/libkram/kram/KramMipper.cpp +++ b/libkram/kram/KramMipper.cpp @@ -499,8 +499,8 @@ void Mipper::mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) cons if (!srcImage.isHDR) { // convert back to srgb for encode if (srcImage.isSRGB) { - // round to 8-bits before conversion, and then back - cFloat = round(cFloat * 255.0f) / 255.0f; + // getting some values > 1 + cFloat = saturate(cFloat); cFloat.x = linearToSRGBFunc(cFloat.x); cFloat.y = linearToSRGBFunc(cFloat.y); @@ -523,8 +523,8 @@ void Mipper::mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) cons if (!srcImage.isHDR) { // convert back to srgb for encode if (srcImage.isSRGB) { - // round to 8-bits before conversion, and then back - cFloat = round(cFloat * 255.0f) / 255.0f; + // getting some values > 1 + cFloat = saturate(cFloat); cFloat.x = linearToSRGBFunc(cFloat.x); cFloat.y = linearToSRGBFunc(cFloat.y); From 4b79054cc96c77dd049a3101c8b8abca7a5525ee Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 22 May 2021 14:10:44 -0700 Subject: [PATCH 063/901] kram - more conversion bullteproofing. go through sat calls that handle srgb conversion. 
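Side note on patches 062/063: with normalized filter weights the blended values can land slightly above 1.0 (the 1.0002 mentioned above), which trips the assert in the strict [0,1] linear-to-sRGB helper, so the float4 paths now saturate before converting rather than snapping to 8 bits first. A minimal standalone sketch of that saturate-then-convert idea; the function name is hypothetical, while the piecewise transfer function and cutoff constant match the ones already in KramMipper.cpp:

    #include <algorithm>
    #include <cmath>

    // Saturate first, then apply the standard piecewise sRGB transfer function.
    // The clamp is what keeps weighted sums like 1.0002 from asserting in a
    // converter that requires inputs in [0,1].
    inline float linearToSRGBSaturating(float lin)
    {
        lin = std::min(std::max(lin, 0.0f), 1.0f);
        return (lin < 0.00313066844250063f)
            ? (lin * 12.92f)
            : (1.055f * std::pow(lin, 1.0f / 2.4f) - 0.055f);
    }

Patch 063 then routes the float4 mip paths through the single linearToSRGB(cFloat) call, which the diff notes saturates internally.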
--- libkram/kram/Kram.cpp | 1 + libkram/kram/KramMipper.cpp | 31 ++++++++----------------------- 2 files changed, 9 insertions(+), 23 deletions(-) diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index e6f79b93..4d5374a6 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -65,6 +65,7 @@ inline Color toGrayscaleRec709(Color c, const Mipper& mipper) { float4 clin = mipper.toLinear(c); float luminance = dot(clin, kRec709Conversion); + luminance = std::min(luminance, 1.0f); // to avoid assert if math goes above 1.0 c.r = (uint8_t)(roundf(linearToSRGBFunc(luminance) * 255.0f)); diff --git a/libkram/kram/KramMipper.cpp b/libkram/kram/KramMipper.cpp index f875127c..b20f9f77 100644 --- a/libkram/kram/KramMipper.cpp +++ b/libkram/kram/KramMipper.cpp @@ -225,10 +225,7 @@ void Mipper::initPixelsHalfIfNeeded(ImageData& srcImage, bool doPremultiply, boo if (doPremultiply && c0.a != 255) { // need to overwrite the color 8-bit color too // but this writes back to srgb for encoding - cFloat.x = linearToSRGBFunc(cFloat.x); - cFloat.y = linearToSRGBFunc(cFloat.y); - cFloat.z = linearToSRGBFunc(cFloat.z); - + cFloat = linearToSRGB(cFloat); c0 = Unormfloat4ToColor(cFloat); } } @@ -499,15 +496,11 @@ void Mipper::mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) cons if (!srcImage.isHDR) { // convert back to srgb for encode if (srcImage.isSRGB) { - // getting some values > 1 - cFloat = saturate(cFloat); - - cFloat.x = linearToSRGBFunc(cFloat.x); - cFloat.y = linearToSRGBFunc(cFloat.y); - cFloat.z = linearToSRGBFunc(cFloat.z); + // getting some values > 1m, but this saturates + cFloat = linearToSRGB(cFloat); } - // override rgba8u version, since this is what is encoded + // overwrite rgba8u version, since this is what is encoded Color c = Unormfloat4ToColor(cFloat); // can only skip this if cSrc = cDst @@ -523,12 +516,8 @@ void Mipper::mipmapLevelOdd(const ImageData& srcImage, ImageData& dstImage) cons if (!srcImage.isHDR) { // convert back to srgb for encode if (srcImage.isSRGB) { - // getting some values > 1 - cFloat = saturate(cFloat); - - cFloat.x = linearToSRGBFunc(cFloat.x); - cFloat.y = linearToSRGBFunc(cFloat.y); - cFloat.z = linearToSRGBFunc(cFloat.z); + // getting some values > 1, but this saturates + cFloat = linearToSRGB(cFloat); } // Overwrite the RGBA8u image too (this will go out to @@ -609,9 +598,7 @@ void Mipper::mipmapLevel(const ImageData& srcImage, ImageData& dstImage) const if (!srcImage.isHDR) { // convert back to srgb for encode if (srcImage.isSRGB) { - cFloat.x = linearToSRGBFunc(cFloat.x); - cFloat.y = linearToSRGBFunc(cFloat.y); - cFloat.z = linearToSRGBFunc(cFloat.z); + cFloat = linearToSRGB(cFloat); } // override rgba8u version, since this is what is encoded @@ -639,9 +626,7 @@ void Mipper::mipmapLevel(const ImageData& srcImage, ImageData& dstImage) const if (!srcImage.isHDR) { // convert back to srgb for encode if (srcImage.isSRGB) { - cFloat.x = linearToSRGBFunc(cFloat.x); - cFloat.y = linearToSRGBFunc(cFloat.y); - cFloat.z = linearToSRGBFunc(cFloat.z); + cFloat = linearToSRGB(cFloat); } // Overwrite the RGBA8u image too (this will go out to From ff0e4bff2775de9a633bb05d0ca9bd090fd64599 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 22 May 2021 15:20:15 -0700 Subject: [PATCH 064/901] kramv - hide buttons that aren't relevant to the currently loaded texture --- kramv/KramRenderer.mm | 4 +-- kramv/KramViewerBase.cpp | 2 +- kramv/KramViewerBase.h | 5 ++- kramv/KramViewerMain.mm | 76 ++++++++++++++++++++++++++++++++++------ 4 files changed, 73 
insertions(+), 14 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 1b206419..dc81331e 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -935,8 +935,8 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie uniformsLevel.drawOffset.y -= h + gap; } - // this its ktxImage.totalLevels() - int32_t numLevels = _showSettings->totalLevels(); + // this its ktxImage.totalChunks() + int32_t numLevels = _showSettings->totalChunks(); for (int32_t level = 0; level < numLevels; ++level) { diff --git a/kramv/KramViewerBase.cpp b/kramv/KramViewerBase.cpp index 5fb3d12c..e241bd5d 100644 --- a/kramv/KramViewerBase.cpp +++ b/kramv/KramViewerBase.cpp @@ -5,7 +5,7 @@ namespace kram using namespace simd; using namespace std; -int32_t ShowSettings::totalLevels() const { +int32_t ShowSettings::totalChunks() const { int32_t one = 1; return std::max(one, faceCount) * std::max(one, arrayCount) * std::max(one, sliceCount); } diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 7159d256..d5cf2211 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -68,7 +68,7 @@ class ShowSettings { int32_t sliceNumber = 0; int32_t sliceCount = 0; - int32_t totalLevels() const; + int32_t totalChunks() const; // DONE: hook all these up to shader and view bool isHudShown = true; @@ -103,6 +103,9 @@ class ShowSettings { // draw with reverseZ to better match perspective bool isReverseZ = true; + // whether files are pulled from disk or zip archive. + bool isArchive = false; + // can have up to 5 channels (xyz as xy, 2 other channels) int32_t numChannels = 0; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 47f04c8c..0c251f48 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -401,6 +401,7 @@ void encodeSrcForEncodeComparisons(bool increment) { @implementation MyMTKView { NSStackView* _buttonStack; + NSMutableArray* _buttonArray; NSTextField* _hudLabel; NSTextField* _hudLabel2; @@ -464,6 +465,7 @@ - (instancetype)initWithCoder:(NSCoder*)coder { _zoomGesture = [[NSMagnificationGestureRecognizer alloc] initWithTarget:self action:@selector(handleGesture:)]; [self addGestureRecognizer:_zoomGesture]; + _buttonArray = [[NSMutableArray alloc] init]; _buttonStack = [self _addButtons]; // hide until image loaded @@ -554,13 +556,19 @@ - (NSStackView*)_addButtons { } else { //sKrect.origin.y += 25; + + // keep all buttons, since stackView will remove and pack the stack + [_buttonArray addObject:button]; } [buttons addObject:button]; + + } NSStackView* stackView = [NSStackView stackViewWithViews:buttons]; stackView.orientation = NSUserInterfaceLayoutOrientationVertical; + stackView.detachesHiddenViews = YES; // default, but why have to have _buttonArrary [self addSubview: stackView]; #if 0 @@ -623,9 +631,9 @@ - (NSTextField*)_addHud:(BOOL)isShadow // add vertical constrains to have it fill window, but keep 800 width label.preferredMaxLayoutWidth = 800; - NSDictionary* views = @{ @"label" : label }; - [self addConstraints:[NSLayoutConstraint constraintsWithVisualFormat:@"H:|-[label]" options:0 metrics:nil views:views]]; - [self addConstraints:[NSLayoutConstraint constraintsWithVisualFormat:@"V:|-[label]" options:0 metrics:nil views:views]]; + //NSDictionary* views = @{ @"label" : label }; + //[self addConstraints:[NSLayoutConstraint constraintsWithVisualFormat:@"H:|-[label]" options:0 metrics:nil views:views]]; + //[self addConstraints:[NSLayoutConstraint constraintsWithVisualFormat:@"V:|-[label]" options:0 metrics:nil 
views:views]]; return label; } @@ -661,7 +669,7 @@ - (void)doZoomMath:(float)newZoom newPan:(float2&)newPan { float maxX = 0.5f; float minY = -0.5f; if (_showSettings->isShowingAllLevelsAndMips) { - maxX += 1.0f * (_showSettings->totalLevels() - 1); + maxX += 1.0f * (_showSettings->totalChunks() - 1); minY -= 1.0f * (_showSettings->maxLOD - 1); } @@ -739,7 +747,7 @@ - (void)handleGesture:(NSGestureRecognizer *)gestureRecognizer CGRect imageRect = CGRectMake(pt0.x, pt0.y, pt1.x - pt0.x, pt1.y - pt0.y); CGRect viewRect = CGRectMake(-1.0f, -1.0f, 2.0f, 2.0f); - int32_t numTexturesX = _showSettings->totalLevels(); + int32_t numTexturesX = _showSettings->totalChunks(); int32_t numTexturesY = _showSettings->maxLOD; if (_showSettings->isShowingAllLevelsAndMips) { @@ -909,7 +917,7 @@ - (void)updateEyedropper { // TODO: finish this logic, need to account for gaps too, and then isolate to a given level and mip to sample // if (_showSettings->isShowingAllLevelsAndMips) { -// pixel.x *= _showSettings->totalLevels(); +// pixel.x *= _showSettings->totalChunks(); // pixel.y *= _showSettings->maxLOD; // } @@ -1064,8 +1072,8 @@ - (void)updateEyedropper { text += tmp; } - // display the premul values too - if (c.a < 1.0f) + // display the premul values too, but not fully transparent pixels + if (c.a > 0.0 && c.a < 1.0f) { printChannels(tmp, "lnp: ", toPremul(c), numChannels, isFloat, isSigned); text += tmp; @@ -1168,7 +1176,7 @@ - (void)scrollWheel:(NSEvent *)event CGRect imageRect = CGRectMake(pt0.x, pt0.y, pt1.x - pt0.x, pt1.y - pt0.y); CGRect viewRect = CGRectMake(-1.0f, -1.0f, 2.0f, 2.0f); - int32_t numTexturesX = _showSettings->totalLevels(); + int32_t numTexturesX = _showSettings->totalChunks(); int32_t numTexturesY = _showSettings->maxLOD; if (_showSettings->isShowingAllLevelsAndMips) { @@ -1203,6 +1211,44 @@ - (void)scrollWheel:(NSEvent *)event } } +// use this to enable/disable menus, buttons, etc. Called on every event +// when not implemented, then user items are always enabled +- (BOOL)validateUserInterfaceItem:(id)item +{ + // TODO: tie to menus and buttons + return YES; +} + +- (NSButton*)findButton:(const char*)name { + NSString* title = [NSString stringWithUTF8String:name]; + for (NSButton* button in _buttonArray) { + if (button.title == title) + return button; + } + return nil; +} + +- (void)updateUIAfterLoad { + + // base on showSettings, hide some fo the buttons + bool isShowAllHidden = _showSettings->totalChunks() <= 1 && _showSettings->mipLOD <= 1; + + bool isArrayHidden = _showSettings->arrayCount <= 1; + bool isFaceSliceHidden = _showSettings->faceCount <= 1 && _showSettings->sliceCount <= 1; + bool isMipHidden = _showSettings->mipLOD <= 1; + + bool isJumpToNextHidden = !_showSettings->isArchive; + + // could hide rgba buttons on some formas + // or have XYZBA on nromals, but have Y mapped to array + + [self findButton:"Y"].hidden = isArrayHidden; + [self findButton:"F"].hidden = isFaceSliceHidden; + [self findButton:"M"].hidden = isMipHidden; + [self findButton:"S"].hidden = isShowAllHidden; + [self findButton:"J"].hidden = isJumpToNextHidden; +} + // TODO: convert to C++ actions, and then call into Base holding all this // move pan/zoom logic too. Then use that as start of Win32 kramv. 
@@ -1599,7 +1645,7 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown case Key::F: // cube or cube array, but hit s to pick cubearray - if (_showSettings->faceCount) { + if (_showSettings->faceCount > 1) { if (isShiftKeyDown) { _showSettings->faceNumber = MAX(_showSettings->faceNumber - 1, 0); } @@ -1809,6 +1855,11 @@ - (BOOL)loadTextureFromArchive:(const char*)filename timestamp:(double)timestamp _noImageLoaded = NO; } + _showSettings->isArchive = false; + + // show/hide button + [self updateUIAfterLoad]; + self.needsDisplay = YES; return YES; } @@ -1932,6 +1983,11 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { _noImageLoaded = NO; } + _showSettings->isArchive = false; + + // show/hide button + [self updateUIAfterLoad]; + self.needsDisplay = YES; return YES; } From 9fd87737d78c456ae0fc0e970dc936edb09fcacd Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 22 May 2021 16:42:24 -0700 Subject: [PATCH 065/901] Kramv - fix hiding of mip button, only have show gray show pixels not full black/white --- kramv/KramShaders.metal | 2 +- kramv/KramViewerMain.mm | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 544bab07..287c0570 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -656,7 +656,7 @@ float4 DrawPixels( } else if (uniforms.debugMode == ShDebugModeGray) { // with 565 formats, all pixels with light up - if (c.r != 0 && (c.r == c.g && c.r == c.b)) { + if ((c.r > 0.0 && c.r < 1.0) && (c.r == c.g && c.r == c.b)) { isHighlighted = true; } } diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 0c251f48..426e7fea 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1231,11 +1231,11 @@ - (NSButton*)findButton:(const char*)name { - (void)updateUIAfterLoad { // base on showSettings, hide some fo the buttons - bool isShowAllHidden = _showSettings->totalChunks() <= 1 && _showSettings->mipLOD <= 1; + bool isShowAllHidden = _showSettings->totalChunks() <= 1 && _showSettings->maxLOD <= 1; bool isArrayHidden = _showSettings->arrayCount <= 1; bool isFaceSliceHidden = _showSettings->faceCount <= 1 && _showSettings->sliceCount <= 1; - bool isMipHidden = _showSettings->mipLOD <= 1; + bool isMipHidden = _showSettings->maxLOD <= 1; bool isJumpToNextHidden = !_showSettings->isArchive; From c9ebc74911ac3aaeac1c4238eeaeeb09bacee5b1 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 22 May 2021 19:51:02 -0700 Subject: [PATCH 066/901] Kram - add MortonOrder twiddle class, remove flips on height to normals, block actions Now actions are blocked if the button is hidden. Using this as poor man's action test. Some of these states may not be getting reset when loading new texture, so test to make sure don't get stuck without action to disable. collectorbarrelh-n from height now matches the collectorbarrel-n. Still need to make sure using OpenGL normal +Y, not -Y of DX normals. Have info test length for info on sizc field. 
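A standalone sketch of what the new MortonOrder twiddle is for: remapping a mip's blocks from linear row order into Morton order, e.g. for hardware that expects twiddled block layout at upload time. Only the bit-interleave mirrors the mortonOffset() logic added to Kram.cpp in this patch; the copy helper and its names are illustrative, and the same constraints apply (the smaller block count must be a power of two and the larger a multiple of it):

    #include <cstdint>
    #include <cstring>

    // Interleave x/y bits up to the smaller dimension, then append the leftover
    // high bits of the larger dimension (same scheme as MortonOrder::mortonOffset).
    static uint32_t mortonOffset(uint32_t x, uint32_t y, uint32_t minDim)
    {
        uint32_t offset = 0, shift = 0;
        for (uint32_t mask = 1; mask < minDim; mask <<= 1) {
            offset |= (((y & mask) << 1) | (x & mask)) << shift;
            shift++;
        }
        // at least one of x or y has no significant bits left above 'shift'
        offset |= ((x | y) >> shift) << (shift * 2);
        return offset;
    }

    // Copy one mip's blocks (blocksX x blocksY, each blockSize bytes) from
    // linear order in src into twiddled order in dst.
    static void copyBlocksTwiddled(const uint8_t* src, uint8_t* dst,
                                   uint32_t blocksX, uint32_t blocksY,
                                   uint32_t blockSize)
    {
        uint32_t minDim = (blocksX <= blocksY) ? blocksX : blocksY;
        for (uint32_t y = 0; y < blocksY; ++y) {
            for (uint32_t x = 0; x < blocksX; ++x) {
                memcpy(dst + mortonOffset(x, y, minDim) * blockSize,
                       src + (y * blocksX + x) * blockSize,
                       blockSize);
            }
        }
    }
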
--- kramv/KramRenderer.mm | 2 +- kramv/KramViewerMain.mm | 168 +++++++++++++++++++++------------ libkram/kram/Kram.cpp | 55 +++++++++-- libkram/kram/KramImage.cpp | 8 +- libkram/kram/KramImageInfo.cpp | 7 +- 5 files changed, 162 insertions(+), 78 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index dc81331e..b2061068 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -612,7 +612,7 @@ - (float4x4)computeImageTransform:(float)panX panY:(float)panY zoom:(float)zoom - (void)_updateGameState { - /// Update any game state before encoding renderint commands to our drawable + /// Update any game state before encoding rendering commands to our drawable Uniforms& uniforms = *(Uniforms*)_dynamicUniformBuffer[_uniformBufferIndex].contents; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 426e7fea..d23149c5 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1230,6 +1230,9 @@ - (NSButton*)findButton:(const char*)name { - (void)updateUIAfterLoad { + // TODO: move these to actions, and test their state instead of looking up buttons + // here and in HandleKey. + // base on showSettings, hide some fo the buttons bool isShowAllHidden = _showSettings->totalChunks() <= 1 && _showSettings->maxLOD <= 1; @@ -1239,14 +1242,34 @@ - (void)updateUIAfterLoad { bool isJumpToNextHidden = !_showSettings->isArchive; - // could hide rgba buttons on some formas - // or have XYZBA on nromals, but have Y mapped to array + bool isGreenHidden = _showSettings->numChannels <= 1; + bool isBlueHidden = _showSettings->numChannels <= 2 && !_showSettings->isNormal; // reconstruct z = b on normals + + // TODO: also need a hasAlpha for pixels, since many compressed formats like ASTC always have 4 channels + // but internally store R,RG01,... etc. Can get more data from swizzle in the props. + // Often alpha doesn't store anything useful to view. + + bool hasAlpha = _showSettings->numChannels >= 3; + + bool isAlphaHidden = !hasAlpha; + bool isPremulHidden = !hasAlpha; + bool isCheckerboardHidden = !hasAlpha; + + bool isSignedHidden = !isSignedFormat(_showSettings->originalFormat); [self findButton:"Y"].hidden = isArrayHidden; [self findButton:"F"].hidden = isFaceSliceHidden; [self findButton:"M"].hidden = isMipHidden; [self findButton:"S"].hidden = isShowAllHidden; [self findButton:"J"].hidden = isJumpToNextHidden; + + [self findButton:"G"].hidden = isGreenHidden; + [self findButton:"B"].hidden = isBlueHidden; + [self findButton:"A"].hidden = isAlphaHidden; + + [self findButton:"P"].hidden = isPremulHidden; + [self findButton:"N"].hidden = isSignedHidden; + [self findButton:"C"].hidden = isCheckerboardHidden; } @@ -1358,55 +1381,63 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown // rgba channels case Key::Num1: case Key::R: - if (channels == TextureChannels::ModeRRR1 || channels == TextureChannels::ModeR001) { - channels = TextureChannels::ModeRGBA; - text = "Mask RGBA"; - } - else { - channels = isShiftKeyDown ? TextureChannels::ModeRRR1 : TextureChannels::ModeR001; - text = isShiftKeyDown ? "Mask RRR1" : "Mask R001"; + if (![self findButton:"R"].isHidden) { + if (channels == TextureChannels::ModeRRR1 || channels == TextureChannels::ModeR001) { + channels = TextureChannels::ModeRGBA; + text = "Mask RGBA"; + } + else { + channels = isShiftKeyDown ? TextureChannels::ModeRRR1 : TextureChannels::ModeR001; + text = isShiftKeyDown ? 
"Mask RRR1" : "Mask R001"; + } + isChanged = true; } - isChanged = true; - + break; case Key::Num2: case Key::G: - if (channels == TextureChannels::ModeGGG1 || channels == TextureChannels::Mode0G01) { - channels = TextureChannels::ModeRGBA; - text = "Mask RGBA"; - } - else { - channels = isShiftKeyDown ? TextureChannels::ModeGGG1 : TextureChannels::Mode0G01; - text = isShiftKeyDown ? "Mask GGG1" : "Mask 0G01"; + if (![self findButton:"G"].isHidden) { + if (channels == TextureChannels::ModeGGG1 || channels == TextureChannels::Mode0G01) { + channels = TextureChannels::ModeRGBA; + text = "Mask RGBA"; + } + else { + channels = isShiftKeyDown ? TextureChannels::ModeGGG1 : TextureChannels::Mode0G01; + text = isShiftKeyDown ? "Mask GGG1" : "Mask 0G01"; + } + isChanged = true; } - isChanged = true; break; case Key::Num3: case Key::B: - if (channels == TextureChannels::ModeBBB1 || channels == TextureChannels::Mode00B1) { - channels = TextureChannels::ModeRGBA; - text = "Mask RGBA"; - } - else { - channels = isShiftKeyDown ? TextureChannels::ModeBBB1 : TextureChannels::Mode00B1; - text = isShiftKeyDown ? "Mask BBB1" : "Mask 00B1"; + if (![self findButton:"B"].isHidden) { + if (channels == TextureChannels::ModeBBB1 || channels == TextureChannels::Mode00B1) { + channels = TextureChannels::ModeRGBA; + text = "Mask RGBA"; + } + else { + channels = isShiftKeyDown ? TextureChannels::ModeBBB1 : TextureChannels::Mode00B1; + text = isShiftKeyDown ? "Mask BBB1" : "Mask 00B1"; + } + isChanged = true; } - isChanged = true; break; case Key::Num4: case Key::A: - if (channels == TextureChannels::ModeAAA1) { - channels = TextureChannels::ModeRGBA; - text = "Mask RGBA"; - } - else { - channels = TextureChannels::ModeAAA1; - text = "Mask AAA1"; + if (![self findButton:"A"].isHidden) { + if (channels == TextureChannels::ModeAAA1) { + channels = TextureChannels::ModeRGBA; + text = "Mask RGBA"; + } + else { + channels = TextureChannels::ModeAAA1; + text = "Mask AAA1"; + } + isChanged = true; } - isChanged = true; break; case Key::E: { @@ -1510,10 +1541,12 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown // toggle checkerboard for transparency case Key::C: - _showSettings->isCheckerboardShown = !_showSettings->isCheckerboardShown; - isChanged = true; - text = "Checker "; - text += _showSettings->isCheckerboardShown ? "On" : "Off"; + if (![self findButton:"C"].isHidden) { + _showSettings->isCheckerboardShown = !_showSettings->isCheckerboardShown; + isChanged = true; + text = "Checker "; + text += _showSettings->isCheckerboardShown ? "On" : "Off"; + } break; // toggle pixel grid when magnified above 1 pixel, can happen from mipmap changes too @@ -1574,11 +1607,14 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown break; } case Key::S: - // TODO: have drawAllMips, drawAllLevels, drawAllLevelsAndMips - _showSettings->isShowingAllLevelsAndMips = !_showSettings->isShowingAllLevelsAndMips; - isChanged = true; - text = "Show All "; - text += _showSettings->isShowingAllLevelsAndMips ? "On" : "Off"; + if (![self findButton:"S"].isHidden) { + + // TODO: have drawAllMips, drawAllLevels, drawAllLevelsAndMips + _showSettings->isShowingAllLevelsAndMips = !_showSettings->isShowingAllLevelsAndMips; + isChanged = true; + text = "Show All "; + text += _showSettings->isShowingAllLevelsAndMips ? 
"On" : "Off"; + } break; // toggle hud that shows name and pixel value under the cursor @@ -1610,37 +1646,45 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown // toggle signed vs. unsigned case Key::N: - _showSettings->isSigned = !_showSettings->isSigned; - isChanged = true; - text = "Signed "; - text += _showSettings->isSigned ? "On" : "Off"; + if (![self findButton:"N"].isHidden) { + _showSettings->isSigned = !_showSettings->isSigned; + isChanged = true; + text = "Signed "; + text += _showSettings->isSigned ? "On" : "Off"; + } break; // toggle premul alpha vs. unmul case Key::P: - _showSettings->isPremul = !_showSettings->isPremul; - isChanged = true; - text = "Premul "; - text += _showSettings->isPremul ? "On" : "Off"; + if (![self findButton:"P"].isHidden) { + _showSettings->isPremul = !_showSettings->isPremul; + isChanged = true; + text = "Premul "; + text += _showSettings->isPremul ? "On" : "Off"; + } break; case Key::J: - if ([self advanceTextureFromAchive:!isShiftKeyDown]) { - isChanged = true; - text = "Loaded " + _showSettings->lastFilename; + if (![self findButton:"J"].isHidden) { + if ([self advanceTextureFromAchive:!isShiftKeyDown]) { + isChanged = true; + text = "Loaded " + _showSettings->lastFilename; + } } break; // mip up/down case Key::M: - if (isShiftKeyDown) { - _showSettings->mipLOD = MAX(_showSettings->mipLOD - 1, 0); - } - else { - _showSettings->mipLOD = MIN(_showSettings->mipLOD + 1, _showSettings->maxLOD - 1); + if (_showSettings->maxLOD > 1) { + if (isShiftKeyDown) { + _showSettings->mipLOD = MAX(_showSettings->mipLOD - 1, 0); + } + else { + _showSettings->mipLOD = MIN(_showSettings->mipLOD + 1, _showSettings->maxLOD - 1); + } + sprintf(text, "Mip %d/%d", _showSettings->mipLOD, _showSettings->maxLOD); + isChanged = true; } - sprintf(text, "Mip %d/%d", _showSettings->mipLOD, _showSettings->maxLOD); - isChanged = true; break; case Key::F: diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 4d5374a6..463bef9e 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -34,18 +34,43 @@ namespace kram { using namespace std; -bool LoadKtx(const uint8_t* data, size_t dataSize, Image& sourceImage) +// Twiddle pixels or blocks into Morton order. Usually this is done during the upload of +// linear-order block textures. But on some platforms may be able to directly use the block +// and pixel data if organized in the exact twiddle order the hw uses. +// Code adapted from KTX doc example. +class MortonOrder { - KTXImage image; - if (!image.open(data, dataSize)) { - return false; +public: +MortonOrder(uint32_t width, uint32_t height) { + minDim = (width <= height) ? 
width : height; + + // Smaller size must be a power of 2 + assert((minDim & (minDim - 1)) == 0); + + // Larger size must be a multiple of the smaller + assert(width % minDim == 0 && height % minDim == 0); +} + +// For a given xy block in a mip level, find the block offset in morton order +uint32_t mortonOffset(uint32_t x, uint32_t y) +{ + uint32_t offset = 0, shift = 0; + + for (uint32_t mask = 1; mask < minDim; mask <<= 1) { + offset |= (((y & mask) << 1) | (x & mask)) << shift; + shift++; } - // many different types of KTX files, for now only import from 2D type - // and only pull the first mip, but want to be able to pull custom mips from - // many types - return sourceImage.loadImageFromKTX(image); + // At least one of width and height will have run out of most-significant bits + offset |= ((x | y) >> shift) << (shift * 2); + return offset; } + +private: + uint32_t minDim = 0; +}; + + inline Color toPremul(Color c) { // these are really all fractional, but try this @@ -74,6 +99,18 @@ inline Color toGrayscaleRec709(Color c, const Mipper& mipper) { return c; } +bool LoadKtx(const uint8_t* data, size_t dataSize, Image& sourceImage) +{ + KTXImage image; + if (!image.open(data, dataSize)) { + return false; + } + + // many different types of KTX files, for now only import from 2D type + // and only pull the first mip, but want to be able to pull custom mips from + // many types + return sourceImage.loadImageFromKTX(image); +} bool LoadPng(const uint8_t* data, size_t dataSize, bool isPremulRgb, bool isGray, Image& sourceImage) { @@ -1445,7 +1482,7 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, length *= numChunks; uint64_t percent = (100 * lengthCompressed) / length; - isMB = (lengthCompressed > (512 * 1024)); + isMB = (length > (512 * 1024)); double lengthF = isMB ? length / (1024.0f * 1024.0f) : length / 1024.0f; double lengthCompressedF = isMB ? lengthCompressed / (1024.0f * 1024.0f) : lengthCompressed / 1024.0f; diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 9ccee08d..6993649e 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -2302,7 +2302,7 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, // , so opaque textures repro as 254 alpha on Toof-a.png. // ate sets pbits on mode 6 for same block. Also fixed mip weights in non-pow2 mipper. 
- bool doPrintBlock = false; +// bool doPrintBlock = false; // if (bx == 8 && by == 1) { // int32_t bp = 0; // bp = bp; @@ -2339,9 +2339,9 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, case MyMTLPixelFormatBC7_RGBAUnorm_sRGB: { bc7enc_compress_block(dstBlock, srcPixelCopy, &bc7params); - if (doPrintBlock) { - printBCBlock(dstBlock, info.pixelFormat); - } +// if (doPrintBlock) { +// printBCBlock(dstBlock, info.pixelFormat); +// } break; } default: { diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 1a0ce2de..3fee97fa 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -1305,6 +1305,9 @@ void ImageInfo::heightToNormals(int32_t w, int32_t h, float dx = (cE - cW) * scaleX; float dy = (cN - cS) * scaleY; + //dx = -dx; + //dy = -dy; + float4 normal = float4m(dx, dy, 1.0f, 0.0f); normal = normalize(normal); @@ -1337,8 +1340,8 @@ void ImageInfo::heightToNormals(int32_t w, int32_t h, float dx = (cE - cW) * scaleX; float dy = (cN - cS) * scaleY; - dx = -dx; - dy = -dy; + //dx = -dx; + //dy = -dy; float4 normal = float4m(dx, dy, 1.0f, 0.0f); normal = normalize(normal); From d5fa28757835bbfcdc03e79edfb377eb97236c47 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 23 May 2021 00:51:54 -0700 Subject: [PATCH 067/901] kramv - fix small mip sample lookup, add cmake gpu capture for shaders with source, fix mipX/Y calc --- kramv/CMakeLists.txt | 6 ++++++ kramv/KramRenderer.mm | 5 ++++- kramv/KramShaders.metal | 10 +++++----- kramv/KramViewerBase.h | 4 ++++ kramv/KramViewerMain.mm | 9 ++++++--- 5 files changed, 25 insertions(+), 9 deletions(-) diff --git a/kramv/CMakeLists.txt b/kramv/CMakeLists.txt index cd290094..11baacc3 100644 --- a/kramv/CMakeLists.txt +++ b/kramv/CMakeLists.txt @@ -77,6 +77,12 @@ set_target_properties(${myTargetApp} PROPERTIES # TODO: not sure how to set this, nothing online either ? # MACOSX_BUNDLE_APP_CATEGORY "Developer Tools" + + #------------------------- + # turn on shader capture support and indexing + # why can't this just be a yes or no, there's "Yes, exclude source code" + XCODE_ATTRIBUTE_MTL_ENABLE_DEBUG_INFO "Yes, include source code" + XCODE_ATTRIBUTE_MTL_ENABLE_INDEX_STORE YES ) target_compile_options(${myTargetApp} PRIVATE -W -Wall) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index b2061068..461141af 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -1050,7 +1050,10 @@ - (void)drawSample int32_t textureLookupX = _showSettings->textureLookupX; int32_t textureLookupY = _showSettings->textureLookupY; - [self drawSamples:commandBuffer lookupX:textureLookupX lookupY:textureLookupY]; + int32_t textureLookupMipX = _showSettings->textureLookupMipX; + int32_t textureLookupMipY = _showSettings->textureLookupMipY; + + [self drawSamples:commandBuffer lookupX:textureLookupMipX lookupY:textureLookupMipY]; // Synchronize the managed texture. id blitCommandEncoder = [commandBuffer blitCommandEncoder]; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 287c0570..f3289232 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -982,7 +982,7 @@ kernel void SampleImageCS( // the for-loop is replaced with a collection of threads, each of which // calls this function. 
uint2 uv = uniforms.uv; // tie into texture lookup - uv = max(uint2(1), uv >> uniforms.mipLOD); + // uv >>= uniforms.mipLOD; // the color returned is linear float4 color = colorMap.read(uv, uniforms.mipLOD); @@ -999,7 +999,7 @@ kernel void SampleImageArrayCS( // the for-loop is replaced with a collection of threads, each of which // calls this function. uint2 uv = uniforms.uv; // tie into texture lookup - uv = max(uint2(1), uv >> uniforms.mipLOD); + //uv >>= uniforms.mipLOD; uint arrayOrSlice = uniforms.arrayOrSlice; @@ -1018,7 +1018,7 @@ kernel void SampleCubeCS( // the for-loop is replaced with a collection of threads, each of which // calls this function. uint2 uv = uint2(uniforms.uv); // tie into texture lookup - uv = max(uint2(1), uv >> uniforms.mipLOD); + //uv >>= uniforms.mipLOD; uint face = uniforms.face; @@ -1040,7 +1040,7 @@ kernel void SampleCubeArrayCS( // the for-loop is replaced with a collection of threads, each of which // calls this function. uint2 uv = uint2(uniforms.uv); // tie into texture lookup - uv = max(uint2(1), uv >> uniforms.mipLOD); + //uv >>= uniforms.mipLOD; uint face = uniforms.face; uint arrayOrSlice = uniforms.arrayOrSlice; @@ -1060,7 +1060,7 @@ kernel void SampleVolumeCS( // the for-loop is replaced with a collection of threads, each of which // calls this function. uint3 uv = uint3(uniforms.uv, uniforms.arrayOrSlice); // tie into texture lookup - uv = max(uint3(1), uv >> uniforms.mipLOD); + //uv >>= uniforms.mipLOD); // the color returned is linear float4 color = colorMap.read(uv, uniforms.mipLOD); diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index d5cf2211..d907c296 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -125,6 +125,10 @@ class ShowSettings { int32_t textureLookupX = 0; int32_t textureLookupY = 0; + // exact pixel in the mip level + int32_t textureLookupMipX = 0; + int32_t textureLookupMipY = 0; + int32_t textureResultX = 0; int32_t textureResultY = 0; float4 textureResult; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index d23149c5..cf71e8ed 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -983,15 +983,15 @@ - (void)updateEyedropper { // show block num int mipLOD = _showSettings->mipLOD; - // TODO:: these block numbers are not accurate on Toof at 4x4 + // TODO: these block numbers are not accurate on Toof at 4x4 // there is resizing going on to the dimensions int mipX = _showSettings->imageBoundsX; int mipY = _showSettings->imageBoundsY; for (int i = 0; i < mipLOD; ++i) { - mipX = (mipX+1) >> 1; - mipY = (mipY+1) >> 1; + mipX = mipX >> 1; + mipY = mipY >> 1; } mipX = std::max(1, mipX); mipY = std::max(1, mipY); @@ -999,6 +999,9 @@ - (void)updateEyedropper { mipX = (int32_t)(uvX * mipX); mipY = (int32_t)(uvY * mipY); + _showSettings->textureLookupMipX = mipX; + _showSettings->textureLookupMipY = mipY; + // TODO: may want to return mip in pixel readback // don't have it right now, so don't display if preview is enabled if (_showSettings->isPreview) From f214d50822da0f5898d5cb9422c2ed5d65c75c5d Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 23 May 2021 13:19:15 -0700 Subject: [PATCH 068/901] kramv - early menu support view menu with items show/hidden the same way buttons are. Not handling state on/off yet. 
--- kramv/Base.lproj/Main.storyboard | 4 ++ kramv/KramViewerMain.mm | 83 ++++++++++++++++++++++++++------ libkram/kram/KTXImage.cpp | 9 ++-- libkram/kram/KramImage.cpp | 2 +- 4 files changed, 78 insertions(+), 20 deletions(-) diff --git a/kramv/Base.lproj/Main.storyboard b/kramv/Base.lproj/Main.storyboard index 50fcd024..86041b06 100644 --- a/kramv/Base.lproj/Main.storyboard +++ b/kramv/Base.lproj/Main.storyboard @@ -92,6 +92,10 @@ + + + + diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index cf71e8ed..de695e5e 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -400,6 +400,7 @@ void encodeSrcForEncodeComparisons(bool increment) { @implementation MyMTKView { + NSMenu* _viewMenu; // really the items NSStackView* _buttonStack; NSMutableArray* _buttonArray; NSTextField* _hudLabel; @@ -571,31 +572,42 @@ - (NSStackView*)_addButtons { stackView.detachesHiddenViews = YES; // default, but why have to have _buttonArrary [self addSubview: stackView]; -#if 0 +#if 1 // Want menus, so user can define their own shortcuts to commands // Also need to enable/disable this via validateUserInterfaceItem NSApplication* app = [NSApplication sharedApplication]; - // TODO: add an edit menu in the storyboard - NSMenu* menu = app.windowsMenu; - [menu addItem:[NSMenuItem separatorItem]]; + NSMenu* mainMenu = app.mainMenu; + NSMenuItem* viewMenuItem = mainMenu.itemArray[2]; + _viewMenu = viewMenuItem.submenu; + + // TODO: add a view menu in the storyboard + //NSMenu* menu = app.windowsMenu; + //[menu addItem:[NSMenuItem separatorItem]]; for (int32_t i = 0; i < numButtons; ++i) { - const char* icon = names[2*i+0]; - const char* tip = names[2*i+1]; + const char* icon = names[2*i+0]; // single char + const char* title = names[2*i+1]; - NSString* shortcut = [NSString stringWithUTF8String:icon]; - NSString* name = [NSString stringWithUTF8String:tip]; - shortcut = @""; // for now, or AppKit turns key int cmd+shift+key + NSString* toolTip = [NSString stringWithUTF8String:icon]; + NSString* name = [NSString stringWithUTF8String:title]; + NSString* shortcut = @""; // for now, or AppKit turns key int cmd+shift+key if (icon[0] == '-') { - [menu addItem:[NSMenuItem separatorItem]]; + [_viewMenu addItem:[NSMenuItem separatorItem]]; } else { - NSMenuItem* menuItem = [[NSMenuItem alloc] initWithTitle:name action:@selector(handleAction) keyEquivalent:shortcut]; - [menu addItem: menuItem]; + NSMenuItem* menuItem = [[NSMenuItem alloc] initWithTitle:name action:@selector(handleAction:) keyEquivalent:shortcut]; + menuItem.toolTip = toolTip; // use in findMenuItem + + // TODO: menus and buttons should reflect any toggle state + // menuItem.state = Mixed/Off/On; + + [_viewMenu addItem: menuItem]; } } + + [_viewMenu addItem:[NSMenuItem separatorItem]]; #endif return stackView; @@ -1231,6 +1243,17 @@ - (NSButton*)findButton:(const char*)name { return nil; } +- (NSMenuItem*)findMenuItem:(const char*)name { + NSString* title = [NSString stringWithUTF8String:name]; + + for (NSMenuItem* menuItem in _viewMenu.itemArray) { + if (menuItem.toolTip == title) + return menuItem; + } + return nil; +} + + - (void)updateUIAfterLoad { // TODO: move these to actions, and test their state instead of looking up buttons @@ -1260,6 +1283,7 @@ - (void)updateUIAfterLoad { bool isSignedHidden = !isSignedFormat(_showSettings->originalFormat); + // buttons [self findButton:"Y"].hidden = isArrayHidden; [self findButton:"F"].hidden = isFaceSliceHidden; [self findButton:"M"].hidden = isMipHidden; @@ -1273,6 +1297,21 @@ - 
(void)updateUIAfterLoad { [self findButton:"P"].hidden = isPremulHidden; [self findButton:"N"].hidden = isSignedHidden; [self findButton:"C"].hidden = isCheckerboardHidden; + + // menus (may want to disable, not hide) + [self findMenuItem:"Y"].hidden = isArrayHidden; + [self findMenuItem:"F"].hidden = isFaceSliceHidden; + [self findMenuItem:"M"].hidden = isMipHidden; + [self findMenuItem:"S"].hidden = isShowAllHidden; + [self findMenuItem:"J"].hidden = isJumpToNextHidden; + + [self findMenuItem:"G"].hidden = isGreenHidden; + [self findMenuItem:"B"].hidden = isBlueHidden; + [self findMenuItem:"A"].hidden = isAlphaHidden; + + [self findMenuItem:"P"].hidden = isPremulHidden; + [self findMenuItem:"N"].hidden = isSignedHidden; + [self findMenuItem:"C"].hidden = isCheckerboardHidden; } @@ -1280,14 +1319,26 @@ - (void)updateUIAfterLoad { // move pan/zoom logic too. Then use that as start of Win32 kramv. - (IBAction)handleAction:(id)sender { - // sender is the UI element/NSButton - // if (sender == ) - NSButton* button = (NSButton*)sender; NSEvent* theEvent = [NSApp currentEvent]; bool isShiftKeyDown = (theEvent.modifierFlags & NSEventModifierFlagShift); - string title = [button.title UTF8String]; + string title; + + // sender is the UI element/NSButton + if ([sender isKindOfClass:[NSButton class]]) { + NSButton* button = (NSButton*)sender; + title = [button.title UTF8String]; + } + else if ([sender isKindOfClass:[NSMenuItem class]]) { + NSMenuItem* menuItem = (NSMenuItem*)sender; + title = [menuItem.toolTip UTF8String]; + } + else { + KLOGE("kram", "unknown UI element"); + return; + } + int32_t keyCode = -1; if (title == "?") diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 8d328a2e..55d7f671 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1210,14 +1210,17 @@ void KTXImage::initMipLevels(size_t mipOffset) uint32_t levelSize = dataSize * numChunks; + // TODO: align mip offset to multiple of 4 bytes for KTX1, may need for kTX2 + // make sure when adding up offsets with length to include this padding +// if (!skipImageLength) { +// offset += 3 - (offset & 3); // align level to 4 bytes +// } + // compute dataSize from header data if (!skipImageLength) { // advance past the length offset += sizeof(uint32_t); } - - // TODO: Here is where offset alignment to 4 bytes may be needed - // but that also needs to be accounted for in allocation // level holds single texture size not level size, but offset reflects level start KTXImageLevel level = { offset, 0, dataSize }; diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 6993649e..b9704371 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -1020,7 +1020,7 @@ struct KTX2DescriptorChannelBlock { uint16_t bitOffset = 0; uint8_t bitLength = 0; uint8_t channelType : 4; // RED, GREEN, BLUE, RRR, GGG - uint8_t FSEL : 4; // l is low bit + uint8_t FSEL : 4; // L is low bit - Float, Signed, Exponent, Linear (used on Alpha) // 32-bits uint8_t samplePositions[4] = {0}; From 885d36d10245045b0ef56cd8c40ffc69b1a65469 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 23 May 2021 16:38:59 -0700 Subject: [PATCH 069/901] kram - isolate decompress and copy logic into unpackLevel(), update KramLoader on blit path The blit path can directly upload to staging from zstd mips. Update that path, and call unpackLevel() if compressed. Bump staging memory to 64MB for larger texture support. This code path isn't take yet, but pass isInfoOnly to skip decompress. 
Also put supercompressionType in KTXImage and return that, and have info display the name. Don't reuse zstd context across mips, so I can isolate that logic into unpackLevel. Can add other decompression here too, and zstd+uastc need both a decompress and and a transcode. --- kramv/KramLoader.mm | 102 ++++++++++++++----------- kramv/KramRenderer.mm | 2 + kramv/KramViewerMain.mm | 22 ++++-- libkram/kram/KTXImage.cpp | 156 ++++++++++++++++++++++---------------- libkram/kram/KTXImage.h | 16 +++- libkram/kram/Kram.cpp | 13 ++-- 6 files changed, 186 insertions(+), 125 deletions(-) diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 67e83771..b4f75e62 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -120,7 +120,14 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { { KTXImage image; - if (!image.open(imageData, imageDataLength)) { + // true keeps compressed mips on KTX2 and aliases original mip data + // but have decode etc2/asct path below that uncompressed mips + // and the rgb conversion path below as well in the viewer. + // games would want to decompress directly from aliased mmap ktx2 data into staging + // or have blocks pre-twiddled in hw morton order. + + bool isInfoOnly = false; + if (!image.open(imageData, imageDataLength, isInfoOnly)) { return nil; } @@ -495,7 +502,9 @@ - (nonnull instancetype)init { self = [super init]; // must be aligned to pagesize() or can't use with newBufferWithBytesNoCopy - dataSize = 16*1024*1024; + // enough to upload 4k x 4k @ 4 bytes no mips, careful with array and cube that get too big + dataSize = 64*1024*1024; + posix_memalign((void**)&data, getpagesize(), dataSize); // allocate memory for circular staging buffer, only need to memcpy to this @@ -554,16 +563,6 @@ - (nonnull instancetype)init { return texture; } -//for (int mipLevelNumber = 0; mipLevelNumber < numMips; ++mipLevelNumber) { -// -// // zstd decompress entire mip level to the staging buffer -// zstd -//} -// -//// so first memcpy and entire level(s) into the buffer -////memcpy(...); - - // Has a synchronous upload via replaceRegion that only works for shared/managed (f.e. ktx), // and another path for private that uses a blitEncoder and must have block aligned data (f.e. ktxa, ktx2). // Could repack ktx data into ktxa before writing to temporary file, or when copying NSData into MTLBuffer. @@ -571,6 +570,13 @@ - (nonnull instancetype)init { { id texture = [self createTexture:image]; + // Note: always starting at 0 here, since kramv is only uploading 1 texture + // but a real uploader would upload until buffer full, and then reset this back to 0 + // A circular buffer if large enough to support multiple uploads over time. + // This can be a lot of temporary memory and must complete upload before changing. + + uint64_t bufferOffset = 0; + //-------------------------------- // upload mip levels @@ -588,7 +594,31 @@ - (nonnull instancetype)init { Int2 blockDims = image.blockDims(); - for (int mipLevelNumber = 0; mipLevelNumber < numMips; ++mipLevelNumber) { + // Note: copy entire decompressed level from KTX, but then upload + // each chunk of that with separate blit calls below. 
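Worth noting for the staging copies below: each level is decompressed (or memcpy'd) into the shared MTLBuffer at an offset padded to the format's block size, and a later patch in this series generalizes that padding into an alignOffset() helper and advances by the full level (length * numChunks). A condensed sketch of that final layout logic, paraphrasing the later diff rather than adding new API:

    #include <cstdint>

    inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) {
        return offset + (alignment - offset % alignment) % alignment;
    }
    // per mip: bufferOffset = alignOffset(bufferOffset, blockSize);
    //          image.unpackLevel(mip, fileData + level.offset, stagingData + bufferOffset);
    //          bufferOffsets[mip] = bufferOffset;
    //          bufferOffset += level.length * numChunks;   // whole level, all chunks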
+ size_t blockSize = image.blockSize(); + + vector bufferOffsets; + uint8_t* bufferData = (uint8_t*)_buffer.contents; + const uint8_t* mipData = (const uint8_t*)image.fileData; + bufferOffsets.resize(image.mipLevels.size()); + + for (int32_t i = 0; i < numMips; ++i) { + const KTXImageLevel& mipLevel = image.mipLevels[i]; + + // pad buffer offset to a multiple of the blockSize + bufferOffset += (blockSize - 1) - (bufferOffset & blockSize); + bufferOffsets[i] = bufferOffset; + bufferOffset += mipLevel.length; + + // this may have to decompress the level data + image.unpackLevel(i, mipData + mipLevel.offset, bufferData + bufferOffset); + } + + // blit encode calls must all be submitted to an encoder + // but may not have to be on the render thrad? + + for (int32_t mipLevelNumber = 0; mipLevelNumber < numMips; ++mipLevelNumber) { // there's a 4 byte levelSize for each mipLevel // the mipLevel.offset is immediately after this @@ -616,45 +646,32 @@ - (nonnull instancetype)init { bytesPerRow = (int32_t)mipLevel.length / yBlocks; } - int32_t sliceOrArrayOrFace; + int32_t chunkNum; - if (image.header.numberOfArrayElements > 0) { + if (image.header.numberOfArrayElements > 1) { // can be 1d, 2d, or cube array - sliceOrArrayOrFace = array; + chunkNum = array; if (numFaces > 1) { - sliceOrArrayOrFace = 6 * sliceOrArrayOrFace + face; + chunkNum = 6 * chunkNum + face; } } else { // can be 1d, 2d, or 3d - sliceOrArrayOrFace = slice; + chunkNum = slice; if (numFaces > 1) { - sliceOrArrayOrFace = face; + chunkNum = face; } } - // this is size of one face/slice/texture, not the levels size - int32_t mipStorageSize = (int32_t)mipLevel.length; + // This is size of one chunk + uint64_t mipStorageSize = mipLevel.length; - int32_t mipOffset = (int32_t)mipLevel.offset + sliceOrArrayOrFace * mipStorageSize; - - int32_t bufferBaseOffset = 0; // TODO: pos offset into the staging buffer - mipOffset += bufferBaseOffset; - - // using buffer to store - // offset into the level - //const uint8_t *srcBytes = image.fileData + mipOffset; - - // had blitEncoder support here + // Have uploaded to buffer in same order visiting chunks. + // Note: no call on MTLBlitEncoder to copy entire level of mips like glTexImage3D + uint64_t mipOffset = bufferOffsets[mipLevelNumber] + chunkNum * mipStorageSize; { - // Note: this only works for managed/shared textures. - // For private upload to buffer and then use blitEncoder to copy to texture. - //bool isCubemap = image.textureType == MyMTLTextureTypeCube || - // image.textureType == MyMTLTextureTypeCubeArray; bool is3D = image.textureType == MyMTLTextureType3D; - //bool is2DArray = image.textureType == MyMTLTextureType2DArray; - //bool is1DArray = image.textureType == MyMTLTextureType1DArray; // cpu copy the bytes from the data object into the texture MTLRegion region = { @@ -662,16 +679,11 @@ - (nonnull instancetype)init { { (NSUInteger)w, (NSUInteger)h, 1 } // MTLSize }; - // TODO: revist how loading is done to load entire levels - // otherwise too many replaceRegion calls. Data is already packed by mip. 
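The chunkNum computed above collapses array element, cube face, and 3D slice into a single index; the same mapping shows up again in the synchronous loadTextureFromImage path in a later patch. A hypothetical helper that summarizes it (illustrative only, not actual kram API):

    #include <cstdint>

    // a "chunk" is one face/slice/array element of a mip level
    static uint32_t chunkIndex(bool isArrayTexture, uint32_t numFaces,
                               uint32_t array, uint32_t face, uint32_t slice) {
        if (isArrayTexture)                        // 1D/2D/cube array
            return (numFaces > 1) ? (6 * array + face) : array;
        return (numFaces > 1) ? face : slice;      // cube uses the face, 3D uses the slice
    }
    // for 3D textures the blit then moves the chunk into region.origin.z and
    // uploads to destinationSlice 0, as in the code that follows.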
- if (is3D) { - region.origin.z = sliceOrArrayOrFace; - sliceOrArrayOrFace = 0; + region.origin.z = chunkNum; + chunkNum = 0; } - // TODO: no call on MTLBlitEncoder to copy entire level of mips like glTexImage3D - [_blitEncoder copyFromBuffer:_buffer sourceOffset:mipOffset sourceBytesPerRow:bytesPerRow @@ -679,7 +691,7 @@ - (nonnull instancetype)init { sourceSize:region.size toTexture:texture - destinationSlice:sliceOrArrayOrFace + destinationSlice:chunkNum destinationLevel:mipLevelNumber destinationOrigin:region.origin options:MTLBlitOptionNone diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 461141af..14cce950 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -417,6 +417,8 @@ - (BOOL)loadTextureFromData:(const string&)fullFilename timestamp:(double)timest // archive shouldn't contain png, so only support ktx/ktx2 here // TODO: have loader return KTXImage instead of parsing it again + // then can decode blocks in kramv + KTXImage sourceImage; if (!sourceImage.open(imageData,imageDataLength)) { return NO; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index de695e5e..4cbf359b 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1226,14 +1226,6 @@ - (void)scrollWheel:(NSEvent *)event } } -// use this to enable/disable menus, buttons, etc. Called on every event -// when not implemented, then user items are always enabled -- (BOOL)validateUserInterfaceItem:(id)item -{ - // TODO: tie to menus and buttons - return YES; -} - - (NSButton*)findButton:(const char*)name { NSString* title = [NSString stringWithUTF8String:name]; for (NSButton* button in _buttonArray) { @@ -1253,6 +1245,18 @@ - (NSMenuItem*)findMenuItem:(const char*)name { return nil; } +// use this to enable/disable menus, buttons, etc. 
Called on every event +// when not implemented, then user items are always enabled +- (BOOL)validateUserInterfaceItem:(id)item +{ + // TODO: tie to menus and buttons states for enable/disable toggles + // https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/MenuList/Articles/EnablingMenuItems.html + + // MTKView is not doc based, so can't all super + //return [super validateUserInterfaceItem:anItem]; + + return YES; +} - (void)updateUIAfterLoad { @@ -1299,6 +1303,8 @@ - (void)updateUIAfterLoad { [self findButton:"C"].hidden = isCheckerboardHidden; // menus (may want to disable, not hide) + // problem is crashes since menu seems to strip hidden items + // enabled state has to be handled in validateUserInterfaceItem [self findMenuItem:"Y"].hidden = isArrayHidden; [self findMenuItem:"F"].hidden = isFaceSliceHidden; [self findMenuItem:"M"].hidden = isMipHidden; diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 55d7f671..948a32c3 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -745,6 +745,18 @@ MyMTLPixelFormat toggleSrgbFormat(MyMTLPixelFormat format) return MyMTLPixelFormatInvalid; } +const char* supercompressionName(KTX2Supercompression type) +{ + const char* name = "Unknown"; + switch(type) { + case KTX2SupercompressionNone: name = "None"; break; + case KTX2SupercompressionBasisLZ: name = "BasisLZ"; break; + case KTX2SupercompressionZstd: name = "Zstd"; break; + case KTX2SupercompressionZlib: name = "Zlib"; break; + } + return name; +} + // https://docs.unity3d.com/ScriptReference/Experimental.Rendering.GraphicsFormat.html // Unity only handles 4,5,6,8,10,12 square block dimensions @@ -1316,17 +1328,6 @@ const char* textureTypeName(MyMTLTextureType textureType) // can use ktx2ktx2 and ktx2sc to supercompress, and kramv can use this to open and view data as a KTX1 file. // ignoring Basis and supercompression data, etc. 
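With supercompressionName() above, kram info can label how a KTX2 file's mips are packed; the Kram.cpp hunk later in this patch prints it alongside the compressed size, producing output roughly like this (values are illustrative):

    sizc: 1.337,0.512 MB 38%
    comp: Zstd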
-// wish C++ had a defer -struct ZSTDScope2 -{ - ZSTDScope2(ZSTD_DCtx* ctx_) : ctx(ctx_) {} - ~ZSTDScope2() { ZSTD_freeDCtx(ctx); } - -private: - ZSTD_DCtx* ctx = nullptr; -}; - - bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool isInfoOnly) { if ((size_t)imageDataLength < sizeof(KTX2Header)) { @@ -1416,12 +1417,16 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i header.bytesOfKeyValueData = 0; initProps(imageData + header2.kvdByteOffset, header2.kvdByteLength); + // skip parsing th elevels if (isInfoOnly) { skipImageLength = true; fileData = imageData; fileDataLength = imageDataLength; + // copy this in to return as info + supercompressionType = (KTX2Supercompression)header2.supercompressionScheme; + // copy these over from ktx2 mipLevels = levels; @@ -1493,73 +1498,94 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i // TODO: may need to fill out length field in fileData - // Note: specific to zstd - bool isZstd = header2.supercompressionScheme == KTX2SupercompressionZstd; - ZSTD_DCtx* dctx = nullptr; - if (isZstd) dctx = ZSTD_createDCtx(); - ZSTDScope2 scope(dctx); + supercompressionType = (KTX2Supercompression)header2.supercompressionScheme; // need to decompress mips here for (uint32_t i = 0; i < header.numberOfMipmapLevels; ++i) { // compresssed level const auto& level2 = levels[i]; - size_t srcDataSize = level2.lengthCompressed; const uint8_t* srcData = imageData + level2.offset; - + // uncompressed level - const auto& level1 = mipLevels[i]; - size_t dstDataSize = level1.length * numChunks; + auto& level1 = mipLevels[i]; + level1.lengthCompressed = level2.lengthCompressed; // need this for copyLevel to have enough data uint8_t* dstData = (uint8_t*)fileData + level1.offset; // can const_cast, since class owns data + + if (!unpackLevel(i, srcData, dstData)) { + return false; + } - // preserve lengthCompressed so kram info can display the value - // this field will need to be set to 0 - - // This does display in kram info, but it's confusing since image was converted to ktx1 - // and the offsets are largest first. So for now, don't copy this in. - // level1.lengthCompressed = level2.lengthCompressed; - - // TODO: use basis transcoder (single file) for Basis UASTC here, then don't need libktx yet - // wont work for BasisLZ (which is ETC1S). - - switch(header2.supercompressionScheme) { - case KTX2SupercompressionZstd: { - // decompress from zstd directly into ktx1 ordered chunk - // Note: decode fails with FSE_decompress. 
- auto result = ZSTD_decompressDCtx(dctx, - dstData, dstDataSize, - srcData, srcDataSize); - - if (ZSTD_isError(result)) { - KLOGE("kram", "decode mip zstd failed"); - return false; - } - if (level2.length * numChunks != result) { - KLOGE("kram", "decode mip zstd size not expected"); - return false; - } - break; - } + // have decompressed here, so set to 0 + level1.lengthCompressed = 0; + } + + // have decompressed ktx1, so change back to None + supercompressionType = KTX2SupercompressionNone; + } + + return true; +} + +bool KTXImage::unpackLevel(uint32_t mipNumber, const uint8_t* srcData, uint8_t* dstData) { + + // uncompressed level + uint32_t numChunks = totalChunks(); + const auto& level = mipLevels[mipNumber]; + size_t dstDataSize = level.length * numChunks; + + if (level.lengthCompressed == 0) { + memcpy(dstData, srcData, dstDataSize); + } + else { + size_t srcDataSize = level.lengthCompressed; + + // TODO: use basis transcoder (single file) for Basis UASTC here, then don't need libktx yet + // wont work for BasisLZ (which is ETC1S). + // copy this in to return as info + + switch(supercompressionType) { + case KTX2SupercompressionZstd: { + // decompress from zstd directly into ktx1 ordered chunk + // Note: decode fails with FSE_decompress. + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + if (!dctx) + return false; - case KTX2SupercompressionZlib: { - // can use miniz or libCompression - mz_ulong dstDataSizeMZ = 0; - if (mz_uncompress(dstData, &dstDataSizeMZ, - srcData, srcDataSize) != MZ_OK) { - KLOGE("kram", "decode mip zlib failed"); - return false; - } - if (dstDataSizeMZ != dstDataSize) { - KLOGE("kram", "decode mip zlib size not expected"); - return false; - } - - break; + auto dstDataSizeZstd = ZSTD_decompressDCtx(dctx, + dstData, dstDataSize, + srcData, srcDataSize); + ZSTD_freeDCtx(dctx); + + if (ZSTD_isError(dstDataSizeZstd)) { + KLOGE("kram", "decode mip zstd failed"); + return false; + } + if (dstDataSizeZstd != dstDataSize) { + KLOGE("kram", "decode mip zstd size not expected"); + return false; + } + break; + } + + case KTX2SupercompressionZlib: { + // can use miniz or libCompression + mz_ulong dstDataSizeMiniz = 0; + if (mz_uncompress(dstData, &dstDataSizeMiniz, + srcData, srcDataSize) != MZ_OK) { + KLOGE("kram", "decode mip zlib failed"); + return false; } - - // already checked at top of function - default: { + if (dstDataSizeMiniz != dstDataSize) { + KLOGE("kram", "decode mip zlib size not expected"); return false; } + + break; + } + + // already checked at top of function + default: { + return false; } } } diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index b7971682..0174786d 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -305,6 +305,15 @@ class KTXImage { void reserveImageData(); vector& imageData(); + // for KTX2 files, the mips can be compressed using various encoders + bool isSupercompressed() const { return isKTX2() && !mipLevels.empty() && mipLevels[0].lengthCompressed != 0; } + + bool isKTX1() const { return !skipImageLength; } + bool isKTX2() const { return skipImageLength; } + + // can use on ktx1/2 files, does a decompress if needed + bool unpackLevel(uint32_t mipNumber, const uint8_t* srcData, uint8_t* dstData); + private: bool openKTX2(const uint8_t* imageData, size_t imageDataLength, bool isInfoOnly); @@ -321,9 +330,10 @@ class KTXImage { uint32_t height; uint32_t depth; - // for ktxa and ktx2 + // for ktx2 bool skipImageLength = false; - + KTX2Supercompression supercompressionType = KTX2SupercompressionNone; + KTXHeader 
header; // copy of KTXHeader, so can be modified and then written back // write out only string/string props, for easy of viewing @@ -336,6 +346,8 @@ class KTXImage { const uint8_t* fileData; // mmap data }; +const char* supercompressionName(KTX2Supercompression type); + // Generic format helpers. All based on the ubiquitous type. bool isFloatFormat(MyMTLPixelFormat format); bool isHalfFormat(MyMTLPixelFormat format); diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 463bef9e..aaacdbc0 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -1469,8 +1469,8 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, int32_t numChunks = srcImage.totalChunks(); - // add up lengtha and lengthCompressed - if (srcImage.mipLevels[0].lengthCompressed > 0) { + // add up lengths and lengthCompressed + if (srcImage.isSupercompressed()) { uint64_t length = 0; uint64_t lengthCompressed = 0; @@ -1485,12 +1485,15 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, isMB = (length > (512 * 1024)); double lengthF = isMB ? length / (1024.0f * 1024.0f) : length / 1024.0f; double lengthCompressedF = isMB ? lengthCompressed / (1024.0f * 1024.0f) : lengthCompressed / 1024.0f; - + append_sprintf(info, - "sizc: %0.3f,%0.3f %s %d%%\n", + "sizc: %0.3f,%0.3f %s %d%%\n" + "comp: %s\n", lengthF, lengthCompressedF, isMB ? "MB" : "KB", - (int)percent); + (int)percent, + supercompressionName(srcImage.supercompressionType) + ); } From 2b743546084edad6f36802c63b8e9740bc6e4a7c Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 24 May 2021 07:53:39 -0700 Subject: [PATCH 070/901] kramv - update fast load path from staging MTLBuffer to MTLTexture, fix archive increment This decompresses, aliases, or copies directly from blocks and compressed blocks. The blit encoder is used to go direct to private textures via a 128MB staging MTLBuffer. This allows the entire level to be copied at once, and the the blits reference offsets into the buffer. The buffer is just a linear allocator right now, no circular usage. Use completion handler. Loader really needs a queue of pending textures, and also avoid level allocation on the copy path, and use a part of staging texture. Fix isInfoOnly handling since levelSize was divided by length twice. --- kramv/KramLoader.h | 19 +- kramv/KramLoader.mm | 417 +++++++++++++++++++++++--------------- kramv/KramRenderer.mm | 17 +- kramv/KramViewerMain.mm | 2 +- libkram/kram/KTXImage.cpp | 18 +- libkram/kram/KTXImage.h | 8 +- libkram/kram/KramConfig.h | 26 ++- 7 files changed, 301 insertions(+), 206 deletions(-) diff --git a/kramv/KramLoader.h b/kramv/KramLoader.h index 2b14b16f..d9ecedda 100644 --- a/kramv/KramLoader.h +++ b/kramv/KramLoader.h @@ -10,10 +10,11 @@ #import // protocol requires imports -#import -#import #import +#import #import +#import +#import #endif @@ -31,18 +32,8 @@ // from url (mmap) - (nullable id)loadTextureFromURL:(nonnull NSURL *)url originalFormat:(nullable MTLPixelFormat*)originalFormat; -@property (retain, nonatomic, readwrite, nonnull) id device; - -// test this after load, and use a MTLBlitEncoder to autogen mips -@property (nonatomic, readwrite, getter=isMipgenNeeded) BOOL mipgenNeeded; - -@end - -//------------------------------------- - -// This loads KTX and PNG data synchronously. Will likely move to only loading KTX files, with a png -> ktx conversion. -// The underlying KTXImage is not yet returned to the caller, but would be useful for prop queries. 
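To summarize the flow this patch wires up, paraphrasing the KramLoader diffs below and using the patch's own names: loading fills the staging MTLBuffer and records blits, and the renderer replays them at the start of the next frame.

    // load time (blitTextureFromImage):
    //   bufferOffset = alignOffset(bufferOffset, blockSize);
    //   image.unpackLevel(mip, fileData + level.offset, stagingData + bufferOffset);
    //   _blits.push_back({w, h, chunkNum, mip, mipStorageSize, mipOffset, textureIndex, bytesPerRow, is3D});
    //
    // frame start (uploadTexturesIfNeeded, called from the renderer's blit encoder):
    //   [blitEncoder generateMipmapsForTexture:texture];   // for any queued mipgen textures
    //   [blitEncoder copyFromBuffer:_buffer sourceOffset:blit.mipOffset ... toTexture:texture
    //                destinationSlice:chunkNum destinationLevel:blit.mipLevelNumber ...];
    //   [commandBuffer addCompletedHandler:^(...){ /* reset _bufferOffset once the GPU finishes */ }];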
-@interface KramBlitLoader : NSObject +// handle auto-mipgen and upload mips from staging MTLBuffer to mips of various private MTLTexture +- (void)uploadTexturesIfNeeded:(nonnull id)blitEncoder commandBuffer:(nonnull id)commandBuffer; @property (retain, nonatomic, readwrite, nonnull) id device; diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index b4f75e62..b4cf12c9 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -31,13 +31,42 @@ return string([[[NSString stringWithUTF8String:text.c_str()] lowercaseString] UTF8String]); } -//----------------------------------------------- +// defer data need to blit staging MTLBuffer to MTLTexture at the start of rendering +struct KramBlit +{ + uint32_t w; + uint32_t h; + uint32_t chunkNum; + uint32_t mipLevelNumber; + + uint64_t mipStorageSize; + uint64_t mipOffset; -// blit path for ktxa is commented out to simplify loader, will move that to an async load -// and simplify the loader API by making this a loader class. + uint32_t textureIndex; + uint32_t bytesPerRow; + bool is3D; +}; +//----------------------------------------------- + @implementation KramLoader { - BOOL _isMipgenNeeded; + // only one of these for now + id _buffer; + uint8_t* _data; + uint8_t _bufferOffset; + + vector _blits; + NSMutableArray>* _blitTextures; + NSMutableArray>* _mipgenTextures; +} + +- (instancetype)init { + self = [super init]; + + _blitTextures = [[NSMutableArray alloc] init]; + _mipgenTextures = [[NSMutableArray alloc] init]; + + return self; } - (nullable id)loadTextureFromData:(nonnull NSData*)imageData originalFormat:(nullable MTLPixelFormat*)originalFormat { @@ -78,6 +107,7 @@ - (BOOL)decodeImageIfNeeded:(KTXImage&)image imageDecoded:(KTXImage&)imageDecode } #if SUPPORT_RGB + inline bool isInternalRGBFormat(MyMTLPixelFormat format) { bool isInternal = false; switch(format) { @@ -120,17 +150,39 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { { KTXImage image; - // true keeps compressed mips on KTX2 and aliases original mip data - // but have decode etc2/asct path below that uncompressed mips + // isInfoOnly = true keeps compressed mips on KTX2 and aliases original mip data + // but have decode etc2/astc path below that uncompressed mips // and the rgb conversion path below as well in the viewer. // games would want to decompress directly from aliased mmap ktx2 data into staging // or have blocks pre-twiddled in hw morton order. 
- bool isInfoOnly = false; + bool isInfoOnly = true; if (!image.open(imageData, imageDataLength, isInfoOnly)) { return nil; } + // see if it needs decode first + bool needsDecode = false; + if (isInternalRGBFormat(image.pixelFormat)) { + needsDecode = true; + } +#if DO_DECODE + else if (isETCFormat(image.pixelFormat)) { + needsDecode = true; + } + else if (isASTCFormat(image.pixelFormat)) { + needsDecode = true; + } +#endif + + if (needsDecode) { + isInfoOnly = false; + + if (!image.open(imageData, imageDataLength, isInfoOnly)) { + return nil; + } + } + #if SUPPORT_RGB if (isInternalRGBFormat(image.pixelFormat)) { // loads and converts image to RGBA version @@ -162,18 +214,23 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { } #endif - if (originalFormat != nullptr) { *originalFormat = (MTLPixelFormat)image.pixelFormat; } - KTXImage imageDecoded; - bool useImageDecoded = false; - if (![self decodeImageIfNeeded:image imageDecoded:imageDecoded useImageDecoded:useImageDecoded]) { - return nil; + if (needsDecode) { + KTXImage imageDecoded; + bool useImageDecoded = false; + if (![self decodeImageIfNeeded:image imageDecoded:imageDecoded useImageDecoded:useImageDecoded]) { + return nil; + } + + return [self loadTextureFromImage:useImageDecoded ? imageDecoded : image]; + } + else { + // fast load path directly from mmap'ed data, decompress direct to staging + return [self blitTextureFromImage:image]; } - - return [self loadTextureFromImage:useImageDecoded ? imageDecoded : image]; } static int32_t numberOfMipmapLevels(const Image& image) { @@ -212,7 +269,7 @@ static int32_t numberOfMipmapLevels(const Image& image) { // TODO: replace this with code that gens a KTXImage from png (and cpu mips) // instead of needing to use autogenmip that has it's own filters (probably a box) - id texture = [self createTexture:image]; + id texture = [self createTexture:image isPrivate:false]; if (!texture) { return nil; } @@ -238,20 +295,12 @@ static int32_t numberOfMipmapLevels(const Image& image) { // have to schedule autogen inside render using MTLBlitEncoder if (image.header.numberOfMipmapLevels > 1) { - _isMipgenNeeded = YES; + [_mipgenTextures addObject: texture]; } return texture; } -- (BOOL)isMipgenNeeded { - return _isMipgenNeeded; -} - -- (void)setMipgenNeeded:(BOOL)enabled { - _isMipgenNeeded = enabled; -} - - (nullable id)loadTextureFromURL:(nonnull NSURL *)url originalFormat:(nullable MTLPixelFormat*)originalFormat { const char *path = [url.absoluteURL.path UTF8String]; @@ -295,7 +344,7 @@ - (void)setMipgenNeeded:(BOOL)enabled { return [self loadTextureFromData:mmapHelper.data() imageDataLength:(int32_t)mmapHelper.dataLength() originalFormat:originalFormat]; } -- (nullable id)createTexture:(KTXImage&)image { +- (nullable id)createTexture:(KTXImage&)image isPrivate:(bool)isPrivate { MTLTextureDescriptor *textureDescriptor = [[MTLTextureDescriptor alloc] init]; // Indicate that each pixel has a blue, green, red, and alpha channel, where each channel is @@ -315,6 +364,10 @@ - (void)setMipgenNeeded:(BOOL)enabled { // and only get box filtering in API-level filters. But would cut storage. textureDescriptor.mipmapLevelCount = MAX(1, image.header.numberOfMipmapLevels); + // this is needed for blit + if (isPrivate) + textureDescriptor.storageMode = MTLStorageModePrivate; + // only do this for viewer // but allows encoded textures to enable/disable their sRGB state. // Since the view isn't accurate, will probably pull this out. 
@@ -340,11 +393,6 @@ - (void)setMipgenNeeded:(BOOL)enabled { // Could repack ktx data into ktxa before writing to temporary file, or when copying NSData into MTLBuffer. - (nullable id)loadTextureFromImage:(KTXImage &)image { - id texture = [self createTexture:image]; - - //-------------------------------- - // upload mip levels - // TODO: about aligning to 4k for base + length // http://metalkit.org/2017/05/26/working-with-memory-in-metal-part-2.html @@ -359,19 +407,42 @@ - (void)setMipgenNeeded:(BOOL)enabled { Int2 blockDims = image.blockDims(); + uint32_t numChunks = image.totalChunks(); + + // TODO: reuse staging _buffer and _bufferOffset here, these large allocations take time + vector mipStorage; + mipStorage.resize(image.mipLevels[0].length * numChunks); // enough to hold biggest mip + + //----------------- + + id texture = [self createTexture:image isPrivate:false]; + + const uint8_t* srcLevelData = image.fileData; + for (int mipLevelNumber = 0; mipLevelNumber < numMips; ++mipLevelNumber) { // there's a 4 byte levelSize for each mipLevel // the mipLevel.offset is immediately after this - // this is offset to a given level const KTXImageLevel& mipLevel = image.mipLevels[mipLevelNumber]; + // this is offset to a given level + uint64_t mipBaseOffset = mipLevel.offset; + + // unpack the whole level in-place + if (image.isSupercompressed()) { + image.unpackLevel(mipLevelNumber, image.fileData + mipLevel.offset, mipStorage.data()); + srcLevelData = mipStorage.data(); + + // going to upload from mipStorage temp array + mipBaseOffset = 0; + } + // only have face, face+array, or slice but this handles all cases for (int array = 0; array < numArrays; ++array) { for (int face = 0; face < numFaces; ++face) { for (int slice = 0; slice < numSlices; ++slice) { - int32_t bytesPerRow = 0; + uint32_t bytesPerRow = 0; // 1D/1DArray textures set bytesPerRow to 0 if ((MTLTextureType)image.textureType != MTLTextureType1D && @@ -380,61 +451,62 @@ - (void)setMipgenNeeded:(BOOL)enabled { // for compressed, bytesPerRow needs to be multiple of block size // so divide by the number of blocks making up the height //int xBlocks = ((w + blockDims.x - 1) / blockDims.x); - int32_t yBlocks = ((h + blockDims.y - 1) / blockDims.y); + uint32_t yBlocks = ((h + blockDims.y - 1) / blockDims.y); // Calculate the number of bytes per row in the image. // for compressed images this is xBlocks * blockSize - bytesPerRow = (int32_t)mipLevel.length / yBlocks; + bytesPerRow = (uint32_t)mipLevel.length / yBlocks; } - int32_t sliceOrArrayOrFace; + int32_t chunkNum = 0; if (image.header.numberOfArrayElements > 0) { // can be 1d, 2d, or cube array - sliceOrArrayOrFace = array; + chunkNum = array; if (numFaces > 1) { - sliceOrArrayOrFace = 6 * sliceOrArrayOrFace + face; + chunkNum = 6 * chunkNum + face; } } else { // can be 1d, 2d, or 3d - sliceOrArrayOrFace = slice; + chunkNum = slice; if (numFaces > 1) { - sliceOrArrayOrFace = face; + chunkNum = face; } } // this is size of one face/slice/texture, not the levels size - int32_t mipStorageSize = (int32_t)mipLevel.length; + uint64_t mipStorageSize = mipLevel.length; + + uint64_t mipOffset = mipBaseOffset + chunkNum * mipStorageSize; - int32_t mipOffset = (int32_t)mipLevel.offset + sliceOrArrayOrFace * mipStorageSize; // offset into the level - const uint8_t *srcBytes = image.fileData + mipOffset; - - // had blitEncoder support here + const uint8_t *srcBytes = srcLevelData + mipOffset; { // Note: this only works for managed/shared textures. 
// For private upload to buffer and then use blitEncoder to copy to texture. + // See KramBlitLoader for that. This is all synchronous upload too. + // + // Note: due to API limit we can only copy one chunk at a time. With KramBlitLoader + // can copy the whole level to buffer, and then reference chunks within. + bool isCubemap = image.textureType == MyMTLTextureTypeCube || image.textureType == MyMTLTextureTypeCubeArray; - bool is3D = image.textureType == MyMTLTextureType3D; + bool is3D = image.textureType == MyMTLTextureType3D; bool is2DArray = image.textureType == MyMTLTextureType2DArray; bool is1DArray = image.textureType == MyMTLTextureType1DArray; - // cpu copy the bytes from the data object into the texture + // sync cpu copy the bytes from the data object into the texture MTLRegion region = { { 0, 0, 0 }, // MTLOrigin { (NSUInteger)w, (NSUInteger)h, 1 } // MTLSize }; - // TODO: revist how loading is done to load entire levels - // otherwise too many replaceRegion calls. Data is already packed by mip. - if (is1DArray) { [texture replaceRegion:region mipmapLevel:mipLevelNumber - slice:sliceOrArrayOrFace + slice:chunkNum withBytes:srcBytes bytesPerRow:bytesPerRow bytesPerImage:0]; @@ -442,17 +514,18 @@ - (void)setMipgenNeeded:(BOOL)enabled { else if (isCubemap) { [texture replaceRegion:region mipmapLevel:mipLevelNumber - slice:sliceOrArrayOrFace + slice:chunkNum withBytes:srcBytes bytesPerRow:bytesPerRow bytesPerImage:0]; } else if (is3D) { - region.origin.z = sliceOrArrayOrFace; + region.origin.z = chunkNum; + chunkNum = 0; [texture replaceRegion:region mipmapLevel:mipLevelNumber - slice:0 + slice:chunkNum withBytes:srcBytes bytesPerRow:bytesPerRow bytesPerImage:mipStorageSize]; // only for 3d @@ -460,7 +533,7 @@ - (void)setMipgenNeeded:(BOOL)enabled { else if (is2DArray) { [texture replaceRegion:region mipmapLevel:mipLevelNumber - slice:array + slice:chunkNum withBytes:srcBytes bytesPerRow:bytesPerRow bytesPerImage:0]; @@ -483,84 +556,90 @@ - (void)setMipgenNeeded:(BOOL)enabled { return texture; } -@end - //-------------------------- - - - -@implementation KramBlitLoader { - // this must be created in render, and then do blits into this - id _blitEncoder; - id _buffer; - uint8_t* data; - size_t dataSize; -} - -- (nonnull instancetype)init { - self = [super init]; +- (void)createStagingBufffer:(uint64_t)dataSize { // must be aligned to pagesize() or can't use with newBufferWithBytesNoCopy // enough to upload 4k x 4k @ 4 bytes no mips, careful with array and cube that get too big - dataSize = 64*1024*1024; - posix_memalign((void**)&data, getpagesize(), dataSize); + // allocate system memory for bufffer, can memcopy to this + posix_memalign((void**)&_data, getpagesize(), dataSize); // allocate memory for circular staging buffer, only need to memcpy to this // but need a rolling buffer atop to track current begin/end. - _buffer = [_device newBufferWithBytesNoCopy:data + _buffer = [_device newBufferWithBytesNoCopy:_data length:dataSize options:MTLResourceStorageModeShared deallocator: ^(void *macroUnusedArg(pointer), NSUInteger macroUnusedArg(length)) { - delete data; + delete _data; } ]; - return self; } -- (nullable id)createTexture:(KTXImage&)image { - MTLTextureDescriptor *textureDescriptor = [[MTLTextureDescriptor alloc] init]; - - // Indicate that each pixel has a blue, green, red, and alpha channel, where each channel is - // an 8-bit unsigned normalized value (i.e. 
0 maps to 0.0 and 255 maps to 1.0) - textureDescriptor.textureType = (MTLTextureType)image.textureType; - textureDescriptor.pixelFormat = (MTLPixelFormat)image.pixelFormat; - - // Set the pixel dimensions of the texture - textureDescriptor.width = image.width; - textureDescriptor.height = MAX(1, image.height); - textureDescriptor.depth = MAX(1, image.depth); - textureDescriptor.arrayLength = MAX(1, image.header.numberOfArrayElements); - // ignoring 0 (auto mip), but might need to support for explicit formats - // must have hw filtering support for format, and 32f filtering only first appeared on A14/M1 - // and only get box filtering in API-level filters. But would cut storage. - textureDescriptor.mipmapLevelCount = MAX(1, image.header.numberOfMipmapLevels); +- (void)uploadTexturesIfNeeded:(id)blitEncoder commandBuffer:(id)commandBuffer { + if (_mipgenTextures.count > 0) { + for (id texture in _mipgenTextures) { + // autogen mips will include srgb conversions, so toggling srgb on/off isn't quite correct + [blitEncoder generateMipmapsForTexture:texture]; + } + + // reset the arra + [_mipgenTextures removeAllObjects]; + } - // needed for blit, - textureDescriptor.storageMode = MTLStorageModePrivate; - - // only do this for viewer - // but allows encoded textures to enable/disable their sRGB state. - // Since the view isn't accurate, will probably pull this out. - // Keep usageRead set by default. - //textureDescriptor.usage = MTLTextureUsageShaderRead; - - // this was so that could toggle srgb on/off, but mips are built linear and encoded as lin or srgb - // in the encoded formats so this wouldn't accurately reflect with/without srgb. - //textureDescriptor.usage |= MTLTextureUsagePixelFormatView; - - // Create the texture from the device by using the descriptor - id texture = [self.device newTextureWithDescriptor:textureDescriptor]; - if (!texture) { - KLOGE("kramv", "could not allocate texture"); - return nil; + if (!_blits.empty()) { + // now upload from staging MTLBuffer to private MTLTexture + for (const auto& blit: _blits) { + MTLRegion region = { + { 0, 0, 0 }, // MTLOrigin + { (NSUInteger)blit.w, (NSUInteger)blit.h, 1 } // MTLSize + }; + + uint32_t chunkNum = blit.chunkNum; + if (blit.is3D) { + region.origin.z = chunkNum; + chunkNum = 0; + } + + //assert(blit.textureIndex < _blitTextures.count); + id texture = _blitTextures[blit.textureIndex]; + + [blitEncoder copyFromBuffer:_buffer + sourceOffset:blit.mipOffset + sourceBytesPerRow:blit.bytesPerRow + sourceBytesPerImage:blit.mipStorageSize + sourceSize:region.size + + toTexture:texture + destinationSlice:chunkNum + destinationLevel:blit.mipLevelNumber + destinationOrigin:region.origin + options:MTLBlitOptionNone + ]; + } + + // reset the array and buffer offset, so can upload more textures + _blits.clear(); + [_blitTextures removeAllObjects]; + + // TODO: use atomic on this + uint32_t bufferOffsetCopy = _bufferOffset; + [commandBuffer addCompletedHandler:^(id /* buffer */) + { + // can only reset this once gpu completes the blits above + // also guard against addding to this in blitTextureFromImage when completion handler will reset to 0 + if (_bufferOffset == bufferOffsetCopy) + _bufferOffset = 0; + }]; } - - return texture; +} + +inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) { + return offset + (alignment - offset % alignment) % alignment; } // Has a synchronous upload via replaceRegion that only works for shared/managed (f.e. 
ktx), @@ -568,29 +647,39 @@ - (nonnull instancetype)init { // Could repack ktx data into ktxa before writing to temporary file, or when copying NSData into MTLBuffer. - (nullable id)blitTextureFromImage:(KTXImage &)image { - id texture = [self createTexture:image]; + if (_buffer == nil) { + // this is only 4k x 4x @ RGBA8u with mips, 8k x 8k compressed with mips + [self createStagingBufffer: 128*1024*1024]; + } + + // TODO: first make sure have enough buffer to upload, otherwise need to queue this image + // try not to load much until that's established + // queue would need KTXImage and mmap to stay alive long enough for queue to be completed + if (_bufferOffset != 0) { + return nil; + } + + id texture = [self createTexture:image isPrivate:true]; // Note: always starting at 0 here, since kramv is only uploading 1 texture // but a real uploader would upload until buffer full, and then reset this back to 0 // A circular buffer if large enough to support multiple uploads over time. // This can be a lot of temporary memory and must complete upload before changing. - uint64_t bufferOffset = 0; - //-------------------------------- // upload mip levels // TODO: about aligning to 4k for base + length // http://metalkit.org/2017/05/26/working-with-memory-in-metal-part-2.html - int32_t w = image.width; - int32_t h = image.height; - int32_t d = image.depth; + uint32_t w = image.width; + uint32_t h = image.height; + uint32_t d = image.depth; - int32_t numMips = MAX(1, image.header.numberOfMipmapLevels); - int32_t numArrays = MAX(1, image.header.numberOfArrayElements); - int32_t numFaces = MAX(1, image.header.numberOfFaces); - int32_t numSlices = MAX(1, image.depth); + uint32_t numMips = MAX(1, image.header.numberOfMipmapLevels); + uint32_t numArrays = MAX(1, image.header.numberOfArrayElements); + uint32_t numFaces = MAX(1, image.header.numberOfFaces); + uint32_t numSlices = MAX(1, image.depth); Int2 blockDims = image.blockDims(); @@ -603,22 +692,35 @@ - (nonnull instancetype)init { const uint8_t* mipData = (const uint8_t*)image.fileData; bufferOffsets.resize(image.mipLevels.size()); - for (int32_t i = 0; i < numMips; ++i) { + uint32_t bufferOffset = _bufferOffset; + uint32_t numChunks = image.totalChunks(); + + for (uint32_t i = 0; i < numMips; ++i) { const KTXImageLevel& mipLevel = image.mipLevels[i]; // pad buffer offset to a multiple of the blockSize - bufferOffset += (blockSize - 1) - (bufferOffset & blockSize); - bufferOffsets[i] = bufferOffset; - bufferOffset += mipLevel.length; + bufferOffset = alignOffset(bufferOffset, blockSize); // this may have to decompress the level data image.unpackLevel(i, mipData + mipLevel.offset, bufferData + bufferOffset); + + bufferOffsets[i] = bufferOffset; + bufferOffset += mipLevel.length * numChunks; } - // blit encode calls must all be submitted to an encoder - // but may not have to be on the render thrad? - for (int32_t mipLevelNumber = 0; mipLevelNumber < numMips; ++mipLevelNumber) { + // Should this be split off after cpu upload, could code store enough + // in a vector to jettison the KTXImage. Also need a queue of textures + // that are not fully loaded or haven't started if sharing the staging buffer. + // Note that it is just system ram, and can have allocations stored into it + // and can be viewed in the debugger and can do memcpy to it above. + + //-------------------- + + // blit encoder calls must all be submitted to an open MTLBlitCommandEncoder, + // but may not have to be on the render thread? 
+ + for (uint32_t mipLevelNumber = 0; mipLevelNumber < numMips; ++mipLevelNumber) { // there's a 4 byte levelSize for each mipLevel // the mipLevel.offset is immediately after this @@ -626,11 +728,11 @@ - (nonnull instancetype)init { const KTXImageLevel& mipLevel = image.mipLevels[mipLevelNumber]; // only have face, face+array, or slice but this handles all cases - for (int array = 0; array < numArrays; ++array) { - for (int face = 0; face < numFaces; ++face) { - for (int slice = 0; slice < numSlices; ++slice) { + for (uint32_t array = 0; array < numArrays; ++array) { + for (uint32_t face = 0; face < numFaces; ++face) { + for (uint32_t slice = 0; slice < numSlices; ++slice) { - int32_t bytesPerRow = 0; + uint32_t bytesPerRow = 0; // 1D/1DArray textures set bytesPerRow to 0 if ((MTLTextureType)image.textureType != MTLTextureType1D && @@ -639,14 +741,14 @@ - (nonnull instancetype)init { // for compressed, bytesPerRow needs to be multiple of block size // so divide by the number of blocks making up the height //int xBlocks = ((w + blockDims.x - 1) / blockDims.x); - int32_t yBlocks = ((h + blockDims.y - 1) / blockDims.y); + uint32_t yBlocks = ((h + blockDims.y - 1) / blockDims.y); // Calculate the number of bytes per row in the image. // for compressed images this is xBlocks * blockSize - bytesPerRow = (int32_t)mipLevel.length / yBlocks; + bytesPerRow = mipLevel.length / yBlocks; } - int32_t chunkNum; + uint32_t chunkNum = 0; if (image.header.numberOfArrayElements > 1) { // can be 1d, 2d, or cube array @@ -668,34 +770,24 @@ - (nonnull instancetype)init { // Have uploaded to buffer in same order visiting chunks. // Note: no call on MTLBlitEncoder to copy entire level of mips like glTexImage3D - uint64_t mipOffset = bufferOffsets[mipLevelNumber] + chunkNum * mipStorageSize; + uint64_t mipOffset = bufferOffsets[mipLevelNumber] + chunkNum * mipStorageSize; { bool is3D = image.textureType == MyMTLTextureType3D; - // cpu copy the bytes from the data object into the texture - MTLRegion region = { - { 0, 0, 0 }, // MTLOrigin - { (NSUInteger)w, (NSUInteger)h, 1 } // MTLSize - }; - - if (is3D) { - region.origin.z = chunkNum; - chunkNum = 0; - } - - [_blitEncoder copyFromBuffer:_buffer - sourceOffset:mipOffset - sourceBytesPerRow:bytesPerRow - sourceBytesPerImage:mipStorageSize - sourceSize:region.size - - toTexture:texture - destinationSlice:chunkNum - destinationLevel:mipLevelNumber - destinationOrigin:region.origin - options:MTLBlitOptionNone - ]; + _blits.push_back({ + // use named inits here + w, h, + chunkNum, + + mipLevelNumber, + mipStorageSize, + mipOffset, + + (uint32_t)_blitTextures.count, + bytesPerRow, + is3D // could derive from textureIndex lookup + }); } } } @@ -704,9 +796,12 @@ - (nonnull instancetype)init { mipDown(w, h, d); } - // this only affect managed textures - [_blitEncoder optimizeContentsForGPUAccess:texture]; + // everything succeded, so advance the offset + _bufferOffset = bufferOffset; + [_blitTextures addObject: texture]; + // this texture cannot be used until buffer uploads complete + // but those happen at beginning of frame, so can attach to shaders, etc return texture; } diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 14cce950..eb42a481 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -742,7 +742,7 @@ - (void)drawInMTKView:(nonnull MTKView *)view _uniformBufferIndex = (_uniformBufferIndex + 1) % MaxBuffersInFlight; - id commandBuffer = [_commandQueue commandBuffer]; + id commandBuffer = [_commandQueue commandBuffer]; commandBuffer.label 
= @"MyCommand"; __block dispatch_semaphore_t block_sema = _inFlightSemaphore; @@ -758,18 +758,11 @@ - (void)drawInMTKView:(nonnull MTKView *)view // also use to readback pixels // also use for async texture upload - bool needsBlit = _loader.isMipgenNeeded && _colorMap.mipmapLevelCount > 1; - if (needsBlit) { - id blitEncoder = [commandBuffer blitCommandEncoder]; + id blitEncoder = [commandBuffer blitCommandEncoder]; + if (blitEncoder) + { blitEncoder.label = @"MyBlitEncoder"; - - // autogen mips will include srgb conversions, so toggling srgb on/off isn't quite correct - if (_loader.mipgenNeeded) { - [blitEncoder generateMipmapsForTexture:_colorMap]; - - _loader.mipgenNeeded = NO; - } - + [_loader uploadTexturesIfNeeded:blitEncoder commandBuffer:commandBuffer]; [blitEncoder endEncoding]; } diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 4cbf359b..e1ffb90d 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1959,7 +1959,7 @@ - (BOOL)loadTextureFromArchive:(const char*)filename timestamp:(double)timestamp _noImageLoaded = NO; } - _showSettings->isArchive = false; + _showSettings->isArchive = true; // show/hide button [self updateUIAfterLoad]; diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 948a32c3..ff45debc 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1417,7 +1417,6 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i header.bytesOfKeyValueData = 0; initProps(imageData + header2.kvdByteOffset, header2.kvdByteLength); - // skip parsing th elevels if (isInfoOnly) { skipImageLength = true; @@ -1433,13 +1432,10 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i // copy the original ktx2 levels, this includes mip compression bool isCompressed = (mipLevels[0].lengthCompressed > 0) && - (mipLevels[0].length != mipLevels[0].lengthCompressed); + ((mipLevels[0].length * numChunks) != mipLevels[0].lengthCompressed); - for (auto& level : mipLevels) { - level.length /= numChunks; - - // this indicates not compressed - if (!isCompressed) { + if (!isCompressed) { + for (auto& level : mipLevels) { level.lengthCompressed = 0; } } @@ -1472,6 +1468,7 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i // the offsets are reversed in ktx2 file level1.offset = level2.offset; + assert(level1.lengthCompressed == 0); if (level1.length != level2.length) { @@ -1547,14 +1544,9 @@ bool KTXImage::unpackLevel(uint32_t mipNumber, const uint8_t* srcData, uint8_t* case KTX2SupercompressionZstd: { // decompress from zstd directly into ktx1 ordered chunk // Note: decode fails with FSE_decompress. 
- ZSTD_DCtx* dctx = ZSTD_createDCtx(); - if (!dctx) - return false; - - auto dstDataSizeZstd = ZSTD_decompressDCtx(dctx, + size_t dstDataSizeZstd = ZSTD_decompress( dstData, dstDataSize, srcData, srcDataSize); - ZSTD_freeDCtx(dctx); if (ZSTD_isError(dstDataSizeZstd)) { KLOGE("kram", "decode mip zstd failed"); diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index 0174786d..19c30f05 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -306,7 +306,7 @@ class KTXImage { vector& imageData(); // for KTX2 files, the mips can be compressed using various encoders - bool isSupercompressed() const { return isKTX2() && !mipLevels.empty() && mipLevels[0].lengthCompressed != 0; } + bool isSupercompressed() const { return isKTX2() && mipLevels[0].lengthCompressed != 0; } bool isKTX1() const { return !skipImageLength; } bool isKTX2() const { return skipImageLength; } @@ -314,6 +314,12 @@ class KTXImage { // can use on ktx1/2 files, does a decompress if needed bool unpackLevel(uint32_t mipNumber, const uint8_t* srcData, uint8_t* dstData); + // helpers to work with the mipLevels array, mipLength and levelLength are important to get right + size_t mipLength(uint32_t mipNumber) const { return mipLevels[mipNumber].length; } + size_t levelLength(uint32_t mipNumber) const { return mipLevels[mipNumber].length * totalChunks(); } + size_t levelLengthCompressed(uint32_t mipNumber) const { return mipLevels[mipNumber].lengthCompressed; } + size_t chunkOffset(uint32_t mipNumber, uint32_t chunkNumber) const { return mipLevels[mipNumber].offset + mipLevels[mipNumber].length * chunkNumber; } + private: bool openKTX2(const uint8_t* imageData, size_t imageDataLength, bool isInfoOnly); diff --git a/libkram/kram/KramConfig.h b/libkram/kram/KramConfig.h index 97fb39f6..e3b84c0a 100644 --- a/libkram/kram/KramConfig.h +++ b/libkram/kram/KramConfig.h @@ -379,7 +379,7 @@ inline half4 toHalf4(const float4& vv) //--------------------------------------- -inline void mipDown(int32_t& w, int32_t& h, int32_t& d) +inline void mipDown(int32_t& w, int32_t& h, int32_t& d, uint32_t lod = 1) { // GL/D3D hobbled non-pow2 mips by only supporting round down, not round up // And then Metal followed OpenGL since it's the same hw and drivers. @@ -388,9 +388,27 @@ inline void mipDown(int32_t& w, int32_t& h, int32_t& d) // http://download.nvidia.com/developer/Papers/2005/NP2_Mipmapping/NP2_Mipmap_Creation.pdf // round-down - w = w / 2; - h = h / 2; - d = h / 2; + w >>= (int32_t)lod; + h >>= (int32_t)lod; + d >>= (int32_t)lod; + + if (w < 1) w = 1; + if (h < 1) h = 1; + if (d < 1) d = 1; +} + +inline void mipDown(uint32_t& w, uint32_t& h, uint32_t& d, uint32_t lod = 1) +{ + // GL/D3D hobbled non-pow2 mips by only supporting round down, not round up + // And then Metal followed OpenGL since it's the same hw and drivers. + // Round up adds an extra mip level to the chain, but results in much better filtering. 
+ // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_non_power_of_two.txt + // http://download.nvidia.com/developer/Papers/2005/NP2_Mipmapping/NP2_Mipmap_Creation.pdf + + // round-down + w >>= lod; + h >>= lod; + d >>= lod; if (w < 1) w = 1; if (h < 1) h = 1; From 1067d0c8749a033a1a983b2c5c205f461dac1e1a Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 24 May 2021 09:35:45 -0700 Subject: [PATCH 071/901] kramv - a little more work on state on buttons/menus buttons still don't highlight, so may need to do that explicitly in the update Also need to change how rgba buttons work --- kramv/KramViewerBase.h | 2 + kramv/KramViewerMain.mm | 89 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 87 insertions(+), 4 deletions(-) diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index d907c296..40f883da 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -81,6 +81,8 @@ class ShowSettings { bool isBlockGridShown = false; bool isAtlasGridShown = false; + bool isAnyGridShown() const { return isPixelGridShown || isBlockGridShown || isAtlasGridShown; } + // show all mips, faces, arrays all at once bool isShowingAllLevelsAndMips = false; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index e1ffb90d..5f88d149 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -545,9 +545,15 @@ - (NSStackView*)_addButtons { [button setToolTip:toolTip]; button.hidden = NO; - // turn off rounded bezel +#if 0 + // can use this with border + // TODO: for some reason this breaks clicking on buttons + // TODO: eliminate the rounded border + button.showsBorderOnlyWhileMouseInside = YES; + button.bordered = YES; +#else button.bordered = NO; - +#endif [button setFrame:rect]; // stackView seems to disperse the items evenly across the area, so this doesn't work @@ -1272,6 +1278,7 @@ - (void)updateUIAfterLoad { bool isJumpToNextHidden = !_showSettings->isArchive; + bool isRedHidden = false; bool isGreenHidden = _showSettings->numChannels <= 1; bool isBlueHidden = _showSettings->numChannels <= 2 && !_showSettings->isNormal; // reconstruct z = b on normals @@ -1279,6 +1286,9 @@ - (void)updateUIAfterLoad { // but internally store R,RG01,... etc. Can get more data from swizzle in the props. // Often alpha doesn't store anything useful to view. 
+ // TODO: may want to disable isPremul on block textures that already have premul in data + // or else premul is applied a second time to the visual + bool hasAlpha = _showSettings->numChannels >= 3; bool isAlphaHidden = !hasAlpha; @@ -1294,6 +1304,7 @@ - (void)updateUIAfterLoad { [self findButton:"S"].hidden = isShowAllHidden; [self findButton:"J"].hidden = isJumpToNextHidden; + [self findButton:"R"].hidden = isRedHidden; [self findButton:"G"].hidden = isGreenHidden; [self findButton:"B"].hidden = isBlueHidden; [self findButton:"A"].hidden = isAlphaHidden; @@ -1311,6 +1322,7 @@ - (void)updateUIAfterLoad { [self findMenuItem:"S"].hidden = isShowAllHidden; [self findMenuItem:"J"].hidden = isJumpToNextHidden; + [self findMenuItem:"R"].hidden = isRedHidden; [self findMenuItem:"G"].hidden = isGreenHidden; [self findMenuItem:"B"].hidden = isBlueHidden; [self findMenuItem:"A"].hidden = isAlphaHidden; @@ -1318,6 +1330,69 @@ - (void)updateUIAfterLoad { [self findMenuItem:"P"].hidden = isPremulHidden; [self findMenuItem:"N"].hidden = isSignedHidden; [self findMenuItem:"C"].hidden = isCheckerboardHidden; + + // also need to call after each toggle + [self updateUIControlState]; +} + +- (void)updateUIControlState +{ + // there is also mixed + auto On = NSControlStateValueOn; + auto Off = NSControlStateValueOff; + + auto showAllState = _showSettings->isShowingAllLevelsAndMips ? On : Off; + auto premulState = _showSettings->isPremul ? On : Off; + auto signedState = _showSettings->isSigned ? On : Off; + auto checkerboardState = _showSettings->isCheckerboardShown ? On : Off; + auto previewState = _showSettings->isPreview ? On : Off; + auto gridState = _showSettings->isAnyGridShown() ? On : Off; + auto wrapState = _showSettings->isWrap ? On : Off; + auto debugState = (_showSettings->debugMode != DebugModeNone) ? 
On : Off; + + // buttons +// [self findButton:"Y"].state = +// [self findButton:"F"].state = +// [self findButton:"M"].state = +// [self findButton:"J"].state = +// +// [self findButton:"R"].state = +// [self findButton:"G"].state = +// [self findButton:"B"].state = +// [self findButton:"A"].state = + + [self findButton:"S"].state = showAllState; + [self findButton:"O"].state = previewState; + [self findButton:"W"].state = wrapState; + [self findButton:"D"].state = gridState; + [self findButton:"E"].state = debugState; + + [self findButton:"P"].state = premulState; + [self findButton:"N"].state = signedState; + [self findButton:"C"].state = checkerboardState; + + // menus (may want to disable, not hide) + // problem is crashes since menu seems to strip hidden items + // enabled state has to be handled in validateUserInterfaceItem +// [self findMenuItem:"Y"].state = +// [self findMenuItem:"F"].state = +// [self findMenuItem:"M"].state = +// [self findMenuItem:"J"].state = +// +// [self findMenuItem:"R"].state = +// [self findMenuItem:"G"].state = +// [self findMenuItem:"B"].state = +// [self findMenuItem:"A"].state = + + [self findMenuItem:"S"].state = showAllState; + [self findMenuItem:"O"].state = previewState; + [self findMenuItem:"W"].state = wrapState; + [self findMenuItem:"D"].state = gridState; + [self findMenuItem:"E"].state = debugState; + + [self findMenuItem:"P"].state = premulState; + [self findMenuItem:"N"].state = signedState; + [self findMenuItem:"C"].state = checkerboardState; } @@ -1793,6 +1868,8 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown } if (isChanged) { + [self updateUIControlState]; + self.needsDisplay = YES; } } @@ -1880,7 +1957,7 @@ -(BOOL)loadArchive:(const char*)zipFilename if (!_zip.openForRead(_zipMmap.data(), _zipMmap.dataLength())) { return NO; } - + // load the first entry in the archive _fileIndex = 0; @@ -1988,8 +2065,12 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { return NO; } - // store the + // store the archive url self.imageURL = url; + + // add it to recent docs + NSDocumentController* dc = [NSDocumentController sharedDocumentController]; + [dc noteNewRecentDocumentURL:url]; } // now reload the filename if needed From a3810732c41dbf2f23654f81f68028f7a2f23f1c Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 24 May 2021 23:39:17 -0700 Subject: [PATCH 072/901] kram - fix bugs in rowBytes on LoadImageFromKTX, simplify loader, move ZipHelper to libkram Starting to simplify the loader, so can extract C++ portion from ObjC++. Could use loader for other APIs then. More error handling on loader. ZipHelper is useful in library for bundle handling. It didn't need to live in kramv. 
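As context for the rowBytes/indexing part of this change, here is a minimal standalone sketch of the row-major addressing that loadImageFromKTX settles on (the sizes and names here are illustrative, not from the loader itself): the row start must advance by width, not height, which only coincides for square images.

    // Minimal sketch: row-major pixel addressing with a row start of width * y.
    #include <cstdint>
    #include <vector>

    int main() {
        const int32_t width = 8, height = 4, numChannels = 4; // illustrative sizes
        std::vector<uint8_t> pixels(width * height * numChannels, 0);

        for (int32_t y = 0; y < height; ++y) {
            int32_t y0 = width * y;                    // row start in pixels (was height * y)
            for (int32_t x = 0; x < width; ++x) {
                int32_t srcX = (y0 + x) * numChannels; // element offset of pixel (x, y)
                pixels[srcX + 0] = 255;                // touch the red channel
            }
        }
        return 0;
    }
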
--- kramv/KramLoader.mm | 191 +++++++++------------- libkram/kram/KTXImage.h | 2 + libkram/kram/KramImage.cpp | 8 +- {kramv => libkram/kram}/KramZipHelper.cpp | 0 {kramv => libkram/kram}/KramZipHelper.h | 0 5 files changed, 87 insertions(+), 114 deletions(-) rename {kramv => libkram/kram}/KramZipHelper.cpp (100%) rename {kramv => libkram/kram}/KramZipHelper.h (100%) diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index b4cf12c9..7d75a90d 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -78,34 +78,47 @@ - (instancetype)init { // on macOS/arm, the M1 supports all 3 encode formats #define DO_DECODE TARGET_CPU_X86_64 -- (BOOL)decodeImageIfNeeded:(KTXImage&)image imageDecoded:(KTXImage&)imageDecoded useImageDecoded:(bool&)useImageDecoded -{ #if DO_DECODE - useImageDecoded = false; + +// this means format isnt supported on platform, but can be decoded to rgba to display +bool isDecodeImageNeeded(MyMTLPixelFormat pixelFormat) { + bool needsDecode = false; + + if (isETCFormat(pixelFormat)) { + needsDecode = true; + } + else if (isASTCFormat(pixelFormat)) { + needsDecode = true; + } + return needsDecode; +} + +bool decodeImage(KTXImage& image, KTXImage& imageDecoded) +{ Image imageUnused; // TODO: move to only using KTXImage, decode needs to move there if (isETCFormat(image.pixelFormat)) { if (!imageUnused.decode(image, imageDecoded, kTexEncoderEtcenc, false, "")) { return NO; } - useImageDecoded = true; } else if (isASTCFormat(image.pixelFormat)) { if (!imageUnused.decode(image, imageDecoded, kTexEncoderAstcenc, false, "")) { return NO; } - - useImageDecoded = true; + } + else { + assert(false); // don't call this routine if decode not needed } // TODO: decode BC format on iOS when not supported, but viewer only on macOS for now -#endif - return YES; } +#endif + #if SUPPORT_RGB inline bool isInternalRGBFormat(MyMTLPixelFormat format) { @@ -163,18 +176,17 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { // see if it needs decode first bool needsDecode = false; + if (isInternalRGBFormat(image.pixelFormat)) { needsDecode = true; } #if DO_DECODE - else if (isETCFormat(image.pixelFormat)) { - needsDecode = true; - } - else if (isASTCFormat(image.pixelFormat)) { + else if (isDecodeImageNeeded(image.pixelFormat)) { needsDecode = true; } #endif + // open it again, but unpack the levels if supercompressed if (needsDecode) { isInfoOnly = false; @@ -185,7 +197,7 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { #if SUPPORT_RGB if (isInternalRGBFormat(image.pixelFormat)) { - // loads and converts image to RGBA version + // loads and converts image from RGB to RGBA Image rbgaImage; if (!rbgaImage.loadImageFromKTX(image)) return nil; @@ -207,7 +219,7 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { } if (originalFormat != nullptr) { - *originalFormat = (MTLPixelFormat)rbgaImage2.pixelFormat; + *originalFormat = (MTLPixelFormat)rbgaImage2.pixelFormat; // TODO: should this return rgbaImage.pixelFormat ? 
} return [self loadTextureFromImage:rbgaImage2]; @@ -217,28 +229,29 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { if (originalFormat != nullptr) { *originalFormat = (MTLPixelFormat)image.pixelFormat; } - +#if DO_DECODE if (needsDecode) { KTXImage imageDecoded; - bool useImageDecoded = false; - if (![self decodeImageIfNeeded:image imageDecoded:imageDecoded useImageDecoded:useImageDecoded]) { + if (!decodeImage(image, imageDecoded)) { return nil; } - return [self loadTextureFromImage:useImageDecoded ? imageDecoded : image]; + return [self loadTextureFromImage:imageDecoded]; } - else { + else +#endif + { // fast load path directly from mmap'ed data, decompress direct to staging return [self blitTextureFromImage:image]; } } -static int32_t numberOfMipmapLevels(const Image& image) { - int32_t w = image.width(); - int32_t h = image.height(); - int32_t maxDim = MAX(w,h); +static uint32_t numberOfMipmapLevels(const Image& image) { + uint32_t w = image.width(); + uint32_t h = image.height(); + uint32_t maxDim = MAX(w,h); - int32_t numberOfMips = 1; + uint32_t numberOfMips = 1; while (maxDim > 1) { numberOfMips++; maxDim = maxDim >> 1; @@ -249,6 +262,8 @@ static int32_t numberOfMipmapLevels(const Image& image) { - (nullable id)loadTextureFromPNGData:(const uint8_t*)data dataSize:(int32_t)dataSize isSRGB:(BOOL)isSRGB originalFormat:(nullable MTLPixelFormat*)originalFormat { // can only load 8u and 16u from png, no hdr formats, no premul either, no props + // this also doesn't handle strips like done in libkram. + Image sourceImage; bool isLoaded = LoadPng(data, dataSize, false, false, sourceImage); if (!isLoaded) { @@ -279,14 +294,12 @@ static int32_t numberOfMipmapLevels(const Image& image) { } // cpu copy the bytes from the data object into the texture - int32_t sliceOrArrayOrFace = 0; - const MTLRegion region = { - { 0, 0, (NSUInteger)sliceOrArrayOrFace }, // MTLOrigin + { 0, 0, 0 }, // MTLOrigin { static_cast(image.width), static_cast(image.height), 1 } // MTLSize }; - int32_t bytesPerRow = 4 * sourceImage.width(); + size_t bytesPerRow = 4 * sourceImage.width(); [texture replaceRegion:region mipmapLevel:0 @@ -368,16 +381,6 @@ static int32_t numberOfMipmapLevels(const Image& image) { if (isPrivate) textureDescriptor.storageMode = MTLStorageModePrivate; - // only do this for viewer - // but allows encoded textures to enable/disable their sRGB state. - // Since the view isn't accurate, will probably pull this out. - // Keep usageRead set by default. - //textureDescriptor.usage = MTLTextureUsageShaderRead; - - // this was so that could toggle srgb on/off, but mips are built linear and encoded as lin or srgb - // in the encoded formats so this wouldn't accurately reflect with/without srgb. 
- //textureDescriptor.usage |= MTLTextureUsagePixelFormatView; - // Create the texture from the device by using the descriptor id texture = [self.device newTextureWithDescriptor:textureDescriptor]; if (!texture) { @@ -411,11 +414,14 @@ static int32_t numberOfMipmapLevels(const Image& image) { // TODO: reuse staging _buffer and _bufferOffset here, these large allocations take time vector mipStorage; - mipStorage.resize(image.mipLevels[0].length * numChunks); // enough to hold biggest mip + mipStorage.resize(image.mipLengthLargest() * numChunks); // enough to hold biggest mip //----------------- id texture = [self createTexture:image isPrivate:false]; + if (!texture) { + return nil; + } const uint8_t* srcLevelData = image.fileData; @@ -430,7 +436,9 @@ static int32_t numberOfMipmapLevels(const Image& image) { // unpack the whole level in-place if (image.isSupercompressed()) { - image.unpackLevel(mipLevelNumber, image.fileData + mipLevel.offset, mipStorage.data()); + if (!image.unpackLevel(mipLevelNumber, image.fileData + mipLevel.offset, mipStorage.data())) { + return nil; + } srcLevelData = mipStorage.data(); // going to upload from mipStorage temp array @@ -445,8 +453,8 @@ static int32_t numberOfMipmapLevels(const Image& image) { uint32_t bytesPerRow = 0; // 1D/1DArray textures set bytesPerRow to 0 - if ((MTLTextureType)image.textureType != MTLTextureType1D && - (MTLTextureType)image.textureType != MTLTextureType1DArray) + if (//image.textureType != MyMTLTextureType1D && + image.textureType != MyMTLTextureType1DArray) { // for compressed, bytesPerRow needs to be multiple of block size // so divide by the number of blocks making up the height @@ -491,60 +499,28 @@ static int32_t numberOfMipmapLevels(const Image& image) { // Note: due to API limit we can only copy one chunk at a time. With KramBlitLoader // can copy the whole level to buffer, and then reference chunks within. 
- bool isCubemap = image.textureType == MyMTLTextureTypeCube || - image.textureType == MyMTLTextureTypeCubeArray; - bool is3D = image.textureType == MyMTLTextureType3D; - bool is2DArray = image.textureType == MyMTLTextureType2DArray; - bool is1DArray = image.textureType == MyMTLTextureType1DArray; - + bool is3D = image.textureType == MyMTLTextureType3D; + // sync cpu copy the bytes from the data object into the texture MTLRegion region = { { 0, 0, 0 }, // MTLOrigin { (NSUInteger)w, (NSUInteger)h, 1 } // MTLSize }; - if (is1DArray) { - [texture replaceRegion:region - mipmapLevel:mipLevelNumber - slice:chunkNum - withBytes:srcBytes - bytesPerRow:bytesPerRow - bytesPerImage:0]; - } - else if (isCubemap) { - [texture replaceRegion:region - mipmapLevel:mipLevelNumber - slice:chunkNum - withBytes:srcBytes - bytesPerRow:bytesPerRow - bytesPerImage:0]; - } - else if (is3D) { + size_t bytesPerImage = 0; + if (is3D) { region.origin.z = chunkNum; chunkNum = 0; - - [texture replaceRegion:region - mipmapLevel:mipLevelNumber - slice:chunkNum - withBytes:srcBytes - bytesPerRow:bytesPerRow - bytesPerImage:mipStorageSize]; // only for 3d - } - else if (is2DArray) { - [texture replaceRegion:region - mipmapLevel:mipLevelNumber - slice:chunkNum - withBytes:srcBytes - bytesPerRow:bytesPerRow - bytesPerImage:0]; - } - else { - - [texture replaceRegion:region - mipmapLevel:mipLevelNumber - withBytes:srcBytes - bytesPerRow:bytesPerRow]; + bytesPerImage = mipStorageSize; } + + [texture replaceRegion:region + mipmapLevel:mipLevelNumber + slice:chunkNum + withBytes:srcBytes + bytesPerRow:bytesPerRow + bytesPerImage:bytesPerImage]; + } } } @@ -648,7 +624,7 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) { - (nullable id)blitTextureFromImage:(KTXImage &)image { if (_buffer == nil) { - // this is only 4k x 4x @ RGBA8u with mips, 8k x 8k compressed with mips + // this is enough to upload 4k x 4x @ RGBA8u with mips, 8k x 8k compressed with mips @96MB [self createStagingBufffer: 128*1024*1024]; } @@ -660,11 +636,11 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) { } id texture = [self createTexture:image isPrivate:true]; + if (!texture) + return nil; - // Note: always starting at 0 here, since kramv is only uploading 1 texture - // but a real uploader would upload until buffer full, and then reset this back to 0 - // A circular buffer if large enough to support multiple uploads over time. - // This can be a lot of temporary memory and must complete upload before changing. 
+ // this is index where texture will be added + uint32_t textureIndex = (uint32_t)_blitTextures.count; //-------------------------------- // upload mip levels @@ -692,9 +668,10 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) { const uint8_t* mipData = (const uint8_t*)image.fileData; bufferOffsets.resize(image.mipLevels.size()); - uint32_t bufferOffset = _bufferOffset; uint32_t numChunks = image.totalChunks(); + uint32_t bufferOffset = _bufferOffset; + for (uint32_t i = 0; i < numMips; ++i) { const KTXImageLevel& mipLevel = image.mipLevels[i]; @@ -702,23 +679,20 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) { bufferOffset = alignOffset(bufferOffset, blockSize); // this may have to decompress the level data - image.unpackLevel(i, mipData + mipLevel.offset, bufferData + bufferOffset); + if (!image.unpackLevel(i, mipData + mipLevel.offset, bufferData + bufferOffset)) { + return nil; + } bufferOffsets[i] = bufferOffset; bufferOffset += mipLevel.length * numChunks; } + // everything succeded, so advance the offset + _bufferOffset = bufferOffset; + [_blitTextures addObject: texture]; - // Should this be split off after cpu upload, could code store enough - // in a vector to jettison the KTXImage. Also need a queue of textures - // that are not fully loaded or haven't started if sharing the staging buffer. - // Note that it is just system ram, and can have allocations stored into it - // and can be viewed in the debugger and can do memcpy to it above. - - //-------------------- - // blit encoder calls must all be submitted to an open MTLBlitCommandEncoder, - // but may not have to be on the render thread? + // defer the blits from buffer until start of render thread when BlitEncoder is available for (uint32_t mipLevelNumber = 0; mipLevelNumber < numMips; ++mipLevelNumber) { // there's a 4 byte levelSize for each mipLevel @@ -735,8 +709,8 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) { uint32_t bytesPerRow = 0; // 1D/1DArray textures set bytesPerRow to 0 - if ((MTLTextureType)image.textureType != MTLTextureType1D && - (MTLTextureType)image.textureType != MTLTextureType1DArray) + if (//image.textureType != MyMTLTextureType1D && + image.textureType != MyMTLTextureType1DArray) { // for compressed, bytesPerRow needs to be multiple of block size // so divide by the number of blocks making up the height @@ -784,7 +758,7 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) { mipStorageSize, mipOffset, - (uint32_t)_blitTextures.count, + textureIndex, bytesPerRow, is3D // could derive from textureIndex lookup }); @@ -796,10 +770,7 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) { mipDown(w, h, d); } - // everything succeded, so advance the offset - _bufferOffset = bufferOffset; - [_blitTextures addObject: texture]; - + // this texture cannot be used until buffer uploads complete // but those happen at beginning of frame, so can attach to shaders, etc return texture; diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index 19c30f05..bcfb49b3 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -315,11 +315,13 @@ class KTXImage { bool unpackLevel(uint32_t mipNumber, const uint8_t* srcData, uint8_t* dstData); // helpers to work with the mipLevels array, mipLength and levelLength are important to get right + size_t mipLengthLargest() const { return mipLevels[0].length; } size_t mipLength(uint32_t mipNumber) const { return mipLevels[mipNumber].length; } size_t levelLength(uint32_t 
mipNumber) const { return mipLevels[mipNumber].length * totalChunks(); } size_t levelLengthCompressed(uint32_t mipNumber) const { return mipLevels[mipNumber].lengthCompressed; } size_t chunkOffset(uint32_t mipNumber, uint32_t chunkNumber) const { return mipLevels[mipNumber].offset + mipLevels[mipNumber].length * chunkNumber; } + private: bool openKTX2(const uint8_t* imageData, size_t imageDataLength, bool isInfoOnly); diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index b9704371..8916531f 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -178,7 +178,7 @@ bool Image::loadImageFromKTX(const KTXImage& image) } for (int32_t y = 0; y < _height; ++y) { - int32_t y0 = _height * y; + int32_t y0 = _width * y; for (int32_t x = 0, xEnd = _width; x < xEnd; ++x) { int32_t srcX = (y0 + x) * numSrcChannels; @@ -232,7 +232,7 @@ bool Image::loadImageFromKTX(const KTXImage& image) } for (int32_t y = 0; y < _height; ++y) { - int32_t y0 = _height * y; + int32_t y0 = _width * y; for (int32_t x = 0, xEnd = _width; x < xEnd; ++x) { int32_t srcX = (y0 + x) * numSrcChannels; @@ -280,7 +280,7 @@ bool Image::loadImageFromKTX(const KTXImage& image) float* dstPixels = (float*)(_pixelsFloat.data()); for (int32_t y = 0; y < _height; ++y) { - int32_t y0 = _height * y; + int32_t y0 = _width * y; for (int32_t x = 0, xEnd = _width; x < xEnd; ++x) { int32_t srcX = (y0 + x) * numSrcChannels; @@ -1702,7 +1702,7 @@ bool Image::createMipsFromChunks( TextureData outputTexture; outputTexture.width = dstImage.width; outputTexture.height = dstImage.height; - outputTexture.data.resize(dstImage.mipLevels[0].length); // allocate to size of largest mip + outputTexture.data.resize(dstImage.mipLengthLargest()); // This is for 8-bit data (pixelsFloat used for in-place mipgen) ImageData srcImage; diff --git a/kramv/KramZipHelper.cpp b/libkram/kram/KramZipHelper.cpp similarity index 100% rename from kramv/KramZipHelper.cpp rename to libkram/kram/KramZipHelper.cpp diff --git a/kramv/KramZipHelper.h b/libkram/kram/KramZipHelper.h similarity index 100% rename from kramv/KramZipHelper.h rename to libkram/kram/KramZipHelper.h From 110a4ff08df760187efd74b8945dcf074503703c Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 25 May 2021 00:36:34 -0700 Subject: [PATCH 073/901] kram - simplify 1,2,3,4 channel conversion from KTX to Image, fix Win build --- libkram/kram/KramImage.cpp | 98 ++++++++++++++---------------------- libkram/kram/KramZipHelper.h | 1 + 2 files changed, 39 insertions(+), 60 deletions(-) diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 8916531f..98b1450c 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -149,11 +149,13 @@ bool Image::loadImageFromKTX(const KTXImage& image) // so can call through to blockSize KTXHeader header; header.initFormatGL(image.pixelFormat); - int32_t blockSize = image.blockSize(); + //int32_t blockSize = image.blockSize(); _hasColor = isColorFormat(image.pixelFormat); _hasAlpha = isAlphaFormat(image.pixelFormat); + // TODO: this assumes 1,2,3 channel srcData has no rowPadding to say 4 bytes + switch (image.pixelFormat) { case MyMTLPixelFormatR8Unorm: case MyMTLPixelFormatRG8Unorm: @@ -167,34 +169,28 @@ bool Image::loadImageFromKTX(const KTXImage& image) const uint8_t* srcPixels = image.fileData + image.mipLevels[0].offset; - int32_t numSrcChannels = blockSize / sizeof(uint8_t); - int32_t numDstChannels = 4; - + int32_t numSrcChannels = numChannelsOfFormat(image.pixelFormat); + // Note: clearing unspecified 
channels to 0000, not 0001 // can set swizzleText when encoding _pixels.resize(4 * _width * _height); - if (numSrcChannels != 4) { - memset(_pixels.data(), 0, _pixels.size()); - } + + Color* dstPixels = (Color*)_pixels.data(); + Color dstTemp = {0,0,0,0}; + for (int32_t y = 0; y < _height; ++y) { - int32_t y0 = _width * y; + int32_t y0 = y * _width; - for (int32_t x = 0, xEnd = _width; x < xEnd; ++x) { + for (int32_t x = 0; x < _width; ++x) { int32_t srcX = (y0 + x) * numSrcChannels; - int32_t dstX = (y0 + x) * numDstChannels; + int32_t dstX = (y0 + x); // * numDstChannels; - switch (numSrcChannels) { - // all fallthrough - case 4: - _pixels[dstX + 3] = srcPixels[srcX + 3]; - case 3: - _pixels[dstX + 2] = srcPixels[srcX + 2]; - case 2: - _pixels[dstX + 1] = srcPixels[srcX + 1]; - case 1: - _pixels[dstX + 0] = srcPixels[srcX + 0]; + for (int32_t i = 0; i < numSrcChannels; ++i) { + *(&dstTemp.r + i) = srcPixels[srcX + i]; } + + dstPixels[dstX] = dstTemp; } } @@ -209,16 +205,11 @@ bool Image::loadImageFromKTX(const KTXImage& image) case MyMTLPixelFormatRGB16Float_internal: #endif case MyMTLPixelFormatRGBA16Float: { - int32_t numSrcChannels = blockSize / 2; // 2 = sizeof(_float16) - int32_t numDstChannels = 4; - + int32_t numSrcChannels = numChannelsOfFormat(image.pixelFormat); + // Note: clearing unspecified channels to 0000, not 0001 // can set swizzleText when encoding _pixelsFloat.resize(_width * _height); - if (numSrcChannels != 4) { - memset(_pixelsFloat.data(), 0, - _pixelsFloat.size() * sizeof(float4)); - } // treat as float for per channel copies float4* dstPixels = _pixelsFloat.data(); @@ -226,25 +217,22 @@ bool Image::loadImageFromKTX(const KTXImage& image) const half* srcPixels = (const half*)(image.fileData + image.mipLevels[0].offset); - half4 srcPixel; - for (int32_t i = 0; i < 4; ++i) { - srcPixel.v[i] = 0; - } - + half4 dstTemp = half4((half)0); + for (int32_t y = 0; y < _height; ++y) { - int32_t y0 = _width * y; + int32_t y0 = y * _width; - for (int32_t x = 0, xEnd = _width; x < xEnd; ++x) { + for (int32_t x = 0; x < _width; ++x) { int32_t srcX = (y0 + x) * numSrcChannels; - int32_t dstX = (y0 + x) * numDstChannels; + int32_t dstX = (y0 + x); // copy in available values for (int32_t i = 0; i < numSrcChannels; ++i) { - srcPixel.v[i] = srcPixels[srcX + i]; + dstTemp.v[i] = srcPixels[srcX + i]; } // use AVX to convert - dstPixels[dstX] = toFloat4(srcPixel); + dstPixels[dstX] = toFloat4(dstTemp); } } @@ -265,38 +253,28 @@ bool Image::loadImageFromKTX(const KTXImage& image) const float* srcPixels = (const float*)(image.fileData + image.mipLevels[0].offset); - int32_t numSrcChannels = blockSize / sizeof(float); - int32_t numDstChannels = 4; - + int32_t numSrcChannels = numChannelsOfFormat(image.pixelFormat); + // Note: clearing unspecified channels to 0000, not 0001 // can set swizzleText when encoding _pixelsFloat.resize(_width * _height); - if (numSrcChannels != 4) { - memset(_pixelsFloat.data(), 0, - _pixelsFloat.size() * sizeof(float4)); - } - + // treat as float for per channel copies - float* dstPixels = (float*)(_pixelsFloat.data()); - + float4* dstPixels = _pixelsFloat.data(); + float4 dstTemp = float4m(0.0f); + for (int32_t y = 0; y < _height; ++y) { - int32_t y0 = _width * y; + int32_t y0 = y * _width; - for (int32_t x = 0, xEnd = _width; x < xEnd; ++x) { + for (int32_t x = 0; x < _width; ++x) { int32_t srcX = (y0 + x) * numSrcChannels; - int32_t dstX = (y0 + x) * numDstChannels; + int32_t dstX = (y0 + x); - switch (numSrcChannels) { - // all fallthrough - case 4: - 
dstPixels[dstX + 3] = srcPixels[srcX + 3]; - case 3: - dstPixels[dstX + 2] = srcPixels[srcX + 2]; - case 2: - dstPixels[dstX + 1] = srcPixels[srcX + 1]; - case 1: - dstPixels[dstX + 0] = srcPixels[srcX + 0]; + for (int32_t i = 0; i < numSrcChannels; ++i) { + dstTemp[i] = srcPixels[srcX + i]; } + + dstPixels[dstX] = dstTemp; } } diff --git a/libkram/kram/KramZipHelper.h b/libkram/kram/KramZipHelper.h index ea3d566c..e224c7f3 100644 --- a/libkram/kram/KramZipHelper.h +++ b/libkram/kram/KramZipHelper.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include From f3fd4c14846aec8540833653bb2fa9f31d77cd3d Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 25 May 2021 09:35:22 -0700 Subject: [PATCH 074/901] kramv - change NSTrackingArea to use less CPU kramv was using 10% cpu in some User Interactive QoS worker threads that I didn't create. Set the tracking area to only when app is active. This dropped CPU use to 3% I think. --- kramv/KramViewerMain.mm | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 5f88d149..6f7e7af3 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -2264,7 +2264,10 @@ - (void)viewDidLoad // https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/EventOverview/TrackingAreaObjects/TrackingAreaObjects.html // this is better than requesting mousemoved events, they're only sent when cursor is inside _trackingArea = [[NSTrackingArea alloc] initWithRect:_view.bounds - options: (NSTrackingMouseEnteredAndExited | NSTrackingMouseMoved | NSTrackingActiveInKeyWindow ) + options: (NSTrackingMouseEnteredAndExited | NSTrackingMouseMoved | + NSTrackingActiveInActiveApp + //NSTrackingActiveInKeyWindow + ) owner:_view userInfo:nil]; [_view addTrackingArea:_trackingArea]; From 66ec9acc3fca2b6b9bf760e20804086099bff2cb Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 25 May 2021 09:56:16 -0700 Subject: [PATCH 075/901] kramv - handle highlight state on buttons Now these show that mode is active using a toggle button. State already set on/off based on criteria. --- kramv/KramViewerMain.mm | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 6f7e7af3..1edef786 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -545,6 +545,9 @@ - (NSStackView*)_addButtons { [button setToolTip:toolTip]; button.hidden = NO; + button.buttonType = NSButtonTypeToggle; + //NSButtonTypeOnOff + #if 0 // can use this with border // TODO: for some reason this breaks clicking on buttons @@ -1350,16 +1353,20 @@ - (void)updateUIControlState auto wrapState = _showSettings->isWrap ? On : Off; auto debugState = (_showSettings->debugMode != DebugModeNone) ? On : Off; + // buttons -// [self findButton:"Y"].state = -// [self findButton:"F"].state = -// [self findButton:"M"].state = -// [self findButton:"J"].state = -// -// [self findButton:"R"].state = -// [self findButton:"G"].state = -// [self findButton:"B"].state = -// [self findButton:"A"].state = + [self findButton:"Y"].state = _showSettings->arrayNumber > 1 ? On : Off; + [self findButton:"F"].state = _showSettings->faceNumber > 1 ? On : Off; + [self findButton:"M"].state = _showSettings->mipLOD > 1 ? 
On : Off; + + [self findButton:"J"].state = Off; + [self findButton:"U"].state = Off; + + // TODO: want these to show highlight + [self findButton:"R"].state = Off; + [self findButton:"G"].state = Off; + [self findButton:"B"].state = Off; + [self findButton:"A"].state = Off; [self findButton:"S"].state = showAllState; [self findButton:"O"].state = previewState; @@ -1511,6 +1518,10 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown _buttonStack.hidden = !_buttonStack.hidden; text = _buttonStack.hidden ? "Hide UI" : "Show UI"; + + // for button control state update only + if (!_buttonStack.hidden) + isChanged = true; break; // rgba channels From 2c1304ae13e12f8675bff2fd51c0ab4721e13e41 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Wed, 26 May 2021 08:46:58 -0700 Subject: [PATCH 076/901] kramv - fix on/off state on 3 buttons --- kramv/KramViewerMain.mm | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 1edef786..17a6613a 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1355,9 +1355,9 @@ - (void)updateUIControlState // buttons - [self findButton:"Y"].state = _showSettings->arrayNumber > 1 ? On : Off; - [self findButton:"F"].state = _showSettings->faceNumber > 1 ? On : Off; - [self findButton:"M"].state = _showSettings->mipLOD > 1 ? On : Off; + [self findButton:"Y"].state = _showSettings->arrayNumber > 0 ? On : Off; + [self findButton:"F"].state = _showSettings->faceNumber > 0 ? On : Off; + [self findButton:"M"].state = _showSettings->mipLOD > 0 ? On : Off; [self findButton:"J"].state = Off; [self findButton:"U"].state = Off; From cad986a6546dbd767c8fc8ec91fc6cab162cc0ed Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Wed, 26 May 2021 09:21:17 -0700 Subject: [PATCH 077/901] kramv - fix rgba toggles Removed some swizzles. Also need to add a grayscale mode too to look at luminance. 
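A minimal sketch of the toggle behavior the R/G/B/A key handlers now follow: pressing a key selects that single-channel mask, pressing it again returns to RGBA. The enum values mirror TextureChannels in the viewer; the small helper function is only for illustration, the viewer does this inline per key.

    #include <cstdio>

    enum TextureChannels { ModeRGBA = 0, ModeR001 = 1, Mode0G01 = 2, Mode00B1 = 3, ModeAAA1 = 8 };

    // toggle between a single-channel mask and full RGBA
    TextureChannels toggleChannel(TextureChannels current, TextureChannels requested) {
        return (current == requested) ? ModeRGBA : requested;
    }

    int main() {
        TextureChannels channels = ModeRGBA;
        channels = toggleChannel(channels, ModeR001); // R pressed -> red-only mask
        channels = toggleChannel(channels, ModeR001); // R pressed again -> back to RGBA
        printf("channels = %d\n", (int)channels);     // prints 0 (ModeRGBA)
        return 0;
    }
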
--- kramv/KramShaders.h | 6 +- kramv/KramShaders.metal | 10 +-- kramv/KramViewerBase.h | 7 ++- kramv/KramViewerMain.mm | 131 +++++++++++++++++++++++++++------------- 4 files changed, 101 insertions(+), 53 deletions(-) diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index 5169213c..3a192e0b 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -63,9 +63,9 @@ typedef NS_ENUM(int32_t, ShaderTextureChannels) ShMode00B1 = 3, // see grayscale channels - ShModeRRR1 = 5, - ShModeGGG1 = 6, - ShModeBBB1 = 7, +// ShModeRRR1 = 5, +// ShModeGGG1 = 6, +// ShModeBBB1 = 7, ShModeAAA1 = 8, }; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index f3289232..bd430fed 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -592,14 +592,16 @@ float4 DrawPixels( switch(uniforms.channels) { case ShModeRGBA: break; + + // with premul formats, already have ra,ga,ba case ShModeR001: c = float4(c.r,0,0,1); break; case ShMode0G01: c = float4(0,c.g,0,1); break; case ShMode00B1: c = float4(0,0,c.b,1); break; - case ShModeRRR1: c = float4(c.rrr,1); break; - case ShModeGGG1: c = float4(c.ggg,1); break; - case ShModeBBB1: c = float4(c.bbb,1); break; - +// case ShModeRRR1: c = float4(c.rrr,1); break; +// case ShModeGGG1: c = float4(c.ggg,1); break; +// case ShModeBBB1: c = float4(c.bbb,1); break; +// case ShModeAAA1: c = float4(c.aaa,1); break; } diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 40f883da..9b3f2e2c 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -24,9 +24,10 @@ enum TextureChannels Mode00B1 = 3, // see grayscale channels - ModeRRR1 = 5, - ModeGGG1 = 6, - ModeBBB1 = 7, +// ModeRRR1 = 5, +// ModeGGG1 = 6, +// ModeBBB1 = 7, + ModeAAA1 = 8, }; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 17a6613a..a0c370bf 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1343,30 +1343,50 @@ - (void)updateUIControlState // there is also mixed auto On = NSControlStateValueOn; auto Off = NSControlStateValueOff; - - auto showAllState = _showSettings->isShowingAllLevelsAndMips ? On : Off; - auto premulState = _showSettings->isPremul ? On : Off; - auto signedState = _showSettings->isSigned ? On : Off; - auto checkerboardState = _showSettings->isCheckerboardShown ? On : Off; - auto previewState = _showSettings->isPreview ? On : Off; - auto gridState = _showSettings->isAnyGridShown() ? On : Off; - auto wrapState = _showSettings->isWrap ? On : Off; - auto debugState = (_showSettings->debugMode != DebugModeNone) ? On : Off; + #define toState(x) (x) ? 
On : Off + + auto showAllState = toState(_showSettings->isShowingAllLevelsAndMips); + auto premulState = toState(_showSettings->isPremul); + auto signedState = toState(_showSettings->isSigned); + auto checkerboardState = toState(_showSettings->isCheckerboardShown); + auto previewState = toState(_showSettings->isPreview); + auto gridState = toState(_showSettings->isAnyGridShown()); + auto wrapState = toState(_showSettings->isWrap); + auto debugState = toState(_showSettings->debugMode != DebugModeNone); + + TextureChannels& channels = _showSettings->channels; + + auto redState = toState(channels == TextureChannels::ModeR001); + auto greenState = toState(channels == TextureChannels::Mode0G01); + auto blueState = toState(channels == TextureChannels::Mode00B1); + auto alphaState = toState(channels == TextureChannels::ModeAAA1); + + auto arrayState = toState(_showSettings->arrayNumber > 0); + auto faceState = toState(_showSettings->faceNumber > 0); + auto mipState = toState(_showSettings->mipLOD > 0); + + // TODO: UI state, and vertical state + auto uiState = toState(_buttonStack.hidden); + auto helpState = Off; + auto infoState = Off; + auto jumpState = Off; // buttons - [self findButton:"Y"].state = _showSettings->arrayNumber > 0 ? On : Off; - [self findButton:"F"].state = _showSettings->faceNumber > 0 ? On : Off; - [self findButton:"M"].state = _showSettings->mipLOD > 0 ? On : Off; + [self findButton:"?"].state = helpState; + [self findButton:"I"].state = infoState; + + [self findButton:"Y"].state = arrayState; + [self findButton:"F"].state = faceState; + [self findButton:"M"].state = mipState; - [self findButton:"J"].state = Off; - [self findButton:"U"].state = Off; + [self findButton:"J"].state = jumpState; + [self findButton:"U"].state = Off; // always off - // TODO: want these to show highlight - [self findButton:"R"].state = Off; - [self findButton:"G"].state = Off; - [self findButton:"B"].state = Off; - [self findButton:"A"].state = Off; + [self findButton:"R"].state = redState; + [self findButton:"G"].state = greenState; + [self findButton:"B"].state = blueState; + [self findButton:"A"].state = alphaState; [self findButton:"S"].state = showAllState; [self findButton:"O"].state = previewState; @@ -1381,15 +1401,21 @@ - (void)updateUIControlState // menus (may want to disable, not hide) // problem is crashes since menu seems to strip hidden items // enabled state has to be handled in validateUserInterfaceItem -// [self findMenuItem:"Y"].state = -// [self findMenuItem:"F"].state = -// [self findMenuItem:"M"].state = -// [self findMenuItem:"J"].state = -// -// [self findMenuItem:"R"].state = -// [self findMenuItem:"G"].state = -// [self findMenuItem:"B"].state = -// [self findMenuItem:"A"].state = + + // when menu state is selected, it may not uncheck when advancing through state + [self findMenuItem:"?"].state = helpState; + [self findMenuItem:"I"].state = infoState; + + [self findMenuItem:"Y"].state = arrayState; + [self findMenuItem:"F"].state = faceState; + [self findMenuItem:"M"].state = mipState; + [self findMenuItem:"J"].state = jumpState; + [self findMenuItem:"U"].state = uiState; + + [self findMenuItem:"R"].state = redState; + [self findMenuItem:"G"].state = greenState; + [self findMenuItem:"B"].state = blueState; + [self findMenuItem:"A"].state = alphaState; [self findMenuItem:"S"].state = showAllState; [self findMenuItem:"O"].state = previewState; @@ -1494,8 +1520,8 @@ - (void)keyDown:(NSEvent *)theEvent - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown { 
// Some data depends on the texture data (isSigned, isNormal, ..) - TextureChannels& channels = _showSettings->channels; bool isChanged = false; + bool isStateChanged = false; // TODO: fix isChanged to only be set when value changes // f.e. clamped values don't need to re-render @@ -1508,6 +1534,9 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown _buttonStack.orientation = isVertical ? NSUserInterfaceLayoutOrientationVertical : NSUserInterfaceLayoutOrientationHorizontal; text = isVertical ? "Vert UI" : "Horiz UI"; + + // just to update toggle state to Off + isStateChanged = true; break; } case Key::U: @@ -1519,22 +1548,23 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown _buttonStack.hidden = !_buttonStack.hidden; text = _buttonStack.hidden ? "Hide UI" : "Show UI"; - // for button control state update only - if (!_buttonStack.hidden) - isChanged = true; + // just to update toggle state to Off + isStateChanged = true; break; // rgba channels case Key::Num1: case Key::R: if (![self findButton:"R"].isHidden) { - if (channels == TextureChannels::ModeRRR1 || channels == TextureChannels::ModeR001) { + TextureChannels& channels = _showSettings->channels; + + if (channels == TextureChannels::ModeR001) { channels = TextureChannels::ModeRGBA; text = "Mask RGBA"; } else { - channels = isShiftKeyDown ? TextureChannels::ModeRRR1 : TextureChannels::ModeR001; - text = isShiftKeyDown ? "Mask RRR1" : "Mask R001"; + channels = TextureChannels::ModeR001; + text = "Mask R001"; } isChanged = true; } @@ -1544,13 +1574,15 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown case Key::Num2: case Key::G: if (![self findButton:"G"].isHidden) { - if (channels == TextureChannels::ModeGGG1 || channels == TextureChannels::Mode0G01) { + TextureChannels& channels = _showSettings->channels; + + if (channels == TextureChannels::Mode0G01) { channels = TextureChannels::ModeRGBA; text = "Mask RGBA"; } else { - channels = isShiftKeyDown ? TextureChannels::ModeGGG1 : TextureChannels::Mode0G01; - text = isShiftKeyDown ? "Mask GGG1" : "Mask 0G01"; + channels = TextureChannels::Mode0G01; + text = "Mask 0G01"; } isChanged = true; } @@ -1559,14 +1591,17 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown case Key::Num3: case Key::B: if (![self findButton:"B"].isHidden) { - if (channels == TextureChannels::ModeBBB1 || channels == TextureChannels::Mode00B1) { + TextureChannels& channels = _showSettings->channels; + + if (channels == TextureChannels::Mode00B1) { channels = TextureChannels::ModeRGBA; text = "Mask RGBA"; } else { - channels = isShiftKeyDown ? TextureChannels::ModeBBB1 : TextureChannels::Mode00B1; - text = isShiftKeyDown ? 
"Mask BBB1" : "Mask 00B1"; + channels = TextureChannels::Mode00B1; + text = "Mask 00B1"; } + isChanged = true; } break; @@ -1574,6 +1609,8 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown case Key::Num4: case Key::A: if (![self findButton:"A"].isHidden) { + TextureChannels& channels = _showSettings->channels; + if (channels == TextureChannels::ModeAAA1) { channels = TextureChannels::ModeRGBA; text = "Mask RGBA"; @@ -1582,6 +1619,7 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown channels = TextureChannels::ModeAAA1; text = "Mask AAA1"; } + isChanged = true; } break; @@ -1612,6 +1650,9 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown "W-wrap, Premul, N-signed\n" "⇧Mip, ⇧Face, ⇧Y-array/slice\n" "⇧J-next bundle image\n"; + + // just to update toggle state to Off + isStateChanged = true; break; case Key::Num0: { // scale and reset pan @@ -1779,6 +1820,8 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown if (_showSettings->isHudShown) { sprintf(text, "%s", isShiftKeyDown ? _showSettings->imageInfoVerbose.c_str() : _showSettings->imageInfo.c_str()); } + // just to update toggle state to Off + isStateChanged = true; break; // toggle wrap/clamp @@ -1878,9 +1921,11 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown [self setHudText:text.c_str()]; } - if (isChanged) { + if (isChanged || isStateChanged) { [self updateUIControlState]; - + } + + if (isChanged) { self.needsDisplay = YES; } } From 7c8d05d83980703986c1bcbc5d84cd4af5f6c280 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Thu, 27 May 2021 13:01:03 -0700 Subject: [PATCH 078/901] kram - break off decoder/encoder from Image, add block decode, add macOS thumbnailer This is the code to generate a Quicklook thumbnailer. The project was setup by Xcode, so I don't have it tied to CMake yet. Had to move all projects to 10.15, where this frameworks is available. This is an app extension for thumbnailing tied into kramv. Still need to detail the ktx/ktx2 formats in the plist. Broke out the Encoder/Decoder since they shouldn't be so tied to the single-level Image class. The thumbnailer wanted to extract a single mip, and render it to CG. So that's done now. Simplfied getting mip dimensions, since with mipDown it's a shift and max(1, w). 
--- CMakeLists.txt | 2 +- kram-thumb/Info.plist | 40 ++ kram-thumb/KramThumbnailProvider.h | 16 + kram-thumb/KramThumbnailProvider.mm | 153 +++++++ kram-thumb/kram_thumb.entitlements | 10 + kramv/KramLoader.mm | 16 +- libkram/kram/KTXImage.cpp | 15 + libkram/kram/KTXImage.h | 57 ++- libkram/kram/Kram.cpp | 24 +- libkram/kram/KramConfig.h | 37 -- libkram/kram/KramImage.cpp | 670 +++++++++++++++------------- libkram/kram/KramImage.h | 94 ++-- libkram/kram/KramImageInfo.cpp | 4 +- libkram/kram/KramMipper.cpp | 2 + libkram/kram/KramSDFMipper.cpp | 8 +- 15 files changed, 729 insertions(+), 419 deletions(-) create mode 100644 kram-thumb/Info.plist create mode 100644 kram-thumb/KramThumbnailProvider.h create mode 100644 kram-thumb/KramThumbnailProvider.mm create mode 100644 kram-thumb/kram_thumb.entitlements diff --git a/CMakeLists.txt b/CMakeLists.txt index 5018cb5b..20cf0e14 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,7 +57,7 @@ if (APPLE) set(CMAKE_OSX_DEPLOYMENT_TARGET "11.0" CACHE STRING "Minimum iOS") set(CMAKE_OSX_ARCHITECTURES "$(ARCHS_STANDARD)" CACHE STRING "Architecture iOS") else() - set(CMAKE_OSX_DEPLOYMENT_TARGET "10.14" CACHE STRING "Minimum macOS") + set(CMAKE_OSX_DEPLOYMENT_TARGET "10.15" CACHE STRING "Minimum macOS") set(CMAKE_OSX_ARCHITECTURES "$(ARCHS_STANDARD)" CACHE STRING "Architecture macOS") endif() endif() diff --git a/kram-thumb/Info.plist b/kram-thumb/Info.plist new file mode 100644 index 00000000..e6b0324d --- /dev/null +++ b/kram-thumb/Info.plist @@ -0,0 +1,40 @@ + + + + + CFBundleDevelopmentRegion + $(DEVELOPMENT_LANGUAGE) + CFBundleDisplayName + kram-thumb + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + $(PRODUCT_BUNDLE_PACKAGE_TYPE) + CFBundleShortVersionString + 1.0 + CFBundleVersion + 1 + LSMinimumSystemVersion + $(MACOSX_DEPLOYMENT_TARGET) + NSExtension + + NSExtensionAttributes + + QLSupportedContentTypes + + QLThumbnailMinimumDimension + 0 + + NSExtensionPointIdentifier + com.apple.quicklook.thumbnail + NSExtensionPrincipalClass + ThumbnailProvider + + + diff --git a/kram-thumb/KramThumbnailProvider.h b/kram-thumb/KramThumbnailProvider.h new file mode 100644 index 00000000..7ee38563 --- /dev/null +++ b/kram-thumb/KramThumbnailProvider.h @@ -0,0 +1,16 @@ +// +// KramThumbnailProvider.h +// kram-thumb +// +// Created by Alec on 5/26/21. +// + +#import + +NS_ASSUME_NONNULL_BEGIN + +@interface KramThumbnailProvider : QLThumbnailProvider + +@end + +NS_ASSUME_NONNULL_END diff --git a/kram-thumb/KramThumbnailProvider.mm b/kram-thumb/KramThumbnailProvider.mm new file mode 100644 index 00000000..90c9a866 --- /dev/null +++ b/kram-thumb/KramThumbnailProvider.mm @@ -0,0 +1,153 @@ +// +// KramThumbnailProvider.mm +// kram-thumb +// +// Created by Alec on 5/26/21. +// + +#import "KramThumbnailProvider.h" + +#include "Kram.h" +#include "KramMmapHelper.h" +#include "KramLog.h" +#include "KTXImage.h" +#include "KramImage.h" // for KramDecoder + +#include + +//@import Accelerate // for vimage +#import + +using namespace kram; + +@implementation KramThumbnailProvider + +- (void)provideThumbnailForFileRequest:(QLFileThumbnailRequest *)request completionHandler:(void (^)(QLThumbnailReply * _Nullable, NSError * _Nullable))handler { + + // This + // Second way: Draw the thumbnail into a context passed to your block, set up with Core Graphics's coordinate system. 
+ handler([QLThumbnailReply replyWithContextSize:request.maximumSize drawingBlock:^BOOL(CGContextRef _Nonnull context) + { + const char* file = [request.fileURL fileSystemRepresentation]; + + if (!(endsWith(file, ".ktx") || endsWith(file, ".ktx2"))) { + return NO; + } + + // load the mmap file, and interpret it as a KTXImage + MmapHelper mmapHelper; + if (!mmapHelper.open(file)) { + return NO; + } + + // open but leave the image compressed if KTX2 + zstd + bool isInfoOnly = true; + + KTXImage image; + if (!image.open(mmapHelper.data(), mmapHelper.dataLength(), isInfoOnly)) { + return NO; + } + + // no BC6 or ASTC HDR yet for thumbs, just do LDR first + if (isHdrFormat(image.pixelFormat)) { + return NO; + } + + // TODO: hookup to whether content is already premul with alpha + // will have to come from props. ASTC always 4 channels but may hold other daa. + bool isPremul = numChannelsOfFormat(image.pixelFormat) >= 4; + + // unpack a level to get the blocks + uint32_t mipNumber = 0; + + uint32_t w, h, d; + for (uint32_t i = 0; i < image.header.numberOfMipmapLevels; ++i) { + image.mipDimensions(i, w, h, d); + if (w > request.maximumSize.width || h > request.maximumSize.height) { + mipNumber++; + } + } + + // clamp to smallest + mipNumber = std::min(mipNumber, image.header.numberOfMipmapLevels); + image.mipDimensions(mipNumber, w, h, d); + + uint32_t chunkNum = 0; // TODO: could embed chunk(s) to gen thumbnail from, cube/array? + uint32_t numChunks = image.totalChunks(); + + vector mipData; + + // then decode any blocks to rgba8u, not dealing with HDR formats yet + if (image.isSupercompressed()) { + const uint8_t* srcData = image.fileData + image.mipLevels[mipNumber].offset; + + mipData.resize(image.mipLevels[mipNumber].length * numChunks); + uint8_t* dstData = mipData.data(); + if (!image.unpackLevel(mipNumber, srcData, dstData)) { + return NO; + } + } + + // now extract the chunk for the thumbnail out of that level + if (numChunks > 1) { + macroUnusedVar(chunkNum); + assert(chunkNum == 0); + + // this just truncate to chunk 0 instead of copying chunkNum first + mipData.resize(image.mipLevels[mipNumber].length); + } + + // new decode the blocks in that chunk to + KTXImage imageDecoded; + if (isBlockFormat(image.pixelFormat)) { + + KramDecoder decoder; + KramDecoderParams params; + + vector dstMipData; + + // want to just decode one chunk of the level that was unpacked abovve + if (!decoder.decodeBlocks(w, h, mipData.data(), mipData.size(), image.pixelFormat, dstMipData, params)) { + return NO; + } + + mipData = dstMipData; + } + + // https://developer.apple.com/library/archive/documentation/GraphicsImaging/Conceptual/drawingwithquartz2d/dq_images/dq_images.html#//apple_ref/doc/uid/TP30001066-CH212-TPXREF101 + + uint32_t rowBytes = w * sizeof(uint32_t); + + // use vimage in the Accelerate.framework + // https://developer.apple.com/library/archive/releasenotes/Performance/RN-vecLib/index.html#//apple_ref/doc/uid/TP40001049 + + vImage_Buffer buf = { mipData.data(), h, w, rowBytes }; + + // Declare the pixel format for the vImage_Buffer + vImage_CGImageFormat format = { + .bitsPerComponent = 8, + .bitsPerPixel = 32, + }; + + format.bitmapInfo = kCGBitmapByteOrderDefault | (isPremul ? kCGImageAlphaPremultipliedLast: kCGImageAlphaLast); + + // don't need to allocate, can requse memory from mip + + // TODO: might want to convert to PNG, but maybe thumbnail system does that automatically? 
+ // see how big thumbs.db is after running this + + //CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB(); + vImage_Error err = 0; + CGImageRef cgImage = vImageCreateCGImageFromBuffer( &buf, &format, NULL, NULL, kvImageNoAllocate, &err); + + CGRect rect = CGRectMake(0, 0, w, h); + + // The image is scaled—disproportionately, if necessary—to fit the bounds + // specified by the rect parameter. + CGContextDrawImage(context, rect, cgImage); + + return YES; + }], nil); +} + +@end diff --git a/kram-thumb/kram_thumb.entitlements b/kram-thumb/kram_thumb.entitlements new file mode 100644 index 00000000..f2ef3ae0 --- /dev/null +++ b/kram-thumb/kram_thumb.entitlements @@ -0,0 +1,10 @@ + + + + + com.apple.security.app-sandbox + + com.apple.security.files.user-selected.read-only + + + diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 7d75a90d..e2612552 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -96,15 +96,16 @@ bool isDecodeImageNeeded(MyMTLPixelFormat pixelFormat) { bool decodeImage(KTXImage& image, KTXImage& imageDecoded) { - Image imageUnused; // TODO: move to only using KTXImage, decode needs to move there + KramDecoderParams decoderParams; + KramDecoder decoder; if (isETCFormat(image.pixelFormat)) { - if (!imageUnused.decode(image, imageDecoded, kTexEncoderEtcenc, false, "")) { + if (!decoder.decode(image, imageDecoded, decoderParams)) { return NO; } } else if (isASTCFormat(image.pixelFormat)) { - if (!imageUnused.decode(image, imageDecoded, kTexEncoderAstcenc, false, "")) { + if (!decoder.decode(image, imageDecoded, decoderParams)) { return NO; } } @@ -198,8 +199,8 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { #if SUPPORT_RGB if (isInternalRGBFormat(image.pixelFormat)) { // loads and converts image from RGB to RGBA - Image rbgaImage; - if (!rbgaImage.loadImageFromKTX(image)) + Image rgbaImage; + if (!rgbaImage.loadImageFromKTX(image)) return nil; // re-encode it as a KTXImage, even though this is just a copy @@ -214,7 +215,8 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { ImageInfo dstImageInfo; dstImageInfo.initWithArgs(dstImageInfoArgs); - if (!rbgaImage.encode(dstImageInfo, rbgaImage2)) { + KramEncoder encoder; + if (!encoder.encode(dstImageInfo, rgbaImage, rbgaImage2)) { return nil; } @@ -303,7 +305,7 @@ static uint32_t numberOfMipmapLevels(const Image& image) { [texture replaceRegion:region mipmapLevel:0 - withBytes:sourceImage.pixels() + withBytes:sourceImage.pixels().data() bytesPerRow:bytesPerRow]; // have to schedule autogen inside render using MTLBlitEncoder diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index ff45debc..fa38aeec 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -554,6 +554,11 @@ bool isASTCFormat(MyMTLPixelFormat format) return it.isASTC(); } +bool isBlockFormat(MyMTLPixelFormat format) +{ + return isBCFormat(format) || isETCFormat(format) || isASTCFormat(format); +} + bool isExplicitFormat(MyMTLPixelFormat format) { const auto& it = formatInfo(format); @@ -771,6 +776,16 @@ uint32_t KTXImage::mipLevelSize(uint32_t width_, uint32_t height_) const return count * size; } +uint32_t KTXImage::mipLevelSize(uint32_t mipNumber) const +{ + uint32_t w = width; + uint32_t h = height; + uint32_t d = depth; + + mipDown(w, h, d, mipNumber); + return mipLevelSize(w, h); +} + uint32_t KTXImage::blockCountRows(uint32_t width_) const { assert(width_ >= 1); diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index bcfb49b3..bc2bfae8 
100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -296,11 +296,7 @@ class KTXImage { uint32_t blockCount(uint32_t width_, uint32_t height_) const; uint32_t blockCountRows(uint32_t width_) const; - // mip data depends on format - uint32_t mipLevelSize(uint32_t width_, uint32_t height_) const; - //int totalMipLevels() const; - uint32_t totalChunks() const; - + // this is where KTXImage holds all mip data internally void reserveImageData(); vector& imageData(); @@ -315,10 +311,21 @@ class KTXImage { bool unpackLevel(uint32_t mipNumber, const uint8_t* srcData, uint8_t* dstData); // helpers to work with the mipLevels array, mipLength and levelLength are important to get right + // mip data depends on format + + // mip + void mipDimensions(uint32_t mipNumber, uint32_t& width_, uint32_t& height_, uint32_t& depth_) const; + uint32_t mipLevelSize(uint32_t width_, uint32_t height_) const; + uint32_t mipLevelSize(uint32_t mipNumber) const; size_t mipLengthLargest() const { return mipLevels[0].length; } size_t mipLength(uint32_t mipNumber) const { return mipLevels[mipNumber].length; } + + // level size_t levelLength(uint32_t mipNumber) const { return mipLevels[mipNumber].length * totalChunks(); } size_t levelLengthCompressed(uint32_t mipNumber) const { return mipLevels[mipNumber].lengthCompressed; } + + // chunk + uint32_t totalChunks() const; size_t chunkOffset(uint32_t mipNumber, uint32_t chunkNumber) const { return mipLevels[mipNumber].offset + mipLevels[mipNumber].length * chunkNumber; } @@ -354,6 +361,45 @@ class KTXImage { const uint8_t* fileData; // mmap data }; +// GL/D3D hobbled non-pow2 mips by only supporting round down, not round up +// And then Metal followed OpenGL since it's the same hw and drivers. +// Round up adds an extra mip level to the chain, but results in much better filtering. +// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_non_power_of_two.txt +// http://download.nvidia.com/developer/Papers/2005/NP2_Mipmapping/NP2_Mipmap_Creation.pdf +inline void mipDown(int32_t& w, int32_t& h, int32_t& d, uint32_t lod = 1) +{ + // round-down + w >>= (int32_t)lod; + h >>= (int32_t)lod; + d >>= (int32_t)lod; + + if (w < 1) w = 1; + if (h < 1) h = 1; + if (d < 1) d = 1; +} + +inline void mipDown(uint32_t& w, uint32_t& h, uint32_t& d, uint32_t lod = 1) +{ + // round-down + w >>= lod; + h >>= lod; + d >>= lod; + + if (w < 1) w = 1; + if (h < 1) h = 1; + if (d < 1) d = 1; +} + +inline void KTXImage::mipDimensions(uint32_t mipNumber, uint32_t& width_, uint32_t& height_, uint32_t& depth_) const { + assert(mipNumber < mipLevels.size()); + + width_ = width; + height_ = height; + depth_ = depth; + + mipDown(width_, height_, depth_, mipNumber); +} + const char* supercompressionName(KTX2Supercompression type); // Generic format helpers. All based on the ubiquitous type. 
@@ -368,6 +414,7 @@ bool isSignedFormat(MyMTLPixelFormat format); bool isBCFormat(MyMTLPixelFormat format); bool isETCFormat(MyMTLPixelFormat format); bool isASTCFormat(MyMTLPixelFormat format); +bool isBlockFormat(MyMTLPixelFormat format); bool isExplicitFormat(MyMTLPixelFormat format); Int2 blockDimsOfFormat(MyMTLPixelFormat format); diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index aaacdbc0..84dc571a 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -1562,15 +1562,14 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, if (isVerbose) { // dump mips/dims, but this can be a lot of data on arrays int32_t mipLevel = 0; - int32_t w = srcImage.width; - int32_t h = srcImage.height; - int32_t d = srcImage.depth; // num chunks append_sprintf(info, "chun: %d\n", numChunks); for (const auto& mip : srcImage.mipLevels) { - + uint32_t w, h, d; + srcImage.mipDimensions(mipLevel, w, h, d); + switch (textureType) { case MyMTLTextureType3D: append_sprintf(info, @@ -1605,9 +1604,6 @@ string kramInfoKTXToString(const string& srcFilename, const KTXImage& srcImage, mip.length // only size of one mip right now, not mip * numChunks ); } - - // drop a mip level - mipDown(w, h, d); } } @@ -1768,8 +1764,13 @@ static int32_t kramAppDecode(vector& args) encoderName(textureDecoder)); } - Image tmpImage; // just to call decode - success = success && tmpImage.decode(srcImage, tmpFileHelper.pointer(), textureDecoder, isVerbose, swizzleText); + KramDecoderParams params; + params.isVerbose = isVerbose; + params.decoder = textureDecoder; + params.swizzleText = swizzleText; + + KramDecoder decoder; // just to call decode + success = success && decoder.decode(srcImage, tmpFileHelper.pointer(), params); // rename to dest filepath, note this only occurs if above succeeded // so any existing files are left alone on failure. @@ -2214,7 +2215,7 @@ static int32_t kramAppEncode(vector& args) // so now can complete validation knowing hdr vs. ldr input // this checks the dst format if (success) { - bool isHDR = srcImage.pixelsFloat() != nullptr; + bool isHDR = !srcImage.pixelsFloat().empty(); if (isHDR) { MyMTLPixelFormat format = info.pixelFormat; @@ -2272,7 +2273,8 @@ static int32_t kramAppEncode(vector& args) } if (success) { - success = srcImage.encode(info, tmpFileHelper.pointer()); + KramEncoder encoder; + success = encoder.encode(info, srcImage, tmpFileHelper.pointer()); if (!success) { KLOGE("Kram", "encode failed"); diff --git a/libkram/kram/KramConfig.h b/libkram/kram/KramConfig.h index e3b84c0a..1a485c73 100644 --- a/libkram/kram/KramConfig.h +++ b/libkram/kram/KramConfig.h @@ -378,43 +378,6 @@ inline half4 toHalf4(const float4& vv) //--------------------------------------- - -inline void mipDown(int32_t& w, int32_t& h, int32_t& d, uint32_t lod = 1) -{ - // GL/D3D hobbled non-pow2 mips by only supporting round down, not round up - // And then Metal followed OpenGL since it's the same hw and drivers. - // Round up adds an extra mip level to the chain, but results in much better filtering. 
- // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_non_power_of_two.txt - // http://download.nvidia.com/developer/Papers/2005/NP2_Mipmapping/NP2_Mipmap_Creation.pdf - - // round-down - w >>= (int32_t)lod; - h >>= (int32_t)lod; - d >>= (int32_t)lod; - - if (w < 1) w = 1; - if (h < 1) h = 1; - if (d < 1) d = 1; -} - -inline void mipDown(uint32_t& w, uint32_t& h, uint32_t& d, uint32_t lod = 1) -{ - // GL/D3D hobbled non-pow2 mips by only supporting round down, not round up - // And then Metal followed OpenGL since it's the same hw and drivers. - // Round up adds an extra mip level to the chain, but results in much better filtering. - // https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_non_power_of_two.txt - // http://download.nvidia.com/developer/Papers/2005/NP2_Mipmapping/NP2_Mipmap_Creation.pdf - - // round-down - w >>= lod; - h >>= lod; - d >>= lod; - - if (w < 1) w = 1; - if (h < 1) h = 1; - if (d < 1) d = 1; -} - // Use this on vectors #include diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 98b1450c..f218fe8c 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -322,7 +322,7 @@ bool Image::loadImageFromPixels(const vector& pixels, int32_t width, // BC1nm + b average. That way color endpoints are of some use rather than just // being set ot 0. This runs counter to ASTC L+A mode though which eliminates // the endpoint storage. -void Image::averageChannelsInBlock( +void KramEncoder::averageChannelsInBlock( const char* averageChannels, const KTXImage& image, ImageData& srcImage, vector& tmpImageData8) const // otherwise, it's BlueAlpha averaging { @@ -413,19 +413,319 @@ static bool writeDataAtOffset(const uint8_t* data, size_t dataSize, size_t dataO return true; } -bool Image::decode(const KTXImage& srcImage, FILE* dstFile, TexEncoder decoder, bool isVerbose, const string& swizzleText) const +bool KramDecoder::decode(const KTXImage& srcImage, FILE* dstFile, const KramDecoderParams& params) const { KTXImage dstImage; // thrown out, data written to file - return decodeImpl(srcImage, dstFile, dstImage, decoder, isVerbose, swizzleText); + return decodeImpl(srcImage, dstFile, dstImage, params); } -bool Image::decode(const KTXImage& srcImage, KTXImage& dstImage, TexEncoder decoder, bool isVerbose, const string& swizzleText) const +bool KramDecoder::decode(const KTXImage& srcImage, KTXImage& dstImage, const KramDecoderParams& params) const { - return decodeImpl(srcImage, nullptr, dstImage, decoder, isVerbose, swizzleText); + return decodeImpl(srcImage, nullptr, dstImage, params); } -bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstImage, TexEncoder decoder, bool isVerbose, const string& swizzleText) const +bool KramDecoder::decodeBlocks( + int32_t w, int32_t h, + const uint8_t* blockData, uint32_t blockDataSize, MyMTLPixelFormat blockFormat, + vector& outputTexture, // currently Color + const KramDecoderParams& params) const { + + bool success = false; + + // could tie use flags to format filter, or encoder settings + // or may want to disable if decoders don't gen correct output + TexEncoder decoder = params.decoder; +#if COMPILE_ATE + // Encode/decode formats differ depending on library version + // but it's likely the fastest decoder. Only on macOS/iOS. 
+ bool useATE = decoder == kTexEncoderATE; +#endif +#if COMPILE_SQUISH + bool useSquish = decoder == kTexEncoderSquish; +#endif +#if COMPILE_BCENC + bool useBcenc = decoder == kTexEncoderBcenc; +#endif +#if COMPILE_ASTCENC + bool useAstcenc = decoder == kTexEncoderAstcenc; +#endif + + // TODO: hook to block decode logic below + // copy srcData if using ATE, it says it needs 16-byte aligned data for encode + // and assume for decode too. Output texture is already 16-byte aligned. + const uint8_t* srcData = blockData; + vector srcTexture; + if (useATE && (((uintptr_t)srcData & 15) != 0)) { + srcTexture.resize(blockDataSize); + memcpy(srcTexture.data(), srcData, blockDataSize); + srcData = srcTexture.data(); + } + + Int2 blockDims = blockDimsOfFormat(blockFormat); + bool isVerbose = params.isVerbose; + const string& swizzleText = params.swizzleText; + bool isHDR = isHdrFormat(blockFormat); + + // start decoding after format pulled from KTX file + if (isBCFormat(blockFormat)) { + // bc via ate, or squish for bc1-5 if on other platforms + // bcenc also likely has decode for bc7 + if (false) { + // just to chain if/else + } +#if COMPILE_BCENC + else if (useBcenc) { + Color* dstPixels = (Color*)outputTexture.data(); + + const int32_t blockDim = 4; + int32_t blocks_x = (w + blockDim - 1) / blockDim; + //int32_t blocks_y = (h + blockDim - 1) / blockDim; + int32_t blockSize = blockSizeOfFormat(blockFormat); + + for (int32_t y = 0; y < h; y += blockDim) { + for (int32_t x = 0; x < w; x += blockDim) { + int32_t bbx = x / blockDim; + int32_t bby = y / blockDim; + int32_t bb0 = bby * blocks_x + bbx; + const uint8_t* srcBlock = &srcData[bb0 * blockSize]; + + // decode into temp 4x4 pixels + Color pixels[blockDim * blockDim]; + + success = true; + + switch (blockFormat) { + case MyMTLPixelFormatBC1_RGBA: + case MyMTLPixelFormatBC1_RGBA_sRGB: + // Returns true if the block uses 3 color punchthrough alpha mode. + rgbcx::unpack_bc1(srcBlock, pixels); + break; + case MyMTLPixelFormatBC3_RGBA_sRGB: + case MyMTLPixelFormatBC3_RGBA: + // Returns true if the block uses 3 color punchthrough alpha mode. 
+ rgbcx::unpack_bc3(srcBlock, pixels); + break; + case MyMTLPixelFormatBC4_RSnorm: + case MyMTLPixelFormatBC4_RUnorm: + rgbcx::unpack_bc4(srcBlock, (uint8_t*)pixels); + break; + case MyMTLPixelFormatBC5_RGSnorm: + case MyMTLPixelFormatBC5_RGUnorm: + rgbcx::unpack_bc5(srcBlock, pixels); + break; + + case MyMTLPixelFormatBC7_RGBAUnorm: + case MyMTLPixelFormatBC7_RGBAUnorm_sRGB: + bc7decomp::unpack_bc7(srcBlock, (bc7decomp::color_rgba*)pixels); + break; + + default: + KLOGE("Image", "decode unsupported format"); + success = false; + break; + } + + if (!success) { + return false; + } + + // copy temp pixels to outputTexture + for (int32_t by = 0; by < blockDim; ++by) { + int32_t yy = y + by; + if (yy >= h) { + break; + } + + for (int32_t bx = 0; bx < blockDim; ++bx) { + int32_t xx = x + bx; + if (xx >= w) { + break; // go to next y above + } + + dstPixels[yy * w + xx] = pixels[by * blockDim + bx]; + } + } + } + } + } +#endif +#if COMPILE_SQUISH + else if (useSquish) { + squish::TexFormat format = squish::kBC1; + + success = true; + + switch (blockFormat) { + case MyMTLPixelFormatBC1_RGBA: + case MyMTLPixelFormatBC1_RGBA_sRGB: + format = squish::kBC1; + break; + case MyMTLPixelFormatBC3_RGBA_sRGB: + case MyMTLPixelFormatBC3_RGBA: + format = squish::kBC3; + break; + case MyMTLPixelFormatBC4_RSnorm: + case MyMTLPixelFormatBC4_RUnorm: + format = squish::kBC4; + break; + case MyMTLPixelFormatBC5_RGSnorm: + case MyMTLPixelFormatBC5_RGUnorm: + format = squish::kBC5; + break; + default: + KLOGE("Image", "decode unsupported format"); + success = false; + break; + } + + if (success) { + // only handles bc1,3,4,5 + squish::DecompressImage(outputTexture.data(), w, h, srcData, format); + success = true; + } + } +#endif +#if COMPILE_ATE + else if (useATE) { + ATEEncoder encoder; + success = encoder.Decode(blockFormat, blockDataSize, blockDims.y, + isVerbose, + w, h, srcData, outputTexture.data()); + } +#endif + } + else if (isETCFormat(blockFormat)) { + // etc via etc2comp +#if COMPILE_ETCENC + Etc::Image::Format format = Etc::Image::Format::R11; + + success = true; + + switch (blockFormat) { + case MyMTLPixelFormatEAC_R11Unorm: + format = Etc::Image::Format::R11; + break; + case MyMTLPixelFormatEAC_R11Snorm: + format = Etc::Image::Format::SIGNED_R11; + break; + case MyMTLPixelFormatEAC_RG11Unorm: + format = Etc::Image::Format::RG11; + break; + case MyMTLPixelFormatEAC_RG11Snorm: + format = Etc::Image::Format::SIGNED_RG11; + break; + + case MyMTLPixelFormatETC2_RGB8: + format = Etc::Image::Format::RGB8; + break; + case MyMTLPixelFormatETC2_RGB8_sRGB: + format = Etc::Image::Format::SRGB8; + break; + case MyMTLPixelFormatEAC_RGBA8: + format = Etc::Image::Format::RGBA8; + break; + case MyMTLPixelFormatEAC_RGBA8_sRGB: + format = Etc::Image::Format::SRGBA8; + break; + + default: + KLOGE("Image", "decode unsupported format"); + success = false; + break; + } + + if (success) { + Etc::Image etcImage(format, nullptr, + w, h, Etc::ErrorMetric::NUMERIC); + + success = etcImage.Decode(srcData, outputTexture.data()) == Etc::Image::SUCCESS; + } +#endif + } + else if (isASTCFormat(blockFormat)) { + // ate can decode more than it encodes + if (false) { + // just to chain if/else + } +#if COMPILE_ASTCENC + else if (useAstcenc) { + // decode the mip + astcenc_image dstImageASTC; + dstImageASTC.dim_x = w; + dstImageASTC.dim_y = h; + dstImageASTC.dim_z = 1; // Not using 3D blocks, not supported on iOS + //dstImageASTC.dim_pad = 0; + dstImageASTC.data_type = ASTCENC_TYPE_U8; + + + // encode/encode still setup on array of 2d 
slices, so need address of data + uint8_t* outData = outputTexture.data(); + dstImageASTC.data = (void**)&outData; + + uint32_t srcDataLength = blockDataSize; + + astcenc_profile profile; + profile = ASTCENC_PRF_LDR; // isSrgb ? ASTCENC_PRF_LDR_SRGB : ASTCENC_PRF_LDR; + if (isHDR) { + profile = ASTCENC_PRF_HDR; // TODO: also ASTCENC_PRF_HDR_RGB_LDR_A + } + + astcenc_config config; + astcenc_error error = astcenc_config_init( + profile, blockDims.x, blockDims.y, 1, ASTCENC_PRE_FAST, ASTCENC_FLG_DECOMPRESS_ONLY, &config); + if (error != ASTCENC_SUCCESS) { + return false; + } + + astcenc_context* codec_context = nullptr; + error = astcenc_context_alloc(&config, 1, &codec_context); + if (error != ASTCENC_SUCCESS) { + return false; + } + // no swizzle + astcenc_swizzle swizzleDecode = {ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A}; + + error = astcenc_decompress_image(codec_context, srcData, srcDataLength, &dstImageASTC, swizzleDecode, 0); + + astcenc_context_free(codec_context); + + success = (error == ASTCENC_SUCCESS); + } +#endif +#if COMPILE_ATE + else if (useATE) { + // this decods all except hdr/bc6 + ATEEncoder encoder; + success = encoder.Decode(blockFormat, blockDataSize, blockDims.y, + isVerbose, + w, h, srcData, outputTexture.data()); + } +#endif + } + else { + KLOGE("Image", "unsupported pixel format for decode"); + success = false; + } + + // stop processing mips, since failed above + if (!success) { + return false; + } + + // swizzle the data back to a more viewable layout (f.e. gggr -> rg01) + // This swizzleText is currently explicit, but could be reversed from prop of content channels and preswizzle. + // It's hard to specify this swizzle for arbitrary content otherwise. + if (!swizzleText.empty()) { + ImageInfo::swizzleTextureLDR(w, h, (Color*)outputTexture.data(), swizzleText.c_str()); + } + + return true; +} + +bool KramDecoder::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstImage, const KramDecoderParams& params) const +{ + // read existing KTX file into mip offset, then start decoding the blocks // and write these to 8u,16f,32f ktx with mips // write out KTXHeader for the explicit image, this should be similar to other code @@ -436,8 +736,7 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma MyMTLPixelFormat pixelFormat = srcImage.pixelFormat; bool isSrgb = isSrgbFormat(pixelFormat); - bool isHDR = isHdrFormat(pixelFormat); - + // setup dstImage //KTXImage dstImage; dstImage = srcImage; // copy src (name-value pairs copied too) @@ -485,7 +784,6 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma headerCopy.pixelDepth = 0; } - // write the header out if (!writeDataAtOffset((const uint8_t*)&headerCopy, sizeof(KTXHeader), 0, dstFile, dstImage)) { return false; @@ -500,305 +798,31 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma vector outputTexture; vector srcTexture; - - // could tie use flags to format filter, or encoder settings - // or may want to disable if decoders don't gen correct output -#if COMPILE_ATE - // Encode/decode formats differ depending on library version - // but it's likely the fastest decoder. Only on macOS/iOS. 
- bool useATE = decoder == kTexEncoderATE; -#endif -#if COMPILE_SQUISH - bool useSquish = decoder == kTexEncoderSquish; -#endif -#if COMPILE_BCENC - bool useBcenc = decoder == kTexEncoderBcenc; -#endif -#if COMPILE_ASTCENC - bool useAstcenc = decoder == kTexEncoderAstcenc; -#endif - + // DONE: walk chunks here and seek to src and dst offsets in conversion // make sure to walk chunks in the exact same order they are written, array then face, or slice - int32_t w = 0; - int32_t h = 0; - int32_t d = 0; - - for (int32_t chunk = 0; chunk < numChunks; ++chunk) { - w = srcImage.width; - h = srcImage.height; - d = srcImage.depth; + for (uint32_t i = 0; i < srcImage.header.numberOfMipmapLevels; ++i) { + // TODO: to decode compressed KTX2 want to walk all chunks of a single level + // after decompressing the level. This isn't doing unpackLevel and needs to here. + assert(!srcImage.isSupercompressed()); + + uint32_t w, h, d; + srcImage.mipDimensions(i, w, h, d); - for (int32_t i = 0; i < (int32_t)srcImage.header.numberOfMipmapLevels; ++i) { + for (int32_t chunk = 0; chunk < numChunks; ++chunk) { + const KTXImageLevel& dstMipLevel = dstImage.mipLevels[i]; outputTexture.resize(dstMipLevel.length); const KTXImageLevel& srcMipLevel = srcImage.mipLevels[i]; const uint8_t* srcData = srcImage.fileData + srcMipLevel.offset + chunk * srcMipLevel.length; - // copy srcData if using ATE, it says it needs 16-byte aligned data for encode - // and assume for decode too. Output texture is already 16-byte aligned. - if (((uintptr_t)srcData & 15) != 0) { - srcTexture.resize(srcMipLevel.length); - memcpy(srcTexture.data(), srcData, srcMipLevel.length); - srcData = srcTexture.data(); - } - - // start decoding after format pulled from KTX file - if (isBCFormat(pixelFormat)) { - // bc via ate, or squish for bc1-5 if on other platforms - // bcenc also likely has decode for bc7 - if (false) { - // just to chain if/else - } - #if COMPILE_BCENC - else if (useBcenc) { - Color* dstPixels = (Color*)outputTexture.data(); - - const int32_t blockDim = 4; - int32_t blocks_x = (w + blockDim - 1) / blockDim; - //int32_t blocks_y = (h + blockDim - 1) / blockDim; - int32_t blockSize = blockSizeOfFormat(pixelFormat); - - for (int32_t y = 0; y < h; y += blockDim) { - for (int32_t x = 0; x < w; x += blockDim) { - int32_t bbx = x / blockDim; - int32_t bby = y / blockDim; - int32_t bb0 = bby * blocks_x + bbx; - const uint8_t* srcBlock = &srcData[bb0 * blockSize]; - - // decode into temp 4x4 pixels - Color pixels[blockDim * blockDim]; - - success = true; - - switch (pixelFormat) { - case MyMTLPixelFormatBC1_RGBA: - case MyMTLPixelFormatBC1_RGBA_sRGB: - // Returns true if the block uses 3 color punchthrough alpha mode. - rgbcx::unpack_bc1(srcBlock, pixels); - break; - case MyMTLPixelFormatBC3_RGBA_sRGB: - case MyMTLPixelFormatBC3_RGBA: - // Returns true if the block uses 3 color punchthrough alpha mode. 
- rgbcx::unpack_bc3(srcBlock, pixels); - break; - case MyMTLPixelFormatBC4_RSnorm: - case MyMTLPixelFormatBC4_RUnorm: - rgbcx::unpack_bc4(srcBlock, (uint8_t*)pixels); - break; - case MyMTLPixelFormatBC5_RGSnorm: - case MyMTLPixelFormatBC5_RGUnorm: - rgbcx::unpack_bc5(srcBlock, pixels); - break; - - case MyMTLPixelFormatBC7_RGBAUnorm: - case MyMTLPixelFormatBC7_RGBAUnorm_sRGB: - bc7decomp::unpack_bc7(srcBlock, (bc7decomp::color_rgba*)pixels); - break; - - default: - KLOGE("Image", "decode unsupported format"); - success = false; - break; - } - - if (!success) { - return false; - } - - // copy temp pixels to outputTexture - for (int32_t by = 0; by < blockDim; ++by) { - int32_t yy = y + by; - if (yy >= h) { - break; - } - - for (int32_t bx = 0; bx < blockDim; ++bx) { - int32_t xx = x + bx; - if (xx >= w) { - break; // go to next y above - } - - dstPixels[yy * w + xx] = pixels[by * blockDim + bx]; - } - } - } - } - } - #endif - #if COMPILE_SQUISH - else if (useSquish) { - squish::TexFormat format = squish::kBC1; - - success = true; - - switch (pixelFormat) { - case MyMTLPixelFormatBC1_RGBA: - case MyMTLPixelFormatBC1_RGBA_sRGB: - format = squish::kBC1; - break; - case MyMTLPixelFormatBC3_RGBA_sRGB: - case MyMTLPixelFormatBC3_RGBA: - format = squish::kBC3; - break; - case MyMTLPixelFormatBC4_RSnorm: - case MyMTLPixelFormatBC4_RUnorm: - format = squish::kBC4; - break; - case MyMTLPixelFormatBC5_RGSnorm: - case MyMTLPixelFormatBC5_RGUnorm: - format = squish::kBC5; - break; - default: - KLOGE("Image", "decode unsupported format"); - success = false; - break; - } - - if (success) { - // only handles bc1,3,4,5 - squish::DecompressImage(outputTexture.data(), w, h, srcData, format); - success = true; - } - } - #endif - #if COMPILE_ATE - else if (useATE) { - ATEEncoder encoder; - success = encoder.Decode(pixelFormat, (int32_t)srcMipLevel.length, srcImage.blockDims().y, - isVerbose, - w, h, srcData, outputTexture.data()); - } - #endif - } - else if (isETCFormat(pixelFormat)) { - // etc via etc2comp - #if COMPILE_ETCENC - Etc::Image::Format format = Etc::Image::Format::R11; - - success = true; - - switch (pixelFormat) { - case MyMTLPixelFormatEAC_R11Unorm: - format = Etc::Image::Format::R11; - break; - case MyMTLPixelFormatEAC_R11Snorm: - format = Etc::Image::Format::SIGNED_R11; - break; - case MyMTLPixelFormatEAC_RG11Unorm: - format = Etc::Image::Format::RG11; - break; - case MyMTLPixelFormatEAC_RG11Snorm: - format = Etc::Image::Format::SIGNED_RG11; - break; - - case MyMTLPixelFormatETC2_RGB8: - format = Etc::Image::Format::RGB8; - break; - case MyMTLPixelFormatETC2_RGB8_sRGB: - format = Etc::Image::Format::SRGB8; - break; - case MyMTLPixelFormatEAC_RGBA8: - format = Etc::Image::Format::RGBA8; - break; - case MyMTLPixelFormatEAC_RGBA8_sRGB: - format = Etc::Image::Format::SRGBA8; - break; - - default: - KLOGE("Image", "decode unsupported format"); - success = false; - break; - } - - if (success) { - Etc::Image etcImage(format, nullptr, - w, h, Etc::ErrorMetric::NUMERIC); - - success = etcImage.Decode(srcData, outputTexture.data()) == Etc::Image::SUCCESS; - } - #endif - } - else if (isASTCFormat(pixelFormat)) { - // ate can decode more than it encodes - if (false) { - // just to chain if/else - } - #if COMPILE_ASTCENC - else if (useAstcenc) { - // decode the mip - astcenc_image dstImageASTC; - dstImageASTC.dim_x = w; - dstImageASTC.dim_y = h; - dstImageASTC.dim_z = 1; // Not using 3D blocks, not supported on iOS - //dstImageASTC.dim_pad = 0; - dstImageASTC.data_type = ASTCENC_TYPE_U8; - - - // 
encode/encode still setup on array of 2d slices, so need address of data - uint8_t* outData = outputTexture.data(); - dstImageASTC.data = (void**)&outData; - - int32_t srcDataLength = (int32_t)srcMipLevel.length; - Int2 blockDims = srcImage.blockDims(); - - astcenc_profile profile; - profile = ASTCENC_PRF_LDR; // isSrgb ? ASTCENC_PRF_LDR_SRGB : ASTCENC_PRF_LDR; - if (isHDR) { - profile = ASTCENC_PRF_HDR; // TODO: also ASTCENC_PRF_HDR_RGB_LDR_A - } - - astcenc_config config; - astcenc_error error = astcenc_config_init( - profile, blockDims.x, blockDims.y, 1, ASTCENC_PRE_FAST, ASTCENC_FLG_DECOMPRESS_ONLY, &config); - if (error != ASTCENC_SUCCESS) { - return false; - } - - astcenc_context* codec_context = nullptr; - error = astcenc_context_alloc(&config, 1, &codec_context); - if (error != ASTCENC_SUCCESS) { - return false; - } - // no swizzle - astcenc_swizzle swizzleDecode = {ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A}; - - error = astcenc_decompress_image(codec_context, srcData, srcDataLength, &dstImageASTC, swizzleDecode, 0); - - astcenc_context_free(codec_context); - - success = (error == ASTCENC_SUCCESS); - } - #endif - #if COMPILE_ATE - else if (useATE) { - // this decods all except hdr/bc6 - ATEEncoder encoder; - success = encoder.Decode(pixelFormat, (int32_t)srcMipLevel.length, srcImage.blockDims().y, - isVerbose, - w, h, srcData, outputTexture.data()); - } - #endif - } - else { - KLOGE("Image", "unsupported pixel format for decode"); - success = false; - } - - // stop processing mips, since failed above - if (!success) { - break; - } - - // swizzle the data back to a more viewable layout (f.e. gggr -> rg01) - // This swizzleText is currently explicit, but could be reversed from prop of content channels and preswizzle. - // It's hard to specify this swizzle for arbitrary content otherwise. - if (!swizzleText.empty()) { - ImageInfo::swizzleTextureLDR(w, h, (Color*)outputTexture.data(), swizzleText.c_str()); + // decode the blocks to LDR RGBA8 + if (!decodeBlocks(w, h, srcData, srcMipLevel.length, srcImage.pixelFormat, outputTexture, params)) { + return false; } - + // write the mips out to the file, and code above can then decode into the same buffer // This isn't correct for cubes, arrays, and other types. The mip length is only written out once for all mips. @@ -823,9 +847,6 @@ bool Image::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstIma if (!writeDataAtOffset(outputTexture.data(), dstMipLevel.length, dstMipOffset, dstFile, dstImage)) { return false; } - - // next mip level - mipDown(w, h, d); } } @@ -875,17 +896,17 @@ bool Image::resizeImage(int32_t wResize, int32_t hResize, bool resizePow2, Image } -bool Image::encode(ImageInfo& info, KTXImage& dstImage) const +bool KramEncoder::encode(ImageInfo& info, Image& singleImage,KTXImage& dstImage) const { - return encodeImpl(info, nullptr, dstImage); + return encodeImpl(info, singleImage, nullptr, dstImage); } -bool Image::encode(ImageInfo& info, FILE* dstFile) const +bool KramEncoder::encode(ImageInfo& info, Image& singleImage, FILE* dstFile) const { // dstImage will be ignored KTXImage dstImage; - return encodeImpl(info, dstFile, dstImage); + return encodeImpl(info, singleImage, dstFile, dstImage); } // Use this for in-place construction of mips @@ -1181,7 +1202,7 @@ KTX2DescriptorFileBlock::KTX2DescriptorFileBlock(MyMTLPixelFormat format, bool i flags = isPremul ? 
KHR_DF_FLAG_ALPHA_PREMULTIPLIED : KHR_DF_FLAG_ALPHA_STRAIGHT; } -void Image::addBaseProps(const ImageInfo& info, KTXImage& dstImage) const +void KramEncoder::addBaseProps(const ImageInfo& info, KTXImage& dstImage) const { dstImage.addFormatProps(); @@ -1250,15 +1271,15 @@ struct ZSTDScope }; -bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const +bool KramEncoder::encodeImpl(ImageInfo& info, Image& singleImage, FILE* dstFile, KTXImage& dstImage) const { KTXHeader& header = dstImage.header; MipConstructData mipConstructData; vector& chunkOffsets = mipConstructData.chunkOffsets; - int32_t w = _width; - int32_t h = _height; + int32_t w = singleImage.width(); + int32_t h = singleImage.height(); // compute chunks, and adjust w/h based on that // the code allows a vertical or horizontal strip or grid of chunks @@ -1325,7 +1346,7 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const // A better way would be to do mips in-place, but in-order, and compressing the large // to small mips into an array of open compressor streams. Then only need one mip instead of // all levels in memory. - if (!writeKTX1FileOrImage(info, mipConstructData, propsData, nullptr, dstImage)) { + if (!writeKTX1FileOrImage(info, singleImage, mipConstructData, propsData, nullptr, dstImage)) { return false; } @@ -1533,7 +1554,7 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const } else { // this is purely ktx1 output path - if (!writeKTX1FileOrImage(info, mipConstructData, propsData, dstFile, dstImage)) { + if (!writeKTX1FileOrImage(info, singleImage, mipConstructData, propsData, dstFile, dstImage)) { return false; } } @@ -1541,8 +1562,9 @@ bool Image::encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const return true; } -bool Image::writeKTX1FileOrImage( +bool KramEncoder::writeKTX1FileOrImage( ImageInfo& info, + Image& singleImage, MipConstructData& mipConstructData, const vector& propsData, FILE* dstFile, KTXImage& dstImage) const @@ -1590,7 +1612,7 @@ bool Image::writeKTX1FileOrImage( } // build and weite out the mip data - if (!createMipsFromChunks(info, mipConstructData, dstFile, dstImage)) { + if (!createMipsFromChunks(info, singleImage, mipConstructData, dstFile, dstImage)) { return false; } @@ -1665,8 +1687,9 @@ void printBCBlock(const uint8_t* bcBlock, MyMTLPixelFormat format) { } } -bool Image::createMipsFromChunks( +bool KramEncoder::createMipsFromChunks( ImageInfo& info, + Image& singleImage, MipConstructData& data, FILE* dstFile, KTXImage& dstImage @@ -1721,14 +1744,14 @@ bool Image::createMipsFromChunks( srcImage.pixelsFloat = floatImage.data(); } else { - srcImage.pixelsFloat = (float4*)_pixelsFloat.data(); + srcImage.pixelsFloat = (float4*)singleImage.pixelsFloat().data(); } // run this across all the source data // do this in-place before mips are generated if (doPremultiply) { if (info.isPrezero) { - for (const auto& pixel : _pixelsFloat) { + for (const auto& pixel : singleImage.pixelsFloat()) { float alpha = pixel.w; float4& pixelChange = const_cast(pixel); @@ -1740,7 +1763,7 @@ bool Image::createMipsFromChunks( } } else { - for (const auto& pixel : _pixelsFloat) { + for (const auto& pixel : singleImage.pixelsFloat()) { float alpha = pixel.w; float4& pixelChange = const_cast(pixel); pixelChange *= alpha; @@ -1756,7 +1779,7 @@ bool Image::createMipsFromChunks( srcImage.pixels = copyImage.data(); } else { - srcImage.pixels = (Color*)_pixels.data(); + srcImage.pixels = (Color*)singleImage.pixels().data(); } // 
used to store premul and linear color @@ -1799,12 +1822,12 @@ bool Image::createMipsFromChunks( if (info.isHDR) { if (isMultichunk) { - const float4* srcPixels = (const float4*)_pixelsFloat.data(); + const float4* srcPixels = (const float4*)singleImage.pixelsFloat().data(); for (int32_t y = 0; y < h; ++y) { int32_t y0 = y * w; // offset into original strip/atlas - int32_t yOffset = (y + chunkOffset.y) * _width + chunkOffset.x; + int32_t yOffset = (y + chunkOffset.y) * singleImage.width() + chunkOffset.x; for (int32_t x = 0; x < w; ++x) { float4 c0 = srcPixels[yOffset + x]; @@ -1816,12 +1839,12 @@ bool Image::createMipsFromChunks( } else { if (isMultichunk) { - const Color* srcPixels = (const Color*)_pixels.data(); + const Color* srcPixels = (const Color*)singleImage.pixels().data(); for (int32_t y = 0; y < h; ++y) { int32_t y0 = y * w; // offset into original strip/atlas - int32_t yOffset = (y + chunkOffset.y) * _width + chunkOffset.x; + int32_t yOffset = (y + chunkOffset.y) * singleImage.width() + chunkOffset.x; for (int32_t x = 0; x < w; ++x) { Color c0 = srcPixels[yOffset + x]; @@ -1966,7 +1989,7 @@ bool Image::createMipsFromChunks( } // TODO: try to elim KTXImage passed into this -bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, +bool KramEncoder::compressMipLevel(const ImageInfo& info, KTXImage& image, ImageData& mipImage, TextureData& outputTexture, int32_t mipStorageSize) const { @@ -1982,6 +2005,7 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, switch (info.pixelFormat) { case MyMTLPixelFormatR8Unorm: case MyMTLPixelFormatRG8Unorm: + // no RGB8 writes case MyMTLPixelFormatRGBA8Unorm: case MyMTLPixelFormatRGBA8Unorm_sRGB: { int32_t count = image.blockSize() / 1; @@ -2010,6 +2034,7 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, case MyMTLPixelFormatR16Float: case MyMTLPixelFormatRG16Float: + // no RGB16Float writes case MyMTLPixelFormatRGBA16Float: { int32_t count = image.blockSize() / 2; @@ -2036,6 +2061,7 @@ bool Image::compressMipLevel(const ImageInfo& info, KTXImage& image, } case MyMTLPixelFormatR32Float: case MyMTLPixelFormatRG32Float: + // no RGB32Float writes case MyMTLPixelFormatRGBA32Float: { int32_t count = image.blockSize() / 4; diff --git a/libkram/kram/KramImage.h b/libkram/kram/KramImage.h index 22b57578..7378bf95 100644 --- a/libkram/kram/KramImage.h +++ b/libkram/kram/KramImage.h @@ -43,16 +43,7 @@ class Image { bool loadImageFromKTX(const KTXImage& image); - // encode/ecode to a file - bool encode(ImageInfo& info, FILE* dstFile) const; - - bool decode(const KTXImage& image, FILE* dstFile, TexEncoder decoder, bool isVerbose, const string& swizzleText) const; - // encode/decode to a memory block - bool encode(ImageInfo& info, KTXImage& dstImage) const; - - bool decode(const KTXImage& image, KTXImage& dstImage, TexEncoder decoder, bool isVerbose, const string& swizzleText) const; - // this is only for 2d images bool resizeImage(int32_t wResize, int32_t hResize, bool resizePow2, ImageResizeFilter filter = kImageResizeFilterPoint); @@ -60,16 +51,72 @@ class Image { int32_t width() const { return _width; } int32_t height() const { return _height; } - const uint8_t* pixels() const { return _pixels.data(); } - const float4* pixelsFloat() const { return _pixelsFloat.data(); } + const vector& pixels() const { return _pixels; } + const vector& pixelsFloat() const { return _pixelsFloat; } bool hasColor() const { return _hasColor; } bool hasAlpha() const { return _hasAlpha; } private: - bool 
encodeImpl(ImageInfo& info, FILE* dstFile, KTXImage& dstImage) const; + // pixel size of image + int32_t _width = 0; + int32_t _height = 0; + + // this is whether png/ktx source image format was L or LA or A or RGB + // if unknown then set to true, and the pixel walk will set to false + bool _hasColor = true; + bool _hasAlpha = true; + + // this is the entire strip data, float version can be passed for HDR + // sources always 4 channels RGBA for 8 and 32f data. 16f promoted to 32f. + vector _pixels; // TODO: change to Color? + //vector _pixelsHalf; // TODO: add support to import fp16 + vector _pixelsFloat; +}; + +class KramDecoderParams { +public: + TexEncoder decoder = kTexEncoderUnknown; // will pick best available from format + bool isVerbose = false; + string swizzleText; +}; + +// The decoder can decode an entire KTX/KTX2 into RGBA8u/16F/32F data. +// This is useful on platforms to display formats unsupported by the gpu, but the expanded pixels +// can take up much more memory. +class KramDecoder { +public: + bool decode(const KTXImage& image, FILE* dstFile, const KramDecoderParams& params) const; + + bool decode(const KTXImage& image, KTXImage& dstImage, const KramDecoderParams& params) const; + + bool decodeBlocks( + int32_t w, int32_t h, + const uint8_t* blockData, uint32_t blockDataSize, MyMTLPixelFormat blockFormat, + vector& dstPixels, // currently Color + const KramDecoderParams& params) const; - bool decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstImage, TexEncoder decoder, bool isVerbose, const string& swizzleText) const; +private: + bool decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstImage, const KramDecoderParams& params) const; +}; + +// The encoder takes a single-mip image, and in-place encodes mips and applies other +// requested operations from ImageInfo as it writes those mips. Note that KTX2 must +// accumulate all mips if compressed so that offsets of where to write data are known. +class KramEncoder { +public: + // encode to a file + bool encode(ImageInfo& info, Image& singleImage, FILE* dstFile) const; + + // encode to a memory block + bool encode(ImageInfo& info, Image& singleImage, KTXImage& dstImage) const; + + // TODO: supply encode() that takes a KTXImage src with mips already generated + // and then can encode them to a block format. In-place mips from Image don't + // allow for custom mips, and also require conversion of KTXImage to Image.
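Editorial aside, not part of the patch: a minimal sketch of how the split KramDecoder API above is meant to be driven, mirroring the kramAppDecode changes in Kram.cpp earlier in this patch. The function name and the "rg01" swizzle are illustrative only, and the sketch assumes KramImage.h is included along with the kram namespace.

// Sketch only: decode a block-compressed KTXImage into an explicit-format KTXImage in memory.
bool decodeToMemoryExample(const KTXImage& srcImage, KTXImage& dstImage)
{
    KramDecoderParams params;
    params.isVerbose = false;
    params.swizzleText = "rg01"; // hypothetical, e.g. to view a gggr normal source
    // params.decoder stays kTexEncoderUnknown, so a decoder is picked from the format

    KramDecoder decoder;
    return decoder.decode(srcImage, dstImage, params);
}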
+ +private: + bool encodeImpl(ImageInfo& info, Image& singleImage, FILE* dstFile, KTXImage& dstImage) const; // compute how big mips will be void computeMipStorage(const KTXImage& image, int32_t& w, int32_t& h, int32_t& numSkippedMips, @@ -86,32 +133,21 @@ class Image { const KTXImage& image, ImageData& srcImage, vector& tmpImage) const; - bool createMipsFromChunks(ImageInfo& info, MipConstructData& data, + bool createMipsFromChunks(ImageInfo& info, + Image& singleImage, + MipConstructData& data, FILE* dstFile, KTXImage& dstImage) const; bool writeKTX1FileOrImage( ImageInfo& info, + Image& singleImage, MipConstructData& mipConstructData, const vector& propsData, FILE* dstFile, KTXImage& dstImage) const; void addBaseProps(const ImageInfo& info, KTXImage& dstImage) const; -private: - // pixel size of image - int32_t _width = 0; - int32_t _height = 0; - - // this is whether png/ktx source image format was L or LA or A or RGB - // if unknown then set to true, and the pixel walk will set to false - bool _hasColor = true; - bool _hasAlpha = true; - - // this is the entire strip data, float version can be passed for HDR - // sources always 4 channels RGBA for 8 and 32f data. 16f promoted to 32f. - vector _pixels; // TODO: change to Color? - //vector _pixelsHalf; // TODO: add support to import fp16 - vector _pixelsFloat; }; + } // namespace kram diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 3fee97fa..391fd17a 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -1097,8 +1097,8 @@ void ImageInfo::initWithSourceImage(Image& sourceImage) // can only determine this after reading in the source texture int32_t w = sourceImage.width(); int32_t h = sourceImage.height(); - Color* srcPixels = (Color*)sourceImage.pixels(); - float4* srcPixelsFloat = (float4*)sourceImage.pixelsFloat(); + Color* srcPixels = (Color*)sourceImage.pixels().data(); + float4* srcPixelsFloat = (float4*)sourceImage.pixelsFloat().data(); isHDR = srcPixelsFloat != nullptr; diff --git a/libkram/kram/KramMipper.cpp b/libkram/kram/KramMipper.cpp index b20f9f77..79e85413 100644 --- a/libkram/kram/KramMipper.cpp +++ b/libkram/kram/KramMipper.cpp @@ -7,6 +7,8 @@ #include #include +#include "KTXImage.h" // for mipDown + namespace kram { using namespace std; diff --git a/libkram/kram/KramSDFMipper.cpp b/libkram/kram/KramSDFMipper.cpp index 7bb6f71d..aec353bd 100644 --- a/libkram/kram/KramSDFMipper.cpp +++ b/libkram/kram/KramSDFMipper.cpp @@ -7,6 +7,7 @@ #include #include "KramMipper.h" +#include "KTXImage.h" // for mipDown namespace kram { using namespace heman; @@ -57,11 +58,8 @@ void SDFMipper::mipmap(ImageData& dstImage, int32_t mipLevel) int32_t h = srcBitmapImage.height; int32_t d = 1; - // can use shift with mip down, but this iterates - for (int32_t i = 0; i < mipLevel; ++i) { - mipDown(w, h, d); - } - + mipDown(w, h, d, mipLevel); + dstImage.width = w; dstImage.height = h; From 0e4f88e1075f78f6f4943ca05e75a0f1ec077238 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Thu, 27 May 2021 23:31:16 -0700 Subject: [PATCH 079/901] kram - small decode cleanup, thumbnailer error handling improve decode to validate and set decoder if unknown support unpackLevel, so compressed ktx2 can be sent to decode don't need to unpack entire ktx2 if just decoding add error handling to NSLog from thumbnailer. 
add the ktx and ktx2 file types and a minimum dimension to appex plist --- kram-thumb/Info.plist | 7 +++-- kram-thumb/KramThumbnailProvider.mm | 38 +++++++++++++++++++----- kramv/KramLoader.mm | 11 ++++--- libkram/kram/KTXImage.cpp | 2 +- libkram/kram/KTXImage.h | 3 +- libkram/kram/Kram.cpp | 2 +- libkram/kram/KramImage.cpp | 45 ++++++++++++++++++++++------- libkram/kram/KramLog.cpp | 2 +- libkram/kram/KramLog.h | 6 ++++ 9 files changed, 88 insertions(+), 28 deletions(-) diff --git a/kram-thumb/Info.plist b/kram-thumb/Info.plist index e6b0324d..34f68a99 100644 --- a/kram-thumb/Info.plist +++ b/kram-thumb/Info.plist @@ -27,9 +27,12 @@ NSExtensionAttributes QLSupportedContentTypes - + + org.khronos.ktx + public.ktx2 + QLThumbnailMinimumDimension - 0 + 64 NSExtensionPointIdentifier com.apple.quicklook.thumbnail diff --git a/kram-thumb/KramThumbnailProvider.mm b/kram-thumb/KramThumbnailProvider.mm index 90c9a866..4efa2c91 100644 --- a/kram-thumb/KramThumbnailProvider.mm +++ b/kram-thumb/KramThumbnailProvider.mm @@ -22,34 +22,53 @@ @implementation KramThumbnailProvider +void KLOGF(const char* format, ...) { + string str; + + va_list args; + va_start(args, format); + /* int32_t len = */ append_vsprintf(str, format, args); + va_end(args); + + // log here, so it can see it in Console + NSLog(@"%@", [NSString stringWithUTF8String: str.c_str()]); +} + - (void)provideThumbnailForFileRequest:(QLFileThumbnailRequest *)request completionHandler:(void (^)(QLThumbnailReply * _Nullable, NSError * _Nullable))handler { // This // Second way: Draw the thumbnail into a context passed to your block, set up with Core Graphics's coordinate system. handler([QLThumbnailReply replyWithContextSize:request.maximumSize drawingBlock:^BOOL(CGContextRef _Nonnull context) { - const char* file = [request.fileURL fileSystemRepresentation]; + const char* filename = [request.fileURL fileSystemRepresentation]; - if (!(endsWith(file, ".ktx") || endsWith(file, ".ktx2"))) { + if (!(endsWith(filename, ".ktx") || endsWith(filename, ".ktx2"))) { + KLOGF("kramv %s only supports ktx/ktx2 files\n", filename); return NO; } // load the mmap file, and interpret it as a KTXImage MmapHelper mmapHelper; - if (!mmapHelper.open(file)) { + if (!mmapHelper.open(filename)) { + KLOGF("kramv %s failed to mmap\n", filename); return NO; } + + // TODO: might need to try FileHelper for non-local thumbnails + // open but leave the image compressed if KTX2 + zstd bool isInfoOnly = true; KTXImage image; if (!image.open(mmapHelper.data(), mmapHelper.dataLength(), isInfoOnly)) { + KLOGF("kramv %s failed to open\n", filename); return NO; } // no BC6 or ASTC HDR yet for thumbs, just do LDR first if (isHdrFormat(image.pixelFormat)) { + KLOGF("kramv %s doesn't support hdr thumbnails yet\n", filename); return NO; } @@ -84,6 +103,7 @@ - (void)provideThumbnailForFileRequest:(QLFileThumbnailRequest *)request complet mipData.resize(image.mipLevels[mipNumber].length * numChunks); uint8_t* dstData = mipData.data(); if (!image.unpackLevel(mipNumber, srcData, dstData)) { + KLOGF("kramv %s failed to unpack mip\n", filename); return NO; } } @@ -108,6 +128,7 @@ - (void)provideThumbnailForFileRequest:(QLFileThumbnailRequest *)request complet // want to just decode one chunk of the level that was unpacked abovve if (!decoder.decodeBlocks(w, h, mipData.data(), mipData.size(), image.pixelFormat, dstMipData, params)) { + KLOGF("kramv %s failed to decode blocks\n", filename); return NO; } @@ -125,9 +146,9 @@ - (void)provideThumbnailForFileRequest:(QLFileThumbnailRequest *)request 
complet // Declare the pixel format for the vImage_Buffer vImage_CGImageFormat format = { - .bitsPerComponent = 8, - .bitsPerPixel = 32, - }; + .bitsPerComponent = 8, + .bitsPerPixel = 32, + }; format.bitmapInfo = kCGBitmapByteOrderDefault | (isPremul ? kCGImageAlphaPremultipliedLast: kCGImageAlphaLast); @@ -139,7 +160,10 @@ - (void)provideThumbnailForFileRequest:(QLFileThumbnailRequest *)request complet //CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB(); vImage_Error err = 0; CGImageRef cgImage = vImageCreateCGImageFromBuffer( &buf, &format, NULL, NULL, kvImageNoAllocate, &err); - + if (err) { + KLOGF("kramv %s failed create cgimage\n", filename); + return NO; + } CGRect rect = CGRectMake(0, 0, w, h); // The image is scaled—disproportionately, if necessary—to fit the bounds diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index e2612552..062263a1 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -177,18 +177,21 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { // see if it needs decode first bool needsDecode = false; + bool needsConvert = false; +#if SUPPORT_RGB if (isInternalRGBFormat(image.pixelFormat)) { - needsDecode = true; + needsConvert = true; } +#endif #if DO_DECODE - else if (isDecodeImageNeeded(image.pixelFormat)) { + if (isDecodeImageNeeded(image.pixelFormat)) { needsDecode = true; } #endif // open it again, but unpack the levels if supercompressed - if (needsDecode) { + if (needsConvert) { isInfoOnly = false; if (!image.open(imageData, imageDataLength, isInfoOnly)) { @@ -197,7 +200,7 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { } #if SUPPORT_RGB - if (isInternalRGBFormat(image.pixelFormat)) { + if (needsConvert) { // loads and converts image from RGB to RGBA Image rgbaImage; if (!rgbaImage.loadImageFromKTX(image)) diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index fa38aeec..1e39f397 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1538,7 +1538,7 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i return true; } -bool KTXImage::unpackLevel(uint32_t mipNumber, const uint8_t* srcData, uint8_t* dstData) { +bool KTXImage::unpackLevel(uint32_t mipNumber, const uint8_t* srcData, uint8_t* dstData) const { // uncompressed level uint32_t numChunks = totalChunks(); diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index bc2bfae8..43af56e0 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -308,7 +308,7 @@ class KTXImage { bool isKTX2() const { return skipImageLength; } // can use on ktx1/2 files, does a decompress if needed - bool unpackLevel(uint32_t mipNumber, const uint8_t* srcData, uint8_t* dstData); + bool unpackLevel(uint32_t mipNumber, const uint8_t* srcData, uint8_t* dstData) const; // helpers to work with the mipLevels array, mipLength and levelLength are important to get right // mip data depends on format @@ -328,7 +328,6 @@ class KTXImage { uint32_t totalChunks() const; size_t chunkOffset(uint32_t mipNumber, uint32_t chunkNumber) const { return mipLevels[mipNumber].offset + mipLevels[mipNumber].length * chunkNumber; } - private: bool openKTX2(const uint8_t* imageData, size_t imageDataLength, bool isInfoOnly); diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 84dc571a..898b0d50 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -1751,7 +1751,7 @@ static int32_t kramAppDecode(vector& args) // TODO: for hdr decode, may need to walk blocks or ask caller 
to pass -hdr flag if (!validateFormatAndDecoder(srcImage.textureType, srcImage.pixelFormat, textureDecoder)) { - KLOGE("Kram", "format decode only supports ktx output"); + KLOGE("Kram", "format decode only supports ktx and ktx2 output"); return -1; } diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index f218fe8c..a814a6ce 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -436,6 +436,12 @@ bool KramDecoder::decodeBlocks( // could tie use flags to format filter, or encoder settings // or may want to disable if decoders don't gen correct output TexEncoder decoder = params.decoder; + + if (!validateFormatAndDecoder(MyMTLTextureType2D, blockFormat, decoder)) { + KLOGE("Kram", "block decode only supports specific block types"); + return false; + } + #if COMPILE_ATE // Encode/decode formats differ depending on library version // but it's likely the fastest decoder. Only on macOS/iOS. @@ -774,8 +780,7 @@ bool KramDecoder::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& dstImage.reserveImageData(); } - bool success = false; - + // 1d textures need to write out 0 width KTXHeader headerCopy = dstHeader; @@ -802,21 +807,41 @@ bool KramDecoder::decodeImpl(const KTXImage& srcImage, FILE* dstFile, KTXImage& // DONE: walk chunks here and seek to src and dst offsets in conversion // make sure to walk chunks in the exact same order they are written, array then face, or slice + bool success = true; + + vector mipStorage; + mipStorage.resize(srcImage.mipLengthLargest() * numChunks); // enough to hold biggest mip + for (uint32_t i = 0; i < srcImage.header.numberOfMipmapLevels; ++i) { - // TODO: to decode compressed KTX2 want to walk all chunks of a single level + // DONE: to decode compressed KTX2 want to walk all chunks of a single level // after decompressing the level. This isn't doing unpackLevel and needs to here. - assert(!srcImage.isSupercompressed()); + + const KTXImageLevel& srcMipLevel = srcImage.mipLevels[i]; + + // this is offset to a given level + uint64_t mipBaseOffset = srcMipLevel.offset; + const uint8_t* srcLevelData = srcImage.fileData; + + if (srcImage.isSupercompressed()) { + + if (!srcImage.unpackLevel(i, srcLevelData + srcMipLevel.offset, mipStorage.data())) { + return false; + } + srcLevelData = mipStorage.data(); + + // going to upload from mipStorage temp array + mipBaseOffset = 0; + } uint32_t w, h, d; srcImage.mipDimensions(i, w, h, d); - for (int32_t chunk = 0; chunk < numChunks; ++chunk) { - - const KTXImageLevel& dstMipLevel = dstImage.mipLevels[i]; - outputTexture.resize(dstMipLevel.length); + const KTXImageLevel& dstMipLevel = dstImage.mipLevels[i]; + outputTexture.resize(dstMipLevel.length); - const KTXImageLevel& srcMipLevel = srcImage.mipLevels[i]; - const uint8_t* srcData = srcImage.fileData + srcMipLevel.offset + chunk * srcMipLevel.length; + + for (int32_t chunk = 0; chunk < numChunks; ++chunk) { + const uint8_t* srcData = srcLevelData + mipBaseOffset + chunk * srcMipLevel.length; // decode the blocks to LDR RGBA8 if (!decodeBlocks(w, h, srcData, srcMipLevel.length, srcImage.pixelFormat, outputTexture, params)) { diff --git a/libkram/kram/KramLog.cpp b/libkram/kram/KramLog.cpp index 4045ad15..dd58e523 100644 --- a/libkram/kram/KramLog.cpp +++ b/libkram/kram/KramLog.cpp @@ -48,7 +48,7 @@ void getErrorLogCaptureText(string& text) { // being parsed (f.e. mmapped Json) this can significantly slow a parser down. 
-static int32_t append_vsprintf(string& str, const char* format, va_list args) +int32_t append_vsprintf(string& str, const char* format, va_list args) { // for KLOGE("group", "%s", "text") if (strcmp(format, "%s") == 0) { diff --git a/libkram/kram/KramLog.h b/libkram/kram/KramLog.h index 38f48e2c..0bb5fa6c 100644 --- a/libkram/kram/KramLog.h +++ b/libkram/kram/KramLog.h @@ -50,7 +50,9 @@ using namespace std; // when set true, the internal string is cleared void setErrorLogCapture(bool enable); + bool isErrorLogCapture(); + // return the text void getErrorLogCaptureText(string& text); @@ -60,7 +62,11 @@ int32_t sprintf(string& str, const char* format, ...) __printflike(2, 3); // returns length of chars appended, -1 if failure int32_t append_sprintf(string& str, const char* format, ...) __printflike(2, 3); +// returns length of chars appended, -1 if failure +int32_t append_vsprintf(string& str, const char* format, va_list args); + bool startsWith(const char* str, const string& substring); + bool endsWithExtension(const char* str, const string& substring); // https://stackoverflow.com/questions/874134/find-out-if-string-ends-with-another-string-in-c From c9bbb57ae382ce43912e1b889b8dc1e83e914641 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Fri, 28 May 2021 09:32:08 -0700 Subject: [PATCH 080/901] kram-thumb - support srgb --- kram-thumb/KramThumbnailProvider.mm | 7 ++++--- libkram/kram/KTXImage.h | 10 +++++----- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/kram-thumb/KramThumbnailProvider.mm b/kram-thumb/KramThumbnailProvider.mm index 4efa2c91..2f31350d 100644 --- a/kram-thumb/KramThumbnailProvider.mm +++ b/kram-thumb/KramThumbnailProvider.mm @@ -75,6 +75,7 @@ - (void)provideThumbnailForFileRequest:(QLFileThumbnailRequest *)request complet // TODO: hookup to whether content is already premul with alpha // will have to come from props. ASTC always 4 channels but may hold other daa. bool isPremul = numChannelsOfFormat(image.pixelFormat) >= 4; + bool isSrgb = isSrgbFormat(image.pixelFormat); // unpack a level to get the blocks uint32_t mipNumber = 0; @@ -150,16 +151,16 @@ - (void)provideThumbnailForFileRequest:(QLFileThumbnailRequest *)request complet .bitsPerPixel = 32, }; - format.bitmapInfo = kCGBitmapByteOrderDefault | (isPremul ? kCGImageAlphaPremultipliedLast: kCGImageAlphaLast); + format.bitmapInfo = kCGBitmapByteOrderDefault | (isPremul ? kCGImageAlphaPremultipliedLast : kCGImageAlphaLast); + format.colorSpace = isSrgb ? CGColorSpaceCreateWithName(kCGColorSpaceSRGB) : CGColorSpaceCreateDeviceRGB(); // don't need to allocate, can requse memory from mip // TODO: might want to convert to PNG, but maybe thumbnail system does that automatically? 
// see how big thumbs.db is after running this - //CGColorSpaceRef colorSpace = CGColorSpaceCreateDeviceRGB(); vImage_Error err = 0; - CGImageRef cgImage = vImageCreateCGImageFromBuffer( &buf, &format, NULL, NULL, kvImageNoAllocate, &err); + CGImageRef cgImage = vImageCreateCGImageFromBuffer(&buf, &format, NULL, NULL, kvImageNoAllocate, &err); if (err) { KLOGF("kramv %s failed create cgimage\n", filename); return NO; diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index 43af56e0..d2880971 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -340,9 +340,9 @@ class KTXImage { // copied out of header, but also may be 1 instead of 0 // also these can be modified, and often are non-zero even if header is - uint32_t width; - uint32_t height; - uint32_t depth; + uint32_t width = 0; + uint32_t height = 0; + uint32_t depth = 0; // for ktx2 bool skipImageLength = false; @@ -356,8 +356,8 @@ class KTXImage { vector mipLevels; // offsets into fileData // this only holds data for mipLevels - size_t fileDataLength; - const uint8_t* fileData; // mmap data + size_t fileDataLength = 0; + const uint8_t* fileData = nullptr; // mmap data }; // GL/D3D hobbled non-pow2 mips by only supporting round down, not round up From 63b71310855050b9109c2a591887db3e93139513 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 29 May 2021 14:24:38 -0700 Subject: [PATCH 081/901] Kram - hook up shapes with basis Could gen tan from normal, uv. But its cheaper to handle basis in vertex shader. --- kramv/KramLoader.mm | 24 +----- kramv/KramRenderer.mm | 159 ++++++++++++++++++++++++++++------------ kramv/KramShaders.h | 12 ++- kramv/KramShaders.metal | 56 +++++++++++--- kramv/KramViewerBase.h | 3 + kramv/KramViewerMain.mm | 24 +++++- 6 files changed, 198 insertions(+), 80 deletions(-) diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 062263a1..d60e3bb9 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -175,32 +175,15 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { return nil; } - // see if it needs decode first - bool needsDecode = false; - bool needsConvert = false; - #if SUPPORT_RGB if (isInternalRGBFormat(image.pixelFormat)) { - needsConvert = true; - } -#endif -#if DO_DECODE - if (isDecodeImageNeeded(image.pixelFormat)) { - needsDecode = true; - } -#endif - - // open it again, but unpack the levels if supercompressed - if (needsConvert) { isInfoOnly = false; + // reopen and unzip it all if (!image.open(imageData, imageDataLength, isInfoOnly)) { return nil; } - } - -#if SUPPORT_RGB - if (needsConvert) { + // loads and converts image from RGB to RGBA Image rgbaImage; if (!rgbaImage.loadImageFromKTX(image)) @@ -234,8 +217,9 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { if (originalFormat != nullptr) { *originalFormat = (MTLPixelFormat)image.pixelFormat; } + #if DO_DECODE - if (needsDecode) { + if (isDecodeImageNeeded(image.pixelFormat)) { KTXImage imageDecoded; if (!decodeImage(image, imageDecoded)) { return nil; diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index eb42a481..8c97e852 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -77,6 +77,13 @@ @implementation Renderer KramLoader *_loader; MTKMesh *_mesh; + MDLVertexDescriptor *_mdlVertexDescriptor; + + MTKMesh *_meshPlane; // really a thin gox + MTKMesh *_meshBox; + MTKMesh *_meshSphere; + MTKMesh *_meshCylinder; + MTKMeshBufferAllocator *_metalAllocator; ShowSettings* _showSettings; } @@ -93,6 +100,8 @@ -(nonnull 
instancetype)initWithMetalKitView:(nonnull MTKView *)view settings:(no _loader = [KramLoader new]; _loader.device = _device; + _metalAllocator = [[MTKMeshBufferAllocator alloc] initWithDevice: _device]; + _inFlightSemaphore = dispatch_semaphore_create(MaxBuffersInFlight); [self _loadMetalWithView:view]; [self _loadAssets]; @@ -139,6 +148,46 @@ - (void)_createSamplers _colorMapSamplerBilinearWrap = [_device newSamplerStateWithDescriptor:samplerDescriptor]; } +- (void)_createVertexDescriptor +{ + _mtlVertexDescriptor = [[MTLVertexDescriptor alloc] init]; + + _mtlVertexDescriptor.attributes[VertexAttributePosition].format = MTLVertexFormatFloat3; + _mtlVertexDescriptor.attributes[VertexAttributePosition].offset = 0; + _mtlVertexDescriptor.attributes[VertexAttributePosition].bufferIndex = BufferIndexMeshPosition; + + _mtlVertexDescriptor.attributes[VertexAttributeTexcoord].format = MTLVertexFormatFloat2; // TODO: compress + _mtlVertexDescriptor.attributes[VertexAttributeTexcoord].offset = 0; + _mtlVertexDescriptor.attributes[VertexAttributeTexcoord].bufferIndex = BufferIndexMeshUV0; + + _mtlVertexDescriptor.attributes[VertexAttributeNormal].format = MTLVertexFormatFloat3; // TODO: compress + _mtlVertexDescriptor.attributes[VertexAttributeNormal].offset = 0; + _mtlVertexDescriptor.attributes[VertexAttributeNormal].bufferIndex = BufferIndexMeshNormal; + + _mtlVertexDescriptor.attributes[VertexAttributeTangent].format = MTLVertexFormatFloat4; // TODO: compress + _mtlVertexDescriptor.attributes[VertexAttributeTangent].offset = 0; + _mtlVertexDescriptor.attributes[VertexAttributeTangent].bufferIndex = BufferIndexMeshTangent; + + //_mtlVertexDescriptor.layouts[BufferIndexMeshPosition].stepRate = 1; + //_mtlVertexDescriptor.layouts[BufferIndexMeshPosition].stepFunction = MTLVertexStepFunctionPerVertex; + + _mtlVertexDescriptor.layouts[BufferIndexMeshPosition].stride = 3*4; + _mtlVertexDescriptor.layouts[BufferIndexMeshUV0].stride = 2*4; + _mtlVertexDescriptor.layouts[BufferIndexMeshNormal].stride = 3*4; + _mtlVertexDescriptor.layouts[BufferIndexMeshTangent].stride = 4*4; + + //----------------------- + // for ModelIO + _mdlVertexDescriptor = + MTKModelIOVertexDescriptorFromMetal(_mtlVertexDescriptor); + + _mdlVertexDescriptor.attributes[VertexAttributePosition].name = MDLVertexAttributePosition; + _mdlVertexDescriptor.attributes[VertexAttributeTexcoord].name = MDLVertexAttributeTextureCoordinate; + _mdlVertexDescriptor.attributes[VertexAttributeNormal].name = MDLVertexAttributeNormal; + _mdlVertexDescriptor.attributes[VertexAttributeTangent].name = MDLVertexAttributeTangent; + +} + - (void)_loadMetalWithView:(nonnull MTKView *)view { /// Load Metal state objects and initialize renderer dependent view properties @@ -151,24 +200,8 @@ - (void)_loadMetalWithView:(nonnull MTKView *)view view.sampleCount = 1; - _mtlVertexDescriptor = [[MTLVertexDescriptor alloc] init]; - - _mtlVertexDescriptor.attributes[VertexAttributePosition].format = MTLVertexFormatFloat3; - _mtlVertexDescriptor.attributes[VertexAttributePosition].offset = 0; - _mtlVertexDescriptor.attributes[VertexAttributePosition].bufferIndex = BufferIndexMeshPositions; - - _mtlVertexDescriptor.attributes[VertexAttributeTexcoord].format = MTLVertexFormatFloat2; - _mtlVertexDescriptor.attributes[VertexAttributeTexcoord].offset = 0; - _mtlVertexDescriptor.attributes[VertexAttributeTexcoord].bufferIndex = BufferIndexMeshUV0; - - _mtlVertexDescriptor.layouts[BufferIndexMeshPositions].stride = 12; - 
//_mtlVertexDescriptor.layouts[BufferIndexMeshPositions].stepRate = 1; - //_mtlVertexDescriptor.layouts[BufferIndexMeshPositions].stepFunction = MTLVertexStepFunctionPerVertex; - - _mtlVertexDescriptor.layouts[BufferIndexMeshUV0].stride = 8; - //_mtlVertexDescriptor.layouts[BufferIndexMeshUV0].stepRate = 1; - //_mtlVertexDescriptor.layouts[BufferIndexMeshUV0].stepFunction = MTLVertexStepFunctionPerVertex; - + [self _createVertexDescriptor]; + [self _createRenderPipelines:view]; //----------------------- @@ -362,47 +395,78 @@ - (void)_createSampleRender _sampleTex = [_device newTextureWithDescriptor:textureDesc]; } -- (void)_loadAssets +- (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh { - /// Load assets into metal objects + NSError* error = nil; - NSError *error = nil; + //mdlMesh.vertexDescriptor = _mdlVertexDescriptor; + + [mdlMesh addOrthTanBasisForTextureCoordinateAttributeNamed: MDLVertexAttributeTextureCoordinate + normalAttributeNamed: MDLVertexAttributeNormal + tangentAttributeNamed: MDLVertexAttributeTangent]; + + mdlMesh.vertexDescriptor = _mdlVertexDescriptor; + + // TODO: name the vertex attributes, can that be done in _mdlVertexDescriptor + // may have to set name on MTLBuffer range on IB and VB + + // now set it into mtk mesh + MTKMesh* mesh = [[MTKMesh alloc] initWithMesh:mdlMesh + device:_device + error:&error]; + mesh.name = [NSString stringWithUTF8String:name]; - MTKMeshBufferAllocator *metalAllocator = [[MTKMeshBufferAllocator alloc] - initWithDevice: _device]; + if(!mesh || error) + { + NSLog(@"Error creating MetalKit mesh %@", error.localizedDescription); + return nil; + } -#if 1 // TODO: replace box with fsq or fst, or use thin box for perspective/rotation - MDLMesh *mdlMesh = [MDLMesh newBoxWithDimensions:(vector_float3){1, 1, 1} + return mesh; +} + +- (void)_loadAssets +{ + /// Load assets into metal objects + + MDLMesh *mdlMesh; + + mdlMesh = [MDLMesh newBoxWithDimensions:(vector_float3){1, 1, 1} segments:(vector_uint3){1, 1, 1} geometryType:MDLGeometryTypeTriangles inwardNormals:NO - allocator:metalAllocator]; + allocator:_metalAllocator]; -#endif + _meshBox = [self _createMeshAsset:"MeshBox" mdlMesh:mdlMesh]; - MDLVertexDescriptor *mdlVertexDescriptor = - MTKModelIOVertexDescriptorFromMetal(_mtlVertexDescriptor); - - mdlVertexDescriptor.attributes[VertexAttributePosition].name = MDLVertexAttributePosition; - mdlVertexDescriptor.attributes[VertexAttributeTexcoord].name = MDLVertexAttributeTextureCoordinate; - - mdlMesh.vertexDescriptor = mdlVertexDescriptor; - - _mesh = [[MTKMesh alloc] initWithMesh:mdlMesh - device:_device - error:&error]; - _mesh.name = @"BoxMesh"; + // TOOO: have more shape types - this is box, need thin box (plane), and sphere, and cylinder + // eventually load usdz and gltf2 custom model. Need 3d manipulation of shape like arcball + // and eyedropper is more complex. 
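Editorial aside, not part of the patch: following the TODO above, a thin-box "plane" could be slotted in through the same MDLMesh to _createMeshAsset path already used for the box. This sketch would sit inside _loadAssets next to the other shapes; the 0.05 thickness is an arbitrary choice, and _meshPlane is the ivar declared earlier in this commit.

// Sketch only: a thin box standing in for the plane shape.
mdlMesh = [MDLMesh newBoxWithDimensions:(vector_float3){1, 1, 0.05}
                               segments:(vector_uint3){1, 1, 1}
                           geometryType:MDLGeometryTypeTriangles
                          inwardNormals:NO
                              allocator:_metalAllocator];

_meshPlane = [self _createMeshAsset:"MeshPlane" mdlMesh:mdlMesh];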
+ + mdlMesh = [MDLMesh newEllipsoidWithRadii:(vector_float3){0.5, 0.5, 0.5} radialSegments:16 verticalSegments:16 geometryType:MDLGeometryTypeTriangles inwardNormals:NO hemisphere:NO allocator:_metalAllocator]; + + _meshSphere = [self _createMeshAsset:"MeshSphere" mdlMesh:mdlMesh]; + + mdlMesh = [MDLMesh newCylinderWithHeight:1.0 + radii:(vector_float2){0.5, 0.5} + radialSegments:16 + verticalSegments:1 + geometryType:MDLGeometryTypeTriangles + inwardNormals:NO + allocator:_metalAllocator]; + + _meshCylinder = [self _createMeshAsset:"MeshCylinder" mdlMesh:mdlMesh]; + + _mesh = _meshBox; - if(!_mesh || error) - { - NSLog(@"Error creating MetalKit mesh %@", error.localizedDescription); - } } - (BOOL)loadTextureFromData:(const string&)fullFilename timestamp:(double)timestamp imageData:(nonnull const uint8_t*)imageData imageDataLength:(uint64_t)imageDataLength { // image can be decoded to rgba8u if platform can't display format natively // but still want to identify blockSize from original format + + // Note that modstamp can change, but content data hash may be the same bool isTextureChanged = (fullFilename != _showSettings->lastFilename) || (timestamp != _showSettings->lastTimestamp); @@ -420,7 +484,7 @@ - (BOOL)loadTextureFromData:(const string&)fullFilename timestamp:(double)timest // then can decode blocks in kramv KTXImage sourceImage; - if (!sourceImage.open(imageData,imageDataLength)) { + if (!sourceImage.open(imageData, imageDataLength)) { return NO; } @@ -685,7 +749,12 @@ - (void)_updateGameState // this was stored so view could use it, but now that code calcs the transform via computeImageTransform _showSettings->projectionViewModelMatrix = projectionViewMatrix * _modelMatrix; - + // crude shape experiment + switch(_showSettings->meshNumber) { + case 0: _mesh = _meshBox; break; + case 1: _mesh = _meshSphere; break; + case 2: _mesh = _meshCylinder; break; + } //_rotation += .01; } diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index 3a192e0b..0b7e6b36 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -26,20 +26,24 @@ typedef NS_ENUM(int32_t, BufferIndex) { // mesh - BufferIndexMeshPositions = 0, // pos + BufferIndexMeshPosition = 0, // pos BufferIndexMeshUV0 = 1, // uv + BufferIndexMeshNormal = 2, // normals + BufferIndexMeshTangent = 3, // normals - BufferIndexUniforms = 2, - BufferIndexUniformsLevel = 3, + BufferIndexUniforms = 16, + BufferIndexUniformsLevel = 17, // for compute - BufferIndexUniformsCS = 0, + BufferIndexUniformsCS = 16, }; typedef NS_ENUM(int32_t, VertexAttribute) { VertexAttributePosition = 0, VertexAttributeTexcoord = 1, + VertexAttributeNormal = 2, + VertexAttributeTangent = 3, }; typedef NS_ENUM(int32_t, TextureIndex) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index bd430fed..c94e9537 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -185,27 +185,35 @@ half3 toNormal(half3 n) // use mikktspace, gen bitan in frag shader with sign, don't normalize vb/vt // see http://www.mikktspace.com/ -half3 transformNormal(half4 tangent, half3 vertexNormal, - texture2d texture, sampler s, float2 uv, bool isSigned = true) +half3 transformNormal(half4 tangent, half3 vertexNormal, half3 bumpNormal) { // Normalize tangent/vertexNormal in vertex shader // but don't renormalize interpolated tangent, vertexNormal in fragment shader // Reconstruct bitan in frag shader // https://bgolus.medium.com/generating-perfect-normal-maps-for-unity-f929e673fc57 - half4 nmap = texture.sample(s, uv); - if (!isSigned) { - nmap.xy = 
toSnorm8(nmap.xy); - } - half3 normal = toNormal(nmap.xyz); // now transform by basis and normalize from any shearing, and since interpolated basis vectors // are not normalized half3x3 tbn = half3x3(tangent.xyz, tangent.w * cross(vertexNormal, tangent.xyz), vertexNormal); - normal = tbn * normal; - return normalize(normal); + bumpNormal = tbn * bumpNormal; + return normalize(bumpNormal); } +half3 transformNormal(half4 tangent, half3 vertexNormal, + texture2d texture, sampler s, float2 uv, bool isSigned = true) +{ + half4 nmap = texture.sample(s, uv); + if (!isSigned) { + nmap.xy = toSnorm8(nmap.xy); + } + half3 bumpNormal = toNormal(nmap.xyz); + + return transformNormal(tangent, vertexNormal, bumpNormal); +} + + + // TODO: have more bones, or read from texture instead of uniforms // can then do instanced skining, but vfetch lookup slower #define maxBones 128 @@ -259,7 +267,7 @@ float3x3 toFloat3x3(float4x4 m) return float3x3(m[0].xyz, m[1].xyz, m[2].xyz); } -// this is for vertex shader +// this is for vertex shader if tangent supplied void transformBasis(thread float3& tangent, thread float3& normal, float4x4 modelToWorldTfm, bool isScaled = false) { @@ -309,6 +317,10 @@ struct Vertex { float4 position [[attribute(VertexAttributePosition)]]; float2 texCoord [[attribute(VertexAttributeTexcoord)]]; + + // basis + float3 normal [[attribute(VertexAttributeNormal)]];; // consider hallf + float4 tangent [[attribute(VertexAttributeTangent)]];; // tan + bitanSign }; struct ColorInOut @@ -317,6 +329,10 @@ struct ColorInOut float3 texCoordXYZ; float2 texCoord; float3 worldPos; + + // basis + half3 normal; + half4 tangent; }; ColorInOut DrawImageFunc( @@ -332,6 +348,21 @@ ColorInOut DrawImageFunc( float4 worldPos = uniforms.modelMatrix * position; + // deal with full basis + + if (uniforms.isNormal && uniforms.isPreview) { + float3 tangent = in.tangent.xyz; + float3 normal = in.normal; + transformBasis(tangent, normal, uniforms.modelMatrix, false); + + out.normal = toHalf(normal); + out.tangent.xyz = toHalf(tangent); + out.tangent.w = toHalf(in.tangent.w); + } + else { + out.normal = toHalf(in.normal); + out.tangent = toHalf(in.tangent); + } // try adding pixel offset to pixel values worldPos.xy += uniformsLevel.drawOffset; @@ -480,6 +511,8 @@ float4 DrawPixels( else if (uniforms.isNormal) { // light the normal map + + // add swizzle for ASTC/BC5nm, other 2 channels format can only store 01 in ba if (uniforms.isSwizzleAGToRG) { c = float4(c.ag, 0, 1); @@ -498,6 +531,9 @@ float4 DrawPixels( float3 n = c.xyz; + // handle the basis here + n = toFloat(transformNormal(in.tangent, in.normal, toHalf(n))); + // diffuse float dotNL = saturate(dot(n, lightDir)); float3 diffuse = lightColor.xyz * dotNL; diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 9b3f2e2c..d9015f69 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -167,6 +167,9 @@ class ShowSettings { string lastFilename; double lastTimestamp = 0.0; + + int32_t meshNumber = 0; + int32_t meshCount = 3; }; float4x4 matrix4x4_translation(float tx, float ty, float tz); diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index a0c370bf..afc42d54 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -275,7 +275,11 @@ - (IBAction)showAboutDialog:(id)sender { Num2 = 0x13, Num3 = 0x14, Num4 = 0x15, - // ... 
+ Num5 = 0x17, + Num6 = 0x16, + Num7 = 0x1A, + Num8 = 0x1C, + Num9 = 0x19, Num0 = 0x1D, LeftBrace = 0x21, @@ -1862,6 +1866,24 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown } break; + // test out different shapes, not offiical support yet + case Key::Num8: + if (_showSettings->meshCount > 1) { + if (isShiftKeyDown) { + _showSettings->meshNumber = _showSettings->meshNumber + _showSettings->meshCount - 1; + } + else { + _showSettings->meshNumber++; + } + _showSettings->meshNumber = _showSettings->meshNumber % _showSettings->meshCount; + + sprintf(text, "Mesh %d/%d", _showSettings->meshNumber, _showSettings->meshCount); + isChanged = true; + } + break; + + // TODO: should probably have these wrap and not clamp to count limits + // mip up/down case Key::M: if (_showSettings->maxLOD > 1) { From 0e6d2b84d4e9f1424192f7215164d8c5e9688b4a Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 29 May 2021 17:12:16 -0700 Subject: [PATCH 082/901] kram - flip dx, dy again in heightToNormals, add basis transform Needed to flip tangent.w ModelIO doesn't caclulate this correctly. Add facing support so if inside model, the faces look okay. --- kramv/KramShaders.metal | 67 +++++++++++++++++++++------------- libkram/kram/KramImageInfo.cpp | 8 ++-- 2 files changed, 45 insertions(+), 30 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index c94e9537..6f51ca1d 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -185,17 +185,21 @@ half3 toNormal(half3 n) // use mikktspace, gen bitan in frag shader with sign, don't normalize vb/vt // see http://www.mikktspace.com/ -half3 transformNormal(half4 tangent, half3 vertexNormal, half3 bumpNormal) +half3 transformNormal(half3 bumpNormal, half4 tangent, half3 vertexNormal) { // Normalize tangent/vertexNormal in vertex shader // but don't renormalize interpolated tangent, vertexNormal in fragment shader // Reconstruct bitan in frag shader // https://bgolus.medium.com/generating-perfect-normal-maps-for-unity-f929e673fc57 + // ModelIO not generating correct bitan sign + // TODO: flip this on srcData, and not here + half bitangentSign = -tangent.w; // now transform by basis and normalize from any shearing, and since interpolated basis vectors // are not normalized - half3x3 tbn = half3x3(tangent.xyz, tangent.w * cross(vertexNormal, tangent.xyz), vertexNormal); + half3 bitangent = bitangentSign * cross(vertexNormal, tangent.xyz); + half3x3 tbn = half3x3(tangent.xyz, bitangent, vertexNormal); bumpNormal = tbn * bumpNormal; return normalize(bumpNormal); } @@ -209,7 +213,8 @@ half3 transformNormal(half4 tangent, half3 vertexNormal, } half3 bumpNormal = toNormal(nmap.xyz); - return transformNormal(tangent, vertexNormal, bumpNormal); + return transformNormal(bumpNormal, + tangent, vertexNormal); } @@ -258,8 +263,8 @@ void skinPosAndBasis(thread float4& position, thread float3& tangent, thread flo // not dealing with non-uniform scale correction // see scale2 handling in transformBasis, a little different with transpose of 3x4 - tangent = (float4(tangent, 0.0) * bindPoseToBoneTransform); normal = (float4(normal, 0.0) * bindPoseToBoneTransform); + tangent = (float4(tangent, 0.0) * bindPoseToBoneTransform); } float3x3 toFloat3x3(float4x4 m) @@ -268,23 +273,21 @@ float3x3 toFloat3x3(float4x4 m) } // this is for vertex shader if tangent supplied -void transformBasis(thread float3& tangent, thread float3& normal, +void transformBasis(thread float3& normal, thread float3& tangent, float4x4 modelToWorldTfm, bool 
isScaled = false) { float3x3 m = toFloat3x3(modelToWorldTfm); + // note this is RinvT * n = (Rt)t = R, this is for simple inverse, inv scale handled below + // but uniform scale already handled by normalize + normal = m * normal; + // question here of whether tangent is transformed by m or mInvT // most apps assume m, but after averaging it can be just as off the surface as the normal - bool useInverseOnTangent = true; - if (useInverseOnTangent) - tangent = tangent * m; - else - tangent = m * tangent; + tangent = m * tangent; + - // note this is n * R = Rt * n, for simple affine transforms Rinv = Rt, invScale then handled below - normal = normal * m; - // have to apply invSquare of scale here to approximate invT // also make sure to identify inversion off determinant before instancing so that backfacing is correct // this is only needed if non-uniform scale present in modelToWorldTfm, could precompute scale2 @@ -300,13 +303,13 @@ void transformBasis(thread float3& tangent, thread float3& normal, scale2 = recip(max(0.0001 * 0.0001, scale2)); // apply inverse - tangent *= scale2; normal *= scale2; + tangent *= scale2; } // vertex shader normalize, but the fragment shader should not - tangent = normalize(tangent); normal = normalize(normal); + tangent = normalize(tangent); // make sure to preserve bitan sign in tangent.w } @@ -351,9 +354,9 @@ ColorInOut DrawImageFunc( // deal with full basis if (uniforms.isNormal && uniforms.isPreview) { - float3 tangent = in.tangent.xyz; float3 normal = in.normal; - transformBasis(tangent, normal, uniforms.modelMatrix, false); + float3 tangent = in.tangent.xyz; + transformBasis(normal, tangent, uniforms.modelMatrix, false); out.normal = toHalf(normal); out.tangent.xyz = toHalf(tangent); @@ -470,6 +473,7 @@ vertex ColorInOut DrawVolumeVS( float4 DrawPixels( ColorInOut in [[stage_in]], + bool facing [[front_facing]], constant Uniforms& uniforms, float4 c, float2 textureSize @@ -511,8 +515,6 @@ float4 DrawPixels( else if (uniforms.isNormal) { // light the normal map - - // add swizzle for ASTC/BC5nm, other 2 channels format can only store 01 in ba if (uniforms.isSwizzleAGToRG) { c = float4(c.ag, 0, 1); @@ -526,13 +528,20 @@ float4 DrawPixels( c.rgb = toNormal(c.rgb); + // flip the normal if facing is flipped + // TODO: needed for tangent too? 
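    // This gives the preview crude two-sided lighting: on back faces the sampled
    // tangent-space normal and the bitangent sign are negated so the inside of the
    // shape still shades plausibly ("if inside model, the faces look okay" per the
    // commit message); whether more of the basis needs flipping is the open TODO above.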
+ if (!facing) { + c.xyz = -c.xyz; + in.tangent.w = -in.tangent.w; + } + float3 lightDir = normalize(float3(1,1,1)); float3 lightColor = float3(1,1,1); float3 n = c.xyz; // handle the basis here - n = toFloat(transformNormal(in.tangent, in.normal, toHalf(n))); + n = toFloat(transformNormal(toHalf(n), in.tangent, in.normal)); // diffuse float dotNL = saturate(dot(n, lightDir)); @@ -776,6 +785,7 @@ float4 DrawPixels( fragment float4 Draw1DArrayPS( ColorInOut in [[stage_in]], + bool facing [[front_facing]], constant Uniforms& uniforms [[ buffer(BufferIndexUniforms) ]], constant UniformsLevel& uniformsLevel [[ buffer(BufferIndexUniformsLevel) ]], sampler colorSampler [[ sampler(SamplerIndexColor) ]], @@ -790,11 +800,12 @@ fragment float4 Draw1DArrayPS( float2 textureSize = float2(colorMap.get_width(0), 1); // colorMap.get_num_mip_levels(); - return DrawPixels(in, uniforms, c, textureSize); + return DrawPixels(in, facing, uniforms, c, textureSize); } fragment float4 DrawImagePS( ColorInOut in [[stage_in]], + bool facing [[front_facing]], constant Uniforms& uniforms [[ buffer(BufferIndexUniforms) ]], constant UniformsLevel& uniformsLevel [[ buffer(BufferIndexUniformsLevel) ]], sampler colorSampler [[ sampler(SamplerIndexColor) ]], @@ -808,11 +819,12 @@ fragment float4 DrawImagePS( float2 textureSize = float2(colorMap.get_width(lod), colorMap.get_height(lod)); // colorMap.get_num_mip_levels(); - return DrawPixels(in, uniforms, c, textureSize); + return DrawPixels(in, facing, uniforms, c, textureSize); } fragment float4 DrawImageArrayPS( ColorInOut in [[stage_in]], + bool facing [[front_facing]], constant Uniforms& uniforms [[ buffer(BufferIndexUniforms) ]], constant UniformsLevel& uniformsLevel [[ buffer(BufferIndexUniformsLevel) ]], sampler colorSampler [[ sampler(SamplerIndexColor) ]], @@ -826,12 +838,13 @@ fragment float4 DrawImageArrayPS( float2 textureSize = float2(colorMap.get_width(lod), colorMap.get_height(lod)); // colorMap.get_num_mip_levels(); - return DrawPixels(in, uniforms, c, textureSize); + return DrawPixels(in, facing, uniforms, c, textureSize); } fragment float4 DrawCubePS( ColorInOut in [[stage_in]], + bool facing [[front_facing]], constant Uniforms& uniforms [[ buffer(BufferIndexUniforms) ]], constant UniformsLevel& uniformsLevel [[ buffer(BufferIndexUniformsLevel) ]], sampler colorSampler [[ sampler(SamplerIndexColor) ]], @@ -846,11 +859,12 @@ fragment float4 DrawCubePS( float2 textureSize = float2(w, w); // colorMap.get_num_mip_levels(); - return DrawPixels(in, uniforms, c, textureSize); + return DrawPixels(in, facing, uniforms, c, textureSize); } fragment float4 DrawCubeArrayPS( ColorInOut in [[stage_in]], + bool facing [[front_facing]], constant Uniforms& uniforms [[ buffer(BufferIndexUniforms) ]], constant UniformsLevel& uniformsLevel [[ buffer(BufferIndexUniformsLevel) ]], sampler colorSampler [[ sampler(SamplerIndexColor) ]], @@ -865,12 +879,13 @@ fragment float4 DrawCubeArrayPS( float2 textureSize = float2(w, w); // colorMap.get_num_mip_levels(); - return DrawPixels(in, uniforms, c, textureSize); + return DrawPixels(in, facing, uniforms, c, textureSize); } fragment float4 DrawVolumePS( ColorInOut in [[stage_in]], + bool facing [[front_facing]], constant Uniforms& uniforms [[ buffer(BufferIndexUniforms) ]], constant UniformsLevel& uniformsLevel [[ buffer(BufferIndexUniformsLevel) ]], sampler colorSampler [[ sampler(SamplerIndexColor) ]], @@ -895,7 +910,7 @@ fragment float4 DrawVolumePS( float2 textureSize = float2(colorMap.get_width(lod), colorMap.get_height(lod)); // 
colorMap.get_num_mip_levels(); - return DrawPixels(in, uniforms, c, textureSize); + return DrawPixels(in, facing, uniforms, c, textureSize); } //-------------------------------------------------- diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index 391fd17a..aba4df27 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -1305,8 +1305,8 @@ void ImageInfo::heightToNormals(int32_t w, int32_t h, float dx = (cE - cW) * scaleX; float dy = (cN - cS) * scaleY; - //dx = -dx; - //dy = -dy; + dx = -dx; + dy = -dy; float4 normal = float4m(dx, dy, 1.0f, 0.0f); normal = normalize(normal); @@ -1340,8 +1340,8 @@ void ImageInfo::heightToNormals(int32_t w, int32_t h, float dx = (cE - cW) * scaleX; float dy = (cN - cS) * scaleY; - //dx = -dx; - //dy = -dy; + dx = -dx; + dy = -dy; float4 normal = float4m(dx, dy, 1.0f, 0.0f); normal = normalize(normal); From 29e76533ceb97024f8ea519b78d9f27517e0a9eb Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 01:44:19 -0700 Subject: [PATCH 083/901] CMake - fix including Metal source in builds for gpu capture --- kramv/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kramv/CMakeLists.txt b/kramv/CMakeLists.txt index 11baacc3..97d844bf 100644 --- a/kramv/CMakeLists.txt +++ b/kramv/CMakeLists.txt @@ -81,7 +81,7 @@ set_target_properties(${myTargetApp} PROPERTIES #------------------------- # turn on shader capture support and indexing # why can't this just be a yes or no, there's "Yes, exclude source code" - XCODE_ATTRIBUTE_MTL_ENABLE_DEBUG_INFO "Yes, include source code" + XCODE_ATTRIBUTE_MTL_ENABLE_DEBUG_INFO INCLUDE_SOURCE XCODE_ATTRIBUTE_MTL_ENABLE_INDEX_STORE YES ) From a9473d301b66412f1552e570f7b2008039597a6a Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 01:49:07 -0700 Subject: [PATCH 084/901] kramv - fixup the 3d shapes as much as possible, fix eyedropper. These are still all smushed, since xy-only scale is applied to the box to approximate a wxh image. Flip the u direction of the sphere/cylinder primitives, since these are inverted from the box. Invert the bitangent sign, since even with flipping u direction, the tangents are inverted from what they should be. Split out 2d from 3d view matrices. Fix eyedropper on archive by setting decodedFormat on that path, and fixing toSnorm8 call use. Try to activate specular, but it looks bad in ortho and with the scaling. 
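The eyedropper fix in this change replaces a per-level halving loop with a single shift per
axis, clamped so deep mips of non-square images never collapse to zero. A minimal sketch of
that mip-dimension math (the helper name is illustrative, not kram's API):

    #include <algorithm>
    #include <cstdint>

    // width/height of mip level lod, clamped to at least one texel per axis
    inline void mipDimensions(int32_t& w, int32_t& h, int32_t lod)
    {
        w = std::max(1, w >> lod);
        h = std::max(1, h >> lod);
    }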
--- kramv/KramRenderer.mm | 134 +++++++++++++++++++++++++++++----------- kramv/KramShaders.h | 3 +- kramv/KramShaders.metal | 29 ++++++--- kramv/KramViewerBase.h | 4 +- kramv/KramViewerMain.mm | 9 ++- 5 files changed, 128 insertions(+), 51 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 8c97e852..90542ca5 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -70,8 +70,14 @@ @implementation Renderer uint8_t _uniformBufferIndex; float4x4 _projectionMatrix; + + // 2d versions float4x4 _viewMatrix; float4x4 _modelMatrix; + + // 3d versions + float4x4 _viewMatrix3D; + float4x4 _modelMatrix3D; //float _rotation; KramLoader *_loader; @@ -79,11 +85,12 @@ @implementation Renderer MDLVertexDescriptor *_mdlVertexDescriptor; - MTKMesh *_meshPlane; // really a thin gox + //MTKMesh *_meshPlane; // really a thin gox MTKMesh *_meshBox; MTKMesh *_meshSphere; MTKMesh *_meshCylinder; MTKMeshBufferAllocator *_metalAllocator; + bool _is3DView; // whether view is 3d for now ShowSettings* _showSettings; } @@ -395,18 +402,30 @@ - (void)_createSampleRender _sampleTex = [_device newTextureWithDescriptor:textureDesc]; } -- (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh +- (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipUV:(bool)doFlipUV { NSError* error = nil; //mdlMesh.vertexDescriptor = _mdlVertexDescriptor; + + mdlMesh.vertexDescriptor = _mdlVertexDescriptor; + + // flip the u coordinate + if (doFlipUV) + { + id uvs = mdlMesh.vertexBuffers[1]; + float2* uvData = (float2*)uvs.map.bytes; + + for (uint32_t i = 0; i < mdlMesh.vertexCount; ++i) { + uvData[i].x = 1.0f - uvData[i].x; + } + } + [mdlMesh addOrthTanBasisForTextureCoordinateAttributeNamed: MDLVertexAttributeTextureCoordinate normalAttributeNamed: MDLVertexAttributeNormal tangentAttributeNamed: MDLVertexAttributeTangent]; - mdlMesh.vertexDescriptor = _mdlVertexDescriptor; - // TODO: name the vertex attributes, can that be done in _mdlVertexDescriptor // may have to set name on MTLBuffer range on IB and VB @@ -437,15 +456,19 @@ - (void)_loadAssets inwardNormals:NO allocator:_metalAllocator]; - _meshBox = [self _createMeshAsset:"MeshBox" mdlMesh:mdlMesh]; + _meshBox = [self _createMeshAsset:"MeshBox" mdlMesh:mdlMesh doFlipUV:false]; // TOOO: have more shape types - this is box, need thin box (plane), and sphere, and cylinder // eventually load usdz and gltf2 custom model. Need 3d manipulation of shape like arcball // and eyedropper is more complex. + // The sphere/cylinder shapes are v increasing in -Y, and u increasing conterclockwise, + // u is the opposite direction to the cube/plane, so need to flip those coords + // I think this has also flipped the tangents the wrong way. 
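    // Since _createMeshAsset rewrites uv.x to 1 - uv.x before
    // addOrthTanBasisForTextureCoordinateAttributeNamed runs, the generated tangents
    // follow the flipped u direction, which is presumably why the shader still negates
    // the bitangent sign at this point in the series.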
+ mdlMesh = [MDLMesh newEllipsoidWithRadii:(vector_float3){0.5, 0.5, 0.5} radialSegments:16 verticalSegments:16 geometryType:MDLGeometryTypeTriangles inwardNormals:NO hemisphere:NO allocator:_metalAllocator]; - - _meshSphere = [self _createMeshAsset:"MeshSphere" mdlMesh:mdlMesh]; + + _meshSphere = [self _createMeshAsset:"MeshSphere" mdlMesh:mdlMesh doFlipUV:true]; mdlMesh = [MDLMesh newCylinderWithHeight:1.0 radii:(vector_float2){0.5, 0.5} @@ -455,7 +478,7 @@ - (void)_loadAssets inwardNormals:NO allocator:_metalAllocator]; - _meshCylinder = [self _createMeshAsset:"MeshCylinder" mdlMesh:mdlMesh]; + _meshCylinder = [self _createMeshAsset:"MeshCylinder" mdlMesh:mdlMesh doFlipUV:true]; _mesh = _meshBox; @@ -492,6 +515,7 @@ - (BOOL)loadTextureFromData:(const string&)fullFilename timestamp:(double)timest _showSettings->imageInfoVerbose = kramInfoKTXToString(fullFilename, sourceImage, true); _showSettings->originalFormat = (MyMTLPixelFormat)originalFormatMTL; + _showSettings->decodedFormat = (MyMTLPixelFormat)texture.pixelFormat; _showSettings->lastFilename = fullFilename; _showSettings->lastTimestamp = timestamp; @@ -659,8 +683,13 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex // have one of these for each texture added to the viewer float scaleX = MAX(1, texture.width); float scaleY = MAX(1, texture.height); - _modelMatrix = float4x4(float4m(scaleX, scaleY, 1.0f, 1.0f)); - _modelMatrix = _modelMatrix * matrix4x4_translation(0.0f, 0.0f, -1.0); + _modelMatrix = float4x4(float4m(scaleX, scaleY, 1.0f, 1.0f)); // non uniform scale + _modelMatrix = _modelMatrix * matrix4x4_translation(0.0f, 0.0f, -1.0); // set z=-1 unit back + + // squashed 3d primitive in z, throws off normals + float scale = MAX(scaleX, scaleY); + _modelMatrix3D = float4x4(float4m(scale, scale, 1.0f, 1.0f)); // non uniform scale + _modelMatrix3D = _modelMatrix3D * matrix4x4_translation(0.0f, 0.0f, -1.0); // set z=-1 unit back return YES; } @@ -670,10 +699,18 @@ - (float4x4)computeImageTransform:(float)panX panY:(float)panY zoom:(float)zoom float4x4 panTransform = matrix4x4_translation(-panX, panY, 0.0); // scale - float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); - viewMatrix = panTransform * viewMatrix; - - return _projectionMatrix * viewMatrix * _modelMatrix; + if (_is3DView) { + float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); // non-uniform scale + viewMatrix = panTransform * viewMatrix; + + return _projectionMatrix * viewMatrix * _modelMatrix3D; + } + else { + float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); // non-uniform scale + viewMatrix = panTransform * viewMatrix; + + return _projectionMatrix * viewMatrix * _modelMatrix; + } } - (void)_updateGameState @@ -729,33 +766,60 @@ - (void)_updateGameState uniforms.debugMode = _showSettings->isPreview ? 
ShaderDebugMode::ShDebugModeNone : (ShaderDebugMode)_showSettings->debugMode; uniforms.channels = (ShaderTextureChannels)_showSettings->channels; + // crude shape experiment + _is3DView = true; + switch(_showSettings->meshNumber) { + case 0: _mesh = _meshBox; _is3DView = false; break; + case 1: _mesh = _meshBox; break; + case 2: _mesh = _meshSphere; break; + case 3: _mesh = _meshCylinder; break; + } + // translate float4x4 panTransform = matrix4x4_translation(-_showSettings->panX, _showSettings->panY, 0.0); // scale - _viewMatrix = float4x4(float4m(_showSettings->zoom, _showSettings->zoom, 1.0f, 1.0f)); - _viewMatrix = panTransform * _viewMatrix; - - // viewMatrix should typically be the inverse - //_viewMatrix = simd_inverse(_viewMatrix); - - float4x4 projectionViewMatrix = _projectionMatrix * _viewMatrix; - - uniforms.projectionViewMatrix = projectionViewMatrix; - - // works when only one texture, but switch to projectViewMatrix - uniforms.modelMatrix = _modelMatrix; + float zoom = _showSettings->zoom; - // this was stored so view could use it, but now that code calcs the transform via computeImageTransform - _showSettings->projectionViewModelMatrix = projectionViewMatrix * _modelMatrix; - - // crude shape experiment - switch(_showSettings->meshNumber) { - case 0: _mesh = _meshBox; break; - case 1: _mesh = _meshSphere; break; - case 2: _mesh = _meshCylinder; break; + if (_is3DView) { + _viewMatrix3D = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); // non-uniform + _viewMatrix3D = panTransform * _viewMatrix3D; + + // viewMatrix should typically be the inverse + //_viewMatrix = simd_inverse(_viewMatrix3D); + + float4x4 projectionViewMatrix = _projectionMatrix * _viewMatrix3D; + uniforms.projectionViewMatrix = projectionViewMatrix; + + // works when only one texture, but switch to projectViewMatrix + uniforms.modelMatrix = _modelMatrix3D; + + // this was stored so view could use it, but now that code calcs the transform via computeImageTransform + _showSettings->projectionViewModelMatrix = uniforms.projectionViewMatrix * uniforms.modelMatrix; + + // cache the camera position + uniforms.cameraPosition = inverse(_viewMatrix3D).columns[3].xyz; // this is all ortho } - + else { + _viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); + _viewMatrix = panTransform * _viewMatrix; + + // viewMatrix should typically be the inverse + //_viewMatrix = simd_inverse(_viewMatrix3D); + + float4x4 projectionViewMatrix = _projectionMatrix * _viewMatrix; + uniforms.projectionViewMatrix = projectionViewMatrix; + + // works when only one texture, but switch to projectViewMatrix + uniforms.modelMatrix = _modelMatrix; + + // this was stored so view could use it, but now that code calcs the transform via computeImageTransform + _showSettings->projectionViewModelMatrix = uniforms.projectionViewMatrix * uniforms.modelMatrix ; + + // cache the camera position + uniforms.cameraPosition = inverse(_viewMatrix).columns[3].xyz; // this is all ortho + } + //_rotation += .01; } diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index 0b7e6b36..6e363c69 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -96,7 +96,8 @@ struct Uniforms { simd::float4x4 projectionViewMatrix; simd::float4x4 modelMatrix; - + simd::float3 cameraPosition; // world-space + bool isSigned; bool isNormal; bool isSwizzleAGToRG; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 6f51ca1d..b77875ba 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -192,9 +192,11 @@ half3 transformNormal(half3 bumpNormal, 
half4 tangent, half3 vertexNormal) // Reconstruct bitan in frag shader // https://bgolus.medium.com/generating-perfect-normal-maps-for-unity-f929e673fc57 + half bitangentSign = tangent.w; + // ModelIO not generating correct bitan sign // TODO: flip this on srcData, and not here - half bitangentSign = -tangent.w; + bitangentSign = -bitangentSign; // now transform by basis and normalize from any shearing, and since interpolated basis vectors // are not normalized @@ -549,14 +551,20 @@ float4 DrawPixels( float3 specular = float3(0.0); - // this renders bright in one quadrant of wrap preview + // this renders bright in one quadrant of wrap preview, hard in ortho view // specular - //float3 v = normalize(in.worldPos); // - worldCameraPos); // or worldCameraDir - //float3 r = normalize(reflect(lightDir, n)); - //float dotRV = saturate(dot(r, v)); - //dotRV = pow(dotRV, 4.0); // * saturate(dotNL * 8.0); // no spec without diffuse - //specular = saturate(dotRV * lightColor.rgb); - + bool doSpecular = false; + if (doSpecular) { + float3 view = normalize(in.worldPos - uniforms.cameraPosition); + float3 ref = normalize(reflect(view, n)); + + // above can be interpolated + float dotRL = saturate(dot(ref, lightDir)); + dotRL = pow(dotRL, 4.0); // * saturate(dotNL * 8.0); // no spec without diffuse + specular = saturate(dotRL * lightColor.rgb); + } + + // Note: don't have any albedo yet, need second texture input float3 ambient = float3(0.1); c.xyz = ambient + diffuse + specular; @@ -576,6 +584,11 @@ float4 DrawPixels( c.xyz *= c.a; } } + + bool doShowUV = false; + if (doShowUV) { + c = float4(in.texCoord, 0.0, 1.0); + } } else { // handle single channel and SDF content diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index d9015f69..5976260c 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -147,7 +147,7 @@ class ShowSettings { // these control the view transform, zoomFit fits the image vertically to he view bound float zoomFit = 1.0f; - float zoom = 0.0f; + float zoom = 1.0f; float panX = 0.0f; float panY = 0.0f; @@ -169,7 +169,7 @@ class ShowSettings { double lastTimestamp = 0.0; int32_t meshNumber = 0; - int32_t meshCount = 3; + int32_t meshCount = 4; }; float4x4 matrix4x4_translation(float tx, float ty, float tz); diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index afc42d54..027b57ad 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1014,10 +1014,9 @@ - (void)updateEyedropper { int mipX = _showSettings->imageBoundsX; int mipY = _showSettings->imageBoundsY; - for (int i = 0; i < mipLOD; ++i) { - mipX = mipX >> 1; - mipY = mipY >> 1; - } + mipX = mipX >> mipLOD; + mipY = mipY >> mipLOD; + mipX = std::max(1, mipX); mipY = std::max(1, mipY); @@ -1050,7 +1049,7 @@ - (void)updateEyedropper { bool isDecodeSigned = isSignedFormat(_showSettings->decodedFormat); if (isSigned && !isDecodeSigned) { - c = toSnorm8(c.x); + c = toSnorm8(c); } if (isNormal) { From 90f554d192d3e27e2c26fdffc7b5c772f1739d79 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 10:47:36 -0700 Subject: [PATCH 085/901] kramv - first pass at 3d meshes These are still ortho. Fix bitangent sign on ModelIO prims. Pass in invScale2 from transform, so don't have to recompute in VS. This is 1.0 if uniform scale used. Don't set non-uniform scale on modelMatrix in 3D views. Using uniform scale now. Increase ortho z range, so scale doesn't cause prims to clip. 
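The invScale2 passed down here only matters for non-uniform scale. kram's model matrices are a
per-axis scale times a translation, so the inverse-transpose needed for normals reduces to:
transform by the upper 3x3, multiply per axis by 1/scale squared, renormalize. With uniform
scale the factor cancels in the normalize, which is why (1,1,1) can be passed and the shader
work skipped. A rough CPU-side sketch with hand-rolled types (not kram's own vector wrappers),
mirroring inverseScaleSquared and the vertex-shader transformBasis:

    #include <cmath>
    #include <algorithm>

    struct V3 { float x, y, z; };

    static float lengthSquared(V3 v) { return v.x * v.x + v.y * v.y + v.z * v.z; }

    static V3 normalized(V3 v)
    {
        float len = std::sqrt(lengthSquared(v));
        return { v.x / len, v.y / len, v.z / len };
    }

    // columns of the upper-left 3x3 of a column-major model matrix
    struct M3 { V3 c0, c1, c2; };

    static V3 mul(const M3& m, V3 v)
    {
        return { m.c0.x * v.x + m.c1.x * v.y + m.c2.x * v.z,
                 m.c0.y * v.x + m.c1.y * v.y + m.c2.y * v.z,
                 m.c0.z * v.x + m.c1.z * v.y + m.c2.z * v.z };
    }

    // squared column lengths, inverted; returns (1,1,1) when the scale is uniform
    V3 inverseScaleSquared(const M3& m)
    {
        V3 s2 = { lengthSquared(m.c0), lengthSquared(m.c1), lengthSquared(m.c2) };
        float tol = 1e-5f;
        if (std::fabs(s2.x - s2.y) < tol && std::fabs(s2.x - s2.z) < tol)
            return { 1.0f, 1.0f, 1.0f };
        float minS2 = 0.0001f * 0.0001f;
        return { 1.0f / std::max(minS2, s2.x),
                 1.0f / std::max(minS2, s2.y),
                 1.0f / std::max(minS2, s2.z) };
    }

    // same order as the vertex shader: rotate/scale, undo scale squared, renormalize
    V3 transformNormalCPU(const M3& m, V3 invScale2, V3 n)
    {
        V3 t = mul(m, n);
        return normalized({ t.x * invScale2.x, t.y * invScale2.y, t.z * invScale2.z });
    }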
--- kramv/KramRenderer.mm | 104 +++++++++++++++++++++++++++++++--------- kramv/KramShaders.h | 1 + kramv/KramShaders.metal | 44 +++++++++-------- 3 files changed, 107 insertions(+), 42 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 90542ca5..8cbc57a6 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -88,7 +88,8 @@ @implementation Renderer //MTKMesh *_meshPlane; // really a thin gox MTKMesh *_meshBox; MTKMesh *_meshSphere; - MTKMesh *_meshCylinder; + //MTKMesh *_meshCylinder; + MTKMesh *_meshCapsule; MTKMeshBufferAllocator *_metalAllocator; bool _is3DView; // whether view is 3d for now @@ -406,15 +407,15 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU { NSError* error = nil; - //mdlMesh.vertexDescriptor = _mdlVertexDescriptor; - - mdlMesh.vertexDescriptor = _mdlVertexDescriptor; + // ModelIO has the uv going counterclockwise on sphere/cylinder, but not on the box. + // And it also has a flipped bitangent.w. + // flip the u coordinate if (doFlipUV) { - id uvs = mdlMesh.vertexBuffers[1]; + id uvs = mdlMesh.vertexBuffers[BufferIndexMeshUV0]; float2* uvData = (float2*)uvs.map.bytes; for (uint32_t i = 0; i < mdlMesh.vertexCount; ++i) { @@ -426,6 +427,18 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU normalAttributeNamed: MDLVertexAttributeNormal tangentAttributeNamed: MDLVertexAttributeTangent]; + // DONE: flip the bitangent.w sign here, and remove the flip in the shader + bool doFlipBitangent = true; + if (doFlipBitangent) + { + id uvs = mdlMesh.vertexBuffers[BufferIndexMeshTangent]; + float4* uvData = (float4*)uvs.map.bytes; + + for (uint32_t i = 0; i < mdlMesh.vertexCount; ++i) { + uvData[i].w = -uvData[i].w; + } + } + // TODO: name the vertex attributes, can that be done in _mdlVertexDescriptor // may have to set name on MTLBuffer range on IB and VB @@ -466,19 +479,34 @@ - (void)_loadAssets // u is the opposite direction to the cube/plane, so need to flip those coords // I think this has also flipped the tangents the wrong way. 
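    // The doFlipBitangent loop above bakes that correction into the vertex data:
    // negating tangent.w once at load time lets the shader drop its
    // "bitangentSign = -bitangentSign" workaround (see the DONE note in the
    // KramShaders.metal hunk later in this patch).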
+ // All prims are viewed with +Y, not +Z up + mdlMesh = [MDLMesh newEllipsoidWithRadii:(vector_float3){0.5, 0.5, 0.5} radialSegments:16 verticalSegments:16 geometryType:MDLGeometryTypeTriangles inwardNormals:NO hemisphere:NO allocator:_metalAllocator]; _meshSphere = [self _createMeshAsset:"MeshSphere" mdlMesh:mdlMesh doFlipUV:true]; - mdlMesh = [MDLMesh newCylinderWithHeight:1.0 - radii:(vector_float2){0.5, 0.5} - radialSegments:16 - verticalSegments:1 - geometryType:MDLGeometryTypeTriangles - inwardNormals:NO - allocator:_metalAllocator]; - - _meshCylinder = [self _createMeshAsset:"MeshCylinder" mdlMesh:mdlMesh doFlipUV:true]; +// this maps 1/3rd of texture to the caps, and just isn't a very good uv mapping, using capsule nistead +// mdlMesh = [MDLMesh newCylinderWithHeight:1.0 +// radii:(vector_float2){0.5, 0.5} +// radialSegments:16 +// verticalSegments:1 +// geometryType:MDLGeometryTypeTriangles +// inwardNormals:NO +// allocator:_metalAllocator]; +// +// _meshCylinder = [self _createMeshAsset:"MeshCylinder" mdlMesh:mdlMesh doFlipUV:true]; + + mdlMesh = [MDLMesh newCapsuleWithHeight:1.0 + radii:(vector_float2){0.5, 0.25} // vertical cap subtracted from height + radialSegments:16 + verticalSegments:1 + hemisphereSegments:16 + geometryType:MDLGeometryTypeTriangles + inwardNormals:NO + allocator:_metalAllocator]; + + + _meshCapsule = [self _createMeshAsset:"MeshCapsule" mdlMesh:mdlMesh doFlipUV:true]; _mesh = _meshBox; @@ -686,10 +714,10 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex _modelMatrix = float4x4(float4m(scaleX, scaleY, 1.0f, 1.0f)); // non uniform scale _modelMatrix = _modelMatrix * matrix4x4_translation(0.0f, 0.0f, -1.0); // set z=-1 unit back - // squashed 3d primitive in z, throws off normals + // uniform scaled 3d primitiv float scale = MAX(scaleX, scaleY); - _modelMatrix3D = float4x4(float4m(scale, scale, 1.0f, 1.0f)); // non uniform scale - _modelMatrix3D = _modelMatrix3D * matrix4x4_translation(0.0f, 0.0f, -1.0); // set z=-1 unit back + _modelMatrix3D = float4x4(float4m(scale, scale, scale, 1.0f)); // uniform scale + _modelMatrix3D = _modelMatrix3D * matrix4x4_translation(0.0f, 0.0f, -1.0f); // set z=-1 unit back return YES; } @@ -700,7 +728,7 @@ - (float4x4)computeImageTransform:(float)panX panY:(float)panY zoom:(float)zoom // scale if (_is3DView) { - float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); // non-uniform scale + float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); // non-uniform scale is okay, affects ortho volume viewMatrix = panTransform * viewMatrix; return _projectionMatrix * viewMatrix * _modelMatrix3D; @@ -713,6 +741,33 @@ - (float4x4)computeImageTransform:(float)panX panY:(float)panY zoom:(float)zoom } } +bool almost_equal_elements(float3 v, float tol) { + return (fabs(v.x - v.y) < tol) && (fabs(v.x - v.z) < tol); +} + +float3 inverseScaleSquared(float4x4 m) { + float3 scaleSquared = float3m( + length_squared(m.columns[0].xyz), + length_squared(m.columns[1].xyz), + length_squared(m.columns[2].xyz)); + + // if uniform, then set scaleSquared all to 1 + if (almost_equal_elements(scaleSquared, 1e-5)) { + scaleSquared = float3m(1.0); + } + + // don't divide by 0 + float3 invScaleSquared = recip(simd::max(float3m(0.0001 * 0.0001), scaleSquared)); + + // TODO: could also identify determinant here for flipping orient + + // Note: in 2D, scales is x,x,1, so always apply invScale2, + // and that messes up preview normals on sphere/cylinder. + // May be from trying to do all that math in half. 
+ + return invScaleSquared; +} + - (void)_updateGameState { /// Update any game state before encoding rendering commands to our drawable @@ -772,7 +827,8 @@ - (void)_updateGameState case 0: _mesh = _meshBox; _is3DView = false; break; case 1: _mesh = _meshBox; break; case 2: _mesh = _meshSphere; break; - case 3: _mesh = _meshCylinder; break; + //case 3: _mesh = _meshCylinder; break; + case 3: _mesh = _meshCapsule; break; } // translate @@ -794,6 +850,8 @@ - (void)_updateGameState // works when only one texture, but switch to projectViewMatrix uniforms.modelMatrix = _modelMatrix3D; + uniforms.modelMatrixInvScale2 = inverseScaleSquared(_modelMatrix3D); + // this was stored so view could use it, but now that code calcs the transform via computeImageTransform _showSettings->projectionViewModelMatrix = uniforms.projectionViewMatrix * uniforms.modelMatrix; @@ -813,6 +871,8 @@ - (void)_updateGameState // works when only one texture, but switch to projectViewMatrix uniforms.modelMatrix = _modelMatrix; + uniforms.modelMatrixInvScale2 = inverseScaleSquared(_modelMatrix); + // this was stored so view could use it, but now that code calcs the transform via computeImageTransform _showSettings->projectionViewModelMatrix = uniforms.projectionViewMatrix * uniforms.modelMatrix ; @@ -935,7 +995,7 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie [renderEncoder setCullMode:MTLCullModeBack]; [renderEncoder setDepthStencilState:_depthStateFull]; - [renderEncoder pushDebugGroup:@"DrawBox"]; + [renderEncoder pushDebugGroup:@"DrawShape"]; // set the mesh shape for (NSUInteger bufferIndex = 0; bufferIndex < _mesh.vertexBuffers.count; bufferIndex++) @@ -1221,7 +1281,7 @@ - (void)drawSamples:(id)commandBuffer lookupX:(int32_t)lookupX id renderEncoder = [commandBuffer computeCommandEncoder]; renderEncoder.label = @"SampleCompute"; - [renderEncoder pushDebugGroup:@"DrawBox"]; + [renderEncoder pushDebugGroup:@"DrawShape"]; UniformsCS uniforms; uniforms.uv.x = lookupX; @@ -1297,7 +1357,7 @@ - (void)updateViewTransforms { //float aspect = size.width / (float)size.height; //_projectionMatrix = perspective_rhs(45.0f * (M_PI / 180.0f), aspect, 0.1f, 100.0f); - _projectionMatrix = orthographic_rhs(_showSettings->viewSizeX, _showSettings->viewSizeY, 0.1f, 100.0f, _showSettings->isReverseZ); + _projectionMatrix = orthographic_rhs(_showSettings->viewSizeX, _showSettings->viewSizeY, 0.1f, 100000.0f, _showSettings->isReverseZ); // DONE: adjust zoom to fit the entire image to the window _showSettings->zoomFit = MIN((float)_showSettings->viewSizeX, (float)_showSettings->viewSizeY) / diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index 6e363c69..cb0d33e1 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -96,6 +96,7 @@ struct Uniforms { simd::float4x4 projectionViewMatrix; simd::float4x4 modelMatrix; + simd::float3 modelMatrixInvScale2; // to supply inverse simd::float3 cameraPosition; // world-space bool isSigned; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index b77875ba..245b4ce8 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -195,8 +195,8 @@ half3 transformNormal(half3 bumpNormal, half4 tangent, half3 vertexNormal) half bitangentSign = tangent.w; // ModelIO not generating correct bitan sign - // TODO: flip this on srcData, and not here - bitangentSign = -bitangentSign; + // DONE: flip this on srcData, and not here + //bitangentSign = -bitangentSign; // now transform by basis and normalize from any shearing, and since interpolated basis vectors // are not 
normalized @@ -210,9 +210,13 @@ half3 transformNormal(half4 tangent, half3 vertexNormal, texture2d texture, sampler s, float2 uv, bool isSigned = true) { half4 nmap = texture.sample(s, uv); + + // unorm-only formats like ASTC need to convert if (!isSigned) { nmap.xy = toSnorm8(nmap.xy); } + + // rebuild the z term half3 bumpNormal = toNormal(nmap.xyz); return transformNormal(bumpNormal, @@ -276,7 +280,7 @@ float3x3 toFloat3x3(float4x4 m) // this is for vertex shader if tangent supplied void transformBasis(thread float3& normal, thread float3& tangent, - float4x4 modelToWorldTfm, bool isScaled = false) + float4x4 modelToWorldTfm, float3 invScale2) { float3x3 m = toFloat3x3(modelToWorldTfm); @@ -289,25 +293,24 @@ void transformBasis(thread float3& normal, thread float3& tangent, // most apps assume m, but after averaging it can be just as off the surface as the normal tangent = m * tangent; - // have to apply invSquare of scale here to approximate invT // also make sure to identify inversion off determinant before instancing so that backfacing is correct // this is only needed if non-uniform scale present in modelToWorldTfm, could precompute scale2 - if (isScaled) - { - // compute scale squared from rows - float3 scale2 = float3( - length_squared(m[0].xyz), - length_squared(m[1].xyz), - length_squared(m[2].xyz)); - - // do a max(1e4), but really don't have scale be super small - scale2 = recip(max(0.0001 * 0.0001, scale2)); +// if (isScaled) +// { +// // compute scale squared from rows +// float3 scale2 = float3( +// length_squared(m[0].xyz), +// length_squared(m[1].xyz), +// length_squared(m[2].xyz)); +// +// // do a max(1e4), but really don't have scale be super small +// scale2 = recip(max(0.0001 * 0.0001, scale2)); // apply inverse - normal *= scale2; - tangent *= scale2; - } + normal *= invScale2; + tangent *= invScale2; +// } // vertex shader normalize, but the fragment shader should not normal = normalize(normal); @@ -358,7 +361,7 @@ ColorInOut DrawImageFunc( if (uniforms.isNormal && uniforms.isPreview) { float3 normal = in.normal; float3 tangent = in.tangent.xyz; - transformBasis(normal, tangent, uniforms.modelMatrix, false); + transformBasis(normal, tangent, uniforms.modelMatrix, uniforms.modelMatrixInvScale2); out.normal = toHalf(normal); out.tangent.xyz = toHalf(tangent); @@ -546,7 +549,8 @@ float4 DrawPixels( n = toFloat(transformNormal(toHalf(n), in.tangent, in.normal)); // diffuse - float dotNL = saturate(dot(n, lightDir)); + float dotNLUnsat = dot(n, lightDir); + float dotNL = saturate(dotNLUnsat); float3 diffuse = lightColor.xyz * dotNL; float3 specular = float3(0.0); @@ -565,7 +569,7 @@ float4 DrawPixels( } // Note: don't have any albedo yet, need second texture input - float3 ambient = float3(0.1); + float3 ambient = mix(0.1, 0.3, saturate(dotNLUnsat * 0.5 + 0.5)); c.xyz = ambient + diffuse + specular; c.a = 1; From 44875cd7d2f31ccc60c244381120b8f190b58b7a Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 11:00:54 -0700 Subject: [PATCH 086/901] kram - fix Win build that doesn't have ATE --- libkram/kram/KramImage.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index a814a6ce..cbbbe803 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -461,13 +461,16 @@ bool KramDecoder::decodeBlocks( // copy srcData if using ATE, it says it needs 16-byte aligned data for encode // and assume for decode too. Output texture is already 16-byte aligned. 
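// Note on the #if COMPILE_ATE guard added below: ATE is Apple-only, and per the commit
// message the unguarded alignment copy broke the Windows build, so it is now compiled out
// where ATE isn't available. The alignment test itself is the usual pointer check,
// ((uintptr_t)srcData & 15) != 0 for 16-byte alignment.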
const uint8_t* srcData = blockData; + +#if COMPILE_ATE vector srcTexture; if (useATE && (((uintptr_t)srcData & 15) != 0)) { srcTexture.resize(blockDataSize); memcpy(srcTexture.data(), srcData, blockDataSize); srcData = srcTexture.data(); } - +#endif + Int2 blockDims = blockDimsOfFormat(blockFormat); bool isVerbose = params.isVerbose; const string& swizzleText = params.swizzleText; From 539187b777cc328ebcc8d8bd7bfe2ca59c749b9f Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 12:08:29 -0700 Subject: [PATCH 087/901] kram - allow loading all texture types in LoadImageFromKTX The call also now decodes ktx2 supercompressed levels. Loads an entire level into a vertical strip, and then it can be decoded. --- kramv/KramLoader.mm | 23 +++++----- libkram/kram/KramImage.cpp | 94 +++++++++++++++----------------------- libkram/kram/KramImage.h | 9 +++- 3 files changed, 55 insertions(+), 71 deletions(-) diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index d60e3bb9..41c24951 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -177,14 +177,8 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { #if SUPPORT_RGB if (isInternalRGBFormat(image.pixelFormat)) { - isInfoOnly = false; - - // reopen and unzip it all - if (!image.open(imageData, imageDataLength, isInfoOnly)) { - return nil; - } - - // loads and converts image from RGB to RGBA + // loads and converts top level mip from RGB to RGBA (RGB0) + // handles all texture types Image rgbaImage; if (!rgbaImage.loadImageFromKTX(image)) return nil; @@ -193,14 +187,21 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { KTXImage rbgaImage2; ImageInfoArgs dstImageInfoArgs; + dstImageInfoArgs.textureType = image.textureType; dstImageInfoArgs.pixelFormat = remapInternalRGBFormat(image.pixelFormat); - dstImageInfoArgs.doMipmaps = false; + dstImageInfoArgs.doMipmaps = image.header.numberOfMipmapLevels > 1; // ignore 0 dstImageInfoArgs.textureEncoder = kTexEncoderExplicit; - dstImageInfoArgs.swizzleText = "rgb1"; - + + // set chunk count, so it's explicit + // the chunks are loaded into a vertical strip + dstImageInfoArgs.chunksX = 1; + dstImageInfoArgs.chunksY = + dstImageInfoArgs.chunksCount = image.totalChunks(); + ImageInfo dstImageInfo; dstImageInfo.initWithArgs(dstImageInfoArgs); + // this will build mips if needed KramEncoder encoder; if (!encoder.encode(dstImageInfo, rgbaImage, rbgaImage2)) { return nil; diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index cbbbe803..6c50937b 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -114,47 +114,48 @@ Image::Image() : _width(0), _height(0), _hasColor(false), _hasAlpha(false) // this routine converts KTX to float4, but don't need if already matching 4 channels // could do other formata conversions here on more supported formats (101010A2, etc). -// TODO: handle loading KTXImage with custom mips -// TODO: handle loading KTXImage with other texture types (cube, array, etc) - // TODO: image here is very specifically a single level of chunks of float4 or Color (RGBA8Unorm) // the encoder is only written to deal with those types. -// TODO: for png need to turn grid/horizontal strip into a vertical strip if not already -// that way can move through the chunks and overwrite them in-place. -// That would avoid copying each chunk out in the encode, but have to do in reodering. -// That way data is stored as KTX would instead of how PNG does. 
- bool Image::loadImageFromKTX(const KTXImage& image) { // copy the data into a contiguous array + // a verticaly chunke image, will be converted to chunks in encode _width = image.width; - _height = image.height; - - // TODO: handle more texture types with custom mips - if (image.textureType != MyMTLTextureType2D) { - KLOGE("Image", "Only support 2D texture type import for KTX"); - return false; - } - - // TODO: handle loading custom mips. Save will currently box filter to build - // remaining mips but for SDF or coverage scaled alpha test, need to - // preserve original data. Problem is that Image save to KTX/2 always does in-place - // mipgen. + _height = image.height * image.totalChunks(); if (image.header.numberOfMipmapLevels > 1) { - KLOGW("Image", "Skipping custom mip levels from KTX load"); + KLOGW("Image", "Skipping custom mip levels from KTX load, but will build them from top level"); } - // so can call through to blockSize - KTXHeader header; - header.initFormatGL(image.pixelFormat); - //int32_t blockSize = image.blockSize(); - _hasColor = isColorFormat(image.pixelFormat); _hasAlpha = isAlphaFormat(image.pixelFormat); // TODO: this assumes 1,2,3 channel srcData has no rowPadding to say 4 bytes + return convertToFourChannel(image); +} + +bool Image::convertToFourChannel(const KTXImage& image) { + + const uint32_t mipNumber = 0; + const auto& srcMipLevel = image.mipLevels[mipNumber]; + + // this is offset to a given level + uint64_t mipBaseOffset = srcMipLevel.offset; + const uint8_t* srcLevelData = image.fileData; + + vector mipStorage; + if (image.isSupercompressed()) { + + mipStorage.resize(image.mipLevelSize(mipNumber)); + if (!image.unpackLevel(mipNumber, srcLevelData + srcMipLevel.offset, mipStorage.data())) { + return false; + } + srcLevelData = mipStorage.data(); + + // going to upload from mipStorage temp array + mipBaseOffset = 0; + } switch (image.pixelFormat) { case MyMTLPixelFormatR8Unorm: @@ -166,18 +167,15 @@ bool Image::loadImageFromKTX(const KTXImage& image) case MyMTLPixelFormatRGBA8Unorm_sRGB: case MyMTLPixelFormatRGBA8Unorm: { - const uint8_t* srcPixels = - image.fileData + image.mipLevels[0].offset; - + const uint8_t* srcPixels = srcLevelData; + int32_t numSrcChannels = numChannelsOfFormat(image.pixelFormat); - // Note: clearing unspecified channels to 0000, not 0001 - // can set swizzleText when encoding _pixels.resize(4 * _width * _height); Color* dstPixels = (Color*)_pixels.data(); - Color dstTemp = {0,0,0,0}; + Color dstTemp = {0,0,0,255}; for (int32_t y = 0; y < _height; ++y) { int32_t y0 = y * _width; @@ -193,9 +191,6 @@ bool Image::loadImageFromKTX(const KTXImage& image) dstPixels[dstX] = dstTemp; } } - - // caller can use swizzle after loading data here, and even compress - // content break; } @@ -207,17 +202,14 @@ bool Image::loadImageFromKTX(const KTXImage& image) case MyMTLPixelFormatRGBA16Float: { int32_t numSrcChannels = numChannelsOfFormat(image.pixelFormat); - // Note: clearing unspecified channels to 0000, not 0001 - // can set swizzleText when encoding _pixelsFloat.resize(_width * _height); // treat as float for per channel copies float4* dstPixels = _pixelsFloat.data(); - const half* srcPixels = - (const half*)(image.fileData + image.mipLevels[0].offset); - - half4 dstTemp = half4((half)0); + const half* srcPixels = (const half*)srcLevelData; + + half4 dstTemp = toHalf4(float4m(0.0f, 0.0f, 0.0f, 1.0f)); for (int32_t y = 0; y < _height; ++y) { int32_t y0 = y * _width; @@ -235,12 +227,6 @@ bool Image::loadImageFromKTX(const KTXImage& image) 
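            // Layout note: _width/_height here describe the whole vertical strip
            // (_height = image.height * image.totalChunks() above), so chunk c of the
            // source occupies rows [c * image.height, (c + 1) * image.height) of the
            // converted pixels; the encoder later slices the strip back into chunks
            // using the chunksY/chunksCount it is handed.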
dstPixels[dstX] = toFloat4(dstTemp); } } - - // caller can swizzle - // caller can compress to BC6H or ASTC-HDR if encoders available - // some textures could even go to LDR, but would need to tonemap or - // clamp the values - break; } @@ -250,18 +236,15 @@ bool Image::loadImageFromKTX(const KTXImage& image) case MyMTLPixelFormatRGB32Float_internal: #endif case MyMTLPixelFormatRGBA32Float: { - const float* srcPixels = - (const float*)(image.fileData + image.mipLevels[0].offset); + const float* srcPixels = (const float*)srcLevelData; int32_t numSrcChannels = numChannelsOfFormat(image.pixelFormat); - // Note: clearing unspecified channels to 0000, not 0001 - // can set swizzleText when encoding _pixelsFloat.resize(_width * _height); // treat as float for per channel copies float4* dstPixels = _pixelsFloat.data(); - float4 dstTemp = float4m(0.0f); + float4 dstTemp = float4m(0.0f, 0.0f, 0.0f, 1.0f); for (int32_t y = 0; y < _height; ++y) { int32_t y0 = y * _width; @@ -277,12 +260,7 @@ bool Image::loadImageFromKTX(const KTXImage& image) dstPixels[dstX] = dstTemp; } } - - // caller can swizzle - // caller can compress to BC6H or ASTC-HDR if encoders available - // some textures could even go to LDR, but would need to tonemap or - // clamp the values - + break; } default: diff --git a/libkram/kram/KramImage.h b/libkram/kram/KramImage.h index 7378bf95..2619568c 100644 --- a/libkram/kram/KramImage.h +++ b/libkram/kram/KramImage.h @@ -31,8 +31,9 @@ enum ImageResizeFilter { struct MipConstructData; -// TODO: this can only holds one level of mips, so custom mips aren't possible. +// TODO: this can only hold one level of mips, so custom mips aren't possible. // Mipmap generation is all in-place to this storage. +// Multiple chunks are possible in strip or grid form. class Image { public: Image(); @@ -41,13 +42,14 @@ class Image { bool loadImageFromPixels(const vector& pixels, int32_t width, int32_t height, bool hasColor, bool hasAlpha); + // convert top level to single-image bool loadImageFromKTX(const KTXImage& image); // this is only for 2d images bool resizeImage(int32_t wResize, int32_t hResize, bool resizePow2, ImageResizeFilter filter = kImageResizeFilterPoint); - // return state + // this is width and height of the strip/grid, chunks may be copied out of this int32_t width() const { return _width; } int32_t height() const { return _height; } @@ -57,6 +59,9 @@ class Image { bool hasColor() const { return _hasColor; } bool hasAlpha() const { return _hasAlpha; } +private: + bool convertToFourChannel(const KTXImage& image); + private: // pixel size of image int32_t _width = 0; From 54354d127b644d46becf14ad83eb5c4ec35ba0f0 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 14:53:51 -0700 Subject: [PATCH 088/901] kram - more accurate and faster KTX/2 loading, add lighting to albedo preview Track the chunk count in the Image when converting from KTX/2. Also use levelLength, not mipLevelSize. Also add some lighting when previewing albedo. This is to match the normal preview which is also lit. Clean up mipLevelSize -> mipLengthCalc to avoid confusion with levellLength. These should move to size_t. 
--- kramv/KramShaders.metal | 69 ++++++++++++++++++++-------------- libkram/kram/KTXImage.cpp | 14 +++---- libkram/kram/KTXImage.h | 4 +- libkram/kram/Kram.cpp | 9 ++--- libkram/kram/KramImage.cpp | 40 ++++++++++---------- libkram/kram/KramImage.h | 8 +++- libkram/kram/KramImageInfo.cpp | 8 ++++ 7 files changed, 89 insertions(+), 63 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 245b4ce8..2c2f9a6d 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -469,6 +469,37 @@ vertex ColorInOut DrawVolumeVS( return out; } +float4 doLighting(float4 albedo, float3 viewDir, float3 n) { + + float3 lightDir = normalize(float3(1,1,1)); + float3 lightColor = float3(1,1,1); + + // diffuse + float dotNLUnsat = dot(n, lightDir); + float dotNL = saturate(dotNLUnsat); + float3 diffuse = lightColor.xyz * dotNL; + + float3 specular = float3(0.0); + + // TODO: this renders bright in one quadrant of wrap preview, hard in ortho view + // specular + bool doSpecular = false; + if (doSpecular) { + float3 ref = normalize(reflect(viewDir, n)); + + // above can be interpolated + float dotRL = saturate(dot(ref, lightDir)); + dotRL = pow(dotRL, 4.0); // * saturate(dotNL * 8.0); // no spec without diffuse + specular = saturate(dotRL * lightColor.rgb); + } + + // Note: don't have any albedo yet, need second texture input + float3 ambient = mix(0.1, 0.3, saturate(dotNLUnsat * 0.5 + 0.5)); + albedo.xyz *= (ambient + diffuse + specular); + + return albedo; +} + // TODO: do more test shapes, but that affects eyedropper // generate and pass down tangents + bitanSign in the geometry @@ -540,42 +571,16 @@ float4 DrawPixels( in.tangent.w = -in.tangent.w; } - float3 lightDir = normalize(float3(1,1,1)); - float3 lightColor = float3(1,1,1); float3 n = c.xyz; // handle the basis here n = toFloat(transformNormal(toHalf(n), in.tangent, in.normal)); - // diffuse - float dotNLUnsat = dot(n, lightDir); - float dotNL = saturate(dotNLUnsat); - float3 diffuse = lightColor.xyz * dotNL; - - float3 specular = float3(0.0); - - // this renders bright in one quadrant of wrap preview, hard in ortho view - // specular - bool doSpecular = false; - if (doSpecular) { - float3 view = normalize(in.worldPos - uniforms.cameraPosition); - float3 ref = normalize(reflect(view, n)); - - // above can be interpolated - float dotRL = saturate(dot(ref, lightDir)); - dotRL = pow(dotRL, 4.0); // * saturate(dotNL * 8.0); // no spec without diffuse - specular = saturate(dotRL * lightColor.rgb); - } - - // Note: don't have any albedo yet, need second texture input - float3 ambient = mix(0.1, 0.3, saturate(dotNLUnsat * 0.5 + 0.5)); - c.xyz = ambient + diffuse + specular; - + float3 viewDir = normalize(in.worldPos - uniforms.cameraPosition); + c = doLighting(float4(1.0), viewDir, n); + c.a = 1; - - // TODO: add some specular, can this be combined with albedo texture in same folder? 
- // may want to change perspective for that, and give light controls } else { // to unorm @@ -583,6 +588,12 @@ float4 DrawPixels( c.xyz = toUnorm(c.xyz); } + // need an isAlbedo test + if (!uniforms.isSigned) { + float3 viewDir = normalize(in.worldPos - uniforms.cameraPosition); + c = doLighting(c, viewDir, toFloat(in.normal)); + } + // to premul, but also need to see without premul if (uniforms.isPremul) { c.xyz *= c.a; diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 1e39f397..9ee2ee67 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -765,7 +765,7 @@ const char* supercompressionName(KTX2Supercompression type) // https://docs.unity3d.com/ScriptReference/Experimental.Rendering.GraphicsFormat.html // Unity only handles 4,5,6,8,10,12 square block dimensions -uint32_t KTXImage::mipLevelSize(uint32_t width_, uint32_t height_) const +uint32_t KTXImage::mipLengthCalc(uint32_t width_, uint32_t height_) const { // TODO: ktx has 4 byte row alignment, fix that in calcs and code // data isn't fully packed on explicit formats like r8, rg8, r16f. @@ -776,14 +776,14 @@ uint32_t KTXImage::mipLevelSize(uint32_t width_, uint32_t height_) const return count * size; } -uint32_t KTXImage::mipLevelSize(uint32_t mipNumber) const +uint32_t KTXImage::mipLengthCalc(uint32_t mipNumber) const { uint32_t w = width; uint32_t h = height; uint32_t d = depth; mipDown(w, h, d, mipNumber); - return mipLevelSize(w, h); + return mipLengthCalc(w, h); } uint32_t KTXImage::blockCountRows(uint32_t width_) const @@ -1157,7 +1157,7 @@ void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxS (h >= mipMinSize && h <= mipMaxSize)); if (keepMip) { - level.length = mipLevelSize(w, h); + level.length = mipLengthCalc(w, h); if (mipLevels.empty()) { // adjust the top dimensions @@ -1183,7 +1183,7 @@ void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxS if (keepMip && (mipLevels.size() < (size_t)maxMipLevels)) { // length needs to be multiplied by chunk size before writing out - level.length = mipLevelSize(w, h); + level.length = mipLengthCalc(w, h); if (mipLevels.empty()) { // adjust the top dimensions @@ -1204,7 +1204,7 @@ void KTXImage::initMipLevels(bool doMipmaps, int32_t mipMinSize, int32_t mipMaxS } else { // length needs to be multiplied by chunk size before writing out - level.length = mipLevelSize(w, h); + level.length = mipLengthCalc(w, h); mipLevels.push_back(level); } @@ -1233,7 +1233,7 @@ void KTXImage::initMipLevels(size_t mipOffset) int32_t d = depth; for (uint32_t i = 0; i < numMips; ++i) { - size_t dataSize = mipLevelSize(w, h); + size_t dataSize = mipLengthCalc(w, h); uint32_t levelSize = dataSize * numChunks; diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index d2880971..771b3d9c 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -315,8 +315,8 @@ class KTXImage { // mip void mipDimensions(uint32_t mipNumber, uint32_t& width_, uint32_t& height_, uint32_t& depth_) const; - uint32_t mipLevelSize(uint32_t width_, uint32_t height_) const; - uint32_t mipLevelSize(uint32_t mipNumber) const; + uint32_t mipLengthCalc(uint32_t width_, uint32_t height_) const; + uint32_t mipLengthCalc(uint32_t mipNumber) const; size_t mipLengthLargest() const { return mipLevels[0].length; } size_t mipLength(uint32_t mipNumber) const { return mipLevels[mipNumber].length; } diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 898b0d50..b4d4c75d 100644 --- a/libkram/kram/Kram.cpp +++ 
b/libkram/kram/Kram.cpp @@ -102,13 +102,12 @@ inline Color toGrayscaleRec709(Color c, const Mipper& mipper) { bool LoadKtx(const uint8_t* data, size_t dataSize, Image& sourceImage) { KTXImage image; - if (!image.open(data, dataSize)) { + bool isInfoOnly = true; // don't decompress entire image, only going to unpack top level mip + if (!image.open(data, dataSize, isInfoOnly)) { return false; } - // many different types of KTX files, for now only import from 2D type - // and only pull the first mip, but want to be able to pull custom mips from - // many types + // this loads the top level into the sourceImage, caller must set chunkY to totalChunks return sourceImage.loadImageFromKTX(image); } @@ -1291,7 +1290,7 @@ string kramInfoToString(const string& srcFilename, bool isVerbose) // handle png and ktx if (isPNG) { // This was taken out of SetupSourceImage, dont want to decode PNG yet - // just peek tha the header. + // just peek at the header. const uint8_t* data = nullptr; int32_t dataSize = 0; diff --git a/libkram/kram/KramImage.cpp b/libkram/kram/KramImage.cpp index 6c50937b..aaf57685 100644 --- a/libkram/kram/KramImage.cpp +++ b/libkram/kram/KramImage.cpp @@ -131,6 +131,9 @@ bool Image::loadImageFromKTX(const KTXImage& image) _hasColor = isColorFormat(image.pixelFormat); _hasAlpha = isAlphaFormat(image.pixelFormat); + // preserve chunk count from the conversion + setChunksY(image.totalChunks()); + // TODO: this assumes 1,2,3 channel srcData has no rowPadding to say 4 bytes return convertToFourChannel(image); } @@ -144,10 +147,11 @@ bool Image::convertToFourChannel(const KTXImage& image) { uint64_t mipBaseOffset = srcMipLevel.offset; const uint8_t* srcLevelData = image.fileData; + vector mipStorage; if (image.isSupercompressed()) { - mipStorage.resize(image.mipLevelSize(mipNumber)); + mipStorage.resize(image.levelLength(mipNumber)); if (!image.unpackLevel(mipNumber, srcLevelData + srcMipLevel.offset, mipStorage.data())) { return false; } @@ -1994,19 +1998,23 @@ bool KramEncoder::createMipsFromChunks( return true; } -// TODO: try to elim KTXImage passed into this bool KramEncoder::compressMipLevel(const ImageInfo& info, KTXImage& image, ImageData& mipImage, TextureData& outputTexture, int32_t mipStorageSize) const { int32_t w = mipImage.width; - + int32_t h = mipImage.height; + const Color* srcPixelData = mipImage.pixels; const float4* srcPixelDataFloat4 = mipImage.pixelsFloat; - int32_t h = mipImage.height; - ; - + // TODO: try to elim KTXImage passed into this + // only use of image (can determine this from format) + int32_t numBlocks = image.blockCount(w, h); + int32_t blockSize = image.blockSize(); + int32_t mipLength = image.mipLengthCalc(w, h); + Int2 blockDims = image.blockDims(); + if (info.isExplicit) { switch (info.pixelFormat) { case MyMTLPixelFormatR8Unorm: @@ -2014,7 +2022,7 @@ bool KramEncoder::compressMipLevel(const ImageInfo& info, KTXImage& image, // no RGB8 writes case MyMTLPixelFormatRGBA8Unorm: case MyMTLPixelFormatRGBA8Unorm_sRGB: { - int32_t count = image.blockSize() / 1; + int32_t count = blockSize / 1; uint8_t* dst = (uint8_t*)outputTexture.data.data(); @@ -2042,7 +2050,7 @@ bool KramEncoder::compressMipLevel(const ImageInfo& info, KTXImage& image, case MyMTLPixelFormatRG16Float: // no RGB16Float writes case MyMTLPixelFormatRGBA16Float: { - int32_t count = image.blockSize() / 2; + int32_t count = blockSize / 2; half* dst = (half*)outputTexture.data.data(); @@ -2069,7 +2077,7 @@ bool KramEncoder::compressMipLevel(const ImageInfo& info, KTXImage& image, case 
MyMTLPixelFormatRG32Float: // no RGB32Float writes case MyMTLPixelFormatRGBA32Float: { - int32_t count = image.blockSize() / 4; + int32_t count = blockSize / 4; float* dst = (float*)outputTexture.data.data(); @@ -2276,12 +2284,9 @@ bool KramEncoder::compressMipLevel(const ImageInfo& info, KTXImage& image, const int32_t blockDim = 4; int32_t blocks_x = (w + blockDim - 1) / blockDim; //int32_t blocks_y = (h + blockDim - 1) / blockDim; - int32_t blockSize = image.blockSize(); for (int32_t y = 0; y < h; y += blockDim) { for (int32_t x = 0; x < w; x += blockDim) { - - // Have to copy to temp block, since encode doesn't test w/h edges // copy src to 4x4 clamping the edge pixels // TODO: do clamped edge pixels get weighted more then on non-multiple of 4 images ? @@ -2386,7 +2391,7 @@ bool KramEncoder::compressMipLevel(const ImageInfo& info, KTXImage& image, ATEEncoder encoder; success = encoder.Encode( - (int32_t)metalType(pixelFormatRemap), image.mipLevelSize(w, h), image.blockDims().y, + (int32_t)metalType(pixelFormatRemap), mipLength, blockDims.y, info.hasAlpha, info.isColorWeighted, info.isVerbose, info.quality, w, h, (const uint8_t*)srcPixelData, outputTexture.data.data()); @@ -2398,7 +2403,7 @@ bool KramEncoder::compressMipLevel(const ImageInfo& info, KTXImage& image, // find the 8,1 block and print it // uint32_t numRowBlocks = image.blockCountRows(w); -// const uint8_t* block = outputTexture.data.data() + (numRowBlocks * 1 + 8) * image.blockSize(); +// const uint8_t* block = outputTexture.data.data() + (numRowBlocks * 1 + 8) * blockSize; // printBCBlock(block, pixelFormatRemap); } #endif @@ -2466,9 +2471,6 @@ bool KramEncoder::compressMipLevel(const ImageInfo& info, KTXImage& image, // have to remap endpoints to signed values (-1,1) to (0,127) for // (0,1) and (-128,-127,0) for (-1,0)/ else if (success && info.isSigned && doRemapSnormEndpoints) { - int32_t numBlocks = image.blockCount(w, h); - int32_t blockSize = image.blockSize(); - int32_t blockSize16 = blockSize / sizeof(uint16_t); uint16_t* blockPtr = (uint16_t*)outputTexture.data.data(); @@ -2506,7 +2508,7 @@ bool KramEncoder::compressMipLevel(const ImageInfo& info, KTXImage& image, if (info.useATE) { ATEEncoder encoder; bool success = encoder.Encode( - (int32_t)metalType(info.pixelFormat), image.mipLevelSize(w, h), image.blockDims().y, + (int32_t)metalType(info.pixelFormat), mipLength, blockDims.y, info.hasAlpha, info.isColorWeighted, info.isVerbose, info.quality, w, h, (const uint8_t*)srcPixelData, outputTexture.data.data()); @@ -2555,7 +2557,7 @@ bool KramEncoder::compressMipLevel(const ImageInfo& info, KTXImage& image, } // not generating 3d ASTC ever, even for 3D textures - Int2 blockDims = image.blockDims(); + //Int2 blockDims = image.blockDims(); // setup flags uint32_t flags = 0; diff --git a/libkram/kram/KramImage.h b/libkram/kram/KramImage.h index 2619568c..58bfaa4e 100644 --- a/libkram/kram/KramImage.h +++ b/libkram/kram/KramImage.h @@ -58,7 +58,11 @@ class Image { bool hasColor() const { return _hasColor; } bool hasAlpha() const { return _hasAlpha; } - + + // if converted a KTX/2 image to Image, then this field will be non-zero + uint32_t chunksY() const { return _chunksY; } + void setChunksY(uint32_t chunksY) { _chunksY = chunksY; } + private: bool convertToFourChannel(const KTXImage& image); @@ -77,6 +81,8 @@ class Image { vector _pixels; // TODO: change to Color? 
//vector _pixelsHalf; // TODO: add support to import fp16 vector _pixelsFloat; + + uint32_t _chunksY = 0; }; class KramDecoderParams { diff --git a/libkram/kram/KramImageInfo.cpp b/libkram/kram/KramImageInfo.cpp index aba4df27..ac5c5801 100644 --- a/libkram/kram/KramImageInfo.cpp +++ b/libkram/kram/KramImageInfo.cpp @@ -1102,6 +1102,14 @@ void ImageInfo::initWithSourceImage(Image& sourceImage) isHDR = srcPixelsFloat != nullptr; + // transfer the chunk count, this was a ktx/2 import + if (sourceImage.chunksY() > 0) { + chunksX = 1; + + chunksY = + chunksCount = sourceImage.chunksY();; + } + // these come from png header, but hasn't walked pixels yet if (!sourceImage.hasAlpha()) { hasAlpha = false; From 7741e848a357d14289706fa652316ce55ef75266 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 15:31:52 -0700 Subject: [PATCH 089/901] kramv - fix specular for 3d views Still causing a problem on mesh0 in 2dview likely from high non-uniform scaling. --- kramv/KramShaders.metal | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 2c2f9a6d..d9982b94 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -481,9 +481,7 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n) { float3 specular = float3(0.0); - // TODO: this renders bright in one quadrant of wrap preview, hard in ortho view - // specular - bool doSpecular = false; + bool doSpecular = true; if (doSpecular) { float3 ref = normalize(reflect(viewDir, n)); @@ -493,9 +491,14 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n) { specular = saturate(dotRL * lightColor.rgb); } - // Note: don't have any albedo yet, need second texture input float3 ambient = mix(0.1, 0.3, saturate(dotNLUnsat * 0.5 + 0.5)); - albedo.xyz *= (ambient + diffuse + specular); + + // attenuate, and not saturate below, so no HDR yet + specular *= 0.3; + diffuse *= 0.7; + //ambient *= 0.2; + + albedo.xyz *= saturate(ambient + diffuse + specular); return albedo; } @@ -600,9 +603,10 @@ float4 DrawPixels( } } + // this allows viewing wrap bool doShowUV = false; if (doShowUV) { - c = float4(in.texCoord, 0.0, 1.0); + c = float4(fract(in.texCoord), 0.0, 1.0); } } else { From 3f95d126bbce6f365cbd818b2408a26fb399640c Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 16:54:40 -0700 Subject: [PATCH 090/901] kramv - fix crack in 3d rendering when clamp is used by insetting the uv coords. Wrap doesn't hit this, but if border/transparent color is hit, then a visible gap is displayed on the primitives where 0/1 meet up. 
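The gist of the fix, as a plain C++ mirror of the shader change (the type and function names here are only for illustration): clamp the interpolated uv inward by half a texel of the sampled mip, so clamp-mode bilinear filtering near the seam where 0 and 1 meet never blends in the border/transparent color.

#include <algorithm>

// illustrative sketch of the shader's clamp, not viewer code
struct UV { float u, v; };

// uv is in [0,1]; texW/texH are the pixel dimensions of the sampled mip level
UV insetUV(UV uv, float texW, float texH) {
    float hu = 0.5f / texW;   // half a texel in u
    float hv = 0.5f / texH;   // half a texel in v
    return { std::clamp(uv.u, hu, 1.0f - hu),
             std::clamp(uv.v, hv, 1.0f - hv) };
}

Wrap mode is left untouched, since repeat sampling has no clamp boundary to bleed across.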
--- kramv/KramRenderer.mm | 8 ++++++-- kramv/KramShaders.h | 1 + kramv/KramShaders.metal | 9 ++++++++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 8cbc57a6..4bc1bdc5 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -417,9 +417,11 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU { id uvs = mdlMesh.vertexBuffers[BufferIndexMeshUV0]; float2* uvData = (float2*)uvs.map.bytes; - + for (uint32_t i = 0; i < mdlMesh.vertexCount; ++i) { - uvData[i].x = 1.0f - uvData[i].x; + float2& uv = uvData[i]; + + uv.x = 1.0f - uv.x; } } @@ -890,9 +892,11 @@ - (void)_setUniformsLevel:(UniformsLevel&)uniforms mipLOD:(int32_t)mipLOD uniforms.arrayOrSlice = 0; uniforms.face = 0; + uniforms.textureSize = float4m(0.0f); MyMTLTextureType textureType = MyMTLTextureType2D; if (_colorMap) { textureType = (MyMTLTextureType)_colorMap.textureType; + uniforms.textureSize = float4m(_colorMap.width, _colorMap.height, 1.0f/_colorMap.width, 1.0f/_colorMap.height); } // TODO: set texture specific uniforms, but using single _colorMap for now diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index cb0d33e1..1cc36b4a 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -128,6 +128,7 @@ struct UniformsLevel { uint32_t face; uint32_t arrayOrSlice; simd::float2 drawOffset; // pixel offset to apply + simd::float4 textureSize; // width, height, 1/width, 1/height }; // This is all tied to a single level sample diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index d9982b94..6a0a6a31 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -377,12 +377,19 @@ ColorInOut DrawImageFunc( out.position = uniforms.projectionViewMatrix * worldPos; // this is a 2d coord always which is 0 to 1, or 0 to 2 - out.texCoord.xy = in.texCoord; if (uniforms.isWrap) { // can make this a repeat value uniform float wrapAmount = 2.0; + out.texCoord.xy = in.texCoord; out.texCoord.xy *= wrapAmount; } + else { + // inset from edge by 1 texel, to avoid clamp boundary error + // does this have to adjust for mipLOD too? + float2 halfPixel = 0.5 * uniformsLevel.textureSize.zw; + + out.texCoord.xy = clamp(in.texCoord, halfPixel, float2(1.0) - halfPixel); + } // potentially 3d coord, and may be -1 to 1 out.texCoordXYZ.xy = out.texCoord; From 524f772a87074800115542eac0e1f8a3a94be49f Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 18:57:21 -0700 Subject: [PATCH 091/901] kramv - sort archive so similar files are grouped more logically miniz sorts the entries in the zip by filename, but doesn't expose the remap indices since it is optional internal state. So expose that, and then use the remap table to order the ZipEntry list built up. --- kramv/KramRenderer.mm | 8 +++----- kramv/KramViewerMain.mm | 4 ++-- libkram/kram/KramZipHelper.cpp | 22 ++++++++++++++-------- libkram/miniz/miniz.cpp | 7 +++++++ libkram/miniz/miniz.h | 4 ++++ 5 files changed, 30 insertions(+), 15 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 4bc1bdc5..6f971ac9 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -473,13 +473,11 @@ - (void)_loadAssets _meshBox = [self _createMeshAsset:"MeshBox" mdlMesh:mdlMesh doFlipUV:false]; - // TOOO: have more shape types - this is box, need thin box (plane), and sphere, and cylinder - // eventually load usdz and gltf2 custom model. Need 3d manipulation of shape like arcball - // and eyedropper is more complex. 
- // The sphere/cylinder shapes are v increasing in -Y, and u increasing conterclockwise, // u is the opposite direction to the cube/plane, so need to flip those coords - // I think this has also flipped the tangents the wrong way. + // I think this has also flipped the tangents the wrong way, but building tangents after + // flipping u direction doesn't flip the bitangent. So bitangent.w is flipped. + // For sanity, Tangent is increasing u, and Bitangent is increasing v. // All prims are viewed with +Y, not +Z up diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 027b57ad..0620cd28 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1876,7 +1876,7 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown } _showSettings->meshNumber = _showSettings->meshNumber % _showSettings->meshCount; - sprintf(text, "Mesh %d/%d", _showSettings->meshNumber, _showSettings->meshCount); + sprintf(text, "Mesh %d %s", _showSettings->meshNumber, "Shape"); // TODO: put meshName in _showSettings isChanged = true; } break; @@ -2057,7 +2057,7 @@ -(BOOL)advanceTextureFromAchive:(BOOL)increment if (increment) _fileIndex = (_fileIndex + 1) % numEntries; else - _fileIndex = (_fileIndex - 1 + numEntries) % numEntries; + _fileIndex = (_fileIndex + numEntries - 1) % numEntries; // now lookup the filename and data at that entry const auto& entry = _zip.zipEntrys()[_fileIndex]; diff --git a/libkram/kram/KramZipHelper.cpp b/libkram/kram/KramZipHelper.cpp index 4123653d..571201fd 100644 --- a/libkram/kram/KramZipHelper.cpp +++ b/libkram/kram/KramZipHelper.cpp @@ -66,6 +66,8 @@ void ZipHelper::initZipEntryTables() { totalFilenameSizes += mz_zip_reader_get_filename(zip.get(), i, nullptr, 0); } + const uint32_t* remappedIndices = mz_zip_reader_sorted_file_indices(zip.get()); + allFilenames.resize(totalFilenameSizes); // allocate an array with the data from the archive that we care about @@ -75,16 +77,18 @@ void ZipHelper::initZipEntryTables() { uint64_t length = 0; for (int32_t i = 0; i < numFiles; ++i) { + uint32_t sortedFileIndex = remappedIndices[i]; + // file_stat does quite a bit of work, but only want a few fields out of it mz_zip_archive_file_stat stat; - mz_zip_reader_file_stat(zip.get(), i, &stat); + mz_zip_reader_file_stat(zip.get(), sortedFileIndex, &stat); if (stat.m_is_directory || !stat.m_is_supported) { continue; } // we may skip over directories above // so zipEntry array entry doesn't tie with fileIndex - assert((uint32_t)i == stat.m_file_index); + //assert((uint32_t)i == stat.m_file_index); // skipping directories and unsupported items @@ -107,6 +111,7 @@ void ZipHelper::initZipEntryTables() { index++; } + // resize, since entries and filenames were skipped // this should change the addresses used above allFilenames.resize(length); @@ -124,15 +129,16 @@ const ZipEntry* ZipHelper::zipEntry(const char* name) const { return nullptr; } - // have to search back until file index is found + // have to find the zipEntry, have skipped and sorted entries by filename // the array build skips directories, so those can throw off the fileIndex + int32_t numEntries = (int32_t)_zipEntrys.size(); - int32_t search = index; - if (search >= numEntries) { - search = numEntries - 1; - } +// int32_t search = index; +// if (search >= numEntries) { +// search = numEntries - 1; +// } - for (int32_t i = search; i >= 0; --i) { + for (int32_t i = 0; i < numEntries; ++i) { if (_zipEntrys[i].fileIndex == index) { return &_zipEntrys[i]; } diff --git a/libkram/miniz/miniz.cpp 
b/libkram/miniz/miniz.cpp index 62ea05c4..431c442f 100644 --- a/libkram/miniz/miniz.cpp +++ b/libkram/miniz/miniz.cpp @@ -3417,6 +3417,13 @@ static mz_bool mz_zip_reader_init_internal(mz_zip_archive *pZip, mz_uint flags) return MZ_TRUE; } +const mz_uint32* mz_zip_reader_sorted_file_indices(mz_zip_archive *pZip) +{ + // these aren't offsets, it's a sorted array of the file index elements + return (const mz_uint32*)(pZip->m_pState->m_sorted_central_dir_offsets.m_p); +} + + static MZ_FORCEINLINE mz_bool mz_zip_reader_filename_less(const mz_zip_array *pCentral_dir_array, const mz_zip_array *pCentral_dir_offsets, mz_uint l_index, mz_uint r_index) { const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, l_index)), *pE; diff --git a/libkram/miniz/miniz.h b/libkram/miniz/miniz.h index e36d66e8..8867c3c6 100644 --- a/libkram/miniz/miniz.h +++ b/libkram/miniz/miniz.h @@ -1209,6 +1209,10 @@ mz_bool mz_zip_is_zip64(mz_zip_archive *pZip); /* The current max supported size is <= MZ_UINT32_MAX. */ size_t mz_zip_get_central_dir_size(mz_zip_archive *pZip); +/* Alec change - if files are sorted by filename, then this returns the remap table for each fileIndex */ +/* This was previously internal state, so use with caution. It's an array of mz_uint32 */ +const mz_uint32* mz_zip_reader_sorted_file_indices(mz_zip_archive *pZip); + /* Extracts a archive file to a memory buffer using no memory allocation. */ /* There must be at least enough room on the stack to store the inflator's state (~34KB or so). */ mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive *pZip, mz_uint file_index, void *pBuf, size_t buf_size, mz_uint flags, void *pUser_read_buf, size_t user_read_buf_size); From 84af8cc3d71820437ce21a0e6d97b9e45917145c Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 19:31:57 -0700 Subject: [PATCH 092/901] kramv - fixup scale of the initial cube When scale differs a lot, the specular starts to really go overbright. So in 2D case, keep scaleZ set to max(scaleX, scaleY). 
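A small sketch of the scale choice in plain C++ (the helper name is made up): a W x H texture stretched over the unit cube with z left at 1 produces a wildly non-uniform model scale, and the inverse-scale-squared normal correction then pushes specular far too hot; taking z from the larger of the two axes keeps the scale roughly uniform.

#include <algorithm>

// illustrative only; the viewer builds this directly into _modelMatrix
struct ScaleSketch { float x, y, z; };

ScaleSketch modelScaleForTexture(float texWidth, float texHeight) {
    float sx = std::max(1.0f, texWidth);
    float sy = std::max(1.0f, texHeight);
    float sz = std::max(sx, sy);  // not 1.0f, so the x/z and y/z ratios stay tame
    return { sx, sy, sz };
}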
--- kramv/KramRenderer.mm | 21 +++++++++------------ libkram/kram/KramZipHelper.cpp | 17 +++++------------ 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 6f971ac9..a2045e42 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -711,7 +711,8 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex // have one of these for each texture added to the viewer float scaleX = MAX(1, texture.width); float scaleY = MAX(1, texture.height); - _modelMatrix = float4x4(float4m(scaleX, scaleY, 1.0f, 1.0f)); // non uniform scale + float scaleZ = MAX(scaleX, scaleY); // don't want 1.0f, or specular is all off due to extreme scale differences + _modelMatrix = float4x4(float4m(scaleX, scaleY, scaleZ, 1.0f)); // non uniform scale _modelMatrix = _modelMatrix * matrix4x4_translation(0.0f, 0.0f, -1.0); // set z=-1 unit back // uniform scaled 3d primitiv @@ -726,17 +727,15 @@ - (float4x4)computeImageTransform:(float)panX panY:(float)panY zoom:(float)zoom // translate float4x4 panTransform = matrix4x4_translation(-panX, panY, 0.0); + // non-uniform scale is okay here, only affects ortho volume + float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); + viewMatrix = panTransform * viewMatrix; + // scale if (_is3DView) { - float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); // non-uniform scale is okay, affects ortho volume - viewMatrix = panTransform * viewMatrix; - return _projectionMatrix * viewMatrix * _modelMatrix3D; } else { - float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); // non-uniform scale - viewMatrix = panTransform * viewMatrix; - return _projectionMatrix * viewMatrix * _modelMatrix; } } @@ -759,11 +758,9 @@ float3 inverseScaleSquared(float4x4 m) { // don't divide by 0 float3 invScaleSquared = recip(simd::max(float3m(0.0001 * 0.0001), scaleSquared)); - // TODO: could also identify determinant here for flipping orient - - // Note: in 2D, scales is x,x,1, so always apply invScale2, - // and that messes up preview normals on sphere/cylinder. - // May be from trying to do all that math in half. + // TODO: could also identify determinant here for flipping orientation + // all shapes with negative determinant need orientation flipped for backfacing + // and need to be rendered together return invScaleSquared; } diff --git a/libkram/kram/KramZipHelper.cpp b/libkram/kram/KramZipHelper.cpp index 571201fd..a41a1141 100644 --- a/libkram/kram/KramZipHelper.cpp +++ b/libkram/kram/KramZipHelper.cpp @@ -86,11 +86,8 @@ void ZipHelper::initZipEntryTables() { continue; } - // we may skip over directories above - // so zipEntry array entry doesn't tie with fileIndex - //assert((uint32_t)i == stat.m_file_index); - // skipping directories and unsupported items + // also the ordering here is in filename not fileIndex order // copy all filenames into fixed storage that's all // contguous, so that can alis the strings for lookup @@ -131,13 +128,9 @@ const ZipEntry* ZipHelper::zipEntry(const char* name) const { // have to find the zipEntry, have skipped and sorted entries by filename // the array build skips directories, so those can throw off the fileIndex + // TODO: do a binary search here, and don't use mz_zip call? 
int32_t numEntries = (int32_t)_zipEntrys.size(); -// int32_t search = index; -// if (search >= numEntries) { -// search = numEntries - 1; -// } - for (int32_t i = 0; i < numEntries; ++i) { if (_zipEntrys[i].fileIndex == index) { return &_zipEntrys[i]; @@ -195,7 +188,8 @@ bool ZipHelper::extract(int32_t fileIndex, void* buffer, uint64_t bufferSize) co mz_bool success = mz_zip_reader_extract_to_mem( zip.get(), fileIndex, buffer, bufferSize, 0); - /* TODO: alternative using optimized Apple library + /* TODO: alternative using optimized Apple library libCompression + this can do partial compression, so don't check uncompressedSize always f.e. can look at first 64-byte header on KTX files which is much faster. @@ -225,13 +219,12 @@ bool ZipHelper::extractRaw(const char *filename, const uint8_t** bufferData, uin return false; } - // this should really be in stat data + // this should really be cached with zipEntry data const uint8_t* data = mz_zip_reader_get_raw_data(zip.get(), entry->fileIndex); if (!data) { return false; } - // not sure if this is start of *bufferData = data; bufferDataSize = stat.m_uncomp_size; From d57d06faab6be4d498613a4bafa48d212e3ad8d0 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 20:00:15 -0700 Subject: [PATCH 093/901] kramv - turn off inset on 2d view, or on small 4x4 textures can see the inset by a half pixel The 4x4 blocks are 3x3 when insetting by half pixel. So only turn this on for the 3d views. --- kramv/KramRenderer.mm | 12 ++++++------ kramv/KramShaders.h | 1 + kramv/KramShaders.metal | 5 ++++- kramv/KramViewerBase.h | 4 ++++ 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index a2045e42..e80ad4a7 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -91,7 +91,6 @@ @implementation Renderer //MTKMesh *_meshCylinder; MTKMesh *_meshCapsule; MTKMeshBufferAllocator *_metalAllocator; - bool _is3DView; // whether view is 3d for now ShowSettings* _showSettings; } @@ -732,7 +731,7 @@ - (float4x4)computeImageTransform:(float)panX panY:(float)panY zoom:(float)zoom viewMatrix = panTransform * viewMatrix; // scale - if (_is3DView) { + if (_showSettings->is3DView) { return _projectionMatrix * viewMatrix * _modelMatrix3D; } else { @@ -819,22 +818,23 @@ - (void)_updateGameState uniforms.channels = (ShaderTextureChannels)_showSettings->channels; // crude shape experiment - _is3DView = true; + _showSettings->is3DView = true; switch(_showSettings->meshNumber) { - case 0: _mesh = _meshBox; _is3DView = false; break; + case 0: _mesh = _meshBox; _showSettings->is3DView = false; break; case 1: _mesh = _meshBox; break; case 2: _mesh = _meshSphere; break; //case 3: _mesh = _meshCylinder; break; case 3: _mesh = _meshCapsule; break; } - + uniforms.is3DView = _showSettings->is3DView; + // translate float4x4 panTransform = matrix4x4_translation(-_showSettings->panX, _showSettings->panY, 0.0); // scale float zoom = _showSettings->zoom; - if (_is3DView) { + if (_showSettings->is3DView) { _viewMatrix3D = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); // non-uniform _viewMatrix3D = panTransform * _viewMatrix3D; diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index 1cc36b4a..d81b866e 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -108,6 +108,7 @@ struct Uniforms bool isWrap; bool isSDF; bool isPreview; + bool is3DView; uint32_t numChannels; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 6a0a6a31..11786bdb 100644 --- a/kramv/KramShaders.metal +++ 
b/kramv/KramShaders.metal @@ -383,13 +383,16 @@ ColorInOut DrawImageFunc( out.texCoord.xy = in.texCoord; out.texCoord.xy *= wrapAmount; } - else { + else if (uniforms.is3DView) { // inset from edge by 1 texel, to avoid clamp boundary error // does this have to adjust for mipLOD too? float2 halfPixel = 0.5 * uniformsLevel.textureSize.zw; out.texCoord.xy = clamp(in.texCoord, halfPixel, float2(1.0) - halfPixel); } + else { + out.texCoord.xy = in.texCoord; + } // potentially 3d coord, and may be -1 to 1 out.texCoordXYZ.xy = out.texCoord; diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 5976260c..b985f75d 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -99,6 +99,10 @@ class ShowSettings { // this mode shows the content with lighting or with bilinear/mips active bool isPreview = false; + // the 2d view doesn't want to inset pixels for clamp, or point sampling is thrown off + // especially on small 4x4 textures + bool is3DView = false; + // TODO: Might eliminate this, since mips are either built with or without srgb // and disabling with a MTLView caused many flags to have to be set on MTLTexture //bool isSRGBShown = true; From 79e3a97d3de55c82d65737707377442935a8d6e3 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 20:30:40 -0700 Subject: [PATCH 094/901] kramv - reduce the inset so only small amount of pixel lost, fix keyDown handling for unhandled keys Still not seeing keyDown events while mouseMove is passed by NSTrackingArea. Seems like an AppKit bug, or some flag isn't set. --- kramv/KramShaders.metal | 5 +++-- kramv/KramViewerMain.mm | 17 ++++++++++++++--- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 11786bdb..0021f1ee 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -384,9 +384,10 @@ ColorInOut DrawImageFunc( out.texCoord.xy *= wrapAmount; } else if (uniforms.is3DView) { - // inset from edge by 1 texel, to avoid clamp boundary error + // inset from edge by a fraction of a pixel, to avoid clamp boundary error // does this have to adjust for mipLOD too? - float2 halfPixel = 0.5 * uniformsLevel.textureSize.zw; + float2 onePixel = uniformsLevel.textureSize.zw; + float2 halfPixel = (1.0/16.0) * onePixel; out.texCoord.xy = clamp(in.texCoord, halfPixel, float2(1.0) - halfPixel); } diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 0620cd28..3091d6ef 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1517,10 +1517,15 @@ - (void)keyDown:(NSEvent *)theEvent bool isShiftKeyDown = theEvent.modifierFlags & NSEventModifierFlagShift; uint32_t keyCode = theEvent.keyCode; - [self handleKey:keyCode isShiftKeyDown:isShiftKeyDown]; + bool isHandled = [self handleKey:keyCode isShiftKeyDown:isShiftKeyDown]; + if (!isHandled) + { + // this will bonk + [super keyDown:theEvent]; + } } -- (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown +- (bool)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown { // Some data depends on the texture data (isSigned, isNormal, ..) 
bool isChanged = false; @@ -1545,7 +1550,7 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown case Key::U: // this means no image loaded yet if (_noImageLoaded) { - return; + return true; } _buttonStack.hidden = !_buttonStack.hidden; @@ -1936,6 +1941,9 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown isChanged = true; } break; + default: + // non-handled key + return false; } if (!text.empty()) { @@ -1949,6 +1957,7 @@ - (void)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown if (isChanged) { self.needsDisplay = YES; } + return true; } @@ -2342,6 +2351,8 @@ - (void)viewDidLoad // this is better than requesting mousemoved events, they're only sent when cursor is inside _trackingArea = [[NSTrackingArea alloc] initWithRect:_view.bounds options: (NSTrackingMouseEnteredAndExited | NSTrackingMouseMoved | + + //NSTrackingActiveWhenFirstResponder NSTrackingActiveInActiveApp //NSTrackingActiveInKeyWindow ) From dd20b705aafd46e7ccc4c858fb8c54441fbda1c6 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 23:12:18 -0700 Subject: [PATCH 095/901] kramv - allow archive reloads Added timestamp to track archive mods. Also re-look up the previous filename, since it may not be in archive or at the same fileIndex. --- kramv/KramViewerMain.mm | 43 +++++++++++++++++++++++++-------- libkram/kram/KramFileHelper.cpp | 19 +++++++++++++++ libkram/kram/KramFileHelper.h | 3 +++ 3 files changed, 55 insertions(+), 10 deletions(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 3091d6ef..0d4f1b8b 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -20,11 +20,14 @@ #import "KramShaders.h" #include "KramLog.h" #include "KramMipper.h" + +#include "KramFileHelper.h" #include "KramMmapHelper.h" +#include "KramZipHelper.h" + #include "KramImage.h" #include "KramViewerBase.h" #include "KramVersion.h" // keep kramv version in sync with libkram -#include "KramZipHelper.h" #ifdef NDEBUG static bool doPrintPanZoom = false; @@ -44,6 +47,8 @@ @interface MyMTKView : MTKView //@property (nonatomic, readwrite, nullable) NSPanGestureRecognizer* panGesture; @property (retain, nonatomic, readwrite, nullable) NSMagnificationGestureRecognizer* zoomGesture; +@property (nonatomic, readwrite) double lastArchiveTimestamp; + - (BOOL)loadTextureFromURL:(NSURL*)url; - (void)setHudText:(const char*)text; @@ -489,7 +494,7 @@ - (nonnull ShowSettings*)showSettings { } - (NSStackView*)_addButtons { - const int32_t numButtons = 25; // 13; + const int32_t numButtons = 26; // 13; const char* names[numButtons*2] = { "?", "Help", @@ -517,6 +522,7 @@ - (NSStackView*)_addButtons { "J", "Next", "L", "Reload", "0", "Fit", + "8", "Shape", // TODO: need to shift hud over a little // "UI", - add to show/hide buttons @@ -1497,6 +1503,8 @@ - (IBAction)handleAction:(id)sender { keyCode = Key::L; else if (title == "0") keyCode = Key::Num0; + else if (title == "8") + keyCode = Key::Num8; else if (title == "R") keyCode = Key::R; @@ -2030,8 +2038,6 @@ - (BOOL)performDragOperation:(id)sender { -(BOOL)loadArchive:(const char*)zipFilename { - // TODO: avoid loading the zip again if name and/or timestamp hasn't changed on it - _zipMmap.close(); if (!_zipMmap.open(zipFilename)) { return NO; @@ -2144,23 +2150,40 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { } if (endsWithExtension(filename, ".zip")) { - if (!self.imageURL || ![self.imageURL isEqualTo:url]) { - BOOL isArchiveLoaded = [self loadArchive:filename]; + auto archiveTimestamp = 
FileHelper::modificationTimestamp(filename); + + if (!self.imageURL || (!([self.imageURL isEqualTo:url])) || (self.lastArchiveTimestamp != archiveTimestamp)) { + + // copy this out before it's replaced + string existingFilename; + if (self.lastArchiveTimestamp) + existingFilename = _zip.zipEntrys()[_fileIndex].filename; + BOOL isArchiveLoaded = [self loadArchive:filename]; if (!isArchiveLoaded) { return NO; } // store the archive url self.imageURL = url; - + self.lastArchiveTimestamp = archiveTimestamp; + // add it to recent docs NSDocumentController* dc = [NSDocumentController sharedDocumentController]; [dc noteNewRecentDocumentURL:url]; + + // now reload the filename if needed + const ZipEntry* formerEntry = _zip.zipEntry(existingFilename.c_str()); + if (formerEntry) { + // lookup the index in the remapIndices table + _fileIndex = (uintptr_t)(formerEntry - &_zip.zipEntrys().front()); + } + else { + _fileIndex = 0; + } } - - // now reload the filename if needed - const auto& entry = _zip.zipEntrys()[_fileIndex]; + + const auto& entry =_zip.zipEntrys()[_fileIndex]; const char* filename = entry.filename; double timestamp = entry.modificationDate; diff --git a/libkram/kram/KramFileHelper.cpp b/libkram/kram/KramFileHelper.cpp index aad88744..a93d23e5 100644 --- a/libkram/kram/KramFileHelper.cpp +++ b/libkram/kram/KramFileHelper.cpp @@ -277,4 +277,23 @@ int64_t FileHelper::size() const return (int64_t)stats.st_size; } +uint64_t FileHelper::modificationTimestamp(const char* filename) { + struct stat stats; + if (stat(filename, &stats) < 0) { + return 0; + } + + // https://www.quora.com/What-is-the-difference-between-mtime-atime-and-ctime + // atime is last access time + // ctime when attributes change + // mtime when contents change + // folders mtime changes when files added/deleted + + // 32.32, only return seconds for now + // https://stackoverflow.com/questions/11373505/getting-the-last-modified-date-of-a-file-in-c + timespec timestamp = stats.st_mtimespec; + return timestamp.tv_sec; +} + + } // namespace kram diff --git a/libkram/kram/KramFileHelper.h b/libkram/kram/KramFileHelper.h index a50167e4..64ff56d5 100644 --- a/libkram/kram/KramFileHelper.h +++ b/libkram/kram/KramFileHelper.h @@ -45,6 +45,9 @@ class FileHelper { static bool readBytes(FILE* fp, uint8_t* data, int dataSize); static bool writeBytes(FILE* fp, const uint8_t* data, int dataSize); + // return mod stamp on filename + static uint64_t modificationTimestamp(const char* filename); + static size_t pagesize(); private: From d3f1c549b62db6ed4949261b986692b490b63813 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 23:30:31 -0700 Subject: [PATCH 096/901] kram - fix stat call for Win, speed up info lookup with isInfoOnly --- kramv/KramRenderer.mm | 3 ++- libkram/kram/KramFileHelper.cpp | 19 ++++++++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index e80ad4a7..e22e5e52 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -534,7 +534,8 @@ - (BOOL)loadTextureFromData:(const string&)fullFilename timestamp:(double)timest // then can decode blocks in kramv KTXImage sourceImage; - if (!sourceImage.open(imageData, imageDataLength)) { + bool isInfoOnly = true; + if (!sourceImage.open(imageData, imageDataLength, isInfoOnly)) { return NO; } diff --git a/libkram/kram/KramFileHelper.cpp b/libkram/kram/KramFileHelper.cpp index a93d23e5..67129de4 100644 --- a/libkram/kram/KramFileHelper.cpp +++ b/libkram/kram/KramFileHelper.cpp @@ -278,11 
+278,21 @@ int64_t FileHelper::size() const } uint64_t FileHelper::modificationTimestamp(const char* filename) { - struct stat stats; - if (stat(filename, &stats) < 0) { + + // Win has to rename all this, so make it happy using wrappers from miniz + #if defined(_MSC_VER) || defined(__MINGW64__) + #define MZ_FILE_STAT_STRUCT _stat64 + #define MZ_FILE_STAT _stat64 + #else + #define MZ_FILE_STAT_STRUCT stat + #define MZ_FILE_STAT stat + #endif + + struct MZ_FILE_STAT_STRUCT stats; + if (MZ_FILE_STAT(filename, &stats) < 0) { return 0; } - + // https://www.quora.com/What-is-the-difference-between-mtime-atime-and-ctime // atime is last access time // ctime when attributes change @@ -291,8 +301,7 @@ uint64_t FileHelper::modificationTimestamp(const char* filename) { // 32.32, only return seconds for now // https://stackoverflow.com/questions/11373505/getting-the-last-modified-date-of-a-file-in-c - timespec timestamp = stats.st_mtimespec; - return timestamp.tv_sec; + return stats.st_mtime; } From c1568641af0c852fbada06954c06747ecd13b326 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 30 May 2021 23:41:37 -0700 Subject: [PATCH 097/901] kramv - set shape state --- kramv/KramViewerMain.mm | 4 ++++ plugin/kps/KPS.cpp | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 0d4f1b8b..14a85c1c 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1373,6 +1373,7 @@ - (void)updateUIControlState auto arrayState = toState(_showSettings->arrayNumber > 0); auto faceState = toState(_showSettings->faceNumber > 0); auto mipState = toState(_showSettings->mipLOD > 0); + auto meshState = toState(_showSettings->meshNumber > 0); // TODO: UI state, and vertical state auto uiState = toState(_buttonStack.hidden); @@ -1381,6 +1382,7 @@ - (void)updateUIControlState auto infoState = Off; auto jumpState = Off; + // buttons [self findButton:"?"].state = helpState; [self findButton:"I"].state = infoState; @@ -1399,6 +1401,7 @@ - (void)updateUIControlState [self findButton:"S"].state = showAllState; [self findButton:"O"].state = previewState; + [self findButton:"8"].state = meshState; [self findButton:"W"].state = wrapState; [self findButton:"D"].state = gridState; [self findButton:"E"].state = debugState; @@ -1428,6 +1431,7 @@ - (void)updateUIControlState [self findMenuItem:"S"].state = showAllState; [self findMenuItem:"O"].state = previewState; + [self findMenuItem:"8"].state = meshState; [self findMenuItem:"W"].state = wrapState; [self findMenuItem:"D"].state = gridState; [self findMenuItem:"E"].state = debugState; diff --git a/plugin/kps/KPS.cpp b/plugin/kps/KPS.cpp index e3524f85..bf6c5e55 100755 --- a/plugin/kps/KPS.cpp +++ b/plugin/kps/KPS.cpp @@ -543,7 +543,7 @@ static void DoReadContinue(GlobalsPtr globals) } KTXImage srcImage; - if (!srcImage.open(data.data(), data.size())) { + if (!srcImage.open(data.data(), data.size())) { // TODO: consider using isInfoOnly HandleError(globals, "Read - Couldn't parse file"); return; } From 1f9b29695cb5db6746755f34b5b6e37b5af6d01d Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 31 May 2021 11:59:54 -0700 Subject: [PATCH 098/901] kramv - support combined albedo + normal views in preview When texture-a/-d.ktx/2 is followed by texture-n.ktx/2 in an archive and they are 2d textures, then display them both together. Fix texture loader. Remove safety check on bufferOffset being 0 on loading. 
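The pairing convention, as a rough standalone sketch in plain C++ (the helper name is made up; the viewer does this inline when pulling files out of the archive): an albedo/diffuse name containing "-a.ktx" or "-d.ktx" maps to the same name with "-n.ktx", and since only those six characters are replaced the same match also covers .ktx2 files.

#include <cstring>
#include <string>

// illustrative sketch of the -a/-d -> -n naming convention
// returns an empty string when the name doesn't look like an albedo/diffuse texture
std::string normalNameForAlbedo(const std::string& filename) {
    for (const char* suffix : { "-a.ktx", "-d.ktx" }) {
        size_t pos = filename.find(suffix);
        if (pos != std::string::npos) {
            std::string name = filename;
            return name.replace(pos, strlen(suffix), "-n.ktx"); // a trailing '2' survives for ktx2
        }
    }
    return {};
}

If no matching normal texture exists in the archive, the lookup failure is simply ignored and only the color texture is shown.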
--- kramv/KramLoader.mm | 8 +-- kramv/KramRenderer.h | 8 ++- kramv/KramRenderer.mm | 42 ++++++++++++++-- kramv/KramShaders.h | 9 +++- kramv/KramShaders.metal | 106 ++++++++++++++++++++++++---------------- kramv/KramViewerMain.mm | 30 +++++++++++- 6 files changed, 150 insertions(+), 53 deletions(-) diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 41c24951..12f09b79 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -53,7 +53,7 @@ @implementation KramLoader { // only one of these for now id _buffer; uint8_t* _data; - uint8_t _bufferOffset; + uint32_t _bufferOffset; vector _blits; NSMutableArray>* _blitTextures; @@ -621,9 +621,9 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) { // TODO: first make sure have enough buffer to upload, otherwise need to queue this image // try not to load much until that's established // queue would need KTXImage and mmap to stay alive long enough for queue to be completed - if (_bufferOffset != 0) { - return nil; - } +// if (_bufferOffset != 0) { +// return nil; +// } id texture = [self createTexture:image isPrivate:true]; if (!texture) diff --git a/kramv/KramRenderer.h b/kramv/KramRenderer.h index 082990ed..801ace1e 100644 --- a/kramv/KramRenderer.h +++ b/kramv/KramRenderer.h @@ -26,7 +26,13 @@ namespace kram { - (nonnull instancetype)initWithMetalKitView:(nonnull MTKView *)view settings:(nonnull kram::ShowSettings*)settings; -- (BOOL)loadTextureFromData:(const std::string&)fullFilename timestamp:(double)timestamp imageData:(nonnull const uint8_t*)imageData imageDataLength:(uint64_t)imageDataLength; +- (BOOL)loadTextureFromData:(const std::string&)fullFilename + timestamp:(double)timestamp + imageData:(nonnull const uint8_t*)imageData + imageDataLength:(uint64_t)imageDataLength + imageNormalData:(nullable const uint8_t*)imageNormalData + imageNormalDataLength:(uint64_t)imageNormalDataLength; + - (BOOL)loadTexture:(nonnull NSURL *)url; diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index e22e5e52..8f08b1e8 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -56,7 +56,7 @@ @implementation Renderer // TODO: Array< id > _textures; id _colorMap; - //id _colorMapView; + id _normalMap; id _colorMapSamplerWrap; id _colorMapSamplerClamp; @@ -511,7 +511,13 @@ - (void)_loadAssets } -- (BOOL)loadTextureFromData:(const string&)fullFilename timestamp:(double)timestamp imageData:(nonnull const uint8_t*)imageData imageDataLength:(uint64_t)imageDataLength +- (BOOL)loadTextureFromData:(const string&)fullFilename + timestamp:(double)timestamp + imageData:(nonnull const uint8_t*)imageData + imageDataLength:(uint64_t)imageDataLength + imageNormalData:(nullable const uint8_t*)imageNormalData + imageNormalDataLength:(uint64_t)imageNormalDataLength + { // image can be decoded to rgba8u if platform can't display format natively // but still want to identify blockSize from original format @@ -522,13 +528,22 @@ - (BOOL)loadTextureFromData:(const string&)fullFilename timestamp:(double)timest (timestamp != _showSettings->lastTimestamp); if (isTextureChanged) { - // synchronously cpu upload from ktx file to texture + // synchronously cpu upload from ktx file to buffer, with eventual gpu blit from buffer to returned texture MTLPixelFormat originalFormatMTL = MTLPixelFormatInvalid; id texture = [_loader loadTextureFromData:imageData imageDataLength:imageDataLength originalFormat:&originalFormatMTL]; if (!texture) { return NO; } + // hacking in the normal texture here, so can display them together during preview + id 
normalTexture; + if (imageNormalData) { + normalTexture = [_loader loadTextureFromData:imageNormalData imageDataLength:imageNormalDataLength originalFormat:nil]; + if (!normalTexture) { + return NO; + } + } + // archive shouldn't contain png, so only support ktx/ktx2 here // TODO: have loader return KTXImage instead of parsing it again // then can decode blocks in kramv @@ -550,6 +565,7 @@ - (BOOL)loadTextureFromData:(const string&)fullFilename timestamp:(double)timest @autoreleasepool { _colorMap = texture; + _normalMap = normalTexture; } } @@ -592,6 +608,7 @@ - (BOOL)loadTexture:(nonnull NSURL *)url @autoreleasepool { _colorMap = texture; + _normalMap = nil; } } @@ -796,6 +813,16 @@ - (void)_updateGameState uniforms.isPreview = _showSettings->isPreview; + uniforms.isNormalMapPreview = false; + if (uniforms.isPreview) { + uniforms.isNormalMapPreview = uniforms.isNormal || (_normalMap != nil); + + if (_normalMap != nil) { + uniforms.isNormalMapSigned = isSignedFormat((MyMTLPixelFormat)_normalMap.pixelFormat); + uniforms.isNormalMapSwizzleAGToRG = false; // TODO: need a prop for this + } + } + uniforms.gridX = 0; uniforms.gridY = 0; @@ -1056,9 +1083,14 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie // set the texture up - id texture = _colorMap; - [renderEncoder setFragmentTexture:texture + [renderEncoder setFragmentTexture:_colorMap atIndex:TextureIndexColor]; + + // setup normal map + if (_normalMap && _showSettings->isPreview && _colorMap.textureType == MTLTextureType2D) { + [renderEncoder setFragmentTexture:_normalMap + atIndex:TextureIndexNormal]; + } diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index d81b866e..fd8bb48e 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -49,7 +49,9 @@ typedef NS_ENUM(int32_t, VertexAttribute) typedef NS_ENUM(int32_t, TextureIndex) { TextureIndexColor = 0, - TextureIndexSamples = 1, // used for compute + TextureIndexNormal = 1, + + TextureIndexSamples = 2, // used for compute }; typedef NS_ENUM(int32_t, SamplerIndex) @@ -108,7 +110,12 @@ struct Uniforms bool isWrap; bool isSDF; bool isPreview; + bool is3DView; + bool isNormalMapPreview; // for isNormal or combined + + bool isNormalMapSigned; + bool isNormalMapSwizzleAGToRG; uint32_t numChannels; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 0021f1ee..4ae2f668 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -183,6 +183,8 @@ half3 toNormal(half3 n) return n; } + + // use mikktspace, gen bitan in frag shader with sign, don't normalize vb/vt // see http://www.mikktspace.com/ half3 transformNormal(half3 bumpNormal, half4 tangent, half3 vertexNormal) @@ -219,11 +221,40 @@ half3 transformNormal(half4 tangent, half3 vertexNormal, // rebuild the z term half3 bumpNormal = toNormal(nmap.xyz); - return transformNormal(bumpNormal, - tangent, vertexNormal); + return transformNormal(bumpNormal, tangent, vertexNormal); } +float3 transformNormal(float4 nmap, half3 vertexNormal, half4 tangent, + bool isSwizzleAGToRG, bool isSigned, bool isFrontFacing) +{ + // add swizzle for ASTC/BC5nm, other 2 channels format can only store 01 in ba + // could use hw swizzle for this + if (isSwizzleAGToRG) { + nmap = float4(nmap.ag, 0, 1); + } + + // to signed, also for ASTC/BC5nm + if (!isSigned) { + // convert to signed normal to compute z + nmap.rg = toSnorm8(nmap.rg); + } + + float3 bumpNormal = nmap.xyz; + + bumpNormal = toNormal(bumpNormal); + + // flip the normal if facing is flipped + // TODO: needed for tangent too? 
+ if (!isFrontFacing) { + bumpNormal = -bumpNormal; + tangent.w = -tangent.w; + } + + // handle the basis here + bumpNormal = toFloat(transformNormal(toHalf(bumpNormal), tangent, vertexNormal)); + return bumpNormal; +} // TODO: have more bones, or read from texture instead of uniforms // can then do instanced skining, but vfetch lookup slower @@ -358,7 +389,7 @@ ColorInOut DrawImageFunc( // deal with full basis - if (uniforms.isNormal && uniforms.isPreview) { + if (uniforms.isNormalMapPreview) { float3 normal = in.normal; float3 tangent = in.tangent.xyz; transformBasis(normal, tangent, uniforms.modelMatrix, uniforms.modelMatrixInvScale2); @@ -383,11 +414,11 @@ ColorInOut DrawImageFunc( out.texCoord.xy = in.texCoord; out.texCoord.xy *= wrapAmount; } - else if (uniforms.is3DView) { + else if (uniforms.is3DView && !uniforms.isWrap) { // inset from edge by a fraction of a pixel, to avoid clamp boundary error // does this have to adjust for mipLOD too? float2 onePixel = uniformsLevel.textureSize.zw; - float2 halfPixel = (1.0/16.0) * onePixel; + float2 halfPixel = (1.0/4.0) * onePixel; out.texCoord.xy = clamp(in.texCoord, halfPixel, float2(1.0) - halfPixel); } @@ -526,6 +557,7 @@ float4 DrawPixels( bool facing [[front_facing]], constant Uniforms& uniforms, float4 c, + float4 nmap, float2 textureSize ) { @@ -565,32 +597,10 @@ float4 DrawPixels( else if (uniforms.isNormal) { // light the normal map - // add swizzle for ASTC/BC5nm, other 2 channels format can only store 01 in ba - if (uniforms.isSwizzleAGToRG) { - c = float4(c.ag, 0, 1); - } - - // to signed - if (!uniforms.isSigned) { - // convert to signed normal to compute z - c.rg = toSnorm8(c.rg); - } - - c.rgb = toNormal(c.rgb); - - // flip the normal if facing is flipped - // TODO: needed for tangent too? 
- if (!facing) { - c.xyz = -c.xyz; - in.tangent.w = -in.tangent.w; - } + float3 n = transformNormal(c, in.normal, in.tangent, + uniforms.isSwizzleAGToRG, uniforms.isSigned, facing); - float3 n = c.xyz; - - // handle the basis here - n = toFloat(transformNormal(toHalf(n), in.tangent, in.normal)); - float3 viewDir = normalize(in.worldPos - uniforms.cameraPosition); c = doLighting(float4(1.0), viewDir, n); @@ -601,11 +611,18 @@ float4 DrawPixels( if (uniforms.isSigned) { c.xyz = toUnorm(c.xyz); } - - // need an isAlbedo test - if (!uniforms.isSigned) { + else { // TODO: need an isAlbedo test float3 viewDir = normalize(in.worldPos - uniforms.cameraPosition); - c = doLighting(c, viewDir, toFloat(in.normal)); + + if (uniforms.isNormalMapPreview) { + float3 n = transformNormal(nmap, in.normal, in.tangent, + uniforms.isNormalMapSwizzleAGToRG, uniforms.isNormalMapSigned, facing); + + c = doLighting(c, viewDir, n); + } + else { + c = doLighting(c, viewDir, toFloat(in.normal)); + } } // to premul, but also need to see without premul @@ -843,7 +860,8 @@ fragment float4 Draw1DArrayPS( float2 textureSize = float2(colorMap.get_width(0), 1); // colorMap.get_num_mip_levels(); - return DrawPixels(in, facing, uniforms, c, textureSize); + float4 n = float4(0,0,1,1); + return DrawPixels(in, facing, uniforms, c, n, textureSize); } fragment float4 DrawImagePS( @@ -852,17 +870,19 @@ fragment float4 DrawImagePS( constant Uniforms& uniforms [[ buffer(BufferIndexUniforms) ]], constant UniformsLevel& uniformsLevel [[ buffer(BufferIndexUniformsLevel) ]], sampler colorSampler [[ sampler(SamplerIndexColor) ]], - texture2d colorMap [[ texture(TextureIndexColor) ]] + texture2d colorMap [[ texture(TextureIndexColor) ]], + texture2d normalMap [[ texture(TextureIndexNormal) ]] ) { float4 c = colorMap.sample(colorSampler, in.texCoordXYZ.xy); - + float4 n = normalMap.sample(colorSampler, in.texCoordXYZ.xy); + // here are the pixel dimensions of the lod uint lod = uniformsLevel.mipLOD; float2 textureSize = float2(colorMap.get_width(lod), colorMap.get_height(lod)); // colorMap.get_num_mip_levels(); - return DrawPixels(in, facing, uniforms, c, textureSize); + return DrawPixels(in, facing, uniforms, c, n, textureSize); } fragment float4 DrawImageArrayPS( @@ -881,7 +901,8 @@ fragment float4 DrawImageArrayPS( float2 textureSize = float2(colorMap.get_width(lod), colorMap.get_height(lod)); // colorMap.get_num_mip_levels(); - return DrawPixels(in, facing, uniforms, c, textureSize); + float4 n = float4(0,0,1,1); + return DrawPixels(in, facing, uniforms, c, n, textureSize); } @@ -902,7 +923,8 @@ fragment float4 DrawCubePS( float2 textureSize = float2(w, w); // colorMap.get_num_mip_levels(); - return DrawPixels(in, facing, uniforms, c, textureSize); + float4 n = float4(0,0,1,1); + return DrawPixels(in, facing, uniforms, c, n, textureSize); } fragment float4 DrawCubeArrayPS( @@ -922,7 +944,8 @@ fragment float4 DrawCubeArrayPS( float2 textureSize = float2(w, w); // colorMap.get_num_mip_levels(); - return DrawPixels(in, facing, uniforms, c, textureSize); + float4 n = float4(0,0,1,1); + return DrawPixels(in, facing, uniforms, c, n, textureSize); } @@ -953,7 +976,8 @@ fragment float4 DrawVolumePS( float2 textureSize = float2(colorMap.get_width(lod), colorMap.get_height(lod)); // colorMap.get_num_mip_levels(); - return DrawPixels(in, facing, uniforms, c, textureSize); + float4 n = float4(0,0,1,1); + return DrawPixels(in, facing, uniforms, c, n, textureSize); } //-------------------------------------------------- diff --git 
a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 14a85c1c..422d5a2b 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -2101,9 +2101,37 @@ - (BOOL)loadTextureFromArchive:(const char*)filename timestamp:(double)timestamp return NO; } + // see if this is albedo, and then search for normal map in the same archive + const uint8_t* imageNormalData = nullptr; + uint64_t imageNormalDataLength = 0; + + string normalFilename = filename; + + // first only do this on albedo/diffuse textures + string search = "-a.ktx"; + auto searchPos = normalFilename.find(search); + bool isFound = searchPos != string::npos; + + if (!isFound) { + search = "-d.ktx"; + searchPos = normalFilename.find(search); + isFound = searchPos != string::npos; + } + + if (isFound) { + normalFilename = normalFilename.replace(searchPos, search.length(), "-n.ktx"); // works for ktx or ktx2 file + + if (!_zip.extractRaw(normalFilename.c_str(), &imageNormalData, imageNormalDataLength)) { + // ignore failure case here, this is just guessing there's a -n file + } + } + string fullFilename = filename; Renderer* renderer = (Renderer*)self.delegate; - if (![renderer loadTextureFromData:fullFilename timestamp:(double)timestamp imageData:imageData imageDataLength:imageDataLength]) { + if (![renderer loadTextureFromData:fullFilename timestamp:(double)timestamp + imageData:imageData imageDataLength:imageDataLength + imageNormalData:imageNormalData imageNormalDataLength:imageNormalDataLength]) + { return NO; } From 8d743e5c64ae185216d0484a1e76832695852bb7 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 31 May 2021 13:35:08 -0700 Subject: [PATCH 099/901] kramv - fix eyedropper, support array2d for combined color+normals --- kramv/KramRenderer.mm | 46 +++++++++++++++++++++++++++++++---------- kramv/KramShaders.metal | 17 ++++++++------- 2 files changed, 44 insertions(+), 19 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 8f08b1e8..0b53f5ba 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -538,12 +538,19 @@ - (BOOL)loadTextureFromData:(const string&)fullFilename // hacking in the normal texture here, so can display them together during preview id normalTexture; if (imageNormalData) { - normalTexture = [_loader loadTextureFromData:imageNormalData imageDataLength:imageNormalDataLength originalFormat:nil]; - if (!normalTexture) { - return NO; + KTXImage imageNormal; + if (imageNormal.open(imageNormalData, imageNormalDataLength, true)) { + // only have shaders that expects diffuse/normal to be same texture type + if (imageNormal.textureType == (MyMTLTextureType)texture.textureType && + (imageNormal.textureType == MyMTLTextureType2D || imageNormal.textureType == MyMTLTextureType2DArray)) + { + normalTexture = [_loader loadTextureFromData:imageNormalData imageDataLength:imageNormalDataLength originalFormat:nil]; + if (!normalTexture) { + return NO; + } + } } } - // archive shouldn't contain png, so only support ktx/ktx2 here // TODO: have loader return KTXImage instead of parsing it again // then can decode blocks in kramv @@ -1002,12 +1009,16 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie if (renderPassDescriptor == nil) { return; } + if (_colorMap == nil) { // this will clear target id renderEncoder = - [commandBuffer renderCommandEncoderWithDescriptor:renderPassDescriptor]; - renderEncoder.label = @"MainRender"; - [renderEncoder endEncoding]; + [commandBuffer renderCommandEncoderWithDescriptor:renderPassDescriptor]; + + if (renderEncoder) { + 
renderEncoder.label = @"MainRender"; + [renderEncoder endEncoding]; + } return; } @@ -1015,6 +1026,10 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie /// Final pass rendering code here id renderEncoder = [commandBuffer renderCommandEncoderWithDescriptor:renderPassDescriptor]; + if (!renderEncoder) { + return; + } + renderEncoder.label = @"MainRender"; // set raster state @@ -1087,7 +1102,8 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie atIndex:TextureIndexColor]; // setup normal map - if (_normalMap && _showSettings->isPreview && _colorMap.textureType == MTLTextureType2D) { + if (_normalMap && _showSettings->isPreview) + { [renderEncoder setFragmentTexture:_normalMap atIndex:TextureIndexNormal]; } @@ -1265,6 +1281,9 @@ - (void)drawSample } id commandBuffer = [_commandQueue commandBuffer]; + if (!commandBuffer) + return; + commandBuffer.label = @"MyCommand"; int32_t textureLookupX = _showSettings->textureLookupX; @@ -1277,9 +1296,11 @@ - (void)drawSample // Synchronize the managed texture. id blitCommandEncoder = [commandBuffer blitCommandEncoder]; - [blitCommandEncoder synchronizeResource:_sampleTex]; - [blitCommandEncoder endEncoding]; - + if (blitCommandEncoder) { + [blitCommandEncoder synchronizeResource:_sampleTex]; + [blitCommandEncoder endEncoding]; + } + // After synchonization, copy value back to the cpu id texture = _sampleTex; [commandBuffer addCompletedHandler:^(id /* buffer */) @@ -1311,6 +1332,9 @@ - (void)drawSamples:(id)commandBuffer lookupX:(int32_t)lookupX // Final pass rendering code here id renderEncoder = [commandBuffer computeCommandEncoder]; + if (!renderEncoder) + return; + renderEncoder.label = @"SampleCompute"; [renderEncoder pushDebugGroup:@"DrawShape"]; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 4ae2f668..020de693 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -891,17 +891,18 @@ fragment float4 DrawImageArrayPS( constant Uniforms& uniforms [[ buffer(BufferIndexUniforms) ]], constant UniformsLevel& uniformsLevel [[ buffer(BufferIndexUniformsLevel) ]], sampler colorSampler [[ sampler(SamplerIndexColor) ]], - texture2d_array colorMap [[ texture(TextureIndexColor) ]] + texture2d_array colorMap [[ texture(TextureIndexColor) ]], + texture2d_array normalMap [[ texture(TextureIndexNormal) ]] ) { float4 c = colorMap.sample(colorSampler, in.texCoordXYZ.xy, uniformsLevel.arrayOrSlice); + float4 n = normalMap.sample(colorSampler, in.texCoordXYZ.xy, uniformsLevel.arrayOrSlice); // here are the pixel dimensions of the lod uint lod = uniformsLevel.mipLOD; float2 textureSize = float2(colorMap.get_width(lod), colorMap.get_height(lod)); // colorMap.get_num_mip_levels(); - float4 n = float4(0,0,1,1); return DrawPixels(in, facing, uniforms, c, n, textureSize); } @@ -1077,7 +1078,7 @@ kernel void SampleImage1DArrayCS( texture1d_array colorMap [[ texture(TextureIndexColor) ]], constant UniformsCS& uniforms [[ buffer(BufferIndexUniformsCS) ]], uint2 index [[thread_position_in_grid]], - texture2d result + texture2d result [[ texture(TextureIndexSamples) ]] ) { // the for-loop is replaced with a collection of threads, each of which @@ -1096,7 +1097,7 @@ kernel void SampleImageCS( texture2d colorMap [[ texture(TextureIndexColor) ]], constant UniformsCS& uniforms [[ buffer(BufferIndexUniformsCS) ]], uint2 index [[thread_position_in_grid]], - texture2d result + texture2d result [[ texture(TextureIndexSamples) ]] ) { // the for-loop is replaced with a collection of threads, each of which @@ -1113,7 +1114,7 
@@ kernel void SampleImageArrayCS( texture2d_array colorMap [[ texture(TextureIndexColor) ]], constant UniformsCS& uniforms [[ buffer(BufferIndexUniformsCS) ]], uint2 index [[thread_position_in_grid]], - texture2d result + texture2d result [[ texture(TextureIndexSamples) ]] ) { // the for-loop is replaced with a collection of threads, each of which @@ -1132,7 +1133,7 @@ kernel void SampleCubeCS( texturecube colorMap [[ texture(TextureIndexColor) ]], constant UniformsCS& uniforms [[ buffer(BufferIndexUniformsCS) ]], uint2 index [[thread_position_in_grid]], - texture2d result + texture2d result [[ texture(TextureIndexSamples) ]] ) { // the for-loop is replaced with a collection of threads, each of which @@ -1154,7 +1155,7 @@ kernel void SampleCubeArrayCS( texturecube_array colorMap [[ texture(TextureIndexColor) ]], constant UniformsCS& uniforms [[ buffer(BufferIndexUniformsCS) ]], uint2 index [[thread_position_in_grid]], - texture2d result + texture2d result [[ texture(TextureIndexSamples) ]] ) { // the for-loop is replaced with a collection of threads, each of which @@ -1174,7 +1175,7 @@ kernel void SampleVolumeCS( texture3d colorMap [[ texture(TextureIndexColor) ]], constant UniformsCS& uniforms [[ buffer(BufferIndexUniformsCS) ]], uint2 index [[thread_position_in_grid]], - texture2d result + texture2d result [[ texture(TextureIndexSamples) ]] ) { // the for-loop is replaced with a collection of threads, each of which From 7c02247e5ce53a4de9a72701627f8fc681bf0839 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 31 May 2021 13:40:46 -0700 Subject: [PATCH 100/901] kramv - more test cases, move to heights that gen normals to keep things in sync run scripts/kramTests.sh to build the normal maps from the heights, and build the bundle the bundle can be dropped onto kramv to see combined albedo + normal --- tests/src/GradientGray4x4-a.png | 3 +++ tests/src/White4x4-a.png | 3 +++ tests/src/brick01-d.png | 3 +++ tests/src/brick01-h.png | 3 +++ tests/src/{collectorbarrelh-h.png => collectorbarrel-h.png} | 0 tests/src/collectorbarrel-n.png | 3 --- tests/src/laying_rock7-d.png | 3 +++ tests/src/laying_rock7-h.png | 3 +++ tests/src/rockwall-d.png | 3 +++ tests/src/rockwall-h.png | 3 +++ tests/src/roots-d.png | 3 +++ tests/src/roots-h.png | 3 +++ 12 files changed, 30 insertions(+), 3 deletions(-) create mode 100644 tests/src/GradientGray4x4-a.png create mode 100644 tests/src/White4x4-a.png create mode 100755 tests/src/brick01-d.png create mode 100755 tests/src/brick01-h.png rename tests/src/{collectorbarrelh-h.png => collectorbarrel-h.png} (100%) delete mode 100644 tests/src/collectorbarrel-n.png create mode 100755 tests/src/laying_rock7-d.png create mode 100755 tests/src/laying_rock7-h.png create mode 100755 tests/src/rockwall-d.png create mode 100755 tests/src/rockwall-h.png create mode 100755 tests/src/roots-d.png create mode 100755 tests/src/roots-h.png diff --git a/tests/src/GradientGray4x4-a.png b/tests/src/GradientGray4x4-a.png new file mode 100644 index 00000000..782ec70e --- /dev/null +++ b/tests/src/GradientGray4x4-a.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70a0898336eb863668cf1ffc9edcaada4ab702f93b2b244276e07cc501825d8f +size 1871 diff --git a/tests/src/White4x4-a.png b/tests/src/White4x4-a.png new file mode 100644 index 00000000..486e1d69 --- /dev/null +++ b/tests/src/White4x4-a.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9767dc8547c1a7c1a3db989ce5b84fa28f6b7b026af5c850b9d01e942c65a59 +size 1412 diff --git 
a/tests/src/brick01-d.png b/tests/src/brick01-d.png new file mode 100755 index 00000000..6614baa0 --- /dev/null +++ b/tests/src/brick01-d.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d2d2645121417c9559c9b5689546b44e7dcb139bb225e556634406345194ce2 +size 355401 diff --git a/tests/src/brick01-h.png b/tests/src/brick01-h.png new file mode 100755 index 00000000..c45e07d1 --- /dev/null +++ b/tests/src/brick01-h.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e8ea48fdb25da97f55d3f4c939b8c0a8572e2ad6bbb6f1105d6fd340889c823 +size 74397 diff --git a/tests/src/collectorbarrelh-h.png b/tests/src/collectorbarrel-h.png similarity index 100% rename from tests/src/collectorbarrelh-h.png rename to tests/src/collectorbarrel-h.png diff --git a/tests/src/collectorbarrel-n.png b/tests/src/collectorbarrel-n.png deleted file mode 100644 index 2cc4c0c1..00000000 --- a/tests/src/collectorbarrel-n.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:151e91179b05f8a127f635b30857525b9c3b124fd129971db370cbf9b9d6f6e1 -size 33987 diff --git a/tests/src/laying_rock7-d.png b/tests/src/laying_rock7-d.png new file mode 100755 index 00000000..ae3bdf2b --- /dev/null +++ b/tests/src/laying_rock7-d.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb7a51282ee5c9ca1314b53443ce59a9da93ba4cd1daea59280400e6d088938c +size 540419 diff --git a/tests/src/laying_rock7-h.png b/tests/src/laying_rock7-h.png new file mode 100755 index 00000000..6a6621c2 --- /dev/null +++ b/tests/src/laying_rock7-h.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecaa27e2311541d0882bd5c1b53a5e4818dc808a29a6081a53a540b318aeebf5 +size 87309 diff --git a/tests/src/rockwall-d.png b/tests/src/rockwall-d.png new file mode 100755 index 00000000..b191fad5 --- /dev/null +++ b/tests/src/rockwall-d.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:694da069137942b85428aa1990831cecdcd7d1f168c0b3e47d8773a9a4592dab +size 705228 diff --git a/tests/src/rockwall-h.png b/tests/src/rockwall-h.png new file mode 100755 index 00000000..5fe1e8c1 --- /dev/null +++ b/tests/src/rockwall-h.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b64ce3b80d21a3bde9d223e8db37e1c365c24421c73717a54fe94632c5f61656 +size 68538 diff --git a/tests/src/roots-d.png b/tests/src/roots-d.png new file mode 100755 index 00000000..067b5762 --- /dev/null +++ b/tests/src/roots-d.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50ef4f60c5ad4536d022cc9263d9444afe92181b00373a7b98699fc807c6df28 +size 604307 diff --git a/tests/src/roots-h.png b/tests/src/roots-h.png new file mode 100755 index 00000000..29182ac8 --- /dev/null +++ b/tests/src/roots-h.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5348fba75215a3aeea537dd50fa4943b1c1674473752346c4ebe1f05e22a83a0 +size 63375 From f01f71453512da831e983973d98a544094053211 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 31 May 2021 13:51:16 -0700 Subject: [PATCH 101/901] kramv - fix gap on clamp with full half pixel inset. Might be able to reduce, but using highest mip textureSize right now. 
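For reference, a minimal numeric sketch of the inset (assuming a hypothetical 4x4 top mip, so textureSize.zw = 1/4):

    // illustrative values only
    float2 onePixel  = float2(1.0 / 4.0); // 0.25 for a 4x4 mip
    float2 halfPixel = 0.5 * onePixel;    // 0.125, the distance from the edge to the first texel center
    // clamp(uv, 0.125, 0.875) keeps every sample at or inside the outer texel centers,
    // while the old quarter-pixel inset (0.0625) still let samples fall between the edge
    // and the first texel center.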
--- kramv/KramShaders.metal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 020de693..32639e57 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -418,7 +418,7 @@ ColorInOut DrawImageFunc( // inset from edge by a fraction of a pixel, to avoid clamp boundary error // does this have to adjust for mipLOD too? float2 onePixel = uniformsLevel.textureSize.zw; - float2 halfPixel = (1.0/4.0) * onePixel; + float2 halfPixel = 0.5 * onePixel; out.texCoord.xy = clamp(in.texCoord, halfPixel, float2(1.0) - halfPixel); } From d0f45a1c5fbb979905a84e071107997cfd30c00d Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 31 May 2021 18:59:12 -0700 Subject: [PATCH 102/901] kramv - add folder drop, and filter unsupported folder/archive extensions Now can use a folder(s) or an archive to view textures. The same combined albedo+normal works on folder drop. The folder drop doesn't yet support PNG. The load data path only works on ktx/ktx2 files for now. .zip bundles in a folder drop are ignored, so those can reside along with loose files. --- kramv/KramShaders.metal | 28 +-- kramv/KramViewerBase.h | 5 +- kramv/KramViewerMain.mm | 355 ++++++++++++++++++++++++++++----- libkram/kram/KramLog.cpp | 2 + libkram/kram/KramZipHelper.cpp | 16 ++ libkram/kram/KramZipHelper.h | 3 + 6 files changed, 351 insertions(+), 58 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 32639e57..b064f983 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -515,15 +515,15 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n) { float3 lightDir = normalize(float3(1,1,1)); float3 lightColor = float3(1,1,1); - - // diffuse - float dotNLUnsat = dot(n, lightDir); - float dotNL = saturate(dotNLUnsat); - float3 diffuse = lightColor.xyz * dotNL; float3 specular = float3(0.0); - + float3 diffuse = float3(0.0); + float3 ambient = float3(0.0); + bool doSpecular = true; + bool doDiffuse = true; + bool doAmbient = true; + if (doSpecular) { float3 ref = normalize(reflect(viewDir, n)); @@ -533,10 +533,18 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n) { specular = saturate(dotRL * lightColor.rgb); } - float3 ambient = mix(0.1, 0.3, saturate(dotNLUnsat * 0.5 + 0.5)); + if (doDiffuse) { + float dotNL = saturate(dot(n, lightDir)); + diffuse = dotNL * lightColor.rgb; + } + + if (doAmbient) { + float dotNLUnsat = dot(n, lightDir); + ambient = mix(0.1, 0.3, saturate(dotNLUnsat * 0.5 + 0.5)); + } // attenuate, and not saturate below, so no HDR yet - specular *= 0.3; + specular *= 0.8; diffuse *= 0.7; //ambient *= 0.2; @@ -545,10 +553,6 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n) { return albedo; } - -// TODO: do more test shapes, but that affects eyedropper -// generate and pass down tangents + bitanSign in the geometry - // TODO: eliminate the toUnorm() calls below, rendering to rgba16f // but also need to remove conversion code on cpu side expecting unorm in eyedropper diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index b985f75d..d0d5a2ac 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -110,9 +110,12 @@ class ShowSettings { // draw with reverseZ to better match perspective bool isReverseZ = true; - // whether files are pulled from disk or zip archive. + // whether files are pulled from zip archive. 
bool isArchive = false; + // whether files are pulled from folder(s) + bool isFolder = false; + // can have up to 5 channels (xyz as xy, 2 other channels) int32_t numChannels = 0; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 422d5a2b..d0166c32 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -421,8 +421,11 @@ @implementation MyMTKView // allow zip files to be dropped and opened, and can advance through bundle content ZipHelper _zip; MmapHelper _zipMmap; - int32_t _fileIndex; + int32_t _fileArchiveIndex; BOOL _noImageLoaded; + + vector _folderFiles; + int32_t _fileFolderIndex; } - (void)awakeFromNib @@ -1288,7 +1291,7 @@ - (void)updateUIAfterLoad { bool isFaceSliceHidden = _showSettings->faceCount <= 1 && _showSettings->sliceCount <= 1; bool isMipHidden = _showSettings->maxLOD <= 1; - bool isJumpToNextHidden = !_showSettings->isArchive; + bool isJumpToNextHidden = !(_showSettings->isArchive || _showSettings->isFolder); bool isRedHidden = false; bool isGreenHidden = _showSettings->numChannels <= 1; @@ -1875,9 +1878,17 @@ - (bool)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown case Key::J: if (![self findButton:"J"].isHidden) { - if ([self advanceTextureFromAchive:!isShiftKeyDown]) { - isChanged = true; - text = "Loaded " + _showSettings->lastFilename; + if (_showSettings->isArchive) { + if ([self advanceTextureFromAchive:!isShiftKeyDown]) { + isChanged = true; + text = "Loaded " + _showSettings->lastFilename; + } + } + else if (_showSettings->isFolder) { + if ([self advanceTextureFromFolder:!isShiftKeyDown]) { + isChanged = true; + text = "Loaded " + _showSettings->lastFilename; + } } } break; @@ -2053,9 +2064,18 @@ -(BOOL)loadArchive:(const char*)zipFilename if (!_zip.openForRead(_zipMmap.data(), _zipMmap.dataLength())) { return NO; } + + // filter out unsupported extensions + + _zip.filterExtensions({".ktx", ".ktx2"}); + // don't switch to empty archive + if (_zip.zipEntrys().empty()) { + return NO; + } + // load the first entry in the archive - _fileIndex = 0; + _fileArchiveIndex = 0; return YES; } @@ -2067,27 +2087,156 @@ -(BOOL)advanceTextureFromAchive:(BOOL)increment return NO; } - // this advances through the fileIndex of a dropped - size_t numEntries = _zip.zipEntrys().size(); - if (numEntries == 0) { + if (_zip.zipEntrys().empty()) { return NO; } + + size_t numEntries = _zip.zipEntrys().size(); + + if (increment) + _fileArchiveIndex++; + else + _fileArchiveIndex += numEntries - 1; // back 1 + + _fileArchiveIndex = _fileArchiveIndex % numEntries; + return [self loadTextureFromArchive]; +} + +-(BOOL)advanceTextureFromFolder:(BOOL)increment +{ + if (_folderFiles.empty()) { + // no archive loaded + return NO; + } + + size_t numEntries = _folderFiles.size(); if (increment) - _fileIndex = (_fileIndex + 1) % numEntries; + _fileFolderIndex++; else - _fileIndex = (_fileIndex + numEntries - 1) % numEntries; + _fileFolderIndex += numEntries - 1; // back 1 + + _fileFolderIndex = _fileFolderIndex % numEntries; + + return [self loadTextureFromFolder]; +} +- (BOOL)loadTextureFromFolder +{ // now lookup the filename and data at that entry - const auto& entry = _zip.zipEntrys()[_fileIndex]; - const char* filename = entry.filename; - double timestamp = (double)entry.modificationDate; + const char* filename = _folderFiles[_fileFolderIndex].c_str(); + auto timestamp = FileHelper::modificationTimestamp(filename); + + // have already filtered filenames out, so this should never get hit + if (!(//endsWithExtension(filename, ".png") || + 
endsWithExtension(filename, ".ktx") || + endsWithExtension(filename, ".ktx2")) ) + { + return NO; + } + + const uint8_t* imageData = nullptr; + uint64_t imageDataLength = 0; + + // TODO: assuming can mmap here, but may need FileHelper fallback + MmapHelper imageMmap; + if (!imageMmap.open(filename)) { + return NO; + } + + imageData = imageMmap.data(); + imageDataLength = imageMmap.dataLength(); + + // see if this is albedo, and then search for normal map in the same archive + const uint8_t* imageNormalData = nullptr; + uint64_t imageNormalDataLength = 0; + MmapHelper imageNormalMmap; + + string normalFilename = filename; + + // first only do this on albedo/diffuse textures + string search = "-a.ktx"; + auto searchPos = normalFilename.find(search); + bool isFound = searchPos != string::npos; + + if (!isFound) { + search = "-d.ktx"; + searchPos = normalFilename.find(search); + isFound = searchPos != string::npos; + } + + if (isFound) { + normalFilename = normalFilename.replace(searchPos, search.length(), "-n.ktx"); // works for ktx or ktx2 file + + // binary search for the filename in the array, will have to be in same directory + isFound = false; + for (const auto& search : _folderFiles) { + if (search == normalFilename) { + isFound = true; + break; + } + } + + if (isFound) { + if (imageNormalMmap.open(normalFilename.c_str())) { + imageNormalData = imageNormalMmap.data(); + imageNormalDataLength = imageNormalMmap.dataLength(); + } + } + } + + string fullFilename = filename; + Renderer* renderer = (Renderer*)self.delegate; + if (![renderer loadTextureFromData:fullFilename timestamp:(double)timestamp + imageData:imageData imageDataLength:imageDataLength + imageNormalData:imageNormalData imageNormalDataLength:imageNormalDataLength]) + { + return NO; + } - return [self loadTextureFromArchive:filename timestamp:timestamp]; + // set title to filename, chop this to just file+ext, not directory + const char* filenameShort = strrchr(filename, '/'); + if (filenameShort == nullptr) { + filenameShort = filename; + } + else { + filenameShort += 1; + } + + // was using subtitle, but that's macOS 11.0 feature. 
+ string title = "kramv - "; + title += formatTypeName(_showSettings->originalFormat); + title += " - "; + title += filenameShort; + + self.window.title = [NSString stringWithUTF8String: title.c_str()]; + + // doesn't set imageURL or update the recent document menu + + // show the controls + if (_noImageLoaded) { + _buttonStack.hidden = NO; // show controls + _noImageLoaded = NO; + } + + _showSettings->isArchive = false; + _showSettings->isFolder = true; + + // show/hide button + [self updateUIAfterLoad]; + + self.needsDisplay = YES; + return YES; } -- (BOOL)loadTextureFromArchive:(const char*)filename timestamp:(double)timestamp +- (BOOL)loadTextureFromArchive { + // now lookup the filename and data at that entry + const auto& entry = _zip.zipEntrys()[_fileArchiveIndex]; + const char* filename = entry.filename; + double timestamp = (double)entry.modificationDate; + + // have already filtered filenames out, so this should never get hit if (!(//endsWithExtension(filename, ".png") || endsWithExtension(filename, ".ktx") || endsWithExtension(filename, ".ktx2")) ) @@ -2161,6 +2310,7 @@ - (BOOL)loadTextureFromArchive:(const char*)filename timestamp:(double)timestamp } _showSettings->isArchive = true; + _showSettings->isFolder = false; // show/hide button [self updateUIAfterLoad]; @@ -2181,57 +2331,98 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { return NO; } - if (endsWithExtension(filename, ".zip")) { - auto archiveTimestamp = FileHelper::modificationTimestamp(filename); + // this likely means it's a local file directory + if (strchr(filename, '.') == nullptr) { + // make list of all file in the directory - if (!self.imageURL || (!([self.imageURL isEqualTo:url])) || (self.lastArchiveTimestamp != archiveTimestamp)) { + if (!self.imageURL || (!([self.imageURL isEqualTo:url]))) { - // copy this out before it's replaced - string existingFilename; - if (self.lastArchiveTimestamp) - existingFilename = _zip.zipEntrys()[_fileIndex].filename; - BOOL isArchiveLoaded = [self loadArchive:filename]; - if (!isArchiveLoaded) { + NSDirectoryEnumerator *directoryEnumerator = [[NSFileManager defaultManager] enumeratorAtURL:url includingPropertiesForKeys:[NSArray array] options:0 errorHandler://nil + ^BOOL(NSURL *url, NSError *error) { + macroUnusedVar(url); + macroUnusedVar(error); + + // handle error return NO; + } + ]; + + vector files; + while (NSURL *fileOrDirectoryURL = [directoryEnumerator nextObject]) { + const char* name = fileOrDirectoryURL.fileSystemRepresentation; + + // filter only types that are supported + if (endsWithExtension(name, ".ktx") || + endsWithExtension(name, ".ktx2") + // || endsWithExtension(name, ".png") // TODO: can't support with KTXImage load path, needs PNG loader + + ) + { + files.push_back(name); + } } - // store the archive url - self.imageURL = url; - self.lastArchiveTimestamp = archiveTimestamp; + // don't change to this folder if it's devoid of content + if (files.empty()) { + return NO; + } // add it to recent docs NSDocumentController* dc = [NSDocumentController sharedDocumentController]; [dc noteNewRecentDocumentURL:url]; - // now reload the filename if needed - const ZipEntry* formerEntry = _zip.zipEntry(existingFilename.c_str()); - if (formerEntry) { - // lookup the index in the remapIndices table - _fileIndex = (uintptr_t)(formerEntry - &_zip.zipEntrys().front()); - } - else { - _fileIndex = 0; + // sort them + sort(files.begin(), files.end()); + + // replicate archive logic below + + self.imageURL = url; + + // preserve old folder + string existingFilename; + if 
(_fileFolderIndex < (int32_t)_folderFiles.size()) + existingFilename = _folderFiles[_fileFolderIndex]; + else + _fileFolderIndex = 0; + + _folderFiles = files; + + // TODO: preserve filename before load, and restore that index, by finding that name in refreshed folder list + + if (!existingFilename.empty()) { + uint32_t index = 0; + for (const auto& fileIt : _folderFiles) { + if (fileIt == existingFilename) { + break; + } + } + + _fileFolderIndex = index; } } - const auto& entry =_zip.zipEntrys()[_fileIndex]; - const char* filename = entry.filename; - double timestamp = entry.modificationDate; - + // now load image from directory + _showSettings->isArchive = false; + _showSettings->isFolder = true; + + // now load the file at the index setErrorLogCapture(true); - BOOL success = [self loadTextureFromArchive:filename timestamp:timestamp]; + BOOL success = [self loadTextureFromFolder]; if (!success) { + // get back error text from the failed load string errorText; getErrorLogCaptureText(errorText); setErrorLogCapture(false); + const string& filename = _folderFiles[_fileFolderIndex]; + // prepend filename string finalErrorText; append_sprintf(finalErrorText, - "Could not load from archive:\n %s\n", filename); + "Could not load from folder:\n %s\n", filename.c_str()); finalErrorText += errorText; [self setHudText: finalErrorText.c_str()]; @@ -2240,8 +2431,12 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { setErrorLogCapture(false); return success; } - - if (!(endsWithExtension(filename, ".png") || + + //------------------- + + // file is not a supported extension + if (!(endsWithExtension(filename, ".zip") || + endsWithExtension(filename, ".png") || endsWithExtension(filename, ".ktx") || endsWithExtension(filename, ".ktx2")) ) { @@ -2249,19 +2444,88 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { string finalErrorText; append_sprintf(finalErrorText, - "Could not load from archive:\n %s\n", filename); + "Could not load from file:\n %s\n", filename); finalErrorText += errorText; [self setHudText: finalErrorText.c_str()]; return NO; } + + //------------------- + + if (endsWithExtension(filename, ".zip")) { + auto archiveTimestamp = FileHelper::modificationTimestamp(filename); + + if (!self.imageURL || (!([self.imageURL isEqualTo:url])) || (self.lastArchiveTimestamp != archiveTimestamp)) { + + // copy this out before it's replaced + string existingFilename; + if (_fileArchiveIndex < (int32_t)_zip.zipEntrys().size()) + existingFilename = _zip.zipEntrys()[_fileArchiveIndex].filename; + else + _fileArchiveIndex = 0; + + BOOL isArchiveLoaded = [self loadArchive:filename]; + if (!isArchiveLoaded) { + return NO; + } + + // store the archive url + self.imageURL = url; + self.lastArchiveTimestamp = archiveTimestamp; + + // add it to recent docs + NSDocumentController* dc = [NSDocumentController sharedDocumentController]; + [dc noteNewRecentDocumentURL:url]; + + // now reload the filename if needed + if (!existingFilename.empty()) { + const ZipEntry* formerEntry = _zip.zipEntry(existingFilename.c_str()); + if (formerEntry) { + // lookup the index in the remapIndices table + _fileArchiveIndex = (uintptr_t)(formerEntry - &_zip.zipEntrys().front()); + } + else { + _fileArchiveIndex = 0; + } + } + } + + setErrorLogCapture(true); + BOOL success = [self loadTextureFromArchive]; + + if (!success) { + // get back error text from the failed load + string errorText; + getErrorLogCaptureText(errorText); + setErrorLogCapture(false); + + const auto& entry =_zip.zipEntrys()[_fileArchiveIndex]; + const char* filename = 
entry.filename; + + // prepend filename + string finalErrorText; + append_sprintf(finalErrorText, + "Could not load from archive:\n %s\n", filename); + finalErrorText += errorText; + + [self setHudText: finalErrorText.c_str()]; + } + + setErrorLogCapture(false); + return success; + } + + //------------------- + Renderer* renderer = (Renderer*)self.delegate; setErrorLogCapture(true); BOOL success = [renderer loadTexture:url]; if (!success) { + // get back error text from the failed load string errorText; getErrorLogCaptureText(errorText); setErrorLogCapture(false); @@ -2310,6 +2574,7 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { } _showSettings->isArchive = false; + _showSettings->isFolder = false; // show/hide button [self updateUIAfterLoad]; diff --git a/libkram/kram/KramLog.cpp b/libkram/kram/KramLog.cpp index dd58e523..1d0c2d4b 100644 --- a/libkram/kram/KramLog.cpp +++ b/libkram/kram/KramLog.cpp @@ -122,6 +122,8 @@ bool endsWith(const string& value, const string& ending) if (ending.size() > value.size()) { return false; } + + // reverse comparison at end of value return equal(ending.rbegin(), ending.rend(), value.rbegin()); } diff --git a/libkram/kram/KramZipHelper.cpp b/libkram/kram/KramZipHelper.cpp index a41a1141..49aa5b27 100644 --- a/libkram/kram/KramZipHelper.cpp +++ b/libkram/kram/KramZipHelper.cpp @@ -44,6 +44,22 @@ bool ZipHelper::openForRead(const uint8_t* zipData_, uint64_t zipDataSize) { // return true; } +void ZipHelper::filterExtensions(const vector& extensions) { + + vector zipEntrysFiltered; + + std::copy_if(_zipEntrys.begin(), _zipEntrys.end(), std::back_inserter(zipEntrysFiltered), [&extensions](const auto& zipEntry) { + for (const auto& ext : extensions) { + if (endsWithExtension(zipEntry.filename, ext)) { + return true; + } + } + return false; + }); + + _zipEntrys = zipEntrysFiltered; +} + void ZipHelper::close() { if (zip != nullptr) { mz_zip_end(zip.get()); diff --git a/libkram/kram/KramZipHelper.h b/libkram/kram/KramZipHelper.h index e224c7f3..cf5208aa 100644 --- a/libkram/kram/KramZipHelper.h +++ b/libkram/kram/KramZipHelper.h @@ -35,6 +35,9 @@ struct ZipHelper { bool openForRead(const uint8_t* zipData, uint64_t zipDataSize); void close(); + // Only keep entries that match the extensions provided + void filterExtensions(const vector& extensions); + // buffer is resized if smaller, can use to lookat headers (f.e. 
ktx or mod) // the zip decodes only the length of the buffer passed in, and this should be small // since an iterator is called once to extract data From 1bacf05d78b921d8df17aa11bc74296cb7724a7e Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 31 May 2021 23:11:54 -0700 Subject: [PATCH 103/901] kramv - turn on sandbox --- kram-thumb/KramThumbnailProvider.mm | 2 +- kramv/KramShaders.metal | 16 +++++++--------- kramv/kramv.entitlements | 2 +- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/kram-thumb/KramThumbnailProvider.mm b/kram-thumb/KramThumbnailProvider.mm index 2f31350d..e65e6514 100644 --- a/kram-thumb/KramThumbnailProvider.mm +++ b/kram-thumb/KramThumbnailProvider.mm @@ -128,7 +128,7 @@ - (void)provideThumbnailForFileRequest:(QLFileThumbnailRequest *)request complet vector dstMipData; // want to just decode one chunk of the level that was unpacked abovve - if (!decoder.decodeBlocks(w, h, mipData.data(), mipData.size(), image.pixelFormat, dstMipData, params)) { + if (!decoder.decodeBlocks(w, h, mipData.data(), (int32_t)mipData.size(), image.pixelFormat, dstMipData, params)) { KLOGF("kramv %s failed to decode blocks\n", filename); return NO; } diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index b064f983..faad3b9e 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -553,8 +553,8 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n) { return albedo; } -// TODO: eliminate the toUnorm() calls below, rendering to rgba16f -// but also need to remove conversion code on cpu side expecting unorm in eyedropper +// TODO: eliminate the toUnorm() calls below, rendering to rgba16f but then present +// doesn't have enough info to remap 16F to the display. float4 DrawPixels( ColorInOut in [[stage_in]], @@ -1109,7 +1109,7 @@ kernel void SampleImageCS( uint2 uv = uniforms.uv; // tie into texture lookup // uv >>= uniforms.mipLOD; - // the color returned is linear + // the color is returned to linear rgba32f float4 color = colorMap.read(uv, uniforms.mipLOD); result.write(color, index); } @@ -1128,7 +1128,7 @@ kernel void SampleImageArrayCS( uint arrayOrSlice = uniforms.arrayOrSlice; - // the color returned is linear + // the color is returned to linear rgba32f float4 color = colorMap.read(uv, arrayOrSlice, uniforms.mipLOD); result.write(color, index); } @@ -1147,9 +1147,7 @@ kernel void SampleCubeCS( uint face = uniforms.face; - // This writes out linear float32, can do srgb conversion on cpu side - - // the color returned is linear + // the color is returned to linear rgba32f float4 color = colorMap.read(uv, face, uniforms.mipLOD); result.write(color, index); } @@ -1170,7 +1168,7 @@ kernel void SampleCubeArrayCS( uint face = uniforms.face; uint arrayOrSlice = uniforms.arrayOrSlice; - // the color returned is linear + // the color is returned to linear rgba32f float4 color = colorMap.read(uv, face, arrayOrSlice, uniforms.mipLOD); result.write(color, index); } @@ -1187,7 +1185,7 @@ kernel void SampleVolumeCS( uint3 uv = uint3(uniforms.uv, uniforms.arrayOrSlice); // tie into texture lookup //uv >>= uniforms.mipLOD); - // the color returned is linear + // the color is returned to linear rgba32f float4 color = colorMap.read(uv, uniforms.mipLOD); result.write(color, index); } diff --git a/kramv/kramv.entitlements b/kramv/kramv.entitlements index 311b32bd..18aff0ce 100644 --- a/kramv/kramv.entitlements +++ b/kramv/kramv.entitlements @@ -3,7 +3,7 @@ com.apple.security.app-sandbox - + com.apple.security.files.user-selected.read-only From 
5e112685d38b32ec0bde0fd69dabfd5e5647b484 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 1 Jun 2021 08:25:03 -0700 Subject: [PATCH 104/901] kram - fix win build on ZipHelper copy_if needs include --- libkram/kram/KramZipHelper.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libkram/kram/KramZipHelper.cpp b/libkram/kram/KramZipHelper.cpp index 49aa5b27..6e79ee5b 100644 --- a/libkram/kram/KramZipHelper.cpp +++ b/libkram/kram/KramZipHelper.cpp @@ -9,6 +9,9 @@ //#include //#include +#include // for copy_if +#include + #include "miniz.h" namespace kram { From 3cfbca7410784e4521cb1f9ff9d15b3d47cea1a9 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 1 Jun 2021 08:45:19 -0700 Subject: [PATCH 105/901] kram - one more time Win stl fix --- libkram/kram/KramZipHelper.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libkram/kram/KramZipHelper.cpp b/libkram/kram/KramZipHelper.cpp index 6e79ee5b..7f100aea 100644 --- a/libkram/kram/KramZipHelper.cpp +++ b/libkram/kram/KramZipHelper.cpp @@ -9,7 +9,8 @@ //#include //#include -#include // for copy_if +#include // for copy_if on Win +#include // for copy_if on Win #include #include "miniz.h" From 92ad9811edc9b8323bbb9b0712064ffddafb70c3 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Fri, 4 Jun 2021 22:20:51 -0700 Subject: [PATCH 106/901] kram - added KTXImageData to isolate the memory backing a KTXImage There were too many steps to wrap mmap, file helper, and alias data from a bundle. Switch FileHelper to size_t. Used to be int, but that's limited to 32-bit. Simplify kram setup code for dealing with files. PNG is still not tied into KTXImage so has to replicate some things. Bubbling KTXImage up and out of loading. It's useful to decode blocks and reference in the viewer. --- kramv/KramRenderer.mm | 52 +++----- kramv/KramViewerMain.mm | 116 ++++++++++------ libkram/kram/KTXImage.cpp | 29 ++-- libkram/kram/KTXImage.h | 3 +- libkram/kram/Kram.cpp | 229 ++++++++++++++++++-------------- libkram/kram/Kram.h | 17 +++ libkram/kram/KramFileHelper.cpp | 102 ++------------ libkram/kram/KramFileHelper.h | 12 +- 8 files changed, 276 insertions(+), 284 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 0b53f5ba..92defb1d 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -511,13 +511,10 @@ - (void)_loadAssets } -- (BOOL)loadTextureFromData:(const string&)fullFilename - timestamp:(double)timestamp - imageData:(nonnull const uint8_t*)imageData - imageDataLength:(uint64_t)imageDataLength - imageNormalData:(nullable const uint8_t*)imageNormalData - imageNormalDataLength:(uint64_t)imageNormalDataLength - +- (BOOL)loadTextureFromImage:(const string&)fullFilename + timestamp:(double)timestamp + image:(kram::KTXImage&)image + imageNormal:(kram::KTXImage*)imageNormal { // image can be decoded to rgba8u if platform can't display format natively // but still want to identify blockSize from original format @@ -527,42 +524,28 @@ - (BOOL)loadTextureFromData:(const string&)fullFilename (fullFilename != _showSettings->lastFilename) || (timestamp != _showSettings->lastTimestamp); - if (isTextureChanged) { - // synchronously cpu upload from ktx file to buffer, with eventual gpu blit from buffer to returned texture + if (isTextureChanged) { + // synchronously cpu upload from ktx file to buffer, with eventual gpu blit from buffer to returned texture. TODO: If buffer is full, then something needs to keep KTXImage and data alive. This load may also decode the texture to RGBA8. 
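        // As a rough sketch (names taken from the KTXImageData and loadTextureFromImage additions
        // in this patch; the actual call site is in KramViewerMain.mm), the intended usage is:
        //
        //   KTXImage image;
        //   KTXImageData imageData;                 // keeps the mmap or file buffer alive
        //   if (imageData.open(fullFilename.c_str(), image)) {
        //       [renderer loadTextureFromImage:fullFilename timestamp:timestamp
        //                                image:image imageNormal:nullptr];
        //   }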
+ MTLPixelFormat originalFormatMTL = MTLPixelFormatInvalid; - id texture = [_loader loadTextureFromData:imageData imageDataLength:imageDataLength originalFormat:&originalFormatMTL]; + id texture = [_loader loadTextureFromImage:image originalFormat:&originalFormatMTL]; if (!texture) { return NO; } // hacking in the normal texture here, so can display them together during preview id normalTexture; - if (imageNormalData) { - KTXImage imageNormal; - if (imageNormal.open(imageNormalData, imageNormalDataLength, true)) { - // only have shaders that expects diffuse/normal to be same texture type - if (imageNormal.textureType == (MyMTLTextureType)texture.textureType && - (imageNormal.textureType == MyMTLTextureType2D || imageNormal.textureType == MyMTLTextureType2DArray)) - { - normalTexture = [_loader loadTextureFromData:imageNormalData imageDataLength:imageNormalDataLength originalFormat:nil]; - if (!normalTexture) { - return NO; - } - } + if (imageNormal) { + normalTexture = [_loader loadTextureFromImage:*imageNormal originalFormat:nil]; + if (!normalTexture) { + return NO; } } + // archive shouldn't contain png, so only support ktx/ktx2 here - // TODO: have loader return KTXImage instead of parsing it again - // then can decode blocks in kramv - KTXImage sourceImage; - bool isInfoOnly = true; - if (!sourceImage.open(imageData, imageDataLength, isInfoOnly)) { - return NO; - } - - _showSettings->imageInfo = kramInfoKTXToString(fullFilename, sourceImage, false); - _showSettings->imageInfoVerbose = kramInfoKTXToString(fullFilename, sourceImage, true); + _showSettings->imageInfo = kramInfoKTXToString(fullFilename, image, false); + _showSettings->imageInfoVerbose = kramInfoKTXToString(fullFilename, image, true); _showSettings->originalFormat = (MyMTLPixelFormat)originalFormatMTL; _showSettings->decodedFormat = (MyMTLPixelFormat)texture.pixelFormat; @@ -597,13 +580,16 @@ - (BOOL)loadTexture:(nonnull NSURL *)url // image can be decoded to rgba8u if platform can't display format natively // but still want to identify blockSize from original format if (isTextureChanged) { - // synchronously cpu upload from ktx file to texture + MTLPixelFormat originalFormatMTL = MTLPixelFormatInvalid; id texture = [_loader loadTextureFromURL:url originalFormat:&originalFormatMTL]; if (!texture) { return NO; } + // This doesn't look for or load corresponding normal map, but should + + // TODO:: this reloads KTXImage twice over _showSettings->imageInfo = kramInfoToString(fullFilename, false); _showSettings->imageInfoVerbose = kramInfoToString(fullFilename, true); diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index d0166c32..f1617780 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -21,6 +21,7 @@ #include "KramLog.h" #include "KramMipper.h" +#include "Kram.h" #include "KramFileHelper.h" #include "KramMmapHelper.h" #include "KramZipHelper.h" @@ -2134,27 +2135,10 @@ - (BOOL)loadTextureFromFolder { return NO; } - - const uint8_t* imageData = nullptr; - uint64_t imageDataLength = 0; - - // TODO: assuming can mmap here, but may need FileHelper fallback - MmapHelper imageMmap; - if (!imageMmap.open(filename)) { - return NO; - } - - imageData = imageMmap.data(); - imageDataLength = imageMmap.dataLength(); - - // see if this is albedo, and then search for normal map in the same archive - const uint8_t* imageNormalData = nullptr; - uint64_t imageNormalDataLength = 0; - MmapHelper imageNormalMmap; + // first only do this on albedo/diffuse textures string normalFilename = filename; - // first only 
do this on albedo/diffuse textures string search = "-a.ktx"; auto searchPos = normalFilename.find(search); bool isFound = searchPos != string::npos; @@ -2177,22 +2161,42 @@ - (BOOL)loadTextureFromFolder } } - if (isFound) { - if (imageNormalMmap.open(normalFilename.c_str())) { - imageNormalData = imageNormalMmap.data(); - imageNormalDataLength = imageNormalMmap.dataLength(); - } + if (!isFound) { + normalFilename.clear(); } } + //------------------------------- + + KTXImage image; + KTXImageData imageDataKTX; + + KTXImage imageNormal; + KTXImageData imageNormalDataKTX; + bool hasNormal = false; + string fullFilename = filename; + if (!imageDataKTX.open(fullFilename.c_str(), image)) { + return NO; + } + + if (isFound && imageNormalDataKTX.open(normalFilename.c_str(), imageNormal)) { + + // shaders only pull from albedo + normal on these texture types + if (imageNormal.textureType == image.textureType && + (imageNormal.textureType == MyMTLTextureType2D || + imageNormal.textureType == MyMTLTextureType2DArray)) + { + hasNormal = true; + } + } + Renderer* renderer = (Renderer*)self.delegate; - if (![renderer loadTextureFromData:fullFilename timestamp:(double)timestamp - imageData:imageData imageDataLength:imageDataLength - imageNormalData:imageNormalData imageNormalDataLength:imageNormalDataLength]) - { + if (![renderer loadTextureFromImage:fullFilename timestamp:timestamp image:image imageNormal:hasNormal ? &imageNormal : nullptr]) { return NO; } + + //------------------------------- // set title to filename, chop this to just file+ext, not directory const char* filenameShort = strrchr(filename, '/'); @@ -2244,16 +2248,6 @@ - (BOOL)loadTextureFromArchive return NO; } - const uint8_t* imageData = nullptr; - uint64_t imageDataLength = 0; - if (!_zip.extractRaw(filename, &imageData, imageDataLength)) { - return NO; - } - - // see if this is albedo, and then search for normal map in the same archive - const uint8_t* imageNormalData = nullptr; - uint64_t imageNormalDataLength = 0; - string normalFilename = filename; // first only do this on albedo/diffuse textures @@ -2268,22 +2262,62 @@ - (BOOL)loadTextureFromArchive } if (isFound) { - normalFilename = normalFilename.replace(searchPos, search.length(), "-n.ktx"); // works for ktx or ktx2 file + normalFilename = normalFilename.replace(searchPos, search.length(), "-n.ktx"); // works for + } + //--------------------------- + + const uint8_t* imageData = nullptr; + uint64_t imageDataLength = 0; + + if (!_zip.extractRaw(filename, &imageData, imageDataLength)) { + return NO; + } + + const uint8_t* imageNormalData = nullptr; + uint64_t imageNormalDataLength = 0; + + // see if this is albedo, and then search for normal map in the same archive + if (isFound) { if (!_zip.extractRaw(normalFilename.c_str(), &imageNormalData, imageNormalDataLength)) { // ignore failure case here, this is just guessing there's a -n file } } + //--------------------------- + + // files in archive are just offsets into the mmap + // That's why we can't just pass filenames to the renderer + KTXImage image; + KTXImageData imageDataKTX; + if (!imageDataKTX.open(imageData, imageDataLength, image)) { + return NO; + } + + KTXImage imageNormal; + KTXImageData imageNormalDataKTX; + bool hasNormal = false; + if (isFound && imageNormalDataKTX.open(imageNormalData, imageNormalDataLength, imageNormal)) { + + // shaders only pull from albedo + normal on these texture types + if (imageNormal.textureType == image.textureType && + (imageNormal.textureType == MyMTLTextureType2D || + 
imageNormal.textureType == MyMTLTextureType2DArray)) + { + hasNormal = true; + } + } + string fullFilename = filename; Renderer* renderer = (Renderer*)self.delegate; - if (![renderer loadTextureFromData:fullFilename timestamp:(double)timestamp - imageData:imageData imageDataLength:imageDataLength - imageNormalData:imageNormalData imageNormalDataLength:imageNormalDataLength]) + if (![renderer loadTextureFromImage:fullFilename timestamp:(double)timestamp + image:image imageNormal:hasNormal ? &imageNormal : nullptr]) { return NO; } + //--------------------------------- + // set title to filename, chop this to just file+ext, not directory const char* filenameShort = strrchr(filename, '/'); if (filenameShort == nullptr) { diff --git a/libkram/kram/KTXImage.cpp b/libkram/kram/KTXImage.cpp index 9ee2ee67..1dd717f9 100644 --- a/libkram/kram/KTXImage.cpp +++ b/libkram/kram/KTXImage.cpp @@ -1502,11 +1502,7 @@ bool KTXImage::openKTX2(const uint8_t* imageData, size_t imageDataLength, bool i // compute the decompressed size // Note: initMipLevels computes but doesn't store this - fileDataLength = mipLevels.back().offset + mipLevels.back().length * numChunks; - - // DONE: this memory is held in the class to keep it alive, mmap is no longer used - imageDataFromKTX2.resize(fileDataLength, 0); - fileData = imageDataFromKTX2.data(); + reserveImageData(); // TODO: may need to fill out length field in fileData @@ -1601,22 +1597,33 @@ bool KTXImage::unpackLevel(uint32_t mipNumber, const uint8_t* srcData, uint8_t* } vector& KTXImage::imageData() { - return imageDataFromKTX2; + return _imageData; } void KTXImage::reserveImageData() { int32_t numChunks = totalChunks(); // on KTX1 the last mip is the smallest and last in the file + // on KTX2 the first mip is the largest and last in the file + const auto& firstMip = mipLevels[0]; const auto& lastMip = mipLevels[header.numberOfMipmapLevels-1]; - size_t totalKTXSize = + + size_t firstMipOffset = + firstMip.offset + firstMip.length * numChunks; + size_t lastMipOffset = lastMip.offset + lastMip.length * numChunks; - imageDataFromKTX2.resize(totalKTXSize); - memset(imageDataFromKTX2.data(), 0, totalKTXSize); + size_t totalSize = max(firstMipOffset, lastMipOffset); - fileDataLength = totalKTXSize; - fileData = imageDataFromKTX2.data(); + reserveImageData(totalSize); } +void KTXImage::reserveImageData(size_t totalSize) { + + _imageData.resize(totalSize); + memset(_imageData.data(), 0, totalSize); + + fileDataLength = totalSize; + fileData = _imageData.data(); +} } // namespace kram diff --git a/libkram/kram/KTXImage.h b/libkram/kram/KTXImage.h index 771b3d9c..e4a15441 100644 --- a/libkram/kram/KTXImage.h +++ b/libkram/kram/KTXImage.h @@ -299,6 +299,7 @@ class KTXImage { // this is where KTXImage holds all mip data internally void reserveImageData(); + void reserveImageData(size_t totalSize); vector& imageData(); // for KTX2 files, the mips can be compressed using various encoders @@ -332,7 +333,7 @@ class KTXImage { bool openKTX2(const uint8_t* imageData, size_t imageDataLength, bool isInfoOnly); // ktx2 mips are uncompressed to convert back to ktx1, but without the image offset - vector imageDataFromKTX2; + vector _imageData; public: // TODO: bury this MyMTLTextureType textureType = MyMTLTextureType2D; diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index b4d4c75d..85d68b09 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -34,6 +34,67 @@ namespace kram { using namespace std; + + +bool KTXImageData::open(const char* filename, KTXImage& image) 
{ + bool useMmap = true; + if (!mmapHelper.open(filename)) { + useMmap = false; + + // open file, copy it to memory, then close it + FileHelper fileHelper; + if (!fileHelper.open(filename, "rb")) { + return false; + } + + // read the file into memory + size_t size = fileHelper.size(); + if (size == (size_t)-1) { + return false; + } + + fileData.resize(size); + if (!fileHelper.read(fileData.data(), size)) { + return false; + } + } + + // read the KTXImage in from the data, it will alias mmap or fileData + if (useMmap) { + if (!image.open(mmapHelper.data(), mmapHelper.dataLength(), isInfoOnly)) { + return false; + } + } + else { + if (!image.open(fileData.data(), fileData.size(), isInfoOnly)) { + return false; + } + } + + return true; +} + +bool KTXImageData::open(const uint8_t* data, size_t dataSize, KTXImage& image) +{ + if (!image.open(data, dataSize, isInfoOnly)) { + return false; + } + return true; +} + +// decoding reads a ktx file into KTXImage (not Image) +bool SetupSourceKTX(KTXImageData& srcImageData, + const string& srcFilename, + KTXImage& sourceImage) +{ + if (!srcImageData.open(srcFilename.c_str(), sourceImage)) { + KLOGE("Kram", "File input \"%s\" could not be opened for read.\n", + srcFilename.c_str()); + return false; + } + return true; +} + // Twiddle pixels or blocks into Morton order. Usually this is done during the upload of // linear-order block textures. But on some platforms may be able to directly use the block // and pixel data if organized in the exact twiddle order the hw uses. @@ -205,8 +266,8 @@ bool SetupTmpFile(FileHelper& tmpFileHelper, const char* suffix) return tmpFileHelper.openTemporaryFile(suffix, "w+b"); } -bool SetupSourceImage(MmapHelper& mmapHelper, FileHelper& fileHelper, - vector& fileBuffer, +bool SetupSourceImage(//MmapHelper& mmapHelper, FileHelper& fileHelper, + //vector& fileBuffer, const string& srcFilename, Image& sourceImage, bool isPremulSrgb = false, bool isGray = false) { @@ -219,28 +280,17 @@ bool SetupSourceImage(MmapHelper& mmapHelper, FileHelper& fileHelper, return false; } + // TODO: really KTXImageData + MmapHelper mmapHelper; + FileHelper fileHelper; + vector fileData; + // first try mmap, and then use file -> buffer bool useMmap = true; if (!mmapHelper.open(srcFilename.c_str())) { - // fallback to opening file if no mmap support or it didn't work useMmap = false; - } - - if (useMmap) { - if (isKTX) { // really want endsWidth - if (!LoadKtx(mmapHelper.data(), mmapHelper.dataLength(), - sourceImage)) { - return false; // error - } - } - else if (isPNG) { - if (!LoadPng(mmapHelper.data(), mmapHelper.dataLength(), isPremulSrgb, isGray, - sourceImage)) { - return false; // error - } - } - } - else { + + // fallback to opening file if no mmap support or it didn't work if (!fileHelper.open(srcFilename.c_str(), "rb")) { KLOGE("Kram", "File input \"%s\" could not be opened for read.\n", srcFilename.c_str()); @@ -249,68 +299,50 @@ bool SetupSourceImage(MmapHelper& mmapHelper, FileHelper& fileHelper, // read entire png into memory size_t size = fileHelper.size(); - fileBuffer.resize(size); - - if (!fileHelper.read(fileBuffer.data(), size)) { + if (size == (size_t)-1) { return false; } + + fileData.resize(size); - if (isKTX) { - if (!LoadKtx(fileBuffer.data(), fileBuffer.size(), - sourceImage)) { + if (!fileHelper.read(fileData.data(), size)) { + return false; + } + } + + if (isPNG) { + if (useMmap) { + if (!LoadPng(mmapHelper.data(), mmapHelper.dataLength(), isPremulSrgb, isGray, + sourceImage)) { return false; // error } } - else if 
(isPNG) { - if (!LoadPng(fileBuffer.data(), fileHelper.size(), isPremulSrgb, isGray, + else { + if (!LoadPng(fileData.data(), fileData.size(), isPremulSrgb, isGray, sourceImage)) { return false; // error } } } - - return true; -} - -// decoding reads a ktx file into KTXImage (not Image) -bool SetupSourceKTX(MmapHelper& mmapHelper, FileHelper& fileHelper, - vector& fileBuffer, - const string& srcFilename, KTXImage& sourceImage, bool isInfoOnly = false) -{ - // first try mmap, and then use file -> buffer - bool useMmap = true; - if (!mmapHelper.open(srcFilename.c_str())) { - // fallback to file system if no mmap or failed - useMmap = false; - } - - if (useMmap) { - if (!sourceImage.open(mmapHelper.data(), mmapHelper.dataLength(), isInfoOnly)) { - return false; - } - } else { - if (!fileHelper.open(srcFilename.c_str(), "rb")) { - KLOGE("Kram", "File input \"%s\" could not be opened for read.\n", - srcFilename.c_str()); - return false; - } - - // read entire ktx into memory - size_t size = fileHelper.size(); - fileBuffer.resize(size); - if (!fileHelper.read(fileBuffer.data(), size)) { - return false; + if (useMmap) { + if (!LoadKtx(mmapHelper.data(), mmapHelper.dataLength(), sourceImage)) { + return false; // error + } } - - if (!sourceImage.open(fileBuffer.data(), (int32_t)fileBuffer.size(), isInfoOnly)) { - return false; + else { + if (!LoadKtx(fileData.data(), fileData.size(), sourceImage)) { + return false; // error + } } } - + + return true; } + + // better countof in C++11, https://www.g-truc.net/post-0708.html template constexpr size_t countof(T const (&)[N]) noexcept @@ -1281,65 +1313,65 @@ string kramInfoToString(const string& srcFilename, bool isVerbose) bool isPNG = endsWith(srcFilename, ".png"); bool isKTX = endsWith(srcFilename, ".ktx") || endsWith(srcFilename, ".ktx2"); - MmapHelper srcMmapHelper; - FileHelper srcFileHelper; - vector srcFileBuffer; - string info; // handle png and ktx if (isPNG) { + MmapHelper srcMmapHelper; + vector srcFileBuffer; + // This was taken out of SetupSourceImage, dont want to decode PNG yet // just peek at the header. 
- const uint8_t* data = nullptr; - int32_t dataSize = 0; - + // first try mmap, and then use file -> buffer bool useMmap = true; if (!srcMmapHelper.open(srcFilename.c_str())) { // fallback to file system if no mmap or it failed useMmap = false; - } - - if (useMmap) { - data = srcMmapHelper.data(); - dataSize = (int32_t)srcMmapHelper.dataLength(); - } - else { + + FileHelper srcFileHelper; if (!srcFileHelper.open(srcFilename.c_str(), "rb")) { - KLOGE("Kram", "File input \"%s\" could not be opened for read.\n", + KLOGE("Kram", "File input \"%s\" could not be opened for info read.\n", srcFilename.c_str()); return ""; } // read entire png into memory // even though really just want to peek at header - int64_t size = srcFileHelper.size(); - if (size < 0) return ""; + uint64_t size = srcFileHelper.size(); + if (size == (size_t)-1) { + return ""; + } srcFileBuffer.resize(size); if (!srcFileHelper.read(srcFileBuffer.data(), size)) { return ""; } + } + const uint8_t* data = nullptr; + size_t dataSize = 0; + + if (useMmap) { + data = srcMmapHelper.data(); + dataSize = srcMmapHelper.dataLength(); + } + else { data = srcFileBuffer.data(); - dataSize = (int32_t)srcFileBuffer.size(); + dataSize = srcFileBuffer.size(); } + info = kramInfoPNGToString(srcFilename, data, dataSize, isVerbose); } else if (isKTX) { KTXImage srcImage; - - // This means don't convert to KTX1, keep original data/offsets - // and also skip decompressing the mips - bool isInfoOnly = true; + KTXImageData srcImageData; - // Note: could change to not read any mips - bool success = SetupSourceKTX(srcMmapHelper, srcFileHelper, srcFileBuffer, - srcFilename, srcImage, isInfoOnly); + bool success = SetupSourceKTX(srcImageData, srcFilename, srcImage); if (!success) { - KLOGE("Kram", "info couldn't open ktx file"); + KLOGE("Kram", "File input \"%s\" could not be opened for info read.\n", + srcFilename.c_str()); return ""; } @@ -1740,13 +1772,10 @@ static int32_t kramAppDecode(vector& args) } KTXImage srcImage; - MmapHelper srcMmapHelper; - FileHelper srcFileHelper; + KTXImageData srcImageData; FileHelper tmpFileHelper; - vector srcFileBuffer; - - bool success = SetupSourceKTX(srcMmapHelper, srcFileHelper, srcFileBuffer, - srcFilename, srcImage); + + bool success = SetupSourceKTX(srcImageData, srcFilename, srcImage); // TODO: for hdr decode, may need to walk blocks or ask caller to pass -hdr flag if (!validateFormatAndDecoder(srcImage.textureType, srcImage.pixelFormat, textureDecoder)) { @@ -2195,13 +2224,9 @@ static int32_t kramAppEncode(vector& args) // The helper keeps ktx mips in mmap alive in case want to read them // incrementally. Fallback to read into fileBuffer if mmap fails. Image srcImage; - MmapHelper srcMmapHelper; - FileHelper srcFileHelper; FileHelper tmpFileHelper; - vector srcFileBuffer; - - bool success = SetupSourceImage(srcMmapHelper, srcFileHelper, srcFileBuffer, - srcFilename, srcImage, isPremulRgb, isGray); + + bool success = SetupSourceImage(srcFilename, srcImage, isPremulRgb, isGray); if (success) { success = SetupTmpFile(tmpFileHelper, isDstKTX2 ? 
".ktx2" : ".ktx"); diff --git a/libkram/kram/Kram.h b/libkram/kram/Kram.h index 5d715e97..4ad5eceb 100644 --- a/libkram/kram/Kram.h +++ b/libkram/kram/Kram.h @@ -5,6 +5,7 @@ #pragma once #include +#include "KramMmapHelper.h" namespace kram { using namespace std; @@ -12,6 +13,22 @@ using namespace std; class Image; class KTXImage; +// This helper needs to stay alive since KTXImage aliases it +// May be able to fold these into KTXImage since it has an internal vector already +class KTXImageData { +public: + // class keeps the data alive in mmapHelper or fileData + bool open(const char* filename, KTXImage& image); + + // class aliases data, so caller must keep alive. Useful with bundle. + bool open(const uint8_t* data, size_t dataSize, KTXImage& image); + +private: + MmapHelper mmapHelper; + vector fileData; + bool isInfoOnly = true; +}; + // helpers to source from a png or single level of a ktx bool LoadKtx(const uint8_t* data, size_t dataSize, Image& sourceImage); bool LoadPng(const uint8_t* data, size_t dataSize, bool isPremulSrgb, bool isGray, Image& sourceImage); diff --git a/libkram/kram/KramFileHelper.cpp b/libkram/kram/KramFileHelper.cpp index 67129de4..32c7f712 100644 --- a/libkram/kram/KramFileHelper.cpp +++ b/libkram/kram/KramFileHelper.cpp @@ -56,16 +56,16 @@ bool FileHelper::openTemporaryFile(const char* suffix, const char* access) return true; } -bool FileHelper::read(uint8_t* data, int dataSize) +bool FileHelper::read(uint8_t* data, size_t dataSize) { return FileHelper::readBytes(_fp, data, dataSize); } -bool FileHelper::write(const uint8_t* data, int dataSize) +bool FileHelper::write(const uint8_t* data, size_t dataSize) { return FileHelper::writeBytes(_fp, data, dataSize); } -bool FileHelper::readBytes(FILE* fp, uint8_t* data, int dataSize) +bool FileHelper::readBytes(FILE* fp, uint8_t* data, size_t dataSize) { size_t elementsRead = fread(data, 1, dataSize, fp); if (elementsRead != (size_t)dataSize) { @@ -73,7 +73,7 @@ bool FileHelper::readBytes(FILE* fp, uint8_t* data, int dataSize) } return true; } -bool FileHelper::writeBytes(FILE* fp, const uint8_t* data, int dataSize) +bool FileHelper::writeBytes(FILE* fp, const uint8_t* data, size_t dataSize) { size_t elementsWritten = fwrite(data, 1, dataSize, fp); if (elementsWritten != (size_t)dataSize) { @@ -107,14 +107,14 @@ bool FileHelper::copyTemporaryFileTo(const char* dstFilename) // since we're not closing, need to flush output fflush(_fp); - int size_ = size(); - if (size_ < 0) { + size_t size_ = size(); + if (size_ == (size_t)-1) { return false; } // DONE: copy in smaller buffered chunks - int maxBufferSize = 256*1024; - int bufferSize = min(size_, maxBufferSize); + size_t maxBufferSize = 256*1024; + size_t bufferSize = min(size_, maxBufferSize); vector tmpBuf; tmpBuf.resize(bufferSize); @@ -136,7 +136,7 @@ bool FileHelper::copyTemporaryFileTo(const char* dstFilename) return false; } - int bytesRemaining = size_; + size_t bytesRemaining = size_; while(bytesRemaining > 0) { int bytesToRead = min(bufferSize, bytesRemaining); bytesRemaining -= bytesToRead; @@ -154,84 +154,6 @@ bool FileHelper::copyTemporaryFileTo(const char* dstFilename) return true; } -/* This code was original attempt to move file, but it interfered with unlink of the file - since a closed file was needed for rename() and many many other issues. 
- -bool FileHelper::moveTemporaryFileTo(const char* dstFilename) -{ - if (!_fp) return false; - if (_filename.empty()) return false; - -#if USE_TMPFILEPLUS - fclose(_fp); - - // windows doesn't remove any existing file, so have to do it explicitly - //remove(dstFilename); - // - // now do the rename - // rename on Windows does a copy if different volumes, but no way to identify if it did or moved the file - // so tmp file would need to be auto deleted then. Could call MoveFileEx twice, with and without COPY - // if the move failed. - //bool success = (rename(_filename.c_str(), dstFilename) == 0); - - // this is probably better than remove/rename, and maybe works across volumes/partitions - // this can't replace directories and will fail, only for files - bool success = MoveFileEx(_filename.c_str(), dstFilename, MOVEFILE_REPLACE_EXISTING) == 0; - if (!success) { - MoveFileEx(_filename.c_str(), dstFilename, MOVEFILE_COPY_ALLOWED); - - // since move was expected, delete the source, it's had fclose already called - remove(_filename.c_str()); - } - - // so that close doesn't do another fclose() - _fp = nullptr; - _isTmpFile = false; - _filename.clear(); - -#else - // since we're not closing, need to flush output - fflush(_fp); - - // somehow even though the file isn't closed, can rename it - // if an open temp file is closed, then the following fails since the fd/fp are released. - - // rename() only works if tmp and filename are on same volume - // and must have an actual filename to call this, which tmpfile() doesn't supply - // this removes any old file present - bool success = (rename(_filename.c_str(), dstFilename) == 0); - - // Some impls of rename don't work with directories, but that's all the docs say. - // This is to fix rename also failing on mac/linux if going cross volume, win does copy behind the scenes - // but using USE_TMPFILEPLUS code above instead on Win. 
- if (!success) { - - size_t size_ = size(); - vector tmpBuf; - tmpBuf.resize(size_); - - rewind(_fp); - if (!read(tmpBuf.data(), size_)) { - return false; - } - // need to fopen file on other volume, then buffer copy the contents over to the over drive - FileHelper moveHelper; - if (!moveHelper.open(dstFilename, "w+b")) { - return false; - } - if (!moveHelper.write(tmpBuf.data(), size_)) { - return false; - } - - // close() should delete the original file - } -#endif - // Note: this doesn't change _filename to dstFilename - - return success; -} -*/ - bool FileHelper::open(const char* filename, const char* access) { close(); @@ -258,11 +180,11 @@ void FileHelper::close() _fp = nullptr; } -int64_t FileHelper::size() const +size_t FileHelper::size() const { // returns -1, so can't return size_t if (!_fp) { - return -1; + return (size_t)-1; } // otherwise fstat won't extract the size @@ -272,7 +194,7 @@ int64_t FileHelper::size() const struct stat stats; if (fstat(fd, &stats) < 0) { - return -1; + return (size_t)-1; } return (int64_t)stats.st_size; } diff --git a/libkram/kram/KramFileHelper.h b/libkram/kram/KramFileHelper.h index 64ff56d5..304f3239 100644 --- a/libkram/kram/KramFileHelper.h +++ b/libkram/kram/KramFileHelper.h @@ -32,18 +32,18 @@ class FileHelper { void close(); - // returns -1 if stat call fails - int64_t size() const; + // returns (size_t)-1 if stat call fails + size_t size() const; FILE* pointer() { return _fp; } // safe calls that test bytes read/written - bool read(uint8_t* data, int dataSize); - bool write(const uint8_t* data, int dataSize); + bool read(uint8_t* data, size_t dataSize); + bool write(const uint8_t* data, size_t dataSize); // if caller only has FILE* then can use these - static bool readBytes(FILE* fp, uint8_t* data, int dataSize); - static bool writeBytes(FILE* fp, const uint8_t* data, int dataSize); + static bool readBytes(FILE* fp, uint8_t* data, size_t dataSize); + static bool writeBytes(FILE* fp, const uint8_t* data, size_t dataSize); // return mod stamp on filename static uint64_t modificationTimestamp(const char* filename); From 86bd26a038714cc1a54269a7e67278d62842912e Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Fri, 4 Jun 2021 23:30:10 -0700 Subject: [PATCH 107/901] kramv - add missing files related to simplifying loading --- kram-thumb/KramThumbnailProvider.mm | 39 +++++++++------------------ kramv/KramLoader.h | 10 +++++-- kramv/KramLoader.mm | 41 ++++++++++++++++++++--------- kramv/KramRenderer.h | 11 ++++---- 4 files changed, 54 insertions(+), 47 deletions(-) diff --git a/kram-thumb/KramThumbnailProvider.mm b/kram-thumb/KramThumbnailProvider.mm index e65e6514..cf264062 100644 --- a/kram-thumb/KramThumbnailProvider.mm +++ b/kram-thumb/KramThumbnailProvider.mm @@ -8,7 +8,6 @@ #import "KramThumbnailProvider.h" #include "Kram.h" -#include "KramMmapHelper.h" #include "KramLog.h" #include "KTXImage.h" #include "KramImage.h" // for KramDecoder @@ -40,32 +39,20 @@ - (void)provideThumbnailForFileRequest:(QLFileThumbnailRequest *)request complet // Second way: Draw the thumbnail into a context passed to your block, set up with Core Graphics's coordinate system. 
handler([QLThumbnailReply replyWithContextSize:request.maximumSize drawingBlock:^BOOL(CGContextRef _Nonnull context) { - const char* filename = [request.fileURL fileSystemRepresentation]; - - if (!(endsWith(filename, ".ktx") || endsWith(filename, ".ktx2"))) { - KLOGF("kramv %s only supports ktx/ktx2 files\n", filename); - return NO; - } - - // load the mmap file, and interpret it as a KTXImage - MmapHelper mmapHelper; - if (!mmapHelper.open(filename)) { - KLOGF("kramv %s failed to mmap\n", filename); - return NO; - } - - // TODO: might need to try FileHelper for non-local thumbnails + const char* filename = [request.fileURL fileSystemRepresentation]; + + if (!(endsWith(filename, ".ktx") || endsWith(filename, ".ktx2"))) { + KLOGF("kramv %s only supports ktx/ktx2 files\n", filename); + return NO; + } + + KTXImage image; + KTXImageData imageData; - - // open but leave the image compressed if KTX2 + zstd - bool isInfoOnly = true; - - KTXImage image; - if (!image.open(mmapHelper.data(), mmapHelper.dataLength(), isInfoOnly)) { - KLOGF("kramv %s failed to open\n", filename); - return NO; - } - + if (!imageData.open(filename, image)) { + KLOGF("kramv %s coould not open file\n", filename); + + } // no BC6 or ASTC HDR yet for thumbs, just do LDR first if (isHdrFormat(image.pixelFormat)) { KLOGF("kramv %s doesn't support hdr thumbnails yet\n", filename); diff --git a/kramv/KramLoader.h b/kramv/KramLoader.h index d9ecedda..0c117051 100644 --- a/kramv/KramLoader.h +++ b/kramv/KramLoader.h @@ -18,17 +18,23 @@ #endif +namespace kram { +class KTXImage; +} // This loads KTX and PNG data synchronously. Will likely move to only loading KTX files, with a png -> ktx conversion. // The underlying KTXImage is not yet returned to the caller, but would be useful for prop queries. 
@interface KramLoader : NSObject -// from mem, caller must keep data alive +// from mem, copied to MTLBuffer if available, if not caller must keep mem alive - (nullable id)loadTextureFromData:(nonnull const uint8_t *)imageData imageDataLength:(int32_t)imageDataLength originalFormat:(nullable MTLPixelFormat*)originalFormat; -// from mem, if NSData then caller must keep data alive until blit +// from mem, copied to MTLBuffer if available, if not caller must keep mem alive - (nullable id)loadTextureFromData:(nonnull NSData*)imageData originalFormat:(nullable MTLPixelFormat*)originalFormat; +// load from a KTXImage +- (nullable id)loadTextureFromImage:(const kram::KTXImage&)image originalFormat:(nullable MTLPixelFormat*)originalFormat; + // from url (mmap) - (nullable id)loadTextureFromURL:(nonnull NSURL *)url originalFormat:(nullable MTLPixelFormat*)originalFormat; diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 12f09b79..9367944b 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -94,7 +94,7 @@ bool isDecodeImageNeeded(MyMTLPixelFormat pixelFormat) { return needsDecode; } -bool decodeImage(KTXImage& image, KTXImage& imageDecoded) +bool decodeImage(const KTXImage& image, KTXImage& imageDecoded) { KramDecoderParams decoderParams; KramDecoder decoder; @@ -174,7 +174,12 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { if (!image.open(imageData, imageDataLength, isInfoOnly)) { return nil; } - + + return [self loadTextureFromImage:image originalFormat:originalFormat]; +} + +- (nullable id)loadTextureFromImage:(const KTXImage&)image originalFormat:(nullable MTLPixelFormat*)originalFormat +{ #if SUPPORT_RGB if (isInternalRGBFormat(image.pixelFormat)) { // loads and converts top level mip from RGB to RGBA (RGB0) @@ -311,12 +316,7 @@ static uint32_t numberOfMipmapLevels(const Image& image) { // TODO: could also ignore extension, and look at header/signature instead // files can be renamed to the incorrect extensions string filename = toLower(path); - - MmapHelper mmapHelper; - if (!mmapHelper.open(path)) { - return nil; - } - + if (endsWithExtension(filename.c_str(), ".png")) { // set title to filename, chop this to just file+ext, not directory string filenameShort = filename; @@ -340,14 +340,29 @@ static uint32_t numberOfMipmapLevels(const Image& image) { bool isSRGB = (!isNormal && !isSDF); + MmapHelper mmapHelper; + if (!mmapHelper.open(path)) { + return nil; + } + + // TODO: need FileHelper fallback here + return [self loadTextureFromPNGData:mmapHelper.data() dataSize:(int32_t)mmapHelper.dataLength() isSRGB:isSRGB originalFormat:originalFormat]; } - - // route all data through the version that copies or does sync upload - return [self loadTextureFromData:mmapHelper.data() imageDataLength:(int32_t)mmapHelper.dataLength() originalFormat:originalFormat]; + else { + KTXImage image; + KTXImageData imageData; + + if (!imageData.open(path, image)) { + return nil; + } + + // route all data through the version that copies or does sync upload + return [self loadTextureFromImage:image originalFormat:originalFormat]; + } } -- (nullable id)createTexture:(KTXImage&)image isPrivate:(bool)isPrivate { +- (nullable id)createTexture:(const KTXImage&)image isPrivate:(bool)isPrivate { MTLTextureDescriptor *textureDescriptor = [[MTLTextureDescriptor alloc] init]; // Indicate that each pixel has a blue, green, red, and alpha channel, where each channel is @@ -611,7 +626,7 @@ inline uint64_t alignOffset(uint64_t offset, uint64_t alignment) { // Has a synchronous upload 
via replaceRegion that only works for shared/managed (f.e. ktx), // and another path for private that uses a blitEncoder and must have block aligned data (f.e. ktxa, ktx2). // Could repack ktx data into ktxa before writing to temporary file, or when copying NSData into MTLBuffer. -- (nullable id)blitTextureFromImage:(KTXImage &)image +- (nullable id)blitTextureFromImage:(const KTXImage &)image { if (_buffer == nil) { // this is enough to upload 4k x 4x @ RGBA8u with mips, 8k x 8k compressed with mips @96MB diff --git a/kramv/KramRenderer.h b/kramv/KramRenderer.h index 801ace1e..4f68d4ca 100644 --- a/kramv/KramRenderer.h +++ b/kramv/KramRenderer.h @@ -18,6 +18,7 @@ namespace kram { class ShowSettings; + class KTXImage; } // Our platform independent renderer class. Implements the MTKViewDelegate protocol which @@ -26,12 +27,10 @@ namespace kram { - (nonnull instancetype)initWithMetalKitView:(nonnull MTKView *)view settings:(nonnull kram::ShowSettings*)settings; -- (BOOL)loadTextureFromData:(const std::string&)fullFilename - timestamp:(double)timestamp - imageData:(nonnull const uint8_t*)imageData - imageDataLength:(uint64_t)imageDataLength - imageNormalData:(nullable const uint8_t*)imageNormalData - imageNormalDataLength:(uint64_t)imageNormalDataLength; +- (BOOL)loadTextureFromImage:(const std::string&)fullFilename + timestamp:(double)timestamp + image:(kram::KTXImage&)image + imageNormal:(nullable kram::KTXImage*)imageNormal; - (BOOL)loadTexture:(nonnull NSURL *)url; From 8f40cfc3e8aced4e3325384c5fc9d5908fa9501a Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 5 Jun 2021 00:48:53 -0700 Subject: [PATCH 108/901] Kramv - simplify loader to only use blit path, also kramv can load png into one level of KTXImage This lost the mip support of BlitEncoder, but can add that back later or use Mipper. This opens up pulling PNG from folder, not just KTX/2 files. Need to call openPNG vs. open call, or detect off 4-5 bytes code at beginning of file. Remove the cpu upload path. This is slow and untwiddled, but useful for reference or when synchronous upload required. Nothing is keeping the KTXImage data alive if the staging buffer is flooded. --- kramv/KramLoader.mm | 52 ++++++++++++++-------------- libkram/kram/Kram.cpp | 79 ++++++++++++++++++++++++++++++++++++++++--- libkram/kram/Kram.h | 2 ++ 3 files changed, 101 insertions(+), 32 deletions(-) diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 9367944b..7a55c863 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -216,7 +216,7 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { *originalFormat = (MTLPixelFormat)rbgaImage2.pixelFormat; // TODO: should this return rgbaImage.pixelFormat ? 
} - return [self loadTextureFromImage:rbgaImage2]; + return [self blitTextureFromImage:rbgaImage2]; } #endif @@ -231,7 +231,7 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { return nil; } - return [self loadTextureFromImage:imageDecoded]; + return [self blitTextureFromImage:imageDecoded]; } else #endif @@ -241,6 +241,8 @@ inline MyMTLPixelFormat remapInternalRGBFormat(MyMTLPixelFormat format) { } } +/* + static uint32_t numberOfMipmapLevels(const Image& image) { uint32_t w = image.width(); uint32_t h = image.height(); @@ -254,32 +256,22 @@ static uint32_t numberOfMipmapLevels(const Image& image) { return numberOfMips; } -- (nullable id)loadTextureFromPNGData:(const uint8_t*)data dataSize:(int32_t)dataSize isSRGB:(BOOL)isSRGB originalFormat:(nullable MTLPixelFormat*)originalFormat +- (nullable id)_loadTextureFromPNGData:(const uint8_t*)data dataSize:(int32_t)dataSize isSRGB:(BOOL)isSRGB originalFormat:(nullable MTLPixelFormat*)originalFormat { // can only load 8u and 16u from png, no hdr formats, no premul either, no props // this also doesn't handle strips like done in libkram. - Image sourceImage; - bool isLoaded = LoadPng(data, dataSize, false, false, sourceImage); + // Image sourceImage; + bool isLoaded = LoadPng(data, dataSize, false, false, image); if (!isLoaded) { return nil; } - KTXImage image; - image.width = sourceImage.width(); - image.height = sourceImage.height(); - image.depth = 0; - - image.header.numberOfArrayElements = 0; - image.header.numberOfMipmapLevels = numberOfMipmapLevels(sourceImage); - - image.textureType = MyMTLTextureType2D; - image.pixelFormat = isSRGB ? MyMTLPixelFormatRGBA8Unorm_sRGB : MyMTLPixelFormatRGBA8Unorm; - + // TODO: replace this with code that gens a KTXImage from png (and cpu mips) // instead of needing to use autogenmip that has it's own filters (probably a box) - id texture = [self createTexture:image isPrivate:false]; + id texture = [self createTexture:image isPrivate:true]; if (!texture) { return nil; } @@ -288,6 +280,10 @@ static uint32_t numberOfMipmapLevels(const Image& image) { *originalFormat = (MTLPixelFormat)image.pixelFormat; } + // this means KTXImage must hold data + [self blitTextureFromImage:image]; + + // cpu copy the bytes from the data object into the texture const MTLRegion region = { { 0, 0, 0 }, // MTLOrigin @@ -300,6 +296,7 @@ static uint32_t numberOfMipmapLevels(const Image& image) { mipmapLevel:0 withBytes:sourceImage.pixels().data() bytesPerRow:bytesPerRow]; + // have to schedule autogen inside render using MTLBlitEncoder if (image.header.numberOfMipmapLevels > 1) { @@ -308,6 +305,7 @@ static uint32_t numberOfMipmapLevels(const Image& image) { return texture; } +*/ - (nullable id)loadTextureFromURL:(nonnull NSURL *)url originalFormat:(nullable MTLPixelFormat*)originalFormat { @@ -317,6 +315,9 @@ static uint32_t numberOfMipmapLevels(const Image& image) { // files can be renamed to the incorrect extensions string filename = toLower(path); + KTXImage image; + KTXImageData imageDataKTX; + if (endsWithExtension(filename.c_str(), ".png")) { // set title to filename, chop this to just file+ext, not directory string filenameShort = filename; @@ -339,21 +340,15 @@ static uint32_t numberOfMipmapLevels(const Image& image) { } bool isSRGB = (!isNormal && !isSDF); - - MmapHelper mmapHelper; - if (!mmapHelper.open(path)) { + + if (!imageDataKTX.openPNG(path, isSRGB, image)) { return nil; } - // TODO: need FileHelper fallback here - - return [self loadTextureFromPNGData:mmapHelper.data() 
dataSize:(int32_t)mmapHelper.dataLength() isSRGB:isSRGB originalFormat:originalFormat]; + return [self loadTextureFromImage:image originalFormat:originalFormat]; } else { - KTXImage image; - KTXImageData imageData; - - if (!imageData.open(path, image)) { + if (!imageDataKTX.open(path, image)) { return nil; } @@ -396,6 +391,8 @@ static uint32_t numberOfMipmapLevels(const Image& image) { return texture; } +/* just for reference now + // Has a synchronous upload via replaceRegion that only works for shared/managed (f.e. ktx), // and another path for private that uses a blitEncoder and must have block aligned data (f.e. ktxa, ktx2). // Could repack ktx data into ktxa before writing to temporary file, or when copying NSData into MTLBuffer. @@ -536,6 +533,7 @@ static uint32_t numberOfMipmapLevels(const Image& image) { return texture; } +*/ //-------------------------- diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 85d68b09..5ad78a4c 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -59,21 +59,90 @@ bool KTXImageData::open(const char* filename, KTXImage& image) { } } - // read the KTXImage in from the data, it will alias mmap or fileData + const uint8_t* data; + size_t dataSize; if (useMmap) { - if (!image.open(mmapHelper.data(), mmapHelper.dataLength(), isInfoOnly)) { - return false; - } + data = mmapHelper.data(); + dataSize = mmapHelper.dataLength(); } else { - if (!image.open(fileData.data(), fileData.size(), isInfoOnly)) { + data = fileData.data(); + dataSize = fileData.size(); + } + + // read the KTXImage in from the data, it will alias mmap or fileData + if (!image.open(data, dataSize, isInfoOnly)) { + return false; + } + + return true; +} + +bool KTXImageData::openPNG(const char* filename, bool isSrgb, KTXImage& image) { + bool useMmap = true; + if (!mmapHelper.open(filename)) { + useMmap = false; + + // open file, copy it to memory, then close it + FileHelper fileHelper; + if (!fileHelper.open(filename, "rb")) { + return false; + } + + // read the file into memory + size_t size = fileHelper.size(); + if (size == (size_t)-1) { + return false; + } + + fileData.resize(size); + if (!fileHelper.read(fileData.data(), size)) { return false; } } + const uint8_t* data; + size_t dataSize; + if (useMmap) { + data = mmapHelper.data(); + dataSize = mmapHelper.dataLength(); + } + else { + data = fileData.data(); + dataSize = fileData.size(); + } + + // the mmap/filehelper point to the png data + // use Image to + + Image singleImage; + bool isLoaded = LoadPng(data, dataSize, false, false, singleImage); + if (!isLoaded) { + return false; + } + + // now move the png pixels into the KTXImage + + image.width = singleImage.width(); + image.height = singleImage.height(); + image.depth = 0; + + image.header.numberOfArrayElements = 0; + image.header.numberOfMipmapLevels = 1; + image.textureType = MyMTLTextureType2D; + image.pixelFormat = isSrgb ? 
MyMTLPixelFormatRGBA8Unorm_sRGB : MyMTLPixelFormatRGBA8Unorm; + + // TODO: support mips with blitEncoder or Mipper + // TODO: support chunks, but may need to copy horizontal to vertical + // TODO: png has 16u format useful for heights + + image.reserveImageData(); + memcpy((uint8_t*)image.fileData + image.mipLevels[0].offset, singleImage.pixels().data(), image.levelLength(0)); + return true; } + bool KTXImageData::open(const uint8_t* data, size_t dataSize, KTXImage& image) { if (!image.open(data, dataSize, isInfoOnly)) { diff --git a/libkram/kram/Kram.h b/libkram/kram/Kram.h index 4ad5eceb..2b2051fa 100644 --- a/libkram/kram/Kram.h +++ b/libkram/kram/Kram.h @@ -23,6 +23,8 @@ class KTXImageData { // class aliases data, so caller must keep alive. Useful with bundle. bool open(const uint8_t* data, size_t dataSize, KTXImage& image); + bool openPNG(const char* filename, bool isSrgb, KTXImage& image); + private: MmapHelper mmapHelper; vector fileData; From 197e7d3fbff055578ba4c36632deb4a3eeef0283 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 5 Jun 2021 10:51:38 -0700 Subject: [PATCH 109/901] kramv - more loader improvements Slowly bubbling the KTXImage up to caller. Simplfy loader code in kram too. Expose a close() call in KTXImageData to release memory, and use this in open calls so they don't fail on reuse. --- kramv/KramLoader.h | 9 ++-- kramv/KramLoader.mm | 34 +++++++------ kramv/KramRenderer.mm | 19 +++++-- kramv/KramViewerMain.mm | 2 +- libkram/kram/Kram.cpp | 106 ++++++++++++++++++++++++++-------------- libkram/kram/Kram.h | 10 +++- 6 files changed, 120 insertions(+), 60 deletions(-) diff --git a/kramv/KramLoader.h b/kramv/KramLoader.h index 0c117051..65e4decb 100644 --- a/kramv/KramLoader.h +++ b/kramv/KramLoader.h @@ -20,10 +20,10 @@ namespace kram { class KTXImage; +class KTXImageData; } -// This loads KTX and PNG data synchronously. Will likely move to only loading KTX files, with a png -> ktx conversion. -// The underlying KTXImage is not yet returned to the caller, but would be useful for prop queries. +// This loads KTX/2 and PNG data. Moving towards KTX/2 files only, with a PNG to KTX/2 conversion. 
@interface KramLoader : NSObject // from mem, copied to MTLBuffer if available, if not caller must keep mem alive @@ -33,7 +33,10 @@ class KTXImage; - (nullable id)loadTextureFromData:(nonnull NSData*)imageData originalFormat:(nullable MTLPixelFormat*)originalFormat; // load from a KTXImage -- (nullable id)loadTextureFromImage:(const kram::KTXImage&)image originalFormat:(nullable MTLPixelFormat*)originalFormat; +- (nullable id)loadTextureFromImage:(const kram::KTXImage&)image originalFormat:(nullable MTLPixelFormat*)originalFormat; + +// load into KTXImage and KTXImageData, can use with loadTextureFromImage +- (BOOL)loadImageFromURL:(nonnull NSURL *)url image:(kram::KTXImage&)image imageData:(kram::KTXImageData&)imageData; // from url (mmap) - (nullable id)loadTextureFromURL:(nonnull NSURL *)url originalFormat:(nullable MTLPixelFormat*)originalFormat; diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 7a55c863..a882ed14 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -307,17 +307,14 @@ static uint32_t numberOfMipmapLevels(const Image& image) { } */ -- (nullable id)loadTextureFromURL:(nonnull NSURL *)url originalFormat:(nullable MTLPixelFormat*)originalFormat { - +- (BOOL)loadImageFromURL:(nonnull NSURL *)url image:(KTXImage&)image imageData:(KTXImageData&)imageData +{ const char *path = [url.absoluteURL.path UTF8String]; // TODO: could also ignore extension, and look at header/signature instead // files can be renamed to the incorrect extensions string filename = toLower(path); - KTXImage image; - KTXImageData imageDataKTX; - if (endsWithExtension(filename.c_str(), ".png")) { // set title to filename, chop this to just file+ext, not directory string filenameShort = filename; @@ -341,20 +338,29 @@ static uint32_t numberOfMipmapLevels(const Image& image) { bool isSRGB = (!isNormal && !isSDF); - if (!imageDataKTX.openPNG(path, isSRGB, image)) { - return nil; + if (!imageData.openPNG(path, isSRGB, image)) { + return NO; } - - return [self loadTextureFromImage:image originalFormat:originalFormat]; } else { - if (!imageDataKTX.open(path, image)) { - return nil; + if (!imageData.open(path, image)) { + return NO; } - - // route all data through the version that copies or does sync upload - return [self loadTextureFromImage:image originalFormat:originalFormat]; } + + return YES; +} + +- (nullable id)loadTextureFromURL:(nonnull NSURL *)url originalFormat:(nullable MTLPixelFormat*)originalFormat +{ + KTXImage image; + KTXImageData imageData; + + if (![self loadImageFromURL:url image:image imageData:imageData]) { + return nil; + } + + return [self loadTextureFromImage:image originalFormat:originalFormat]; } - (nullable id)createTexture:(const KTXImage&)image isPrivate:(bool)isPrivate { diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 92defb1d..93da1eda 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -580,18 +580,29 @@ - (BOOL)loadTexture:(nonnull NSURL *)url // image can be decoded to rgba8u if platform can't display format natively // but still want to identify blockSize from original format if (isTextureChanged) { + // TODO: hold onto these, so can reference block data + KTXImage image; + KTXImageData imageData; + + if (![_loader loadImageFromURL:url image:image imageData:imageData]) { + return NO; + } MTLPixelFormat originalFormatMTL = MTLPixelFormatInvalid; - id texture = [_loader loadTextureFromURL:url originalFormat:&originalFormatMTL]; + id texture = [_loader loadTextureFromImage:image originalFormat:&originalFormatMTL]; if (!texture) { 
return NO; } // This doesn't look for or load corresponding normal map, but should - // TODO:: this reloads KTXImage twice over - _showSettings->imageInfo = kramInfoToString(fullFilename, false); - _showSettings->imageInfoVerbose = kramInfoToString(fullFilename, true); + // this is not the png data, but info on converted png to ktx level + // But this avoids loading the image 2 more times + _showSettings->imageInfo = kramInfoKTXToString(fullFilename, image, false); + _showSettings->imageInfoVerbose = kramInfoKTXToString(fullFilename, image, true); + + //_showSettings->imageInfo = kramInfoToString(fullFilename, image, false); + //_showSettings->imageInfoVerbose = kramInfoToString(fullFilename, image, true); _showSettings->originalFormat = (MyMTLPixelFormat)originalFormatMTL; _showSettings->decodedFormat = (MyMTLPixelFormat)texture.pixelFormat; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index f1617780..d8a15a45 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -2262,7 +2262,7 @@ - (BOOL)loadTextureFromArchive } if (isFound) { - normalFilename = normalFilename.replace(searchPos, search.length(), "-n.ktx"); // works for + normalFilename = normalFilename.replace(searchPos, search.length(), "-n.ktx"); } //--------------------------- diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 5ad78a4c..bd8a03d4 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -34,12 +34,19 @@ namespace kram { using namespace std; +template +void releaseVector(vector& v) { + v.clear(); + v.shrink_to_fit(); +} bool KTXImageData::open(const char* filename, KTXImage& image) { - bool useMmap = true; + close(); + + isMmap = true; if (!mmapHelper.open(filename)) { - useMmap = false; + isMmap = false; // open file, copy it to memory, then close it FileHelper fileHelper; @@ -61,7 +68,7 @@ bool KTXImageData::open(const char* filename, KTXImage& image) { const uint8_t* data; size_t dataSize; - if (useMmap) { + if (isMmap) { data = mmapHelper.data(); dataSize = mmapHelper.dataLength(); } @@ -71,17 +78,34 @@ bool KTXImageData::open(const char* filename, KTXImage& image) { } // read the KTXImage in from the data, it will alias mmap or fileData - if (!image.open(data, dataSize, isInfoOnly)) { + bool isLoaded = image.open(data, dataSize, isInfoOnly); + + // this means KTXImage is using it's own storage + if (!isLoaded || image.fileData != data) { + close(); + } + + if (!isLoaded) { return false; } return true; } +void KTXImageData::close() { + // don't need these anymore, singleImage holds the data + mmapHelper.close(); + releaseVector(fileData); + isMmap = false; +} + + bool KTXImageData::openPNG(const char* filename, bool isSrgb, KTXImage& image) { - bool useMmap = true; + close(); + + isMmap = true; if (!mmapHelper.open(filename)) { - useMmap = false; + isMmap = false; // open file, copy it to memory, then close it FileHelper fileHelper; @@ -103,7 +127,7 @@ bool KTXImageData::openPNG(const char* filename, bool isSrgb, KTXImage& image) { const uint8_t* data; size_t dataSize; - if (useMmap) { + if (isMmap) { data = mmapHelper.data(); dataSize = mmapHelper.dataLength(); } @@ -117,6 +141,10 @@ bool KTXImageData::openPNG(const char* filename, bool isSrgb, KTXImage& image) { Image singleImage; bool isLoaded = LoadPng(data, dataSize, false, false, singleImage); + + // don't need png data anymore + close(); + if (!isLoaded) { return false; } @@ -132,8 +160,11 @@ bool KTXImageData::openPNG(const char* filename, bool isSrgb, KTXImage& image) { image.textureType = 
MyMTLTextureType2D; image.pixelFormat = isSrgb ? MyMTLPixelFormatRGBA8Unorm_sRGB : MyMTLPixelFormatRGBA8Unorm; - // TODO: support mips with blitEncoder or Mipper - // TODO: support chunks, but may need to copy horizontal to vertical + // TODO: support mips with blitEncoder but tha confuses mipCount in KTXImage + // Mipper can also generate on cpu side. Mipped can do premul conversion though. + + // TODO: support chunks and striped png, but may need to copy horizontal to vertical + // TODO: png has 16u format useful for heights image.reserveImageData(); @@ -145,6 +176,10 @@ bool KTXImageData::openPNG(const char* filename, bool isSrgb, KTXImage& image) { bool KTXImageData::open(const uint8_t* data, size_t dataSize, KTXImage& image) { + close(); + + // image will likely alias incoming data, so KTXImageData is unused + if (!image.open(data, dataSize, isInfoOnly)) { return false; } @@ -335,9 +370,7 @@ bool SetupTmpFile(FileHelper& tmpFileHelper, const char* suffix) return tmpFileHelper.openTemporaryFile(suffix, "w+b"); } -bool SetupSourceImage(//MmapHelper& mmapHelper, FileHelper& fileHelper, - //vector& fileBuffer, - const string& srcFilename, Image& sourceImage, +bool SetupSourceImage(const string& srcFilename, Image& sourceImage, bool isPremulSrgb = false, bool isGray = false) { bool isKTX = endsWith(srcFilename, ".ktx") || endsWith(srcFilename, ".ktx2"); @@ -349,15 +382,19 @@ bool SetupSourceImage(//MmapHelper& mmapHelper, FileHelper& fileHelper, return false; } - // TODO: really KTXImageData + // TODO: basically KTXImageData, but the encode can't take in a KTXImage yet + // so here it's generate a single Image. Also here the LoadKTX converts + // 1/2/3/4 channel formats to 4. + MmapHelper mmapHelper; - FileHelper fileHelper; vector fileData; // first try mmap, and then use file -> buffer - bool useMmap = true; + bool isMmap = true; if (!mmapHelper.open(srcFilename.c_str())) { - useMmap = false; + isMmap = false; + + FileHelper fileHelper; // fallback to opening file if no mmap support or it didn't work if (!fileHelper.open(srcFilename.c_str(), "rb")) { @@ -379,30 +416,27 @@ bool SetupSourceImage(//MmapHelper& mmapHelper, FileHelper& fileHelper, } } + const uint8_t* data; + size_t dataSize; + if (isMmap) { + data = mmapHelper.data(); + dataSize = mmapHelper.dataLength(); + } + else { + data = fileData.data(); + dataSize = fileData.size(); + } + + //----------------------- + if (isPNG) { - if (useMmap) { - if (!LoadPng(mmapHelper.data(), mmapHelper.dataLength(), isPremulSrgb, isGray, - sourceImage)) { - return false; // error - } - } - else { - if (!LoadPng(fileData.data(), fileData.size(), isPremulSrgb, isGray, - sourceImage)) { - return false; // error - } + if (!LoadPng(data, dataSize, isPremulSrgb, isGray, sourceImage)) { + return false; // error } } else { - if (useMmap) { - if (!LoadKtx(mmapHelper.data(), mmapHelper.dataLength(), sourceImage)) { - return false; // error - } - } - else { - if (!LoadKtx(fileData.data(), fileData.size(), sourceImage)) { - return false; // error - } + if (!LoadKtx(data, dataSize, sourceImage)) { + return false; // error } } diff --git a/libkram/kram/Kram.h b/libkram/kram/Kram.h index 2b2051fa..f2e75afc 100644 --- a/libkram/kram/Kram.h +++ b/libkram/kram/Kram.h @@ -13,8 +13,8 @@ using namespace std; class Image; class KTXImage; -// This helper needs to stay alive since KTXImage aliases it -// May be able to fold these into KTXImage since it has an internal vector already +// This helper needs to stay alive since KTXImage may alias the data. 
+// KTXImage also has an internal vector already, but fileData may point to the mmap or vector here. class KTXImageData { public: // class keeps the data alive in mmapHelper or fileData @@ -23,11 +23,17 @@ class KTXImageData { // class aliases data, so caller must keep alive. Useful with bundle. bool open(const uint8_t* data, size_t dataSize, KTXImage& image); + // Open png image into a KTXImage as a single-level mip + // Only handles 2d case and only srgba/rgba conversion. bool openPNG(const char* filename, bool isSrgb, KTXImage& image); + // This releases all memory associated with this class + void close(); + private: MmapHelper mmapHelper; vector fileData; + bool isMmap = false; bool isInfoOnly = true; }; From b9e6444217609601c84360d431e6cc5db7cef244 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 5 Jun 2021 11:16:49 -0700 Subject: [PATCH 110/901] Update README.md --- README.md | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 22dfb9cf..2abedd00 100644 --- a/README.md +++ b/README.md @@ -22,34 +22,38 @@ Many of the encoder sources can multithread a single image, but that is unused. Similar to a makefile system, the script sample kramtexture.py uses modstamps to skip textures that have already been processed. If the source png/ktx is older than the ktx output, then the file is skipped. Command line options are not yet compared, so if those change then use --force on the python script to rebuild all textures. Also a crc/hash could be used instead when modstamp isn't sufficient or the same data could come from different folders. ### About kramv -kramv is a viewer for the BC/ASTC/ETC2 and HDR KTX textures generated by kram from LDR PNG and LDR/HDR KTX sources. kramv decodes ASTC/ETC2 textures on macOS Intel, where the GPU doesn't support them. macOS with Apple Silicon supports all three formats, and doesn't need to decode. +kramv is a viewer for the BC/ASTC/ETC2 and HDR KTX/2 textures generated by kram from LDR PNG and LDR/HDR KTX/2 sources. kramv decodes ASTC/ETC2 textures on macOS Intel, where the GPU doesn't support them. macOS with Apple Silicon supports all three formats, and doesn't need to decode. -This is all in ObjC++ with the intent to port to Windows as time permits. It's adapted from Apple's Metal sample app. There's very little GUI and it's all controlled via keyboard to make the code easy to port and change, but the key features are useful for texture triage and analysis. Drag and drop, and click-launch are supported. Recently used textures are listed in the menu. The app is currently single-document only, but I'd like to fix that. Subsequent opens reuse the same document Window. Can drop zip bundles of KTX/KTX2 files, and advance through all textures in the archive. +kramv uses ObjC++ with the intent to port to Windows C++ as time permits. Uses menus, buttons, and keyboard handling useful for texture triage and analysis. Drag and drop folders, bundles, and click-to-launch are supported. Recently used textures/folders/bundles are listed in the menu. The app currently shows a single document at a time. Subsequent opens reuse the same document Window. With bundles and folders, kramv will attempt to pair albedo and normal maps together by filename for the preview. -Compute shaders are used to display a single pixel sample from the gpu texture. This simplifies adding more viewable formats in the future, but there is not a cpu fallback. 
The preview is rendered to a cube with a single face visible using shaders. Preview mode provides lighting, sdf cutoff, and mip visuals for a given texture. +Preview mode provides lighting, sdf cutoff, and mip visuals for a given texture. Multiple shapes can help identify inconsistent normal maps. The u-axis advances counterclockwise, and v-axis advances down on the shapes. +Y OpenGL normals are assumed, not -Y DirectX convention. Lighting appears up and to the right when normal maps are correctly specified. -In non-preview mode, point sampling in a pixel shader is used to show exact pixel values of a single mip, array, and face. Debug modes provide pixel analysis. KramLoader shows synchronous cpu upload to a private Metal texture, but does not yet supply the underlying KTXImage. Pinch zoom and panning tries to keep the image from onscreen, and zoom is to the cursor so navigating feels intuitive. +In non-preview mode, point sampling in a pixel shader is used to show exact pixel values of a single mip, array, and face. Debug modes provide pixel analysis. KramLoader shows synchronous cpu upload to a private Metal texture, but does not yet supply the underlying KTXImage. Pinch-zoom and pan tries to keep the image from onscreen, and zoom is to the cursor so navigating feels intuitive. + +Compute shaders are used to sample a single pixel sample from the gpu texture for the eyedropper. This simplifies adding more viewable formats in the future, but there is not a cpu fallback. Normal.z is reconstructed and displayed in the hud, and linear and srgb channels are shown. ``` Formats - R/RG/RGBA 8/16F/32F, BC/ETC2/ASTC, RGB has limited import support Container Types - KTX, KTX2, PNG Content Types - Albedo, Normal, SDF, Height -Debug modes - transparent, color, gray, +x, +y, xy >= 1 +Debug modes - transparent, color, non-zero, gray, +x, +y, xy >= 1 Texture Types - 1darray (no mips), 2d, 2darray, 3d (no mips), cube, cube array ⇧ decrement any advance listed below -/ - show keyboard shortcuts +?/ - show keyboard shortcuts O - toggle preview, disables debug mode, shows lit normals, and mips and filtering are enabled ⇧D - toggle through none, pixel grid, block grid, atlas grid (32, 64, 128, 256), must be zoomed-in to see pixel grid ⇧E - advance debug mode, this is texture content specific H - toggle hud +U - toggle ui +V - toggle vertical vs. horizontal buttons I - show texture info in overlay -W - toggle repeat filter, scales uv from [0,1] to [0,2] +W - toggle repeat filter, scales uv from [0,1] to [0,2] and changes sampler to wrap/repeat S - show all - arrays, faces, slices and mips all on-screen -R/G/B/A - show channel in isolation -P - toggle shader premul, the shader performs this after sampling but for point sampling it is correct +R/G/B/A - show channel in isolation, alpha as grayscale +P - toggle shader premul, shader does this post-sample so only correct for point-sampling not preview N - toggle signed/unsigned ⇧0 - refit the current mip image to 1x, or fit view. (at 1x with ⇧). 
@@ -58,8 +62,10 @@ N - toggle signed/unsigned ⇧Y advance array ⇧F advance face ⇧M advance mip +⇧8 advance shape (plane, unit box, sphere, capsule) + +⇧J advance bundle/folder image (can traverse zip of ktx/ktx2 files) -⇧J advance bundle image (can traverse zip of ktx/ktx2 files) ``` ### Limitations @@ -88,7 +94,7 @@ ETC2_RGB8A1 - disabled, broken in ETC2 optimizations ASTC LDR - rrr1, rrrg/gggr, rgb1, rgba must be followed to avoid endpoint storage, requires swizzles ASTC HDR - encoder uses 8-bit source image, need 16f/32f passed to encoder, no hw L+A mode -R/RG/RGBA 8/16F/32F - use ktx2ktx2 and ktx2sc KTX2 to supercompress, use as source formats +R/RG/RGBA 8/16F/32F - use kram or ktx2ktx2+ktx2sc to generate supercompressed ktx2 R8/RG8/R16F - input/output rowBytes not aligned to 4 bytes to match KTX spec, code changes needed PVRTC - unsupported, no open-source encoders, requires pow2 size @@ -96,12 +102,13 @@ PVRTC - unsupported, no open-source encoders, requires pow2 size Containers PVR/DDS/Basis/Crunch - unsupoorted -KTX - breaks loads of mips with 4 byte length offset at the start of each level of mips, - metadata/props aren't standardized and only ascii prop support so easy to dump out +KTX - only uncompressed, mip levels are unaligned to block size from 4 byte length at chunk 0 + metadata/props aren't standardized or prevalent + libkram supports only text props for display in kramv -KTX2 - works in kram and viewer, has aligned compressed levels of mips, +KTX2 - works in kram and viewer, has aligned levels of mips when uncompressed, libkram supports None/Zlib/Zstd supercompression for read/write - doesn't support UASTC or BasisLZ yet + libkram does not support UASTC or BasisLZ yet ``` From 08dc556ae021b55e515ecf5e7a22afccff42bd33 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 5 Jun 2021 13:47:33 -0700 Subject: [PATCH 111/901] kramv - add tangent-less normals Good to compare this against explicit tangents for perf and quality. This saves storing/transforming tangents. Need to compare mirroring with that vs. explicit tangent case. 
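
For reference, the derivative-based basis this patch adds boils down to the
construction below (a condensed fp32 sketch; the committed shader mixes half
and float and folds this into transformNormalByBasis, and the helper name
here is only illustrative):

    // Build a TBN from screen-space derivatives of worldPos/uv and rotate the
    // tangent-space bump normal into world space. Fragment shader only, fp32.
    float3 bumpNormalFromDerivatives(float3 bumpNormal,   // unpacked tangent-space normal
                                     float3 vertexNormal, // normalized interpolated normal
                                     float3 worldPos, float2 uv)
    {
        // edge vectors of the pixel's triangle in position and uv space
        float3 dp1 = dfdx(worldPos);
        float3 dp2 = dfdy(worldPos);
        float2 duv1 = dfdx(uv);
        float2 duv2 = dfdy(uv);

        // solve for the tangent/bitangent that map the uv edges onto the position edges
        float3 dp2perp = cross(dp2, vertexNormal);
        float3 dp1perp = cross(vertexNormal, dp1);
        float3 T = dp2perp * duv1.x + dp1perp * duv2.x;
        float3 B = dp2perp * duv1.y + dp1perp * duv2.y;

        // scale-invariant frame: preserve the relative magnitude of T and B
        float invmax = rsqrt(max(length_squared(T), length_squared(B)));
        return normalize(float3x3(T * invmax, B * invmax, vertexNormal) * bumpNormal);
    }

The tradeoff is fp32 math and derivatives in the fragment shader versus storing
and skinning a per-vertex tangent, and derivative precision varies across hw.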
--- kramv/KramRenderer.mm | 5 +- kramv/KramShaders.metal | 130 +++++++++++++++++++++++++++------------- 2 files changed, 92 insertions(+), 43 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 93da1eda..04cdec54 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -542,7 +542,8 @@ - (BOOL)loadTextureFromImage:(const string&)fullFilename } } - // archive shouldn't contain png, so only support ktx/ktx2 here + // if archive contained png, then it's been converted to ktx + // so the info below may not reflect original data _showSettings->imageInfo = kramInfoKTXToString(fullFilename, image, false); _showSettings->imageInfoVerbose = kramInfoKTXToString(fullFilename, image, true); @@ -598,6 +599,8 @@ - (BOOL)loadTexture:(nonnull NSURL *)url // this is not the png data, but info on converted png to ktx level // But this avoids loading the image 2 more times + // Size of png is very different than decompressed or recompressed ktx + _showSettings->imageInfo = kramInfoKTXToString(fullFilename, image, false); _showSettings->imageInfoVerbose = kramInfoKTXToString(fullFilename, image, true); diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index faad3b9e..a0847c63 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -6,6 +6,9 @@ using namespace metal; +// whether to use model tangents or generate from normal in fragment shader +constant bool useTangent = false; + //--------------------------------- // helpers @@ -184,10 +187,46 @@ half3 toNormal(half3 n) } +// https://www.gamasutra.com/blogs/RobertBasler/20131122/205462/Three_Normal_Mapping_Techniques_Explained_For_the_Mathematically_Uninclined.php?print=1 +// http://www.thetenthplanet.de/archives/1180 +// This generates the TBN from vertex normal and p and uv derivatives +// Then transforms the bumpNormal to that space. No tangent is needed. +// The downside is this must all be fp32, and all done in fragment shader and use derivatives. +// Derivatives are known to be caclulated differently depending on hw and different precision. +half3 transformNormalByBasis(half3 vertexNormal, half3 bumpNormal, float3 worldPos, float2 uv) +{ + float3 N = toFloat(vertexNormal); + + // for OpenGL +Y convention, flip N.y + // but this doesn't match explicit tangents case, see if those are wrong. 
+ //N.y = -N.y; + + // get edge vectors of the pixel triangle + float3 dp1 = dfdx(worldPos); + float3 dp2 = dfdy(worldPos); + float2 duv1 = dfdx(uv); + float2 duv2 = dfdy(uv); + + // solve the linear system + float3 dp2perp = cross(dp2, N); + float3 dp1perp = cross(N, dp1); + float3 T = dp2perp * duv1.x + dp1perp * duv2.x; + float3 B = dp2perp * duv1.y + dp1perp * duv2.y; + float invmax = rsqrt(max(length_squared(T), length_squared(B))); + + // keeps relative magnitude of two vectors, they're not both unit vecs + T *= invmax; + B *= invmax; + + // construct a scale-invariant frame + // drop to half to match other call + bumpNormal = half3x3(toHalf(T), toHalf(B), vertexNormal) * bumpNormal; + return bumpNormal; +} // use mikktspace, gen bitan in frag shader with sign, don't normalize vb/vt // see http://www.mikktspace.com/ -half3 transformNormal(half3 bumpNormal, half4 tangent, half3 vertexNormal) +half3 transformNormalByBasis(half3 bumpNormal, half4 tangent, half3 vertexNormal) { // Normalize tangent/vertexNormal in vertex shader // but don't renormalize interpolated tangent, vertexNormal in fragment shader @@ -208,7 +247,8 @@ half3 transformNormal(half3 bumpNormal, half4 tangent, half3 vertexNormal) return normalize(bumpNormal); } -half3 transformNormal(half4 tangent, half3 vertexNormal, + +half3 transformNormal(half4 tangent, half3 vertexNormal, float3 worldPos, texture2d texture, sampler s, float2 uv, bool isSigned = true) { half4 nmap = texture.sample(s, uv); @@ -221,17 +261,23 @@ half3 transformNormal(half4 tangent, half3 vertexNormal, // rebuild the z term half3 bumpNormal = toNormal(nmap.xyz); - return transformNormal(bumpNormal, tangent, vertexNormal); + if (useTangent) + bumpNormal = transformNormalByBasis(bumpNormal, tangent, vertexNormal); + else + bumpNormal = transformNormalByBasis(bumpNormal, vertexNormal, worldPos, uv); + + return bumpNormal; } -float3 transformNormal(float4 nmap, half3 vertexNormal, half4 tangent, +half3 transformNormal(half4 nmap, half3 vertexNormal, half4 tangent, + float3 worldPos, float2 uv, // to gen TBN bool isSwizzleAGToRG, bool isSigned, bool isFrontFacing) { // add swizzle for ASTC/BC5nm, other 2 channels format can only store 01 in ba // could use hw swizzle for this if (isSwizzleAGToRG) { - nmap = float4(nmap.ag, 0, 1); + nmap = half4(nmap.ag, 0, 1); } // to signed, also for ASTC/BC5nm @@ -240,22 +286,30 @@ float3 transformNormal(float4 nmap, half3 vertexNormal, half4 tangent, nmap.rg = toSnorm8(nmap.rg); } - float3 bumpNormal = nmap.xyz; + half3 bumpNormal = nmap.xyz; bumpNormal = toNormal(bumpNormal); - // flip the normal if facing is flipped - // TODO: needed for tangent too? - if (!isFrontFacing) { - bumpNormal = -bumpNormal; - tangent.w = -tangent.w; + // handle the basis here (need worldPos and uv for other path) + if (useTangent) { + // flip the normal if facing is flipped + // TODO: needed for tangent too? 
+ if (!isFrontFacing) { + bumpNormal = -bumpNormal; + tangent.w = -tangent.w; + } + + bumpNormal = transformNormalByBasis(bumpNormal, tangent, vertexNormal); + } + else { + bumpNormal = transformNormalByBasis(bumpNormal, vertexNormal, worldPos, uv); } - // handle the basis here - bumpNormal = toFloat(transformNormal(toHalf(bumpNormal), tangent, vertexNormal)); return bumpNormal; } + + // TODO: have more bones, or read from texture instead of uniforms // can then do instanced skining, but vfetch lookup slower #define maxBones 128 @@ -301,7 +355,9 @@ void skinPosAndBasis(thread float4& position, thread float3& tangent, thread flo // see scale2 handling in transformBasis, a little different with transpose of 3x4 normal = (float4(normal, 0.0) * bindPoseToBoneTransform); - tangent = (float4(tangent, 0.0) * bindPoseToBoneTransform); + + if (useTangent) + tangent = (float4(tangent, 0.0) * bindPoseToBoneTransform); } float3x3 toFloat3x3(float4x4 m) @@ -319,33 +375,16 @@ void transformBasis(thread float3& normal, thread float3& tangent, // note this is RinvT * n = (Rt)t = R, this is for simple inverse, inv scale handled below // but uniform scale already handled by normalize normal = m * normal; - + normal *= invScale2; + normal = normalize(normal); + // question here of whether tangent is transformed by m or mInvT // most apps assume m, but after averaging it can be just as off the surface as the normal - tangent = m * tangent; - - // have to apply invSquare of scale here to approximate invT - // also make sure to identify inversion off determinant before instancing so that backfacing is correct - // this is only needed if non-uniform scale present in modelToWorldTfm, could precompute scale2 -// if (isScaled) -// { -// // compute scale squared from rows -// float3 scale2 = float3( -// length_squared(m[0].xyz), -// length_squared(m[1].xyz), -// length_squared(m[2].xyz)); -// -// // do a max(1e4), but really don't have scale be super small -// scale2 = recip(max(0.0001 * 0.0001, scale2)); - - // apply inverse - normal *= invScale2; + if (useTangent) { + tangent = m * tangent; tangent *= invScale2; -// } - - // vertex shader normalize, but the fragment shader should not - normal = normalize(normal); - tangent = normalize(tangent); + tangent = normalize(tangent); + } // make sure to preserve bitan sign in tangent.w } @@ -395,6 +434,8 @@ ColorInOut DrawImageFunc( transformBasis(normal, tangent, uniforms.modelMatrix, uniforms.modelMatrixInvScale2); out.normal = toHalf(normal); + + // may be invalid if useTangent is false out.tangent.xyz = toHalf(tangent); out.tangent.w = toHalf(in.tangent.w); } @@ -600,13 +641,15 @@ float4 DrawPixels( } else if (uniforms.isNormal) { // light the normal map + half4 nmapH = toHalf(c); - float3 n = transformNormal(c, in.normal, in.tangent, + half3 n = transformNormal(nmapH, in.normal, in.tangent, + in.worldPos, in.texCoord, // to build TBN uniforms.isSwizzleAGToRG, uniforms.isSigned, facing); float3 viewDir = normalize(in.worldPos - uniforms.cameraPosition); - c = doLighting(float4(1.0), viewDir, n); + c = doLighting(float4(1.0), viewDir, toFloat(n)); c.a = 1; } @@ -619,10 +662,13 @@ float4 DrawPixels( float3 viewDir = normalize(in.worldPos - uniforms.cameraPosition); if (uniforms.isNormalMapPreview) { - float3 n = transformNormal(nmap, in.normal, in.tangent, + half4 nmapH = toHalf(nmap); + + half3 n = transformNormal(nmapH, in.normal, in.tangent, + in.worldPos, in.texCoord, // to build TBN uniforms.isNormalMapSwizzleAGToRG, uniforms.isNormalMapSigned, facing); - c = 
doLighting(c, viewDir, n); + c = doLighting(c, viewDir, toFloat(n)); } else { c = doLighting(c, viewDir, toFloat(in.normal)); From 05d1be9a17afd71b9bd63fc4f68faf11a422bed5 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 5 Jun 2021 13:58:10 -0700 Subject: [PATCH 112/901] kramv - contrast grid on white, helps with gray images and show alpha --- kramv/KramShaders.metal | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index a0847c63..92e5e65f 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -885,7 +885,17 @@ float4 DrawPixels( // Just visualize the grid lines directly float lineIntensity = 1.0 - min(line, 1.0); - c.rgb = float3(lineIntensity) + (1.0 - lineIntensity) * c.rgb; + // determine proximity of white color to pixel + // and ensure contrast on this blend + float cDist = distance(float3(1.0), c.rgb); + + float lineColor = 1.0; + if (cDist < 0.2) { + lineColor = 0.5; + } + + c.rgb = mix(c.rgb, float3(lineColor), lineIntensity); + // nothing for alpha? } } From 020de93fad6cd85746f60a3edac2364a1a3a86ed Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 5 Jun 2021 15:41:22 -0700 Subject: [PATCH 113/901] kram - fix png to ktx creation reserveImageData() needs initMipLevels() called prior. --- libkram/kram/Kram.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index bd8a03d4..5a02ec12 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -167,7 +167,10 @@ bool KTXImageData::openPNG(const char* filename, bool isSrgb, KTXImage& image) { // TODO: png has 16u format useful for heights + image.initMipLevels(sizeof(KTXHeader)); // TODO: could also make this ktx2 with zstd compress image.reserveImageData(); + memcpy((uint8_t*)image.fileData, &image.header, sizeof(KTXHeader)); + memcpy((uint8_t*)image.fileData + image.mipLevels[0].offset, singleImage.pixels().data(), image.levelLength(0)); return true; From 5c2ff4bc70d8a952482795418c3038ee48e29e3a Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 6 Jun 2021 12:12:13 -0700 Subject: [PATCH 114/901] kramv - fix sphere prim, add mirrored sphere, disable tan from normal The author of tan-from-normal removed my inquiry about the algorithm and mirrored uv, so I implemented a mirrored uv test case with a sphere. Note that the triangles aren't mirrored, only the uvs. So the normals are flipped, but not the triangles. Will add that other case later. Tangent generation can handle that case, since it can compute correct normal/tangent and flip them. But the tan-from-normal may need the faces inverted as well (and the normals flipped). Had to rotate the sphere to match the cube/capsule. It needed pos and normals rotated. Now lighting is consistent. Turned off specular in shader for now. Attenuate diffuse off dot(vertNormal, bumpNormal). So dark side doesn't have diffuse. 
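
The diffuse attenuation amounts to the following (a small sketch in isolation;
the committed change lives inside doLighting, and the standalone function name
here is only illustrative, with the 9.0 ramp taken from this patch rather than
being a general recipe):

    // Limit bump-lit diffuse by how far the bump normal strays from the
    // interpolated vertex normal, so the geometric dark side stays dark.
    float3 diffuseTerm(float3 lightColor, float3 lightDir,
                       float3 bumpNormal, float3 vertexNormal)
    {
        float dotNL = saturate(dot(bumpNormal, lightDir));

        // fades out as the bump normal approaches 90 degrees off the vertex normal
        float dotVertex = saturate(dot(vertexNormal, bumpNormal));
        dotNL *= saturate(9.0 * dotVertex);

        return dotNL * lightColor;
    }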
--- kramv/KramRenderer.mm | 136 ++++++++++++++++++++++++++++++++++++++-- kramv/KramShaders.metal | 43 +++++++++---- kramv/KramViewerBase.h | 2 +- 3 files changed, 165 insertions(+), 16 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 04cdec54..60e3689c 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -88,6 +88,7 @@ @implementation Renderer //MTKMesh *_meshPlane; // really a thin gox MTKMesh *_meshBox; MTKMesh *_meshSphere; + MTKMesh *_meshSphereMirrored; //MTKMesh *_meshCylinder; MTKMesh *_meshCapsule; MTKMeshBufferAllocator *_metalAllocator; @@ -415,10 +416,10 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU if (doFlipUV) { id uvs = mdlMesh.vertexBuffers[BufferIndexMeshUV0]; - float2* uvData = (float2*)uvs.map.bytes; + packed_float2* uvData = (packed_float2*)uvs.map.bytes; for (uint32_t i = 0; i < mdlMesh.vertexCount; ++i) { - float2& uv = uvData[i]; + auto& uv = uvData[i]; uv.x = 1.0f - uv.x; } @@ -433,9 +434,14 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU if (doFlipBitangent) { id uvs = mdlMesh.vertexBuffers[BufferIndexMeshTangent]; - float4* uvData = (float4*)uvs.map.bytes; + packed_float4* uvData = (packed_float4*)uvs.map.bytes; for (uint32_t i = 0; i < mdlMesh.vertexCount; ++i) { + if (uvData[i].w != -1.0f && uvData[i].w != 1.0f) { + int bp = 0; + bp = bp; + } + uvData[i].w = -uvData[i].w; } } @@ -458,6 +464,11 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU return mesh; } +// why isn't this defined in simd lib? +struct packed_float3 { + float x,y,z; +}; + - (void)_loadAssets { /// Load assets into metal objects @@ -481,8 +492,124 @@ - (void)_loadAssets // All prims are viewed with +Y, not +Z up mdlMesh = [MDLMesh newEllipsoidWithRadii:(vector_float3){0.5, 0.5, 0.5} radialSegments:16 verticalSegments:16 geometryType:MDLGeometryTypeTriangles inwardNormals:NO hemisphere:NO allocator:_metalAllocator]; + + float angle = M_PI * 0.5; // TODO: + or - + float2 cosSin = float2m(cos(angle), sin(angle)); + + { + mdlMesh.vertexDescriptor = _mdlVertexDescriptor; + + id pos = mdlMesh.vertexBuffers[BufferIndexMeshPosition]; + packed_float3* posData = (packed_float3*)pos.map.bytes; + + id normals = mdlMesh.vertexBuffers[BufferIndexMeshNormal]; + packed_float3* normalData = (packed_float3*)normals.map.bytes; + + // vertexCount reports 306, but vertex 289+ are garbage + uint32_t numVertices = 289; // mdlMesh.vertexCount + + for (uint32_t i = 0; i < numVertices; ++i) { + { + auto& pos = posData[i]; + + // dumb rotate about Y-axis + auto copy = pos; + + pos.x = copy.x * cosSin.x - copy.z * cosSin.y; + pos.z = copy.x * cosSin.y + copy.z * cosSin.x; + } + + { + auto& normal = normalData[i]; + auto copy = normal; + normal.x = copy.x * cosSin.x - copy.z * cosSin.y; + normal.z = copy.x * cosSin.y + copy.z * cosSin.x; + } + } + + } _meshSphere = [self _createMeshAsset:"MeshSphere" mdlMesh:mdlMesh doFlipUV:true]; + + + mdlMesh = [MDLMesh newEllipsoidWithRadii:(vector_float3){0.5, 0.5, 0.5} radialSegments:16 verticalSegments:16 geometryType:MDLGeometryTypeTriangles inwardNormals:NO hemisphere:NO allocator:_metalAllocator]; + + + // ModelIO has the uv going counterclockwise on sphere/cylinder, but not on the box. + // And it also has a flipped bitangent.w. 
+ + // flip the u coordinate + bool doFlipUV = true; + if (doFlipUV) + { + mdlMesh.vertexDescriptor = _mdlVertexDescriptor; + + id uvs = mdlMesh.vertexBuffers[BufferIndexMeshUV0]; + packed_float2* uvData = (packed_float2*)uvs.map.bytes; + + // this is all aos + + id pos = mdlMesh.vertexBuffers[BufferIndexMeshPosition]; + packed_float3* posData = (packed_float3*)pos.map.bytes; + + id normals = mdlMesh.vertexBuffers[BufferIndexMeshNormal]; + packed_float3* normalData = (packed_float3*)normals.map.bytes; + + + // vertexCount reports 306, but vertex 289+ are garbage + uint32_t numVertices = 289; // mdlMesh.vertexCount + + for (uint32_t i = 0; i < numVertices; ++i) { + { + auto& pos = posData[i]; + + // dumb rotate about Y-axis + auto copy = pos; + pos.x = copy.x * cosSin.x - copy.z * cosSin.y; + pos.z = copy.x * cosSin.y + copy.z * cosSin.x; + } + + { + auto& normal = normalData[i]; + auto copy = normal; + normal.x = copy.x * cosSin.x - copy.z * cosSin.y; + normal.z = copy.x * cosSin.y + copy.z * cosSin.x; + } + + auto& uv = uvData[i]; + + if (uv.x < 0.0 || uv.x > 1.0) { + int bp = 0; + bp = bp; + } + + // this makes it counterclockwise 0 to 1 + float x = uv.x; + + x = 1.0f - x; + + // -1 to 1 counterclockwise + x = 2.0f * x - 1.0f; + + if (x <= 0) { + // now -1 to 0 is 0 to 1 clockwise with 1 in back + x = 1.0f + x; + } + else { + // 0 to 1, now 1 to 0 with 1 in back + x = 1.0f - x; + } + + uv.x = x; + } + + // TODO: may need to flip tangent on the inverted side + // otherwise lighting is just wrong, but tangents generated in _createMeshAsset + // move that here, and flip the tangents in the loop + } + + _meshSphereMirrored = [self _createMeshAsset:"MeshSphereMirrored" mdlMesh:mdlMesh doFlipUV:false]; + // this maps 1/3rd of texture to the caps, and just isn't a very good uv mapping, using capsule nistead // mdlMesh = [MDLMesh newCylinderWithHeight:1.0 @@ -858,8 +985,9 @@ - (void)_updateGameState case 0: _mesh = _meshBox; _showSettings->is3DView = false; break; case 1: _mesh = _meshBox; break; case 2: _mesh = _meshSphere; break; + case 3: _mesh = _meshSphereMirrored; break; //case 3: _mesh = _meshCylinder; break; - case 3: _mesh = _meshCapsule; break; + case 4: _mesh = _meshCapsule; break; } uniforms.is3DView = _showSettings->is3DView; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 92e5e65f..f3c47105 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -6,8 +6,10 @@ using namespace metal; -// whether to use model tangents or generate from normal in fragment shader -constant bool useTangent = false; +// Whether to use model tangents (true) or generate tangents from normal in fragment shader (false). +// When set false, the algorithm doesn't adjust for mirrored uv +// See meshSphereMirrored and set this to false. +constant bool useTangent = true; //--------------------------------- // helpers @@ -193,7 +195,7 @@ half3 toNormal(half3 n) // Then transforms the bumpNormal to that space. No tangent is needed. // The downside is this must all be fp32, and all done in fragment shader and use derivatives. // Derivatives are known to be caclulated differently depending on hw and different precision. 
-half3 transformNormalByBasis(half3 vertexNormal, half3 bumpNormal, float3 worldPos, float2 uv) +half3 transformNormalByBasis(half3 bumpNormal, half3 vertexNormal, float3 worldPos, float2 uv) { float3 N = toFloat(vertexNormal); @@ -220,7 +222,8 @@ half3 transformNormalByBasis(half3 vertexNormal, half3 bumpNormal, float3 worldP // construct a scale-invariant frame // drop to half to match other call - bumpNormal = half3x3(toHalf(T), toHalf(B), vertexNormal) * bumpNormal; + bumpNormal = toHalf(float3x3(T, B, N) * toFloat(bumpNormal)); + return bumpNormal; } @@ -233,15 +236,23 @@ half3 transformNormalByBasis(half3 bumpNormal, half4 tangent, half3 vertexNormal // Reconstruct bitan in frag shader // https://bgolus.medium.com/generating-perfect-normal-maps-for-unity-f929e673fc57 + + // so if eyevector + + + // TODO: there's facing too, could be inside model + + half bitangentSign = tangent.w; + half3 bitangent = bitangentSign * cross(vertexNormal, tangent.xyz); + // ModelIO not generating correct bitan sign // DONE: flip this on srcData, and not here //bitangentSign = -bitangentSign; // now transform by basis and normalize from any shearing, and since interpolated basis vectors // are not normalized - half3 bitangent = bitangentSign * cross(vertexNormal, tangent.xyz); half3x3 tbn = half3x3(tangent.xyz, bitangent, vertexNormal); bumpNormal = tbn * bumpNormal; return normalize(bumpNormal); @@ -552,19 +563,20 @@ vertex ColorInOut DrawVolumeVS( return out; } -float4 doLighting(float4 albedo, float3 viewDir, float3 n) { +float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) { - float3 lightDir = normalize(float3(1,1,1)); + float3 lightDir = normalize(float3(1,1,1)); // looking down -Z axis float3 lightColor = float3(1,1,1); float3 specular = float3(0.0); float3 diffuse = float3(0.0); float3 ambient = float3(0.0); - bool doSpecular = true; + bool doSpecular = false; // this is a bit too bright, and can confuse bool doDiffuse = true; bool doAmbient = true; + if (doSpecular) { float3 ref = normalize(reflect(viewDir, n)); @@ -575,11 +587,20 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n) { } if (doDiffuse) { + float dotNL = saturate(dot(n, lightDir)); + + // soften the terminator off the vertNormal + // this is so no diffuse if normal completely off from vertex normal + // also limiting diffuse lighting bump to lighting by vertex normal + float dotVertex = saturate(dot(vertexNormal, n)); + dotNL *= saturate(9.0 * dotVertex); + diffuse = dotNL * lightColor.rgb; } if (doAmbient) { + // can misconstrue as diffuse with this, but make dark side not look flat float dotNLUnsat = dot(n, lightDir); ambient = mix(0.1, 0.3, saturate(dotNLUnsat * 0.5 + 0.5)); } @@ -649,7 +670,7 @@ float4 DrawPixels( float3 viewDir = normalize(in.worldPos - uniforms.cameraPosition); - c = doLighting(float4(1.0), viewDir, toFloat(n)); + c = doLighting(float4(1.0), viewDir, toFloat(n), toFloat(in.normal)); c.a = 1; } @@ -668,10 +689,10 @@ float4 DrawPixels( in.worldPos, in.texCoord, // to build TBN uniforms.isNormalMapSwizzleAGToRG, uniforms.isNormalMapSigned, facing); - c = doLighting(c, viewDir, toFloat(n)); + c = doLighting(c, viewDir, toFloat(n), toFloat(in.normal)); } else { - c = doLighting(c, viewDir, toFloat(in.normal)); + c = doLighting(c, viewDir, toFloat(in.normal), toFloat(in.normal)); } } diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index d0d5a2ac..68aa2d12 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -176,7 +176,7 @@ class ShowSettings { double 
lastTimestamp = 0.0; int32_t meshNumber = 0; - int32_t meshCount = 4; + int32_t meshCount = 5; }; float4x4 matrix4x4_translation(float tx, float ty, float tz); From f05b90fc5cca1ba41303e7a0c2c78ec6de7941c3 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 6 Jun 2021 14:25:11 -0700 Subject: [PATCH 115/901] kramv - add shape/mesh channel viewing Can look at depth, basis, or uv channels of the mesh visually. These are always hard to interpret since they're just rgb unorm intensity, but better than nothing for now. --- kramv/KramRenderer.mm | 35 +++++++++++++++++++++++-------- kramv/KramShaders.h | 21 ++++++++++++++++++- kramv/KramShaders.metal | 34 +++++++++++++++++++++++++----- kramv/KramViewerBase.cpp | 45 +++++++++++++++++++++++++++++----------- kramv/KramViewerBase.h | 23 ++++++++++++++++++-- kramv/KramViewerMain.mm | 31 +++++++++++++++++++++++---- 6 files changed, 156 insertions(+), 33 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 60e3689c..796f2933 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -416,7 +416,9 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU if (doFlipUV) { id uvs = mdlMesh.vertexBuffers[BufferIndexMeshUV0]; - packed_float2* uvData = (packed_float2*)uvs.map.bytes; + MDLMeshBufferMap *uvsMap = [uvs map]; + + packed_float2* uvData = (packed_float2*)uvsMap.bytes; for (uint32_t i = 0; i < mdlMesh.vertexCount; ++i) { auto& uv = uvData[i]; @@ -434,7 +436,8 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU if (doFlipBitangent) { id uvs = mdlMesh.vertexBuffers[BufferIndexMeshTangent]; - packed_float4* uvData = (packed_float4*)uvs.map.bytes; + MDLMeshBufferMap *uvsMap = [uvs map]; + packed_float4* uvData = (packed_float4*)uvsMap.bytes; for (uint32_t i = 0; i < mdlMesh.vertexCount; ++i) { if (uvData[i].w != -1.0f && uvData[i].w != 1.0f) { @@ -493,17 +496,19 @@ - (void)_loadAssets mdlMesh = [MDLMesh newEllipsoidWithRadii:(vector_float3){0.5, 0.5, 0.5} radialSegments:16 verticalSegments:16 geometryType:MDLGeometryTypeTriangles inwardNormals:NO hemisphere:NO allocator:_metalAllocator]; - float angle = M_PI * 0.5; // TODO: + or - + float angle = M_PI * 0.5; float2 cosSin = float2m(cos(angle), sin(angle)); { mdlMesh.vertexDescriptor = _mdlVertexDescriptor; id pos = mdlMesh.vertexBuffers[BufferIndexMeshPosition]; - packed_float3* posData = (packed_float3*)pos.map.bytes; + MDLMeshBufferMap *posMap = [pos map]; + packed_float3* posData = (packed_float3*)posMap.bytes; id normals = mdlMesh.vertexBuffers[BufferIndexMeshNormal]; - packed_float3* normalData = (packed_float3*)normals.map.bytes; + MDLMeshBufferMap *normalsMap = [normals map]; + packed_float3* normalData = (packed_float3*)normalsMap.bytes; // vertexCount reports 306, but vertex 289+ are garbage uint32_t numVertices = 289; // mdlMesh.vertexCount @@ -545,15 +550,18 @@ - (void)_loadAssets mdlMesh.vertexDescriptor = _mdlVertexDescriptor; id uvs = mdlMesh.vertexBuffers[BufferIndexMeshUV0]; - packed_float2* uvData = (packed_float2*)uvs.map.bytes; + MDLMeshBufferMap *uvsMap = [uvs map]; + packed_float2* uvData = (packed_float2*)uvsMap.bytes; // this is all aos id pos = mdlMesh.vertexBuffers[BufferIndexMeshPosition]; - packed_float3* posData = (packed_float3*)pos.map.bytes; + MDLMeshBufferMap *posMap = [pos map]; + packed_float3* posData = (packed_float3*)posMap.bytes; id normals = mdlMesh.vertexBuffers[BufferIndexMeshNormal]; - packed_float3* normalData = (packed_float3*)normals.map.bytes; + MDLMeshBufferMap 
*normalsMap = [normals map]; + packed_float3* normalData = (packed_float3*)normalsMap.bytes; // vertexCount reports 306, but vertex 289+ are garbage @@ -859,6 +867,8 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex // be supported debugMode for new texture _showSettings->debugMode = DebugMode::DebugModeNone; + _showSettings->shapeChannel = ShapeChannel::ShapeChannelNone; + // have one of these for each texture added to the viewer float scaleX = MAX(1, texture.width); float scaleY = MAX(1, texture.height); @@ -976,9 +986,16 @@ - (void)_updateGameState } // no debug mode when preview kicks on, make it possible to toggle back and forth more easily - uniforms.debugMode = _showSettings->isPreview ? ShaderDebugMode::ShDebugModeNone : (ShaderDebugMode)_showSettings->debugMode; + uniforms.debugMode = (ShaderDebugMode)_showSettings->debugMode; + uniforms.shapeChannel = (ShaderShapeChannel)_showSettings->shapeChannel; uniforms.channels = (ShaderTextureChannels)_showSettings->channels; + // turn these off in preview mode, but they may be useful? + if (_showSettings->isPreview) { + uniforms.debugMode = ShaderDebugMode::ShDebugModeNone; + uniforms.shapeChannel = ShaderShapeChannel::ShShapeChannelNone; + } + // crude shape experiment _showSettings->is3DView = true; switch(_showSettings->meshNumber) { diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index fd8bb48e..a556dfc8 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -92,6 +92,21 @@ typedef NS_ENUM(int32_t, ShaderDebugMode) ShDebugModeCount }; +// keep in sync with enum ShapeChannel +typedef NS_ENUM(int32_t, ShaderShapeChannel) +{ + ShShapeChannelNone = 0, + + ShShapeChannelDepth, + + ShShapeChannelUV0, + + ShShapeChannelNormal, + ShShapeChannelTangent, + ShShapeChannelBitangent +}; + + // TODO: placement of these elements in the struct breaks transfer // of data. This seems to work. Alignment issues with mixing these differently. struct Uniforms @@ -123,9 +138,13 @@ struct Uniforms uint32_t gridX; uint32_t gridY; - // can look at pixels that meet criteria of the debugMode + // View pixels that meet criteria of the debugMode ShaderDebugMode debugMode; + // View various aspects of shape geometry (depth, normal, tangent, ...) + ShaderShapeChannel shapeChannel; + + // View the r,g,b,a channels of the texture ShaderTextureChannels channels; // mask }; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index f3c47105..23e84eef 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -9,6 +9,7 @@ using namespace metal; // Whether to use model tangents (true) or generate tangents from normal in fragment shader (false). // When set false, the algorithm doesn't adjust for mirrored uv // See meshSphereMirrored and set this to false. +// TODO: hook this up to uniform and pass into calls constant bool useTangent = true; //--------------------------------- @@ -329,8 +330,8 @@ half3 transformNormal(half4 nmap, half3 vertexNormal, half4 tangent, void skinPosAndBasis(thread float4& position, thread float3& tangent, thread float3& normal, uint4 indices, float4 weights, float3x4 bones[maxBones]) { - // TODO: might do this as up to 12x vtex lookup, fetch from buffer texture - // but uniforms after setup would be faster if many bones + // TODO: might do this as up to 3x vtex lookup per bone, fetch from buffer texture + // but uniforms after setup would be faster if many bones. Could support 1-n bones with vtex. 
// instances use same bones, but different indices/weights already // but could draw skinned variants with vtex lookup and not have so much upload prep @@ -576,7 +577,6 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) bool doDiffuse = true; bool doAmbient = true; - if (doSpecular) { float3 ref = normalize(reflect(viewDir, n)); @@ -763,6 +763,30 @@ float4 DrawPixels( } } + if (uniforms.shapeChannel != ShShapeChannelNone) { + // TODO: Really hard to interpret direction from color + // see about use the vector flow fields + + if (uniforms.shapeChannel == ShShapeChannelUV0) { + c.rgb = fract(in.texCoordXYZ); + } + else if (uniforms.shapeChannel == ShShapeChannelNormal) { + c.rgb = toUnorm(toFloat(in.normal)); + } + else if (useTangent && uniforms.shapeChannel == ShShapeChannelTangent) { + // TODO: make this work with useTangent = false + c.rgb = toUnorm(toFloat(in.tangent.xyz)); + } + else if (uniforms.shapeChannel == ShShapeChannelBitangent) { + // TODO: make this work with useTangent = false + half3 bitangent = cross(in.tangent.xyz, in.normal) * in.tangent.w; + c.rgb = toUnorm(toFloat(bitangent)); + } + else if (uniforms.shapeChannel == ShShapeChannelDepth) { + c.rgb = saturate(in.position.z / in.position.w); + } + } + // mask to see one channel in isolation, this is really 0'ing out other channels // would be nice to be able to keep this set on each channel independently. switch(uniforms.channels) @@ -798,11 +822,11 @@ float4 DrawPixels( float selector = sign(fmod(checker.x + checker.y, 2.0)); float cb = mix(float(1), float(222.0/255.0), selector); - c.rgb = c.rgb + (1-c.a) * cb; + c.rgb = c.rgb + (1.0 - c.a) * cb; // nothing for alpha? } - + if (uniforms.debugMode != ShDebugModeNone && c.a != 0.0) { diff --git a/kramv/KramViewerBase.cpp b/kramv/KramViewerBase.cpp index e241bd5d..a5c5c032 100644 --- a/kramv/KramViewerBase.cpp +++ b/kramv/KramViewerBase.cpp @@ -10,15 +10,36 @@ int32_t ShowSettings::totalChunks() const { return std::max(one, faceCount) * std::max(one, arrayCount) * std::max(one, sliceCount); } -void ShowSettings::advanceDebugMode(bool isShiftKeyDown) { +void ShowSettings::advanceShapeChannel(bool decrement) { + int32_t numEnums = ShapeChannelCount; + int32_t mode = shapeChannel; + if (decrement) { + mode += numEnums - 1; + } + else { + mode += 1; + } + + shapeChannel = (ShapeChannel)(mode % numEnums); + + // skip this channel for now, in ortho it's mostly pure white + if (shapeChannel == ShapeChannelDepth) { + advanceShapeChannel(decrement); + } +} + +void ShowSettings::advanceDebugMode(bool decrement) { int32_t numEnums = DebugModeCount; - if (isShiftKeyDown) { - debugMode = (DebugMode)(((int32_t)debugMode - 1 + numEnums) % numEnums); + int32_t mode = debugMode; + if (decrement) { + mode += numEnums - 1; } else { - debugMode = (DebugMode)(((int32_t)debugMode + 1) % numEnums); + mode += 1; } + debugMode = (DebugMode)(mode % numEnums); + MyMTLPixelFormat format = (MyMTLPixelFormat)originalFormat; bool isHdr = isHdrFormat(format); @@ -27,20 +48,20 @@ void ShowSettings::advanceDebugMode(bool isShiftKeyDown) { bool isColor = isColorFormat(format); if (debugMode == DebugModeTransparent && (numChannels <= 3 || !isAlpha)) { - advanceDebugMode(isShiftKeyDown); + advanceDebugMode(decrement); } - // 2 channel textures don't really color or grayscale pixels + // 2 channel textures don't really have color or grayscale pixels if (debugMode == DebugModeColor && (numChannels <= 2 || !isColor)) { - advanceDebugMode(isShiftKeyDown); + 
advanceDebugMode(decrement); } if (debugMode == DebugModeGray && numChannels <= 2) { - advanceDebugMode(isShiftKeyDown); + advanceDebugMode(decrement); } if (debugMode == DebugModeHDR && !isHdr) { - advanceDebugMode(isShiftKeyDown); + advanceDebugMode(decrement); } // for 3 and for channel textures could skip these with more info about image (hasColor) @@ -48,13 +69,13 @@ void ShowSettings::advanceDebugMode(bool isShiftKeyDown) { // for normals show directions if (debugMode == DebugModePosX && !(isNormal || isSDF)) { - advanceDebugMode(isShiftKeyDown); + advanceDebugMode(decrement); } if (debugMode == DebugModePosY && !(isNormal)) { - advanceDebugMode(isShiftKeyDown); + advanceDebugMode(decrement); } if (debugMode == DebugModeCircleXY && !(isNormal)) { - advanceDebugMode(isShiftKeyDown); + advanceDebugMode(decrement); } // TODO: have a clipping mode against a variable range too, only show pixels within that range diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 68aa2d12..b8eaed05 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -48,6 +48,22 @@ enum DebugMode DebugModeCount }; +enum ShapeChannel +{ + ShapeChannelNone = 0, + + ShapeChannelDepth, + + ShapeChannelUV0, + + ShapeChannelNormal, + ShapeChannelTangent, + ShapeChannelBitangent, + + ShapeChannelCount +}; + + class ShowSettings { public: // Can mask various channels (r/g/b/a only, vs. all), may also add toggle of channel @@ -160,6 +176,8 @@ class ShowSettings { DebugMode debugMode = DebugModeNone; + ShapeChannel shapeChannel = ShapeChannelNone; + float4x4 projectionViewModelMatrix; // cached on load, raw info about the texture from libkram @@ -170,8 +188,9 @@ class ShowSettings { MyMTLPixelFormat originalFormat; MyMTLPixelFormat decodedFormat; - void advanceDebugMode(bool isShiftKeyDown); - + void advanceDebugMode(bool decrement); + void advanceShapeChannel(bool decrement); + string lastFilename; double lastTimestamp = 0.0; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index d8a15a45..f7f833cf 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -498,7 +498,7 @@ - (nonnull ShowSettings*)showSettings { } - (NSStackView*)_addButtons { - const int32_t numButtons = 26; // 13; + const int32_t numButtons = 27; // 13; const char* names[numButtons*2] = { "?", "Help", @@ -527,6 +527,7 @@ - (NSStackView*)_addButtons { "L", "Reload", "0", "Fit", "8", "Shape", + "6", "Shape Channel", // TODO: need to shift hud over a little // "UI", - add to show/hide buttons @@ -1378,6 +1379,7 @@ - (void)updateUIControlState auto faceState = toState(_showSettings->faceNumber > 0); auto mipState = toState(_showSettings->mipLOD > 0); auto meshState = toState(_showSettings->meshNumber > 0); + auto meshChannelState = toState(_showSettings->shapeChannel > 0); // TODO: rename to meshChannel // TODO: UI state, and vertical state auto uiState = toState(_buttonStack.hidden); @@ -1406,6 +1408,7 @@ - (void)updateUIControlState [self findButton:"S"].state = showAllState; [self findButton:"O"].state = previewState; [self findButton:"8"].state = meshState; + [self findButton:"6"].state = meshChannelState; [self findButton:"W"].state = wrapState; [self findButton:"D"].state = gridState; [self findButton:"E"].state = debugState; @@ -1436,6 +1439,8 @@ - (void)updateUIControlState [self findMenuItem:"S"].state = showAllState; [self findMenuItem:"O"].state = previewState; [self findMenuItem:"8"].state = meshState; + [self findMenuItem:"6"].state = meshChannelState; + [self findMenuItem:"W"].state = wrapState; 
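
The advanceDebugMode and advanceShapeChannel helpers in the hunk above share one pattern: step the enum forward or backward with modular arithmetic, then recurse past any entry the current texture cannot display. A minimal standalone sketch of that pattern in C++, with a hypothetical Channel enum and an isSupported stub standing in for the per-format tests (not kram code):

    enum Channel { ChannelNone = 0, ChannelDepth, ChannelUV0, ChannelNormal, ChannelCount };

    // stand-in for the per-format checks (isHdr, numChannels, ...) used by advanceDebugMode
    static bool isSupported(Channel c) {
        return c != ChannelDepth;
    }

    static Channel advanceChannel(Channel c, bool decrement) {
        int mode = int(c) + (decrement ? int(ChannelCount) - 1 : 1);
        Channel next = Channel(mode % int(ChannelCount));
        // recursion terminates because ChannelNone is always supported
        return isSupported(next) ? next : advanceChannel(next, decrement);
    }

Keeping the skip logic inside the recursive call means a single key press never lands on a mode that would render an empty view.
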
[self findMenuItem:"D"].state = gridState; [self findMenuItem:"E"].state = debugState; @@ -1509,11 +1514,14 @@ - (IBAction)handleAction:(id)sender { keyCode = Key::J; else if (title == "L") keyCode = Key::L; + else if (title == "0") keyCode = Key::Num0; else if (title == "8") keyCode = Key::Num8; - + else if (title == "6") + keyCode = Key::Num6; + else if (title == "R") keyCode = Key::R; else if (title == "G") @@ -1522,8 +1530,7 @@ - (IBAction)handleAction:(id)sender { keyCode = Key::B; else if (title == "A") keyCode = Key::A; - - + if (keyCode >= 0) [self handleKey:keyCode isShiftKeyDown:isShiftKeyDown]; } @@ -1648,6 +1655,22 @@ - (bool)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown } break; + case Key::Num6: { + _showSettings->advanceShapeChannel(isShiftKeyDown); + + switch(_showSettings->shapeChannel) { + case ShapeChannelNone: text = "Show Off"; break; + case ShapeChannelUV0: text = "Show UV0"; break; + case ShapeChannelNormal: text = "Show Normal"; break; + case ShapeChannelTangent: text = "Show Tangent"; break; + case ShapeChannelBitangent: text = "Show Bitangent"; break; + case ShapeChannelDepth: text = "Show Depth"; break; + default: break; + } + + isChanged = true; + break; + } case Key::E: { _showSettings->advanceDebugMode(isShiftKeyDown); From 8a81fb8da45e0fb4fd42672688a7e6c647d90518 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 6 Jun 2021 20:56:28 -0700 Subject: [PATCH 116/901] kramv - more shape and sampler cleanup, pass useTangent in shaders, more shape channel modes --- kramv/KramRenderer.mm | 97 +++++++++++++++++++++++++++++++++------- kramv/KramShaders.h | 9 +++- kramv/KramShaders.metal | 63 +++++++++++++++++++------- kramv/KramViewerBase.cpp | 36 +++++++++++++++ kramv/KramViewerBase.h | 12 ++++- kramv/KramViewerMain.mm | 26 +---------- 6 files changed, 184 insertions(+), 59 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 796f2933..a48899a8 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -58,11 +58,18 @@ @implementation Renderer id _colorMap; id _normalMap; - id _colorMapSamplerWrap; - id _colorMapSamplerClamp; + // border is a better edge sample, but at edges it filters in the transparent color + // around the border which is undesirable. It would be better if the hw did + // clamp to edge until uv outside 0 to 1. This results in having to inset the uv by 0.5 px + // to avoid this artifact, but on small texturs that are 4x4, a 1 px inset is noticeable. 
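
To quantify the comment above: with bilinear filtering a fetch at normalized coordinate u touches texels at u*width - 0.5 and u*width + 0.5, so any u closer than half a texel to 0 or 1 pulls in a tap outside the texture, and under clamp-to-border that tap returns the transparent border color. A hedged sketch of that test (illustrative helper, not Metal API):

    static bool bilinearTapLeavesTexture(float u, float texelCount) {
        float halfTexel = 0.5f / texelCount;           // half a texel in normalized uv
        return u < halfTexel || u > 1.0f - halfTexel;  // a tap falls outside [0,1]
    }

On a 4x4 texture halfTexel is already 0.125, which is why the inset is so visible on textures that small.
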
- id _colorMapSamplerBilinearWrap; - id _colorMapSamplerBilinearClamp; + id _colorMapSamplerNearestWrap; + id _colorMapSamplerNearestBorder; + id _colorMapSamplerNearestEdge; + + id _colorMapSamplerFilterWrap; + id _colorMapSamplerFilterBorder; + id _colorMapSamplerFilterEdge; //id _sampleRT; id _sampleTex; @@ -128,32 +135,52 @@ - (void)_createSamplers samplerDescriptor.sAddressMode = MTLSamplerAddressModeRepeat; samplerDescriptor.tAddressMode = MTLSamplerAddressModeRepeat; samplerDescriptor.rAddressMode = MTLSamplerAddressModeRepeat; - samplerDescriptor.label = @"colorMapSamplerWrap"; + samplerDescriptor.label = @"colorMapSamplerNearestWrap"; - _colorMapSamplerWrap = [_device newSamplerStateWithDescriptor:samplerDescriptor]; + _colorMapSamplerNearestWrap = [_device newSamplerStateWithDescriptor:samplerDescriptor]; samplerDescriptor.sAddressMode = MTLSamplerAddressModeClampToBorderColor; samplerDescriptor.tAddressMode = MTLSamplerAddressModeClampToBorderColor; samplerDescriptor.rAddressMode = MTLSamplerAddressModeClampToBorderColor; - samplerDescriptor.label = @"colorMapSamplerClamp"; + samplerDescriptor.label = @"colorMapSamplerNearestBorder"; - _colorMapSamplerClamp = [_device newSamplerStateWithDescriptor:samplerDescriptor]; + _colorMapSamplerNearestBorder = [_device newSamplerStateWithDescriptor:samplerDescriptor]; + + samplerDescriptor.sAddressMode = MTLSamplerAddressModeClampToEdge; + samplerDescriptor.tAddressMode = MTLSamplerAddressModeClampToEdge; + samplerDescriptor.rAddressMode = MTLSamplerAddressModeClampToEdge; + samplerDescriptor.label = @"colorMapSamplerNearsetEdge"; + + _colorMapSamplerNearestEdge = [_device newSamplerStateWithDescriptor:samplerDescriptor]; + + // ----- // these are for preview mode // use the mips, and specify linear for min/mag for SDF case samplerDescriptor.minFilter = MTLSamplerMinMagFilterLinear; samplerDescriptor.magFilter = MTLSamplerMinMagFilterLinear; samplerDescriptor.mipFilter = MTLSamplerMipFilterLinear; - samplerDescriptor.label = @"colorMapSamplerBilinearClamp"; + + samplerDescriptor.sAddressMode = MTLSamplerAddressModeClampToBorderColor; + samplerDescriptor.tAddressMode = MTLSamplerAddressModeClampToBorderColor; + samplerDescriptor.rAddressMode = MTLSamplerAddressModeClampToBorderColor; + samplerDescriptor.label = @"colorMapSamplerFilterBorder"; - _colorMapSamplerBilinearClamp = [_device newSamplerStateWithDescriptor:samplerDescriptor]; + _colorMapSamplerFilterBorder = [_device newSamplerStateWithDescriptor:samplerDescriptor]; + + samplerDescriptor.sAddressMode = MTLSamplerAddressModeClampToEdge; + samplerDescriptor.tAddressMode = MTLSamplerAddressModeClampToEdge; + samplerDescriptor.rAddressMode = MTLSamplerAddressModeClampToEdge; + samplerDescriptor.label = @"colorMapSamplerFilterEdge"; + + _colorMapSamplerFilterEdge = [_device newSamplerStateWithDescriptor:samplerDescriptor]; samplerDescriptor.sAddressMode = MTLSamplerAddressModeRepeat; samplerDescriptor.tAddressMode = MTLSamplerAddressModeRepeat; samplerDescriptor.rAddressMode = MTLSamplerAddressModeRepeat; samplerDescriptor.label = @"colorMapSamplerBilinearWrap"; - _colorMapSamplerBilinearWrap = [_device newSamplerStateWithDescriptor:samplerDescriptor]; + _colorMapSamplerFilterWrap = [_device newSamplerStateWithDescriptor:samplerDescriptor]; } - (void)_createVertexDescriptor @@ -449,15 +476,35 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU } } - // TODO: name the vertex attributes, can that be done in _mdlVertexDescriptor - // may have to set name on 
MTLBuffer range on IB and VB - + // now set it into mtk mesh MTKMesh* mesh = [[MTKMesh alloc] initWithMesh:mdlMesh device:_device error:&error]; mesh.name = [NSString stringWithUTF8String:name]; + + // these range names may onl show up when looking at geometry in capture + // These don't seem to appear as the buffer name that is suballocated from + { + // name the vertex range on the vb + MTKMeshBuffer* pos = mesh.vertexBuffers[BufferIndexMeshPosition]; + MTKMeshBuffer* uvs = mesh.vertexBuffers[BufferIndexMeshUV0]; + MTKMeshBuffer* normals = mesh.vertexBuffers[BufferIndexMeshNormal]; + MTKMeshBuffer* tangents = mesh.vertexBuffers[BufferIndexMeshTangent]; + + [pos.buffer addDebugMarker:@"Pos" range:NSMakeRange(pos.offset, pos.length)]; + [uvs.buffer addDebugMarker:@"UV" range:NSMakeRange(uvs.offset, uvs.length)]; + [normals.buffer addDebugMarker:@"Nor" range:NSMakeRange(normals.offset, normals.length)]; + [tangents.buffer addDebugMarker:@"Tan" range:NSMakeRange(tangents.offset, tangents.length)]; + + // This seems to already be named "ellisoid-Indices", + // need to do for ib as well + for (MTKSubmesh* submesh in mesh.submeshes) { + [submesh.indexBuffer.buffer addDebugMarker:mesh.name range:NSMakeRange(submesh.indexBuffer.offset, submesh.indexBuffer.length)]; + } + } + if(!mesh || error) { NSLog(@"Error creating MetalKit mesh %@", error.localizedDescription); @@ -531,6 +578,12 @@ - (void)_loadAssets normal.z = copy.x * cosSin.y + copy.z * cosSin.x; } } + + // Hack - knock out all bogus tris from ModelIO that lead to garbage tris + for (uint32_t i = numVertices; i < mdlMesh.vertexCount; ++i) { + auto& pos = posData[i]; + pos.x = NAN; + } } @@ -611,6 +664,12 @@ - (void)_loadAssets uv.x = x; } + // Hack - knock out all bogus tris from ModelIO that lead to garbage tris + for (uint32_t i = numVertices; i < mdlMesh.vertexCount; ++i) { + auto& pos = posData[i]; + pos.x = NAN; + } + // TODO: may need to flip tangent on the inverted side // otherwise lighting is just wrong, but tangents generated in _createMeshAsset // move that here, and flip the tangents in the loop @@ -966,6 +1025,10 @@ - (void)_updateGameState uniforms.isNormalMapSwizzleAGToRG = false; // TODO: need a prop for this } } + + // TODO: tie to UI + // a few things to fix before enabling this + uniforms.useTangent = false; uniforms.gridX = 0; uniforms.gridY = 0; @@ -1272,7 +1335,7 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie // use exisiting lod, and mip [renderEncoder setFragmentSamplerState: - (canWrap && _showSettings->isWrap) ? _colorMapSamplerBilinearWrap : _colorMapSamplerBilinearClamp + (canWrap && _showSettings->isWrap) ? _colorMapSamplerFilterWrap : _colorMapSamplerFilterBorder atIndex:SamplerIndexColor]; for(MTKSubmesh *submesh in _mesh.submeshes) @@ -1347,7 +1410,7 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie // force lod, and don't mip [renderEncoder setFragmentSamplerState: - (canWrap && _showSettings->isWrap) ? _colorMapSamplerWrap : _colorMapSamplerClamp + (canWrap && _showSettings->isWrap) ? _colorMapSamplerNearestWrap : _colorMapSamplerNearestBorder lodMinClamp:mip lodMaxClamp:mip + 1 atIndex:SamplerIndexColor]; @@ -1384,7 +1447,7 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie // force lod, and don't mip [renderEncoder setFragmentSamplerState: - (canWrap && _showSettings->isWrap) ? _colorMapSamplerWrap : _colorMapSamplerClamp + (canWrap && _showSettings->isWrap) ? 
_colorMapSamplerNearestWrap : _colorMapSamplerNearestBorder lodMinClamp:mip lodMaxClamp:mip + 1 atIndex:SamplerIndexColor]; diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index a556dfc8..33b60c6d 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -101,9 +101,13 @@ typedef NS_ENUM(int32_t, ShaderShapeChannel) ShShapeChannelUV0, + ShShapeChannelFaceNormal, + ShShapeChannelNormal, ShShapeChannelTangent, - ShShapeChannelBitangent + ShShapeChannelBitangent, + + // ShShapeChannelBumpNormal, }; @@ -132,6 +136,9 @@ struct Uniforms bool isNormalMapSigned; bool isNormalMapSwizzleAGToRG; + // this means pull tangent from vertex + bool useTangent; + uint32_t numChannels; // control the pixel grid dimensions, can be block size, or pixel size diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 23e84eef..fc49f249 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -6,11 +6,10 @@ using namespace metal; -// Whether to use model tangents (true) or generate tangents from normal in fragment shader (false). -// When set false, the algorithm doesn't adjust for mirrored uv -// See meshSphereMirrored and set this to false. -// TODO: hook this up to uniform and pass into calls -constant bool useTangent = true; +// TODO: Getting weird triangle artifacts on AMC 5500m on 16" MBP with useTangent = false. +// Seems that uv derivatives used for basis generation are 0 in gpu capture +// even though the uv itself are not. That shouldn't be possible. +// This results in large triangular artitfacts at the bottom of the sphere/capsule. //--------------------------------- // helpers @@ -189,6 +188,14 @@ half3 toNormal(half3 n) return n; } +// This will result in comlier failed XPC_ERROR_CONNECTION_INTERRUPTED +// was based on forum suggestion. assert() does nothing in Metal. +//#define myMetalAssert(x) \ +// if (!(x)) { \ +// device float* f = 0; \ +// *f = 12; \ +// } +//#define myMetalAssert(x) assert(x) // https://www.gamasutra.com/blogs/RobertBasler/20131122/205462/Three_Normal_Mapping_Techniques_Explained_For_the_Mathematically_Uninclined.php?print=1 // http://www.thetenthplanet.de/archives/1180 @@ -210,6 +217,13 @@ half3 transformNormalByBasis(half3 bumpNormal, half3 vertexNormal, float3 worldP float2 duv1 = dfdx(uv); float2 duv2 = dfdy(uv); + // getting non-zere uv with 0 length duv1/2 on MBP 16", this leaves missing bump artifacts + // in large triangle error so this is a patch to avoid that. 
+ if ((length_squared(duv1) < 1e-12) && + (length_squared(duv2) < 1e-12)) { + return vertexNormal; + } + // solve the linear system float3 dp2perp = cross(dp2, N); float3 dp1perp = cross(N, dp1); @@ -218,7 +232,7 @@ half3 transformNormalByBasis(half3 bumpNormal, half3 vertexNormal, float3 worldP float invmax = rsqrt(max(length_squared(T), length_squared(B))); // keeps relative magnitude of two vectors, they're not both unit vecs - T *= invmax; + T *= -invmax; // had to flip this sign to get correct lighting B *= invmax; // construct a scale-invariant frame @@ -261,6 +275,7 @@ half3 transformNormalByBasis(half3 bumpNormal, half4 tangent, half3 vertexNormal half3 transformNormal(half4 tangent, half3 vertexNormal, float3 worldPos, + bool useTangent, texture2d texture, sampler s, float2 uv, bool isSigned = true) { half4 nmap = texture.sample(s, uv); @@ -283,8 +298,8 @@ half3 transformNormal(half4 tangent, half3 vertexNormal, float3 worldPos, half3 transformNormal(half4 nmap, half3 vertexNormal, half4 tangent, - float3 worldPos, float2 uv, // to gen TBN - bool isSwizzleAGToRG, bool isSigned, bool isFrontFacing) + float3 worldPos, float2 uv, bool useTangent, // to gen TBN from normal + bool isSwizzleAGToRG, bool isSigned, bool isFrontFacing) { // add swizzle for ASTC/BC5nm, other 2 channels format can only store 01 in ba // could use hw swizzle for this @@ -368,7 +383,8 @@ void skinPosAndBasis(thread float4& position, thread float3& tangent, thread flo normal = (float4(normal, 0.0) * bindPoseToBoneTransform); - if (useTangent) + // compiler will deadstrip if tangent unused by caller + //if (useTangent) tangent = (float4(tangent, 0.0) * bindPoseToBoneTransform); } @@ -379,7 +395,7 @@ float3x3 toFloat3x3(float4x4 m) // this is for vertex shader if tangent supplied void transformBasis(thread float3& normal, thread float3& tangent, - float4x4 modelToWorldTfm, float3 invScale2) + float4x4 modelToWorldTfm, float3 invScale2, bool useTangent) { float3x3 m = toFloat3x3(modelToWorldTfm); @@ -443,7 +459,7 @@ ColorInOut DrawImageFunc( if (uniforms.isNormalMapPreview) { float3 normal = in.normal; float3 tangent = in.tangent.xyz; - transformBasis(normal, tangent, uniforms.modelMatrix, uniforms.modelMatrixInvScale2); + transformBasis(normal, tangent, uniforms.modelMatrix, uniforms.modelMatrixInvScale2, uniforms.useTangent); out.normal = toHalf(normal); @@ -665,7 +681,7 @@ float4 DrawPixels( half4 nmapH = toHalf(c); half3 n = transformNormal(nmapH, in.normal, in.tangent, - in.worldPos, in.texCoord, // to build TBN + in.worldPos, in.texCoord, uniforms.useTangent, // to build TBN uniforms.isSwizzleAGToRG, uniforms.isSigned, facing); @@ -686,7 +702,7 @@ float4 DrawPixels( half4 nmapH = toHalf(nmap); half3 n = transformNormal(nmapH, in.normal, in.tangent, - in.worldPos, in.texCoord, // to build TBN + in.worldPos, in.texCoord, uniforms.useTangent, // to build TBN uniforms.isNormalMapSwizzleAGToRG, uniforms.isNormalMapSigned, facing); c = doLighting(c, viewDir, toFloat(n), toFloat(in.normal)); @@ -773,18 +789,35 @@ float4 DrawPixels( else if (uniforms.shapeChannel == ShShapeChannelNormal) { c.rgb = toUnorm(toFloat(in.normal)); } - else if (useTangent && uniforms.shapeChannel == ShShapeChannelTangent) { + else if (uniforms.useTangent && uniforms.shapeChannel == ShShapeChannelTangent) { // TODO: make this work with useTangent = false + // may have to call routine again, or pass back basis + c.rgb = toUnorm(toFloat(in.tangent.xyz)); } else if (uniforms.shapeChannel == ShShapeChannelBitangent) { // TODO: make this work 
with useTangent = false + // may have to call routine again, or pass back basis + half3 bitangent = cross(in.tangent.xyz, in.normal) * in.tangent.w; c.rgb = toUnorm(toFloat(bitangent)); } else if (uniforms.shapeChannel == ShShapeChannelDepth) { c.rgb = saturate(in.position.z / in.position.w); } + else if (uniforms.shapeChannel == ShShapeChannelFaceNormal) { + float3 faceNormal = -cross(dfdx(in.worldPos), dfdy(in.worldPos)); + faceNormal = normalize(faceNormal); + + // TODO: incorporate facing? + + c.rgb = saturate(toUnorm(faceNormal)); + } +// else if (uniforms.shapeChannel == ShShapeChannelBumpNormal) { +// c.rgb = saturate(bumpNormal); +// } + + c.a = 1.0; } // mask to see one channel in isolation, this is really 0'ing out other channels @@ -826,8 +859,6 @@ float4 DrawPixels( // nothing for alpha? } - - if (uniforms.debugMode != ShDebugModeNone && c.a != 0.0) { bool isHighlighted = false; diff --git a/kramv/KramViewerBase.cpp b/kramv/KramViewerBase.cpp index a5c5c032..643f340d 100644 --- a/kramv/KramViewerBase.cpp +++ b/kramv/KramViewerBase.cpp @@ -10,6 +10,42 @@ int32_t ShowSettings::totalChunks() const { return std::max(one, faceCount) * std::max(one, arrayCount) * std::max(one, sliceCount); } +const char* ShowSettings::shapeChannelText() const { + const char* text = ""; + + switch(shapeChannel) { + case ShapeChannelNone: text = "Show Off"; break; + case ShapeChannelUV0: text = "Show UV0"; break; + case ShapeChannelNormal: text = "Show Normal"; break; + case ShapeChannelTangent: text = "Show Tangent"; break; + case ShapeChannelBitangent: text = "Show Bitangent"; break; + case ShapeChannelDepth: text = "Show Depth"; break; + case ShapeChannelFaceNormal: text = "Show Faces"; break; + //case ShapeChannelBumpNormal: text = "Show Bumps"; break; + default: break; + } + + return text; +} + +const char* ShowSettings::debugModeText() const { + const char* text = ""; + + switch(debugMode) { + case DebugModeNone: text = "Debug Off"; break; + case DebugModeTransparent: text = "Debug Transparent"; break; + case DebugModeNonZero: text = "Debug NonZero"; break; + case DebugModeColor: text = "Debug Color"; break; + case DebugModeGray: text = "Debug Gray"; break; + case DebugModeHDR: text = "Debug HDR"; break; + case DebugModePosX: text = "Debug +X"; break; + case DebugModePosY: text = "Debug +Y"; break; + case DebugModeCircleXY: text = "Debug XY>=1"; break; + default: break; + } + return text; +} + void ShowSettings::advanceShapeChannel(bool decrement) { int32_t numEnums = ShapeChannelCount; int32_t mode = shapeChannel; diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index b8eaed05..5ba1568f 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -56,10 +56,17 @@ enum ShapeChannel ShapeChannelUV0, - ShapeChannelNormal, + ShapeChannelFaceNormal, // gen from dfdx and dfdy + + ShapeChannelNormal, // vertex normal ShapeChannelTangent, ShapeChannelBitangent, + // don't need bump, since can already see it, but what if combined diffuse + normal + // ShapeChannelBumpNormal, + + // ShapeChannelMipLevel, // can estimate mip chose off dfdx/dfdy, and pseudocolor + ShapeChannelCount }; @@ -191,6 +198,9 @@ class ShowSettings { void advanceDebugMode(bool decrement); void advanceShapeChannel(bool decrement); + const char* shapeChannelText() const; + const char* debugModeText() const; + string lastFilename; double lastTimestamp = 0.0; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index f7f833cf..f1aaf7db 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ 
-1657,35 +1657,13 @@ - (bool)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown case Key::Num6: { _showSettings->advanceShapeChannel(isShiftKeyDown); - - switch(_showSettings->shapeChannel) { - case ShapeChannelNone: text = "Show Off"; break; - case ShapeChannelUV0: text = "Show UV0"; break; - case ShapeChannelNormal: text = "Show Normal"; break; - case ShapeChannelTangent: text = "Show Tangent"; break; - case ShapeChannelBitangent: text = "Show Bitangent"; break; - case ShapeChannelDepth: text = "Show Depth"; break; - default: break; - } - + text = _showSettings->shapeChannelText(); isChanged = true; break; } case Key::E: { _showSettings->advanceDebugMode(isShiftKeyDown); - - switch(_showSettings->debugMode) { - case DebugModeNone: text = "Debug Off"; break; - case DebugModeTransparent: text = "Debug Transparent"; break; - case DebugModeNonZero: text = "Debug NonZero"; break; - case DebugModeColor: text = "Debug Color"; break; - case DebugModeGray: text = "Debug Gray"; break; - case DebugModeHDR: text = "Debug HDR"; break; - case DebugModePosX: text = "Debug +X"; break; - case DebugModePosY: text = "Debug +Y"; break; - case DebugModeCircleXY: text = "Debug XY>=1"; break; - default: break; - } + text = _showSettings->debugModeText(); isChanged = true; break; } From 31c556bb3cffef718e64e225065538f946d4ea6c Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 6 Jun 2021 21:43:13 -0700 Subject: [PATCH 117/901] kramv - add doInvertX test Shaders can get rendering shape and uv mirroring. Add inversion test, but that doesn't mean code passes when invert is on. Lighting looks flipped. Pass the determinant (sign is inversion) via the inverseScale2.w term. Add flip of winding for front vs. backfacing. Shape is a bit pancaked in capture since view does non-uniform scale but not on z-axis. 
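
Since the commit above leans on the determinant trick, here is a hedged, self-contained C++ sketch of the idea (plain structs rather than the simd types used in the patch below): the sign of the upper-3x3 determinant of the model matrix says whether the mesh is mirrored, and packing it into the w of the inverse-squared-scale vector lets the renderer flip winding for negatively scaled draws.

    #include <cmath>

    struct float3 { float x, y, z; };

    static float dot3(float3 a, float3 b)    { return a.x*b.x + a.y*b.y + a.z*b.z; }
    static float3 cross3(float3 a, float3 b) {
        return { a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x };
    }

    struct float3x3 { float3 c0, c1, c2; };  // columns of the upper 3x3
    struct float4   { float x, y, z, w; };

    // det < 0 means an odd number of axes are mirrored (e.g. scale.x = -1)
    static float determinant3x3(const float3x3& m) {
        return dot3(m.c0, cross3(m.c1, m.c2));
    }

    static float4 inverseScaleSquaredWithDet(const float3x3& m) {
        float sx2 = dot3(m.c0, m.c0), sy2 = dot3(m.c1, m.c1), sz2 = dot3(m.c2, m.c2);
        const float kMinScale2 = 1e-8f;  // avoid divide by zero on degenerate scale
        return { 1.0f / std::fmax(sx2, kMinScale2),
                 1.0f / std::fmax(sy2, kMinScale2),
                 1.0f / std::fmax(sz2, kMinScale2),
                 determinant3x3(m) };
    }

The inverseScaleSquared() change in the patch below does the same thing with simd types; a draw whose returned w is negative uses the opposite front-face winding.
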
--- kramv/KramRenderer.mm | 51 ++++++++++++++++++++++++++--------------- kramv/KramShaders.h | 2 +- kramv/KramShaders.metal | 2 +- kramv/KramViewerBase.h | 1 + 4 files changed, 36 insertions(+), 20 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index a48899a8..33cb7d18 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -467,10 +467,10 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU packed_float4* uvData = (packed_float4*)uvsMap.bytes; for (uint32_t i = 0; i < mdlMesh.vertexCount; ++i) { - if (uvData[i].w != -1.0f && uvData[i].w != 1.0f) { - int bp = 0; - bp = bp; - } +// if (uvData[i].w != -1.0f && uvData[i].w != 1.0f) { +// int bp = 0; +// bp = bp; +// } uvData[i].w = -uvData[i].w; } @@ -484,7 +484,7 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU mesh.name = [NSString stringWithUTF8String:name]; - // these range names may onl show up when looking at geometry in capture + // these range names may only show up when looking at geometry in capture // These don't seem to appear as the buffer name that is suballocated from { // name the vertex range on the vb @@ -639,10 +639,10 @@ - (void)_loadAssets auto& uv = uvData[i]; - if (uv.x < 0.0 || uv.x > 1.0) { - int bp = 0; - bp = bp; - } +// if (uv.x < 0.0 || uv.x > 1.0) { +// int bp = 0; +// bp = bp; +// } // this makes it counterclockwise 0 to 1 float x = uv.x; @@ -678,7 +678,7 @@ - (void)_loadAssets _meshSphereMirrored = [self _createMeshAsset:"MeshSphereMirrored" mdlMesh:mdlMesh doFlipUV:false]; -// this maps 1/3rd of texture to the caps, and just isn't a very good uv mapping, using capsule nistead +// this maps 1/3rd of texture to the caps, and just isn't a very good uv mapping, using capsule instead // mdlMesh = [MDLMesh newCylinderWithHeight:1.0 // radii:(vector_float2){0.5, 0.5} // radialSegments:16 @@ -928,16 +928,19 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex _showSettings->shapeChannel = ShapeChannel::ShapeChannelNone; + // test rendering with inversion and mirroring + bool doInvertX = false; + // have one of these for each texture added to the viewer float scaleX = MAX(1, texture.width); float scaleY = MAX(1, texture.height); float scaleZ = MAX(scaleX, scaleY); // don't want 1.0f, or specular is all off due to extreme scale differences - _modelMatrix = float4x4(float4m(scaleX, scaleY, scaleZ, 1.0f)); // non uniform scale + _modelMatrix = float4x4(float4m(doInvertX ? -scaleX : scaleX, scaleY, scaleZ, 1.0f)); // non uniform scale _modelMatrix = _modelMatrix * matrix4x4_translation(0.0f, 0.0f, -1.0); // set z=-1 unit back // uniform scaled 3d primitiv float scale = MAX(scaleX, scaleY); - _modelMatrix3D = float4x4(float4m(scale, scale, scale, 1.0f)); // uniform scale + _modelMatrix3D = float4x4(float4m(doInvertX ? 
-scale : scale, scale, scale, 1.0f)); // uniform scale _modelMatrix3D = _modelMatrix3D * matrix4x4_translation(0.0f, 0.0f, -1.0f); // set z=-1 unit back return YES; @@ -948,6 +951,8 @@ - (float4x4)computeImageTransform:(float)panX panY:(float)panY zoom:(float)zoom float4x4 panTransform = matrix4x4_translation(-panX, panY, 0.0); // non-uniform scale is okay here, only affects ortho volume + // setting this to uniform zoom and object is not visible, zoom can be 20x in x and y + float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); viewMatrix = panTransform * viewMatrix; @@ -964,7 +969,11 @@ bool almost_equal_elements(float3 v, float tol) { return (fabs(v.x - v.y) < tol) && (fabs(v.x - v.z) < tol); } -float3 inverseScaleSquared(float4x4 m) { +const float3x3& toFloat3x3(const float4x4& m) { + return (const float3x3&)m; +} + +float4 inverseScaleSquared(const float4x4& m) { float3 scaleSquared = float3m( length_squared(m.columns[0].xyz), length_squared(m.columns[1].xyz), @@ -978,11 +987,12 @@ float3 inverseScaleSquared(float4x4 m) { // don't divide by 0 float3 invScaleSquared = recip(simd::max(float3m(0.0001 * 0.0001), scaleSquared)); - // TODO: could also identify determinant here for flipping orientation + // identify determinant here for flipping orientation // all shapes with negative determinant need orientation flipped for backfacing - // and need to be rendered together + // and need to be grouned together if rendering with instancing + float det = determinant(toFloat3x3(m)); - return invScaleSquared; + return float4m(invScaleSquared, det); } - (void)_updateGameState @@ -1028,7 +1038,7 @@ - (void)_updateGameState // TODO: tie to UI // a few things to fix before enabling this - uniforms.useTangent = false; + uniforms.useTangent = true; uniforms.gridX = 0; uniforms.gridY = 0; @@ -1092,6 +1102,8 @@ - (void)_updateGameState uniforms.modelMatrixInvScale2 = inverseScaleSquared(_modelMatrix3D); + _showSettings->isInverted = uniforms.modelMatrixInvScale2.w < 0.0f; + // this was stored so view could use it, but now that code calcs the transform via computeImageTransform _showSettings->projectionViewModelMatrix = uniforms.projectionViewMatrix * uniforms.modelMatrix; @@ -1113,6 +1125,8 @@ - (void)_updateGameState uniforms.modelMatrixInvScale2 = inverseScaleSquared(_modelMatrix); + _showSettings->isInverted = uniforms.modelMatrixInvScale2.w < 0.0f; + // this was stored so view could use it, but now that code calcs the transform via computeImageTransform _showSettings->projectionViewModelMatrix = uniforms.projectionViewMatrix * uniforms.modelMatrix ; @@ -1241,7 +1255,8 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie renderEncoder.label = @"MainRender"; // set raster state - [renderEncoder setFrontFacingWinding:MTLWindingCounterClockwise]; + [renderEncoder setFrontFacingWinding:_showSettings->isInverted ? 
+ MTLWindingCounterClockwise : MTLWindingCounterClockwise]; [renderEncoder setCullMode:MTLCullModeBack]; [renderEncoder setDepthStencilState:_depthStateFull]; diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index 33b60c6d..f2027eab 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -117,7 +117,7 @@ struct Uniforms { simd::float4x4 projectionViewMatrix; simd::float4x4 modelMatrix; - simd::float3 modelMatrixInvScale2; // to supply inverse + simd::float4 modelMatrixInvScale2; // to supply inverse, w is determinant simd::float3 cameraPosition; // world-space bool isSigned; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index fc49f249..d93fcc6c 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -459,7 +459,7 @@ ColorInOut DrawImageFunc( if (uniforms.isNormalMapPreview) { float3 normal = in.normal; float3 tangent = in.tangent.xyz; - transformBasis(normal, tangent, uniforms.modelMatrix, uniforms.modelMatrixInvScale2, uniforms.useTangent); + transformBasis(normal, tangent, uniforms.modelMatrix, uniforms.modelMatrixInvScale2.xyz, uniforms.useTangent); out.normal = toHalf(normal); diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 5ba1568f..22a51403 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -186,6 +186,7 @@ class ShowSettings { ShapeChannel shapeChannel = ShapeChannelNone; float4x4 projectionViewModelMatrix; + bool isInverted; // cached on load, raw info about the texture from libkram string imageInfo; From 5b507b4547f341e8099a6f63a258b240e7dfa045 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 6 Jun 2021 22:21:40 -0700 Subject: [PATCH 118/901] kramv - fix zoom when doInvertX is true computing rect needs to take into absolute bound so it doesn't go negative. --- kramv/KramViewerMain.mm | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index f1aaf7db..bb5afa40 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -780,7 +780,11 @@ - (void)handleGesture:(NSGestureRecognizer *)gestureRecognizer //pt1 /= pt1.w; // see that rectangle intersects the view, view is -1 to 1 - CGRect imageRect = CGRectMake(pt0.x, pt0.y, pt1.x - pt0.x, pt1.y - pt0.y); + // this handles inversion + float2 ptOrigin = simd::min(pt0.xy, pt1.xy); + float2 ptSize = abs(pt0.xy - pt1.xy); + + CGRect imageRect = CGRectMake(ptOrigin.x, ptOrigin.y, ptSize.x, ptSize.y); CGRect viewRect = CGRectMake(-1.0f, -1.0f, 2.0f, 2.0f); int32_t numTexturesX = _showSettings->totalChunks(); @@ -1210,8 +1214,11 @@ - (void)scrollWheel:(NSEvent *)event //pt0 /= pt0.w; //pt1 /= pt1.w; + float2 ptOrigin = simd::min(pt0.xy, pt1.xy); + float2 ptSize = abs(pt0.xy - pt1.xy); + // see that rectangle intersects the view, view is -1 to 1 - CGRect imageRect = CGRectMake(pt0.x, pt0.y, pt1.x - pt0.x, pt1.y - pt0.y); + CGRect imageRect = CGRectMake(ptOrigin.x, ptOrigin.y, ptSize.x, ptSize.y); CGRect viewRect = CGRectMake(-1.0f, -1.0f, 2.0f, 2.0f); int32_t numTexturesX = _showSettings->totalChunks(); From f7b410703456463d86311512d7e639f632ec6502 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 8 Jun 2021 09:17:43 -0700 Subject: [PATCH 119/901] kramv - different address mode, split up update vs. reset settings, add shape name to hud, display png info Switch to clamp-to-edge from clamp-to-zero to avoid isHalfPixelInset and transparent pulldown at edges. 
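
For reference, the half-pixel inset that clamp-to-border otherwise forces on wrap-around shapes is just a uv remap; a minimal C++ sketch with assumed helper names (the isInsetByHalfPixel path in the shader below does something similar in spirit):

    struct float2 { float x, y; };

    static float insetCoord(float v, float texelCount) {
        float halfTexel = 0.5f / texelCount;              // half a texel in normalized uv
        return halfTexel + v * (1.0f - 2.0f * halfTexel); // remap [0,1] into [halfTexel, 1-halfTexel]
    }

    static float2 insetUVByHalfTexel(float2 uv, float width, float height) {
        return { insetCoord(uv.x, width), insetCoord(uv.y, height) };
    }

Clamp-to-edge removes the transparent pulldown at the borders, though the patch below still enables the inset on the sphere and capsule shapes whose uv wraps around.
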
--- kramv/KramRenderer.h | 3 +- kramv/KramRenderer.mm | 239 ++++++++++++++++++++++----------------- kramv/KramShaders.h | 3 + kramv/KramShaders.metal | 2 +- kramv/KramViewerBase.cpp | 28 +++++ kramv/KramViewerBase.h | 4 +- kramv/KramViewerMain.mm | 15 +-- 7 files changed, 179 insertions(+), 115 deletions(-) diff --git a/kramv/KramRenderer.h b/kramv/KramRenderer.h index 4f68d4ca..31490ae2 100644 --- a/kramv/KramRenderer.h +++ b/kramv/KramRenderer.h @@ -30,7 +30,8 @@ namespace kram { - (BOOL)loadTextureFromImage:(const std::string&)fullFilename timestamp:(double)timestamp image:(kram::KTXImage&)image - imageNormal:(nullable kram::KTXImage*)imageNormal; + imageNormal:(nullable kram::KTXImage*)imageNormal + isArchive:(BOOL)isArchive; - (BOOL)loadTexture:(nonnull NSURL *)url; diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 33cb7d18..06d469c5 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -705,18 +705,23 @@ - (void)_loadAssets } +static bool isPNGFilename(const char* filename) { + // should really lookg at first 4 bytes of data + return endsWithExtension(filename, ".png") || endsWithExtension(filename, ".PNG"); +} + - (BOOL)loadTextureFromImage:(const string&)fullFilename timestamp:(double)timestamp image:(kram::KTXImage&)image imageNormal:(kram::KTXImage*)imageNormal + isArchive:(BOOL)isArchive { // image can be decoded to rgba8u if platform can't display format natively // but still want to identify blockSize from original format // Note that modstamp can change, but content data hash may be the same - bool isTextureChanged = - (fullFilename != _showSettings->lastFilename) || - (timestamp != _showSettings->lastTimestamp); + bool isNewFile = (fullFilename != _showSettings->lastFilename); + bool isTextureChanged = isNewFile || (timestamp != _showSettings->lastTimestamp); if (isTextureChanged) { // synchronously cpu upload from ktx file to buffer, with eventual gpu blit from buffer to returned texture. TODO: If buffer is full, then something needs to keep KTXImage and data alive. This load may also decode the texture to RGBA8. 
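
As the comment in isPNGFilename() above notes, sniffing the file signature is more reliable than matching the extension; a hedged sketch of that check (helper name assumed, not part of kram). Every PNG begins with the same fixed 8-byte signature:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    static bool hasPNGSignature(const uint8_t* data, std::size_t dataSize) {
        static const uint8_t kSig[8] = { 0x89, 'P', 'N', 'G', '\r', '\n', 0x1A, '\n' };
        return dataSize >= sizeof(kSig) && std::memcmp(data, kSig, sizeof(kSig)) == 0;
    }

The extension test stays useful as a cheap first pass when only a filename is available, which is the case in the loader path above.
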
@@ -738,10 +743,19 @@ - (BOOL)loadTextureFromImage:(const string&)fullFilename // if archive contained png, then it's been converted to ktx // so the info below may not reflect original data + // Would need original png data to look at header + // This is only info on image, not on imageNormal + + bool isPNG = isPNGFilename(fullFilename.c_str()); + if (!isArchive && isPNG) { + _showSettings->imageInfo = kramInfoToString(fullFilename, false); + _showSettings->imageInfoVerbose = kramInfoToString(fullFilename, true); + } + else { + _showSettings->imageInfo = kramInfoKTXToString(fullFilename, image, false); + _showSettings->imageInfoVerbose = kramInfoKTXToString(fullFilename, image, true); + } - _showSettings->imageInfo = kramInfoKTXToString(fullFilename, image, false); - _showSettings->imageInfoVerbose = kramInfoKTXToString(fullFilename, image, true); - _showSettings->originalFormat = (MyMTLPixelFormat)originalFormatMTL; _showSettings->decodedFormat = (MyMTLPixelFormat)texture.pixelFormat; @@ -752,9 +766,13 @@ - (BOOL)loadTextureFromImage:(const string&)fullFilename _colorMap = texture; _normalMap = normalTexture; } + + [self updateImageSettings:fullFilename image:image]; } - return [self loadTextureImpl:fullFilename isTextureChanged:isTextureChanged]; + [self resetSomeImageSettings:isNewFile]; + + return YES; } - (BOOL)loadTexture:(nonnull NSURL *)url @@ -768,9 +786,9 @@ - (BOOL)loadTexture:(nonnull NSURL *)url // DONE: tie this to url and modstamp differences double timestamp = fileDate.timeIntervalSince1970; - bool isTextureChanged = - (fullFilename != _showSettings->lastFilename) || - (timestamp != _showSettings->lastTimestamp); + bool isNewFile = (fullFilename != _showSettings->lastFilename); + + bool isTextureChanged = isNewFile || (timestamp != _showSettings->lastTimestamp); // image can be decoded to rgba8u if platform can't display format natively // but still want to identify blockSize from original format @@ -794,12 +812,15 @@ - (BOOL)loadTexture:(nonnull NSURL *)url // this is not the png data, but info on converted png to ktx level // But this avoids loading the image 2 more times // Size of png is very different than decompressed or recompressed ktx - - _showSettings->imageInfo = kramInfoKTXToString(fullFilename, image, false); - _showSettings->imageInfoVerbose = kramInfoKTXToString(fullFilename, image, true); - - //_showSettings->imageInfo = kramInfoToString(fullFilename, image, false); - //_showSettings->imageInfoVerbose = kramInfoToString(fullFilename, image, true); + bool isPNG = isPNGFilename(fullFilename.c_str()); + if (isPNG) { + _showSettings->imageInfo = kramInfoToString(fullFilename, false); + _showSettings->imageInfoVerbose = kramInfoToString(fullFilename, true); + } + else { + _showSettings->imageInfo = kramInfoKTXToString(fullFilename, image, false); + _showSettings->imageInfoVerbose = kramInfoKTXToString(fullFilename, image, true); + } _showSettings->originalFormat = (MyMTLPixelFormat)originalFormatMTL; _showSettings->decodedFormat = (MyMTLPixelFormat)texture.pixelFormat; @@ -811,39 +832,33 @@ - (BOOL)loadTexture:(nonnull NSURL *)url _colorMap = texture; _normalMap = nil; } + + [self updateImageSettings:fullFilename image:image]; } - return [self loadTextureImpl:fullFilename isTextureChanged:isTextureChanged]; + [self resetSomeImageSettings:isNewFile]; + + return YES; } - - -- (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTextureChanged +// only called on new or modstamp-changed image +- (void)updateImageSettings:(const 
string&)fullFilename image:(KTXImage&)image { - if (isTextureChanged) { - Int2 blockDims = blockDimsOfFormat(_showSettings->originalFormat); - _showSettings->blockX = blockDims.x; - _showSettings->blockY = blockDims.y; - } - + // this is the actual format, may have been decoded id texture = _colorMap; - MyMTLPixelFormat format = (MyMTLPixelFormat)texture.pixelFormat; - MyMTLPixelFormat originalFormat = _showSettings->originalFormat; - // based on original or transcode? + // format may be trancoded to gpu-friendly format + MyMTLPixelFormat originalFormat = image.pixelFormat; + + _showSettings->blockX = image.blockDims().x; + _showSettings->blockY = image.blockDims().y; + _showSettings->isSigned = isSignedFormat(format); - // need a way to get at KTXImage, but would need to keep mmap alive - // this doesn't handle normals that are ASTC, so need more data from loader string fullFilenameCopy = fullFilename; - - // this is so unreadable string filename = toLower(fullFilenameCopy); - // could cycle between rrr1 and r001. - int32_t numChannels = numChannelsOfFormat(originalFormat); - // set title to filename, chop this to just file+ext, not directory string filenameShort = filename; const char* filenameSlash = strrchr(filenameShort.c_str(), '/'); @@ -858,6 +873,9 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex bool isNormal = false; bool isSDF = false; + // could cycle between rrr1 and r001. + int32_t numChannels = numChannelsOfFormat(originalFormat); + // note that decoded textures are 3/4 channel even though they are normal/sdf originally, so test those first if (numChannels == 2 || endsWith(filenameShort, "-n") || endsWith(filenameShort, "_normal")) { isNormal = true; @@ -878,8 +896,7 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex if (isAlbedo && endsWithExtension(filename.c_str(), ".png")) { _showSettings->isPremul = true; // convert to premul in shader, so can see other channels } - - if (isNormal || isSDF) { + else if (isNormal || isSDF) { _showSettings->isPremul = false; } @@ -891,28 +908,51 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex _showSettings->isSwizzleAGToRG = false; +// For best sdf and normal reconstruct from ASTC or BC3, must use RRR1 and GGGR or RRRG +// BC1nm multiply r*a in the shader, but just use BC5 anymore. // if (isASTCFormat(originalFormat) && isNormal) { // // channels after = "ag01" // _showSettings->isSwizzleAGToRG = true; // } - // then can manipulate this after loading - _showSettings->mipLOD = 0; - _showSettings->faceNumber = 0; - _showSettings->arrayNumber = 0; - _showSettings->sliceNumber = 0; - // can derive these from texture queries - _showSettings->maxLOD = (int32_t)texture.mipmapLevelCount; - _showSettings->faceCount = (texture.textureType == MTLTextureTypeCube || - texture.textureType == MTLTextureTypeCubeArray) ? 6 : 0; - _showSettings->arrayCount = (int32_t)texture.arrayLength; - _showSettings->sliceCount = (int32_t)texture.depth; - - _showSettings->channels = TextureChannels::ModeRGBA; + _showSettings->maxLOD = (int32_t)image.header.numberOfMipmapLevels; + _showSettings->faceCount = (image.textureType == MyMTLTextureTypeCube || + image.textureType == MyMTLTextureTypeCubeArray) ? 
6 : 0; + _showSettings->arrayCount = (int32_t)image.header.numberOfArrayElements; + _showSettings->sliceCount = (int32_t)image.depth; + + _showSettings->imageBoundsX = (int32_t)image.width; + _showSettings->imageBoundsY = (int32_t)image.height; +} + +- (void)resetSomeImageSettings:(BOOL)isNewFile { - _showSettings->imageBoundsX = (int32_t)texture.width; - _showSettings->imageBoundsY = (int32_t)texture.height; + // only reset these on new texture, but have to revalidate + if (isNewFile) { + // then can manipulate this after loading + _showSettings->mipLOD = 0; + _showSettings->faceNumber = 0; + _showSettings->arrayNumber = 0; + _showSettings->sliceNumber = 0; + + + _showSettings->channels = TextureChannels::ModeRGBA; + + // wish could keep existing setting, but new texture might not + // be supported debugMode for new texture + _showSettings->debugMode = DebugMode::DebugModeNone; + + _showSettings->shapeChannel = ShapeChannel::ShapeChannelNone; + } + else { + // reloaded file may have different limits + _showSettings->mipLOD = std::min(_showSettings->mipLOD, _showSettings->maxLOD); + _showSettings->faceNumber = std::min(_showSettings->faceNumber, _showSettings->faceCount); + _showSettings->arrayNumber = std::min(_showSettings->arrayNumber, _showSettings->arrayCount); + _showSettings->sliceNumber = std::min(_showSettings->sliceNumber, _showSettings->sliceCount); + } + [self updateViewTransforms]; @@ -922,18 +962,12 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex _showSettings->zoom = _showSettings->zoomFit; - // wish could keep existing setting, but new texture might not - // be supported debugMode for new texture - _showSettings->debugMode = DebugMode::DebugModeNone; - - _showSettings->shapeChannel = ShapeChannel::ShapeChannelNone; - // test rendering with inversion and mirroring bool doInvertX = false; // have one of these for each texture added to the viewer - float scaleX = MAX(1, texture.width); - float scaleY = MAX(1, texture.height); + float scaleX = MAX(1, _showSettings->imageBoundsX); + float scaleY = MAX(1, _showSettings->imageBoundsY); float scaleZ = MAX(scaleX, scaleY); // don't want 1.0f, or specular is all off due to extreme scale differences _modelMatrix = float4x4(float4m(doInvertX ? -scaleX : scaleX, scaleY, scaleZ, 1.0f)); // non uniform scale _modelMatrix = _modelMatrix * matrix4x4_translation(0.0f, 0.0f, -1.0); // set z=-1 unit back @@ -942,8 +976,6 @@ - (BOOL)loadTextureImpl:(const string&)fullFilename isTextureChanged:(BOOL)isTex float scale = MAX(scaleX, scaleY); _modelMatrix3D = float4x4(float4m(doInvertX ? -scale : scale, scale, scale, 1.0f)); // uniform scale _modelMatrix3D = _modelMatrix3D * matrix4x4_translation(0.0f, 0.0f, -1.0f); // set z=-1 unit back - - return YES; } - (float4x4)computeImageTransform:(float)panX panY:(float)panY zoom:(float)zoom { @@ -1017,12 +1049,12 @@ - (void)_updateGameState } uniforms.isCheckerboardShown = _showSettings->isCheckerboardShown; - bool canWrap = true; - if (textureType == MyMTLTextureTypeCube || textureType == MyMTLTextureTypeCubeArray) { - canWrap = false; - } - uniforms.isWrap = canWrap ? _showSettings->isWrap : false; + // addressing mode + bool isCube = (textureType == MyMTLTextureTypeCube || textureType == MyMTLTextureTypeCubeArray); + bool doWrap = !isCube && _showSettings->isWrap; + bool doEdge = !doWrap; + uniforms.isWrap = doWrap ? 
_showSettings->isWrap : false; uniforms.isPreview = _showSettings->isPreview; @@ -1081,6 +1113,13 @@ - (void)_updateGameState } uniforms.is3DView = _showSettings->is3DView; + // on small textures can really see missing pixel (3 instead of 4 pixels) + // so only do this on the sphere/capsule which wrap-around uv space + uniforms.isInsetByHalfPixel = false; + if (_showSettings->meshNumber >= 2 && doEdge) { + uniforms.isInsetByHalfPixel = true; + } + // translate float4x4 panTransform = matrix4x4_translation(-_showSettings->panX, _showSettings->panY, 0.0); @@ -1224,8 +1263,8 @@ - (void)drawInMTKView:(nonnull MTKView *)view } - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)view { - /// Delay getting the currentRenderPassDescriptor until absolutely needed. This avoids - /// holding onto the drawable and blocking the display pipeline any longer than necessary + // Delay getting the currentRenderPassDescriptor until absolutely needed. This avoids + // holding onto the drawable and blocking the display pipeline any longer than necessary MTLRenderPassDescriptor* renderPassDescriptor = view.currentRenderPassDescriptor; if (renderPassDescriptor == nil) { @@ -1245,7 +1284,7 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie return; } - /// Final pass rendering code here + // Final pass rendering code here id renderEncoder = [commandBuffer renderCommandEncoderWithDescriptor:renderPassDescriptor]; if (!renderEncoder) { @@ -1274,12 +1313,29 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie } } + //--------------------------------------- + // figure out the sampler + + id sampler; + + MyMTLTextureType textureType = (MyMTLTextureType)_colorMap.textureType; + + bool isCube = (textureType == MyMTLTextureTypeCube || textureType == MyMTLTextureTypeCubeArray); + bool doWrap = !isCube && _showSettings->isWrap; + bool doEdge = !doWrap; + + if (_showSettings->isPreview) { + sampler = doWrap ? _colorMapSamplerFilterWrap : (doEdge ? _colorMapSamplerFilterEdge : _colorMapSamplerFilterBorder); + } + else { + sampler = doWrap ? _colorMapSamplerNearestWrap : (doEdge ? _colorMapSamplerNearestEdge : _colorMapSamplerNearestBorder); + } + + //--------------------------------------- //for (texture in _textures) // TODO: setup //if (_colorMap) { // TODO: set texture specific uniforms, but using single _colorMap for now - bool canWrap = true; - switch(_colorMap.textureType) { case MTLTextureType1DArray: [renderEncoder setRenderPipelineState:_pipelineState1DArray]; @@ -1298,11 +1354,8 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie break; case MTLTextureTypeCube: [renderEncoder setRenderPipelineState:_pipelineStateCube]; - canWrap = false; - break; case MTLTextureTypeCubeArray: - canWrap = false; [renderEncoder setRenderPipelineState:_pipelineStateCubeArray]; break; @@ -1331,8 +1384,6 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie atIndex:TextureIndexNormal]; } - - UniformsLevel uniformsLevel; uniformsLevel.drawOffset = float2m(0.0f); @@ -1349,9 +1400,7 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie atIndex:BufferIndexUniformsLevel]; // use exisiting lod, and mip - [renderEncoder setFragmentSamplerState: - (canWrap && _showSettings->isWrap) ? 
_colorMapSamplerFilterWrap : _colorMapSamplerFilterBorder - atIndex:SamplerIndexColor]; + [renderEncoder setFragmentSamplerState:sampler atIndex:SamplerIndexColor]; for(MTKSubmesh *submesh in _mesh.submeshes) { @@ -1368,16 +1417,6 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie int32_t h = _colorMap.height; //int32_t d = _colorMap.depth; - MyMTLTextureType textureType = MyMTLTextureType2D; - if (_colorMap) { - textureType = (MyMTLTextureType)_colorMap.textureType; - } - - bool isCube = false; - if (textureType == MyMTLTextureTypeCube || textureType == MyMTLTextureTypeCubeArray) { - isCube = true; - } - // gap the contact sheet, note this 2 pixels is scaled on small textures by the zoom int32_t gap = _showSettings->showAllPixelGap; // * _showSettings->viewContentScaleFactor; @@ -1424,11 +1463,10 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie atIndex:BufferIndexUniformsLevel]; // force lod, and don't mip - [renderEncoder setFragmentSamplerState: - (canWrap && _showSettings->isWrap) ? _colorMapSamplerNearestWrap : _colorMapSamplerNearestBorder - lodMinClamp:mip - lodMaxClamp:mip + 1 - atIndex:SamplerIndexColor]; + [renderEncoder setFragmentSamplerState:sampler + lodMinClamp:mip + lodMaxClamp:mip + 1 + atIndex:SamplerIndexColor]; // TODO: since this isn't a preview, have mode to display all faces and mips on on screen @@ -1461,11 +1499,10 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie atIndex:BufferIndexUniformsLevel]; // force lod, and don't mip - [renderEncoder setFragmentSamplerState: - (canWrap && _showSettings->isWrap) ? _colorMapSamplerNearestWrap : _colorMapSamplerNearestBorder - lodMinClamp:mip - lodMaxClamp:mip + 1 - atIndex:SamplerIndexColor]; + [renderEncoder setFragmentSamplerState:sampler + lodMinClamp:mip + lodMaxClamp:mip + 1 + atIndex:SamplerIndexColor]; // TODO: since this isn't a preview, have mode to display all faces and mips on on screen diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index f2027eab..554acdb4 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -136,6 +136,9 @@ struct Uniforms bool isNormalMapSigned; bool isNormalMapSwizzleAGToRG; + // this is used on wrap-around objects to avoid black transparent using clampToZero + bool isInsetByHalfPixel; + // this means pull tangent from vertex bool useTangent; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index d93fcc6c..9ead4bbc 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -483,7 +483,7 @@ ColorInOut DrawImageFunc( out.texCoord.xy = in.texCoord; out.texCoord.xy *= wrapAmount; } - else if (uniforms.is3DView && !uniforms.isWrap) { + else if (uniforms.is3DView && uniforms.isInsetByHalfPixel) { // inset from edge by a fraction of a pixel, to avoid clamp boundary error // does this have to adjust for mipLOD too? 
float2 onePixel = uniformsLevel.textureSize.zw; diff --git a/kramv/KramViewerBase.cpp b/kramv/KramViewerBase.cpp index 643f340d..bf1f4cdc 100644 --- a/kramv/KramViewerBase.cpp +++ b/kramv/KramViewerBase.cpp @@ -10,6 +10,21 @@ int32_t ShowSettings::totalChunks() const { return std::max(one, faceCount) * std::max(one, arrayCount) * std::max(one, sliceCount); } +const char* ShowSettings::meshNumberText() const { + const char* text = ""; + + switch(meshNumber) { + case 0: text = "Shape Plane"; break; + case 1: text = "Shape Box"; break; + case 2: text = "Shape Sphere"; break; + case 3: text = "Shape Sphere MirrorU"; break; + case 4: text = "Shape Capsule"; break; + default: break; + } + + return text; +} + const char* ShowSettings::shapeChannelText() const { const char* text = ""; @@ -46,6 +61,19 @@ const char* ShowSettings::debugModeText() const { return text; } +void ShowSettings::advanceMeshNumber(bool decrement) { + int32_t numEnums = meshCount; + int32_t number = meshNumber; + if (decrement) { + number += numEnums - 1; + } + else { + number += 1; + } + + meshNumber = number % numEnums; +} + void ShowSettings::advanceShapeChannel(bool decrement) { int32_t numEnums = ShapeChannelCount; int32_t mode = shapeChannel; diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 22a51403..28261f03 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -195,10 +195,12 @@ class ShowSettings { // format before any transcode to supported formats MyMTLPixelFormat originalFormat; MyMTLPixelFormat decodedFormat; - + + void advanceMeshNumber(bool decrement); void advanceDebugMode(bool decrement); void advanceShapeChannel(bool decrement); + const char* meshNumberText() const; const char* shapeChannelText() const; const char* debugModeText() const; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index bb5afa40..990a2392 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1905,15 +1905,8 @@ - (bool)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown // test out different shapes, not offiical support yet case Key::Num8: if (_showSettings->meshCount > 1) { - if (isShiftKeyDown) { - _showSettings->meshNumber = _showSettings->meshNumber + _showSettings->meshCount - 1; - } - else { - _showSettings->meshNumber++; - } - _showSettings->meshNumber = _showSettings->meshNumber % _showSettings->meshCount; - - sprintf(text, "Mesh %d %s", _showSettings->meshNumber, "Shape"); // TODO: put meshName in _showSettings + _showSettings->advanceMeshNumber(isShiftKeyDown); + text = _showSettings->meshNumberText(); isChanged = true; } break; @@ -2200,7 +2193,7 @@ - (BOOL)loadTextureFromFolder } Renderer* renderer = (Renderer*)self.delegate; - if (![renderer loadTextureFromImage:fullFilename timestamp:timestamp image:image imageNormal:hasNormal ? &imageNormal : nullptr]) { + if (![renderer loadTextureFromImage:fullFilename timestamp:timestamp image:image imageNormal:hasNormal ? &imageNormal : nullptr isArchive:NO]) { return NO; } @@ -2319,7 +2312,7 @@ - (BOOL)loadTextureFromArchive string fullFilename = filename; Renderer* renderer = (Renderer*)self.delegate; if (![renderer loadTextureFromImage:fullFilename timestamp:(double)timestamp - image:image imageNormal:hasNormal ? &imageNormal : nullptr]) + image:image imageNormal:hasNormal ? 
&imageNormal : nullptr isArchive:YES]) { return NO; } From b36cd3d447740f4298d7575db8a539c1c2c8baad Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 8 Jun 2021 09:22:26 -0700 Subject: [PATCH 120/901] kramv - fix address logic --- kramv/KramRenderer.mm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 06d469c5..ba61092e 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -1054,6 +1054,7 @@ - (void)_updateGameState bool isCube = (textureType == MyMTLTextureTypeCube || textureType == MyMTLTextureTypeCubeArray); bool doWrap = !isCube && _showSettings->isWrap; bool doEdge = !doWrap; + bool doZero = !doEdge; uniforms.isWrap = doWrap ? _showSettings->isWrap : false; uniforms.isPreview = _showSettings->isPreview; @@ -1116,7 +1117,7 @@ - (void)_updateGameState // on small textures can really see missing pixel (3 instead of 4 pixels) // so only do this on the sphere/capsule which wrap-around uv space uniforms.isInsetByHalfPixel = false; - if (_showSettings->meshNumber >= 2 && doEdge) { + if (_showSettings->meshNumber >= 2 && doZero) { uniforms.isInsetByHalfPixel = true; } From 0edac5bf033e9470a875cd57c01c769418169231 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 8 Jun 2021 21:04:32 -0700 Subject: [PATCH 121/901] kramv - support png with KTXImageData in folders/archives This is a lot more work to translate png to props. But they are prevalent as source in folders. --- kramv/KramLoader.mm | 7 +- kramv/KramRenderer.mm | 9 +-- kramv/KramViewerMain.mm | 158 ++++++++++++++++++++++++++++------------ libkram/kram/Kram.cpp | 45 ++++++++++-- libkram/kram/Kram.h | 15 +++- 5 files changed, 172 insertions(+), 62 deletions(-) diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index a882ed14..6053733e 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -338,9 +338,14 @@ - (BOOL)loadImageFromURL:(nonnull NSURL *)url image:(KTXImage&)image imageData:( bool isSRGB = (!isNormal && !isSDF); - if (!imageData.openPNG(path, isSRGB, image)) { + if (!imageData.open(path, image)) { return NO; } + + // have to adjust the format if srgb + if (isSRGB) { + image.pixelFormat = MyMTLPixelFormatRGBA8Unorm_sRGB; + } } else { if (!imageData.open(path, image)) { diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index ba61092e..8aa9148b 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -705,10 +705,7 @@ - (void)_loadAssets } -static bool isPNGFilename(const char* filename) { - // should really lookg at first 4 bytes of data - return endsWithExtension(filename, ".png") || endsWithExtension(filename, ".PNG"); -} + - (BOOL)loadTextureFromImage:(const string&)fullFilename timestamp:(double)timestamp @@ -892,8 +889,10 @@ - (void)updateImageSettings:(const string&)fullFilename image:(KTXImage&)image // textures are already premul, so don't need to premul in shader // should really have 3 modes, unmul, default, premul + bool isPNG = isPNGFilename(filename.c_str()); + _showSettings->isPremul = false; - if (isAlbedo && endsWithExtension(filename.c_str(), ".png")) { + if (isAlbedo && isPNG) { _showSettings->isPremul = true; // convert to premul in shader, so can see other channels } else if (isNormal || isSDF) { diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 990a2392..dc6d08b2 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -2069,7 +2069,7 @@ -(BOOL)loadArchive:(const char*)zipFilename // filter out unsupported extensions - _zip.filterExtensions({".ktx", ".ktx2"}); + 
_zip.filterExtensions({".ktx", ".ktx2", ".png"}); // don't switch to empty archive if (_zip.zipEntrys().empty()) { @@ -2123,14 +2123,29 @@ -(BOOL)advanceTextureFromFolder:(BOOL)increment return [self loadTextureFromFolder]; } +- (BOOL)findFilenameInFolders:(const string&)filename { + // TODO: binary search for the filename in the array, but would have to be in same directory + + bool isFound = false; + for (const auto& search : _folderFiles) { + if (search == filename) { + isFound = true; + break; + } + } + return isFound; +} + - (BOOL)loadTextureFromFolder { // now lookup the filename and data at that entry const char* filename = _folderFiles[_fileFolderIndex].c_str(); + string fullFilename = filename; auto timestamp = FileHelper::modificationTimestamp(filename); // have already filtered filenames out, so this should never get hit - if (!(//endsWithExtension(filename, ".png") || + bool isPNG = isPNGFilename(filename); + if (!(isPNG || endsWithExtension(filename, ".ktx") || endsWithExtension(filename, ".ktx2")) ) { @@ -2138,29 +2153,47 @@ - (BOOL)loadTextureFromFolder } // first only do this on albedo/diffuse textures - string normalFilename = filename; + string normalFilename; - string search = "-a.ktx"; - auto searchPos = normalFilename.find(search); - bool isFound = searchPos != string::npos; + string search; + bool isFound = false; + string::size_type searchPos; - if (!isFound) { - search = "-d.ktx"; - searchPos = normalFilename.find(search); + if (isPNG) { + // find matching png + search = "-a.png"; + searchPos = fullFilename.find(search); isFound = searchPos != string::npos; + + if (!isFound) { + search = "-d.png"; + searchPos = fullFilename.find(search); + isFound = searchPos != string::npos; + } + } + else { + // find matching ktx/2 + search = "-a.ktx"; + searchPos = fullFilename.find(search); + isFound = searchPos != string::npos; + + if (!isFound) { + search = "-d.ktx"; + searchPos = fullFilename.find(search); + isFound = searchPos != string::npos; + } } + bool isSrgb = isFound; + if (isFound) { - normalFilename = normalFilename.replace(searchPos, search.length(), "-n.ktx"); // works for ktx or ktx2 file - - // binary search for the filename in the array, will have to be in same directory - isFound = false; - for (const auto& search : _folderFiles) { - if (search == normalFilename) { - isFound = true; - break; - } - } + // stupid stl mods fullFilename in the replace if not a copy + normalFilename = fullFilename; + + // this won't work for mix of png/ktx files, but that's okay + normalFilename = normalFilename.replace(searchPos, search.length(), isPNG ? "-n.png" : "-n.ktx"); + + isFound = [self findFilenameInFolders:normalFilename]; if (!isFound) { normalFilename.clear(); @@ -2176,7 +2209,7 @@ - (BOOL)loadTextureFromFolder KTXImageData imageNormalDataKTX; bool hasNormal = false; - string fullFilename = filename; + // this requires decode and conversion to RGBA8u if (!imageDataKTX.open(fullFilename.c_str(), image)) { return NO; } @@ -2192,6 +2225,10 @@ - (BOOL)loadTextureFromFolder } } + if (isPNG && isSrgb) { + image.pixelFormat = MyMTLPixelFormatRGBA8Unorm_sRGB; + } + Renderer* renderer = (Renderer*)self.delegate; if (![renderer loadTextureFromImage:fullFilename timestamp:timestamp image:image imageNormal:hasNormal ? 
&imageNormal : nullptr isArchive:NO]) { return NO; @@ -2239,49 +2276,73 @@ - (BOOL)loadTextureFromArchive // now lookup the filename and data at that entry const auto& entry = _zip.zipEntrys()[_fileArchiveIndex]; const char* filename = entry.filename; + string fullFilename = filename; double timestamp = (double)entry.modificationDate; // have already filtered filenames out, so this should never get hit - if (!(//endsWithExtension(filename, ".png") || + bool isPNG = isPNGFilename(filename); + + if (!(isPNG || endsWithExtension(filename, ".ktx") || endsWithExtension(filename, ".ktx2")) ) { return NO; } - string normalFilename = filename; + string normalFilename; // first only do this on albedo/diffuse textures - string search = "-a.ktx"; - auto searchPos = normalFilename.find(search); - bool isFound = searchPos != string::npos; - if (!isFound) { - search = "-d.ktx"; - searchPos = normalFilename.find(search); + string search; + bool isFound = false; + string::size_type searchPos; + + if (isPNG) { + search = "-a.png"; + searchPos = fullFilename.find(search); isFound = searchPos != string::npos; + + if (!isFound) { + search = "-d.png"; + searchPos = fullFilename.find(search); + isFound = searchPos != string::npos; + } } - - if (isFound) { - normalFilename = normalFilename.replace(searchPos, search.length(), "-n.ktx"); + else { + search = "-a.ktx"; + searchPos = fullFilename.find(search); + isFound = searchPos != string::npos; + + if (!isFound) { + search = "-d.ktx"; + searchPos = fullFilename.find(search); + isFound = searchPos != string::npos; + } } + bool isSrgb = isFound; + //--------------------------- const uint8_t* imageData = nullptr; uint64_t imageDataLength = 0; - + + const uint8_t* imageNormalData = nullptr; + uint64_t imageNormalDataLength = 0; + + // search for main file - can be albedo or normal if (!_zip.extractRaw(filename, &imageData, imageDataLength)) { return NO; } - const uint8_t* imageNormalData = nullptr; - uint64_t imageNormalDataLength = 0; - - // see if this is albedo, and then search for normal map in the same archive + // search for normal map in the same archive if (isFound) { + normalFilename = fullFilename; + + normalFilename = normalFilename.replace(searchPos, search.length(), isPNG ? "-n.png" : "-n.ktx"); + if (!_zip.extractRaw(normalFilename.c_str(), &imageNormalData, imageNormalDataLength)) { - // ignore failure case here, this is just guessing there's a -n file + // ignore failure case here, this is just guessing there's a related normal file } } @@ -2291,12 +2352,14 @@ - (BOOL)loadTextureFromArchive // That's why we can't just pass filenames to the renderer KTXImage image; KTXImageData imageDataKTX; + + KTXImage imageNormal; + KTXImageData imageNormalDataKTX; + if (!imageDataKTX.open(imageData, imageDataLength, image)) { return NO; } - KTXImage imageNormal; - KTXImageData imageNormalDataKTX; bool hasNormal = false; if (isFound && imageNormalDataKTX.open(imageNormalData, imageNormalDataLength, imageNormal)) { @@ -2309,7 +2372,10 @@ - (BOOL)loadTextureFromArchive } } - string fullFilename = filename; + if (isPNG && isSrgb) { + image.pixelFormat = MyMTLPixelFormatRGBA8Unorm_sRGB; + } + Renderer* renderer = (Renderer*)self.delegate; if (![renderer loadTextureFromImage:fullFilename timestamp:(double)timestamp image:image imageNormal:hasNormal ? 
&imageNormal : nullptr isArchive:YES]) @@ -2388,11 +2454,11 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { const char* name = fileOrDirectoryURL.fileSystemRepresentation; // filter only types that are supported - if (endsWithExtension(name, ".ktx") || - endsWithExtension(name, ".ktx2") - // || endsWithExtension(name, ".png") // TODO: can't support with KTXImage load path, needs PNG loader - - ) + bool isPNG = isPNGFilename(name); + + if (isPNG || + endsWithExtension(name, ".ktx") || + endsWithExtension(name, ".ktx2")) { files.push_back(name); } @@ -2471,7 +2537,7 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { // file is not a supported extension if (!(endsWithExtension(filename, ".zip") || - endsWithExtension(filename, ".png") || + isPNGFilename(filename) || endsWithExtension(filename, ".ktx") || endsWithExtension(filename, ".ktx2")) ) { diff --git a/libkram/kram/Kram.cpp b/libkram/kram/Kram.cpp index 5a02ec12..232bfd67 100644 --- a/libkram/kram/Kram.cpp +++ b/libkram/kram/Kram.cpp @@ -41,9 +41,33 @@ void releaseVector(vector& v) { } +bool isPNGFilename(const char* filename) { + // should really lookg at first 4 bytes of data + return endsWithExtension(filename, ".png"); +} + +bool isPNGFilename(const uint8_t* data, size_t dataSize) { + // read the 4 chars at the beginning of the file + const uint32_t numChars = 8; + if (dataSize < numChars) + return false; + + const uint8_t kPngSignature[numChars] = { 137, 80, 78, 71, 13, 10, 26, 10 }; + if (memcmp(data, kPngSignature, sizeof(kPngSignature)) != 0) { + return false; + } + + return true; +} + + bool KTXImageData::open(const char* filename, KTXImage& image) { close(); + if (isPNGFilename(filename)) { + return openPNG(filename, image); + } + isMmap = true; if (!mmapHelper.open(filename)) { isMmap = false; @@ -100,8 +124,8 @@ void KTXImageData::close() { } -bool KTXImageData::openPNG(const char* filename, bool isSrgb, KTXImage& image) { - close(); +bool KTXImageData::openPNG(const char* filename, KTXImage& image) { + //close(); isMmap = true; if (!mmapHelper.open(filename)) { @@ -135,9 +159,15 @@ bool KTXImageData::openPNG(const char* filename, bool isSrgb, KTXImage& image) { data = fileData.data(); dataSize = fileData.size(); } - + + return openPNG(data, dataSize, image); +} + +bool KTXImageData::openPNG(const uint8_t* data, size_t dataSize, KTXImage& image) { + //close(); + // the mmap/filehelper point to the png data - // use Image to + // use Image to Image singleImage; bool isLoaded = LoadPng(data, dataSize, false, false, singleImage); @@ -158,7 +188,7 @@ bool KTXImageData::openPNG(const char* filename, bool isSrgb, KTXImage& image) { image.header.numberOfArrayElements = 0; image.header.numberOfMipmapLevels = 1; image.textureType = MyMTLTextureType2D; - image.pixelFormat = isSrgb ? MyMTLPixelFormatRGBA8Unorm_sRGB : MyMTLPixelFormatRGBA8Unorm; + image.pixelFormat = /*isSrgb ? MyMTLPixelFormatRGBA8Unorm_sRGB : */ MyMTLPixelFormatRGBA8Unorm; // TODO: support mips with blitEncoder but tha confuses mipCount in KTXImage // Mipper can also generate on cpu side. Mipped can do premul conversion though. 
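The calling pattern these Kram.cpp changes enable is worth spelling out: open() now sniffs the PNG signature itself, so only the sRGB retag stays on the caller side, because openPNG() always reports non-sRGB RGBA8. A minimal sketch of that pattern (the wrapper name, include set, and the path/isSrgb parameters are illustrative, not viewer code):

    #include "Kram.h"       // KTXImageData, isPNGFilename
    #include "KTXImage.h"   // KTXImage, MyMTLPixelFormat
    using namespace kram;

    // hypothetical helper; imageData is passed in because it must outlive image,
    // since KTXImage can alias the mapped/loaded bytes it holds
    bool loadAsKTXImage(const char* path, bool isSrgb, KTXImage& image, KTXImageData& imageData)
    {
        if (!imageData.open(path, image)) {   // .png, .ktx, or .ktx2; PNG is detected by signature
            return false;
        }
        if (isPNGFilename(path) && isSrgb) {
            // openPNG only reports non-sRGB RGBA8, so albedo/diffuse sources get retagged by the caller
            image.pixelFormat = MyMTLPixelFormatRGBA8Unorm_sRGB;
        }
        return true;
    }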
@@ -176,11 +206,14 @@ bool KTXImageData::openPNG(const char* filename, bool isSrgb, KTXImage& image) { return true; } - bool KTXImageData::open(const uint8_t* data, size_t dataSize, KTXImage& image) { close(); + if (isPNGFilename(data, dataSize)) { + return openPNG(data, dataSize, image); + } + // image will likely alias incoming data, so KTXImageData is unused if (!image.open(data, dataSize, isInfoOnly)) { diff --git a/libkram/kram/Kram.h b/libkram/kram/Kram.h index f2e75afc..c8deab11 100644 --- a/libkram/kram/Kram.h +++ b/libkram/kram/Kram.h @@ -23,13 +23,18 @@ class KTXImageData { // class aliases data, so caller must keep alive. Useful with bundle. bool open(const uint8_t* data, size_t dataSize, KTXImage& image); + // This releases all memory associated with this class + void close(); + +private: // Open png image into a KTXImage as a single-level mip // Only handles 2d case and only srgba/rgba conversion. - bool openPNG(const char* filename, bool isSrgb, KTXImage& image); + // Only returns non-srgb RGBA8, but format can be changed after for srgb + bool openPNG(const char* filename, KTXImage& image); + + // The data version + bool openPNG(const uint8_t* data, size_t dataSize, KTXImage& image); - // This releases all memory associated with this class - void close(); - private: MmapHelper mmapHelper; vector fileData; @@ -37,6 +42,8 @@ class KTXImageData { bool isInfoOnly = true; }; +bool isPNGFilename(const char* filename); + // helpers to source from a png or single level of a ktx bool LoadKtx(const uint8_t* data, size_t dataSize, Image& sourceImage); bool LoadPng(const uint8_t* data, size_t dataSize, bool isPremulSrgb, bool isGray, Image& sourceImage); From 6d8906ac2df3be525577c6f434a13bd297756c96 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Tue, 8 Jun 2021 22:25:59 -0700 Subject: [PATCH 122/901] kramv - tangent button to compare Note that tan/bitan shapeChannel doesn't yet work with fragment tangents --- kramv/KramRenderer.mm | 3 +-- kramv/KramShaders.metal | 47 +++++++++++++---------------------------- kramv/KramViewerBase.h | 3 +++ kramv/KramViewerMain.mm | 34 +++++++++++++++++++++++------ 4 files changed, 47 insertions(+), 40 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 8aa9148b..55c586d7 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -1068,9 +1068,8 @@ - (void)_updateGameState } } - // TODO: tie to UI // a few things to fix before enabling this - uniforms.useTangent = true; + uniforms.useTangent = _showSettings->useTangent; uniforms.gridX = 0; uniforms.gridY = 0; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 9ead4bbc..c1eff9ce 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -232,9 +232,12 @@ half3 transformNormalByBasis(half3 bumpNormal, half3 vertexNormal, float3 worldP float invmax = rsqrt(max(length_squared(T), length_squared(B))); // keeps relative magnitude of two vectors, they're not both unit vecs - T *= -invmax; // had to flip this sign to get correct lighting + T *= invmax; B *= invmax; + // had to flip this sign to get lighting to match vertex data + T = -T; + // construct a scale-invariant frame // drop to half to match other call bumpNormal = toHalf(float3x3(T, B, N) * toFloat(bumpNormal)); @@ -251,17 +254,11 @@ half3 transformNormalByBasis(half3 bumpNormal, half4 tangent, half3 vertexNormal // Reconstruct bitan in frag shader // https://bgolus.medium.com/generating-perfect-normal-maps-for-unity-f929e673fc57 - - // so if eyevector - - // TODO: there's facing too, could 
be inside model - half bitangentSign = tangent.w; half3 bitangent = bitangentSign * cross(vertexNormal, tangent.xyz); - // ModelIO not generating correct bitan sign // DONE: flip this on srcData, and not here //bitangentSign = -bitangentSign; @@ -273,30 +270,6 @@ half3 transformNormalByBasis(half3 bumpNormal, half4 tangent, half3 vertexNormal return normalize(bumpNormal); } - -half3 transformNormal(half4 tangent, half3 vertexNormal, float3 worldPos, - bool useTangent, - texture2d texture, sampler s, float2 uv, bool isSigned = true) -{ - half4 nmap = texture.sample(s, uv); - - // unorm-only formats like ASTC need to convert - if (!isSigned) { - nmap.xy = toSnorm8(nmap.xy); - } - - // rebuild the z term - half3 bumpNormal = toNormal(nmap.xyz); - - if (useTangent) - bumpNormal = transformNormalByBasis(bumpNormal, tangent, vertexNormal); - else - bumpNormal = transformNormalByBasis(bumpNormal, vertexNormal, worldPos, uv); - - return bumpNormal; -} - - half3 transformNormal(half4 nmap, half3 vertexNormal, half4 tangent, float3 worldPos, float2 uv, bool useTangent, // to gen TBN from normal bool isSwizzleAGToRG, bool isSigned, bool isFrontFacing) @@ -335,7 +308,17 @@ half3 transformNormal(half4 nmap, half3 vertexNormal, half4 tangent, return bumpNormal; } - +half3 transformNormal(half4 tangent, half3 vertexNormal, float3 worldPos, + bool useTangent, + texture2d texture, sampler s, float2 uv, + bool isSigned, bool isSwizzleAGToRG, bool isFrontFacing) +{ + half4 nmap = texture.sample(s, uv); + + return transformNormal(nmap, vertexNormal, tangent, + worldPos, uv, useTangent, + isSwizzleAGToRG, isSigned, isFrontFacing); +} // TODO: have more bones, or read from texture instead of uniforms // can then do instanced skining, but vfetch lookup slower diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index 28261f03..fa64a926 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -130,6 +130,9 @@ class ShowSettings { // and disabling with a MTLView caused many flags to have to be set on MTLTexture //bool isSRGBShown = true; + // whether to use normal to tangent (false), or vertex tangents (true) + bool useTangent = true; + // draw with reverseZ to better match perspective bool isReverseZ = true; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index dc6d08b2..94d00c25 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -498,7 +498,7 @@ - (nonnull ShowSettings*)showSettings { } - (NSStackView*)_addButtons { - const int32_t numButtons = 27; // 13; + const int32_t numButtons = 29; // 13; const char* names[numButtons*2] = { "?", "Help", @@ -526,8 +526,12 @@ - (NSStackView*)_addButtons { "J", "Next", "L", "Reload", "0", "Fit", + + "-", "", + "8", "Shape", "6", "Shape Channel", + "T", "Tangents", // TODO: need to shift hud over a little // "UI", - add to show/hide buttons @@ -1385,9 +1389,11 @@ - (void)updateUIControlState auto arrayState = toState(_showSettings->arrayNumber > 0); auto faceState = toState(_showSettings->faceNumber > 0); auto mipState = toState(_showSettings->mipLOD > 0); - auto meshState = toState(_showSettings->meshNumber > 0); - auto meshChannelState = toState(_showSettings->shapeChannel > 0); // TODO: rename to meshChannel + auto meshState = toState(_showSettings->meshNumber > 0); + auto meshChannelState = toState(_showSettings->shapeChannel > 0); + auto tangentState = toState(_showSettings->useTangent); + // TODO: UI state, and vertical state auto uiState = toState(_buttonStack.hidden); @@ -1419,6 +1425,7 @@ - 
(void)updateUIControlState [self findButton:"W"].state = wrapState; [self findButton:"D"].state = gridState; [self findButton:"E"].state = debugState; + [self findButton:"T"].state = tangentState; [self findButton:"P"].state = premulState; [self findButton:"N"].state = signedState; @@ -1447,7 +1454,8 @@ - (void)updateUIControlState [self findMenuItem:"O"].state = previewState; [self findMenuItem:"8"].state = meshState; [self findMenuItem:"6"].state = meshChannelState; - + [self findMenuItem:"T"].state = tangentState; + [self findMenuItem:"W"].state = wrapState; [self findMenuItem:"D"].state = gridState; [self findMenuItem:"E"].state = debugState; @@ -1519,15 +1527,20 @@ - (IBAction)handleAction:(id)sender { keyCode = Key::Y; else if (title == "J") keyCode = Key::J; + + // reload/refit else if (title == "L") keyCode = Key::L; - else if (title == "0") keyCode = Key::Num0; + + // mesh else if (title == "8") keyCode = Key::Num8; else if (title == "6") keyCode = Key::Num6; + else if (title == "T") + keyCode = Key::T; else if (title == "R") keyCode = Key::R; @@ -1537,7 +1550,7 @@ - (IBAction)handleAction:(id)sender { keyCode = Key::B; else if (title == "A") keyCode = Key::A; - + if (keyCode >= 0) [self handleKey:keyCode isShiftKeyDown:isShiftKeyDown]; } @@ -1668,6 +1681,15 @@ - (bool)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown isChanged = true; break; } + case Key::T: { + _showSettings->useTangent = !_showSettings->useTangent; + if (_showSettings->useTangent) + text = "Vertex Tangents"; + else + text = "Fragment Tangents"; + isChanged = true; + break; + } case Key::E: { _showSettings->advanceDebugMode(isShiftKeyDown); text = _showSettings->debugModeText(); From 1787e99d4b1b2b6565ec17735a0ac3a3417b63b7 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Wed, 9 Jun 2021 14:02:37 -0700 Subject: [PATCH 123/901] kramv - simplify normal lookup --- kramv/KramViewerMain.mm | 116 ++++++++++++++++------------------------ 1 file changed, 47 insertions(+), 69 deletions(-) diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 94d00c25..fc28d890 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -2174,52 +2174,37 @@ - (BOOL)loadTextureFromFolder return NO; } + const char* ext = strrchr(filename, '.'); + // first only do this on albedo/diffuse textures - string normalFilename; - string search; - bool isFound = false; - string::size_type searchPos; + // find matching png + string search = "-a"; + search += ext; - if (isPNG) { - // find matching png - search = "-a.png"; - searchPos = fullFilename.find(search); - isFound = searchPos != string::npos; + auto searchPos = fullFilename.find(search); + bool isFound = searchPos != string::npos; + + if (!isFound) { + search = "-d"; + search += ext; - if (!isFound) { - search = "-d.png"; - searchPos = fullFilename.find(search); - isFound = searchPos != string::npos; - } - } - else { - // find matching ktx/2 - search = "-a.ktx"; searchPos = fullFilename.find(search); isFound = searchPos != string::npos; - - if (!isFound) { - search = "-d.ktx"; - searchPos = fullFilename.find(search); - isFound = searchPos != string::npos; - } } bool isSrgb = isFound; + string normalFilename; + bool hasNormal = false; + if (isFound) { - // stupid stl mods fullFilename in the replace if not a copy normalFilename = fullFilename; + normalFilename = normalFilename.erase(searchPos); + normalFilename += "-n"; + normalFilename += ext; - // this won't work for mix of png/ktx files, but that's okay - normalFilename = 
normalFilename.replace(searchPos, search.length(), isPNG ? "-n.png" : "-n.ktx"); - - isFound = [self findFilenameInFolders:normalFilename]; - - if (!isFound) { - normalFilename.clear(); - } + hasNormal = [self findFilenameInFolders:normalFilename]; } //------------------------------- @@ -2229,21 +2214,22 @@ - (BOOL)loadTextureFromFolder KTXImage imageNormal; KTXImageData imageNormalDataKTX; - bool hasNormal = false; // this requires decode and conversion to RGBA8u if (!imageDataKTX.open(fullFilename.c_str(), image)) { return NO; } - if (isFound && imageNormalDataKTX.open(normalFilename.c_str(), imageNormal)) { - + if (hasNormal && imageNormalDataKTX.open(normalFilename.c_str(), imageNormal)) { // shaders only pull from albedo + normal on these texture types if (imageNormal.textureType == image.textureType && (imageNormal.textureType == MyMTLTextureType2D || imageNormal.textureType == MyMTLTextureType2DArray)) { - hasNormal = true; + //hasNormal = true; + } + else { + hasNormal = false; } } @@ -2310,36 +2296,24 @@ - (BOOL)loadTextureFromArchive { return NO; } - - string normalFilename; + + const char* ext = strrchr(filename, '.'); + // first only do this on albedo/diffuse textures - string search; - bool isFound = false; - string::size_type searchPos; + string search = "-a"; + search += ext; - if (isPNG) { - search = "-a.png"; - searchPos = fullFilename.find(search); - isFound = searchPos != string::npos; + auto searchPos = fullFilename.find(search); + bool isFound = searchPos != string::npos; + + if (!isFound) { + search = "-d"; + search += ext; - if (!isFound) { - search = "-d.png"; - searchPos = fullFilename.find(search); - isFound = searchPos != string::npos; - } - } - else { - search = "-a.ktx"; searchPos = fullFilename.find(search); isFound = searchPos != string::npos; - - if (!isFound) { - search = "-d.ktx"; - searchPos = fullFilename.find(search); - isFound = searchPos != string::npos; - } } bool isSrgb = isFound; @@ -2358,14 +2332,16 @@ - (BOOL)loadTextureFromArchive } // search for normal map in the same archive + string normalFilename; + bool hasNormal = false; + if (isFound) { normalFilename = fullFilename; + normalFilename = normalFilename.erase(searchPos); + normalFilename += "-n"; + normalFilename += ext; - normalFilename = normalFilename.replace(searchPos, search.length(), isPNG ? 
"-n.png" : "-n.ktx"); - - if (!_zip.extractRaw(normalFilename.c_str(), &imageNormalData, imageNormalDataLength)) { - // ignore failure case here, this is just guessing there's a related normal file - } + hasNormal = _zip.extractRaw(normalFilename.c_str(), &imageNormalData, imageNormalDataLength); } //--------------------------- @@ -2382,15 +2358,17 @@ - (BOOL)loadTextureFromArchive return NO; } - bool hasNormal = false; - if (isFound && imageNormalDataKTX.open(imageNormalData, imageNormalDataLength, imageNormal)) { - + if (hasNormal && imageNormalDataKTX.open(imageNormalData, imageNormalDataLength, imageNormal)) { + // shaders only pull from albedo + normal on these texture types if (imageNormal.textureType == image.textureType && (imageNormal.textureType == MyMTLTextureType2D || imageNormal.textureType == MyMTLTextureType2DArray)) { - hasNormal = true; + //hasNormal = true; + } + else { + hasNormal = false; } } From f9bb8fb60ab38203b87833888c81b31b8a6be614 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Thu, 10 Jun 2021 21:16:03 -0700 Subject: [PATCH 124/901] kramv - add 4x aniso to preview, and add mip level shape channel --- kramv/KramRenderer.mm | 1 + kramv/KramShaders.h | 2 ++ kramv/KramShaders.metal | 51 +++++++++++++++++++++++++++++++++++++++- kramv/KramViewerBase.cpp | 1 + kramv/KramViewerBase.h | 4 ++-- 5 files changed, 56 insertions(+), 3 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 55c586d7..78287aa4 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -160,6 +160,7 @@ - (void)_createSamplers samplerDescriptor.minFilter = MTLSamplerMinMagFilterLinear; samplerDescriptor.magFilter = MTLSamplerMinMagFilterLinear; samplerDescriptor.mipFilter = MTLSamplerMipFilterLinear; + samplerDescriptor.maxAnisotropy = 4; // 1,2,4,8,16 are choices samplerDescriptor.sAddressMode = MTLSamplerAddressModeClampToBorderColor; samplerDescriptor.tAddressMode = MTLSamplerAddressModeClampToBorderColor; diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index 554acdb4..f11491a4 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -107,6 +107,8 @@ typedef NS_ENUM(int32_t, ShaderShapeChannel) ShShapeChannelTangent, ShShapeChannelBitangent, + ShShapeChannelMipLevel, + // ShShapeChannelBumpNormal, }; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index c1eff9ce..d71e86c4 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -144,6 +144,50 @@ float4 toFloat(half4 c) //------------------------------------------- // functions +// https://bgolus.medium.com/anti-aliased-alpha-test-the-esoteric-alpha-to-coverage-8b177335ae4f +float toMipLevel(float2 uv) +{ + float2 dx = dfdx(uv); + float2 dy = dfdy(uv); + + // a better approximation than fwidth + float deltaSquared = max(length_squared(dx), length_squared(dy)); + + // 0.5 because squared, find mip level + return max(0.0, 0.5 * log2(deltaSquared)); +} + +// Also see here: +// https://developer.nvidia.com/gpugems/gpugems2/part-iii-high-quality-rendering/chapter-28-mipmap-level-measurement +// 100 percent, 25 percent, 6.3 percent, and 1.6 percent) + +float4 toMipLevelColor(float2 uv) +{ + // yellow, blue, green, red, black/transparent + // 1, 0.75, 0.5, 0.25, 0 + // point sample from a texture with unique mip level colors + float lev = toMipLevel(uv); + float clev = saturate(lev / 4.0); + float alpha = saturate(1.0 - clev); + + const float3 colors[5] = { + float3(1,1,0), // yellow + float3(0,0,1), // blue + float3(0,1,0), // green + float3(1,0,0), // red + float3(0,0,0), // black 
+ }; + + float clev4 = clev * 4.0; + float3 low = colors[int(floor(clev4))]; + float3 hi = colors[int(round(clev4))]; + + float3 color = mix(low, hi, fract(clev4)); + + // grayscale for now, but use colors so can see mips + return float4(color, alpha); +} + // reconstruct normal from xy, n.z ignored float3 toNormal(float3 n) { @@ -796,11 +840,16 @@ float4 DrawPixels( c.rgb = saturate(toUnorm(faceNormal)); } + else if (uniforms.shapeChannel == ShShapeChannelMipLevel) { + c = toMipLevelColor(in.texCoord * textureSize.xy); // only for 2d textures + } // else if (uniforms.shapeChannel == ShShapeChannelBumpNormal) { // c.rgb = saturate(bumpNormal); // } - c.a = 1.0; + if (uniforms.shapeChannel != ShShapeChannelMipLevel) { + c.a = 1.0; + } } // mask to see one channel in isolation, this is really 0'ing out other channels diff --git a/kramv/KramViewerBase.cpp b/kramv/KramViewerBase.cpp index bf1f4cdc..2bdb8321 100644 --- a/kramv/KramViewerBase.cpp +++ b/kramv/KramViewerBase.cpp @@ -37,6 +37,7 @@ const char* ShowSettings::shapeChannelText() const { case ShapeChannelDepth: text = "Show Depth"; break; case ShapeChannelFaceNormal: text = "Show Faces"; break; //case ShapeChannelBumpNormal: text = "Show Bumps"; break; + case ShapeChannelMipLevel: text = "Show Mip Levels"; break; default: break; } diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index fa64a926..a0fc8eb1 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -62,11 +62,11 @@ enum ShapeChannel ShapeChannelTangent, ShapeChannelBitangent, + ShapeChannelMipLevel, // can estimate mip chose off dfdx/dfdy, and pseudocolor + // don't need bump, since can already see it, but what if combined diffuse + normal // ShapeChannelBumpNormal, - // ShapeChannelMipLevel, // can estimate mip chose off dfdx/dfdy, and pseudocolor - ShapeChannelCount }; From dbfcba74b3705c5983cc0ba05591e41a2539ab0a Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Thu, 10 Jun 2021 21:30:02 -0700 Subject: [PATCH 125/901] kramv - use premul for mip level colors --- kramv/KramShaders.metal | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index d71e86c4..225918ba 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -138,6 +138,15 @@ float4 toFloat(half4 c) return float4(c); } +float4 toPremul(float4 c) { + c.rgb *= c.a; + return c; +} +half4 toPremul(half4 c) { + c.rgb *= c.a; + return c; +} + // TODO: note that Metal must pass the same half3 from vertex to fragment shader // so can't mix a float vs with half fs. 
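For reference on the estimate that toMipLevelColor() visualizes: toMipLevel() works in texel units (the call site scales uv by textureSize.xy), and the 0.5 * log2 on squared lengths is just log2 of the per-pixel texel footprint without paying for a sqrt. A small host-side C++ sketch of the same math with a couple of worked values (color names refer to the colors[] ramp above):

    #include <algorithm>
    #include <cmath>

    // dx2, dy2: squared lengths of the per-pixel texel-space uv deltas (dfdx/dfdy in the shader)
    float toMipLevelRef(float dx2, float dy2)
    {
        // 0.5 * log2(d*d) == log2(d); the larger footprint axis drives the level
        return std::max(0.0f, 0.5f * std::log2(std::max(dx2, dy2)));
    }

    // 1 texel per pixel:  toMipLevelRef(1, 1)   -> 0.5*log2(1)  = 0  (mip 0, yellow)
    // 4 texels per pixel: toMipLevelRef(16, 16) -> 0.5*log2(16) = 2  (mip 2, green)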
@@ -182,10 +191,10 @@ float4 toMipLevelColor(float2 uv) float3 low = colors[int(floor(clev4))]; float3 hi = colors[int(round(clev4))]; + // lerp in unmul space float3 color = mix(low, hi, fract(clev4)); - // grayscale for now, but use colors so can see mips - return float4(color, alpha); + return toPremul(float4(color, alpha)); } // reconstruct normal from xy, n.z ignored From 7a8ab10aa8d6ee4780da1ce2d322337b41e77a33 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 13 Jun 2021 13:55:11 -0700 Subject: [PATCH 126/901] kramv - add render sampler to see pixels from drawable in shapeChannel, show all, preview, etc --- kramv/KramRenderer.mm | 172 +++++++++++++-------- kramv/KramShaders.metal | 7 +- kramv/KramViewerBase.cpp | 5 + kramv/KramViewerBase.h | 10 +- kramv/KramViewerMain.mm | 314 ++++++++++++++++++++++++++------------- 5 files changed, 342 insertions(+), 166 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 78287aa4..a59d21ae 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -30,50 +30,52 @@ @implementation Renderer { dispatch_semaphore_t _inFlightSemaphore; - id _device; - id _commandQueue; - - id _dynamicUniformBuffer[MaxBuffersInFlight]; - - id _pipelineState1DArray; - id _pipelineStateImage; - id _pipelineStateImageArray; - id _pipelineStateCube; - id _pipelineStateCubeArray; - id _pipelineStateVolume; - - id _pipelineState1DArrayCS; - id _pipelineStateImageCS; - id _pipelineStateImageArrayCS; - id _pipelineStateCubeCS; - id _pipelineStateCubeArrayCS; - id _pipelineStateVolumeCS; - - id _depthStateFull; - id _depthStateNone; + id _device; + id _commandQueue; + + id _dynamicUniformBuffer[MaxBuffersInFlight]; + + id _pipelineState1DArray; + id _pipelineStateImage; + id _pipelineStateImageArray; + id _pipelineStateCube; + id _pipelineStateCubeArray; + id _pipelineStateVolume; + + id _pipelineState1DArrayCS; + id _pipelineStateImageCS; + id _pipelineStateImageArrayCS; + id _pipelineStateCubeCS; + id _pipelineStateCubeArrayCS; + id _pipelineStateVolumeCS; + + id _depthStateFull; + id _depthStateNone; MTLVertexDescriptor *_mtlVertexDescriptor; // TODO: Array< id > _textures; - id _colorMap; - id _normalMap; + id _colorMap; + id _normalMap; + id _lastDrawableTexture; // border is a better edge sample, but at edges it filters in the transparent color // around the border which is undesirable. It would be better if the hw did // clamp to edge until uv outside 0 to 1. This results in having to inset the uv by 0.5 px // to avoid this artifact, but on small texturs that are 4x4, a 1 px inset is noticeable. 
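One way to apply that half-pixel inset, assuming textureSize.zw holds the reciprocal dimensions as the shader's onePixel name suggests (a sketch of the idea, not necessarily the exact math in DrawImageFunc):

    // remap one uv coordinate from [0,1] into [0.5*texel, 1 - 0.5*texel]
    float insetByHalfPixel(float uv, float invSize)
    {
        float halfTexel = 0.5f * invSize;
        return uv * (1.0f - invSize) + halfTexel;  // same as mix(halfTexel, 1 - halfTexel, uv)
    }

    // on a 4x4 texture: uv=0 -> 0.125, uv=1 -> 0.875, i.e. a full texel lost across the axis,
    // which is why the inset is limited to the wrap-around shapes (sphere/capsule)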
- id _colorMapSamplerNearestWrap; - id _colorMapSamplerNearestBorder; - id _colorMapSamplerNearestEdge; + id _colorMapSamplerNearestWrap; + id _colorMapSamplerNearestBorder; + id _colorMapSamplerNearestEdge; - id _colorMapSamplerFilterWrap; - id _colorMapSamplerFilterBorder; - id _colorMapSamplerFilterEdge; + id _colorMapSamplerFilterWrap; + id _colorMapSamplerFilterBorder; + id _colorMapSamplerFilterEdge; //id _sampleRT; - id _sampleTex; - + id _sampleComputeTex; + id _sampleRenderTex; + uint8_t _uniformBufferIndex; float4x4 _projectionMatrix; @@ -423,12 +425,23 @@ - (void)_createRenderPipelines:(MTKView*)view - (void)_createSampleRender { - // writing to this texture - MTLTextureDescriptor* textureDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA32Float width:1 height:1 mipmapped:NO]; + { + // writing to this texture + MTLTextureDescriptor* textureDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA32Float width:1 height:1 mipmapped:NO]; + + textureDesc.usage = MTLTextureUsageShaderWrite | MTLTextureUsageShaderRead; + textureDesc.storageMode = MTLStorageModeManaged; + _sampleComputeTex = [_device newTextureWithDescriptor:textureDesc]; + } - textureDesc.usage = MTLTextureUsageShaderWrite | MTLTextureUsageShaderRead; - textureDesc.storageMode = MTLStorageModeManaged; - _sampleTex = [_device newTextureWithDescriptor:textureDesc]; + { + // this must match drawable format due to using a blit to copy pixel out of drawable + MTLTextureDescriptor* textureDesc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA16Float width:1 height:1 mipmapped:NO]; + //textureDesc.usage = MTLTextureUsageShaderWrite | MTLTextureUsageShaderRead; + textureDesc.storageMode = MTLStorageModeManaged; + + _sampleRenderTex = [_device newTextureWithDescriptor:textureDesc]; + } } - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipUV:(bool)doFlipUV @@ -916,7 +929,7 @@ - (void)updateImageSettings:(const string&)fullFilename image:(KTXImage&)image // } // can derive these from texture queries - _showSettings->maxLOD = (int32_t)image.header.numberOfMipmapLevels; + _showSettings->mipCount = (int32_t)image.header.numberOfMipmapLevels; _showSettings->faceCount = (image.textureType == MyMTLTextureTypeCube || image.textureType == MyMTLTextureTypeCubeArray) ? 
6 : 0; _showSettings->arrayCount = (int32_t)image.header.numberOfArrayElements; @@ -931,7 +944,7 @@ - (void)resetSomeImageSettings:(BOOL)isNewFile { // only reset these on new texture, but have to revalidate if (isNewFile) { // then can manipulate this after loading - _showSettings->mipLOD = 0; + _showSettings->mipNumber = 0; _showSettings->faceNumber = 0; _showSettings->arrayNumber = 0; _showSettings->sliceNumber = 0; @@ -947,7 +960,7 @@ - (void)resetSomeImageSettings:(BOOL)isNewFile { } else { // reloaded file may have different limits - _showSettings->mipLOD = std::min(_showSettings->mipLOD, _showSettings->maxLOD); + _showSettings->mipNumber = std::min(_showSettings->mipNumber, _showSettings->mipCount); _showSettings->faceNumber = std::min(_showSettings->faceNumber, _showSettings->faceCount); _showSettings->arrayNumber = std::min(_showSettings->arrayNumber, _showSettings->arrayCount); _showSettings->sliceNumber = std::min(_showSettings->sliceNumber, _showSettings->sliceCount); @@ -1257,6 +1270,9 @@ - (void)drawInMTKView:(nonnull MTKView *)view [self drawMain:commandBuffer view:view]; + // hold onto this for sampling from it via eyedropper + _lastDrawableTexture = view.currentDrawable.texture; + [commandBuffer presentDrawable:view.currentDrawable]; [commandBuffer commit]; } @@ -1389,7 +1405,7 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie if (_showSettings->isPreview) { // upload this on each face drawn, since want to be able to draw all mips/levels at once - [self _setUniformsLevel:uniformsLevel mipLOD:_showSettings->mipLOD]; + [self _setUniformsLevel:uniformsLevel mipLOD:_showSettings->mipNumber]; [renderEncoder setVertexBytes:&uniformsLevel length:sizeof(uniformsLevel) @@ -1420,7 +1436,7 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie // gap the contact sheet, note this 2 pixels is scaled on small textures by the zoom int32_t gap = _showSettings->showAllPixelGap; // * _showSettings->viewContentScaleFactor; - for (int32_t mip = 0; mip < _showSettings->maxLOD; ++mip) { + for (int32_t mip = 0; mip < _showSettings->mipCount; ++mip) { // upload this on each face drawn, since want to be able to draw all mips/levels at once [self _setUniformsLevel:uniformsLevel mipLOD:mip]; @@ -1485,7 +1501,7 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie } } else { - int32_t mip = _showSettings->mipLOD; + int32_t mip = _showSettings->mipNumber; // upload this on each face drawn, since want to be able to draw all mips/levels at once [self _setUniformsLevel:uniformsLevel mipLOD:mip]; @@ -1531,11 +1547,6 @@ - (void)drawMain:(id)commandBuffer view:(nonnull MTKView *)vie // want to run samples independent of redrawing the main view - (void)drawSample { - // Note: this is failing when running via Cmake - bool doSample = true; - if (!doSample) { - return; - } if (_colorMap == nil) { return; } @@ -1546,25 +1557,57 @@ - (void)drawSample commandBuffer.label = @"MyCommand"; + // this reads directly from compressed texture via a compute shader int32_t textureLookupX = _showSettings->textureLookupX; int32_t textureLookupY = _showSettings->textureLookupY; - int32_t textureLookupMipX = _showSettings->textureLookupMipX; - int32_t textureLookupMipY = _showSettings->textureLookupMipY; - - [self drawSamples:commandBuffer lookupX:textureLookupMipX lookupY:textureLookupMipY]; + bool isDrawableBlit = _showSettings->isEyedropperFromDrawable(); - // Synchronize the managed texture. 
- id blitCommandEncoder = [commandBuffer blitCommandEncoder]; - if (blitCommandEncoder) { - [blitCommandEncoder synchronizeResource:_sampleTex]; - [blitCommandEncoder endEncoding]; + // TODO: only don't blit for plane + no debug or shape + // otherwise want the pixel under the cursor, but this may include grid mixed in and other debug overlays + if (isDrawableBlit) { + MTLOrigin srcOrigin = MTLOriginMake(_showSettings->cursorX, _showSettings->cursorY, 0); + srcOrigin.x *= _showSettings->viewContentScaleFactor; + srcOrigin.y *= _showSettings->viewContentScaleFactor; + + // Note: here we don't know the uv in original texture, would have to write that out to another + // texture. Also on shapes, texel may not change but lighting might. + + // can simply blit the color out of the render buffer + id blitCommandEncoder = [commandBuffer blitCommandEncoder]; + if (blitCommandEncoder) { + [blitCommandEncoder copyFromTexture:_lastDrawableTexture + sourceSlice:0 sourceLevel:0 sourceOrigin:srcOrigin sourceSize:MTLSizeMake(1,1,1) + toTexture:_sampleRenderTex + destinationSlice:0 destinationLevel:0 destinationOrigin:MTLOriginMake(0,0,0) + ]; + [blitCommandEncoder synchronizeResource:_sampleRenderTex]; + [blitCommandEncoder endEncoding]; + } + } + else { + + int32_t textureLookupMipX = _showSettings->textureLookupMipX; + int32_t textureLookupMipY = _showSettings->textureLookupMipY; + + [self drawSamples:commandBuffer lookupX:textureLookupMipX lookupY:textureLookupMipY]; + + // Synchronize the managed texture. + id blitCommandEncoder = [commandBuffer blitCommandEncoder]; + if (blitCommandEncoder) { + [blitCommandEncoder synchronizeResource:_sampleComputeTex]; + [blitCommandEncoder endEncoding]; + } } // After synchonization, copy value back to the cpu - id texture = _sampleTex; - [commandBuffer addCompletedHandler:^(id /* buffer */) + id texture = isDrawableBlit ? 
_sampleRenderTex : _sampleComputeTex; + + [commandBuffer addCompletedHandler:^(id buffer) { + if (buffer.error != nil) { + return; + } // only 1 pixel in the texture right now float4 data; @@ -1574,7 +1617,14 @@ - (void)drawSample { 1, 1, 1 } // MTLSize }; - [texture getBytes:&data bytesPerRow:16 fromRegion:region mipmapLevel:0]; + if (isDrawableBlit) { + half4 data16f; + [texture getBytes:&data16f bytesPerRow:8 fromRegion:region mipmapLevel:0]; + data = toFloat4(data16f); + } + else { + [texture getBytes:&data bytesPerRow:16 fromRegion:region mipmapLevel:0]; + } // return the value at the sample _showSettings->textureResult = data; @@ -1608,7 +1658,7 @@ - (void)drawSamples:(id)commandBuffer lookupX:(int32_t)lookupX if (_showSettings->sliceNumber) { uniforms.arrayOrSlice = _showSettings->sliceNumber; } - uniforms.mipLOD = _showSettings->mipLOD; + uniforms.mipLOD = _showSettings->mipNumber; // run compute here, don't need a shape switch(_colorMap.textureType) { @@ -1642,7 +1692,7 @@ - (void)drawSamples:(id)commandBuffer lookupX:(int32_t)lookupX [renderEncoder setTexture:_colorMap atIndex:TextureIndexColor]; - [renderEncoder setTexture:_sampleTex + [renderEncoder setTexture:_sampleComputeTex atIndex:TextureIndexSamples]; [renderEncoder setBytes:&uniforms length:sizeof(UniformsCS) atIndex:BufferIndexUniformsCS]; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 225918ba..0af5919a 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -270,10 +270,11 @@ half3 transformNormalByBasis(half3 bumpNormal, half3 vertexNormal, float3 worldP float2 duv1 = dfdx(uv); float2 duv2 = dfdy(uv); - // getting non-zere uv with 0 length duv1/2 on MBP 16", this leaves missing bump artifacts + // getting non-zero uv with 0 length duv1/2 on MBP 16", this leaves missing bump artifacts // in large triangle error so this is a patch to avoid that. 
- if ((length_squared(duv1) < 1e-12) && - (length_squared(duv2) < 1e-12)) { + if ((length_squared(duv1) < 1e-10) && + (length_squared(duv2) < 1e-10)) { + //return 0.0h; // flag pixels with no bump return vertexNormal; } diff --git a/kramv/KramViewerBase.cpp b/kramv/KramViewerBase.cpp index 2bdb8321..d53fac5f 100644 --- a/kramv/KramViewerBase.cpp +++ b/kramv/KramViewerBase.cpp @@ -62,6 +62,11 @@ const char* ShowSettings::debugModeText() const { return text; } +bool ShowSettings::isEyedropperFromDrawable() { + return meshNumber > 0 || isPreview || isShowingAllLevelsAndMips || shapeChannel > 0; +} + + void ShowSettings::advanceMeshNumber(bool decrement) { int32_t numEnums = meshCount; int32_t number = meshNumber; diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index a0fc8eb1..f219534d 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -80,8 +80,8 @@ class ShowSettings { int32_t showAllPixelGap = 2; // These control which texture is viewed in single texture mode - int32_t mipLOD = 0; - int32_t maxLOD = 1; + int32_t mipNumber = 0; + int32_t mipCount = 1; int32_t faceNumber = 0; int32_t faceCount = 0; @@ -142,6 +142,9 @@ class ShowSettings { // whether files are pulled from folder(s) bool isFolder = false; + // can sample from drawable or from single source texture + bool isEyedropperFromDrawable(); + // can have up to 5 channels (xyz as xy, 2 other channels) int32_t numChannels = 0; @@ -161,6 +164,9 @@ class ShowSettings { int32_t textureLookupX = 0; int32_t textureLookupY = 0; + int32_t lastCursorX = 0; + int32_t lastCursorY = 0; + // exact pixel in the mip level int32_t textureLookupMipX = 0; int32_t textureLookupMipY = 0; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index fc28d890..0b26aa62 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -710,7 +710,7 @@ - (void)doZoomMath:(float)newZoom newPan:(float2&)newPan { float minY = -0.5f; if (_showSettings->isShowingAllLevelsAndMips) { maxX += 1.0f * (_showSettings->totalChunks() - 1); - minY -= 1.0f * (_showSettings->maxLOD - 1); + minY -= 1.0f * (_showSettings->mipCount - 1); } // that's in model space (+/0.5f, +/0.5f), so convert to texture space @@ -792,7 +792,7 @@ - (void)handleGesture:(NSGestureRecognizer *)gestureRecognizer CGRect viewRect = CGRectMake(-1.0f, -1.0f, 2.0f, 2.0f); int32_t numTexturesX = _showSettings->totalChunks(); - int32_t numTexturesY = _showSettings->maxLOD; + int32_t numTexturesY = _showSettings->mipCount; if (_showSettings->isShowingAllLevelsAndMips) { imageRect.origin.y -= (numTexturesY - 1 ) * imageRect.size.height; @@ -906,6 +906,12 @@ float4 toSnorm8(float4 c) return (255.0 / 127.0) * c - (128 / 127.0); } +float4 toSnorm(float4 c) +{ + return 2.0f * c - 1.0f; +} + + - (void)updateEyedropper { if ((!_showSettings->isHudShown)) { @@ -919,6 +925,32 @@ - (void)updateEyedropper { // don't wait on renderer to update this matrix Renderer* renderer = (Renderer*)self.delegate; + + if (_showSettings->isEyedropperFromDrawable()) { + // this only needs the cursor location, but can't supply uv to displayPixelData + + if (_showSettings->lastCursorX != _showSettings->cursorX || + _showSettings->lastCursorY != _showSettings->cursorY) + { + // TODO: this means pan/zoom doesn't update data, may want to track some absolute + // location in virtal canvas. 
+ + _showSettings->lastCursorX = _showSettings->cursorX; + _showSettings->lastCursorY = _showSettings->cursorY; + + // This just samples from drawable, so no re-render is needed + [self showEyedropperData:float2m(0,0)]; + + // TODO: remove this, but only way to get drawSamples to execute right now, but then + // entire texture re-renders and that's not power efficient. Really just want to sample + // from the already rendered texture since content isn't animated. + + self.needsDisplay = YES; + } + + return; + } + float4x4 projectionViewModelMatrix = [renderer computeImageTransform:_showSettings->panX panY:_showSettings->panY zoom:_showSettings->zoom]; // convert to clip space, or else need to apply additional viewport transform @@ -952,8 +984,7 @@ - (void)updateEyedropper { pixel.x *= 0.999f; pixel.y *= 0.999f; - float uvX = pixel.x; - float uvY = pixel.y; + float2 uv = pixel.xy; // pixels are 0 based pixel.x *= _showSettings->imageBoundsX; @@ -962,7 +993,7 @@ - (void)updateEyedropper { // TODO: finish this logic, need to account for gaps too, and then isolate to a given level and mip to sample // if (_showSettings->isShowingAllLevelsAndMips) { // pixel.x *= _showSettings->totalChunks(); -// pixel.y *= _showSettings->maxLOD; +// pixel.y *= _showSettings->mipCount; // } // TODO: clearing out the last px visited makes it hard to gather data @@ -985,6 +1016,7 @@ - (void)updateEyedropper { return; } + // Note: fromView: nil returns isFlipped coordinate, fromView:self flips it back. int32_t newX = (int32_t)pixel.x; @@ -996,21 +1028,96 @@ - (void)updateEyedropper { // Note: this only samples from the original texture via compute shaders // so preview mode pixel colors are not conveyed. But can see underlying data driving preview. - MyMTLPixelFormat format = (MyMTLPixelFormat)_showSettings->originalFormat; - - // DONE: use these to format the text - bool isSrgb = isSrgbFormat(format); - bool isSigned = isSignedFormat(format); - bool isHdr = isHdrFormat(format); - int32_t numChannels = _showSettings->numChannels; - // %.0f rounds the value, but want truncation _showSettings->textureLookupX = newX; _showSettings->textureLookupY = newY; + + [self showEyedropperData:uv]; + + // TODO: remove this, but only way to get drawSamples to execute right now, but then + // entire texture re-renders and that's not power efficient. + self.needsDisplay = YES; + + } +} + +- (void)showEyedropperData:(float2)uv { + string text; + string tmp; + + float4 c = _showSettings->textureResult; + + // DONE: use these to format the text + MyMTLPixelFormat format = _showSettings->originalFormat; + bool isSrgb = isSrgbFormat(format); + bool isSigned = isSignedFormat(format); + + bool isHdr = isHdrFormat(format); + bool isFloat = isHdr; + + int32_t numChannels = _showSettings->numChannels; + + bool isNormal = _showSettings->isNormal; + bool isColor = !isNormal; + + bool isDirection = false; + bool isValue = false; + + if (_showSettings->isEyedropperFromDrawable()) { + // TODO: could write barycentric, then lookup uv from that + // then could show the block info. 
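For reference, a small standalone sketch of the uv-to-texel mapping the eyedropper uses above: the coordinate is nudged just inside [0,1) before scaling by the image bounds, so truncation never produces an index equal to the bounds. uvToTexel is a hypothetical name, not part of the patch.

#include <cstdint>

void uvToTexel(float u, float v, int32_t boundsX, int32_t boundsY,
               int32_t& px, int32_t& py) {
    u *= 0.999f;                 // keep u * boundsX strictly below boundsX
    v *= 0.999f;
    px = (int32_t)(u * boundsX); // truncation, not rounding
    py = (int32_t)(v * boundsY);
}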
+ + // interpret based on shapeChannel, debugMode, etc + switch(_showSettings->shapeChannel) { + case ShapeChannelDepth: + isValue = true; + isFloat = true; + numChannels = 1; + break; + case ShapeChannelUV0: + isValue = true; + isSigned = true; + numChannels = 2; // TODO: fix for 3d uvw + isFloat = true; + break; + + case ShapeChannelFaceNormal: + case ShapeChannelNormal: + case ShapeChannelTangent: + case ShapeChannelBitangent: + isSigned = false; // writing to 16f as unorm, so need conversion below + isDirection = true; + numChannels = 3; + + // convert unorm to snnorm + c = toSnorm(c); + break; + + case ShapeChannelMipLevel: + isValue = true; + isSigned = false; + isFloat = true; + + // viz is mipNumber as alpha + numChannels = 1; + c.r = 4.0 - (c.a * 4.0); + break; + + default: + break; + } + + // debug mode + + // preview vs. not + + + } + else { + // this will be out of sync with gpu eval, so may want to only display px from returned lookup // this will always be a linear color - float4 c = _showSettings->textureResult; int32_t x = _showSettings->textureResultX; int32_t y = _showSettings->textureResultY; @@ -1025,10 +1132,7 @@ - (void)updateEyedropper { append_sprintf(text, "px:%d %d\n", x, y); // show block num - int mipLOD = _showSettings->mipLOD; - - // TODO: these block numbers are not accurate on Toof at 4x4 - // there is resizing going on to the dimensions + int mipLOD = _showSettings->mipNumber; int mipX = _showSettings->imageBoundsX; int mipY = _showSettings->imageBoundsY; @@ -1039,8 +1143,8 @@ - (void)updateEyedropper { mipX = std::max(1, mipX); mipY = std::max(1, mipY); - mipX = (int32_t)(uvX * mipX); - mipY = (int32_t)(uvY * mipY); + mipX = (int32_t)(uv.x * mipX); + mipY = (int32_t)(uv.y * mipY); _showSettings->textureLookupMipX = mipX; _showSettings->textureLookupMipY = mipY; @@ -1063,96 +1167,102 @@ - (void)updateEyedropper { // TODO: more criteria here, can have 2 channel PBR metal-roughness // also have 4 channel normals where zw store other data. 
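The normal branch further below reconstructs z from the two stored channels. A standalone sketch of that reconstruction, not part of the patch: clamp the xy length so the square root stays real, and assume a positive z as tangent-space normals do.

#include <algorithm>
#include <cmath>

float reconstructNormalZ(float nx, float ny) {
    const float maxLen2 = 0.999f * 0.999f;            // same clamp as the viewer code
    float len2 = std::min(nx * nx + ny * ny, maxLen2);
    return std::sqrt(1.0f - len2);
}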
- bool isNormal = _showSettings->isNormal; - bool isFloat = isHdr; bool isDecodeSigned = isSignedFormat(_showSettings->decodedFormat); if (isSigned && !isDecodeSigned) { c = toSnorm8(c); } + } + + if (isValue) { + printChannels(tmp, "val: ", c, numChannels, isFloat, isSigned); + text += tmp; + } + else if (isDirection) { + // print direction + isFloat = true; + isSigned = true; - if (isNormal) { - float nx = c.x; - float ny = c.y; - - // unorm -> snorm - if (!isSigned) { - nx = toSnorm8(nx); - ny = toSnorm8(ny); - } - - // Note: not clamping nx,ny to < 1 like in shader - - // this is always postive on tan-space normals - // assuming we're not viewing world normals - const float maxLen2 = 0.999 * 0.999; - float len2 = nx * nx + ny * ny; - if (len2 > maxLen2) - len2 = maxLen2; - - float nz = sqrt(1.0f - len2); - - // print the underlying color (some nmaps are xy in 4 channels) - string tmp; - printChannels(tmp, "ln: ", c, numChannels, isFloat, isSigned); - text += tmp; + printChannels(tmp, "dir: ", c, numChannels, isFloat, isSigned); + text += tmp; + } + else if (isNormal) { + float nx = c.x; + float ny = c.y; + + // unorm -> snorm + if (!isSigned) { + nx = toSnorm8(nx); + ny = toSnorm8(ny); + } + + // Note: not clamping nx,ny to < 1 like in shader + + // this is always postive on tan-space normals + // assuming we're not viewing world normals + const float maxLen2 = 0.999 * 0.999; + float len2 = nx * nx + ny * ny; + if (len2 > maxLen2) + len2 = maxLen2; + + float nz = sqrt(1.0f - len2); + + // print the underlying color (some nmaps are xy in 4 channels) + printChannels(tmp, "lin: ", c, numChannels, isFloat, isSigned); + text += tmp; + + // print direction + float4 d = float4m(nx,ny,nz,0.0f); + isFloat = true; + isSigned = true; + printChannels(tmp, "dir: ", d, 3, isFloat, isSigned); + text += tmp; + } + else if (isColor) { + // DONE: write some print helpers based on float4 and length + printChannels(tmp, "lin: ", c, numChannels, isFloat, isSigned); + text += tmp; + + if (isSrgb) { + // this saturates the value, so don't use for extended srgb + float4 s = linearToSRGB(c); - // print direction - float4 d = float4m(nx,ny,nz,0.0f); - isFloat = true; - isSigned = true; - printChannels(tmp, "dr: ", d, 3, isFloat, isSigned); + printChannels(tmp, "srg: ", s, numChannels, isFloat, isSigned); text += tmp; } - else { - // DONE: write some print helpers based on float4 and length - string tmp; - printChannels(tmp, "ln: ", c, numChannels, isFloat, isSigned); + + // display the premul values too, but not fully transparent pixels + if (c.a > 0.0 && c.a < 1.0f) + { + printChannels(tmp, "lnp: ", toPremul(c), numChannels, isFloat, isSigned); text += tmp; + // TODO: do we need the premul srgb color too? if (isSrgb) { // this saturates the value, so don't use for extended srgb float4 s = linearToSRGB(c); - printChannels(tmp, "sr: ", s, numChannels, isFloat, isSigned); - text += tmp; - } - - // display the premul values too, but not fully transparent pixels - if (c.a > 0.0 && c.a < 1.0f) - { - printChannels(tmp, "lnp: ", toPremul(c), numChannels, isFloat, isSigned); + printChannels(tmp, "srp: ", toPremul(s), numChannels, isFloat, isSigned); text += tmp; - - // TODO: do we need the premul srgb color too? 
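The srg, lnp, and srp lines in this hunk rely on two standard conversions; a standalone C++ sketch of what helpers like linearToSRGB and toPremul presumably do per pixel, not part of the patch. The sRGB transfer function is clamped first, which is why the comment warns against using it for extended-range sRGB.

#include <algorithm>
#include <cmath>

// Piecewise sRGB transfer function applied to a linear channel in [0,1].
float linearToSRGB(float x) {
    x = std::min(1.0f, std::max(0.0f, x));
    return (x <= 0.0031308f) ? 12.92f * x
                             : 1.055f * std::pow(x, 1.0f / 2.4f) - 0.055f;
}

// Premultiplied alpha: scale rgb by a, leave a unchanged.
void toPremul(float rgba[4]) {
    rgba[0] *= rgba[3];
    rgba[1] *= rgba[3];
    rgba[2] *= rgba[3];
}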
- if (isSrgb) { - // this saturates the value, so don't use for extended srgb - float4 s = linearToSRGB(c); - - printChannels(tmp, "srp: ", toPremul(s), numChannels, isFloat, isSigned); - text += tmp; - } } } - - [self setEyedropperText:text.c_str()]; + } + + [self setEyedropperText:text.c_str()]; + + // TODO: range display of pixels is useful, only showing pixels that fall + // within a given range, but would need slider then, and determine range of pixels. + // TODO: Auto-range is also useful for depth (ignore far plane of 0 or 1). + + // TOOD: display histogram from compute, bin into buffer counts of pixels + + // DONE: stop clobbering hud text, need another set of labels + // and a zoom preview of the pixels under the cursor. + // Otherwise, can't really see the underlying color. + + // TODO: Stuff these on clipboard with a click, or use cmd+C? - // TODO: range display of pixels is useful, only showing pixels that fall - // within a given range, but would need slider then, and determine range of pixels. - // TODO: Auto-range is also useful for depth (ignore far plane of 0 or 1). - - // TOOD: display histogram from compute, bin into buffer counts of pixels - - // DONE: stop clobbering hud text, need another set of labels - // and a zoom preview of the pixels under the cursor. - // Otherwise, can't really see the underlying color. - - // TODO: Stuff these on clipboard with a click, or use cmd+C? - // TODO: remove this, but only way to get drawSamples to execute right now, but then - // entire texture re-renders and that's not power efficient. - self.needsDisplay = YES; - } } - (void)setEyedropperText:(const char*)text { @@ -1226,7 +1336,7 @@ - (void)scrollWheel:(NSEvent *)event CGRect viewRect = CGRectMake(-1.0f, -1.0f, 2.0f, 2.0f); int32_t numTexturesX = _showSettings->totalChunks(); - int32_t numTexturesY = _showSettings->maxLOD; + int32_t numTexturesY = _showSettings->mipCount; if (_showSettings->isShowingAllLevelsAndMips) { imageRect.origin.y -= (numTexturesY - 1 ) * imageRect.size.height; @@ -1298,11 +1408,11 @@ - (void)updateUIAfterLoad { // here and in HandleKey. // base on showSettings, hide some fo the buttons - bool isShowAllHidden = _showSettings->totalChunks() <= 1 && _showSettings->maxLOD <= 1; + bool isShowAllHidden = _showSettings->totalChunks() <= 1 && _showSettings->mipCount <= 1; bool isArrayHidden = _showSettings->arrayCount <= 1; bool isFaceSliceHidden = _showSettings->faceCount <= 1 && _showSettings->sliceCount <= 1; - bool isMipHidden = _showSettings->maxLOD <= 1; + bool isMipHidden = _showSettings->mipCount <= 1; bool isJumpToNextHidden = !(_showSettings->isArchive || _showSettings->isFolder); @@ -1388,7 +1498,7 @@ - (void)updateUIControlState auto arrayState = toState(_showSettings->arrayNumber > 0); auto faceState = toState(_showSettings->faceNumber > 0); - auto mipState = toState(_showSettings->mipLOD > 0); + auto mipState = toState(_showSettings->mipNumber > 0); auto meshState = toState(_showSettings->meshNumber > 0); auto meshChannelState = toState(_showSettings->shapeChannel > 0); @@ -1723,7 +1833,7 @@ - (bool)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown // This zoom needs to be checked against zoom limits // there's a cap on the zoom multiplier. // This is reducing zoom which expands the image. 
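A small reference sketch, not part of the patch, of the mip dimension math behind the mip lookups above and the zoom factor immediately below: mip level N of a W x H image is max(1, W >> N) by max(1, H >> N), and the key handler below applies the matching 1/(1 << mipNumber) factor to the zoom.

#include <algorithm>
#include <cstdint>

void mipDimensions(int32_t w, int32_t h, int32_t mipNumber,
                   int32_t& mipW, int32_t& mipH) {
    mipW = std::max(1, w >> mipNumber);
    mipH = std::max(1, h >> mipNumber);
}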
- zoom *= 1.0f / (1 << _showSettings->mipLOD); + zoom *= 1.0f / (1 << _showSettings->mipNumber); // even if zoom same, still do this since it resets the pan _showSettings->zoom = zoom; @@ -1937,14 +2047,14 @@ - (bool)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown // mip up/down case Key::M: - if (_showSettings->maxLOD > 1) { + if (_showSettings->mipCount > 1) { if (isShiftKeyDown) { - _showSettings->mipLOD = MAX(_showSettings->mipLOD - 1, 0); + _showSettings->mipNumber = MAX(_showSettings->mipNumber - 1, 0); } else { - _showSettings->mipLOD = MIN(_showSettings->mipLOD + 1, _showSettings->maxLOD - 1); + _showSettings->mipNumber = MIN(_showSettings->mipNumber + 1, _showSettings->mipCount - 1); } - sprintf(text, "Mip %d/%d", _showSettings->mipLOD, _showSettings->maxLOD); + sprintf(text, "Mip %d/%d", _showSettings->mipNumber, _showSettings->mipCount); isChanged = true; } break; @@ -2752,7 +2862,11 @@ - (void)viewDidLoad [super viewDidLoad]; _view = (MyMTKView *)self.view; - + + // have to disable this since reading back from textures + // that slows the blit to the screen + _view.framebufferOnly = NO; + _view.device = MTLCreateSystemDefaultDevice(); if(!_view.device) From f4a52142934ac6eab3f3bc2e35eebcc836b8d637 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 13 Jun 2021 17:09:49 -0700 Subject: [PATCH 127/901] kramv - debug fragment tangents, and fix bitangent shape shannel for vertex tangents --- kramv/KramShaders.metal | 71 +++++++++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 0af5919a..e2d75080 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -256,7 +256,8 @@ half3 toNormal(half3 n) // Then transforms the bumpNormal to that space. No tangent is needed. // The downside is this must all be fp32, and all done in fragment shader and use derivatives. // Derivatives are known to be caclulated differently depending on hw and different precision. -half3 transformNormalByBasis(half3 bumpNormal, half3 vertexNormal, float3 worldPos, float2 uv) + +float3x3 generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float2 uv) { float3 N = toFloat(vertexNormal); @@ -265,24 +266,24 @@ half3 transformNormalByBasis(half3 bumpNormal, half3 vertexNormal, float3 worldP //N.y = -N.y; // get edge vectors of the pixel triangle - float3 dp1 = dfdx(worldPos); - float3 dp2 = dfdy(worldPos); - float2 duv1 = dfdx(uv); - float2 duv2 = dfdy(uv); + float3 dpx = dfdx(worldPos); + float3 dpy = dfdy(worldPos); + float2 duvx = dfdx(uv); + float2 duvy = dfdy(uv); // getting non-zero uv with 0 length duv1/2 on MBP 16", this leaves missing bump artifacts // in large triangle error so this is a patch to avoid that. 
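The refactor below extracts the screen-space basis into generateFragmentTangentBasis. As background, a standalone C++ sketch of the underlying cotangent-frame solve: given the screen-space derivatives of world position and uv, find T and B so the position derivatives decompose along them according to the uv derivatives, then rescale both by the larger magnitude. The degenerate-derivative guard discussed above is omitted here, and the names and minimal vector type are local to the sketch.

#include <algorithm>
#include <array>
#include <cmath>

using V3 = std::array<float, 3>;
static V3 cross(const V3& a, const V3& b) {
    return { a[1]*b[2] - a[2]*b[1], a[2]*b[0] - a[0]*b[2], a[0]*b[1] - a[1]*b[0] };
}
static V3 madd(const V3& a, float s, const V3& b, float t) {
    return { a[0]*s + b[0]*t, a[1]*s + b[1]*t, a[2]*s + b[2]*t };
}
static float len2(const V3& a) { return a[0]*a[0] + a[1]*a[1] + a[2]*a[2]; }

// N is the (unit) vertex normal, dpx/dpy the derivatives of world position,
// duvx/duvy the derivatives of uv.
void cotangentFrame(const V3& N, const V3& dpx, const V3& dpy,
                    const float duvx[2], const float duvy[2], V3& T, V3& B) {
    V3 dp1perp = cross(N, dpx);
    V3 dp2perp = cross(dpy, N);
    T = madd(dp2perp, duvx[0], dp1perp, duvy[0]);
    B = madd(dp2perp, duvx[1], dp1perp, duvy[1]);
    float invmax = 1.0f / std::sqrt(std::max(len2(T), len2(B)));
    T = { T[0]*invmax, T[1]*invmax, T[2]*invmax };
    B = { B[0]*invmax, B[1]*invmax, B[2]*invmax };
}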
- if ((length_squared(duv1) < 1e-10) && - (length_squared(duv2) < 1e-10)) { - //return 0.0h; // flag pixels with no bump - return vertexNormal; - } +// if ((length_squared(duvx) < 1e-10) && +// (length_squared(duvy) < 1e-10)) { +// //return 0.0h; // flag pixels with no bump +// //return vertexNormal; +// } // solve the linear system - float3 dp2perp = cross(dp2, N); - float3 dp1perp = cross(N, dp1); - float3 T = dp2perp * duv1.x + dp1perp * duv2.x; - float3 B = dp2perp * duv1.y + dp1perp * duv2.y; + float3 dp2perp = cross(dpy, N); + float3 dp1perp = cross(N, dpx); + float3 T = dp2perp * duvx.x + dp1perp * duvy.x; + float3 B = dp2perp * duvx.y + dp1perp * duvy.y; float invmax = rsqrt(max(length_squared(T), length_squared(B))); // keeps relative magnitude of two vectors, they're not both unit vecs @@ -292,9 +293,17 @@ half3 transformNormalByBasis(half3 bumpNormal, half3 vertexNormal, float3 worldP // had to flip this sign to get lighting to match vertex data T = -T; + float3x3 basis = float3x3(T, B, N); + return basis; +} + +half3 transformNormalByBasis(half3 bumpNormal, half3 vertexNormal, float3 worldPos, float2 uv) +{ + float3x3 basis = generateFragmentTangentBasis(vertexNormal, worldPos, uv); + // construct a scale-invariant frame // drop to half to match other call - bumpNormal = toHalf(float3x3(T, B, N) * toFloat(bumpNormal)); + bumpNormal = toHalf(basis * toFloat(bumpNormal)); return bumpNormal; } @@ -817,27 +826,35 @@ float4 DrawPixels( } if (uniforms.shapeChannel != ShShapeChannelNone) { - // TODO: Really hard to interpret direction from color - // see about use the vector flow fields + // Hard to interpret direction from color, but have eyedropper to decipher render color. + // See about using the vector flow fields to see values across render, but needs fsqd pass. if (uniforms.shapeChannel == ShShapeChannelUV0) { + // fract so wrap will show repeating uv in 0,1, and never negative or large values + // don't have mirror address modes yet. 
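In the bitangent branch below, vertex tangents arrive as a float4 whose w component carries the handedness sign, so the bitangent is reconstructed rather than stored. A standalone sketch of that reconstruction, not part of the patch:

#include <array>

using V3 = std::array<float, 3>;

// B = cross(N, T.xyz) * T.w, with T.w = +1 or -1 for the handedness.
V3 reconstructBitangent(const V3& n, const V3& t, float tw) {
    return { (n[1]*t[2] - n[2]*t[1]) * tw,
             (n[2]*t[0] - n[0]*t[2]) * tw,
             (n[0]*t[1] - n[1]*t[0]) * tw };
}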
c.rgb = fract(in.texCoordXYZ); } else if (uniforms.shapeChannel == ShShapeChannelNormal) { c.rgb = toUnorm(toFloat(in.normal)); } - else if (uniforms.useTangent && uniforms.shapeChannel == ShShapeChannelTangent) { - // TODO: make this work with useTangent = false - // may have to call routine again, or pass back basis - - c.rgb = toUnorm(toFloat(in.tangent.xyz)); + else if (uniforms.shapeChannel == ShShapeChannelTangent) { + if (uniforms.useTangent) { + c.rgb = toUnorm(toFloat(in.tangent.xyz)); + } + else { + float3x3 basis = generateFragmentTangentBasis(in.normal, in.worldPos, in.texCoord); + c.rgb = toUnorm(basis[0]); + } } else if (uniforms.shapeChannel == ShShapeChannelBitangent) { - // TODO: make this work with useTangent = false - // may have to call routine again, or pass back basis - - half3 bitangent = cross(in.tangent.xyz, in.normal) * in.tangent.w; - c.rgb = toUnorm(toFloat(bitangent)); + if (uniforms.useTangent) { + half3 bitangent = cross(in.normal, in.tangent.xyz) * in.tangent.w; + c.rgb = toUnorm(toFloat(bitangent)); + } + else { + float3x3 basis = generateFragmentTangentBasis(in.normal, in.worldPos, in.texCoord); + c.rgb = toUnorm(basis[1]); // bitan + } } else if (uniforms.shapeChannel == ShShapeChannelDepth) { c.rgb = saturate(in.position.z / in.position.w); From 87a05fc8a04a0cab1a1cf77bc5c5027dee3f305d Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 13 Jun 2021 22:52:34 -0700 Subject: [PATCH 128/901] kramv - fix fragment basis --- kramv/KramRenderer.mm | 17 +++++-- kramv/KramShaders.metal | 107 ++++++++++++++++++++++++++++++++-------- kramv/KramViewerMain.mm | 8 +-- 3 files changed, 106 insertions(+), 26 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index a59d21ae..517aadc8 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -975,19 +975,30 @@ - (void)resetSomeImageSettings:(BOOL)isNewFile { _showSettings->zoom = _showSettings->zoomFit; - // test rendering with inversion and mirroring + // test rendering with inversion and mirroring and non-uniform scale bool doInvertX = false; + bool doScaleX = false; // have one of these for each texture added to the viewer float scaleX = MAX(1, _showSettings->imageBoundsX); float scaleY = MAX(1, _showSettings->imageBoundsY); float scaleZ = MAX(scaleX, scaleY); // don't want 1.0f, or specular is all off due to extreme scale differences - _modelMatrix = float4x4(float4m(doInvertX ? -scaleX : scaleX, scaleY, scaleZ, 1.0f)); // non uniform scale + + float tmpScaleX = scaleX; + if (doInvertX) { + tmpScaleX = -tmpScaleX; + } + if (doScaleX) { + tmpScaleX *= 2.0f; + } + + _modelMatrix = float4x4(float4m(tmpScaleX, scaleY, scaleZ, 1.0f)); // non uniform scale _modelMatrix = _modelMatrix * matrix4x4_translation(0.0f, 0.0f, -1.0); // set z=-1 unit back // uniform scaled 3d primitiv float scale = MAX(scaleX, scaleY); - _modelMatrix3D = float4x4(float4m(doInvertX ? -scale : scale, scale, scale, 1.0f)); // uniform scale + + _modelMatrix3D = float4x4(float4m((doScaleX || doInvertX) ? tmpScaleX : scale, scale, scale, 1.0f)); // uniform scale _modelMatrix3D = _modelMatrix3D * matrix4x4_translation(0.0f, 0.0f, -1.0f); // set z=-1 unit back } diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index e2d75080..307e2287 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -257,13 +257,14 @@ half3 toNormal(half3 n) // The downside is this must all be fp32, and all done in fragment shader and use derivatives. 
// Derivatives are known to be caclulated differently depending on hw and different precision. -float3x3 generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float2 uv) +float3x3 generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float2 uv, thread bool& success) { + // normalizing this didn't help the reconstruction float3 N = toFloat(vertexNormal); // for OpenGL +Y convention, flip N.y // but this doesn't match explicit tangents case, see if those are wrong. - //N.y = -N.y; + // N.y = -N.y; // get edge vectors of the pixel triangle float3 dpx = dfdx(worldPos); @@ -271,24 +272,64 @@ float3x3 generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float float2 duvx = dfdx(uv); float2 duvy = dfdy(uv); - // getting non-zero uv with 0 length duv1/2 on MBP 16", this leaves missing bump artifacts - // in large triangle error so this is a patch to avoid that. -// if ((length_squared(duvx) < 1e-10) && -// (length_squared(duvy) < 1e-10)) { -// //return 0.0h; // flag pixels with no bump -// //return vertexNormal; -// } + // May be pixel noise from this when up close and the derivatives exceed float precision + // so this to identify one failure case where the uv derivatives are clamped to zero. // solve the linear system float3 dp2perp = cross(dpy, N); float3 dp1perp = cross(N, dpx); float3 T = dp2perp * duvx.x + dp1perp * duvy.x; float3 B = dp2perp * duvx.y + dp1perp * duvy.y; - float invmax = rsqrt(max(length_squared(T), length_squared(B))); + + // The author talks about preserving non-uniform scale of the worldPos, but the problem is that + // the duvx/y also can be scaled with respect to one another, and the code doesn't + // knock that out. So with uniform scale and non-uniform uv, invmax also causes non-uniform scale of T/B. + // The normalize code below eliminates non-uniform worldPos scale and non-uniform uv scale. + // But we have a vertNormal that is also normalized. + + float Tlen = length_squared(T); + float Blen = length_squared(B); + + if (Tlen < 1e-10 || Blen < 1e-10) { + success = false; + return float3x3(0.0f); + } + + success = true; + +#if 1 + // Still see some less smooth gradation across sphere compared with vertex tangents + // Maybe N needs to be interpolated as float3 instead of half3 to use this? Bitan looks + // smoother than the tangent. + // Eliminate scale invariance to match vertex basis which is normalized before interpolation. + // This loses that hemisphere is 1x v vertically, and u is 2x rate around the sphere. Tan = 1/2 B then. + // Blocky triangles from this algorithm are because worldPos is linearly interpolated across + // the face of the flat poly, where vertex normals are smoothly interpolated across 3 points of triangle. + + + // Tangent looks much more blocky than Bitangent across the sphere. Why is that? + + T *= rsqrt(Tlen); + B *= rsqrt(Blen); + +#else + // math seems off when sphere u is 2x the rate, tangent is calculated as 0.5 length + // but the stretch is already accounted for by position vs. uv rate. + // Don't want to scale N.x by 0.5, since it's really v that is more squished on model. + + // Seeing tan/bitan that are 0.5 instead of 1.0 in length compared to the vertex tangents. + // This changes the lighting intensities since N is unit length. See explanation above. + + // Note: min gens larger than 1 directions, but the normals look more correct + // like it's the inverse normal transform. But lighting shifts. 
+ + float invmax = rsqrt(max(Tlen, Blen)); + // keeps relative magnitude of two vectors, they're not both unit vecs T *= invmax; B *= invmax; +#endif // had to flip this sign to get lighting to match vertex data T = -T; @@ -299,7 +340,12 @@ float3x3 generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float half3 transformNormalByBasis(half3 bumpNormal, half3 vertexNormal, float3 worldPos, float2 uv) { - float3x3 basis = generateFragmentTangentBasis(vertexNormal, worldPos, uv); + bool success = false; + float3x3 basis = generateFragmentTangentBasis(vertexNormal, worldPos, uv, success); + + if (!success) { + return vertexNormal; + } // construct a scale-invariant frame // drop to half to match other call @@ -502,7 +548,14 @@ ColorInOut DrawImageFunc( // deal with full basis - if (uniforms.isNormalMapPreview) { + bool needsBasis = + uniforms.isNormalMapPreview || + // these need normal transformed to world space + uniforms.shapeChannel == ShaderShapeChannel::ShShapeChannelTangent || + uniforms.shapeChannel == ShaderShapeChannel::ShShapeChannelNormal || + uniforms.shapeChannel == ShaderShapeChannel::ShShapeChannelBitangent; + + if (needsBasis) { float3 normal = in.normal; float3 tangent = in.tangent.xyz; transformBasis(normal, tangent, uniforms.modelMatrix, uniforms.modelMatrixInvScale2.xyz, uniforms.useTangent); @@ -835,26 +888,40 @@ float4 DrawPixels( c.rgb = fract(in.texCoordXYZ); } else if (uniforms.shapeChannel == ShShapeChannelNormal) { - c.rgb = toUnorm(toFloat(in.normal)); + c.rgb = toFloat(in.normal); + + c.rgb = toUnorm(c.rgb); } else if (uniforms.shapeChannel == ShShapeChannelTangent) { if (uniforms.useTangent) { - c.rgb = toUnorm(toFloat(in.tangent.xyz)); + c.rgb = toFloat(in.tangent.xyz); } else { - float3x3 basis = generateFragmentTangentBasis(in.normal, in.worldPos, in.texCoord); - c.rgb = toUnorm(basis[0]); + bool success = false; + float3x3 basis = generateFragmentTangentBasis(in.normal, in.worldPos, in.texCoord, success); + if (!success) + c.rgb = 0; + else + c.rgb = basis[0]; } + + c.rgb = toUnorm(c.rgb); } else if (uniforms.shapeChannel == ShShapeChannelBitangent) { if (uniforms.useTangent) { half3 bitangent = cross(in.normal, in.tangent.xyz) * in.tangent.w; - c.rgb = toUnorm(toFloat(bitangent)); + c.rgb = toFloat(bitangent); } else { - float3x3 basis = generateFragmentTangentBasis(in.normal, in.worldPos, in.texCoord); - c.rgb = toUnorm(basis[1]); // bitan + bool success = false; + float3x3 basis = generateFragmentTangentBasis(in.normal, in.worldPos, in.texCoord, success); + if (!success) + c.rgb = 0; + else + c.rgb = basis[1]; // bitan } + + c.rgb = toUnorm(c.rgb); } else if (uniforms.shapeChannel == ShShapeChannelDepth) { c.rgb = saturate(in.position.z / in.position.w); @@ -865,7 +932,7 @@ float4 DrawPixels( // TODO: incorporate facing? 
- c.rgb = saturate(toUnorm(faceNormal)); + c.rgb = toUnorm(faceNormal); } else if (uniforms.shapeChannel == ShShapeChannelMipLevel) { c = toMipLevelColor(in.texCoord * textureSize.xy); // only for 2d textures diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 0b26aa62..e170a41f 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1071,22 +1071,24 @@ - (void)showEyedropperData:(float2)uv { // interpret based on shapeChannel, debugMode, etc switch(_showSettings->shapeChannel) { case ShapeChannelDepth: + isSigned = false; // using fract on uv + isValue = true; isFloat = true; numChannels = 1; break; case ShapeChannelUV0: + isSigned = false; // using fract on uv + isValue = true; - isSigned = true; - numChannels = 2; // TODO: fix for 3d uvw isFloat = true; + numChannels = 2; // TODO: fix for 3d uvw break; case ShapeChannelFaceNormal: case ShapeChannelNormal: case ShapeChannelTangent: case ShapeChannelBitangent: - isSigned = false; // writing to 16f as unorm, so need conversion below isDirection = true; numChannels = 3; From 0a4f25e5d06ee8572522acdf8a7840f10f3fecea Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 14 Jun 2021 08:56:04 -0700 Subject: [PATCH 129/901] kramv - world basis needed for preview This generates world space basis --- kramv/KramShaders.metal | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 307e2287..075d54f0 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -548,14 +548,15 @@ ColorInOut DrawImageFunc( // deal with full basis - bool needsBasis = - uniforms.isNormalMapPreview || + bool needsWorldBasis = + uniforms.isPreview || + //uniforms.isNormalMapPreview || // these need normal transformed to world space uniforms.shapeChannel == ShaderShapeChannel::ShShapeChannelTangent || uniforms.shapeChannel == ShaderShapeChannel::ShShapeChannelNormal || uniforms.shapeChannel == ShaderShapeChannel::ShShapeChannelBitangent; - if (needsBasis) { + if (needsWorldBasis) { float3 normal = in.normal; float3 tangent = in.tangent.xyz; transformBasis(normal, tangent, uniforms.modelMatrix, uniforms.modelMatrixInvScale2.xyz, uniforms.useTangent); From 6fdcaa034b6bca0c56a7ed5efaa53fe731441570 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 14 Jun 2021 10:08:29 -0700 Subject: [PATCH 130/901] kramv - turn on specular --- kramv/KramRenderer.mm | 15 ++++++++++++++- kramv/KramShaders.metal | 32 ++++++++++++++++++++++++-------- 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 517aadc8..ce2894ee 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -939,6 +939,8 @@ - (void)updateImageSettings:(const string&)fullFilename image:(KTXImage&)image _showSettings->imageBoundsY = (int32_t)image.height; } +float zoom3D = 1.0f; + - (void)resetSomeImageSettings:(BOOL)isNewFile { // only reset these on new texture, but have to revalidate @@ -995,9 +997,17 @@ - (void)resetSomeImageSettings:(BOOL)isNewFile { _modelMatrix = float4x4(float4m(tmpScaleX, scaleY, scaleZ, 1.0f)); // non uniform scale _modelMatrix = _modelMatrix * matrix4x4_translation(0.0f, 0.0f, -1.0); // set z=-1 unit back - // uniform scaled 3d primitiv + // uniform scaled 3d primitive float scale = MAX(scaleX, scaleY); + // store the zoom into thew view matrix + // fragment tangents seem to break down at high model scale due to precision differences between worldPos and uv + static bool useZoom3D = false; + if (useZoom3D) { + zoom3D 
= scale; // * _showSettings->viewSizeX / 2.0f; + scale = 1.0; + } + _modelMatrix3D = float4x4(float4m((doScaleX || doInvertX) ? tmpScaleX : scale, scale, scale, 1.0f)); // uniform scale _modelMatrix3D = _modelMatrix3D * matrix4x4_translation(0.0f, 0.0f, -1.0f); // set z=-1 unit back } @@ -1008,6 +1018,9 @@ - (float4x4)computeImageTransform:(float)panX panY:(float)panY zoom:(float)zoom // non-uniform scale is okay here, only affects ortho volume // setting this to uniform zoom and object is not visible, zoom can be 20x in x and y + if (_showSettings->is3DView) { + zoom *= zoom3D; + } float4x4 viewMatrix = float4x4(float4m(zoom, zoom, 1.0f, 1.0f)); viewMatrix = panTransform * viewMatrix; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 075d54f0..32469d96 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -276,8 +276,13 @@ float3x3 generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float // so this to identify one failure case where the uv derivatives are clamped to zero. // solve the linear system - float3 dp2perp = cross(dpy, N); float3 dp1perp = cross(N, dpx); + float3 dp2perp = cross(dpy, N); + + // When one of the duvx or duvy is 0 or close to it, then that's when I see + // tangent differences to the vertex tangents. dp2perp is knocked out by this. + // These artifacts are still present even moving scale into view matrix. + float3 T = dp2perp * duvx.x + dp1perp * duvy.x; float3 B = dp2perp * duvx.y + dp1perp * duvy.y; @@ -681,6 +686,8 @@ vertex ColorInOut DrawVolumeVS( } float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) { + if (albedo.a == 0.0) + return albedo; float3 lightDir = normalize(float3(1,1,1)); // looking down -Z axis float3 lightColor = float3(1,1,1); @@ -689,22 +696,24 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) float3 diffuse = float3(0.0); float3 ambient = float3(0.0); - bool doSpecular = false; // this is a bit too bright, and can confuse + bool doSpecular = true; // can confuse lighting review bool doDiffuse = true; bool doAmbient = true; + float dotNL = dot(n, lightDir); + if (doSpecular) { float3 ref = normalize(reflect(viewDir, n)); // above can be interpolated float dotRL = saturate(dot(ref, lightDir)); - dotRL = pow(dotRL, 4.0); // * saturate(dotNL * 8.0); // no spec without diffuse - specular = saturate(dotRL * lightColor.rgb); + dotRL = pow(dotRL, 8.0) * saturate(dotNL * 8.0); // no spec without diffuse + specular = dotRL * lightColor.rgb; } if (doDiffuse) { - float dotNL = saturate(dot(n, lightDir)); + float dotNLSat = saturate(dotNL); // soften the terminator off the vertNormal // this is so no diffuse if normal completely off from vertex normal @@ -712,13 +721,13 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) float dotVertex = saturate(dot(vertexNormal, n)); dotNL *= saturate(9.0 * dotVertex); - diffuse = dotNL * lightColor.rgb; + diffuse = dotNLSat * lightColor.rgb; } if (doAmbient) { // can misconstrue as diffuse with this, but make dark side not look flat - float dotNLUnsat = dot(n, lightDir); - ambient = mix(0.1, 0.3, saturate(dotNLUnsat * 0.5 + 0.5)); + float dotNLUnsat = dotNL; + ambient = mix(0.1, 0.2, saturate(dotNLUnsat * 0.5 + 0.5)); } // attenuate, and not saturate below, so no HDR yet @@ -726,7 +735,14 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) diffuse *= 0.7; //ambient *= 0.2; +#if 0 + // attenuating albedo with specular knocks it all out albedo.xyz *= 
saturate(ambient + diffuse + specular); +#else + albedo.xyz *= saturate(diffuse + ambient); + albedo.xyz += specular; + albedo.xyz = saturate(albedo.xyz); +#endif return albedo; } From 464d09a521ade367efdf0ed1851c72273fb9baa0 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 14 Jun 2021 11:13:42 -0700 Subject: [PATCH 131/901] kramv - remove tangent tolerance --- kramv/KramShaders.metal | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 32469d96..0840f564 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -295,7 +295,9 @@ float3x3 generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float float Tlen = length_squared(T); float Blen = length_squared(B); - if (Tlen < 1e-10 || Blen < 1e-10) { + // Tried 1e-10 tolerance here, but code hits that when zooming in closely to a shape. Normal map doesn't look good using vertNormal + // so instead only check for the zero case. + if (Tlen == 0.0 || Blen == 0.0) { success = false; return float3x3(0.0f); } @@ -696,7 +698,10 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) float3 diffuse = float3(0.0); float3 ambient = float3(0.0); - bool doSpecular = true; // can confuse lighting review + // Need lighting control in UI, otherwise specular just adds a big bright + // circle to all texture previews since it's additive. + + bool doSpecular = false; // can confuse lighting review bool doDiffuse = true; bool doAmbient = true; From 6ea533cc85ccd3d03ba7e27379c576af4fd22255 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 19 Jun 2021 14:41:19 -0700 Subject: [PATCH 132/901] kramv - improve fragment tangents, don't crash on resize --- kramv/KramRenderer.mm | 38 ++++++++----- kramv/KramShaders.metal | 117 +++++++++++++++------------------------- 2 files changed, 69 insertions(+), 86 deletions(-) diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index ce2894ee..0823232d 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -1575,6 +1575,10 @@ - (void)drawSample return; } + // this can occur during a resize + if (!_lastDrawableTexture) + return; + id commandBuffer = [_commandQueue commandBuffer]; if (!commandBuffer) return; @@ -1593,20 +1597,25 @@ - (void)drawSample MTLOrigin srcOrigin = MTLOriginMake(_showSettings->cursorX, _showSettings->cursorY, 0); srcOrigin.x *= _showSettings->viewContentScaleFactor; srcOrigin.y *= _showSettings->viewContentScaleFactor; - - // Note: here we don't know the uv in original texture, would have to write that out to another - // texture. Also on shapes, texel may not change but lighting might. - // can simply blit the color out of the render buffer - id blitCommandEncoder = [commandBuffer blitCommandEncoder]; - if (blitCommandEncoder) { - [blitCommandEncoder copyFromTexture:_lastDrawableTexture - sourceSlice:0 sourceLevel:0 sourceOrigin:srcOrigin sourceSize:MTLSizeMake(1,1,1) - toTexture:_sampleRenderTex - destinationSlice:0 destinationLevel:0 destinationOrigin:MTLOriginMake(0,0,0) - ]; - [blitCommandEncoder synchronizeResource:_sampleRenderTex]; - [blitCommandEncoder endEncoding]; + if ((srcOrigin.x >= 0 && srcOrigin.x < _lastDrawableTexture.width) && + (srcOrigin.y >= 0 && srcOrigin.y < _lastDrawableTexture.height)) + { + + // Note: here we don't know the uv in original texture, would have to write that out to another + // texture. Also on shapes, texel may not change but lighting might. 
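Looking back at the doLighting changes in the patch above: a scalar gloss is mapped to a specular exponent with exp2(gloss * 11) + 2, and Blinn-Phong (half vector) is compared against Phong (reflection vector). A standalone C++ sketch of those two specular terms, not part of the patch, with the energy-normalization factor left out and all names local to the sketch:

#include <algorithm>
#include <array>
#include <cmath>

using V3 = std::array<float, 3>;
static float dot(const V3& a, const V3& b) { return a[0]*b[0] + a[1]*b[1] + a[2]*b[2]; }
static V3 normalize(const V3& v) {
    float l = std::sqrt(dot(v, v));
    return { v[0]/l, v[1]/l, v[2]/l };
}

// n, lightDir, viewDir are unit vectors; gloss is in [0,1].
float specularTerm(const V3& n, const V3& lightDir, const V3& viewDir,
                   float gloss, bool blinnPhong) {
    float specularExp = std::exp2(gloss * 11.0f) + 2.0f;
    float amount;
    if (blinnPhong) {
        // half vector between the light and the eye (eye = -viewDir)
        V3 h = normalize({ lightDir[0] - viewDir[0],
                           lightDir[1] - viewDir[1],
                           lightDir[2] - viewDir[2] });
        amount = std::max(0.0f, dot(h, n));
        specularExp *= 4.0f; // rough Blinn-Phong <-> Phong exponent equivalence
    } else {
        // reflect viewDir about n: r = v - 2*dot(v,n)*n
        float k = dot(viewDir, n);
        V3 r = { viewDir[0] - 2.0f*n[0]*k,
                 viewDir[1] - 2.0f*n[1]*k,
                 viewDir[2] - 2.0f*n[2]*k };
        amount = std::max(0.0f, dot(r, lightDir));
    }
    return std::pow(amount, specularExp);
}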
+ + // can simply blit the color out of the render buffer + id blitCommandEncoder = [commandBuffer blitCommandEncoder]; + if (blitCommandEncoder) { + [blitCommandEncoder copyFromTexture:_lastDrawableTexture + sourceSlice:0 sourceLevel:0 sourceOrigin:srcOrigin sourceSize:MTLSizeMake(1,1,1) + toTexture:_sampleRenderTex + destinationSlice:0 destinationLevel:0 destinationOrigin:MTLOriginMake(0,0,0) + ]; + [blitCommandEncoder synchronizeResource:_sampleRenderTex]; + [blitCommandEncoder endEncoding]; + } } } else { @@ -1731,6 +1740,9 @@ - (void)drawSamples:(id)commandBuffer lookupX:(int32_t)lookupX - (void)mtkView:(nonnull MTKView *)view drawableSizeWillChange:(CGSize)size { + // Don't crashing trying to readback from the cached drawable during a resize. + _lastDrawableTexture = nil; + /// Respond to drawable size or orientation changes here _showSettings->viewSizeX = size.width; _showSettings->viewSizeY = size.height; diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 0840f564..7971e633 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -256,99 +256,70 @@ half3 toNormal(half3 n) // Then transforms the bumpNormal to that space. No tangent is needed. // The downside is this must all be fp32, and all done in fragment shader and use derivatives. // Derivatives are known to be caclulated differently depending on hw and different precision. +float length_squared(float x) { + return x * x; +} -float3x3 generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float2 uv, thread bool& success) +bool generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float2 uv, thread float3x3& basis) { - // normalizing this didn't help the reconstruction float3 N = toFloat(vertexNormal); - // for OpenGL +Y convention, flip N.y - // but this doesn't match explicit tangents case, see if those are wrong. - // N.y = -N.y; + // normalizing this didn't help the reconstruction + //N = normalize(N); // get edge vectors of the pixel triangle float3 dpx = dfdx(worldPos); float3 dpy = dfdy(worldPos); + + // could also pass isFrontFacing, should this almost always be true + //float3 faceNormal = cross(dpy, dpx); // because dpy is down on screen + //bool isFlipped = dot(faceNormal, N) > 0; + + // These are much smaller in magnitude than the position derivatives float2 duvx = dfdx(uv); float2 duvy = dfdy(uv); - // May be pixel noise from this when up close and the derivatives exceed float precision - // so this to identify one failure case where the uv derivatives are clamped to zero. - // solve the linear system - float3 dp1perp = cross(N, dpx); - float3 dp2perp = cross(dpy, N); - + float3 dp1perp = cross(N, dpx); // vertical + float3 dp2perp = cross(dpy, N); // horizontal + // When one of the duvx or duvy is 0 or close to it, then that's when I see // tangent differences to the vertex tangents. dp2perp is knocked out by this. // These artifacts are still present even moving scale into view matrix. - float3 T = dp2perp * duvx.x + dp1perp * duvy.x; - float3 B = dp2perp * duvx.y + dp1perp * duvy.y; - - // The author talks about preserving non-uniform scale of the worldPos, but the problem is that - // the duvx/y also can be scaled with respect to one another, and the code doesn't - // knock that out. So with uniform scale and non-uniform uv, invmax also causes non-uniform scale of T/B. - // The normalize code below eliminates non-uniform worldPos scale and non-uniform uv scale. - // But we have a vertNormal that is also normalized. 
- float Tlen = length_squared(T); + float3 B = dp2perp * duvx.y + dp1perp * duvy.y; float Blen = length_squared(B); - // Tried 1e-10 tolerance here, but code hits that when zooming in closely to a shape. Normal map doesn't look good using vertNormal - // so instead only check for the zero case. - if (Tlen == 0.0 || Blen == 0.0) { - success = false; - return float3x3(0.0f); - } - - success = true; - -#if 1 - // Still see some less smooth gradation across sphere compared with vertex tangents - // Maybe N needs to be interpolated as float3 instead of half3 to use this? Bitan looks - // smoother than the tangent. - - // Eliminate scale invariance to match vertex basis which is normalized before interpolation. - // This loses that hemisphere is 1x v vertically, and u is 2x rate around the sphere. Tan = 1/2 B then. - // Blocky triangles from this algorithm are because worldPos is linearly interpolated across - // the face of the flat poly, where vertex normals are smoothly interpolated across 3 points of triangle. - - - // Tangent looks much more blocky than Bitangent across the sphere. Why is that? - - T *= rsqrt(Tlen); - B *= rsqrt(Blen); - -#else - // math seems off when sphere u is 2x the rate, tangent is calculated as 0.5 length - // but the stretch is already accounted for by position vs. uv rate. - // Don't want to scale N.x by 0.5, since it's really v that is more squished on model. - - // Seeing tan/bitan that are 0.5 instead of 1.0 in length compared to the vertex tangents. - // This changes the lighting intensities since N is unit length. See explanation above. - - // Note: min gens larger than 1 directions, but the normals look more correct - // like it's the inverse normal transform. But lighting shifts. - - float invmax = rsqrt(max(Tlen, Blen)); + // could use B = dp1perp + if (Blen == 0.0) + return false; + + // float x = length_squared(duvx.x) + length_squared(duvy.x); // used for tangent + // float y = length_squared(duvx.y) + length_squared(duvy.y); // used for bitangent + + float3 T; + //if (x <= y) { + B *= rsqrt(Blen); + T = cross(B, N); + // } +// else { +// T = dp2perp * duvx.x + dp1perp * duvy.x; +// float Tlen = length_squared(T); +// +// T *= rsqrt(Tlen); +// T = -T; +// B = cross(N, T); +// } - // keeps relative magnitude of two vectors, they're not both unit vecs - T *= invmax; - B *= invmax; -#endif - - // had to flip this sign to get lighting to match vertex data - T = -T; - - float3x3 basis = float3x3(T, B, N); - return basis; + basis = float3x3(T, B, N); + return true; } half3 transformNormalByBasis(half3 bumpNormal, half3 vertexNormal, float3 worldPos, float2 uv) { - bool success = false; - float3x3 basis = generateFragmentTangentBasis(vertexNormal, worldPos, uv, success); + float3x3 basis; + bool success = generateFragmentTangentBasis(vertexNormal, worldPos, uv, basis); if (!success) { return vertexNormal; @@ -919,8 +890,8 @@ float4 DrawPixels( c.rgb = toFloat(in.tangent.xyz); } else { - bool success = false; - float3x3 basis = generateFragmentTangentBasis(in.normal, in.worldPos, in.texCoord, success); + float3x3 basis; + bool success = generateFragmentTangentBasis(in.normal, in.worldPos, in.texCoord, basis); if (!success) c.rgb = 0; else @@ -935,8 +906,8 @@ float4 DrawPixels( c.rgb = toFloat(bitangent); } else { - bool success = false; - float3x3 basis = generateFragmentTangentBasis(in.normal, in.worldPos, in.texCoord, success); + float3x3 basis; + bool success = generateFragmentTangentBasis(in.normal, in.worldPos, in.texCoord, basis); if (!success) c.rgb = 0; 
else From e755bf8becf6de1dc6a524b5622403eff08e2bfd Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 20 Jun 2021 11:20:15 -0700 Subject: [PATCH 133/901] kramv - fix ortho lighting No camera orient, so viewDir is always 00-1 --- kramv/KramShaders.metal | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 7971e633..2d0b155c 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -6,11 +6,6 @@ using namespace metal; -// TODO: Getting weird triangle artifacts on AMC 5500m on 16" MBP with useTangent = false. -// Seems that uv derivatives used for basis generation are 0 in gpu capture -// even though the uv itself are not. That shouldn't be possible. -// This results in large triangular artitfacts at the bottom of the sphere/capsule. - //--------------------------------- // helpers @@ -672,7 +667,7 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) // Need lighting control in UI, otherwise specular just adds a big bright // circle to all texture previews since it's additive. - bool doSpecular = false; // can confuse lighting review + bool doSpecular = false; // can confuse lighting review, make option to enable or everything has bright white spot bool doDiffuse = true; bool doAmbient = true; @@ -723,6 +718,15 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) return albedo; } +float3 calculateViewDir(float3 worldPos, float3 cameraPosition) { + // ortho case + return float3(0,0,-1); + + // TODO: need perspective preview + //return normalize(worldPos - cameraPosition); +} + + // TODO: eliminate the toUnorm() calls below, rendering to rgba16f but then present // doesn't have enough info to remap 16F to the display. @@ -777,7 +781,7 @@ float4 DrawPixels( uniforms.isSwizzleAGToRG, uniforms.isSigned, facing); - float3 viewDir = normalize(in.worldPos - uniforms.cameraPosition); + float3 viewDir = calculateViewDir(in.worldPos, uniforms.cameraPosition); c = doLighting(float4(1.0), viewDir, toFloat(n), toFloat(in.normal)); c.a = 1; @@ -788,7 +792,7 @@ float4 DrawPixels( c.xyz = toUnorm(c.xyz); } else { // TODO: need an isAlbedo test - float3 viewDir = normalize(in.worldPos - uniforms.cameraPosition); + float3 viewDir = calculateViewDir(in.worldPos, uniforms.cameraPosition); if (uniforms.isNormalMapPreview) { half4 nmapH = toHalf(nmap); From 9c2608de6461a41df74758c237b47ab52bd62cdd Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 27 Jun 2021 22:45:33 -0700 Subject: [PATCH 134/901] kramv - fix mirrored uv with fragment tangents, more specular lighting work --- kramv/KramShaders.metal | 164 ++++++++++++++++++++++++++++++---------- 1 file changed, 122 insertions(+), 42 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 2d0b155c..067f3599 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -9,6 +9,8 @@ using namespace metal; //--------------------------------- // helpers +//constant float PI = 3.1415927; + float toUnorm8(float c) { return (127.0 / 255.0) * c + (128.0 / 255.0); @@ -255,6 +257,11 @@ float length_squared(float x) { return x * x; } +// how is this not a built-in? 
+float cross(float2 lhs, float2 rhs) { + return lhs.x * rhs.y - rhs.x * lhs.y; +} + bool generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float2 uv, thread float3x3& basis) { float3 N = toFloat(vertexNormal); @@ -262,11 +269,24 @@ bool generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float2 uv // normalizing this didn't help the reconstruction //N = normalize(N); + // Original code pases viewDir, but that is constant for ortho view and would only work for perspective. + // Comment was that cameraPos drops out since it's constant, but perspective viewDir is also typically normalized too. + // Here using worldPos but it has much larger magnitude than uv then. + // get edge vectors of the pixel triangle float3 dpx = dfdx(worldPos); float3 dpy = dfdy(worldPos); + //N = normalize(cross(dpy, dpx)); + + //dpx.y = -dpx.y; + //dpy.y = -dpy.y; + // could also pass isFrontFacing, should this almost always be true + + // The math problem here seems related to that we're using the planar dpx/dpy. + // but the normal is interpolated on the sphere, and plane is likely closer to dNx/dNy. + //float3 faceNormal = cross(dpy, dpx); // because dpy is down on screen //bool isFlipped = dot(faceNormal, N) > 0; @@ -274,39 +294,65 @@ bool generateFragmentTangentBasis(half3 vertexNormal, float3 worldPos, float2 uv float2 duvx = dfdx(uv); float2 duvy = dfdy(uv); + // flip T based on uv direction to handle mirrored UV + float uvPlaneSign = sign(cross(duvy, duvx)); + +#if 1 + + // can't really tell this from using N + float3 useN; + + //float3 faceNormal = cross(dpy, dpx); + //useN = faceNormal; + + useN = N; + // solve the linear system - float3 dp1perp = cross(N, dpx); // vertical - float3 dp2perp = cross(dpy, N); // horizontal - - // When one of the duvx or duvy is 0 or close to it, then that's when I see - // tangent differences to the vertex tangents. dp2perp is knocked out by this. - // These artifacts are still present even moving scale into view matrix. + float3 dp1perp = cross(useN, dpx); // vertical + float3 dp2perp = cross(dpy, useN); // horizontal +#else + float3 dp1perp = -dpy; + float3 dp2perp = dpx; +#endif + // could use B = dp1perp + //if (Blen == 0.0) + // return false; - float3 B = dp2perp * duvx.y + dp1perp * duvy.y; + float3 T, B; + +#if 0 + B = normalize(dp1perp); + T = -normalize(dp2perp); +#elif 1 + B = dp2perp * duvx.y + dp1perp * duvy.y; float Blen = length_squared(B); + + // vertical ridges with T.y flipping sign + B *= rsqrt(Blen); + T = cross(B, N); + + // This switches to lhcs on left side of mirrored sphere + // May just be that ModelIO has generated bad basis on that left side. + T *= -uvPlaneSign; + +#elif 0 + // This calc just doesn't look as good + + // trapezoidal pattern wih T.y flipping sign + T = dp2perp * duvx.x + dp1perp * duvy.x; + float Tlen = length_squared(T); + + T *= rsqrt(Tlen); + + //T = -T; + + // Fixes tangent on mirrored sphere but Bitangent is wrong, does this mean uv wrap switches to lhcs instead of rhcs? 
+ T *= uvPlaneSign; + + B = cross(N, T); +#endif - // could use B = dp1perp - if (Blen == 0.0) - return false; - - // float x = length_squared(duvx.x) + length_squared(duvy.x); // used for tangent - // float y = length_squared(duvx.y) + length_squared(duvy.y); // used for bitangent - - float3 T; - //if (x <= y) { - B *= rsqrt(Blen); - T = cross(B, N); - // } -// else { -// T = dp2perp * duvx.x + dp1perp * duvy.x; -// float Tlen = length_squared(T); -// -// T *= rsqrt(Tlen); -// T = -T; -// B = cross(N, T); -// } - basis = float3x3(T, B, N); return true; } @@ -653,7 +699,7 @@ vertex ColorInOut DrawVolumeVS( return out; } -float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) { +float4 doLighting(float4 albedo, float3 viewDir, float3 bumpNormal, float3 vertexNormal) { if (albedo.a == 0.0) return albedo; @@ -666,20 +712,55 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) // Need lighting control in UI, otherwise specular just adds a big bright // circle to all texture previews since it's additive. - + bool doBlinnPhongSpecular = false; bool doSpecular = false; // can confuse lighting review, make option to enable or everything has bright white spot bool doDiffuse = true; bool doAmbient = true; - float dotNL = dot(n, lightDir); + // see here about energy normalization, not going to GGX just yet + // http://www.thetenthplanet.de/archives/255 + float dotVertexNL = dot(vertexNormal, lightDir); + + float dotNL = dot(bumpNormal, lightDir); if (doSpecular) { - float3 ref = normalize(reflect(viewDir, n)); - - // above can be interpolated - float dotRL = saturate(dot(ref, lightDir)); - dotRL = pow(dotRL, 8.0) * saturate(dotNL * 8.0); // no spec without diffuse - specular = dotRL * lightColor.rgb; + if (dotVertexNL > 0.0) { + float specularAmount; + + // in lieu of a roughness map, do this + // fake energy conservation by multiply with gloss + // https://www.youtube.com/watch?v=E4PHFnvMzFc&t=946s + float gloss = 0.6; + float specularExp = exp2(gloss * 11.0) + 2.0; + float energyNormalization = gloss; + + if (doBlinnPhongSpecular) { + // this doesn't look so good as a highlight in ortho at least + float3 E = -viewDir; + float3 H = normalize(lightDir + E); + float dotHN = saturate(dot(H, bumpNormal)); + specularAmount = dotHN; + + // to make dotHN look like dotRL + // https://en.wikipedia.org/wiki/Blinn%E2%80%93Phong_reflection_model + specularExp *= 4.0; + + //energyNormalization = (specularExp + 1.0) / (2.0 * PI); + } + else { + // phong + // and seem to recall a conversion to above but H = (L+V)/2, the normalize knocks out the 1/2 + float3 ref = normalize(reflect(viewDir, bumpNormal)); + float dotRL = saturate(dot(ref, lightDir)); + specularAmount = dotRL; + + //energyNormalization = (specularExp + 1.0) / (2.0 * PI); + } + + // above can be interpolated + specularAmount = pow(specularAmount, specularExp) * energyNormalization; + specular = specularAmount * lightColor.rgb; + } } if (doDiffuse) { @@ -689,7 +770,7 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 n, float3 vertexNormal) // soften the terminator off the vertNormal // this is so no diffuse if normal completely off from vertex normal // also limiting diffuse lighting bump to lighting by vertex normal - float dotVertex = saturate(dot(vertexNormal, n)); + float dotVertex = saturate(dot(vertexNormal, bumpNormal)); dotNL *= saturate(9.0 * dotVertex); diffuse = dotNLSat * lightColor.rgb; @@ -726,9 +807,8 @@ float3 calculateViewDir(float3 worldPos, float3 cameraPosition) { 
//return normalize(worldPos - cameraPosition); } - -// TODO: eliminate the toUnorm() calls below, rendering to rgba16f but then present -// doesn't have enough info to remap 16F to the display. +// This is writing out to 16F and could write snorm data, but then that couldn't be displayed. +// So code first converts to Unorm. float4 DrawPixels( ColorInOut in [[stage_in]], @@ -1242,12 +1322,12 @@ fragment float4 DrawVolumePS( //-------------------------------------------------- + /* not using this yet, need a fsq and some frag coord to sample the normal map at discrete points // https://www.shadertoy.com/view/4s23DG // 2D vector field visualization by Morgan McGuire, @morgan3d, http://casual-effects.com -constant float PI = 3.1415927; constant int ARROW_V_STYLE = 1; constant int ARROW_LINE_STYLE = 2; From 98e7c85cd1d84eae67d72370c60b7373566979c3 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 3 Jul 2021 15:20:02 -0700 Subject: [PATCH 135/901] kramv - add lighting controls, add IQ's reflect call Lighting is off key "5". Either ambient + diffuse, or + specular. The specular tends to leave bright highlights on flat shapes so want to be able to turn it off. --- LICENSE | 2 +- kramv/KramRenderer.mm | 1 + kramv/KramShaders.h | 8 ++++++ kramv/KramShaders.metal | 55 ++++++++++++++++++++++++++++++++-------- kramv/KramViewerBase.cpp | 25 ++++++++++++++++++ kramv/KramViewerBase.h | 11 ++++++++ kramv/KramViewerMain.mm | 16 ++++++++++-- 7 files changed, 105 insertions(+), 13 deletions(-) diff --git a/LICENSE b/LICENSE index e7212644..a48f6407 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2020 Alec Miller +Copyright (c) 2020-2021 Alec Miller Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 0823232d..1c0e90db 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -1077,6 +1077,7 @@ - (void)_updateGameState uniforms.isSDF = _showSettings->isSDF; uniforms.numChannels = _showSettings->numChannels; + uniforms.lightingMode = (ShaderLightingMode)_showSettings->lightingMode; MyMTLTextureType textureType = MyMTLTextureType2D; MyMTLPixelFormat textureFormat = MyMTLPixelFormatInvalid; diff --git a/kramv/KramShaders.h b/kramv/KramShaders.h index f11491a4..b686db05 100644 --- a/kramv/KramShaders.h +++ b/kramv/KramShaders.h @@ -112,6 +112,11 @@ typedef NS_ENUM(int32_t, ShaderShapeChannel) // ShShapeChannelBumpNormal, }; +typedef NS_ENUM(int32_t, ShaderLightingMode) +{ + ShLightingModeDiffuse = 0, + ShLightingModeSpecular, +}; // TODO: placement of these elements in the struct breaks transfer // of data. This seems to work. Alignment issues with mixing these differently. 
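The viewer cycles its enum settings (debug mode, shape channel, and now the lighting mode bound to key "5") with a common wraparound pattern: stepping backwards adds count - 1 so the modulo result stays non-negative. A standalone sketch of that pattern using the two-value lighting enum introduced here, not part of the patch:

#include <cstdint>

enum LightingMode { LightingModeDiffuse = 0, LightingModeSpecular = 1, LightingModeCount };

LightingMode advanceLightingMode(LightingMode mode, bool decrement) {
    int32_t numEnums = LightingModeCount;
    int32_t number = (int32_t)mode;
    number += decrement ? (numEnums - 1) : 1;
    return (LightingMode)(number % numEnums);
}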
@@ -158,6 +163,9 @@ struct Uniforms // View the r,g,b,a channels of the texture ShaderTextureChannels channels; // mask + + // Can turn on/off specular + ShaderLightingMode lightingMode; }; // uploaded separately, so multiple mips, faces, array can be drawn to the screen at one time diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 067f3599..3ab86961 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -699,7 +699,31 @@ vertex ColorInOut DrawVolumeVS( return out; } -float4 doLighting(float4 albedo, float3 viewDir, float3 bumpNormal, float3 vertexNormal) { + +float3 reflectIQ(float3 v, float3 n) +{ +#if 0 + // traditional refect + // v - 2 * n * dot(v n) + float3 r = reflect(v, n); + + return r; +#else + // Not sure why IQ uses the r notation + float3 r = n; + + // https://iquilezles.org/www/articles/dontflip/dontflip.htm + // works for any dimension + // also article has a clamp forumulation + + float k = dot(v, r); + + // reflect v if it's in the negative half plane defined by r + return (k > 0.0) ? v : (v - 2.0 * r * k); +#endif +} + +float4 doLighting(float4 albedo, float3 viewDir, float3 bumpNormal, float3 vertexNormal, ShaderLightingMode lightingMode) { if (albedo.a == 0.0) return albedo; @@ -713,24 +737,35 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 bumpNormal, float3 verte // Need lighting control in UI, otherwise specular just adds a big bright // circle to all texture previews since it's additive. bool doBlinnPhongSpecular = false; - bool doSpecular = false; // can confuse lighting review, make option to enable or everything has bright white spot + + bool doSpecular = true; // can confuse lighting review, make option to enable or everything has bright white spot bool doDiffuse = true; bool doAmbient = true; + if (lightingMode == ShLightingModeDiffuse) + { + doSpecular = false; + } + // see here about energy normalization, not going to GGX just yet // http://www.thetenthplanet.de/archives/255 - float dotVertexNL = dot(vertexNormal, lightDir); + + // Note: this isn't the same as the faceNormal, the vertexNormal is interpolated + // see iq's trick for flipping lighting in reflectIQ. 
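A standalone sketch, not part of the patch, of the "don't flip" reflection used by the reflectIQ call added above (https://iquilezles.org/www/articles/dontflip/dontflip.htm): the vector is left alone when it already lies in the positive half-space of r, and mirrored across the plane perpendicular to r otherwise.

#include <array>

using V3 = std::array<float, 3>;
static float dot(const V3& a, const V3& b) { return a[0]*b[0] + a[1]*b[1] + a[2]*b[2]; }

V3 reflectIfBehind(const V3& v, const V3& r) {
    float k = dot(v, r);
    if (k > 0.0f)
        return v;
    return { v[0] - 2.0f*r[0]*k, v[1] - 2.0f*r[1]*k, v[2] - 2.0f*r[2]*k };
}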
+ + // Use reflectIQ to flip specular, + //float dotVertexNL = dot(vertexNormal, lightDir); float dotNL = dot(bumpNormal, lightDir); if (doSpecular) { - if (dotVertexNL > 0.0) { + //if (dotVertexNL > 0.0) { float specularAmount; // in lieu of a roughness map, do this // fake energy conservation by multiply with gloss // https://www.youtube.com/watch?v=E4PHFnvMzFc&t=946s - float gloss = 0.6; + float gloss = 0.3; float specularExp = exp2(gloss * 11.0) + 2.0; float energyNormalization = gloss; @@ -750,7 +785,7 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 bumpNormal, float3 verte else { // phong // and seem to recall a conversion to above but H = (L+V)/2, the normalize knocks out the 1/2 - float3 ref = normalize(reflect(viewDir, bumpNormal)); + float3 ref = normalize(reflectIQ(viewDir, bumpNormal)); float dotRL = saturate(dot(ref, lightDir)); specularAmount = dotRL; @@ -760,7 +795,7 @@ float4 doLighting(float4 albedo, float3 viewDir, float3 bumpNormal, float3 verte // above can be interpolated specularAmount = pow(specularAmount, specularExp) * energyNormalization; specular = specularAmount * lightColor.rgb; - } + // } } if (doDiffuse) { @@ -862,7 +897,7 @@ float4 DrawPixels( float3 viewDir = calculateViewDir(in.worldPos, uniforms.cameraPosition); - c = doLighting(float4(1.0), viewDir, toFloat(n), toFloat(in.normal)); + c = doLighting(float4(1.0), viewDir, toFloat(n), toFloat(in.normal), uniforms.lightingMode); c.a = 1; } @@ -881,10 +916,10 @@ float4 DrawPixels( in.worldPos, in.texCoord, uniforms.useTangent, // to build TBN uniforms.isNormalMapSwizzleAGToRG, uniforms.isNormalMapSigned, facing); - c = doLighting(c, viewDir, toFloat(n), toFloat(in.normal)); + c = doLighting(c, viewDir, toFloat(n), toFloat(in.normal), uniforms.lightingMode); } else { - c = doLighting(c, viewDir, toFloat(in.normal), toFloat(in.normal)); + c = doLighting(c, viewDir, toFloat(in.normal), toFloat(in.normal), uniforms.lightingMode); } } diff --git a/kramv/KramViewerBase.cpp b/kramv/KramViewerBase.cpp index d53fac5f..b4316476 100644 --- a/kramv/KramViewerBase.cpp +++ b/kramv/KramViewerBase.cpp @@ -62,6 +62,17 @@ const char* ShowSettings::debugModeText() const { return text; } +const char* ShowSettings::lightingModeText() const { + const char* text = ""; + + switch(lightingMode) { + case LightingModeDiffuse: text = "Light Diffuse"; break; + case LightingModeSpecular: text = "Light Specular"; break; + default: break; + } + return text; +} + bool ShowSettings::isEyedropperFromDrawable() { return meshNumber > 0 || isPreview || isShowingAllLevelsAndMips || shapeChannel > 0; } @@ -98,6 +109,20 @@ void ShowSettings::advanceShapeChannel(bool decrement) { } } +void ShowSettings::advanceLightingMode(bool decrement) { + int32_t numEnums = LightingModeCount; + int32_t number = lightingMode; + if (decrement) { + number += numEnums - 1; + } + else { + number += 1; + } + + lightingMode = (LightingMode)(number % numEnums); +} + + void ShowSettings::advanceDebugMode(bool decrement) { int32_t numEnums = DebugModeCount; int32_t mode = debugMode; diff --git a/kramv/KramViewerBase.h b/kramv/KramViewerBase.h index f219534d..fc17a450 100644 --- a/kramv/KramViewerBase.h +++ b/kramv/KramViewerBase.h @@ -70,6 +70,13 @@ enum ShapeChannel ShapeChannelCount }; +enum LightingMode +{ + LightingModeDiffuse = 0, // amb + diffuse + LightingModeSpecular = 1, // amb + diffuse + specular + + LightingModeCount, +}; class ShowSettings { public: @@ -194,6 +201,8 @@ class ShowSettings { ShapeChannel shapeChannel = ShapeChannelNone; + LightingMode 
lightingMode = LightingModeDiffuse; + float4x4 projectionViewModelMatrix; bool isInverted; @@ -208,10 +217,12 @@ class ShowSettings { void advanceMeshNumber(bool decrement); void advanceDebugMode(bool decrement); void advanceShapeChannel(bool decrement); + void advanceLightingMode(bool decrement); const char* meshNumberText() const; const char* shapeChannelText() const; const char* debugModeText() const; + const char* lightingModeText() const; string lastFilename; double lastTimestamp = 0.0; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index e170a41f..bc558e80 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -498,7 +498,7 @@ - (nonnull ShowSettings*)showSettings { } - (NSStackView*)_addButtons { - const int32_t numButtons = 29; // 13; + const int32_t numButtons = 30; const char* names[numButtons*2] = { "?", "Help", @@ -531,6 +531,7 @@ - (NSStackView*)_addButtons { "8", "Shape", "6", "Shape Channel", + "5", "Lighting", "T", "Tangents", // TODO: need to shift hud over a little @@ -1504,8 +1505,9 @@ - (void)updateUIControlState auto meshState = toState(_showSettings->meshNumber > 0); auto meshChannelState = toState(_showSettings->shapeChannel > 0); + auto lightingState = toState(_showSettings->lightingMode != LightingModeDiffuse); auto tangentState = toState(_showSettings->useTangent); - + // TODO: UI state, and vertical state auto uiState = toState(_buttonStack.hidden); @@ -1534,6 +1536,7 @@ - (void)updateUIControlState [self findButton:"O"].state = previewState; [self findButton:"8"].state = meshState; [self findButton:"6"].state = meshChannelState; + [self findButton:"5"].state = lightingState; [self findButton:"W"].state = wrapState; [self findButton:"D"].state = gridState; [self findButton:"E"].state = debugState; @@ -1566,6 +1569,7 @@ - (void)updateUIControlState [self findMenuItem:"O"].state = previewState; [self findMenuItem:"8"].state = meshState; [self findMenuItem:"6"].state = meshChannelState; + [self findMenuItem:"5"].state = lightingState; [self findMenuItem:"T"].state = tangentState; [self findMenuItem:"W"].state = wrapState; @@ -1651,6 +1655,8 @@ - (IBAction)handleAction:(id)sender { keyCode = Key::Num8; else if (title == "6") keyCode = Key::Num6; + else if (title == "5") + keyCode = Key::Num5; else if (title == "T") keyCode = Key::T; @@ -1793,6 +1799,12 @@ - (bool)handleKey:(uint32_t)keyCode isShiftKeyDown:(bool)isShiftKeyDown isChanged = true; break; } + case Key::Num5: { + _showSettings->advanceLightingMode(isShiftKeyDown); + text = _showSettings->lightingModeText(); + isChanged = true; + break; + } case Key::T: { _showSettings->useTangent = !_showSettings->useTangent; if (_showSettings->useTangent) From 49d81cad753116368940c0f660f9c7987b8d0e3f Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 3 Jul 2021 22:42:56 -0700 Subject: [PATCH 136/901] kramv - add shader hotloading Run buildShaders.sh to rebuild the shaders into a .air file, then to a .metalllib. These are written to the bin folder, but can be placed anywhere. Drop this metallib onto the app, and then after running buildShaders.sh, can just select from the recently loaded menu item to hotload. All shaders and pipelines are rebuilt. The app starts off using the default.metallib that Xcode bundles into the app. Can iterate more rapidly on look and feel in kramv this way. 
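A minimal sketch of the iteration loop this enables, assuming the commands are run from the scripts/ folder and that the built app bundle is named kramv.app (both are assumptions, not verified here); paths follow the buildShaders.sh added in this patch:

  # rebuild KramShaders.metal into a loadable library
  ./buildShaders.sh      # writes ../bin/KramShaders.air and ../bin/KramShaders.metallib

  # hand the library to a running kramv: drag ../bin/KramShaders.metallib onto the
  # app window, or (assuming the kramv.app bundle name) open it from the command line;
  # after the first load it also appears in the recent-documents menu for quick reloads
  open -a kramv ../bin/KramShaders.metallib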
--- kramv/Info.plist | 16 ++++++ kramv/KramRenderer.h | 2 + kramv/KramRenderer.mm | 120 ++++++++++++++++++++++++++++------------ kramv/KramViewerMain.mm | 15 +++++ scripts/buildShaders.sh | 4 ++ 5 files changed, 123 insertions(+), 34 deletions(-) create mode 100755 scripts/buildShaders.sh diff --git a/kramv/Info.plist b/kramv/Info.plist index c3a1e263..63515502 100644 --- a/kramv/Info.plist +++ b/kramv/Info.plist @@ -70,6 +70,22 @@ NSDocumentClass KramDocument + + CFBundleTypeIconSystemGenerated + 1 + CFBundleTypeName + METALLIB + CFBundleTypeRole + Viewer + LSHandlerRank + Default + LSItemContentTypes + + application/octet-stream + + NSDocumentClass + KramDocument + CFBundleExecutable $(EXECUTABLE_NAME) diff --git a/kramv/KramRenderer.h b/kramv/KramRenderer.h index 31490ae2..7840b6d6 100644 --- a/kramv/KramRenderer.h +++ b/kramv/KramRenderer.h @@ -38,6 +38,8 @@ namespace kram { - (simd::float4x4)computeImageTransform:(float)panX panY:(float)panY zoom:(float)zoom; +- (BOOL)hotloadShaders:(nonnull const char*)filename; + @end diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 1c0e90db..099cd710 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -27,6 +27,13 @@ using namespace kram; using namespace simd; +// Capture what we need to build the renderPieplines, without needing view +struct ViewFramebufferData { + MTLPixelFormat colorPixelFormat = MTLPixelFormatInvalid; + MTLPixelFormat depthStencilPixelFormat = MTLPixelFormatInvalid; + uint32_t sampleCount = 0; +}; + @implementation Renderer { dispatch_semaphore_t _inFlightSemaphore; @@ -102,6 +109,11 @@ @implementation Renderer MTKMesh *_meshCapsule; MTKMeshBufferAllocator *_metalAllocator; + id _shaderLibrary; + NSURL* _metallibFileURL; + NSDate* _metallibFileDate; + ViewFramebufferData _viewFramebuffer; + ShowSettings* _showSettings; } @@ -223,24 +235,29 @@ - (void)_createVertexDescriptor _mdlVertexDescriptor.attributes[VertexAttributeTexcoord].name = MDLVertexAttributeTextureCoordinate; _mdlVertexDescriptor.attributes[VertexAttributeNormal].name = MDLVertexAttributeNormal; _mdlVertexDescriptor.attributes[VertexAttributeTangent].name = MDLVertexAttributeTangent; - } + + - (void)_loadMetalWithView:(nonnull MTKView *)view { /// Load Metal state objects and initialize renderer dependent view properties - view.depthStencilPixelFormat = MTLPixelFormatDepth32Float_Stencil8; - //view.colorPixelFormat = MTLPixelFormatBGRA8Unorm_sRGB; // TODO: adjust this to draw srgb or not, prefer RGBA - - // have a mix of linear color and normals, don't want srgb conversion until displayed view.colorPixelFormat = MTLPixelFormatRGBA16Float; - + view.depthStencilPixelFormat = MTLPixelFormatDepth32Float_Stencil8; view.sampleCount = 1; + _viewFramebuffer.colorPixelFormat = view.colorPixelFormat; + _viewFramebuffer.depthStencilPixelFormat = view.depthStencilPixelFormat; + _viewFramebuffer.sampleCount = view.sampleCount; + [self _createVertexDescriptor]; - [self _createRenderPipelines:view]; + // first time use the default library, if reload is called then use different library + _shaderLibrary = [_device newDefaultLibrary]; + + + [self _createRenderPipelines]; //----------------------- @@ -272,51 +289,87 @@ - (void)_loadMetalWithView:(nonnull MTKView *)view [self _createSampleRender]; } -- (void)_createComputePipelines +- (BOOL)hotloadShaders:(const char*)filename { - id defaultLibrary = [_device newDefaultLibrary]; + NSURL* _metallibFileURL = [NSURL fileURLWithPath:[NSString stringWithUTF8String:filename]]; + + NSError* err = nil; + NSDate 
*fileDate = nil; + [_metallibFileURL getResourceValue:&fileDate forKey:NSURLContentModificationDateKey error:&err]; + + // only reload if the metallib changed timestamp, otherwise default.metallib has most recent copy + if (err != nil || [_metallibFileDate isEqualToDate:fileDate]) { + return NO; + } + _metallibFileDate = fileDate; + + // Now dynamically load the metallib + NSData* dataNS = [NSData dataWithContentsOfURL:_metallibFileURL options:NSDataReadingMappedIfSafe + error:&err]; + if (dataNS == nil) { + return NO; + } + dispatch_data_t data = dispatch_data_create(dataNS.bytes, dataNS.length, dispatch_get_main_queue(), DISPATCH_DATA_DESTRUCTOR_DEFAULT); + + id shaderLibrary = [_device newLibraryWithData:data error:&err]; + if (err != nil) { + return NO; + } + _shaderLibrary = shaderLibrary; + + // rebuild the shaders and pipelines that use the shader + [self _createRenderPipelines]; + [self _createComputePipelines]; + + [self _createSampleRender]; + + return YES; +} + +- (void)_createComputePipelines +{ NSError *error = NULL; id computeFunction; //----------------------- - computeFunction = [defaultLibrary newFunctionWithName:@"SampleImageCS"]; + computeFunction = [_shaderLibrary newFunctionWithName:@"SampleImageCS"]; _pipelineStateImageCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; if (!_pipelineStateImageCS) { NSLog(@"Failed to create pipeline state, error %@", error); } - computeFunction = [defaultLibrary newFunctionWithName:@"SampleImageArrayCS"]; + computeFunction = [_shaderLibrary newFunctionWithName:@"SampleImageArrayCS"]; _pipelineStateImageArrayCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; if (!_pipelineStateImageArrayCS) { NSLog(@"Failed to create pipeline state, error %@", error); } - computeFunction = [defaultLibrary newFunctionWithName:@"SampleVolumeCS"]; + computeFunction = [_shaderLibrary newFunctionWithName:@"SampleVolumeCS"]; _pipelineStateVolumeCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; if (!_pipelineStateVolumeCS) { NSLog(@"Failed to create pipeline state, error %@", error); } - computeFunction = [defaultLibrary newFunctionWithName:@"SampleCubeCS"]; + computeFunction = [_shaderLibrary newFunctionWithName:@"SampleCubeCS"]; _pipelineStateCubeCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; if (!_pipelineStateCubeCS) { NSLog(@"Failed to create pipeline state, error %@", error); } - computeFunction = [defaultLibrary newFunctionWithName:@"SampleCubeArrayCS"]; + computeFunction = [_shaderLibrary newFunctionWithName:@"SampleCubeArrayCS"]; _pipelineStateCubeArrayCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; if (!_pipelineStateCubeArrayCS) { NSLog(@"Failed to create pipeline state, error %@", error); } - computeFunction = [defaultLibrary newFunctionWithName:@"SampleImage1DArrayCS"]; + computeFunction = [_shaderLibrary newFunctionWithName:@"SampleImage1DArrayCS"]; _pipelineState1DArrayCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; if (!_pipelineState1DArrayCS) { @@ -324,30 +377,28 @@ - (void)_createComputePipelines } } -- (void)_createRenderPipelines:(MTKView*)view +- (void)_createRenderPipelines { - id defaultLibrary = [_device newDefaultLibrary]; - id vertexFunction; id fragmentFunction; MTLRenderPipelineDescriptor *pipelineStateDescriptor = [[MTLRenderPipelineDescriptor alloc] init]; pipelineStateDescriptor.label = @"DrawImagePipeline"; - 
pipelineStateDescriptor.sampleCount = view.sampleCount; + pipelineStateDescriptor.sampleCount = _viewFramebuffer.sampleCount; pipelineStateDescriptor.vertexDescriptor = _mtlVertexDescriptor; - pipelineStateDescriptor.colorAttachments[0].pixelFormat = view.colorPixelFormat; + pipelineStateDescriptor.colorAttachments[0].pixelFormat = _viewFramebuffer.colorPixelFormat; // TODO: could drop these for images, but want a 3D preview of content // or might make these memoryless. - pipelineStateDescriptor.depthAttachmentPixelFormat = view.depthStencilPixelFormat; - pipelineStateDescriptor.stencilAttachmentPixelFormat = view.depthStencilPixelFormat; + pipelineStateDescriptor.depthAttachmentPixelFormat = _viewFramebuffer.depthStencilPixelFormat; + pipelineStateDescriptor.stencilAttachmentPixelFormat = _viewFramebuffer.depthStencilPixelFormat; NSError *error = NULL; //----------------------- - vertexFunction = [defaultLibrary newFunctionWithName:@"DrawImageVS"]; - fragmentFunction = [defaultLibrary newFunctionWithName:@"DrawImagePS"]; + vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawImageVS"]; + fragmentFunction = [_shaderLibrary newFunctionWithName:@"DrawImagePS"]; pipelineStateDescriptor.vertexFunction = vertexFunction; pipelineStateDescriptor.fragmentFunction = fragmentFunction; @@ -359,8 +410,8 @@ - (void)_createRenderPipelines:(MTKView*)view //----------------------- - vertexFunction = [defaultLibrary newFunctionWithName:@"DrawImageVS"]; // reused - fragmentFunction = [defaultLibrary newFunctionWithName:@"DrawImageArrayPS"]; + vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawImageVS"]; // reused + fragmentFunction = [_shaderLibrary newFunctionWithName:@"DrawImageArrayPS"]; pipelineStateDescriptor.vertexFunction = vertexFunction; pipelineStateDescriptor.fragmentFunction = fragmentFunction; @@ -372,8 +423,8 @@ - (void)_createRenderPipelines:(MTKView*)view //----------------------- - vertexFunction = [defaultLibrary newFunctionWithName:@"DrawImageVS"]; - fragmentFunction = [defaultLibrary newFunctionWithName:@"Draw1DArrayPS"]; + vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawImageVS"]; + fragmentFunction = [_shaderLibrary newFunctionWithName:@"Draw1DArrayPS"]; pipelineStateDescriptor.vertexFunction = vertexFunction; pipelineStateDescriptor.fragmentFunction = fragmentFunction; @@ -385,8 +436,8 @@ - (void)_createRenderPipelines:(MTKView*)view //----------------------- - vertexFunction = [defaultLibrary newFunctionWithName:@"DrawCubeVS"]; - fragmentFunction = [defaultLibrary newFunctionWithName:@"DrawCubePS"]; + vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawCubeVS"]; + fragmentFunction = [_shaderLibrary newFunctionWithName:@"DrawCubePS"]; pipelineStateDescriptor.vertexFunction = vertexFunction; pipelineStateDescriptor.fragmentFunction = fragmentFunction; @@ -398,8 +449,8 @@ - (void)_createRenderPipelines:(MTKView*)view //----------------------- - vertexFunction = [defaultLibrary newFunctionWithName:@"DrawCubeVS"]; // reused - fragmentFunction = [defaultLibrary newFunctionWithName:@"DrawCubeArrayPS"]; + vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawCubeVS"]; // reused + fragmentFunction = [_shaderLibrary newFunctionWithName:@"DrawCubeArrayPS"]; pipelineStateDescriptor.vertexFunction = vertexFunction; pipelineStateDescriptor.fragmentFunction = fragmentFunction; @@ -411,8 +462,8 @@ - (void)_createRenderPipelines:(MTKView*)view //----------------------- - vertexFunction = [defaultLibrary newFunctionWithName:@"DrawVolumeVS"]; - fragmentFunction = 
[defaultLibrary newFunctionWithName:@"DrawVolumePS"]; + vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawVolumeVS"]; + fragmentFunction = [_shaderLibrary newFunctionWithName:@"DrawVolumePS"]; pipelineStateDescriptor.vertexFunction = vertexFunction; pipelineStateDescriptor.fragmentFunction = fragmentFunction; @@ -1259,6 +1310,7 @@ - (void)_setUniformsLevel:(UniformsLevel&)uniforms mipLOD:(int32_t)mipLOD - (void)drawInMTKView:(nonnull MTKView *)view { @autoreleasepool { + /// Per frame updates here // TODO: move this out, needs to get called off mouseMove, but don't want to call drawMain diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index bc558e80..20f6dfc4 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -2659,6 +2659,21 @@ - (BOOL)loadTextureFromURL:(NSURL*)url { //------------------- + if (endsWithExtension(filename, ".metallib")) { + + Renderer* renderer = (Renderer*)self.delegate; + if ([renderer hotloadShaders: filename]) { + NSURL* metallibFileURL = [NSURL fileURLWithPath:[NSString stringWithUTF8String:filename]]; + + // add to recent docs, so can reload quickly + NSDocumentController* dc = [NSDocumentController sharedDocumentController]; + [dc noteNewRecentDocumentURL:metallibFileURL]; + + return YES; + } + return NO; + } + // file is not a supported extension if (!(endsWithExtension(filename, ".zip") || isPNGFilename(filename) || diff --git a/scripts/buildShaders.sh b/scripts/buildShaders.sh new file mode 100755 index 00000000..12879a2a --- /dev/null +++ b/scripts/buildShaders.sh @@ -0,0 +1,4 @@ +#!/bin/zsh + +xcrun -sdk macosx metal -c ../kramv/KramShaders.metal -o ../bin/KramShaders.air +xcrun -sdk macosx metallib ../bin/KramShaders.air -o ../bin/KramShaders.metallib \ No newline at end of file From ce91383a6d391383ba7e2b140dba76c38e90cb73 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sat, 3 Jul 2021 23:18:24 -0700 Subject: [PATCH 137/901] kramv - remove the bin/.air file once the .metallib is built. 
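A quick sanity check of the cleanup, sketched under the assumption that the script is run from the scripts/ folder:

  ./buildShaders.sh
  ls ../bin      # KramShaders.metallib should be present; the KramShaders.air intermediate should be gone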
--- scripts/buildShaders.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scripts/buildShaders.sh b/scripts/buildShaders.sh index 12879a2a..1c01481f 100755 --- a/scripts/buildShaders.sh +++ b/scripts/buildShaders.sh @@ -1,4 +1,7 @@ #!/bin/zsh xcrun -sdk macosx metal -c ../kramv/KramShaders.metal -o ../bin/KramShaders.air -xcrun -sdk macosx metallib ../bin/KramShaders.air -o ../bin/KramShaders.metallib \ No newline at end of file +xcrun -sdk macosx metallib ../bin/KramShaders.air -o ../bin/KramShaders.metallib + +# don't need this after metallib built +rm ../bin/KramShaders.air \ No newline at end of file From 2b39bc43eec24747035160305881568ac2e99587 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Sun, 4 Jul 2021 14:58:01 -0700 Subject: [PATCH 138/901] kramv - simplify compute/render shader/pipeline creation --- kramv/KramLoader.mm | 4 +- kramv/KramRenderer.mm | 171 +++++++++++++--------------------------- kramv/KramViewerMain.mm | 4 +- 3 files changed, 60 insertions(+), 119 deletions(-) diff --git a/kramv/KramLoader.mm b/kramv/KramLoader.mm index 6053733e..f29d0e80 100644 --- a/kramv/KramLoader.mm +++ b/kramv/KramLoader.mm @@ -28,7 +28,7 @@ using namespace simd; string kram::toLower(const string& text) { - return string([[[NSString stringWithUTF8String:text.c_str()] lowercaseString] UTF8String]); + return string([NSString stringWithUTF8String:text.c_str()].lowercaseString.UTF8String); } // defer data need to blit staging MTLBuffer to MTLTexture at the start of rendering @@ -309,7 +309,7 @@ static uint32_t numberOfMipmapLevels(const Image& image) { - (BOOL)loadImageFromURL:(nonnull NSURL *)url image:(KTXImage&)image imageData:(KTXImageData&)imageData { - const char *path = [url.absoluteURL.path UTF8String]; + const char *path = url.absoluteURL.path.UTF8String; // TODO: could also ignore extension, and look at header/signature instead // files can be renamed to the incorrect extensions diff --git a/kramv/KramRenderer.mm b/kramv/KramRenderer.mm index 099cd710..bd80117e 100644 --- a/kramv/KramRenderer.mm +++ b/kramv/KramRenderer.mm @@ -327,63 +327,47 @@ - (BOOL)hotloadShaders:(const char*)filename return YES; } -- (void)_createComputePipelines +- (id)_createComputePipeline:(const char*)name { - NSError *error = NULL; - id computeFunction; - - //----------------------- - - computeFunction = [_shaderLibrary newFunctionWithName:@"SampleImageCS"]; - _pipelineStateImageCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; - if (!_pipelineStateImageCS) - { - NSLog(@"Failed to create pipeline state, error %@", error); - } - - computeFunction = [_shaderLibrary newFunctionWithName:@"SampleImageArrayCS"]; - _pipelineStateImageArrayCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; - if (!_pipelineStateImageArrayCS) - { - NSLog(@"Failed to create pipeline state, error %@", error); - } - - computeFunction = [_shaderLibrary newFunctionWithName:@"SampleVolumeCS"]; - _pipelineStateVolumeCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; - if (!_pipelineStateVolumeCS) - { - NSLog(@"Failed to create pipeline state, error %@", error); - } + NSString* nameNS = [NSString stringWithUTF8String:name]; + NSError *error = nil; + id computeFunction = [_shaderLibrary newFunctionWithName:nameNS]; - computeFunction = [_shaderLibrary newFunctionWithName:@"SampleCubeCS"]; - _pipelineStateCubeCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; - if (!_pipelineStateCubeCS) - { - 
NSLog(@"Failed to create pipeline state, error %@", error); + id pipe; + if (computeFunction) { + computeFunction.label = nameNS; + + pipe = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; } - - computeFunction = [_shaderLibrary newFunctionWithName:@"SampleCubeArrayCS"]; - _pipelineStateCubeArrayCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; - if (!_pipelineStateCubeArrayCS) - { - NSLog(@"Failed to create pipeline state, error %@", error); + + if (!pipe) { + KLOGE("kramv", "Failed to create compute pipeline state for %s, error %s", name, error ? error.localizedDescription.UTF8String : ""); + return nil; } - computeFunction = [_shaderLibrary newFunctionWithName:@"SampleImage1DArrayCS"]; - _pipelineState1DArrayCS = [_device newComputePipelineStateWithFunction:computeFunction error:&error]; - if (!_pipelineState1DArrayCS) - { - NSLog(@"Failed to create pipeline state, error %@", error); - } + return pipe; } -- (void)_createRenderPipelines +- (void)_createComputePipelines { + _pipelineStateImageCS = [self _createComputePipeline:"SampleImageCS"]; + _pipelineStateImageArrayCS = [self _createComputePipeline:"SampleImageArrayCS"]; + _pipelineStateVolumeCS = [self _createComputePipeline:"SampleVolumeCS"]; + _pipelineStateCubeCS = [self _createComputePipeline:"SampleCubeCS"]; + _pipelineStateCubeArrayCS = [self _createComputePipeline:"SampleCubeArrayCS"]; + _pipelineState1DArrayCS = [self _createComputePipeline:"SampleImage1DArrayCS"]; +} + +- (id)_createRenderPipeline:(const char*)vs fs:(const char*)fs +{ + NSString* vsNameNS = [NSString stringWithUTF8String:vs]; + NSString* fsNameNS = [NSString stringWithUTF8String:fs]; + id vertexFunction; id fragmentFunction; MTLRenderPipelineDescriptor *pipelineStateDescriptor = [[MTLRenderPipelineDescriptor alloc] init]; - pipelineStateDescriptor.label = @"DrawImagePipeline"; + pipelineStateDescriptor.label = fsNameNS; pipelineStateDescriptor.sampleCount = _viewFramebuffer.sampleCount; pipelineStateDescriptor.vertexDescriptor = _mtlVertexDescriptor; pipelineStateDescriptor.colorAttachments[0].pixelFormat = _viewFramebuffer.colorPixelFormat; @@ -397,81 +381,38 @@ - (void)_createRenderPipelines //----------------------- - vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawImageVS"]; - fragmentFunction = [_shaderLibrary newFunctionWithName:@"DrawImagePS"]; - pipelineStateDescriptor.vertexFunction = vertexFunction; - pipelineStateDescriptor.fragmentFunction = fragmentFunction; + vertexFunction = [_shaderLibrary newFunctionWithName:vsNameNS]; + fragmentFunction = [_shaderLibrary newFunctionWithName:fsNameNS]; - _pipelineStateImage = [_device newRenderPipelineStateWithDescriptor:pipelineStateDescriptor error:&error]; - if (!_pipelineStateImage) - { - NSLog(@"Failed to create pipeline state, error %@", error); - } - - //----------------------- - - vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawImageVS"]; // reused - fragmentFunction = [_shaderLibrary newFunctionWithName:@"DrawImageArrayPS"]; - pipelineStateDescriptor.vertexFunction = vertexFunction; - pipelineStateDescriptor.fragmentFunction = fragmentFunction; - - _pipelineStateImageArray = [_device newRenderPipelineStateWithDescriptor:pipelineStateDescriptor error:&error]; - if (!_pipelineStateImageArray) - { - NSLog(@"Failed to create pipeline state, error %@", error); - } - - //----------------------- - - vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawImageVS"]; - fragmentFunction = [_shaderLibrary 
newFunctionWithName:@"Draw1DArrayPS"]; - pipelineStateDescriptor.vertexFunction = vertexFunction; - pipelineStateDescriptor.fragmentFunction = fragmentFunction; + id pipe; - _pipelineState1DArray = [_device newRenderPipelineStateWithDescriptor:pipelineStateDescriptor error:&error]; - if (!_pipelineState1DArray) - { - NSLog(@"Failed to create pipeline state, error %@", error); - } - - //----------------------- - - vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawCubeVS"]; - fragmentFunction = [_shaderLibrary newFunctionWithName:@"DrawCubePS"]; - pipelineStateDescriptor.vertexFunction = vertexFunction; - pipelineStateDescriptor.fragmentFunction = fragmentFunction; - - _pipelineStateCube = [_device newRenderPipelineStateWithDescriptor:pipelineStateDescriptor error:&error]; - if (!_pipelineStateCube) - { - NSLog(@"Failed to create pipeline state, error %@", error); + if (vertexFunction && fragmentFunction) { + vertexFunction.label = vsNameNS; + fragmentFunction.label = fsNameNS; + + pipelineStateDescriptor.vertexFunction = vertexFunction; + pipelineStateDescriptor.fragmentFunction = fragmentFunction; + + pipe = [_device newRenderPipelineStateWithDescriptor:pipelineStateDescriptor error:&error]; } - //----------------------- - - vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawCubeVS"]; // reused - fragmentFunction = [_shaderLibrary newFunctionWithName:@"DrawCubeArrayPS"]; - pipelineStateDescriptor.vertexFunction = vertexFunction; - pipelineStateDescriptor.fragmentFunction = fragmentFunction; - - _pipelineStateCubeArray = [_device newRenderPipelineStateWithDescriptor:pipelineStateDescriptor error:&error]; - if (!_pipelineStateCubeArray) + if (!pipe) { - NSLog(@"Failed to create pipeline state, error %@", error); + KLOGE("kramv", "Failed to create render pipeline state for %s, error %s", fs, error ? 
error.description.UTF8String : ""); + return nil; } - //----------------------- - - vertexFunction = [_shaderLibrary newFunctionWithName:@"DrawVolumeVS"]; - fragmentFunction = [_shaderLibrary newFunctionWithName:@"DrawVolumePS"]; - pipelineStateDescriptor.vertexFunction = vertexFunction; - pipelineStateDescriptor.fragmentFunction = fragmentFunction; - - _pipelineStateVolume = [_device newRenderPipelineStateWithDescriptor:pipelineStateDescriptor error:&error]; - if (!_pipelineStateVolume) - { - NSLog(@"Failed to create pipeline state, error %@", error); - } + return pipe; +} + +- (void)_createRenderPipelines +{ + _pipelineStateImage = [self _createRenderPipeline:"DrawImageVS" fs:"DrawImagePS"]; + _pipelineStateImageArray = [self _createRenderPipeline:"DrawImageVS" fs:"DrawImageArrayPS"]; + _pipelineState1DArray = [self _createRenderPipeline:"DrawImageVS" fs:"Draw1DArrayPS"]; + _pipelineStateCube = [self _createRenderPipeline:"DrawCubeVS" fs:"DrawCubePS"]; + _pipelineStateCubeArray = [self _createRenderPipeline:"DrawCubeVS" fs:"DrawCubeArrayPS"]; + _pipelineStateVolume = [self _createRenderPipeline:"DrawVolumeVS" fs:"DrawVolumePS"]; } - (void)_createSampleRender @@ -572,7 +513,7 @@ - (MTKMesh*)_createMeshAsset:(const char*)name mdlMesh:(MDLMesh*)mdlMesh doFlipU if(!mesh || error) { - NSLog(@"Error creating MetalKit mesh %@", error.localizedDescription); + KLOGE("kramv", "Error creating MetalKit mesh %s", error.localizedDescription.UTF8String); return nil; } @@ -839,7 +780,7 @@ - (BOOL)loadTextureFromImage:(const string&)fullFilename - (BOOL)loadTexture:(nonnull NSURL *)url { - string fullFilename = [url.path UTF8String]; + string fullFilename = url.path.UTF8String; // can use this to pull, or use fstat on FileHelper NSDate *fileDate = nil; diff --git a/kramv/KramViewerMain.mm b/kramv/KramViewerMain.mm index 20f6dfc4..4827bedd 100644 --- a/kramv/KramViewerMain.mm +++ b/kramv/KramViewerMain.mm @@ -1595,11 +1595,11 @@ - (IBAction)handleAction:(id)sender { // sender is the UI element/NSButton if ([sender isKindOfClass:[NSButton class]]) { NSButton* button = (NSButton*)sender; - title = [button.title UTF8String]; + title = button.title.UTF8String; } else if ([sender isKindOfClass:[NSMenuItem class]]) { NSMenuItem* menuItem = (NSMenuItem*)sender; - title = [menuItem.toolTip UTF8String]; + title = menuItem.toolTip.UTF8String; } else { KLOGE("kram", "unknown UI element"); From 5bf199c0d7119dce17fcbbf92f26df420e9fb027 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Mon, 5 Jul 2021 23:01:00 -0700 Subject: [PATCH 139/901] kramv - small shader cleanup --- kramv/KramShaders.metal | 8 ++++---- libkram/CMakeLists.txt | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/kramv/KramShaders.metal b/kramv/KramShaders.metal index 3ab86961..f48f5413 100644 --- a/kramv/KramShaders.metal +++ b/kramv/KramShaders.metal @@ -536,8 +536,8 @@ struct Vertex float2 texCoord [[attribute(VertexAttributeTexcoord)]]; // basis - float3 normal [[attribute(VertexAttributeNormal)]];; // consider hallf - float4 tangent [[attribute(VertexAttributeTangent)]];; // tan + bitanSign + float3 normal [[attribute(VertexAttributeNormal)]]; // consider half + float4 tangent [[attribute(VertexAttributeTangent)]]; // tan + bitanSign }; struct ColorInOut @@ -847,7 +847,7 @@ float3 calculateViewDir(float3 worldPos, float3 cameraPosition) { float4 DrawPixels( ColorInOut in [[stage_in]], - bool facing [[front_facing]], + bool facing, constant Uniforms& uniforms, float4 c, float4 nmap, @@ -882,7 +882,7 @@ float4 DrawPixels( // 
distance to edge in pixels (scalar) float pixelDist = dist * onePixel; - // typicaly source recommends smoothstep, so that get a soft instead of hard ramp of alpha at edges + // typically source recommends smoothstep, so that get a soft instead of hard ramp of alpha at edges // store as preml alpha c.rgba = saturate(pixelDist); diff --git a/libkram/CMakeLists.txt b/libkram/CMakeLists.txt index e5fd590b..7d5e3dbf 100644 --- a/libkram/CMakeLists.txt +++ b/libkram/CMakeLists.txt @@ -61,7 +61,7 @@ file(GLOB_RECURSE libSources CONFIGURE_DEPENDS "${SOURCE_DIR}/astc-encoder/*.cpp" "${SOURCE_DIR}/astc-encoder/*.h" - # ATE is Apple specifi+ macOS) + # ATE is Apple specific to macOS) "${SOURCE_DIR}/ate/*.mm" "${SOURCE_DIR}/ate/*.h" From 44de29f34d57cba271f3a69fa6d554ddfe789885 Mon Sep 17 00:00:00 2001 From: Alec Miller Date: Fri, 6 Aug 2021 00:26:50 -0700 Subject: [PATCH 140/901] kram - add eastl files --- .../eastl/include/EABase/config/eacompiler.h | 1778 +++++++ .../include/EABase/config/eacompilertraits.h | 2561 ++++++++++ .../eastl/include/EABase/config/eaplatform.h | 738 +++ libkram/eastl/include/EABase/eabase.h | 1011 ++++ libkram/eastl/include/EABase/eahave.h | 877 ++++ libkram/eastl/include/EABase/earesult.h | 62 + libkram/eastl/include/EABase/eastdarg.h | 99 + libkram/eastl/include/EABase/eaunits.h | 54 + libkram/eastl/include/EABase/int128.h | 1268 +++++ libkram/eastl/include/EABase/nullptr.h | 102 + libkram/eastl/include/EABase/version.h | 36 + libkram/eastl/include/EASTL/algorithm.h | 4221 +++++++++++++++++ libkram/eastl/include/EASTL/allocator.h | 395 ++ .../eastl/include/EASTL/allocator_malloc.h | 130 + libkram/eastl/include/EASTL/any.h | 652 +++ libkram/eastl/include/EASTL/array.h | 530 +++ libkram/eastl/include/EASTL/atomic.h | 1772 +++++++ libkram/eastl/include/EASTL/bitset.h | 2232 +++++++++ libkram/eastl/include/EASTL/bitvector.h | 1474 ++++++ libkram/eastl/include/EASTL/bonus/adaptors.h | 88 + .../eastl/include/EASTL/bonus/call_traits.h | 117 + .../include/EASTL/bonus/compressed_pair.h | 460 ++ .../include/EASTL/bonus/fixed_ring_buffer.h | 50 + .../include/EASTL/bonus/fixed_tuple_vector.h | 210 + .../include/EASTL/bonus/intrusive_sdlist.h | 694 +++ .../include/EASTL/bonus/intrusive_slist.h | 321 ++ libkram/eastl/include/EASTL/bonus/list_map.h | 932 ++++ libkram/eastl/include/EASTL/bonus/lru_cache.h | 424 ++ .../eastl/include/EASTL/bonus/ring_buffer.h | 1581 ++++++ .../eastl/include/EASTL/bonus/sort_extra.h | 204 + .../eastl/include/EASTL/bonus/tuple_vector.h | 1592 +++++++ libkram/eastl/include/EASTL/chrono.h | 744 +++ libkram/eastl/include/EASTL/core_allocator.h | 70 + .../include/EASTL/core_allocator_adapter.h | 368 ++ libkram/eastl/include/EASTL/deque.h | 2687 +++++++++++ libkram/eastl/include/EASTL/finally.h | 93 + libkram/eastl/include/EASTL/fixed_allocator.h | 455 ++ libkram/eastl/include/EASTL/fixed_function.h | 218 + libkram/eastl/include/EASTL/fixed_hash_map.h | 822 ++++ libkram/eastl/include/EASTL/fixed_hash_set.h | 782 +++ libkram/eastl/include/EASTL/fixed_list.h | 388 ++ libkram/eastl/include/EASTL/fixed_map.h | 580 +++ libkram/eastl/include/EASTL/fixed_set.h | 578 +++ libkram/eastl/include/EASTL/fixed_slist.h | 389 ++ libkram/eastl/include/EASTL/fixed_string.h | 805 ++++ libkram/eastl/include/EASTL/fixed_substring.h | 265 ++ libkram/eastl/include/EASTL/fixed_vector.h | 625 +++ libkram/eastl/include/EASTL/functional.h | 1266 +++++ libkram/eastl/include/EASTL/hash_map.h | 580 +++ libkram/eastl/include/EASTL/hash_set.h | 468 ++ libkram/eastl/include/EASTL/heap.h | 685 
+++ .../eastl/include/EASTL/initializer_list.h | 96 + .../include/EASTL/internal/atomic/arch/arch.h | 65 + .../internal/atomic/arch/arch_add_fetch.h | 173 + .../internal/atomic/arch/arch_and_fetch.h | 173 + .../atomic/arch/arch_cmpxchg_strong.h | 430 ++ .../internal/atomic/arch/arch_cmpxchg_weak.h | 430 ++ .../atomic/arch/arch_compiler_barrier.h | 19 + .../internal/atomic/arch/arch_cpu_pause.h | 25 + .../internal/atomic/arch/arch_exchange.h | 173 + .../internal/atomic/arch/arch_fetch_add.h | 173 + .../internal/atomic/arch/arch_fetch_and.h | 173 + .../internal/atomic/arch/arch_fetch_or.h | 173 + .../internal/atomic/arch/arch_fetch_sub.h | 173 + .../internal/atomic/arch/arch_fetch_xor.h | 173 + .../EASTL/internal/atomic/arch/arch_load.h | 125 + .../atomic/arch/arch_memory_barrier.h | 47 + .../internal/atomic/arch/arch_or_fetch.h | 173 + .../internal/atomic/arch/arch_signal_fence.h | 21 + .../EASTL/internal/atomic/arch/arch_store.h | 113 + .../internal/atomic/arch/arch_sub_fetch.h | 173 + .../internal/atomic/arch/arch_thread_fence.h | 49 + .../internal/atomic/arch/arch_xor_fetch.h | 173 + .../include/EASTL/internal/atomic/atomic.h | 252 + .../EASTL/internal/atomic/atomic_asserts.h | 75 + .../EASTL/internal/atomic/atomic_base_width.h | 346 ++ .../EASTL/internal/atomic/atomic_casts.h | 190 + .../EASTL/internal/atomic/atomic_flag.h | 170 + .../internal/atomic/atomic_flag_standalone.h | 69 + .../EASTL/internal/atomic/atomic_integral.h | 343 ++ .../EASTL/internal/atomic/atomic_macros.h | 67 + .../atomic/atomic_macros/atomic_macros.h | 145 + .../atomic_macros/atomic_macros_add_fetch.h | 98 + .../atomic_macros/atomic_macros_and_fetch.h | 98 + .../atomic/atomic_macros/atomic_macros_base.h | 65 + .../atomic_macros_cmpxchg_strong.h | 245 + .../atomic_macros_cmpxchg_weak.h | 245 + .../atomic_macros_compiler_barrier.h | 30 + .../atomic_macros/atomic_macros_cpu_pause.h | 22 + .../atomic_macros/atomic_macros_exchange.h | 98 + .../atomic_macros/atomic_macros_fetch_add.h | 98 + .../atomic_macros/atomic_macros_fetch_and.h | 98 + .../atomic_macros/atomic_macros_fetch_or.h | 98 + .../atomic_macros/atomic_macros_fetch_sub.h | 98 + .../atomic_macros/atomic_macros_fetch_xor.h | 98 + .../atomic/atomic_macros/atomic_macros_load.h | 75 + .../atomic_macros_memory_barrier.h | 38 + .../atomic_macros/atomic_macros_or_fetch.h | 98 + .../atomic_macros_signal_fence.h | 34 + .../atomic_macros/atomic_macros_store.h | 68 + .../atomic_macros/atomic_macros_sub_fetch.h | 98 + .../atomic_macros_thread_fence.h | 34 + .../atomic_macros/atomic_macros_xor_fetch.h | 98 + .../internal/atomic/atomic_memory_order.h | 44 + .../EASTL/internal/atomic/atomic_pointer.h | 281 ++ .../atomic/atomic_pop_compiler_options.h | 11 + .../atomic/atomic_push_compiler_options.h | 17 + .../internal/atomic/atomic_size_aligned.h | 197 + .../EASTL/internal/atomic/atomic_standalone.h | 470 ++ .../EASTL/internal/atomic/compiler/compiler.h | 120 + .../atomic/compiler/compiler_add_fetch.h | 173 + .../atomic/compiler/compiler_and_fetch.h | 173 + .../atomic/compiler/compiler_barrier.h | 36 + .../atomic/compiler/compiler_cmpxchg_strong.h | 430 ++ .../atomic/compiler/compiler_cmpxchg_weak.h | 430 ++ .../atomic/compiler/compiler_cpu_pause.h | 32 + .../atomic/compiler/compiler_exchange.h | 173 + .../atomic/compiler/compiler_fetch_add.h | 173 + .../atomic/compiler/compiler_fetch_and.h | 173 + .../atomic/compiler/compiler_fetch_or.h | 173 + .../atomic/compiler/compiler_fetch_sub.h | 173 + .../atomic/compiler/compiler_fetch_xor.h | 173 + 
.../internal/atomic/compiler/compiler_load.h | 139 + .../atomic/compiler/compiler_memory_barrier.h | 47 + .../atomic/compiler/compiler_or_fetch.h | 173 + .../atomic/compiler/compiler_signal_fence.h | 49 + .../internal/atomic/compiler/compiler_store.h | 113 + .../atomic/compiler/compiler_sub_fetch.h | 173 + .../atomic/compiler/compiler_thread_fence.h | 49 + .../atomic/compiler/compiler_xor_fetch.h | 173 + .../atomic/compiler/gcc/compiler_gcc.h | 154 + .../compiler/gcc/compiler_gcc_add_fetch.h | 118 + .../compiler/gcc/compiler_gcc_and_fetch.h | 118 + .../compiler/gcc/compiler_gcc_barrier.h | 30 + .../gcc/compiler_gcc_cmpxchg_strong.h | 182 + .../compiler/gcc/compiler_gcc_cmpxchg_weak.h | 182 + .../compiler/gcc/compiler_gcc_cpu_pause.h | 31 + .../compiler/gcc/compiler_gcc_exchange.h | 118 + .../compiler/gcc/compiler_gcc_fetch_add.h | 118 + .../compiler/gcc/compiler_gcc_fetch_and.h | 118 + .../compiler/gcc/compiler_gcc_fetch_or.h | 118 + .../compiler/gcc/compiler_gcc_fetch_sub.h | 118 + .../compiler/gcc/compiler_gcc_fetch_xor.h | 118 + .../atomic/compiler/gcc/compiler_gcc_load.h | 90 + .../compiler/gcc/compiler_gcc_or_fetch.h | 118 + .../compiler/gcc/compiler_gcc_signal_fence.h | 38 + .../atomic/compiler/gcc/compiler_gcc_store.h | 89 + .../compiler/gcc/compiler_gcc_sub_fetch.h | 118 + .../compiler/gcc/compiler_gcc_thread_fence.h | 38 + .../compiler/gcc/compiler_gcc_xor_fetch.h | 118 + .../atomic/compiler/msvc/compiler_msvc.h | 260 + .../compiler/msvc/compiler_msvc_add_fetch.h | 104 + .../compiler/msvc/compiler_msvc_and_fetch.h | 121 + .../compiler/msvc/compiler_msvc_barrier.h | 31 + .../msvc/compiler_msvc_cmpxchg_strong.h | 195 + .../msvc/compiler_msvc_cmpxchg_weak.h | 162 + .../compiler/msvc/compiler_msvc_cpu_pause.h | 27 + .../compiler/msvc/compiler_msvc_exchange.h | 125 + .../compiler/msvc/compiler_msvc_fetch_add.h | 101 + .../compiler/msvc/compiler_msvc_fetch_and.h | 118 + .../compiler/msvc/compiler_msvc_fetch_or.h | 118 + .../compiler/msvc/compiler_msvc_fetch_sub.h | 104 + .../compiler/msvc/compiler_msvc_fetch_xor.h | 118 + .../compiler/msvc/compiler_msvc_or_fetch.h | 121 + .../msvc/compiler_msvc_signal_fence.h | 34 + .../compiler/msvc/compiler_msvc_sub_fetch.h | 107 + .../compiler/msvc/compiler_msvc_xor_fetch.h | 121 + .../include/EASTL/internal/char_traits.h | 464 ++ libkram/eastl/include/EASTL/internal/config.h | 1877 ++++++++ .../eastl/include/EASTL/internal/copy_help.h | 215 + .../include/EASTL/internal/enable_shared.h | 83 + .../eastl/include/EASTL/internal/fill_help.h | 484 ++ .../eastl/include/EASTL/internal/fixed_pool.h | 1631 +++++++ .../eastl/include/EASTL/internal/function.h | 161 + .../include/EASTL/internal/function_detail.h | 673 +++ .../include/EASTL/internal/function_help.h | 51 + .../include/EASTL/internal/functional_base.h | 389 ++ .../include/EASTL/internal/generic_iterator.h | 208 + .../eastl/include/EASTL/internal/hashtable.h | 3222 +++++++++++++ .../eastl/include/EASTL/internal/in_place_t.h | 82 + .../include/EASTL/internal/integer_sequence.h | 74 + .../EASTL/internal/intrusive_hashtable.h | 989 ++++ libkram/eastl/include/EASTL/internal/mem_fn.h | 304 ++ .../include/EASTL/internal/memory_base.h | 37 + .../eastl/include/EASTL/internal/move_help.h | 162 + .../include/EASTL/internal/pair_fwd_decls.h | 16 + .../EASTL/internal/piecewise_construct_t.h | 46 + .../include/EASTL/internal/red_black_tree.h | 2400 ++++++++++ .../eastl/include/EASTL/internal/smart_ptr.h | 264 ++ .../include/EASTL/internal/thread_support.h | 244 + .../include/EASTL/internal/tuple_fwd_decls.h | 56 
+ .../include/EASTL/internal/type_compound.h | 800 ++++ .../include/EASTL/internal/type_fundamental.h | 289 ++ .../eastl/include/EASTL/internal/type_pod.h | 1945 ++++++++ .../include/EASTL/internal/type_properties.h | 380 ++ .../EASTL/internal/type_transformations.h | 606 +++ .../eastl/include/EASTL/intrusive_hash_map.h | 98 + .../eastl/include/EASTL/intrusive_hash_set.h | 100 + libkram/eastl/include/EASTL/intrusive_list.h | 1315 +++++ libkram/eastl/include/EASTL/intrusive_ptr.h | 426 ++ libkram/eastl/include/EASTL/iterator.h | 1192 +++++ libkram/eastl/include/EASTL/linked_array.h | 336 ++ libkram/eastl/include/EASTL/linked_ptr.h | 426 ++ libkram/eastl/include/EASTL/list.h | 2168 +++++++++ libkram/eastl/include/EASTL/map.h | 684 +++ libkram/eastl/include/EASTL/memory.h | 1685 +++++++ libkram/eastl/include/EASTL/meta.h | 222 + libkram/eastl/include/EASTL/numeric.h | 247 + libkram/eastl/include/EASTL/numeric_limits.h | 1718 +++++++ libkram/eastl/include/EASTL/optional.h | 708 +++ libkram/eastl/include/EASTL/priority_queue.h | 491 ++ libkram/eastl/include/EASTL/queue.h | 366 ++ libkram/eastl/include/EASTL/random.h | 254 + libkram/eastl/include/EASTL/ratio.h | 320 ++ libkram/eastl/include/EASTL/safe_ptr.h | 485 ++ libkram/eastl/include/EASTL/scoped_array.h | 237 + libkram/eastl/include/EASTL/scoped_ptr.h | 256 + .../eastl/include/EASTL/segmented_vector.h | 523 ++ libkram/eastl/include/EASTL/set.h | 655 +++ libkram/eastl/include/EASTL/shared_array.h | 434 ++ libkram/eastl/include/EASTL/shared_ptr.h | 1696 +++++++ libkram/eastl/include/EASTL/slist.h | 1930 ++++++++ libkram/eastl/include/EASTL/sort.h | 2019 ++++++++ libkram/eastl/include/EASTL/span.h | 427 ++ libkram/eastl/include/EASTL/stack.h | 346 ++ libkram/eastl/include/EASTL/string.h | 4100 ++++++++++++++++ libkram/eastl/include/EASTL/string_hash_map.h | 189 + libkram/eastl/include/EASTL/string_map.h | 167 + libkram/eastl/include/EASTL/string_view.h | 631 +++ libkram/eastl/include/EASTL/tuple.h | 1006 ++++ libkram/eastl/include/EASTL/type_traits.h | 1060 +++++ libkram/eastl/include/EASTL/unique_ptr.h | 732 +++ libkram/eastl/include/EASTL/unordered_map.h | 55 + libkram/eastl/include/EASTL/unordered_set.h | 53 + libkram/eastl/include/EASTL/utility.h | 872 ++++ libkram/eastl/include/EASTL/variant.h | 1236 +++++ libkram/eastl/include/EASTL/vector.h | 2055 ++++++++ libkram/eastl/include/EASTL/vector_map.h | 906 ++++ libkram/eastl/include/EASTL/vector_multimap.h | 843 ++++ libkram/eastl/include/EASTL/vector_multiset.h | 764 +++ libkram/eastl/include/EASTL/vector_set.h | 793 ++++ libkram/eastl/include/EASTL/version.h | 15 + libkram/eastl/include/EASTL/weak_ptr.h | 17 + libkram/eastl/source/allocator_eastl.cpp | 56 + libkram/eastl/source/assert.cpp | 108 + libkram/eastl/source/atomic.cpp | 25 + libkram/eastl/source/fixed_pool.cpp | 70 + libkram/eastl/source/hashtable.cpp | 177 + libkram/eastl/source/intrusive_list.cpp | 87 + libkram/eastl/source/numeric_limits.cpp | 572 +++ libkram/eastl/source/red_black_tree.cpp | 518 ++ libkram/eastl/source/string.cpp | 464 ++ libkram/eastl/source/thread_support.cpp | 121 + 253 files changed, 112522 insertions(+) create mode 100644 libkram/eastl/include/EABase/config/eacompiler.h create mode 100644 libkram/eastl/include/EABase/config/eacompilertraits.h create mode 100644 libkram/eastl/include/EABase/config/eaplatform.h create mode 100644 libkram/eastl/include/EABase/eabase.h create mode 100644 libkram/eastl/include/EABase/eahave.h create mode 100644 libkram/eastl/include/EABase/earesult.h create mode 100644 
libkram/eastl/include/EABase/eastdarg.h create mode 100644 libkram/eastl/include/EABase/eaunits.h create mode 100644 libkram/eastl/include/EABase/int128.h create mode 100644 libkram/eastl/include/EABase/nullptr.h create mode 100644 libkram/eastl/include/EABase/version.h create mode 100644 libkram/eastl/include/EASTL/algorithm.h create mode 100644 libkram/eastl/include/EASTL/allocator.h create mode 100644 libkram/eastl/include/EASTL/allocator_malloc.h create mode 100644 libkram/eastl/include/EASTL/any.h create mode 100644 libkram/eastl/include/EASTL/array.h create mode 100644 libkram/eastl/include/EASTL/atomic.h create mode 100644 libkram/eastl/include/EASTL/bitset.h create mode 100644 libkram/eastl/include/EASTL/bitvector.h create mode 100644 libkram/eastl/include/EASTL/bonus/adaptors.h create mode 100644 libkram/eastl/include/EASTL/bonus/call_traits.h create mode 100644 libkram/eastl/include/EASTL/bonus/compressed_pair.h create mode 100644 libkram/eastl/include/EASTL/bonus/fixed_ring_buffer.h create mode 100644 libkram/eastl/include/EASTL/bonus/fixed_tuple_vector.h create mode 100644 libkram/eastl/include/EASTL/bonus/intrusive_sdlist.h create mode 100644 libkram/eastl/include/EASTL/bonus/intrusive_slist.h create mode 100644 libkram/eastl/include/EASTL/bonus/list_map.h create mode 100644 libkram/eastl/include/EASTL/bonus/lru_cache.h create mode 100644 libkram/eastl/include/EASTL/bonus/ring_buffer.h create mode 100644 libkram/eastl/include/EASTL/bonus/sort_extra.h create mode 100644 libkram/eastl/include/EASTL/bonus/tuple_vector.h create mode 100644 libkram/eastl/include/EASTL/chrono.h create mode 100644 libkram/eastl/include/EASTL/core_allocator.h create mode 100644 libkram/eastl/include/EASTL/core_allocator_adapter.h create mode 100644 libkram/eastl/include/EASTL/deque.h create mode 100644 libkram/eastl/include/EASTL/finally.h create mode 100644 libkram/eastl/include/EASTL/fixed_allocator.h create mode 100644 libkram/eastl/include/EASTL/fixed_function.h create mode 100644 libkram/eastl/include/EASTL/fixed_hash_map.h create mode 100644 libkram/eastl/include/EASTL/fixed_hash_set.h create mode 100644 libkram/eastl/include/EASTL/fixed_list.h create mode 100644 libkram/eastl/include/EASTL/fixed_map.h create mode 100644 libkram/eastl/include/EASTL/fixed_set.h create mode 100644 libkram/eastl/include/EASTL/fixed_slist.h create mode 100644 libkram/eastl/include/EASTL/fixed_string.h create mode 100644 libkram/eastl/include/EASTL/fixed_substring.h create mode 100644 libkram/eastl/include/EASTL/fixed_vector.h create mode 100644 libkram/eastl/include/EASTL/functional.h create mode 100644 libkram/eastl/include/EASTL/hash_map.h create mode 100644 libkram/eastl/include/EASTL/hash_set.h create mode 100644 libkram/eastl/include/EASTL/heap.h create mode 100644 libkram/eastl/include/EASTL/initializer_list.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_add_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_and_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_cmpxchg_strong.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_cmpxchg_weak.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_compiler_barrier.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_cpu_pause.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_exchange.h create mode 100644 
libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_add.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_and.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_or.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_sub.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_xor.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_load.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_memory_barrier.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_or_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_signal_fence.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_store.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_sub_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_thread_fence.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/arch/arch_xor_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_asserts.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_base_width.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_casts.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_flag.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_flag_standalone.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_integral.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_add_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_and_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_base.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cmpxchg_strong.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cmpxchg_weak.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_compiler_barrier.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cpu_pause.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_exchange.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_add.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_and.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_or.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_sub.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_xor.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_load.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_memory_barrier.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_or_fetch.h create mode 100644 
libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_signal_fence.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_store.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_sub_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_thread_fence.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_xor_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_memory_order.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_pointer.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_pop_compiler_options.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_push_compiler_options.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_size_aligned.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/atomic_standalone.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_add_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_and_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_barrier.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cmpxchg_strong.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cmpxchg_weak.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cpu_pause.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_exchange.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_add.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_and.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_or.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_sub.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_xor.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_load.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_memory_barrier.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_or_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_signal_fence.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_store.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_sub_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_thread_fence.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_xor_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_add_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_and_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_barrier.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cmpxchg_strong.h create mode 100644 
libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cmpxchg_weak.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cpu_pause.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_exchange.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_add.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_and.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_or.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_sub.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_xor.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_load.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_or_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_signal_fence.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_store.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_sub_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_thread_fence.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_xor_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_add_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_and_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_barrier.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cmpxchg_strong.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cmpxchg_weak.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cpu_pause.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_exchange.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_add.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_and.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_or.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_sub.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_xor.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_or_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_signal_fence.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_sub_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_xor_fetch.h create mode 100644 libkram/eastl/include/EASTL/internal/char_traits.h create mode 100644 libkram/eastl/include/EASTL/internal/config.h create mode 100644 libkram/eastl/include/EASTL/internal/copy_help.h create mode 100644 libkram/eastl/include/EASTL/internal/enable_shared.h create mode 100644 
libkram/eastl/include/EASTL/internal/fill_help.h create mode 100644 libkram/eastl/include/EASTL/internal/fixed_pool.h create mode 100644 libkram/eastl/include/EASTL/internal/function.h create mode 100644 libkram/eastl/include/EASTL/internal/function_detail.h create mode 100644 libkram/eastl/include/EASTL/internal/function_help.h create mode 100644 libkram/eastl/include/EASTL/internal/functional_base.h create mode 100644 libkram/eastl/include/EASTL/internal/generic_iterator.h create mode 100644 libkram/eastl/include/EASTL/internal/hashtable.h create mode 100644 libkram/eastl/include/EASTL/internal/in_place_t.h create mode 100644 libkram/eastl/include/EASTL/internal/integer_sequence.h create mode 100644 libkram/eastl/include/EASTL/internal/intrusive_hashtable.h create mode 100644 libkram/eastl/include/EASTL/internal/mem_fn.h create mode 100644 libkram/eastl/include/EASTL/internal/memory_base.h create mode 100644 libkram/eastl/include/EASTL/internal/move_help.h create mode 100644 libkram/eastl/include/EASTL/internal/pair_fwd_decls.h create mode 100644 libkram/eastl/include/EASTL/internal/piecewise_construct_t.h create mode 100644 libkram/eastl/include/EASTL/internal/red_black_tree.h create mode 100644 libkram/eastl/include/EASTL/internal/smart_ptr.h create mode 100644 libkram/eastl/include/EASTL/internal/thread_support.h create mode 100644 libkram/eastl/include/EASTL/internal/tuple_fwd_decls.h create mode 100644 libkram/eastl/include/EASTL/internal/type_compound.h create mode 100644 libkram/eastl/include/EASTL/internal/type_fundamental.h create mode 100644 libkram/eastl/include/EASTL/internal/type_pod.h create mode 100644 libkram/eastl/include/EASTL/internal/type_properties.h create mode 100644 libkram/eastl/include/EASTL/internal/type_transformations.h create mode 100644 libkram/eastl/include/EASTL/intrusive_hash_map.h create mode 100644 libkram/eastl/include/EASTL/intrusive_hash_set.h create mode 100644 libkram/eastl/include/EASTL/intrusive_list.h create mode 100644 libkram/eastl/include/EASTL/intrusive_ptr.h create mode 100644 libkram/eastl/include/EASTL/iterator.h create mode 100644 libkram/eastl/include/EASTL/linked_array.h create mode 100644 libkram/eastl/include/EASTL/linked_ptr.h create mode 100644 libkram/eastl/include/EASTL/list.h create mode 100644 libkram/eastl/include/EASTL/map.h create mode 100644 libkram/eastl/include/EASTL/memory.h create mode 100644 libkram/eastl/include/EASTL/meta.h create mode 100644 libkram/eastl/include/EASTL/numeric.h create mode 100644 libkram/eastl/include/EASTL/numeric_limits.h create mode 100644 libkram/eastl/include/EASTL/optional.h create mode 100644 libkram/eastl/include/EASTL/priority_queue.h create mode 100644 libkram/eastl/include/EASTL/queue.h create mode 100644 libkram/eastl/include/EASTL/random.h create mode 100644 libkram/eastl/include/EASTL/ratio.h create mode 100644 libkram/eastl/include/EASTL/safe_ptr.h create mode 100644 libkram/eastl/include/EASTL/scoped_array.h create mode 100644 libkram/eastl/include/EASTL/scoped_ptr.h create mode 100644 libkram/eastl/include/EASTL/segmented_vector.h create mode 100644 libkram/eastl/include/EASTL/set.h create mode 100644 libkram/eastl/include/EASTL/shared_array.h create mode 100644 libkram/eastl/include/EASTL/shared_ptr.h create mode 100644 libkram/eastl/include/EASTL/slist.h create mode 100644 libkram/eastl/include/EASTL/sort.h create mode 100644 libkram/eastl/include/EASTL/span.h create mode 100644 libkram/eastl/include/EASTL/stack.h create mode 100644 libkram/eastl/include/EASTL/string.h create 
mode 100644 libkram/eastl/include/EASTL/string_hash_map.h create mode 100644 libkram/eastl/include/EASTL/string_map.h create mode 100644 libkram/eastl/include/EASTL/string_view.h create mode 100644 libkram/eastl/include/EASTL/tuple.h create mode 100644 libkram/eastl/include/EASTL/type_traits.h create mode 100644 libkram/eastl/include/EASTL/unique_ptr.h create mode 100644 libkram/eastl/include/EASTL/unordered_map.h create mode 100644 libkram/eastl/include/EASTL/unordered_set.h create mode 100644 libkram/eastl/include/EASTL/utility.h create mode 100644 libkram/eastl/include/EASTL/variant.h create mode 100644 libkram/eastl/include/EASTL/vector.h create mode 100644 libkram/eastl/include/EASTL/vector_map.h create mode 100644 libkram/eastl/include/EASTL/vector_multimap.h create mode 100644 libkram/eastl/include/EASTL/vector_multiset.h create mode 100644 libkram/eastl/include/EASTL/vector_set.h create mode 100644 libkram/eastl/include/EASTL/version.h create mode 100644 libkram/eastl/include/EASTL/weak_ptr.h create mode 100644 libkram/eastl/source/allocator_eastl.cpp create mode 100644 libkram/eastl/source/assert.cpp create mode 100644 libkram/eastl/source/atomic.cpp create mode 100644 libkram/eastl/source/fixed_pool.cpp create mode 100644 libkram/eastl/source/hashtable.cpp create mode 100644 libkram/eastl/source/intrusive_list.cpp create mode 100644 libkram/eastl/source/numeric_limits.cpp create mode 100644 libkram/eastl/source/red_black_tree.cpp create mode 100644 libkram/eastl/source/string.cpp create mode 100644 libkram/eastl/source/thread_support.cpp diff --git a/libkram/eastl/include/EABase/config/eacompiler.h b/libkram/eastl/include/EABase/config/eacompiler.h new file mode 100644 index 00000000..bd656ed9 --- /dev/null +++ b/libkram/eastl/include/EABase/config/eacompiler.h @@ -0,0 +1,1778 @@ +/*----------------------------------------------------------------------------- + * config/eacompiler.h + * + * Copyright (c) Electronic Arts Inc. All rights reserved. 
+ *----------------------------------------------------------------------------- + * Currently supported defines include: + * EA_COMPILER_GNUC + * EA_COMPILER_ARM + * EA_COMPILER_EDG + * EA_COMPILER_SN + * EA_COMPILER_MSVC + * EA_COMPILER_METROWERKS + * EA_COMPILER_INTEL + * EA_COMPILER_BORLANDC + * EA_COMPILER_IBM + * EA_COMPILER_QNX + * EA_COMPILER_GREEN_HILLS + * EA_COMPILER_CLANG + * EA_COMPILER_CLANG_CL + * + * EA_COMPILER_VERSION = + * EA_COMPILER_NAME = + * EA_COMPILER_STRING = + * + * EA_COMPILER_VA_COPY_REQUIRED + * + * C++98/03 functionality + * EA_COMPILER_NO_STATIC_CONSTANTS + * EA_COMPILER_NO_TEMPLATE_SPECIALIZATION + * EA_COMPILER_NO_TEMPLATE_PARTIAL_SPECIALIZATION + * EA_COMPILER_NO_MEMBER_TEMPLATES + * EA_COMPILER_NO_MEMBER_TEMPLATE_SPECIALIZATION + * EA_COMPILER_NO_TEMPLATE_TEMPLATES + * EA_COMPILER_NO_MEMBER_TEMPLATE_FRIENDS + * EA_COMPILER_NO_VOID_RETURNS + * EA_COMPILER_NO_COVARIANT_RETURN_TYPE + * EA_COMPILER_NO_DEDUCED_TYPENAME + * EA_COMPILER_NO_ARGUMENT_DEPENDENT_LOOKUP + * EA_COMPILER_NO_EXCEPTION_STD_NAMESPACE + * EA_COMPILER_NO_EXPLICIT_FUNCTION_TEMPLATE_ARGUMENTS + * EA_COMPILER_NO_RTTI + * EA_COMPILER_NO_EXCEPTIONS + * EA_COMPILER_NO_NEW_THROW_SPEC + * EA_THROW_SPEC_NEW / EA_THROW_SPEC_DELETE + * EA_COMPILER_NO_UNWIND + * EA_COMPILER_NO_STANDARD_CPP_LIBRARY + * EA_COMPILER_NO_STATIC_VARIABLE_INIT + * EA_COMPILER_NO_STATIC_FUNCTION_INIT + * EA_COMPILER_NO_VARIADIC_MACROS + * + * C++11 functionality + * EA_COMPILER_NO_RVALUE_REFERENCES + * EA_COMPILER_NO_EXTERN_TEMPLATE + * EA_COMPILER_NO_RANGE_BASED_FOR_LOOP + * EA_COMPILER_NO_CONSTEXPR + * EA_COMPILER_NO_OVERRIDE + * EA_COMPILER_NO_INHERITANCE_FINAL + * EA_COMPILER_NO_NULLPTR + * EA_COMPILER_NO_AUTO + * EA_COMPILER_NO_DECLTYPE + * EA_COMPILER_NO_DEFAULTED_FUNCTIONS + * EA_COMPILER_NO_DELETED_FUNCTIONS + * EA_COMPILER_NO_LAMBDA_EXPRESSIONS + * EA_COMPILER_NO_TRAILING_RETURN_TYPES + * EA_COMPILER_NO_STRONGLY_TYPED_ENUMS + * EA_COMPILER_NO_FORWARD_DECLARED_ENUMS + * EA_COMPILER_NO_VARIADIC_TEMPLATES + * EA_COMPILER_NO_TEMPLATE_ALIASES + * EA_COMPILER_NO_INITIALIZER_LISTS + * EA_COMPILER_NO_NORETURN + * EA_COMPILER_NO_CARRIES_DEPENDENCY + * EA_COMPILER_NO_FALLTHROUGH + * EA_COMPILER_NO_NODISCARD + * EA_COMPILER_NO_MAYBE_UNUSED + * EA_COMPILER_NO_NONSTATIC_MEMBER_INITIALIZERS + * EA_COMPILER_NO_RIGHT_ANGLE_BRACKETS + * EA_COMPILER_NO_ALIGNOF + * EA_COMPILER_NO_ALIGNAS + * EA_COMPILER_NO_DELEGATING_CONSTRUCTORS + * EA_COMPILER_NO_INHERITING_CONSTRUCTORS + * EA_COMPILER_NO_USER_DEFINED_LITERALS + * EA_COMPILER_NO_STANDARD_LAYOUT_TYPES + * EA_COMPILER_NO_EXTENDED_SIZEOF + * EA_COMPILER_NO_INLINE_NAMESPACES + * EA_COMPILER_NO_UNRESTRICTED_UNIONS + * EA_COMPILER_NO_EXPLICIT_CONVERSION_OPERATORS + * EA_COMPILER_NO_FUNCTION_TEMPLATE_DEFAULT_ARGS + * EA_COMPILER_NO_LOCAL_CLASS_TEMPLATE_PARAMETERS + * EA_COMPILER_NO_NOEXCEPT + * EA_COMPILER_NO_RAW_LITERALS + * EA_COMPILER_NO_UNICODE_STRING_LITERALS + * EA_COMPILER_NO_NEW_CHARACTER_TYPES + * EA_COMPILER_NO_UNICODE_CHAR_NAME_LITERALS + * EA_COMPILER_NO_UNIFIED_INITIALIZATION_SYNTAX + * EA_COMPILER_NO_EXTENDED_FRIEND_DECLARATIONS + * + * C++14 functionality + * EA_COMPILER_NO_VARIABLE_TEMPLATES + * + * C++17 functionality + * EA_COMPILER_NO_INLINE_VARIABLES + * EA_COMPILER_NO_ALIGNED_NEW + * + * C++20 functionality + * EA_COMPILER_NO_DESIGNATED_INITIALIZERS + * + *----------------------------------------------------------------------------- + * + * Supplemental documentation + * EA_COMPILER_NO_STATIC_CONSTANTS + * Code such as this is legal, but some compilers fail to compile 
it: + * struct A{ static const a = 1; }; + * + * EA_COMPILER_NO_TEMPLATE_SPECIALIZATION + * Some compilers fail to allow template specialization, such as with this: + * template <class U> void DoSomething(U u); + * void DoSomething(int x); + * + * EA_COMPILER_NO_TEMPLATE_PARTIAL_SPECIALIZATION + * Some compilers fail to allow partial template specialization, such as with this: + * template <class T, class Allocator> class vector{ }; // Primary templated class. + * template <class Allocator> class vector<bool, Allocator>{ }; // Partially specialized version. + * + * EA_COMPILER_NO_MEMBER_TEMPLATES + * Some compilers fail to allow member template functions such as this: + * struct A{ template <class U> void DoSomething(U u); }; + * + * EA_COMPILER_NO_MEMBER_TEMPLATE_SPECIALIZATION + * Some compilers fail to allow member template specialization, such as with this: + * struct A{ + * template <class U> void DoSomething(U u); + * void DoSomething(int x); + * }; + * + * EA_COMPILER_NO_TEMPLATE_TEMPLATES + * Code such as this is legal: + * template <typename T, template <typename> class U> + * U<T> SomeFunction(const U<T> x) { return x.DoSomething(); } + * + * EA_COMPILER_NO_MEMBER_TEMPLATE_FRIENDS + * Some compilers fail to compile templated friends, as with this: + * struct A{ template <class U> friend class SomeFriend; }; + * This is described in the C++ Standard at 14.5.3. + * + * EA_COMPILER_NO_VOID_RETURNS + * This is legal C++: + * void DoNothing1(){ }; + * void DoNothing2(){ return DoNothing1(); } + * + * EA_COMPILER_NO_COVARIANT_RETURN_TYPE + * See the C++ standard sec 10.3,p5. + * + * EA_COMPILER_NO_DEDUCED_TYPENAME + * Some compilers don't support the use of 'typename' for + * dependent types in deduced contexts, as with this: + * template <typename T> void Function(T, typename T::type); + * + * EA_COMPILER_NO_ARGUMENT_DEPENDENT_LOOKUP + * Also known as Koenig lookup. Basically, if you have a function + * that is in a namespace and you call that function without prefixing + * it with the namespace the compiler should look at any arguments + * you pass to that function call and search their namespace *first* + * to see if the given function exists there. + * + * EA_COMPILER_NO_EXCEPTION_STD_NAMESPACE + * <exception> is in namespace std. Some std libraries fail to + * put the contents of <exception> in namespace std. The following + * code should normally be legal: + * void Function(){ std::terminate(); } + * + * EA_COMPILER_NO_EXPLICIT_FUNCTION_TEMPLATE_ARGUMENTS + * Some compilers fail to execute DoSomething() properly, though they + * succeed in compiling it, as with this: + * template <int i> + * bool DoSomething(int j){ return i == j; }; + * DoSomething<1>(2); + * + * EA_COMPILER_NO_EXCEPTIONS + * The compiler is configured to disallow the use of try/throw/catch + * syntax (often to improve performance). Use of such syntax in this + * case will cause a compilation error. + * + * EA_COMPILER_NO_UNWIND + * The compiler is configured to allow the use of try/throw/catch + * syntax and behaviour but disables the generation of stack unwinding + * code for responding to exceptions (often to improve performance).
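[Editorial example, not part of this patch: a minimal sketch of how client code typically consumes the feature macros documented above, here EA_COMPILER_NO_EXCEPTIONS. The include path and the ParseValue helper are illustrative assumptions, not EABase or kram API.]

    #include <EABase/eabase.h>  // pulls in EABase/config/eacompiler.h (assumed include path)
    #include <string>
    #include <cstdio>

    // Parse an integer, degrading gracefully when the build has exceptions disabled.
    static bool ParseValue(const char* text, int& out)
    {
    #if !defined(EA_COMPILER_NO_EXCEPTIONS)
        try { out = std::stoi(text); return true; }   // exceptions available
        catch (...) { return false; }
    #else
        return std::sscanf(text, "%d", &out) == 1;    // try/throw/catch would not compile here
    #endif
    }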
+ * + *---------------------------------------------------------------------------*/ + +#ifndef INCLUDED_eacompiler_H +#define INCLUDED_eacompiler_H + + #include + + // Note: This is used to generate the EA_COMPILER_STRING macros + #ifndef INTERNAL_STRINGIZE + #define INTERNAL_STRINGIZE(x) INTERNAL_PRIMITIVE_STRINGIZE(x) + #endif + #ifndef INTERNAL_PRIMITIVE_STRINGIZE + #define INTERNAL_PRIMITIVE_STRINGIZE(x) #x + #endif + + // EA_COMPILER_HAS_FEATURE + #ifndef EA_COMPILER_HAS_FEATURE + #if defined(__clang__) + #define EA_COMPILER_HAS_FEATURE(x) __has_feature(x) + #else + #define EA_COMPILER_HAS_FEATURE(x) 0 + #endif + #endif + + + // EA_COMPILER_HAS_BUILTIN + #ifndef EA_COMPILER_HAS_BUILTIN + #if defined(__clang__) + #define EA_COMPILER_HAS_BUILTIN(x) __has_builtin(x) + #else + #define EA_COMPILER_HAS_BUILTIN(x) 0 + #endif + #endif + + + // EDG (EDG compiler front-end, used by other compilers such as SN) + #if defined(__EDG_VERSION__) + #define EA_COMPILER_EDG 1 + + #if defined(_MSC_VER) + #define EA_COMPILER_EDG_VC_MODE 1 + #endif + #if defined(__GNUC__) + #define EA_COMPILER_EDG_GCC_MODE 1 + #endif + #endif + + // EA_COMPILER_WINRTCX_ENABLED + // + // Defined as 1 if the compiler has its available C++/CX support enabled, else undefined. + // This specifically means the corresponding compilation unit has been built with Windows Runtime + // Components enabled, usually via the '-ZW' compiler flags being used. This option allows for using + // ref counted hat-type '^' objects and other C++/CX specific keywords like "ref new" + #if !defined(EA_COMPILER_WINRTCX_ENABLED) && defined(__cplusplus_winrt) + #define EA_COMPILER_WINRTCX_ENABLED 1 + #endif + + + // EA_COMPILER_CPP11_ENABLED + // + // Defined as 1 if the compiler has its available C++11 support enabled, else undefined. + // This does not mean that all of C++11 or any particular feature of C++11 is supported + // by the compiler. It means that whatever C++11 support the compiler has is enabled. + // This also includes existing and older compilers that still identify C++11 as C++0x. + // + // We cannot use (__cplusplus >= 201103L) alone because some compiler vendors have + // decided to not define __cplusplus like thus until they have fully completed their + // C++11 support. + // + #if !defined(EA_COMPILER_CPP11_ENABLED) && defined(__cplusplus) + #if (__cplusplus >= 201103L) // Clang and GCC defines this like so in C++11 mode. + #define EA_COMPILER_CPP11_ENABLED 1 + #elif defined(__GNUC__) && defined(__GXX_EXPERIMENTAL_CXX0X__) + #define EA_COMPILER_CPP11_ENABLED 1 + #elif defined(_MSC_VER) && _MSC_VER >= 1600 // Microsoft unilaterally enables its C++11 support; there is no way to disable it. + #define EA_COMPILER_CPP11_ENABLED 1 + #elif defined(__EDG_VERSION__) // && ??? + // To do: Is there a generic way to determine this? + #endif + #endif + + + // EA_COMPILER_CPP14_ENABLED + // + // Defined as 1 if the compiler has its available C++14 support enabled, else undefined. + // This does not mean that all of C++14 or any particular feature of C++14 is supported + // by the compiler. It means that whatever C++14 support the compiler has is enabled. + // + // We cannot use (__cplusplus >= 201402L) alone because some compiler vendors have + // decided to not define __cplusplus like thus until they have fully completed their + // C++14 support. + #if !defined(EA_COMPILER_CPP14_ENABLED) && defined(__cplusplus) + #if (__cplusplus >= 201402L) // Clang and GCC defines this like so in C++14 mode. 
+ #define EA_COMPILER_CPP14_ENABLED 1 + #elif defined(_MSC_VER) && (_MSC_VER >= 1900) // VS2015+ + #define EA_COMPILER_CPP14_ENABLED 1 + #endif + #endif + + + // EA_COMPILER_CPP17_ENABLED + // + // Defined as 1 if the compiler has its available C++17 support enabled, else undefined. + // This does not mean that all of C++17 or any particular feature of C++17 is supported + // by the compiler. It means that whatever C++17 support the compiler has is enabled. + // + // We cannot use (__cplusplus >= 201703L) alone because some compiler vendors have + // decided to not define __cplusplus like thus until they have fully completed their + // C++17 support. + #if !defined(EA_COMPILER_CPP17_ENABLED) && defined(__cplusplus) + #if (__cplusplus >= 201703L) + #define EA_COMPILER_CPP17_ENABLED 1 + #elif defined(_MSVC_LANG) && (_MSVC_LANG >= 201703L) // C++17+ + #define EA_COMPILER_CPP17_ENABLED 1 + #endif + #endif + + + // EA_COMPILER_CPP20_ENABLED + // + // Defined as 1 if the compiler has its available C++20 support enabled, else undefined. + // This does not mean that all of C++20 or any particular feature of C++20 is supported + // by the compiler. It means that whatever C++20 support the compiler has is enabled. + // + // We cannot use (__cplusplus >= 202003L) alone because some compiler vendors have + // decided to not define __cplusplus like thus until they have fully completed their + // C++20 support. + #if !defined(EA_COMPILER_CPP20_ENABLED) && defined(__cplusplus) + // TODO(rparoin): enable once a C++20 value for the __cplusplus macro has been published + // #if (__cplusplus >= 202003L) + // #define EA_COMPILER_CPP20_ENABLED 1 + // #elif defined(_MSVC_LANG) && (_MSVC_LANG >= 202003L) // C++20+ + // #define EA_COMPILER_CPP20_ENABLED 1 + // #endif + #endif + + + + #if defined(__ARMCC_VERSION) + // Note that this refers to the ARM RVCT compiler (armcc or armcpp), but there + // are other compilers that target ARM processors, such as GCC and Microsoft VC++. + // If you want to detect compiling for the ARM processor, check for EA_PROCESSOR_ARM + // being defined. + // This compiler is also identified by defined(__CC_ARM) || defined(__ARMCC__). + #define EA_COMPILER_RVCT 1 + #define EA_COMPILER_ARM 1 + #define EA_COMPILER_VERSION __ARMCC_VERSION + #define EA_COMPILER_NAME "RVCT" + //#define EA_COMPILER_STRING (defined below) + + // Clang's GCC-compatible driver. + #elif defined(__clang__) && !defined(_MSC_VER) + #define EA_COMPILER_CLANG 1 + #define EA_COMPILER_VERSION (__clang_major__ * 100 + __clang_minor__) + #define EA_COMPILER_NAME "clang" + #define EA_COMPILER_STRING EA_COMPILER_NAME __clang_version__ + + // GCC (a.k.a. GNUC) + #elif defined(__GNUC__) // GCC compilers exist for many platforms. + #define EA_COMPILER_GNUC 1 + #define EA_COMPILER_VERSION (__GNUC__ * 1000 + __GNUC_MINOR__) + #define EA_COMPILER_NAME "GCC" + #define EA_COMPILER_STRING EA_COMPILER_NAME " compiler, version " INTERNAL_STRINGIZE( __GNUC__ ) "." INTERNAL_STRINGIZE( __GNUC_MINOR__ ) + + #if (__GNUC__ == 2) && (__GNUC_MINOR__ < 95) // If GCC < 2.95... + #define EA_COMPILER_NO_MEMBER_TEMPLATES 1 + #endif + #if (__GNUC__ == 2) && (__GNUC_MINOR__ <= 97) // If GCC <= 2.97... + #define EA_COMPILER_NO_MEMBER_TEMPLATE_FRIENDS 1 + #endif + #if (__GNUC__ == 3) && ((__GNUC_MINOR__ == 1) || (__GNUC_MINOR__ == 2)) // If GCC 3.1 or 3.2 (but not pre 3.1 or post 3.2)... 
+ #define EA_COMPILER_NO_EXPLICIT_FUNCTION_TEMPLATE_ARGUMENTS 1 + #endif + + // Borland C++ + #elif defined(__BORLANDC__) + #define EA_COMPILER_BORLANDC 1 + #define EA_COMPILER_VERSION __BORLANDC__ + #define EA_COMPILER_NAME "Borland C" + //#define EA_COMPILER_STRING (defined below) + + #if (__BORLANDC__ <= 0x0550) // If Borland C++ Builder 4 and 5... + #define EA_COMPILER_NO_MEMBER_TEMPLATE_FRIENDS 1 + #endif + #if (__BORLANDC__ >= 0x561) && (__BORLANDC__ < 0x600) + #define EA_COMPILER_NO_MEMBER_FUNCTION_SPECIALIZATION 1 + #endif + + + // Intel C++ + // The Intel Windows compiler masquerades as VC++ and defines _MSC_VER. + // The Intel compiler is based on the EDG compiler front-end. + #elif defined(__ICL) || defined(__ICC) + #define EA_COMPILER_INTEL 1 + + // Should we enable the following? We probably should do so since enabling it does a lot more good than harm + // for users. The Intel Windows compiler does a pretty good job of emulating VC++ and so the user would likely + // have to handle few special cases where the Intel compiler doesn't emulate VC++ correctly. + #if defined(_MSC_VER) + #define EA_COMPILER_MSVC 1 + #define EA_COMPILER_MICROSOFT 1 + #endif + + // Should we enable the following? This isn't as clear because as of this writing we don't know if the Intel + // compiler truly emulates GCC well enough that enabling this does more good than harm. + #if defined(__GNUC__) + #define EA_COMPILER_GNUC 1 + #endif + + #if defined(__ICL) + #define EA_COMPILER_VERSION __ICL + #elif defined(__ICC) + #define EA_COMPILER_VERSION __ICC + #endif + #define EA_COMPILER_NAME "Intel C++" + #if defined(_MSC_VER) + #define EA_COMPILER_STRING EA_COMPILER_NAME " compiler, version " INTERNAL_STRINGIZE( EA_COMPILER_VERSION ) ", EDG version " INTERNAL_STRINGIZE( __EDG_VERSION__ ) ", VC++ version " INTERNAL_STRINGIZE( _MSC_VER ) + #elif defined(__GNUC__) + #define EA_COMPILER_STRING EA_COMPILER_NAME " compiler, version " INTERNAL_STRINGIZE( EA_COMPILER_VERSION ) ", EDG version " INTERNAL_STRINGIZE( __EDG_VERSION__ ) ", GCC version " INTERNAL_STRINGIZE( __GNUC__ ) + #else + #define EA_COMPILER_STRING EA_COMPILER_NAME " compiler, version " INTERNAL_STRINGIZE( EA_COMPILER_VERSION ) ", EDG version " INTERNAL_STRINGIZE( __EDG_VERSION__ ) + #endif + + + #elif defined(_MSC_VER) + #define EA_COMPILER_MSVC 1 + #define EA_COMPILER_MICROSOFT 1 + #define EA_COMPILER_VERSION _MSC_VER + #define EA_COMPILER_NAME "Microsoft Visual C++" + //#define EA_COMPILER_STRING (defined below) + + #if defined(__clang__) + // Clang's MSVC-compatible driver. + #define EA_COMPILER_CLANG_CL 1 + #endif + + #define EA_STANDARD_LIBRARY_MSVC 1 + #define EA_STANDARD_LIBRARY_MICROSOFT 1 + + #if (_MSC_VER <= 1200) // If VC6.x and earlier... + #if (_MSC_VER < 1200) + #define EA_COMPILER_MSVCOLD 1 + #else + #define EA_COMPILER_MSVC6 1 + #endif + + #if (_MSC_VER < 1200) // If VC5.x or earlier... + #define EA_COMPILER_NO_TEMPLATE_SPECIALIZATION 1 + #endif + #define EA_COMPILER_NO_EXPLICIT_FUNCTION_TEMPLATE_ARGUMENTS 1 // The compiler compiles this OK, but executes it wrong. Fixed in VC7.0 + #define EA_COMPILER_NO_VOID_RETURNS 1 // The compiler fails to compile such cases. Fixed in VC7.0 + #define EA_COMPILER_NO_EXCEPTION_STD_NAMESPACE 1 // The compiler fails to compile such cases. Fixed in VC7.0 + #define EA_COMPILER_NO_DEDUCED_TYPENAME 1 // The compiler fails to compile such cases. Fixed in VC7.0 + #define EA_COMPILER_NO_STATIC_CONSTANTS 1 // The compiler fails to compile such cases. 
Fixed in VC7.0 + #define EA_COMPILER_NO_COVARIANT_RETURN_TYPE 1 // The compiler fails to compile such cases. Fixed in VC7.1 + #define EA_COMPILER_NO_ARGUMENT_DEPENDENT_LOOKUP 1 // The compiler compiles this OK, but executes it wrong. Fixed in VC7.1 + #define EA_COMPILER_NO_TEMPLATE_TEMPLATES 1 // The compiler fails to compile such cases. Fixed in VC7.1 + #define EA_COMPILER_NO_TEMPLATE_PARTIAL_SPECIALIZATION 1 // The compiler fails to compile such cases. Fixed in VC7.1 + #define EA_COMPILER_NO_MEMBER_TEMPLATE_FRIENDS 1 // The compiler fails to compile such cases. Fixed in VC7.1 + //#define EA_COMPILER_NO_MEMBER_TEMPLATES 1 // VC6.x supports member templates properly 95% of the time. So do we flag the remaining 5%? + //#define EA_COMPILER_NO_MEMBER_TEMPLATE_SPECIALIZATION 1 // VC6.x supports member templates properly 95% of the time. So do we flag the remaining 5%? + + #elif (_MSC_VER <= 1300) // If VC7.0 and earlier... + #define EA_COMPILER_MSVC7 1 + + #define EA_COMPILER_NO_COVARIANT_RETURN_TYPE 1 // The compiler fails to compile such cases. Fixed in VC7.1 + #define EA_COMPILER_NO_ARGUMENT_DEPENDENT_LOOKUP 1 // The compiler compiles this OK, but executes it wrong. Fixed in VC7.1 + #define EA_COMPILER_NO_TEMPLATE_TEMPLATES 1 // The compiler fails to compile such cases. Fixed in VC7.1 + #define EA_COMPILER_NO_TEMPLATE_PARTIAL_SPECIALIZATION 1 // The compiler fails to compile such cases. Fixed in VC7.1 + #define EA_COMPILER_NO_MEMBER_TEMPLATE_FRIENDS 1 // The compiler fails to compile such cases. Fixed in VC7.1 + #define EA_COMPILER_NO_MEMBER_FUNCTION_SPECIALIZATION 1 // This is the case only for VC7.0 and not VC6 or VC7.1+. Fixed in VC7.1 + //#define EA_COMPILER_NO_MEMBER_TEMPLATES 1 // VC7.0 supports member templates properly 95% of the time. So do we flag the remaining 5%? + + #elif (_MSC_VER < 1400) // VS2003 _MSC_VER of 1300 means VC7 (VS2003) + // The VC7.1 and later compiler is fairly close to the C++ standard + // and thus has no compiler limitations that we are concerned about. + #define EA_COMPILER_MSVC7_2003 1 + #define EA_COMPILER_MSVC7_1 1 + + #elif (_MSC_VER < 1500) // VS2005 _MSC_VER of 1400 means VC8 (VS2005) + #define EA_COMPILER_MSVC8_2005 1 + #define EA_COMPILER_MSVC8_0 1 + + #elif (_MSC_VER < 1600) // VS2008. _MSC_VER of 1500 means VC9 (VS2008) + #define EA_COMPILER_MSVC9_2008 1 + #define EA_COMPILER_MSVC9_0 1 + + #elif (_MSC_VER < 1700) // VS2010 _MSC_VER of 1600 means VC10 (VS2010) + #define EA_COMPILER_MSVC_2010 1 + #define EA_COMPILER_MSVC10_0 1 + + #elif (_MSC_VER < 1800) // VS2012 _MSC_VER of 1700 means VS2011/VS2012 + #define EA_COMPILER_MSVC_2011 1 // Microsoft changed the name to VS2012 before shipping, despite referring to it as VS2011 up to just a few weeks before shipping. 
+ #define EA_COMPILER_MSVC11_0 1 + #define EA_COMPILER_MSVC_2012 1 + #define EA_COMPILER_MSVC12_0 1 + + #elif (_MSC_VER < 1900) // VS2013 _MSC_VER of 1800 means VS2013 + #define EA_COMPILER_MSVC_2013 1 + #define EA_COMPILER_MSVC13_0 1 + + #elif (_MSC_VER < 1910) // VS2015 _MSC_VER of 1900 means VS2015 + #define EA_COMPILER_MSVC_2015 1 + #define EA_COMPILER_MSVC14_0 1 + + #elif (_MSC_VER < 1911) // VS2017 _MSC_VER of 1910 means VS2017 + #define EA_COMPILER_MSVC_2017 1 + #define EA_COMPILER_MSVC15_0 1 + + #endif + + + // IBM + #elif defined(__xlC__) + #define EA_COMPILER_IBM 1 + #define EA_COMPILER_NAME "IBM XL C" + #define EA_COMPILER_VERSION __xlC__ + #define EA_COMPILER_STRING "IBM XL C compiler, version " INTERNAL_STRINGIZE( __xlC__ ) + + // Unknown + #else // Else the compiler is unknown + + #define EA_COMPILER_VERSION 0 + #define EA_COMPILER_NAME "Unknown" + + #endif + + #ifndef EA_COMPILER_STRING + #define EA_COMPILER_STRING EA_COMPILER_NAME " compiler, version " INTERNAL_STRINGIZE(EA_COMPILER_VERSION) + #endif + + + // Deprecated definitions + // For backwards compatibility, should be supported for at least the life of EABase v2.0.x. + #ifndef EA_COMPILER_NO_TEMPLATE_PARTIAL_SPECIALIZATION + #define EA_COMPILER_PARTIAL_TEMPLATE_SPECIALIZATION 1 + #endif + #ifndef EA_COMPILER_NO_TEMPLATE_SPECIALIZATION + #define EA_COMPILER_TEMPLATE_SPECIALIZATION 1 + #endif + #ifndef EA_COMPILER_NO_MEMBER_TEMPLATES + #define EA_COMPILER_MEMBER_TEMPLATES 1 + #endif + #ifndef EA_COMPILER_NO_MEMBER_TEMPLATE_SPECIALIZATION + #define EA_COMPILER_MEMBER_TEMPLATE_SPECIALIZATION 1 + #endif + + + + /////////////////////////////////////////////////////////////////////////////// + // EA_COMPILER_VA_COPY_REQUIRED + // + // Defines whether va_copy must be used to copy or save va_list objects between uses. + // Some compilers on some platforms implement va_list whereby its contents + // are destroyed upon usage, even if passed by value to another function. + // With these compilers you can use va_copy to save and restore a va_list. + // Known compiler/platforms that destroy va_list contents upon usage include: + // CodeWarrior on PowerPC + // GCC on x86-64 + // However, va_copy is part of the C99 standard and not part of earlier C and + // C++ standards. So not all compilers support it. VC++ doesn't support va_copy, + // but it turns out that VC++ doesn't usually need it on the platforms it supports, + // and va_copy can usually be implemented via memcpy(va_list, va_list) with VC++. + /////////////////////////////////////////////////////////////////////////////// + + #ifndef EA_COMPILER_VA_COPY_REQUIRED + #if ((defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__)) && (!defined(__i386__) || defined(__x86_64__)) && !defined(__ppc__) && !defined(__PPC__) && !defined(__PPC64__) + #define EA_COMPILER_VA_COPY_REQUIRED 1 + #endif + #endif + + + // EA_COMPILER_NO_RTTI + // + // If EA_COMPILER_NO_RTTI is defined, then RTTI (run-time type information) + // is not available (possibly due to being disabled by the user). 
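[Editorial example, not part of this patch: a minimal sketch of guarding a dynamic_cast behind the EA_COMPILER_NO_RTTI macro documented above. The Node/Leaf types and the include path are illustrative assumptions.]

    #include <EABase/eabase.h>  // assumed include path for eacompiler.h

    struct Node { virtual ~Node() {} };
    struct Leaf : Node { int value = 0; };

    // Returns the leaf's value, or 0 when the node is not a Leaf (or RTTI is unavailable).
    static int LeafValueOrZero(Node* node)
    {
    #if !defined(EA_COMPILER_NO_RTTI)
        if (Leaf* leaf = dynamic_cast<Leaf*>(node))   // RTTI available: checked downcast
            return leaf->value;
        return 0;
    #else
        (void)node;                                   // RTTI disabled: dynamic_cast unavailable
        return 0;
    #endif
    }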
+ // + #if defined(__EDG_VERSION__) && !defined(__RTTI) + #define EA_COMPILER_NO_RTTI 1 + #elif defined(__clang__) && !EA_COMPILER_HAS_FEATURE(cxx_rtti) + #define EA_COMPILER_NO_RTTI 1 + #elif defined(__IBMCPP__) && !defined(__RTTI_ALL__) + #define EA_COMPILER_NO_RTTI 1 + #elif defined(__GXX_ABI_VERSION) && !defined(__GXX_RTTI) + #define EA_COMPILER_NO_RTTI 1 + #elif defined(_MSC_VER) && !defined(_CPPRTTI) + #define EA_COMPILER_NO_RTTI 1 + #elif defined(__ARMCC_VERSION) && defined(__TARGET_CPU_MPCORE) && !defined(__RTTI) + #define EA_COMPILER_NO_RTTI 1 + #endif + + + + // EA_COMPILER_NO_EXCEPTIONS / EA_COMPILER_NO_UNWIND + // + // If EA_COMPILER_NO_EXCEPTIONS is defined, then the compiler is + // configured to not recognize C++ exception-handling statements + // such as try/catch/throw. Thus, when EA_COMPILER_NO_EXCEPTIONS is + // defined, code that attempts to use exception handling statements + // will usually cause a compilation error. If is often desirable + // for projects to disable exception handling because exception + // handling causes extra code and/or data generation which might + // not be needed, especially if it is known that exceptions won't + // be happening. When writing code that is to be portable between + // systems of which some enable exception handling while others + // don't, check for EA_COMPILER_NO_EXCEPTIONS being defined. + // + #if !defined(EA_COMPILER_NO_EXCEPTIONS) && !defined(EA_COMPILER_NO_UNWIND) + #if defined(EA_COMPILER_GNUC) && defined(_NO_EX) // GCC on some platforms defines _NO_EX when exceptions are disabled. + #define EA_COMPILER_NO_EXCEPTIONS 1 + + #elif (defined(EA_COMPILER_CLANG) || defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_INTEL) || defined(EA_COMPILER_RVCT)) && !defined(__EXCEPTIONS) // GCC and most EDG-based compilers define __EXCEPTIONS when exception handling is enabled. + #define EA_COMPILER_NO_EXCEPTIONS 1 + + #elif (defined(EA_COMPILER_MSVC)) && !defined(_CPPUNWIND) + #define EA_COMPILER_NO_UNWIND 1 + + #endif // EA_COMPILER_NO_EXCEPTIONS / EA_COMPILER_NO_UNWIND + #endif // !defined(EA_COMPILER_NO_EXCEPTIONS) && !defined(EA_COMPILER_NO_UNWIND) + + + // ------------------------------------------------------------------------ + // EA_DISABLE_ALL_VC_WARNINGS / EA_RESTORE_ALL_VC_WARNINGS + // + // Disable and re-enable all warning(s) within code. + // + // Example usage: + // EA_DISABLE_ALL_VC_WARNINGS() + // + // EA_RESTORE_ALL_VC_WARNINGS() + // + //This is duplicated from EABase's eacompilertraits.h + #ifndef EA_DISABLE_ALL_VC_WARNINGS + #if defined(_MSC_VER) + #define EA_DISABLE_ALL_VC_WARNINGS() \ + __pragma(warning(push, 0)) \ + __pragma(warning(disable: 4244 4265 4267 4350 4472 4509 4548 4623 4710 4985 6320 4755 4625 4626 4702)) // Some warnings need to be explicitly called out. + #else + #define EA_DISABLE_ALL_VC_WARNINGS() + #endif + #endif + + //This is duplicated from EABase's eacompilertraits.h + #ifndef EA_RESTORE_ALL_VC_WARNINGS + #if defined(_MSC_VER) + #define EA_RESTORE_ALL_VC_WARNINGS() \ + __pragma(warning(pop)) + #else + #define EA_RESTORE_ALL_VC_WARNINGS() + #endif + #endif + + // Dinkumware + //This is duplicated from EABase's eahave.h + #if !defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && !defined(EA_NO_HAVE_DINKUMWARE_CPP_LIBRARY) + #if defined(__cplusplus) + EA_DISABLE_ALL_VC_WARNINGS() + #include // Need to trigger the compilation of yvals.h without directly using because it might not exist. 
+ EA_RESTORE_ALL_VC_WARNINGS() + #endif + + #if defined(__cplusplus) && defined(_CPPLIB_VER) /* If using the Dinkumware Standard library... */ + #define EA_HAVE_DINKUMWARE_CPP_LIBRARY 1 + #else + #define EA_NO_HAVE_DINKUMWARE_CPP_LIBRARY 1 + #endif + #endif + + + // EA_COMPILER_NO_ALIGNED_NEW + // + // + #if !defined(EA_COMPILER_NO_ALIGNED_NEW) + #if defined(_HAS_ALIGNED_NEW) && _HAS_ALIGNED_NEW // VS2017 15.5 Preview + // supported. + #elif defined(EA_COMPILER_CPP17_ENABLED) + // supported. + #else + #define EA_COMPILER_NO_ALIGNED_NEW 1 + #endif + #endif + + // EA_COMPILER_NO_NEW_THROW_SPEC / EA_THROW_SPEC_NEW / EA_THROW_SPEC_DELETE + // + // If defined then the compiler's version of operator new is not decorated + // with a throw specification. This is useful for us to know because we + // often want to write our own overloaded operator new implementations. + // We need such operator new overrides to be declared identically to the + // way the compiler is defining operator new itself. + // + // Example usage: + // void* operator new(std::size_t) EA_THROW_SPEC_NEW(std::bad_alloc); + // void* operator new[](std::size_t) EA_THROW_SPEC_NEW(std::bad_alloc); + // void* operator new(std::size_t, const std::nothrow_t&) EA_THROW_SPEC_NEW_NONE(); + // void* operator new[](std::size_t, const std::nothrow_t&) EA_THROW_SPEC_NEW_NONE(); + // void operator delete(void*) EA_THROW_SPEC_DELETE_NONE(); + // void operator delete[](void*) EA_THROW_SPEC_DELETE_NONE(); + // void operator delete(void*, const std::nothrow_t&) EA_THROW_SPEC_DELETE_NONE(); + // void operator delete[](void*, const std::nothrow_t&) EA_THROW_SPEC_DELETE_NONE(); + // + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) + #if defined(_MSC_VER) && (_MSC_VER >= 1912) // VS2017 15.3+ + #define EA_THROW_SPEC_NEW(x) noexcept(false) + #define EA_THROW_SPEC_NEW_NONE() noexcept + #define EA_THROW_SPEC_DELETE_NONE() noexcept + + #elif defined(_MSC_VER) && (_MSC_VER >= 1910) // VS2017+ + #define EA_THROW_SPEC_NEW(x) throw(x) + #define EA_THROW_SPEC_NEW_NONE() throw() + #define EA_THROW_SPEC_DELETE_NONE() throw() + + #else + #if defined(EA_PLATFORM_SONY) + #define EA_THROW_SPEC_NEW(X) _THROWS(X) + #elif defined(_MSC_VER) + // Disabled warning "nonstandard extension used: 'throw (...)'" as this warning is a W4 warning which is usually off by default + // and doesn't convey any important information but will still complain when building with /Wall (which most teams do) + #define EA_THROW_SPEC_NEW(X) __pragma(warning(push)) __pragma(warning(disable: 4987)) _THROWS(X) __pragma(warning(pop)) + #else + #define EA_THROW_SPEC_NEW(X) _THROW1(X) + #endif + #define EA_THROW_SPEC_NEW_NONE() _THROW0() + #define EA_THROW_SPEC_DELETE_NONE() _THROW0() + + #endif + #elif defined(EA_COMPILER_NO_EXCEPTIONS) && !defined(EA_COMPILER_RVCT) && !defined(EA_PLATFORM_LINUX) && !defined(EA_PLATFORM_APPLE) && !defined(CS_UNDEFINED_STRING) + #define EA_COMPILER_NO_NEW_THROW_SPEC 1 + + #define EA_THROW_SPEC_NEW(x) + #define EA_THROW_SPEC_NEW_NONE() + #define EA_THROW_SPEC_DELETE_NONE() + #else + #define EA_THROW_SPEC_NEW(x) throw(x) + #define EA_THROW_SPEC_NEW_NONE() throw() + #define EA_THROW_SPEC_DELETE_NONE() throw() + #endif + + + // EA_COMPILER_NO_STANDARD_CPP_LIBRARY + // + // If defined, then the compiler doesn't provide a Standard C++ library. + // + #if defined(EA_PLATFORM_ANDROID) + // Disabled because EA's eaconfig/android_config/android_sdk packages currently + // don't support linking STL libraries. 
Perhaps we can figure out what linker arguments + // are needed for an app so we can manually specify them and then re-enable this code. + //#include + // + //#if (__ANDROID_API__ < 9) // Earlier versions of Android provide no std C++ STL implementation. + #define EA_COMPILER_NO_STANDARD_CPP_LIBRARY 1 + //#endif + #endif + + + // EA_COMPILER_NO_STATIC_VARIABLE_INIT + // + // If defined, it means that global or static C++ variables will be + // constructed. Not all compiler/platorm combinations support this. + // User code that needs to be portable must avoid having C++ variables + // that construct before main. + // + //#if defined(EA_PLATFORM_MOBILE) + // #define EA_COMPILER_NO_STATIC_VARIABLE_INIT 1 + //#endif + + + // EA_COMPILER_NO_STATIC_FUNCTION_INIT + // + // If defined, it means that functions marked as startup functions + // (e.g. __attribute__((constructor)) in GCC) are supported. It may + // be that some compiler/platform combinations don't support this. + // + //#if defined(XXX) // So far, all compiler/platforms we use support this. + // #define EA_COMPILER_NO_STATIC_VARIABLE_INIT 1 + //#endif + + // EA_COMPILER_NO_VARIADIC_MACROS + // + // If defined, the compiler doesn't support C99/C++11 variadic macros. + // With a variadic macro, you can do this: + // #define MY_PRINTF(format, ...) printf(format, __VA_ARGS__) + // + #if !defined(EA_COMPILER_NO_VARIADIC_MACROS) + #if defined(_MSC_VER) && (_MSC_VER < 1500) // If earlier than VS2008.. + #define EA_COMPILER_NO_VARIADIC_MACROS 1 + #elif defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__)) < 401 // If earlier than GCC 4.1.. + #define EA_COMPILER_NO_VARIADIC_MACROS 1 + #elif defined(EA_COMPILER_EDG) // Includes other compilers + // variadic macros are supported + #endif + #endif + + + // EA_COMPILER_NO_RVALUE_REFERENCES + // + // If defined, the compiler doesn't fully support C++11 rvalue reference semantics. + // This applies to the compiler only and not the Standard Library in use with the compiler, + // which is required by the Standard to have some support itself. + // + #if !defined(EA_COMPILER_NO_RVALUE_REFERENCES) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (_MSC_VER >= 1600) // VS2010+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 403) // EDG 4.3+. + // supported. Earlier EDG supported a subset of rvalue references. Implicit move constructors and assignment operators aren't supported until EDG 4.5. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && EA_COMPILER_HAS_FEATURE(cxx_rvalue_references) + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4005) // GCC 4.5+ + // supported. + #else + #define EA_COMPILER_NO_RVALUE_REFERENCES 1 + #endif + #endif + + + // EA_COMPILER_NO_EXTERN_TEMPLATE + // + // If defined, the compiler doesn't support C++11 extern template. + // With extern templates, you can do this: + // extern template void DoSomething(KnownType u); + // + #if !defined(EA_COMPILER_NO_EXTERN_TEMPLATE) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (_MSC_VER >= 1700) // VS2012+... + // Extern template is supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 401) // EDG 4.1+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && defined(__apple_build_version__) && (EA_COMPILER_VERSION >= 401) + // Extern template is supported. 
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && !defined(__apple_build_version__) // Clang other than Apple's Clang + // Extern template is supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4006) // GCC 4.6+ + // Extern template is supported. + #else + #define EA_COMPILER_NO_EXTERN_TEMPLATE 1 + #endif + #endif + + + // EA_COMPILER_NO_RANGE_BASED_FOR_LOOP + // + // If defined, the compiler doesn't support C++11 range-based for loops. + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2009/n2930.html + // You must #include <iterator> for range-based for loops to work. + // Example usage: + // #include <iterator> + // #include <vector> + // std::vector<float> floatVector; + // for(float& f : floatVector) + // f += 1.0; + // + #if !defined(EA_COMPILER_NO_RANGE_BASED_FOR_LOOP) + #if defined(EA_COMPILER_CPP11_ENABLED) && (defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1700)) // VS2012+... + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 405) // EDG 4.5+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && (defined(__clang__) && (EA_COMPILER_VERSION >= 300)) // Clang 3.x+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && (defined(__GNUC__) && (EA_COMPILER_VERSION >= 4006)) // GCC 4.6+ + // supported. + #else + #define EA_COMPILER_NO_RANGE_BASED_FOR_LOOP 1 + #endif + #endif + + + // EA_COMPILER_NO_CONSTEXPR + // + // Refers to C++11 = constexpr (const expression) declarations. + // + #if !defined(EA_COMPILER_NO_CONSTEXPR) + #if defined(EA_COMPILER_CPP11_ENABLED) && (defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1900)) // VS2015+... Not present in VC++ up to and including VS2013. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 406) // EDG 4.6+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && EA_COMPILER_HAS_FEATURE(cxx_constexpr) + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4006) // GCC 4.6+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1900) // VS 2015+ + // supported. + #else + #define EA_COMPILER_NO_CONSTEXPR 1 + #endif + #endif + + + // EA_COMPILER_NO_CONSTEXPR_IF + // + // Refers to C++17 = constexpr if(const expression) conditionals. + // + #if !defined(EA_COMPILER_NO_CONSTEXPR_IF) + #if defined(EA_COMPILER_CPP17_ENABLED) && (defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1911)) // VS2017 15.3+ + // supported. + #elif defined(EA_COMPILER_CPP17_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 309) // Clang 3.9+ + // supported. + #elif defined(EA_COMPILER_CPP17_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 7000) // GCC 7+ + // supported. + #else + #define EA_COMPILER_NO_CONSTEXPR_IF 1 + #endif + #endif + + + // EA_COMPILER_NO_OVERRIDE + // + // Refers to the C++11 override specifier. + // + #ifndef EA_COMPILER_NO_OVERRIDE + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION > 1600) // VC++ > VS2010, even without C++11 support. VS2010 does support override, however will generate warnings due to the keyword being 'non-standard' + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4007) // GCC 4.7+ + // supported.
+ #else + #define EA_COMPILER_NO_OVERRIDE 1 + #endif + #endif + + + // EA_COMPILER_NO_INHERITANCE_FINAL + // + // Refers to the C++11 final specifier. + // + #ifndef EA_COMPILER_NO_INHERITANCE_FINAL + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1500) // VS2008+, even without C++11 support. + // supported, though you need to use EA_INHERITANCE_FINAL for it to work with VS versions prior to 2012. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+ + // supported + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4007) // GCC 4.7+ + // supported + #else + #define EA_COMPILER_NO_INHERITANCE_FINAL 1 + #endif + #endif + + + // EA_COMPILER_NO_AUTO + // + // Refers to C++11 auto. + // + #if !defined(EA_COMPILER_NO_AUTO) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1600) // VS2010+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 401) // EDG 4.1+. + // supported with the exception of the usage of braced initializer lists as of EDG 4.3. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+, including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4004) // GCC 4.4+ + // supported. + #else + #define EA_COMPILER_NO_AUTO 1 + #endif + #endif + + + // EA_COMPILER_NO_NULLPTR + // + // Refers to C++11 nullptr (which is a built in type). std::nullptr_t is defined in C++11 . + // Note that implements a portable nullptr implementation. + // + #if !defined(EA_COMPILER_NO_NULLPTR) + #if (defined(_MSC_VER) && (_MSC_VER >= 1600)) && defined(EA_COMPILER_CPP11_ENABLED) + // supported + #elif defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4006) && defined(EA_COMPILER_CPP11_ENABLED) + // supported + #elif defined(__clang__) && defined(EA_COMPILER_CPP11_ENABLED) + // supported + #elif defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 403) && defined(EA_COMPILER_CPP11_ENABLED) + // supported + #else + #define EA_COMPILER_NO_NULLPTR 1 + #endif + #endif + + + // EA_COMPILER_NO_DECLTYPE + // + // Refers to C++11 decltype. + // + #if !defined(EA_COMPILER_NO_DECLTYPE) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1600) // VS2010+ + // supported, though VS2010 doesn't support the spec completely as specified in the final standard. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 401) // EDG 4.1+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+, including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4003) // GCC 4.3+ + // supported. + #else + #define EA_COMPILER_NO_DECLTYPE 1 + #endif + #endif + + + + // EA_COMPILER_NO_DEFAULTED_FUNCTIONS + // EA_COMPILER_NO_DELETED_FUNCTIONS + // + // Refers to C++11 = default and = delete function declarations. + // + #if !defined(EA_COMPILER_NO_DEFAULTED_FUNCTIONS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1800) // VS2013+ + // supported, but as of VS2013 it isn't supported for defaulted move constructors and move assignment operators. 
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 401) // EDG 4.1+. + // supported, but as of EDG 4.3 it isn't supported for defaulted move constructors and move assignment operators until EDG 4.5. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) // Clang 3.0+, including Apple's Clang + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4004) // GCC 4.4+ + // supported. + #else + // VC++ doesn't support it as of VS2012. + #define EA_COMPILER_NO_DEFAULTED_FUNCTIONS 1 + #endif + #endif + + #if !defined(EA_COMPILER_NO_DELETED_FUNCTIONS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1800) // VS2013+ + // supported, but as of VS2013 it isn't supported for defaulted move constructors and move assignment operators. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 401) // EDG 4.1+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4004) // GCC 4.4+ + // supported. + #else + // VC++ doesn't support it as of VS2012. + #define EA_COMPILER_NO_DELETED_FUNCTIONS 1 + #endif + #endif + + + // EA_COMPILER_NO_LAMBDA_EXPRESSIONS + // + // Refers to C++11 lambda expressions. + // + #if !defined(EA_COMPILER_NO_LAMBDA_EXPRESSIONS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1600) // VS2010+ + // supported, though VS2010 doesn't support the spec completely as specified in the final standard. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 401) // EDG 4.1+. + // supported. However, converting lambdas to function pointers is not supported until EDG 4.5. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 301) && !defined(__apple_build_version__) // Clang 3.1+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4004) // GCC 4.4+ + // supported. + #else + #define EA_COMPILER_NO_LAMBDA_EXPRESSIONS 1 + #endif + #endif + + + // EA_COMPILER_NO_TRAILING_RETURN_TYPES + // + // Refers to C++11 trailing-return-type. Also sometimes referred to as "incomplete return type". + // + #if !defined(EA_COMPILER_NO_TRAILING_RETURN_TYPES) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1600) // VS2010+ + // supported, though VS2010 doesn't support the spec completely as specified in the final standard. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 402) // EDG 4.2+. + // supported. However, use of "this" in trailing return types is not supported untiil EDG 4.4 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 301) && !defined(__apple_build_version__) // Clang 3.1+, not including Apple's Clang. + // supported. 
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4004) // GCC 4.4+ + // supported. + #else + #define EA_COMPILER_NO_TRAILING_RETURN_TYPES 1 + #endif + #endif + + + // EA_COMPILER_NO_STRONGLY_TYPED_ENUMS + // + // Refers to C++11 strongly typed enums, which includes enum classes and sized enums. Doesn't include forward-declared enums. + // + #if !defined(EA_COMPILER_NO_STRONGLY_TYPED_ENUMS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1700) // VS2012+ + // supported. A subset of this is actually supported by VS2010. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 400) // EDG 4.0+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+, including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4004) // GCC 4.4+ + // supported. + #else + #define EA_COMPILER_NO_STRONGLY_TYPED_ENUMS 1 + #endif + #endif + + + // EA_COMPILER_NO_FORWARD_DECLARED_ENUMS + // + // Refers to C++11 forward declared enums. + // + #if !defined(EA_COMPILER_NO_FORWARD_DECLARED_ENUMS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1700) // VS2012+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 405) // EDG 4.5+. + // supported. EDG 4.3 supports basic forward-declared enums, but not forward-declared strongly typed enums. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 301) && !defined(__apple_build_version__) // Clang 3.1+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4006) // GCC 4.6+ + // supported. + #else + #define EA_COMPILER_NO_FORWARD_DECLARED_ENUMS 1 + #endif + #endif + + + // EA_COMPILER_NO_VARIADIC_TEMPLATES + // + // Refers to C++11 variadic templates. + // + #if !defined(EA_COMPILER_NO_VARIADIC_TEMPLATES) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1800) // VS2013+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (_MSC_FULL_VER == 170051025) // VS2012 November Preview for Windows only. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 403) // EDG 4.3+. + // supported, though 4.1 has partial support for variadic templates. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+, including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4004) // GCC 4.4+ + // supported, though GCC 4.3 has partial support for variadic templates. + #else + #define EA_COMPILER_NO_VARIADIC_TEMPLATES 1 + #endif + #endif + + + // EA_COMPILER_NO_TEMPLATE_ALIASES + // + // Refers to C++11 alias templates. + // Example alias template usage: + // template + // using Dictionary = eastl::map; + // + // Dictionary StringIntDictionary; + // + #if !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1800) // VS2013+. + // supported. 
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 402) // EDG 4.2+. + // supported, though 4.1 has partial support for variadic templates. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) && !defined(__apple_build_version__) // Clang 3.0+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4007) // GCC 4.7+ + // supported, though GCC 4.3 has partial support for variadic templates. + #else + #define EA_COMPILER_NO_TEMPLATE_ALIASES 1 + #endif + #endif + + + // EA_COMPILER_NO_VARIABLE_TEMPLATES + // + // Refers to C++14 variable templates. + // Example variable template usage: + // template + // constexpr T pi = T(3.1415926535897932385); + // + #if !defined(EA_COMPILER_NO_VARIABLE_TEMPLATES) + #if defined(_MSC_VER) && (_MSC_FULL_VER >= 190023918) // VS2015 Update 2 and above. + // supported. + #elif defined(EA_COMPILER_CPP14_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 304) && !defined(__apple_build_version__) // Clang 3.4+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP14_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 5000) // GCC 5+ + // supported. + #elif !defined(EA_COMPILER_CPP14_ENABLED) + #define EA_COMPILER_NO_VARIABLE_TEMPLATES 1 + #endif + #endif + + + // EA_COMPILER_NO_INLINE_VARIABLES + // + // Refers to C++17 inline variables that allows the definition of variables in header files + // + // Example usage: + // struct Foo + // { + // static inline constexpr int kConstant = 42; // no out of class definition + // }; + // + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2015/n4424.pdf + // http://en.cppreference.com/w/cpp/language/inline + // + #if !defined(EA_COMPILER_NO_INLINE_VARIABLES) + #define EA_COMPILER_NO_INLINE_VARIABLES 1 + #endif + + + // EA_COMPILER_NO_INITIALIZER_LISTS + // + // Refers to C++11 initializer lists. + // This refers to the compiler support for this and not the Standard Library support (std::initializer_list). + // + #if !defined(EA_COMPILER_NO_INITIALIZER_LISTS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1800) // VS2013+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (_MSC_FULL_VER == 170051025) // VS2012 November Preview for Windows only. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 405) // EDG 4.5+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 301) && !defined(__apple_build_version__) // Clang 3.1+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4004) // GCC 4.4+ + // supported, though GCC 4.3 has partial support for it. + #else + #define EA_COMPILER_NO_INITIALIZER_LISTS 1 + #endif + #endif + + + // EA_COMPILER_NO_NORETURN + // + // Refers to C++11 declaration attribute: noreturn. 
+ // http://en.cppreference.com/w/cpp/language/attributes + // http://blog.aaronballman.com/2011/09/understanding-attributes/ + // + #if !defined(EA_COMPILER_NO_NORETURN) + #if defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1300) // VS2003+ + // supported via __declspec(noreturn). You need to use that or EA_NORETURN. VC++ up to VS2013 doesn't support any C++11 attribute types. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 402) // EDG 4.2+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) && !defined(__apple_build_version__) // Clang 3.0+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4008) // GCC 4.8+ + // supported. + #else + #define EA_COMPILER_NO_NORETURN 1 + #endif + #endif + + + // EA_COMPILER_NO_CARRIES_DEPENDENCY + // + // Refers to C++11 declaration attribute: carries_dependency. + // http://en.cppreference.com/w/cpp/language/attributes + // http://blog.aaronballman.com/2011/09/understanding-attributes/ + // + #if !defined(EA_COMPILER_NO_CARRIES_DEPENDENCY) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 402) // EDG 4.2+. + // supported; stricter than other compilers in its usage. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) && !defined(__apple_build_version__) // Clang 3.0+, not including Apple's Clang. + // supported. + // Currently GNUC doesn't appear to support this attribute. + //#elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4008) // GCC 4.8+ + // // supported. + #else + #define EA_COMPILER_NO_CARRIES_DEPENDENCY 1 + #endif + #endif + + + // EA_COMPILER_NO_FALLTHROUGH + // + // Refers to C++17 declaration attribute: fallthrough. + // http://en.cppreference.com/w/cpp/language/attributes + // + #if !defined(EA_COMPILER_NO_FALLTHROUGH) + #if defined(EA_COMPILER_CPP17_ENABLED) + // supported. + #else + #define EA_COMPILER_NO_FALLTHROUGH 1 + #endif + #endif + + + // EA_COMPILER_NO_NODISCARD + // + // Refers to C++17 declaration attribute: nodiscard. + // http://en.cppreference.com/w/cpp/language/attributes + // + #if !defined(EA_COMPILER_NO_NODISCARD) + #if defined(EA_COMPILER_CPP17_ENABLED) + // supported. + #else + #define EA_COMPILER_NO_NODISCARD 1 + #endif + #endif + + + // EA_COMPILER_NO_MAYBE_UNUSED + // + // Refers to C++17 declaration attribute: maybe_unused. + // http://en.cppreference.com/w/cpp/language/attributes + // + #if !defined(EA_COMPILER_NO_MAYBE_UNUSED) + #if defined(EA_COMPILER_CPP17_ENABLED) + // supported. + #elif defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1912) // VS2017 15.3+ + // supported. + #else + #define EA_COMPILER_NO_MAYBE_UNUSED 1 + #endif + #endif + + + // EA_COMPILER_NO_STRUCTURED_BINDING + // + // Indicates if target compiler supports the C++17 "structured binding" language feature. + // https://en.cppreference.com/w/cpp/language/structured_binding + // + // + #if !defined(EA_COMPILER_NO_STRUCTURED_BINDING) + #if defined(EA_COMPILER_CPP17_ENABLED) + // supported. 
+ #elif defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1912) // VS2017 15.3+
+ // supported.
+ #else
+ #define EA_COMPILER_NO_STRUCTURED_BINDING 1
+ #endif
+ #endif
+
+
+ // EA_COMPILER_NO_DESIGNATED_INITIALIZERS
+ //
+ // Indicates the target compiler supports the C++20 "designated initializer" language feature.
+ // https://en.cppreference.com/w/cpp/language/aggregate_initialization
+ //
+ // Example:
+ // struct A { int x; int y; };
+ // A a = { .y = 42, .x = 1 };
+ //
+ #if !defined(EA_COMPILER_NO_DESIGNATED_INITIALIZERS)
+ #if defined(EA_COMPILER_CPP20_ENABLED)
+ // supported.
+ #else
+ #define EA_COMPILER_NO_DESIGNATED_INITIALIZERS 1
+ #endif
+ #endif
+
+
+ // EA_COMPILER_NO_NONSTATIC_MEMBER_INITIALIZERS
+ //
+ // Refers to C++11 non-static data member initializers (in-class member initializers).
+ // http://www.open-std.org/JTC1/SC22/WG21/docs/papers/2008/n2756.htm
+ //
+ #if !defined(EA_COMPILER_NO_NONSTATIC_MEMBER_INITIALIZERS)
+ #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1800) // VS2013+.
+ // supported.
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+
+ // supported.
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) && !defined(__apple_build_version__) // Clang 3.0+, not including Apple's Clang.
+ // supported.
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4007) // GCC 4.7+
+ // supported.
+ #else
+ #define EA_COMPILER_NO_NONSTATIC_MEMBER_INITIALIZERS 1
+ #endif
+ #endif
+
+
+ // EA_COMPILER_NO_RIGHT_ANGLE_BRACKETS
+ //
+ // Defines if the compiler supports >> (as opposed to > >) in template
+ // declarations such as typedef eastl::list<eastl::list<int>> ListList;
+ //
+ #if !defined(EA_COMPILER_NO_RIGHT_ANGLE_BRACKETS)
+ #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1600) // VS2010+
+ // supported.
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 401) // EDG 4.1+.
+ // supported.
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+, including Apple's Clang.
+ // supported.
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4003) // GCC 4.3+
+ // supported.
+ #else
+ #define EA_COMPILER_NO_RIGHT_ANGLE_BRACKETS 1
+ #endif
+ #endif
+
+
+ // EA_COMPILER_NO_ALIGNOF
+ //
+ // Refers specifically to C++11 alignof and not old compiler extensions such as __alignof__().
+ // However, EABase provides a portable EA_ALIGN_OF which works for all compilers.
+ //
+ #if !defined(EA_COMPILER_NO_ALIGNOF)
+ // Not supported by VC++ as of VS2013, though EA_ALIGN_OF is supported on all compilers as an alternative.
+ #if defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+, including Apple's Clang.
+ // supported.
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4005) // GCC 4.5+
+ // supported.
+ #else
+ #define EA_COMPILER_NO_ALIGNOF 1
+ #endif
+ #endif
+
+
+ // EA_COMPILER_NO_ALIGNAS
+ //
+ // Refers to C++11 alignas.
+ //
+ #if !defined(EA_COMPILER_NO_ALIGNAS)
+ // Not supported by VC++ as of VS2013.
+ #if defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+
+ // supported.
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) && !defined(__apple_build_version__) // Clang 3.0+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4008) // GCC 4.8+ + // supported. + #else + #define EA_COMPILER_NO_ALIGNAS 1 + #endif + #endif + + + // EA_COMPILER_NO_DELEGATING_CONSTRUCTORS + // + // Refers to C++11 constructor delegation. + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2006/n1986.pdf + // https://www.ibm.com/developerworks/mydeveloperworks/blogs/5894415f-be62-4bc0-81c5-3956e82276f3/entry/c_0x_delegating_constructors + // + #if !defined(EA_COMPILER_NO_DELEGATING_CONSTRUCTORS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1800) // VS2013+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 407) // EDG 4.7+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) && !defined(__apple_build_version__) // Clang 3.0+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4007) // GCC 4.7+ + // supported. + #else + #define EA_COMPILER_NO_DELEGATING_CONSTRUCTORS 1 + #endif + #endif + + + // EA_COMPILER_NO_INHERITING_CONSTRUCTORS + // + // Refers to C++11 constructor inheritance via 'using'. + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2540.htm + // + #if !defined(EA_COMPILER_NO_INHERITING_CONSTRUCTORS) + // Not supported by VC++ as of VS2013. + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && EA_COMPILER_HAS_FEATURE(cxx_inheriting_constructors) // Clang + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4008) // GCC 4.8+ + // supported. + #else + #define EA_COMPILER_NO_INHERITING_CONSTRUCTORS 1 + #endif + #endif + + + // EA_COMPILER_NO_USER_DEFINED_LITERALS + // + // http://en.cppreference.com/w/cpp/language/user_literal + // http://stackoverflow.com/questions/237804/what-new-capabilities-do-user-defined-literals-add-to-c + // + #if !defined(EA_COMPILER_NO_USER_DEFINED_LITERALS) + // Not supported by VC++ as of VS2013. + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 301) && !defined(__apple_build_version__) // Clang 3.1+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4007) // GCC 4.7+ + // supported. + #else + #define EA_COMPILER_NO_USER_DEFINED_LITERALS 1 + #endif + #endif + + + // EA_COMPILER_NO_STANDARD_LAYOUT_TYPES + // a.k.a. POD relaxation + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2342.htm + // + #if !defined(EA_COMPILER_NO_STANDARD_LAYOUT_TYPES) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1700) // VS2012+ + // supported. 
+ #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) && !defined(__apple_build_version__) // Clang 3.0+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4005) // GCC 4.5+ + // supported. + #else + #define EA_COMPILER_NO_STANDARD_LAYOUT_TYPES 1 + #endif + #endif + + + // EA_COMPILER_NO_EXTENDED_SIZEOF + // + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2253.html + // Allows you to do this: sizeof(SomeClass::mSomeMember) + // + #if !defined(EA_COMPILER_NO_EXTENDED_SIZEOF) + // Not supported by VC++ as of VS2013. + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + // Versions of EDG prior to 4.5 only support extended sizeof in non-member functions. Full support was added in 4.5 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 405) // EDG 4.5+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 301) && !defined(__apple_build_version__) // Clang 3.1+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4005) // GCC 4.5+ + // supported. + #else + #define EA_COMPILER_NO_EXTENDED_SIZEOF 1 + #endif + #endif + + + // EA_COMPILER_NO_INLINE_NAMESPACES + // + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2535.htm + // http://blog.aaronballman.com/2011/07/inline-namespaces/ + // + #if !defined(EA_COMPILER_NO_INLINE_NAMESPACES) + // Not supported by VC++ as of VS2013. + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 405) // EDG 4.5+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+, including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4004) // GCC 4.4+ + // supported. + #else + #define EA_COMPILER_NO_INLINE_NAMESPACES 1 + #endif + #endif + + + // EA_COMPILER_NO_UNRESTRICTED_UNIONS + // + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2544.pdf + // + #if !defined(EA_COMPILER_NO_UNRESTRICTED_UNIONS) + // Not supported by VC++ as of VS2013. + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 406) // EDG 4.6+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 301) && !defined(__apple_build_version__) // Clang 3.1+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4006) // GCC 4.6+ + // supported. 
+ #else + #define EA_COMPILER_NO_UNRESTRICTED_UNIONS 1 + #endif + #endif + + + // EA_COMPILER_NO_EXPLICIT_CONVERSION_OPERATORS + // + // http://en.wikipedia.org/wiki/C%2B%2B11#Explicit_conversion_operators + // + #if !defined(EA_COMPILER_NO_EXPLICIT_CONVERSION_OPERATORS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1800) // VS2013+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (_MSC_FULL_VER == 170051025) // VS2012 November Preview for Windows only. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 404) // EDG 4.4+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) && !defined(__apple_build_version__) // Clang 3.0+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4005) // GCC 4.5+ + // supported. + #else + #define EA_COMPILER_NO_EXPLICIT_CONVERSION_OPERATORS 1 + #endif + #endif + + + // EA_COMPILER_NO_FUNCTION_TEMPLATE_DEFAULT_ARGS + // + // The compiler does not support default template arguments for function templates. + // http://stackoverflow.com/questions/2447458/default-template-arguments-for-function-templates + // + #if !defined(EA_COMPILER_NO_FUNCTION_TEMPLATE_DEFAULT_ARGS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1800) // VS2013+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 403) // EDG 4.4+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+, including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4003) // GCC 4.3+ + // supported. + #else + #define EA_COMPILER_NO_FUNCTION_TEMPLATE_DEFAULT_ARGS 1 + #endif + #endif + + + // EA_COMPILER_NO_LOCAL_CLASS_TEMPLATE_PARAMETERS + // + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2657.htm + // http://stackoverflow.com/questions/5751977/local-type-as-template-arguments-in-c + // + #if !defined(EA_COMPILER_NO_LOCAL_CLASS_TEMPLATE_PARAMETERS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1600) // VS2010+ + // supported. + #if (EA_COMPILER_VERSION < 1700) // VS2010 generates a warning, but the C++ language now allows it. + #pragma warning(disable: 4836) // nonstandard extension used: local types or unnamed types cannot be used as template arguments. + #endif + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 402) // EDG 4.2+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+, including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4005) // GCC 4.5+ + // supported. 
+ #else + #define EA_COMPILER_NO_LOCAL_CLASS_TEMPLATE_PARAMETERS 1 + #endif + #endif + + + // EA_COMPILER_NO_NOEXCEPT + // + // C++11 noexcept + // http://en.cppreference.com/w/cpp/language/attributes + // http://en.cppreference.com/w/cpp/language/noexcept + // + #if !defined(EA_COMPILER_NO_NOEXCEPT) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1900) // VS2014+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 405) // EDG 4.5+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) && !defined(__apple_build_version__) // Clang 3.0+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4006) // GCC 4.6+ + // supported. + #else + #define EA_COMPILER_NO_NOEXCEPT 1 + #endif + #endif + + + // EA_COMPILER_NO_RAW_LITERALS + // + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2442.htm + // http://en.wikipedia.org/wiki/C%2B%2B11#New_string_literals + // + #if !defined(EA_COMPILER_NO_RAW_LITERALS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1800) // VS2013+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 407) // EDG 4.7+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) && !defined(__apple_build_version__) // Clang 3.0+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4005) // GCC 4.5+ + // supported. + #else + #define EA_COMPILER_NO_RAW_LITERALS 1 + #endif + #endif + + + // EA_COMPILER_NO_UNICODE_STRING_LITERALS + // + // http://en.wikipedia.org/wiki/C%2B%2B11#New_string_literals + // + #if !defined(EA_COMPILER_NO_UNICODE_STRING_LITERALS) + // Not supported by VC++ as of VS2013. + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 407) // EDG 4.7+. + // supported. It's not clear if it's v4.4 or v4.7 that adds this support. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 300) && !defined(__apple_build_version__) // Clang 3.0+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4004) // GCC 4.4+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 407) // EDG 4.7+. + // supported. It's not clear if it's v4.4 or v4.7 that adds this support. + #else + #define EA_COMPILER_NO_UNICODE_STRING_LITERALS 1 + #endif + #endif + + + // EA_COMPILER_NO_NEW_CHARACTER_TYPES + // + // Refers to char16_t and char32_t as true native types (and not something simply typedef'd from uint16_t and uint32_t). 
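+ //
+ // Illustrative example (an added sketch, not upstream EABase text): when this macro is not
+ // defined, char16_t and char32_t are distinct native types, so the two declarations below
+ // are separate overloads rather than collisions with integer typedefs.
+ // void PrintChar(char16_t c); // hypothetical function
+ // void PrintChar(char32_t c); // a distinct overload, not a redeclaration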
+ // http://en.cppreference.com/w/cpp/language/types + // + #if !defined(EA_COMPILER_NO_NEW_CHARACTER_TYPES) + #if defined(EA_COMPILER_NO_UNICODE_STRING_LITERALS) // Some compilers have had support for char16_t prior to support for u"", but it's not useful to have the former without the latter. + #define EA_COMPILER_NO_NEW_CHARACTER_TYPES 1 + #endif + #endif + + + // EA_COMPILER_NO_UNICODE_CHAR_NAME_LITERALS + // + // C++ 11 relaxed \u\U sequences in strings. + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2007/n2170.html + // + #if !defined(EA_COMPILER_NO_UNICODE_CHAR_NAME_LITERALS) + // VC++ up till at least VS2013 supports \u and \U but supports them wrong with respect to the C++11 Standard. + + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 301) && !defined(__apple_build_version__) // Clang 3.1+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4005) // GCC 4.5+ + // supported. + #else + #define EA_COMPILER_NO_UNICODE_CHAR_NAME_LITERALS 1 + #endif + #endif + + + // EA_COMPILER_NO_UNIFIED_INITIALIZATION_SYNTAX + // + // http://en.wikipedia.org/wiki/C%2B%2B11#Uniform_initialization + // + #if !defined(EA_COMPILER_NO_UNIFIED_INITIALIZATION_SYNTAX) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1800) // VS2013+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 401) && defined(__apple_build_version__) // Apple clang 4.1+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 301) && !defined(__apple_build_version__) // Clang 3.1+, not including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4004) // GCC 4.4+ + // supported. + #else + #define EA_COMPILER_NO_UNIFIED_INITIALIZATION_SYNTAX 1 + #endif + #endif + + + // EA_COMPILER_NO_EXTENDED_FRIEND_DECLARATIONS + // + // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1791.pdf + // + #if !defined(EA_COMPILER_NO_EXTENDED_FRIEND_DECLARATIONS) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1600) // VS2010+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 401) // EDG 4.1+. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && (EA_COMPILER_VERSION >= 209) // Clang 2.9+, including Apple's Clang. + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4007) // GCC 4.7+ + // supported. + #else + #define EA_COMPILER_NO_EXTENDED_FRIEND_DECLARATIONS 1 + #endif + #endif + + + // EA_COMPILER_NO_THREAD_LOCAL + // + // Refers specifically to C++ thread_local, which is like compiler __thread implementations except + // that it also supports non-trivial classes (e.g. with ctors). EA_COMPILER_NO_THREAD_LOCAL refers + // specifically to full C++11 thread_local support. The EAThread package provides a wrapper for + // __thread via EA_THREAD_LOCAL (which unfortunately sounds like C++ thread_local). 
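+ //
+ // Illustrative example (an added sketch, not upstream EABase text): unlike plain __thread,
+ // full C++11 thread_local supports non-trivial per-thread construction and destruction.
+ // struct Counter { Counter(); ~Counter(); int value; }; // hypothetical type with a ctor/dtor
+ // thread_local Counter gTlsCounter; // one instance per thread; ctor/dtor run on each thread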
+ // + // https://en.cppreference.com/w/cpp/keyword/thread_local + // + #if !defined(EA_COMPILER_NO_THREAD_LOCAL) + #if defined(EA_COMPILER_CPP11_ENABLED) && defined(__clang__) && EA_COMPILER_HAS_FEATURE(cxx_thread_local) + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(_MSC_VER) && (EA_COMPILER_VERSION >= 1900) // VS2015+ + // supported. + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(__GNUC__) && (EA_COMPILER_VERSION >= 4008) // GCC 4.8+ + // supported. + #else + #define EA_COMPILER_NO_THREAD_LOCAL 1 + #endif + #endif + + +#endif // INCLUDED_eacompiler_H + + + + + diff --git a/libkram/eastl/include/EABase/config/eacompilertraits.h b/libkram/eastl/include/EABase/config/eacompilertraits.h new file mode 100644 index 00000000..1d8bcb43 --- /dev/null +++ b/libkram/eastl/include/EABase/config/eacompilertraits.h @@ -0,0 +1,2561 @@ +/*----------------------------------------------------------------------------- + * config/eacompilertraits.h + * + * Copyright (c) Electronic Arts Inc. All rights reserved. + *----------------------------------------------------------------------------- + * Currently supported defines include: + * EA_PREPROCESSOR_JOIN + * + * EA_COMPILER_IS_ANSIC + * EA_COMPILER_IS_C99 + * EA_COMPILER_IS_C11 + * EA_COMPILER_HAS_C99_TYPES + * EA_COMPILER_IS_CPLUSPLUS + * EA_COMPILER_MANAGED_CPP + * EA_COMPILER_INTMAX_SIZE + * EA_OFFSETOF + * EA_SIZEOF_MEMBER + * + * EA_ALIGN_OF() + * EA_ALIGN_MAX_STATIC / EA_ALIGN_MAX_AUTOMATIC + * EA_ALIGN() / EA_PREFIX_ALIGN() / EA_POSTFIX_ALIGN() + * EA_ALIGNED() + * EA_PACKED() + * + * EA_LIKELY() + * EA_UNLIKELY() + * EA_INIT_PRIORITY() + * EA_MAY_ALIAS() + * EA_ASSUME() + * EA_ANALYSIS_ASSUME() + * EA_PURE + * EA_WEAK + * EA_UNUSED() + * EA_EMPTY() + * + * EA_WCHAR_T_NON_NATIVE + * EA_WCHAR_SIZE = + * + * EA_RESTRICT + * EA_DEPRECATED / EA_PREFIX_DEPRECATED / EA_POSTFIX_DEPRECATED + * EA_FORCE_INLINE / EA_PREFIX_FORCE_INLINE / EA_POSTFIX_FORCE_INLINE + * EA_NO_INLINE / EA_PREFIX_NO_INLINE / EA_POSTFIX_NO_INLINE + * EA_NO_VTABLE / EA_CLASS_NO_VTABLE / EA_STRUCT_NO_VTABLE + * EA_PASCAL + * EA_PASCAL_FUNC() + * EA_SSE = [0 | 1] + * EA_IMPORT + * EA_EXPORT + * EA_PRAGMA_ONCE_SUPPORTED + * EA_ONCE + * EA_OVERRIDE + * EA_INHERITANCE_FINAL + * EA_SEALED + * EA_ABSTRACT + * EA_CONSTEXPR / EA_CONSTEXPR_OR_CONST + * EA_CONSTEXPR_IF + * EA_EXTERN_TEMPLATE + * EA_NOEXCEPT + * EA_NORETURN + * EA_CARRIES_DEPENDENCY + * EA_NON_COPYABLE / struct EANonCopyable + * EA_OPTIMIZE_OFF / EA_OPTIMIZE_ON + * EA_SIGNED_RIGHT_SHIFT_IS_UNSIGNED + * + * EA_DISABLE_VC_WARNING / EA_RESTORE_VC_WARNING / EA_DISABLE_ALL_VC_WARNINGS / EA_RESTORE_ALL_VC_WARNINGS + * EA_DISABLE_GCC_WARNING / EA_RESTORE_GCC_WARNING + * EA_DISABLE_CLANG_WARNING / EA_RESTORE_CLANG_WARNING + * EA_DISABLE_SN_WARNING / EA_RESTORE_SN_WARNING / EA_DISABLE_ALL_SN_WARNINGS / EA_RESTORE_ALL_SN_WARNINGS + * EA_DISABLE_GHS_WARNING / EA_RESTORE_GHS_WARNING + * EA_DISABLE_EDG_WARNING / EA_RESTORE_EDG_WARNING + * EA_DISABLE_CW_WARNING / EA_RESTORE_CW_WARNING + * + * EA_DISABLE_DEFAULT_CTOR + * EA_DISABLE_COPY_CTOR + * EA_DISABLE_MOVE_CTOR + * EA_DISABLE_ASSIGNMENT_OPERATOR + * EA_DISABLE_MOVE_OPERATOR + * + * Todo: + * Find a way to reliably detect wchar_t size at preprocessor time and + * implement it below for EA_WCHAR_SIZE. + * + * Todo: + * Find out how to support EA_PASCAL and EA_PASCAL_FUNC for systems in + * which it hasn't yet been found out for. 
+ *---------------------------------------------------------------------------*/ + + +#ifndef INCLUDED_eacompilertraits_H +#define INCLUDED_eacompilertraits_H + + #include + #include + + + // Metrowerks uses #defines in its core C header files to define + // the kind of information we need below (e.g. C99 compatibility) + + + + // Determine if this compiler is ANSI C compliant and if it is C99 compliant. + #if defined(__STDC__) + #define EA_COMPILER_IS_ANSIC 1 // The compiler claims to be ANSI C + + // Is the compiler a C99 compiler or equivalent? + // From ISO/IEC 9899:1999: + // 6.10.8 Predefined macro names + // __STDC_VERSION__ The integer constant 199901L. (150) + // + // 150) This macro was not specified in ISO/IEC 9899:1990 and was + // specified as 199409L in ISO/IEC 9899/AMD1:1995. The intention + // is that this will remain an integer constant of type long int + // that is increased with each revision of this International Standard. + // + #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) + #define EA_COMPILER_IS_C99 1 + #endif + + // Is the compiler a C11 compiler? + // From ISO/IEC 9899:2011: + // Page 176, 6.10.8.1 (Predefined macro names) : + // __STDC_VERSION__ The integer constant 201112L. (178) + // + #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) + #define EA_COMPILER_IS_C11 1 + #endif + #endif + + // Some compilers (e.g. GCC) define __USE_ISOC99 if they are not + // strictly C99 compilers (or are simply C++ compilers) but are set + // to use C99 functionality. Metrowerks defines _MSL_C99 as 1 in + // this case, but 0 otherwise. + #if (defined(__USE_ISOC99) || (defined(_MSL_C99) && (_MSL_C99 == 1))) && !defined(EA_COMPILER_IS_C99) + #define EA_COMPILER_IS_C99 1 + #endif + + // Metrowerks defines C99 types (e.g. intptr_t) instrinsically when in C99 mode (-lang C99 on the command line). + #if (defined(_MSL_C99) && (_MSL_C99 == 1)) + #define EA_COMPILER_HAS_C99_TYPES 1 + #endif + + #if defined(__GNUC__) + #if (((__GNUC__ * 100) + __GNUC_MINOR__) >= 302) // Also, GCC defines _HAS_C9X. + #define EA_COMPILER_HAS_C99_TYPES 1 // The compiler is not necessarily a C99 compiler, but it defines C99 types. + + #ifndef __STDC_LIMIT_MACROS + #define __STDC_LIMIT_MACROS 1 + #endif + + #ifndef __STDC_CONSTANT_MACROS + #define __STDC_CONSTANT_MACROS 1 // This tells the GCC compiler that we want it to use its native C99 types. + #endif + #endif + #endif + + #if defined(_MSC_VER) && (_MSC_VER >= 1600) + #define EA_COMPILER_HAS_C99_TYPES 1 + #endif + + #ifdef __cplusplus + #define EA_COMPILER_IS_CPLUSPLUS 1 + #endif + + + // ------------------------------------------------------------------------ + // EA_PREPROCESSOR_JOIN + // + // This macro joins the two arguments together, even when one of + // the arguments is itself a macro (see 16.3.1 in C++98 standard). + // This is often used to create a unique name with __LINE__. + // + // For example, this declaration: + // char EA_PREPROCESSOR_JOIN(unique_, __LINE__); + // expands to this: + // char unique_73; + // + // Note that all versions of MSVC++ up to at least version 7.1 + // fail to properly compile macros that use __LINE__ in them + // when the "program database for edit and continue" option + // is enabled. The result is that __LINE__ gets converted to + // something like __LINE__(Var+37). 
+ // + #ifndef EA_PREPROCESSOR_JOIN + #define EA_PREPROCESSOR_JOIN(a, b) EA_PREPROCESSOR_JOIN1(a, b) + #define EA_PREPROCESSOR_JOIN1(a, b) EA_PREPROCESSOR_JOIN2(a, b) + #define EA_PREPROCESSOR_JOIN2(a, b) a##b + #endif + + + // ------------------------------------------------------------------------ + // EA_STRINGIFY + // + // Example usage: + // printf("Line: %s", EA_STRINGIFY(__LINE__)); + // + #ifndef EA_STRINGIFY + #define EA_STRINGIFY(x) EA_STRINGIFYIMPL(x) + #define EA_STRINGIFYIMPL(x) #x + #endif + + + // ------------------------------------------------------------------------ + // EA_IDENTITY + // + #ifndef EA_IDENTITY + #define EA_IDENTITY(x) x + #endif + + + // ------------------------------------------------------------------------ + // EA_COMPILER_MANAGED_CPP + // Defined if this is being compiled with Managed C++ extensions + #ifdef EA_COMPILER_MSVC + #if EA_COMPILER_VERSION >= 1300 + #ifdef _MANAGED + #define EA_COMPILER_MANAGED_CPP 1 + #endif + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_COMPILER_INTMAX_SIZE + // + // This is related to the concept of intmax_t uintmax_t, but is available + // in preprocessor form as opposed to compile-time form. At compile-time + // you can use intmax_t and uintmax_t to use the actual types. + // + #if defined(__GNUC__) && defined(__x86_64__) + #define EA_COMPILER_INTMAX_SIZE 16 // intmax_t is __int128_t (GCC extension) and is 16 bytes. + #else + #define EA_COMPILER_INTMAX_SIZE 8 // intmax_t is int64_t and is 8 bytes. + #endif + + + + // ------------------------------------------------------------------------ + // EA_LPAREN / EA_RPAREN / EA_COMMA / EA_SEMI + // + // These are used for using special characters in macro-using expressions. + // Note that this macro intentionally uses (), as in some cases it can't + // work unless it does. + // + // Example usage: + // int x = SOME_MACRO(SomeTemplate); + // + #ifndef EA_LPAREN + #define EA_LPAREN() ( + #endif + #ifndef EA_RPAREN + #define EA_RPAREN() ) + #endif + #ifndef EA_COMMA + #define EA_COMMA() , + #endif + #ifndef EA_SEMI + #define EA_SEMI() ; + #endif + + + + + // ------------------------------------------------------------------------ + // EA_OFFSETOF + // Implements a portable version of the non-standard offsetof macro. + // + // The offsetof macro is guaranteed to only work with POD types. However, we wish to use + // it for non-POD types but where we know that offsetof will still work for the cases + // in which we use it. GCC unilaterally gives a warning when using offsetof with a non-POD, + // even if the given usage happens to work. So we make a workaround version of offsetof + // here for GCC which has the same effect but tricks the compiler into not issuing the warning. + // The 65536 does the compiler fooling; the reinterpret_cast prevents the possibility of + // an overloaded operator& for the class getting in the way. + // + // Example usage: + // struct A{ int x; int y; }; + // size_t n = EA_OFFSETOF(A, y); + // + #if defined(__GNUC__) // We can't use GCC 4's __builtin_offsetof because it mistakenly complains about non-PODs that are really PODs. + #define EA_OFFSETOF(struct_, member_) ((size_t)(((uintptr_t)&reinterpret_cast((((struct_*)65536)->member_))) - 65536)) + #else + #define EA_OFFSETOF(struct_, member_) offsetof(struct_, member_) + #endif + + // ------------------------------------------------------------------------ + // EA_SIZEOF_MEMBER + // Implements a portable way to determine the size of a member. 
+ // + // The EA_SIZEOF_MEMBER simply returns the size of a member within a class or struct; member + // access rules still apply. We offer two approaches depending on the compiler's support for non-static member + // initializers although most C++11 compilers support this. + // + // Example usage: + // struct A{ int x; int y; }; + // size_t n = EA_SIZEOF_MEMBER(A, y); + // + #ifndef EA_COMPILER_NO_EXTENDED_SIZEOF + #define EA_SIZEOF_MEMBER(struct_, member_) (sizeof(struct_::member_)) + #else + #define EA_SIZEOF_MEMBER(struct_, member_) (sizeof(((struct_*)0)->member_)) + #endif + + // ------------------------------------------------------------------------ + // alignment expressions + // + // Here we define + // EA_ALIGN_OF(type) // Returns size_t. + // EA_ALIGN_MAX_STATIC // The max align value that the compiler will respect for EA_ALIGN for static data (global and static variables). Some compilers allow high values, some allow no more than 8. EA_ALIGN_MIN is assumed to be 1. + // EA_ALIGN_MAX_AUTOMATIC // The max align value for automatic variables (variables declared as local to a function). + // EA_ALIGN(n) // Used as a prefix. n is byte alignment, with being a power of two. Most of the time you can use this and avoid using EA_PREFIX_ALIGN/EA_POSTFIX_ALIGN. + // EA_ALIGNED(t, v, n) // Type, variable, alignment. Used to align an instance. You should need this only for unusual compilers. + // EA_PACKED // Specifies that the given structure be packed (and not have its members aligned). + // + // Also we define the following for rare cases that it's needed. + // EA_PREFIX_ALIGN(n) // n is byte alignment, with being a power of two. You should need this only for unusual compilers. + // EA_POSTFIX_ALIGN(n) // Valid values for n are 1, 2, 4, 8, etc. You should need this only for unusual compilers. + // + // Example usage: + // size_t x = EA_ALIGN_OF(int); Non-aligned equivalents. Meaning + // EA_PREFIX_ALIGN(8) int x = 5; int x = 5; Align x on 8 for compilers that require prefix attributes. Can just use EA_ALIGN instead. + // EA_ALIGN(8) int x; int x; Align x on 8 for compilers that allow prefix attributes. + // int x EA_POSTFIX_ALIGN(8); int x; Align x on 8 for compilers that require postfix attributes. + // int x EA_POSTFIX_ALIGN(8) = 5; int x = 5; Align x on 8 for compilers that require postfix attributes. + // int x EA_POSTFIX_ALIGN(8)(5); int x(5); Align x on 8 for compilers that require postfix attributes. + // struct EA_PREFIX_ALIGN(8) X { int x; } EA_POSTFIX_ALIGN(8); struct X { int x; }; Define X as a struct which is aligned on 8 when used. + // EA_ALIGNED(int, x, 8) = 5; int x = 5; Align x on 8. + // EA_ALIGNED(int, x, 16)(5); int x(5); Align x on 16. + // EA_ALIGNED(int, x[3], 16); int x[3]; Align x array on 16. + // EA_ALIGNED(int, x[3], 16) = { 1, 2, 3 }; int x[3] = { 1, 2, 3 }; Align x array on 16. + // int x[3] EA_PACKED; int x[3]; Pack the 3 ints of the x array. GCC doesn't seem to support packing of int arrays. + // struct EA_ALIGN(32) X { int x; int y; }; struct X { int x; }; Define A as a struct which is aligned on 32 when used. + // EA_ALIGN(32) struct X { int x; int y; } Z; struct X { int x; } Z; Define A as a struct, and align the instance Z on 32. + // struct X { int x EA_PACKED; int y EA_PACKED; }; struct X { int x; int y; }; Pack the x and y members of struct X. + // struct X { int x; int y; } EA_PACKED; struct X { int x; int y; }; Pack the members of struct X. 
+ // typedef EA_ALIGNED(int, int16, 16); int16 n16; typedef int int16; int16 n16; Define int16 as an int which is aligned on 16. + // typedef EA_ALIGNED(X, X16, 16); X16 x16; typedef X X16; X16 x16; Define X16 as an X which is aligned on 16. + + #if !defined(EA_ALIGN_MAX) // If the user hasn't globally set an alternative value... + #if defined(EA_PROCESSOR_ARM) // ARM compilers in general tend to limit automatic variables to 8 or less. + #define EA_ALIGN_MAX_STATIC 1048576 + #define EA_ALIGN_MAX_AUTOMATIC 1 // Typically they support only built-in natural aligment types (both arm-eabi and apple-abi). + #elif defined(EA_PLATFORM_APPLE) + #define EA_ALIGN_MAX_STATIC 1048576 + #define EA_ALIGN_MAX_AUTOMATIC 16 + #else + #define EA_ALIGN_MAX_STATIC 1048576 // Arbitrarily high value. What is the actual max? + #define EA_ALIGN_MAX_AUTOMATIC 1048576 + #endif + #endif + + // EDG intends to be compatible with GCC but has a bug whereby it + // fails to support calling a constructor in an aligned declaration when + // using postfix alignment attributes. Prefix works for alignment, but does not align + // the size like postfix does. Prefix also fails on templates. So gcc style post fix + // is still used, but the user will need to use EA_POSTFIX_ALIGN before the constructor parameters. + #if defined(__GNUC__) && (__GNUC__ < 3) + #define EA_ALIGN_OF(type) ((size_t)__alignof__(type)) + #define EA_ALIGN(n) + #define EA_PREFIX_ALIGN(n) + #define EA_POSTFIX_ALIGN(n) __attribute__((aligned(n))) + #define EA_ALIGNED(variable_type, variable, n) variable_type variable __attribute__((aligned(n))) + #define EA_PACKED __attribute__((packed)) + + // GCC 3.x+, IBM, and clang support prefix attributes. + #elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__xlC__) || defined(__clang__) + #define EA_ALIGN_OF(type) ((size_t)__alignof__(type)) + #define EA_ALIGN(n) __attribute__((aligned(n))) + #define EA_PREFIX_ALIGN(n) + #define EA_POSTFIX_ALIGN(n) __attribute__((aligned(n))) + #define EA_ALIGNED(variable_type, variable, n) variable_type variable __attribute__((aligned(n))) + #define EA_PACKED __attribute__((packed)) + + // Metrowerks supports prefix attributes. + // Metrowerks does not support packed alignment attributes. + #elif defined(EA_COMPILER_INTEL) || defined(CS_UNDEFINED_STRING) || (defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1300)) + #define EA_ALIGN_OF(type) ((size_t)__alignof(type)) + #define EA_ALIGN(n) __declspec(align(n)) + #define EA_PREFIX_ALIGN(n) EA_ALIGN(n) + #define EA_POSTFIX_ALIGN(n) + #define EA_ALIGNED(variable_type, variable, n) EA_ALIGN(n) variable_type variable + #define EA_PACKED // See EA_PRAGMA_PACK_VC for an alternative. + + // Arm brand compiler + #elif defined(EA_COMPILER_ARM) + #define EA_ALIGN_OF(type) ((size_t)__ALIGNOF__(type)) + #define EA_ALIGN(n) __align(n) + #define EA_PREFIX_ALIGN(n) __align(n) + #define EA_POSTFIX_ALIGN(n) + #define EA_ALIGNED(variable_type, variable, n) __align(n) variable_type variable + #define EA_PACKED __packed + + #else // Unusual compilers + // There is nothing we can do about some of these. This is not as bad a problem as it seems. + // If the given platform/compiler doesn't support alignment specifications, then it's somewhat + // likely that alignment doesn't matter for that platform. Otherwise they would have defined + // functionality to manipulate alignment. 
+ #define EA_ALIGN(n) + #define EA_PREFIX_ALIGN(n) + #define EA_POSTFIX_ALIGN(n) + #define EA_ALIGNED(variable_type, variable, n) variable_type variable + #define EA_PACKED + + #ifdef __cplusplus + template struct EAAlignOf1 { enum { s = sizeof (T), value = s ^ (s & (s - 1)) }; }; + template struct EAAlignOf2; + template struct helper { template struct Val { enum { value = size_diff }; }; }; + template <> struct helper<0> { template struct Val { enum { value = EAAlignOf2::value }; }; }; + template struct EAAlignOf2 { struct Big { T x; char c; }; + enum { diff = sizeof (Big) - sizeof (T), value = helper::template Val::value }; }; + template struct EAAlignof3 { enum { x = EAAlignOf2::value, y = EAAlignOf1::value, value = x < y ? x : y }; }; + #define EA_ALIGN_OF(type) ((size_t)EAAlignof3::value) + + #else + // C implementation of EA_ALIGN_OF + // This implementation works for most cases, but doesn't directly work + // for types such as function pointer declarations. To work with those + // types you need to typedef the type and then use the typedef in EA_ALIGN_OF. + #define EA_ALIGN_OF(type) ((size_t)offsetof(struct { char c; type m; }, m)) + #endif + #endif + + // EA_PRAGMA_PACK_VC + // + // Wraps #pragma pack in a way that allows for cleaner code. + // + // Example usage: + // EA_PRAGMA_PACK_VC(push, 1) + // struct X{ char c; int i; }; + // EA_PRAGMA_PACK_VC(pop) + // + #if !defined(EA_PRAGMA_PACK_VC) + #if defined(EA_COMPILER_MSVC) + #define EA_PRAGMA_PACK_VC(...) __pragma(pack(__VA_ARGS__)) + #elif !defined(EA_COMPILER_NO_VARIADIC_MACROS) + #define EA_PRAGMA_PACK_VC(...) + #else + // No support. However, all compilers of significance to us support variadic macros. + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_LIKELY / EA_UNLIKELY + // + // Defined as a macro which gives a hint to the compiler for branch + // prediction. GCC gives you the ability to manually give a hint to + // the compiler about the result of a comparison, though it's often + // best to compile shipping code with profiling feedback under both + // GCC (-fprofile-arcs) and VC++ (/LTCG:PGO, etc.). However, there + // are times when you feel very sure that a boolean expression will + // usually evaluate to either true or false and can help the compiler + // by using an explicity directive... + // + // Example usage: + // if(EA_LIKELY(a == 0)) // Tell the compiler that a will usually equal 0. + // { ... } + // + // Example usage: + // if(EA_UNLIKELY(a == 0)) // Tell the compiler that a will usually not equal 0. + // { ... } + // + #ifndef EA_LIKELY + #if (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) + #if defined(__cplusplus) + #define EA_LIKELY(x) __builtin_expect(!!(x), true) + #define EA_UNLIKELY(x) __builtin_expect(!!(x), false) + #else + #define EA_LIKELY(x) __builtin_expect(!!(x), 1) + #define EA_UNLIKELY(x) __builtin_expect(!!(x), 0) + #endif + #else + #define EA_LIKELY(x) (x) + #define EA_UNLIKELY(x) (x) + #endif + #endif + + // ------------------------------------------------------------------------ + // EA_HAS_INCLUDE_AVAILABLE + // + // Used to guard against the EA_HAS_INCLUDE() macro on compilers that do not + // support said feature. 
+ // + // Example usage: + // + // #if EA_HAS_INCLUDE_AVAILABLE + // #if EA_HAS_INCLUDE("myinclude.h") + // #include "myinclude.h" + // #endif + // #endif + #if !defined(EA_HAS_INCLUDE_AVAILABLE) + #if EA_COMPILER_CPP17_ENABLED || EA_COMPILER_CLANG || EA_COMPILER_GNUC + #define EA_HAS_INCLUDE_AVAILABLE 1 + #else + #define EA_HAS_INCLUDE_AVAILABLE 0 + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_HAS_INCLUDE + // + // May be used in #if and #elif expressions to test for the existence + // of the header referenced in the operand. If possible it evaluates to a + // non-zero value and zero otherwise. The operand is the same form as the file + // in a #include directive. + // + // Example usage: + // + // #if EA_HAS_INCLUDE("myinclude.h") + // #include "myinclude.h" + // #endif + // + // #if EA_HAS_INCLUDE() + // #include + // #endif + + #if !defined(EA_HAS_INCLUDE) + #if EA_COMPILER_CPP17_ENABLED + #define EA_HAS_INCLUDE(x) __has_include(x) + #elif EA_COMPILER_CLANG + #define EA_HAS_INCLUDE(x) __has_include(x) + #elif EA_COMPILER_GNUC + #define EA_HAS_INCLUDE(x) __has_include(x) + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_INIT_PRIORITY_AVAILABLE + // + // This value is either not defined, or defined to 1. + // Defines if the GCC attribute init_priority is supported by the compiler. + // + #if !defined(EA_INIT_PRIORITY_AVAILABLE) + #if defined(__GNUC__) && !defined(__EDG__) // EDG typically #defines __GNUC__ but doesn't implement init_priority. + #define EA_INIT_PRIORITY_AVAILABLE 1 + #elif defined(__clang__) + #define EA_INIT_PRIORITY_AVAILABLE 1 // Clang implements init_priority + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_INIT_PRIORITY + // + // This is simply a wrapper for the GCC init_priority attribute that allows + // multiplatform code to be easier to read. This attribute doesn't apply + // to VC++ because VC++ uses file-level pragmas to control init ordering. + // + // Example usage: + // SomeClass gSomeClass EA_INIT_PRIORITY(2000); + // + #if !defined(EA_INIT_PRIORITY) + #if defined(EA_INIT_PRIORITY_AVAILABLE) + #define EA_INIT_PRIORITY(x) __attribute__ ((init_priority (x))) + #else + #define EA_INIT_PRIORITY(x) + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_INIT_SEG_AVAILABLE + // + // + #if !defined(EA_INIT_SEG_AVAILABLE) + #if defined(_MSC_VER) + #define EA_INIT_SEG_AVAILABLE 1 + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_INIT_SEG + // + // Specifies a keyword or code section that affects the order in which startup code is executed. + // + // https://docs.microsoft.com/en-us/cpp/preprocessor/init-seg?view=vs-2019 + // + // Example: + // EA_INIT_SEG(compiler) MyType gMyTypeGlobal; + // EA_INIT_SEG("my_section") MyOtherType gMyOtherTypeGlobal; + // + #if !defined(EA_INIT_SEG) + #if defined(EA_INIT_SEG_AVAILABLE) + #define EA_INIT_SEG(x) \ + __pragma(warning(push)) __pragma(warning(disable : 4074)) __pragma(warning(disable : 4075)) __pragma(init_seg(x)) \ + __pragma(warning(pop)) + #else + #define EA_INIT_SEG(x) + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_MAY_ALIAS_AVAILABLE + // + // Defined as 0, 1, or 2. + // Defines if the GCC attribute may_alias is supported by the compiler. 
+ // Consists of a value 0 (unsupported, shouldn't be used), 1 (some support), + // or 2 (full proper support). + // + #ifndef EA_MAY_ALIAS_AVAILABLE + #if defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 303) + #if !defined(__EDG__) // define it as 1 while defining GCC's support as 2. + #define EA_MAY_ALIAS_AVAILABLE 2 + #else + #define EA_MAY_ALIAS_AVAILABLE 0 + #endif + #else + #define EA_MAY_ALIAS_AVAILABLE 0 + #endif + #endif + + + // EA_MAY_ALIAS + // + // Defined as a macro that wraps the GCC may_alias attribute. This attribute + // has no significance for VC++ because VC++ doesn't support the concept of + // strict aliasing. Users should avoid writing code that breaks strict + // aliasing rules; EA_MAY_ALIAS is for cases with no alternative. + // + // Example usage: + // void* EA_MAY_ALIAS gPtr = NULL; + // + // Example usage: + // typedef void* EA_MAY_ALIAS pvoid_may_alias; + // pvoid_may_alias gPtr = NULL; + // + #if EA_MAY_ALIAS_AVAILABLE + #define EA_MAY_ALIAS __attribute__((__may_alias__)) + #else + #define EA_MAY_ALIAS + #endif + + + // ------------------------------------------------------------------------ + // EA_ASSUME + // + // This acts the same as the VC++ __assume directive and is implemented + // simply as a wrapper around it to allow portable usage of it and to take + // advantage of it if and when it appears in other compilers. + // + // Example usage: + // void Function(int a) { + // switch(a) { + // case 1: + // DoSomething(1); + // break; + // case 2: + // DoSomething(-1); + // break; + // default: + // EA_ASSUME(0); // This tells the optimizer that the default cannot be reached. + // } + // } + // + #ifndef EA_ASSUME + #if defined(_MSC_VER) && (_MSC_VER >= 1300) // If VC7.0 and later + #define EA_ASSUME(x) __assume(x) + #else + #define EA_ASSUME(x) + #endif + #endif + + + + // ------------------------------------------------------------------------ + // EA_ANALYSIS_ASSUME + // + // This acts the same as the VC++ __analysis_assume directive and is implemented + // simply as a wrapper around it to allow portable usage of it and to take + // advantage of it if and when it appears in other compilers. + // + // Example usage: + // char Function(char* p) { + // EA_ANALYSIS_ASSUME(p != NULL); + // return *p; + // } + // + #ifndef EA_ANALYSIS_ASSUME + #if defined(_MSC_VER) && (_MSC_VER >= 1300) // If VC7.0 and later + #define EA_ANALYSIS_ASSUME(x) __analysis_assume(!!(x)) // !! because that allows for convertible-to-bool in addition to bool. + #else + #define EA_ANALYSIS_ASSUME(x) + #endif + #endif + + + + // ------------------------------------------------------------------------ + // EA_DISABLE_VC_WARNING / EA_RESTORE_VC_WARNING + // + // Disable and re-enable warning(s) within code. + // This is simply a wrapper for VC++ #pragma warning(disable: nnnn) for the + // purpose of making code easier to read due to avoiding nested compiler ifdefs + // directly in code. 
+ // + // Example usage: + // EA_DISABLE_VC_WARNING(4127 3244) + // + // EA_RESTORE_VC_WARNING() + // + #ifndef EA_DISABLE_VC_WARNING + #if defined(_MSC_VER) + #define EA_DISABLE_VC_WARNING(w) \ + __pragma(warning(push)) \ + __pragma(warning(disable:w)) + #else + #define EA_DISABLE_VC_WARNING(w) + #endif + #endif + + #ifndef EA_RESTORE_VC_WARNING + #if defined(_MSC_VER) + #define EA_RESTORE_VC_WARNING() \ + __pragma(warning(pop)) + #else + #define EA_RESTORE_VC_WARNING() + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_ENABLE_VC_WARNING_AS_ERROR / EA_DISABLE_VC_WARNING_AS_ERROR + // + // Disable and re-enable treating a warning as error within code. + // This is simply a wrapper for VC++ #pragma warning(error: nnnn) for the + // purpose of making code easier to read due to avoiding nested compiler ifdefs + // directly in code. + // + // Example usage: + // EA_ENABLE_VC_WARNING_AS_ERROR(4996) + // + // EA_DISABLE_VC_WARNING_AS_ERROR() + // + #ifndef EA_ENABLE_VC_WARNING_AS_ERROR + #if defined(_MSC_VER) + #define EA_ENABLE_VC_WARNING_AS_ERROR(w) \ + __pragma(warning(push)) \ + __pragma(warning(error:w)) + #else + #define EA_ENABLE_VC_WARNING_AS_ERROR(w) + #endif + #endif + + #ifndef EA_DISABLE_VC_WARNING_AS_ERROR + #if defined(_MSC_VER) + #define EA_DISABLE_VC_WARNING_AS_ERROR() \ + __pragma(warning(pop)) + #else + #define EA_DISABLE_VC_WARNING_AS_ERROR() + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_DISABLE_GCC_WARNING / EA_RESTORE_GCC_WARNING + // + // Example usage: + // // Only one warning can be ignored per statement, due to how GCC works. + // EA_DISABLE_GCC_WARNING(-Wuninitialized) + // EA_DISABLE_GCC_WARNING(-Wunused) + // + // EA_RESTORE_GCC_WARNING() + // EA_RESTORE_GCC_WARNING() + // + #ifndef EA_DISABLE_GCC_WARNING + #if defined(EA_COMPILER_GNUC) + #define EAGCCWHELP0(x) #x + #define EAGCCWHELP1(x) EAGCCWHELP0(GCC diagnostic ignored x) + #define EAGCCWHELP2(x) EAGCCWHELP1(#x) + #endif + + #if defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4006) // Can't test directly for __GNUC__ because some compilers lie. + #define EA_DISABLE_GCC_WARNING(w) \ + _Pragma("GCC diagnostic push") \ + _Pragma(EAGCCWHELP2(w)) + #elif defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4004) + #define EA_DISABLE_GCC_WARNING(w) \ + _Pragma(EAGCCWHELP2(w)) + #else + #define EA_DISABLE_GCC_WARNING(w) + #endif + #endif + + #ifndef EA_RESTORE_GCC_WARNING + #if defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4006) + #define EA_RESTORE_GCC_WARNING() \ + _Pragma("GCC diagnostic pop") + #else + #define EA_RESTORE_GCC_WARNING() + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_DISABLE_ALL_GCC_WARNINGS / EA_RESTORE_ALL_GCC_WARNINGS + // + // This isn't possible except via using _Pragma("GCC system_header"), though + // that has some limitations in how it works. Another means is to manually + // disable individual warnings within a GCC diagnostic push statement. + // GCC doesn't have as many warnings as VC++ and EDG and so this may be feasible. + // ------------------------------------------------------------------------ + + + // ------------------------------------------------------------------------ + // EA_ENABLE_GCC_WARNING_AS_ERROR / EA_DISABLE_GCC_WARNING_AS_ERROR + // + // Example usage: + // // Only one warning can be treated as an error per statement, due to how GCC works. 
+ // EA_ENABLE_GCC_WARNING_AS_ERROR(-Wuninitialized) + // EA_ENABLE_GCC_WARNING_AS_ERROR(-Wunused) + // + // EA_DISABLE_GCC_WARNING_AS_ERROR() + // EA_DISABLE_GCC_WARNING_AS_ERROR() + // + #ifndef EA_ENABLE_GCC_WARNING_AS_ERROR + #if defined(EA_COMPILER_GNUC) + #define EAGCCWERRORHELP0(x) #x + #define EAGCCWERRORHELP1(x) EAGCCWERRORHELP0(GCC diagnostic error x) + #define EAGCCWERRORHELP2(x) EAGCCWERRORHELP1(#x) + #endif + + #if defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4006) // Can't test directly for __GNUC__ because some compilers lie. + #define EA_ENABLE_GCC_WARNING_AS_ERROR(w) \ + _Pragma("GCC diagnostic push") \ + _Pragma(EAGCCWERRORHELP2(w)) + #elif defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4004) + #define EA_DISABLE_GCC_WARNING(w) \ + _Pragma(EAGCCWERRORHELP2(w)) + #else + #define EA_DISABLE_GCC_WARNING(w) + #endif + #endif + + #ifndef EA_DISABLE_GCC_WARNING_AS_ERROR + #if defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4006) + #define EA_DISABLE_GCC_WARNING_AS_ERROR() \ + _Pragma("GCC diagnostic pop") + #else + #define EA_DISABLE_GCC_WARNING_AS_ERROR() + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_DISABLE_CLANG_WARNING / EA_RESTORE_CLANG_WARNING + // + // Example usage: + // // Only one warning can be ignored per statement, due to how clang works. + // EA_DISABLE_CLANG_WARNING(-Wuninitialized) + // EA_DISABLE_CLANG_WARNING(-Wunused) + // + // EA_RESTORE_CLANG_WARNING() + // EA_RESTORE_CLANG_WARNING() + // + #ifndef EA_DISABLE_CLANG_WARNING + #if defined(EA_COMPILER_CLANG) || defined(EA_COMPILER_CLANG_CL) + #define EACLANGWHELP0(x) #x + #define EACLANGWHELP1(x) EACLANGWHELP0(clang diagnostic ignored x) + #define EACLANGWHELP2(x) EACLANGWHELP1(#x) + + #define EA_DISABLE_CLANG_WARNING(w) \ + _Pragma("clang diagnostic push") \ + _Pragma(EACLANGWHELP2(-Wunknown-warning-option))\ + _Pragma(EACLANGWHELP2(w)) + #else + #define EA_DISABLE_CLANG_WARNING(w) + #endif + #endif + + #ifndef EA_RESTORE_CLANG_WARNING + #if defined(EA_COMPILER_CLANG) || defined(EA_COMPILER_CLANG_CL) + #define EA_RESTORE_CLANG_WARNING() \ + _Pragma("clang diagnostic pop") + #else + #define EA_RESTORE_CLANG_WARNING() + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_DISABLE_ALL_CLANG_WARNINGS / EA_RESTORE_ALL_CLANG_WARNINGS + // + // The situation for clang is the same as for GCC. See above. + // ------------------------------------------------------------------------ + + + // ------------------------------------------------------------------------ + // EA_ENABLE_CLANG_WARNING_AS_ERROR / EA_DISABLE_CLANG_WARNING_AS_ERROR + // + // Example usage: + // // Only one warning can be treated as an error per statement, due to how clang works. 
+ // EA_ENABLE_CLANG_WARNING_AS_ERROR(-Wuninitialized) + // EA_ENABLE_CLANG_WARNING_AS_ERROR(-Wunused) + // + // EA_DISABLE_CLANG_WARNING_AS_ERROR() + // EA_DISABLE_CLANG_WARNING_AS_ERROR() + // + #ifndef EA_ENABLE_CLANG_WARNING_AS_ERROR + #if defined(EA_COMPILER_CLANG) || defined(EA_COMPILER_CLANG_CL) + #define EACLANGWERRORHELP0(x) #x + #define EACLANGWERRORHELP1(x) EACLANGWERRORHELP0(clang diagnostic error x) + #define EACLANGWERRORHELP2(x) EACLANGWERRORHELP1(#x) + + #define EA_ENABLE_CLANG_WARNING_AS_ERROR(w) \ + _Pragma("clang diagnostic push") \ + _Pragma(EACLANGWERRORHELP2(w)) + #else + #define EA_DISABLE_CLANG_WARNING(w) + #endif + #endif + + #ifndef EA_DISABLE_CLANG_WARNING_AS_ERROR + #if defined(EA_COMPILER_CLANG) || defined(EA_COMPILER_CLANG_CL) + #define EA_DISABLE_CLANG_WARNING_AS_ERROR() \ + _Pragma("clang diagnostic pop") + #else + #define EA_DISABLE_CLANG_WARNING_AS_ERROR() + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_DISABLE_SN_WARNING / EA_RESTORE_SN_WARNING + // + // Note that we define this macro specifically for the SN compiler instead of + // having a generic one for EDG-based compilers. The reason for this is that + // while SN is indeed based on EDG, SN has different warning value mappings + // and thus warning 1234 for SN is not the same as 1234 for all other EDG compilers. + // + // Example usage: + // // Currently we are limited to one warning per line. + // EA_DISABLE_SN_WARNING(1787) + // EA_DISABLE_SN_WARNING(552) + // + // EA_RESTORE_SN_WARNING() + // EA_RESTORE_SN_WARNING() + // + #ifndef EA_DISABLE_SN_WARNING + #define EA_DISABLE_SN_WARNING(w) + #endif + + #ifndef EA_RESTORE_SN_WARNING + #define EA_RESTORE_SN_WARNING() + #endif + + + // ------------------------------------------------------------------------ + // EA_DISABLE_ALL_SN_WARNINGS / EA_RESTORE_ALL_SN_WARNINGS + // + // Example usage: + // EA_DISABLE_ALL_SN_WARNINGS() + // + // EA_RESTORE_ALL_SN_WARNINGS() + // + #ifndef EA_DISABLE_ALL_SN_WARNINGS + #define EA_DISABLE_ALL_SN_WARNINGS() + #endif + + #ifndef EA_RESTORE_ALL_SN_WARNINGS + #define EA_RESTORE_ALL_SN_WARNINGS() + #endif + + + + // ------------------------------------------------------------------------ + // EA_DISABLE_GHS_WARNING / EA_RESTORE_GHS_WARNING + // + // Disable warnings from the Green Hills compiler. + // + // Example usage: + // EA_DISABLE_GHS_WARNING(193) + // EA_DISABLE_GHS_WARNING(236, 5323) + // + // EA_RESTORE_GHS_WARNING() + // EA_RESTORE_GHS_WARNING() + // + #ifndef EA_DISABLE_GHS_WARNING + #define EA_DISABLE_GHS_WARNING(w) + #endif + + #ifndef EA_RESTORE_GHS_WARNING + #define EA_RESTORE_GHS_WARNING() + #endif + + + // ------------------------------------------------------------------------ + // EA_DISABLE_ALL_GHS_WARNINGS / EA_RESTORE_ALL_GHS_WARNINGS + // + // #ifndef EA_DISABLE_ALL_GHS_WARNINGS + // #if defined(EA_COMPILER_GREEN_HILLS) + // #define EA_DISABLE_ALL_GHS_WARNINGS(w) \_ + // _Pragma("_________") + // #else + // #define EA_DISABLE_ALL_GHS_WARNINGS(w) + // #endif + // #endif + // + // #ifndef EA_RESTORE_ALL_GHS_WARNINGS + // #if defined(EA_COMPILER_GREEN_HILLS) + // #define EA_RESTORE_ALL_GHS_WARNINGS() \_ + // _Pragma("_________") + // #else + // #define EA_RESTORE_ALL_GHS_WARNINGS() + // #endif + // #endif + + + + // ------------------------------------------------------------------------ + // EA_DISABLE_EDG_WARNING / EA_RESTORE_EDG_WARNING + // + // Example usage: + // // Currently we are limited to one warning per line. 
+ // EA_DISABLE_EDG_WARNING(193) + // EA_DISABLE_EDG_WARNING(236) + // + // EA_RESTORE_EDG_WARNING() + // EA_RESTORE_EDG_WARNING() + // + #ifndef EA_DISABLE_EDG_WARNING + // EDG-based compilers are inconsistent in how the implement warning pragmas. + #if defined(EA_COMPILER_EDG) && !defined(EA_COMPILER_INTEL) && !defined(EA_COMPILER_RVCT) + #define EAEDGWHELP0(x) #x + #define EAEDGWHELP1(x) EAEDGWHELP0(diag_suppress x) + + #define EA_DISABLE_EDG_WARNING(w) \ + _Pragma("control %push diag") \ + _Pragma(EAEDGWHELP1(w)) + #else + #define EA_DISABLE_EDG_WARNING(w) + #endif + #endif + + #ifndef EA_RESTORE_EDG_WARNING + #if defined(EA_COMPILER_EDG) && !defined(EA_COMPILER_INTEL) && !defined(EA_COMPILER_RVCT) + #define EA_RESTORE_EDG_WARNING() \ + _Pragma("control %pop diag") + #else + #define EA_RESTORE_EDG_WARNING() + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_DISABLE_ALL_EDG_WARNINGS / EA_RESTORE_ALL_EDG_WARNINGS + // + //#ifndef EA_DISABLE_ALL_EDG_WARNINGS + // #if defined(EA_COMPILER_EDG) && !defined(EA_COMPILER_SN) + // #define EA_DISABLE_ALL_EDG_WARNINGS(w) \_ + // _Pragma("_________") + // #else + // #define EA_DISABLE_ALL_EDG_WARNINGS(w) + // #endif + //#endif + // + //#ifndef EA_RESTORE_ALL_EDG_WARNINGS + // #if defined(EA_COMPILER_EDG) && !defined(EA_COMPILER_SN) + // #define EA_RESTORE_ALL_EDG_WARNINGS() \_ + // _Pragma("_________") + // #else + // #define EA_RESTORE_ALL_EDG_WARNINGS() + // #endif + //#endif + + + + // ------------------------------------------------------------------------ + // EA_DISABLE_CW_WARNING / EA_RESTORE_CW_WARNING + // + // Note that this macro can only control warnings via numbers and not by + // names. The reason for this is that the compiler's syntax for such + // warnings is not the same as for numbers. + // + // Example usage: + // // Currently we are limited to one warning per line and must also specify the warning in the restore macro. + // EA_DISABLE_CW_WARNING(10317) + // EA_DISABLE_CW_WARNING(10324) + // + // EA_RESTORE_CW_WARNING(10317) + // EA_RESTORE_CW_WARNING(10324) + // + #ifndef EA_DISABLE_CW_WARNING + #define EA_DISABLE_CW_WARNING(w) + #endif + + #ifndef EA_RESTORE_CW_WARNING + + #define EA_RESTORE_CW_WARNING(w) + + #endif + + + // ------------------------------------------------------------------------ + // EA_DISABLE_ALL_CW_WARNINGS / EA_RESTORE_ALL_CW_WARNINGS + // + #ifndef EA_DISABLE_ALL_CW_WARNINGS + #define EA_DISABLE_ALL_CW_WARNINGS() + + #endif + + #ifndef EA_RESTORE_ALL_CW_WARNINGS + #define EA_RESTORE_ALL_CW_WARNINGS() + #endif + + + + // ------------------------------------------------------------------------ + // EA_PURE + // + // This acts the same as the GCC __attribute__ ((pure)) directive and is + // implemented simply as a wrapper around it to allow portable usage of + // it and to take advantage of it if and when it appears in other compilers. + // + // A "pure" function is one that has no effects except its return value and + // its return value is a function of only the function's parameters or + // non-volatile global variables. Any parameter or global variable access + // must be read-only. Loop optimization and subexpression elimination can be + // applied to such functions. A common example is strlen(): Given identical + // inputs, the function's return value (its only effect) is invariant across + // multiple invocations and thus can be pulled out of a loop and called but once. 
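+ //
+ // Illustrative sketch (added, not upstream EABase text): a strlen-style helper is a typical
+ // candidate, since its result depends only on its argument and the memory it reads.
+ // EA_PURE size_t StringLength(const char* pString); // hypothetical function; calls can be hoisted out of loops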
+ // + // Example usage: + // EA_PURE void Function(); + // + #ifndef EA_PURE + #if defined(EA_COMPILER_GNUC) + #define EA_PURE __attribute__((pure)) + #elif defined(EA_COMPILER_ARM) // Arm brand compiler for ARM CPU + #define EA_PURE __pure + #else + #define EA_PURE + #endif + #endif + + + + // ------------------------------------------------------------------------ + // EA_WEAK + // EA_WEAK_SUPPORTED -- defined as 0 or 1. + // + // GCC + // The weak attribute causes the declaration to be emitted as a weak + // symbol rather than a global. This is primarily useful in defining + // library functions which can be overridden in user code, though it + // can also be used with non-function declarations. + // + // VC++ + // At link time, if multiple definitions of a COMDAT are seen, the linker + // picks one and discards the rest. If the linker option /OPT:REF + // is selected, then COMDAT elimination will occur to remove all the + // unreferenced data items in the linker output. + // + // Example usage: + // EA_WEAK void Function(); + // + #ifndef EA_WEAK + #if defined(_MSC_VER) && (_MSC_VER >= 1300) // If VC7.0 and later + #define EA_WEAK __declspec(selectany) + #define EA_WEAK_SUPPORTED 1 + #elif defined(_MSC_VER) || (defined(__GNUC__) && defined(__CYGWIN__)) + #define EA_WEAK + #define EA_WEAK_SUPPORTED 0 + #elif defined(EA_COMPILER_ARM) // Arm brand compiler for ARM CPU + #define EA_WEAK __weak + #define EA_WEAK_SUPPORTED 1 + #else // GCC and IBM compilers, others. + #define EA_WEAK __attribute__((weak)) + #define EA_WEAK_SUPPORTED 1 + #endif + #endif + + + + // ------------------------------------------------------------------------ + // EA_UNUSED + // + // Makes compiler warnings about unused variables go away. + // + // Example usage: + // void Function(int x) + // { + // int y; + // EA_UNUSED(x); + // EA_UNUSED(y); + // } + // + #ifndef EA_UNUSED + // The EDG solution below is pretty weak and needs to be augmented or replaced. + // It can't handle the C language, is limited to places where template declarations + // can be used, and requires the type x to be usable as a functions reference argument. + #if defined(__cplusplus) && defined(__EDG__) + template + inline void EABaseUnused(T const volatile & x) { (void)x; } + #define EA_UNUSED(x) EABaseUnused(x) + #else + #define EA_UNUSED(x) (void)x + #endif + #endif + + + + // ------------------------------------------------------------------------ + // EA_EMPTY + // + // Allows for a null statement, usually for the purpose of avoiding compiler warnings. + // + // Example usage: + // #ifdef EA_DEBUG + // #define MyDebugPrintf(x, y) printf(x, y) + // #else + // #define MyDebugPrintf(x, y) EA_EMPTY + // #endif + // + #ifndef EA_EMPTY + #define EA_EMPTY (void)0 + #endif + + + // ------------------------------------------------------------------------ + // EA_CURRENT_FUNCTION + // + // Provides a consistent way to get the current function name as a macro + // like the __FILE__ and __LINE__ macros work. The C99 standard specifies + // that __func__ be provided by the compiler, but most compilers don't yet + // follow that convention. However, many compilers have an alternative. + // + // We also define EA_CURRENT_FUNCTION_SUPPORTED for when it is not possible + // to have EA_CURRENT_FUNCTION work as expected. + // + // Defined inside a function because otherwise the macro might not be + // defined and code below might not compile. This happens with some + // compilers. 
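// A minimal usage sketch (MY_TRACE_ENTER below is a hypothetical macro, not part
// of EABase). EA_CURRENT_FUNCTION expands to a character string that can be used
// wherever a const char* is expected, alongside __FILE__ and __LINE__:
//
//     #include <cstdio>
//
//     #define MY_TRACE_ENTER() printf("entering %s (%s:%d)\n", EA_CURRENT_FUNCTION, __FILE__, __LINE__)
//
//     void LoadAssets()
//     {
//         MY_TRACE_ENTER();   // e.g. prints "entering void LoadAssets() (Assets.cpp:42)"
//     }
//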
+ // + #ifndef EA_CURRENT_FUNCTION + #if defined __GNUC__ || (defined __ICC && __ICC >= 600) + #define EA_CURRENT_FUNCTION __PRETTY_FUNCTION__ + #elif defined(__FUNCSIG__) + #define EA_CURRENT_FUNCTION __FUNCSIG__ + #elif (defined __INTEL_COMPILER && __INTEL_COMPILER >= 600) || (defined __IBMCPP__ && __IBMCPP__ >= 500) || (defined CS_UNDEFINED_STRING && CS_UNDEFINED_STRING >= 0x4200) + #define EA_CURRENT_FUNCTION __FUNCTION__ + #elif defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901 + #define EA_CURRENT_FUNCTION __func__ + #else + #define EA_CURRENT_FUNCTION "(unknown function)" + #endif + #endif + + + // ------------------------------------------------------------------------ + // wchar_t + // Here we define: + // EA_WCHAR_T_NON_NATIVE + // EA_WCHAR_SIZE = + // + #ifndef EA_WCHAR_T_NON_NATIVE + // Compilers that always implement wchar_t as native include: + // COMEAU, new SN, and other EDG-based compilers. + // GCC + // Borland + // SunPro + // IBM Visual Age + #if defined(EA_COMPILER_INTEL) + #if (EA_COMPILER_VERSION < 700) + #define EA_WCHAR_T_NON_NATIVE 1 + #else + #if (!defined(_WCHAR_T_DEFINED) && !defined(_WCHAR_T)) + #define EA_WCHAR_T_NON_NATIVE 1 + #endif + #endif + #elif defined(EA_COMPILER_MSVC) || (defined(EA_COMPILER_CLANG) && defined(EA_PLATFORM_WINDOWS)) + #ifndef _NATIVE_WCHAR_T_DEFINED + #define EA_WCHAR_T_NON_NATIVE 1 + #endif + #elif defined(__EDG_VERSION__) && (!defined(_WCHAR_T) && (__EDG_VERSION__ < 400)) // EDG prior to v4 uses _WCHAR_T to indicate if wchar_t is native. v4+ may define something else, but we're not currently aware of it. + #define EA_WCHAR_T_NON_NATIVE 1 + #endif + #endif + + #ifndef EA_WCHAR_SIZE // If the user hasn't specified that it is a given size... + #if defined(__WCHAR_MAX__) // GCC defines this for most platforms. + #if (__WCHAR_MAX__ == 2147483647) || (__WCHAR_MAX__ == 4294967295) + #define EA_WCHAR_SIZE 4 + #elif (__WCHAR_MAX__ == 32767) || (__WCHAR_MAX__ == 65535) + #define EA_WCHAR_SIZE 2 + #elif (__WCHAR_MAX__ == 127) || (__WCHAR_MAX__ == 255) + #define EA_WCHAR_SIZE 1 + #else + #define EA_WCHAR_SIZE 4 + #endif + #elif defined(WCHAR_MAX) // The SN and Arm compilers define this. + #if (WCHAR_MAX == 2147483647) || (WCHAR_MAX == 4294967295) + #define EA_WCHAR_SIZE 4 + #elif (WCHAR_MAX == 32767) || (WCHAR_MAX == 65535) + #define EA_WCHAR_SIZE 2 + #elif (WCHAR_MAX == 127) || (WCHAR_MAX == 255) + #define EA_WCHAR_SIZE 1 + #else + #define EA_WCHAR_SIZE 4 + #endif + #elif defined(__WCHAR_BIT) // Green Hills (and other versions of EDG?) uses this. + #if (__WCHAR_BIT == 16) + #define EA_WCHAR_SIZE 2 + #elif (__WCHAR_BIT == 32) + #define EA_WCHAR_SIZE 4 + #elif (__WCHAR_BIT == 8) + #define EA_WCHAR_SIZE 1 + #else + #define EA_WCHAR_SIZE 4 + #endif + #elif defined(_WCMAX) // The SN and Arm compilers define this. + #if (_WCMAX == 2147483647) || (_WCMAX == 4294967295) + #define EA_WCHAR_SIZE 4 + #elif (_WCMAX == 32767) || (_WCMAX == 65535) + #define EA_WCHAR_SIZE 2 + #elif (_WCMAX == 127) || (_WCMAX == 255) + #define EA_WCHAR_SIZE 1 + #else + #define EA_WCHAR_SIZE 4 + #endif + #elif defined(EA_PLATFORM_UNIX) + // It is standard on Unix to have wchar_t be int32_t or uint32_t. + // All versions of GNUC default to a 32 bit wchar_t, but EA has used + // the -fshort-wchar GCC command line option to force it to 16 bit. + // If you know that the compiler is set to use a wchar_t of other than + // the default, you need to manually define EA_WCHAR_SIZE for the build. 
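// As a sketch of the sanity check implied above: user code can verify the chosen
// value against the compiler after including this header (assumes C++11 or later
// for static_assert at namespace scope):
//
//     static_assert(EA_WCHAR_SIZE == sizeof(wchar_t),
//                   "EA_WCHAR_SIZE disagrees with the compiler's wchar_t; define it manually for this build.");
//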
+ #define EA_WCHAR_SIZE 4 + #else + // It is standard on Windows to have wchar_t be uint16_t. GCC + // defines wchar_t as int by default. Electronic Arts has + // standardized on wchar_t being an unsigned 16 bit value on all + // console platforms. Given that there is currently no known way to + // tell at preprocessor time what the size of wchar_t is, we declare + // it to be 2, as this is the Electronic Arts standard. If you have + // EA_WCHAR_SIZE != sizeof(wchar_t), then your code might not be + // broken, but it also won't work with wchar libraries and data from + // other parts of EA. Under GCC, you can force wchar_t to two bytes + // with the -fshort-wchar compiler argument. + #define EA_WCHAR_SIZE 2 + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_RESTRICT + // + // The C99 standard defines a new keyword, restrict, which allows for the + // improvement of code generation regarding memory usage. Compilers can + // generate significantly faster code when you are able to use restrict. + // + // Example usage: + // void DoSomething(char* EA_RESTRICT p1, char* EA_RESTRICT p2); + // + #ifndef EA_RESTRICT + #if defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1400) // If VC8 (VS2005) or later... + #define EA_RESTRICT __restrict + #elif defined(EA_COMPILER_CLANG) + #define EA_RESTRICT __restrict + #elif defined(EA_COMPILER_GNUC) // Includes GCC and other compilers emulating GCC. + #define EA_RESTRICT __restrict // GCC defines 'restrict' (as opposed to __restrict) in C99 mode only. + #elif defined(EA_COMPILER_ARM) + #define EA_RESTRICT __restrict + #elif defined(EA_COMPILER_IS_C99) + #define EA_RESTRICT restrict + #else + // If the compiler didn't support restricted pointers, defining EA_RESTRICT + // away would result in compiling and running fine but you just wouldn't + // the same level of optimization. On the other hand, all the major compilers + // support restricted pointers. + #define EA_RESTRICT + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_DEPRECATED // Used as a prefix. + // EA_PREFIX_DEPRECATED // You should need this only for unusual compilers. + // EA_POSTFIX_DEPRECATED // You should need this only for unusual compilers. + // EA_DEPRECATED_MESSAGE // Used as a prefix and provides a deprecation message. + // + // Example usage: + // EA_DEPRECATED void Function(); + // EA_DEPRECATED_MESSAGE("Use 1.0v API instead") void Function(); + // + // or for maximum portability: + // EA_PREFIX_DEPRECATED void Function() EA_POSTFIX_DEPRECATED; + // + + #ifndef EA_DEPRECATED + #if defined(EA_COMPILER_CPP14_ENABLED) + #define EA_DEPRECATED [[deprecated]] + #elif defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION > 1300) // If VC7 (VS2003) or later... + #define EA_DEPRECATED __declspec(deprecated) + #elif defined(EA_COMPILER_MSVC) + #define EA_DEPRECATED + #else + #define EA_DEPRECATED __attribute__((deprecated)) + #endif + #endif + + #ifndef EA_PREFIX_DEPRECATED + #if defined(EA_COMPILER_CPP14_ENABLED) + #define EA_PREFIX_DEPRECATED [[deprecated]] + #define EA_POSTFIX_DEPRECATED + #elif defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION > 1300) // If VC7 (VS2003) or later... 
+ #define EA_PREFIX_DEPRECATED __declspec(deprecated) + #define EA_POSTFIX_DEPRECATED + #elif defined(EA_COMPILER_MSVC) + #define EA_PREFIX_DEPRECATED + #define EA_POSTFIX_DEPRECATED + #else + #define EA_PREFIX_DEPRECATED + #define EA_POSTFIX_DEPRECATED __attribute__((deprecated)) + #endif + #endif + + #ifndef EA_DEPRECATED_MESSAGE + #if defined(EA_COMPILER_CPP14_ENABLED) + #define EA_DEPRECATED_MESSAGE(msg) [[deprecated(#msg)]] + #else + // Compiler does not support depreaction messages, explicitly drop the msg but still mark the function as deprecated + #define EA_DEPRECATED_MESSAGE(msg) EA_DEPRECATED + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_FORCE_INLINE // Used as a prefix. + // EA_PREFIX_FORCE_INLINE // You should need this only for unusual compilers. + // EA_POSTFIX_FORCE_INLINE // You should need this only for unusual compilers. + // + // Example usage: + // EA_FORCE_INLINE void Foo(); // Implementation elsewhere. + // EA_PREFIX_FORCE_INLINE void Foo() EA_POSTFIX_FORCE_INLINE; // Implementation elsewhere. + // + // Note that when the prefix version of this function is used, it replaces + // the regular C++ 'inline' statement. Thus you should not use both the + // C++ inline statement and this macro with the same function declaration. + // + // To force inline usage under GCC 3.1+, you use this: + // inline void Foo() __attribute__((always_inline)); + // or + // inline __attribute__((always_inline)) void Foo(); + // + // The CodeWarrior compiler doesn't have the concept of forcing inlining per function. + // + #ifndef EA_FORCE_INLINE + #if defined(EA_COMPILER_MSVC) + #define EA_FORCE_INLINE __forceinline + #elif defined(EA_COMPILER_GNUC) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 301) || defined(EA_COMPILER_CLANG) + #if defined(__cplusplus) + #define EA_FORCE_INLINE inline __attribute__((always_inline)) + #else + #define EA_FORCE_INLINE __inline__ __attribute__((always_inline)) + #endif + #else + #if defined(__cplusplus) + #define EA_FORCE_INLINE inline + #else + #define EA_FORCE_INLINE __inline + #endif + #endif + #endif + + #if defined(EA_COMPILER_GNUC) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 301) || defined(EA_COMPILER_CLANG) + #define EA_PREFIX_FORCE_INLINE inline + #define EA_POSTFIX_FORCE_INLINE __attribute__((always_inline)) + #else + #define EA_PREFIX_FORCE_INLINE inline + #define EA_POSTFIX_FORCE_INLINE + #endif + + + // ------------------------------------------------------------------------ + // EA_FORCE_INLINE_LAMBDA + // + // EA_FORCE_INLINE_LAMBDA is used to force inline a call to a lambda when possible. + // Force inlining a lambda can be useful to reduce overhead in situations where a lambda may + // may only be called once, or inlining allows the compiler to apply other optimizations that wouldn't + // otherwise be possible. + // + // The ability to force inline a lambda is currently only available on a subset of compilers. + // + // Example usage: + // + // auto lambdaFunction = []() EA_FORCE_INLINE_LAMBDA + // { + // }; + // + #ifndef EA_FORCE_INLINE_LAMBDA + #if defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG) + #define EA_FORCE_INLINE_LAMBDA __attribute__((always_inline)) + #else + #define EA_FORCE_INLINE_LAMBDA + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_NO_INLINE // Used as a prefix. + // EA_PREFIX_NO_INLINE // You should need this only for unusual compilers. 
+ // EA_POSTFIX_NO_INLINE // You should need this only for unusual compilers. + // + // Example usage: + // EA_NO_INLINE void Foo(); // Implementation elsewhere. + // EA_PREFIX_NO_INLINE void Foo() EA_POSTFIX_NO_INLINE; // Implementation elsewhere. + // + // That this declaration is incompatbile with C++ 'inline' and any + // variant of EA_FORCE_INLINE. + // + // To disable inline usage under VC++ priof to VS2005, you need to use this: + // #pragma inline_depth(0) // Disable inlining. + // void Foo() { ... } + // #pragma inline_depth() // Restore to default. + // + // Since there is no easy way to disable inlining on a function-by-function + // basis in VC++ prior to VS2005, the best strategy is to write platform-specific + // #ifdefs in the code or to disable inlining for a given module and enable + // functions individually with EA_FORCE_INLINE. + // + #ifndef EA_NO_INLINE + #if defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1400) // If VC8 (VS2005) or later... + #define EA_NO_INLINE __declspec(noinline) + #elif defined(EA_COMPILER_MSVC) + #define EA_NO_INLINE + #else + #define EA_NO_INLINE __attribute__((noinline)) + #endif + #endif + + #if defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1400) // If VC8 (VS2005) or later... + #define EA_PREFIX_NO_INLINE __declspec(noinline) + #define EA_POSTFIX_NO_INLINE + #elif defined(EA_COMPILER_MSVC) + #define EA_PREFIX_NO_INLINE + #define EA_POSTFIX_NO_INLINE + #else + #define EA_PREFIX_NO_INLINE + #define EA_POSTFIX_NO_INLINE __attribute__((noinline)) + #endif + + + // ------------------------------------------------------------------------ + // EA_NO_VTABLE + // + // Example usage: + // class EA_NO_VTABLE X { + // virtual void InterfaceFunction(); + // }; + // + // EA_CLASS_NO_VTABLE(X) { + // virtual void InterfaceFunction(); + // }; + // + #ifdef EA_COMPILER_MSVC + #define EA_NO_VTABLE __declspec(novtable) + #define EA_CLASS_NO_VTABLE(x) class __declspec(novtable) x + #define EA_STRUCT_NO_VTABLE(x) struct __declspec(novtable) x + #else + #define EA_NO_VTABLE + #define EA_CLASS_NO_VTABLE(x) class x + #define EA_STRUCT_NO_VTABLE(x) struct x + #endif + + + // ------------------------------------------------------------------------ + // EA_PASCAL + // + // Also known on PC platforms as stdcall. + // This convention causes the compiler to assume that the called function + // will pop off the stack space used to pass arguments, unless it takes a + // variable number of arguments. + // + // Example usage: + // this: + // void DoNothing(int x); + // void DoNothing(int x){} + // would be written as this: + // void EA_PASCAL_FUNC(DoNothing(int x)); + // void EA_PASCAL_FUNC(DoNothing(int x)){} + // + #ifndef EA_PASCAL + #if defined(EA_COMPILER_MSVC) + #define EA_PASCAL __stdcall + #elif defined(EA_COMPILER_GNUC) && defined(EA_PROCESSOR_X86) + #define EA_PASCAL __attribute__((stdcall)) + #else + // Some compilers simply don't support pascal calling convention. + // As a result, there isn't an issue here, since the specification of + // pascal calling convention is for the purpose of disambiguating the + // calling convention that is applied. 
+ #define EA_PASCAL + #endif + #endif + + #ifndef EA_PASCAL_FUNC + #if defined(EA_COMPILER_MSVC) + #define EA_PASCAL_FUNC(funcname_and_paramlist) __stdcall funcname_and_paramlist + #elif defined(EA_COMPILER_GNUC) && defined(EA_PROCESSOR_X86) + #define EA_PASCAL_FUNC(funcname_and_paramlist) __attribute__((stdcall)) funcname_and_paramlist + #else + #define EA_PASCAL_FUNC(funcname_and_paramlist) funcname_and_paramlist + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_SSE + // Visual C Processor Packs define _MSC_FULL_VER and are needed for SSE + // Intel C also has SSE support. + // EA_SSE is used to select FPU or SSE versions in hw_select.inl + // + // EA_SSE defines the level of SSE support: + // 0 indicates no SSE support + // 1 indicates SSE1 is supported + // 2 indicates SSE2 is supported + // 3 indicates SSE3 (or greater) is supported + // + // Note: SSE support beyond SSE3 can't be properly represented as a single + // version number. Instead users should use specific SSE defines (e.g. + // EA_SSE4_2) to detect what specific support is available. EA_SSE being + // equal to 3 really only indicates that SSE3 or greater is supported. + #ifndef EA_SSE + #if defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG) + #if defined(__SSE3__) + #define EA_SSE 3 + #elif defined(__SSE2__) + #define EA_SSE 2 + #elif defined(__SSE__) && __SSE__ + #define EA_SSE 1 + #else + #define EA_SSE 0 + #endif + #elif (defined(EA_SSE3) && EA_SSE3) || defined EA_PLATFORM_XBOXONE || defined CS_UNDEFINED_STRING + #define EA_SSE 3 + #elif defined(EA_SSE2) && EA_SSE2 + #define EA_SSE 2 + #elif defined(EA_PROCESSOR_X86) && defined(_MSC_FULL_VER) && !defined(__NOSSE__) && defined(_M_IX86_FP) + #define EA_SSE _M_IX86_FP + #elif defined(EA_PROCESSOR_X86) && defined(EA_COMPILER_INTEL) && !defined(__NOSSE__) + #define EA_SSE 1 + #elif defined(EA_PROCESSOR_X86_64) + // All x64 processors support SSE2 or higher + #define EA_SSE 2 + #else + #define EA_SSE 0 + #endif + #endif + + // ------------------------------------------------------------------------ + // We define separate defines for SSE support beyond SSE1. These defines + // are particularly useful for detecting SSE4.x features since there isn't + // a single concept of SSE4. + // + // The following SSE defines are always defined. 0 indicates the + // feature/level of SSE is not supported, and 1 indicates support is + // available. 
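// For illustration (AddFloat4 below is a hypothetical function, not part of
// EABase), code usually selects an implementation at preprocessing time based on
// these values, with a scalar fallback when the level is 0:
//
//     #if EA_SSE >= 1
//         #include <xmmintrin.h>
//         inline void AddFloat4(float* pDst, const float* pA, const float* pB)
//         {
//             _mm_storeu_ps(pDst, _mm_add_ps(_mm_loadu_ps(pA), _mm_loadu_ps(pB)));
//         }
//     #else
//         inline void AddFloat4(float* pDst, const float* pA, const float* pB)
//         {
//             for (int i = 0; i < 4; ++i)
//                 pDst[i] = pA[i] + pB[i];
//         }
//     #endif
//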
+ #ifndef EA_SSE2 + #if EA_SSE >= 2 + #define EA_SSE2 1 + #else + #define EA_SSE2 0 + #endif + #endif + #ifndef EA_SSE3 + #if EA_SSE >= 3 + #define EA_SSE3 1 + #else + #define EA_SSE3 0 + #endif + #endif + #ifndef EA_SSSE3 + #if defined __SSSE3__ || defined EA_PLATFORM_XBOXONE || defined CS_UNDEFINED_STRING + #define EA_SSSE3 1 + #else + #define EA_SSSE3 0 + #endif + #endif + #ifndef EA_SSE4_1 + #if defined __SSE4_1__ || defined EA_PLATFORM_XBOXONE || defined CS_UNDEFINED_STRING + #define EA_SSE4_1 1 + #else + #define EA_SSE4_1 0 + #endif + #endif + #ifndef EA_SSE4_2 + #if defined __SSE4_2__ || defined EA_PLATFORM_XBOXONE || defined CS_UNDEFINED_STRING + #define EA_SSE4_2 1 + #else + #define EA_SSE4_2 0 + #endif + #endif + #ifndef EA_SSE4A + #if defined __SSE4A__ || defined EA_PLATFORM_XBOXONE || defined CS_UNDEFINED_STRING + #define EA_SSE4A 1 + #else + #define EA_SSE4A 0 + #endif + #endif + + // ------------------------------------------------------------------------ + // EA_AVX + // EA_AVX may be used to determine if Advanced Vector Extensions are available for the target architecture + // + // EA_AVX defines the level of AVX support: + // 0 indicates no AVX support + // 1 indicates AVX1 is supported + // 2 indicates AVX2 is supported + #ifndef EA_AVX + #if defined __AVX2__ + #define EA_AVX 2 + #elif defined __AVX__ || defined EA_PLATFORM_XBOXONE || defined CS_UNDEFINED_STRING + #define EA_AVX 1 + #else + #define EA_AVX 0 + #endif + #endif + #ifndef EA_AVX2 + #if EA_AVX >= 2 + #define EA_AVX2 1 + #else + #define EA_AVX2 0 + #endif + #endif + + // EA_FP16C may be used to determine the existence of float <-> half conversion operations on an x86 CPU. + // (For example to determine if _mm_cvtph_ps or _mm_cvtps_ph could be used.) + #ifndef EA_FP16C + #if defined __F16C__ || defined EA_PLATFORM_XBOXONE || defined CS_UNDEFINED_STRING + #define EA_FP16C 1 + #else + #define EA_FP16C 0 + #endif + #endif + + // EA_FP128 may be used to determine if __float128 is a supported type for use. This type is enabled by a GCC extension (_GLIBCXX_USE_FLOAT128) + // but has support by some implementations of clang (__FLOAT128__) + // PS4 does not support __float128 as of SDK 5.500 https://ps4.siedev.net/resources/documents/SDK/5.500/CPU_Compiler_ABI-Overview/0003.html + #ifndef EA_FP128 + #if (defined __FLOAT128__ || defined _GLIBCXX_USE_FLOAT128) && !defined(EA_PLATFORM_SONY) + #define EA_FP128 1 + #else + #define EA_FP128 0 + #endif + #endif + + // ------------------------------------------------------------------------ + // EA_ABM + // EA_ABM may be used to determine if Advanced Bit Manipulation sets are available for the target architecture (POPCNT, LZCNT) + // + #ifndef EA_ABM + #if defined(__ABM__) || defined(EA_PLATFORM_XBOXONE) || defined(EA_PLATFORM_SONY) || defined(CS_UNDEFINED_STRING) + #define EA_ABM 1 + #else + #define EA_ABM 0 + #endif + #endif + + // ------------------------------------------------------------------------ + // EA_NEON + // EA_NEON may be used to determine if NEON is supported. 
+ #ifndef EA_NEON + #if defined(__ARM_NEON__) || defined(__ARM_NEON) + #define EA_NEON 1 + #else + #define EA_NEON 0 + #endif + #endif + + // ------------------------------------------------------------------------ + // EA_BMI + // EA_BMI may be used to determine if Bit Manipulation Instruction sets are available for the target architecture + // + // EA_BMI defines the level of BMI support: + // 0 indicates no BMI support + // 1 indicates BMI1 is supported + // 2 indicates BMI2 is supported + #ifndef EA_BMI + #if defined(__BMI2__) + #define EA_BMI 2 + #elif defined(__BMI__) || defined(EA_PLATFORM_XBOXONE) || defined(CS_UNDEFINED_STRING) + #define EA_BMI 1 + #else + #define EA_BMI 0 + #endif + #endif + #ifndef EA_BMI2 + #if EA_BMI >= 2 + #define EA_BMI2 1 + #else + #define EA_BMI2 0 + #endif + #endif + + // ------------------------------------------------------------------------ + // EA_FMA3 + // EA_FMA3 may be used to determine if Fused Multiply Add operations are available for the target architecture + // __FMA__ is defined only by GCC, Clang, and ICC; MSVC only defines __AVX__ and __AVX2__ + // FMA3 was introduced alongside AVX2 on Intel Haswell + // All AMD processors support FMA3 if AVX2 is also supported + // + // EA_FMA3 defines the level of FMA3 support: + // 0 indicates no FMA3 support + // 1 indicates FMA3 is supported + #ifndef EA_FMA3 + #if defined(__FMA__) || EA_AVX2 >= 1 + #define EA_FMA3 1 + #else + #define EA_FMA3 0 + #endif + #endif + + // ------------------------------------------------------------------------ + // EA_TBM + // EA_TBM may be used to determine if Trailing Bit Manipulation instructions are available for the target architecture + #ifndef EA_TBM + #if defined(__TBM__) + #define EA_TBM 1 + #else + #define EA_TBM 0 + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_IMPORT + // import declaration specification + // specifies that the declared symbol is imported from another dynamic library. + #ifndef EA_IMPORT + #if defined(EA_COMPILER_MSVC) + #define EA_IMPORT __declspec(dllimport) + #else + #define EA_IMPORT + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_EXPORT + // export declaration specification + // specifies that the declared symbol is exported from the current dynamic library. + // this is not the same as the C++ export keyword. The C++ export keyword has been + // removed from the language as of C++11. + #ifndef EA_EXPORT + #if defined(EA_COMPILER_MSVC) + #define EA_EXPORT __declspec(dllexport) + #else + #define EA_EXPORT + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_PRAGMA_ONCE_SUPPORTED + // + // This is a wrapper for the #pragma once preprocessor directive. + // It allows for some compilers (in particular VC++) to implement signifcantly + // faster include file preprocessing. #pragma once can be used to replace + // header include guards or to augment them. However, #pragma once isn't + // necessarily supported by all compilers and isn't guaranteed to be so in + // the future, so using #pragma once to replace traditional include guards + // is not strictly portable. Note that a direct #define for #pragma once is + // impossible with VC++, due to limitations, but can be done with other + // compilers/preprocessors via _Pragma("once"). 
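// Returning to EA_IMPORT / EA_EXPORT above: they are typically wrapped in a
// per-library API macro. A common sketch (MYLIB_API, MYLIB_BUILDING_DLL and
// MYLIB_USING_DLL are hypothetical names, not part of EABase):
//
//     #if defined(MYLIB_BUILDING_DLL)
//         #define MYLIB_API EA_EXPORT   // building the shared library: export symbols
//     #elif defined(MYLIB_USING_DLL)
//         #define MYLIB_API EA_IMPORT   // consuming the shared library: import symbols
//     #else
//         #define MYLIB_API             // static linking, or a compiler where both expand to nothing
//     #endif
//
//     MYLIB_API void Initialize();
//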
+ // + // Example usage (which includes traditional header guards for portability): + // #ifndef SOMEPACKAGE_SOMEHEADER_H + // #define SOMEPACKAGE_SOMEHEADER_H + // + // #if defined(EA_PRAGMA_ONCE_SUPPORTED) + // #pragma once + // #endif + // + // + // + // #endif + // + #if defined(_MSC_VER) || defined(__GNUC__) || defined(__EDG__) || defined(__APPLE__) + #define EA_PRAGMA_ONCE_SUPPORTED 1 + #endif + + + + // ------------------------------------------------------------------------ + // EA_ONCE + // + // Example usage (which includes traditional header guards for portability): + // #ifndef SOMEPACKAGE_SOMEHEADER_H + // #define SOMEPACKAGE_SOMEHEADER_H + // + // EA_ONCE() + // + // + // + // #endif + // + #if defined(EA_PRAGMA_ONCE_SUPPORTED) + #if defined(_MSC_VER) + #define EA_ONCE() __pragma(once) + #else + #define EA_ONCE() // _Pragma("once") It turns out that _Pragma("once") isn't supported by many compilers. + #endif + #endif + + + + // ------------------------------------------------------------------------ + // EA_OVERRIDE + // + // C++11 override + // See http://msdn.microsoft.com/en-us/library/jj678987.aspx for more information. + // You can use EA_FINAL_OVERRIDE to combine usage of EA_OVERRIDE and EA_INHERITANCE_FINAL in a single statement. + // + // Example usage: + // struct B { virtual void f(int); }; + // struct D : B { void f(int) EA_OVERRIDE; }; + // + #ifndef EA_OVERRIDE + #if defined(EA_COMPILER_NO_OVERRIDE) + #define EA_OVERRIDE + #else + #define EA_OVERRIDE override + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_INHERITANCE_FINAL + // + // Portably wraps the C++11 final specifier. + // See http://msdn.microsoft.com/en-us/library/jj678985.aspx for more information. + // You can use EA_FINAL_OVERRIDE to combine usage of EA_OVERRIDE and EA_INHERITANCE_FINAL in a single statement. + // This is not called EA_FINAL because that term is used within EA to denote debug/release/final builds. + // + // Example usage: + // struct B { virtual void f() EA_INHERITANCE_FINAL; }; + // + #ifndef EA_INHERITANCE_FINAL + #if defined(EA_COMPILER_NO_INHERITANCE_FINAL) + #define EA_INHERITANCE_FINAL + #elif (defined(_MSC_VER) && (EA_COMPILER_VERSION < 1700)) // Pre-VS2012 + #define EA_INHERITANCE_FINAL sealed + #else + #define EA_INHERITANCE_FINAL final + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_FINAL_OVERRIDE + // + // Portably wraps the C++11 override final specifiers combined. + // + // Example usage: + // struct A { virtual void f(); }; + // struct B : public A { virtual void f() EA_FINAL_OVERRIDE; }; + // + #ifndef EA_FINAL_OVERRIDE + #define EA_FINAL_OVERRIDE EA_OVERRIDE EA_INHERITANCE_FINAL + #endif + + + // ------------------------------------------------------------------------ + // EA_SEALED + // + // This is deprecated, as the C++11 Standard has final (EA_INHERITANCE_FINAL) instead. + // See http://msdn.microsoft.com/en-us/library/0w2w91tf.aspx for more information. + // Example usage: + // struct B { virtual void f() EA_SEALED; }; + // + #ifndef EA_SEALED + #if defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1400) // VS2005 (VC8) and later + #define EA_SEALED sealed + #else + #define EA_SEALED + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_ABSTRACT + // + // This is a Microsoft language extension. + // See http://msdn.microsoft.com/en-us/library/b0z6b513.aspx for more information. 
+ // Example usage: + // struct X EA_ABSTRACT { virtual void f(){} }; + // + #ifndef EA_ABSTRACT + #if defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1400) // VS2005 (VC8) and later + #define EA_ABSTRACT abstract + #else + #define EA_ABSTRACT + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_CONSTEXPR + // EA_CONSTEXPR_OR_CONST + // + // Portable wrapper for C++11's 'constexpr' support. + // + // See http://www.cprogramming.com/c++11/c++11-compile-time-processing-with-constexpr.html for more information. + // Example usage: + // EA_CONSTEXPR int GetValue() { return 37; } + // EA_CONSTEXPR_OR_CONST double gValue = std::sin(kTwoPi); + // + #if !defined(EA_CONSTEXPR) + #if defined(EA_COMPILER_NO_CONSTEXPR) + #define EA_CONSTEXPR + #else + #define EA_CONSTEXPR constexpr + #endif + #endif + + #if !defined(EA_CONSTEXPR_OR_CONST) + #if defined(EA_COMPILER_NO_CONSTEXPR) + #define EA_CONSTEXPR_OR_CONST const + #else + #define EA_CONSTEXPR_OR_CONST constexpr + #endif + #endif + + // ------------------------------------------------------------------------ + // EA_CONSTEXPR_IF + // + // Portable wrapper for C++17's 'constexpr if' support. + // + // https://en.cppreference.com/w/cpp/language/if + // + // Example usage: + // + // EA_CONSTEXPR_IF(eastl::is_copy_constructible_v) + // { ... } + // + #if !defined(EA_CONSTEXPR_IF) + #if defined(EA_COMPILER_NO_CONSTEXPR_IF) + #define EA_CONSTEXPR_IF(predicate) if ((predicate)) + #else + #define EA_CONSTEXPR_IF(predicate) if constexpr ((predicate)) + #endif + #endif + + + + // ------------------------------------------------------------------------ + // EA_EXTERN_TEMPLATE + // + // Portable wrapper for C++11's 'extern template' support. + // + // Example usage: + // EA_EXTERN_TEMPLATE(class basic_string); + // + #if !defined(EA_EXTERN_TEMPLATE) + #if defined(EA_COMPILER_NO_EXTERN_TEMPLATE) + #define EA_EXTERN_TEMPLATE(declaration) + #else + #define EA_EXTERN_TEMPLATE(declaration) extern template declaration + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_NOEXCEPT + // EA_NOEXCEPT_IF(predicate) + // EA_NOEXCEPT_EXPR(expression) + // + // Portable wrapper for C++11 noexcept + // http://en.cppreference.com/w/cpp/language/noexcept + // http://en.cppreference.com/w/cpp/language/noexcept_spec + // + // Example usage: + // EA_NOEXCEPT + // EA_NOEXCEPT_IF(predicate) + // EA_NOEXCEPT_EXPR(expression) + // + // This function never throws an exception. + // void DoNothing() EA_NOEXCEPT + // { } + // + // This function throws an exception of T::T() throws an exception. + // template + // void DoNothing() EA_NOEXCEPT_IF(EA_NOEXCEPT_EXPR(T())) + // { T t; } + // + #if !defined(EA_NOEXCEPT) + #if defined(EA_COMPILER_NO_NOEXCEPT) + #define EA_NOEXCEPT + #define EA_NOEXCEPT_IF(predicate) + #define EA_NOEXCEPT_EXPR(expression) false + #else + #define EA_NOEXCEPT noexcept + #define EA_NOEXCEPT_IF(predicate) noexcept((predicate)) + #define EA_NOEXCEPT_EXPR(expression) noexcept((expression)) + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_NORETURN + // + // Wraps the C++11 noreturn attribute. 
See EA_COMPILER_NO_NORETURN + // http://en.cppreference.com/w/cpp/language/attributes + // http://msdn.microsoft.com/en-us/library/k6ktzx3s%28v=vs.80%29.aspx + // http://blog.aaronballman.com/2011/09/understanding-attributes/ + // + // Example usage: + // EA_NORETURN void SomeFunction() + // { throw "error"; } + // + #if !defined(EA_NORETURN) + #if defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1300) // VS2003 (VC7) and later + #define EA_NORETURN __declspec(noreturn) + #elif defined(EA_COMPILER_NO_NORETURN) + #define EA_NORETURN + #else + #define EA_NORETURN [[noreturn]] + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_CARRIES_DEPENDENCY + // + // Wraps the C++11 carries_dependency attribute + // http://en.cppreference.com/w/cpp/language/attributes + // http://blog.aaronballman.com/2011/09/understanding-attributes/ + // + // Example usage: + // EA_CARRIES_DEPENDENCY int* SomeFunction() + // { return &mX; } + // + // + #if !defined(EA_CARRIES_DEPENDENCY) + #if defined(EA_COMPILER_NO_CARRIES_DEPENDENCY) + #define EA_CARRIES_DEPENDENCY + #else + #define EA_CARRIES_DEPENDENCY [[carries_dependency]] + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_FALLTHROUGH + // + // [[fallthrough] is a C++17 standard attribute that appears in switch + // statements to indicate that the fallthrough from the previous case in the + // switch statement is intentially and not a bug. + // + // http://en.cppreference.com/w/cpp/language/attributes + // + // Example usage: + // void f(int n) + // { + // switch(n) + // { + // case 1: + // DoCase1(); + // // Compiler may generate a warning for fallthrough behaviour + // + // case 2: + // DoCase2(); + // + // EA_FALLTHROUGH; + // case 3: + // DoCase3(); + // } + // } + // + #if !defined(EA_FALLTHROUGH) + #if defined(EA_COMPILER_NO_FALLTHROUGH) + #define EA_FALLTHROUGH + #else + #define EA_FALLTHROUGH [[fallthrough]] + #endif + #endif + + + + // ------------------------------------------------------------------------ + // EA_NODISCARD + // + // [[nodiscard]] is a C++17 standard attribute that can be applied to a + // function declaration, enum, or class declaration. If a any of the list + // previously are returned from a function (without the user explicitly + // casting to void) the addition of the [[nodiscard]] attribute encourages + // the compiler to generate a warning about the user discarding the return + // value. This is a useful practice to encourage client code to check API + // error codes. + // + // http://en.cppreference.com/w/cpp/language/attributes + // + // Example usage: + // + // EA_NODISCARD int baz() { return 42; } + // + // void foo() + // { + // baz(); // warning: ignoring return value of function declared with 'nodiscard' attribute + // } + // + #if !defined(EA_NODISCARD) + #if defined(EA_COMPILER_NO_NODISCARD) + #define EA_NODISCARD + #else + #define EA_NODISCARD [[nodiscard]] + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_MAYBE_UNUSED + // + // [[maybe_unused]] is a C++17 standard attribute that suppresses warnings + // on unused entities that are declared as maybe_unused. + // + // http://en.cppreference.com/w/cpp/language/attributes + // + // Example usage: + // void foo(EA_MAYBE_UNUSED int i) + // { + // assert(i == 42); // warning suppressed when asserts disabled. 
+ // } + // + #if !defined(EA_MAYBE_UNUSED) + #if defined(EA_COMPILER_NO_MAYBE_UNUSED) + #define EA_MAYBE_UNUSED + #else + #define EA_MAYBE_UNUSED [[maybe_unused]] + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_NO_UBSAN + // + // The LLVM/Clang undefined behaviour sanitizer will not analyse a function tagged with the following attribute. + // + // https://clang.llvm.org/docs/UndefinedBehaviorSanitizer.html#disabling-instrumentation-with-attribute-no-sanitize-undefined + // + // Example usage: + // EA_NO_UBSAN int SomeFunction() { ... } + // + #ifndef EA_NO_UBSAN + #if defined(EA_COMPILER_CLANG) + #define EA_NO_UBSAN __attribute__((no_sanitize("undefined"))) + #else + #define EA_NO_UBSAN + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_NO_ASAN + // + // The LLVM/Clang address sanitizer will not analyse a function tagged with the following attribute. + // + // https://clang.llvm.org/docs/AddressSanitizer.html#disabling-instrumentation-with-attribute-no-sanitize-address + // + // Example usage: + // EA_NO_ASAN int SomeFunction() { ... } + // + #ifndef EA_NO_ASAN + #if defined(EA_COMPILER_CLANG) + #define EA_NO_ASAN __attribute__((no_sanitize("address"))) + #else + #define EA_NO_ASAN + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_ASAN_ENABLED + // + // Defined as 0 or 1. It's value depends on the compile environment. + // Specifies whether the code is being built with Clang's Address Sanitizer. + // + #if defined(__has_feature) + #if __has_feature(address_sanitizer) + #define EA_ASAN_ENABLED 1 + #else + #define EA_ASAN_ENABLED 0 + #endif + #else + #define EA_ASAN_ENABLED 0 + #endif + + + // ------------------------------------------------------------------------ + // EA_NON_COPYABLE + // + // This macro defines as a class as not being copy-constructable + // or assignable. This is useful for preventing class instances + // from being passed to functions by value, is useful for preventing + // compiler warnings by some compilers about the inability to + // auto-generate a copy constructor and assignment, and is useful + // for simply declaring in the interface that copy semantics are + // not supported by the class. Your class needs to have at least a + // default constructor when using this macro. + // + // Beware that this class works by declaring a private: section of + // the class in the case of compilers that don't support C++11 deleted + // functions. + // + // Note: With some pre-C++11 compilers (e.g. Green Hills), you may need + // to manually define an instances of the hidden functions, even + // though they are not used. + // + // Example usage: + // class Widget { + // Widget(); + // . . . 
+ // EA_NON_COPYABLE(Widget) + // }; + // + #if !defined(EA_NON_COPYABLE) + #if defined(EA_COMPILER_NO_DELETED_FUNCTIONS) + #define EA_NON_COPYABLE(EAClass_) \ + private: \ + EA_DISABLE_VC_WARNING(4822); /* local class member function does not have a body */ \ + EAClass_(const EAClass_&); \ + void operator=(const EAClass_&); \ + EA_RESTORE_VC_WARNING(); + #else + #define EA_NON_COPYABLE(EAClass_) \ + EA_DISABLE_VC_WARNING(4822); /* local class member function does not have a body */ \ + EAClass_(const EAClass_&) = delete; \ + void operator=(const EAClass_&) = delete; \ + EA_RESTORE_VC_WARNING(); + #endif + #endif + + + // ------------------------------------------------------------------------ + // EA_FUNCTION_DELETE + // + // Semi-portable way of specifying a deleted function which allows for + // cleaner code in class declarations. + // + // Example usage: + // + // class Example + // { + // private: // For portability with pre-C++11 compilers, make the function private. + // void foo() EA_FUNCTION_DELETE; + // }; + // + // Note: EA_FUNCTION_DELETE'd functions should be private to prevent the + // functions from being called even when the compiler does not support + // deleted functions. Some compilers (e.g. Green Hills) that don't support + // C++11 deleted functions can require that you define the function, + // which you can do in the associated source file for the class. + // + #if defined(EA_COMPILER_NO_DELETED_FUNCTIONS) + #define EA_FUNCTION_DELETE + #else + #define EA_FUNCTION_DELETE = delete + #endif + + // ------------------------------------------------------------------------ + // EA_DISABLE_DEFAULT_CTOR + // + // Disables the compiler generated default constructor. This macro is + // provided to improve portability and clarify intent of code. + // + // Example usage: + // + // class Example + // { + // private: + // EA_DISABLE_DEFAULT_CTOR(Example); + // }; + // + #define EA_DISABLE_DEFAULT_CTOR(ClassName) ClassName() EA_FUNCTION_DELETE + + // ------------------------------------------------------------------------ + // EA_DISABLE_COPY_CTOR + // + // Disables the compiler generated copy constructor. This macro is + // provided to improve portability and clarify intent of code. + // + // Example usage: + // + // class Example + // { + // private: + // EA_DISABLE_COPY_CTOR(Example); + // }; + // + #define EA_DISABLE_COPY_CTOR(ClassName) ClassName(const ClassName &) EA_FUNCTION_DELETE + + // ------------------------------------------------------------------------ + // EA_DISABLE_MOVE_CTOR + // + // Disables the compiler generated move constructor. This macro is + // provided to improve portability and clarify intent of code. + // + // Example usage: + // + // class Example + // { + // private: + // EA_DISABLE_MOVE_CTOR(Example); + // }; + // + #define EA_DISABLE_MOVE_CTOR(ClassName) ClassName(ClassName&&) EA_FUNCTION_DELETE + + // ------------------------------------------------------------------------ + // EA_DISABLE_ASSIGNMENT_OPERATOR + // + // Disables the compiler generated assignment operator. This macro is + // provided to improve portability and clarify intent of code. + // + // Example usage: + // + // class Example + // { + // private: + // EA_DISABLE_ASSIGNMENT_OPERATOR(Example); + // }; + // + #define EA_DISABLE_ASSIGNMENT_OPERATOR(ClassName) ClassName & operator=(const ClassName &) EA_FUNCTION_DELETE + + // ------------------------------------------------------------------------ + // EA_DISABLE_MOVE_OPERATOR + // + // Disables the compiler generated move operator. 
This macro is + // provided to improve portability and clarify intent of code. + // + // Example usage: + // + // class Example + // { + // private: + // EA_DISABLE_MOVE_OPERATOR(Example); + // }; + // + #define EA_DISABLE_MOVE_OPERATOR(ClassName) ClassName & operator=(ClassName&&) EA_FUNCTION_DELETE + + // ------------------------------------------------------------------------ + // EANonCopyable + // + // Declares a class as not supporting copy construction or assignment. + // May be more reliable with some situations that EA_NON_COPYABLE alone, + // though it may result in more code generation. + // + // Note that VC++ will generate warning C4625 and C4626 if you use EANonCopyable + // and you are compiling with /W4 and /Wall. There is no resolution but + // to redelare EA_NON_COPYABLE in your subclass or disable the warnings with + // code like this: + // EA_DISABLE_VC_WARNING(4625 4626) + // ... + // EA_RESTORE_VC_WARNING() + // + // Example usage: + // struct Widget : EANonCopyable { + // . . . + // }; + // + #ifdef __cplusplus + struct EANonCopyable + { + #if defined(EA_COMPILER_NO_DEFAULTED_FUNCTIONS) || defined(__EDG__) + // EDG doesn't appear to behave properly for the case of defaulted constructors; + // it generates a mistaken warning about missing default constructors. + EANonCopyable() {} // Putting {} here has the downside that it allows a class to create itself, + ~EANonCopyable() {} // but avoids linker errors that can occur with some compilers (e.g. Green Hills). + #else + EANonCopyable() = default; + ~EANonCopyable() = default; + #endif + + EA_NON_COPYABLE(EANonCopyable) + }; + #endif + + + // ------------------------------------------------------------------------ + // EA_OPTIMIZE_OFF / EA_OPTIMIZE_ON + // + // Implements portable inline optimization enabling/disabling. + // Usage of these macros must be in order OFF then ON. This is + // because the OFF macro pushes a set of settings and the ON + // macro pops them. The nesting of OFF/ON sets (e.g. OFF, OFF, ON, ON) + // is not guaranteed to work on all platforms. + // + // This is often used to allow debugging of some code that's + // otherwise compiled with undebuggable optimizations. It's also + // useful for working around compiler code generation problems + // that occur in optimized builds. + // + // Some compilers (e.g. VC++) don't allow doing this within a function and + // so the usage must be outside a function, as with the example below. + // GCC on x86 appears to have some problem with argument passing when + // using EA_OPTIMIZE_OFF in optimized builds. + // + // Example usage: + // // Disable optimizations for SomeFunction. + // EA_OPTIMIZE_OFF() + // void SomeFunction() + // { + // ... + // } + // EA_OPTIMIZE_ON() + // + #if !defined(EA_OPTIMIZE_OFF) + #if defined(EA_COMPILER_MSVC) + #define EA_OPTIMIZE_OFF() __pragma(optimize("", off)) + #elif defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION > 4004) && (defined(__i386__) || defined(__x86_64__)) // GCC 4.4+ - Seems to work only on x86/Linux so far. However, GCC 4.4 itself appears broken and screws up parameter passing conventions. 
+ #define EA_OPTIMIZE_OFF() \ + _Pragma("GCC push_options") \ + _Pragma("GCC optimize 0") + #elif defined(EA_COMPILER_CLANG) && (!defined(EA_PLATFORM_ANDROID) || (EA_COMPILER_VERSION >= 380)) + #define EA_OPTIMIZE_OFF() \ + EA_DISABLE_CLANG_WARNING(-Wunknown-pragmas) \ + _Pragma("clang optimize off") \ + EA_RESTORE_CLANG_WARNING() + #else + #define EA_OPTIMIZE_OFF() + #endif + #endif + + #if !defined(EA_OPTIMIZE_ON) + #if defined(EA_COMPILER_MSVC) + #define EA_OPTIMIZE_ON() __pragma(optimize("", on)) + #elif defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION > 4004) && (defined(__i386__) || defined(__x86_64__)) // GCC 4.4+ - Seems to work only on x86/Linux so far. However, GCC 4.4 itself appears broken and screws up parameter passing conventions. + #define EA_OPTIMIZE_ON() _Pragma("GCC pop_options") + #elif defined(EA_COMPILER_CLANG) && (!defined(EA_PLATFORM_ANDROID) || (EA_COMPILER_VERSION >= 380)) + #define EA_OPTIMIZE_ON() \ + EA_DISABLE_CLANG_WARNING(-Wunknown-pragmas) \ + _Pragma("clang optimize on") \ + EA_RESTORE_CLANG_WARNING() + #else + #define EA_OPTIMIZE_ON() + #endif + #endif + + + + // ------------------------------------------------------------------------ + // EA_SIGNED_RIGHT_SHIFT_IS_UNSIGNED + // + // Defined if right shifts of signed integers (i.e. arithmetic shifts) fail + // to propogate the high bit downward, and thus preserve sign. Most hardware + // and their corresponding compilers do this. + // + // + +#endif // Header include guard + + + + + + + + + + diff --git a/libkram/eastl/include/EABase/config/eaplatform.h b/libkram/eastl/include/EABase/config/eaplatform.h new file mode 100644 index 00000000..37c1350a --- /dev/null +++ b/libkram/eastl/include/EABase/config/eaplatform.h @@ -0,0 +1,738 @@ +/*----------------------------------------------------------------------------- + * config/eaplatform.h + * + * Copyright (c) Electronic Arts Inc. All rights reserved. + *----------------------------------------------------------------------------- + * Currently supported platform indentification defines include: + */ +#ifdef EA_PLATFORM_PS4 // ifdef for code stripping purposes +// EA_PLATFORM_PS4 (EA_PLATFORM_KETTLE) +#endif +#ifdef EA_PLATFORM_XBOXONE // ifdef for code stripping purposes + // EA_PLATFORM_XBOXONE (EA_PLATFORM_CAPILANO) + // EA_PLATFORM_XBOXONE_XDK (EA_PLATFORM_CAPILANO_XDK), set by capilano_config package + // EA_PLATFORM_XBOXONE_ADK (EA_PLATFORM_CAPILANO_ADK), set by capilano_config package +#endif +// EA_PLATFORM_ANDROID +// EA_PLATFORM_APPLE +// EA_PLATFORM_IPHONE +// EA_PLATFORM_IPHONE_SIMULATOR +// EA_PLATFORM_OSX +// EA_PLATFORM_LINUX +// EA_PLATFORM_SAMSUNG_TV +// EA_PLATFORM_WINDOWS +// EA_PLATFORM_WIN32 +// EA_PLATFORM_WIN64 +// EA_PLATFORM_WINDOWS_PHONE +// EA_PLATFORM_WINRT +// EA_PLATFORM_SUN +// EA_PLATFORM_LRB (Larrabee) +// EA_PLATFORM_POSIX (pseudo-platform; may be defined along with another platform like EA_PLATFORM_LINUX, EA_PLATFORM_UNIX, EA_PLATFORM_QNX) +// EA_PLATFORM_UNIX (pseudo-platform; may be defined along with another platform like EA_PLATFORM_LINUX) +// EA_PLATFORM_CYGWIN (pseudo-platform; may be defined along with another platform like EA_PLATFORM_LINUX) +// EA_PLATFORM_MINGW (pseudo-platform; may be defined along with another platform like EA_PLATFORM_WINDOWS) +// EA_PLATFORM_MICROSOFT (pseudo-platform; may be defined along with another platform like EA_PLATFORM_WINDOWS) +// +// EA_ABI_ARM_LINUX (a.k.a. "eabi". 
for all platforms that use the CodeSourcery GNU/Linux toolchain, like Android) +// EA_ABI_ARM_APPLE (similar to eabi but not identical) +// EA_ABI_ARM64_APPLE (similar to eabi but not identical) https://developer.apple.com/library/ios/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARM64FunctionCallingConventions.html +// EA_ABI_ARM_WINCE (similar to eabi but not identical) +// +// Other definitions emanated from this file inclue: +// EA_PLATFORM_NAME = +// EA_PLATFORM_DESCRIPTION = +// EA_PROCESSOR_XXX +// EA_MISALIGNED_SUPPORT_LEVEL=0|1|2 +// EA_SYSTEM_LITTLE_ENDIAN | EA_SYSTEM_BIG_ENDIAN +// EA_ASM_STYLE_ATT | EA_ASM_STYLE_INTEL | EA_ASM_STYLE_MOTOROLA +// EA_PLATFORM_PTR_SIZE = +// EA_PLATFORM_WORD_SIZE = +// EA_CACHE_LINE_SIZE = +//--------------------------------------------------------------------------- + +/* + EA_PLATFORM_MOBILE + EA_PLATFORM_MOBILE is a peer to EA_PLATORM_DESKTOP and EA_PLATFORM_CONSOLE. Their definition is qualitative rather + than quantitative, and refers to the general (usually weaker) capabilities of the machine. Mobile devices have a + similar set of weaknesses that are useful to generally categorize. The primary motivation is to avoid code that + tests for multiple mobile platforms on a line and needs to be updated every time we get a new one. + For example, mobile platforms tend to have weaker ARM processors, don't have full multiple processor support, + are hand-held, don't have mice (though may have touch screens or basic cursor controls), have writable solid + state permanent storage. Production user code shouldn't have too many expectations about the meaning of this define. + + EA_PLATFORM_DESKTOP + This is similar to EA_PLATFORM_MOBILE in its qualitative nature and refers to platforms that are powerful. + For example, they nearly always have virtual memory, mapped memory, hundreds of GB of writable disk storage, + TCP/IP network connections, mice, keyboards, 512+ MB of RAM, multiprocessing, multiple display support. + Production user code shouldn't have too many expectations about the meaning of this define. + + EA_PLATFORM_CONSOLE + This is similar to EA_PLATFORM_MOBILE in its qualitative nature and refers to platforms that are consoles. + This means platforms that are connected to TVs, are fairly powerful (especially graphics-wise), are tightly + controlled by vendors, tend not to have mapped memory, tend to have TCP/IP, don't have multiple process support + though they might have multiple CPUs, support TV output only. Production user code shouldn't have too many + expectations about the meaning of this define. + +*/ + + +#ifndef INCLUDED_eaplatform_H +#define INCLUDED_eaplatform_H + + +// Cygwin +// This is a pseudo-platform which will be defined along with EA_PLATFORM_LINUX when +// using the Cygwin build environment. +#if defined(__CYGWIN__) + #define EA_PLATFORM_CYGWIN 1 + #define EA_PLATFORM_DESKTOP 1 +#endif + +// MinGW +// This is a pseudo-platform which will be defined along with EA_PLATFORM_WINDOWS when +// using the MinGW Windows build environment. +#if defined(__MINGW32__) || defined(__MINGW64__) + #define EA_PLATFORM_MINGW 1 + #define EA_PLATFORM_DESKTOP 1 +#endif + +#if defined(EA_PLATFORM_PS4) || defined(__ORBIS__) || defined(EA_PLATFORM_KETTLE) + // PlayStation 4 + // Orbis was Sony's code-name for the platform, which is now obsolete. + // Kettle was an EA-specific code-name for the platform, which is now obsolete. 
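// As an illustrative sketch of how the defines listed above are usually consumed
// (ToBigEndian below is a hypothetical helper, not part of EABase):
//
//     #include <cstdint>
//
//     #if defined(EA_SYSTEM_LITTLE_ENDIAN)
//         inline uint32_t ToBigEndian(uint32_t x)
//             { return (x >> 24) | ((x >> 8) & 0xff00u) | ((x << 8) & 0xff0000u) | (x << 24); }
//     #else
//         inline uint32_t ToBigEndian(uint32_t x) { return x; }   // already big-endian
//     #endif
//
//     #if defined(EA_PLATFORM_MICROSOFT)
//         // Windows-family path, chosen via EA_PLATFORM_* rather than raw compiler macros.
//     #elif defined(EA_PLATFORM_POSIX)
//         // Shared path for Linux, Apple, Sony and other POSIX-like platforms.
//     #endif
//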
+ #if defined(EA_PLATFORM_PS4) + #undef EA_PLATFORM_PS4 + #endif + #define EA_PLATFORM_PS4 1 + + // Backward compatibility: + #if defined(EA_PLATFORM_KETTLE) + #undef EA_PLATFORM_KETTLE + #endif + // End backward compatbility + + #define EA_PLATFORM_KETTLE 1 + #define EA_PLATFORM_NAME "PS4" + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "PS4 on x64" + #define EA_PLATFORM_CONSOLE 1 + #define EA_PLATFORM_SONY 1 + #define EA_PLATFORM_POSIX 1 + // #define EA_POSIX_THREADS_AVAILABLE 1 // POSIX threading API is available but discouraged. Sony indicated use of the scePthreads* API is preferred. + #define EA_PROCESSOR_X86_64 1 + #if defined(__GNUC__) || defined(__clang__) + #define EA_ASM_STYLE_ATT 1 + #endif + +#elif defined(EA_PLATFORM_XBOXONE) || defined(_DURANGO) || defined(_XBOX_ONE) || defined(EA_PLATFORM_CAPILANO) || defined(_GAMING_XBOX) + // XBox One + // Durango was Microsoft's code-name for the platform, which is now obsolete. + // Microsoft uses _DURANGO instead of some variation of _XBOX, though it's not natively defined by the compiler. + // Capilano was an EA-specific code-name for the platform, which is now obsolete. + #if defined(EA_PLATFORM_XBOXONE) + #undef EA_PLATFORM_XBOXONE + #endif + #define EA_PLATFORM_XBOXONE 1 + + // Backward compatibility: + #if defined(EA_PLATFORM_CAPILANO) + #undef EA_PLATFORM_CAPILANO + #endif + #define EA_PLATFORM_CAPILANO 1 + #if defined(EA_PLATFORM_CAPILANO_XDK) && !defined(EA_PLATFORM_XBOXONE_XDK) + #define EA_PLATFORM_XBOXONE_XDK 1 + #endif + #if defined(EA_PLATFORM_CAPILANO_ADK) && !defined(EA_PLATFORM_XBOXONE_ADK) + #define EA_PLATFORM_XBOXONE_ADK 1 + #endif + // End backward compatibility + + #if !defined(_DURANGO) + #define _DURANGO + #endif + #define EA_PLATFORM_NAME "XBox One" + //#define EA_PROCESSOR_X86 Currently our policy is that we don't define this, even though x64 is something of a superset of x86. + #define EA_PROCESSOR_X86_64 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "XBox One on x64" + #define EA_ASM_STYLE_INTEL 1 + #define EA_PLATFORM_CONSOLE 1 + #define EA_PLATFORM_MICROSOFT 1 + + // WINAPI_FAMILY defines - mirrored from winapifamily.h + #define EA_WINAPI_FAMILY_APP 1000 + #define EA_WINAPI_FAMILY_DESKTOP_APP 1001 + #define EA_WINAPI_FAMILY_PHONE_APP 1002 + #define EA_WINAPI_FAMILY_TV_APP 1003 + #define EA_WINAPI_FAMILY_TV_TITLE 1004 + #define EA_WINAPI_FAMILY_GAMES 1006 + + #if defined(WINAPI_FAMILY) + #include + #if defined(WINAPI_FAMILY_TV_TITLE) && WINAPI_FAMILY == WINAPI_FAMILY_TV_TITLE + #define EA_WINAPI_FAMILY EA_WINAPI_FAMILY_TV_TITLE + #elif defined(WINAPI_FAMILY_DESKTOP_APP) && WINAPI_FAMILY == WINAPI_FAMILY_DESKTOP_APP + #define EA_WINAPI_FAMILY EA_WINAPI_FAMILY_DESKTOP_APP + #elif defined(WINAPI_FAMILY_GAMES) && WINAPI_FAMILY == WINAPI_FAMILY_GAMES + #define EA_WINAPI_FAMILY EA_WINAPI_FAMILY_GAMES + #else + #error Unsupported WINAPI_FAMILY + #endif + #else + #error WINAPI_FAMILY should always be defined on Capilano. + #endif + + // Macro to determine if a partition is enabled. 
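// Usage sketch for the partition macro defined just below; it is tested the same
// way the Windows SDK's WINAPI_FAMILY_PARTITION macro is:
//
//     #if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_TV_TITLE)
//         // code that may rely on TV-title (XDK) APIs
//     #else
//         // fallback for other partitions
//     #endif
//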
+ #define EA_WINAPI_FAMILY_PARTITION(Partition) (Partition) + + #if EA_WINAPI_FAMILY == EA_WINAPI_FAMILY_DESKTOP_APP + #define EA_WINAPI_PARTITION_CORE 1 + #define EA_WINAPI_PARTITION_DESKTOP 1 + #define EA_WINAPI_PARTITION_APP 1 + #define EA_WINAPI_PARTITION_PC_APP 0 + #define EA_WIANPI_PARTITION_PHONE 0 + #define EA_WINAPI_PARTITION_TV_APP 0 + #define EA_WINAPI_PARTITION_TV_TITLE 0 + #define EA_WINAPI_PARTITION_GAMES 0 + #elif EA_WINAPI_FAMILY == EA_WINAPI_FAMILY_TV_TITLE + #define EA_WINAPI_PARTITION_CORE 1 + #define EA_WINAPI_PARTITION_DESKTOP 0 + #define EA_WINAPI_PARTITION_APP 0 + #define EA_WINAPI_PARTITION_PC_APP 0 + #define EA_WIANPI_PARTITION_PHONE 0 + #define EA_WINAPI_PARTITION_TV_APP 0 + #define EA_WINAPI_PARTITION_TV_TITLE 1 + #define EA_WINAPI_PARTITION_GAMES 0 + #elif EA_WINAPI_FAMILY == EA_WINAPI_FAMILY_GAMES + #define EA_WINAPI_PARTITION_CORE 1 + #define EA_WINAPI_PARTITION_DESKTOP 0 + #define EA_WINAPI_PARTITION_APP 0 + #define EA_WINAPI_PARTITION_PC_APP 0 + #define EA_WIANPI_PARTITION_PHONE 0 + #define EA_WINAPI_PARTITION_TV_APP 0 + #define EA_WINAPI_PARTITION_TV_TITLE 0 + #define EA_WINAPI_PARTITION_GAMES 1 + #else + #error Unsupported WINAPI_FAMILY + #endif + + #if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_GAMES) + #define CS_UNDEFINED_STRING 1 + #define CS_UNDEFINED_STRING 1 + #endif + + #if EA_WINAPI_FAMILY_PARTITION(EA_WINAPI_PARTITION_TV_TITLE) + #define EA_PLATFORM_XBOXONE_XDK 1 + #endif +#elif defined(EA_PLATFORM_LRB) || defined(__LRB__) || (defined(__EDG__) && defined(__ICC) && defined(__x86_64__)) + #undef EA_PLATFORM_LRB + #define EA_PLATFORM_LRB 1 + #define EA_PLATFORM_NAME "Larrabee" + #define EA_PLATFORM_DESCRIPTION "Larrabee on LRB1" + #define EA_PROCESSOR_X86_64 1 + #if defined(BYTE_ORDER) && (BYTE_ORDER == 4321) + #define EA_SYSTEM_BIG_ENDIAN 1 + #else + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #endif + #define EA_PROCESSOR_LRB 1 + #define EA_PROCESSOR_LRB1 1 // Larrabee version 1 + #define EA_ASM_STYLE_ATT 1 // Both types of asm style + #define EA_ASM_STYLE_INTEL 1 // are supported. + #define EA_PLATFORM_DESKTOP 1 + +// Android (Google phone OS) +#elif defined(EA_PLATFORM_ANDROID) || defined(__ANDROID__) + #undef EA_PLATFORM_ANDROID + #define EA_PLATFORM_ANDROID 1 + #define EA_PLATFORM_LINUX 1 + #define EA_PLATFORM_UNIX 1 + #define EA_PLATFORM_POSIX 1 + #define EA_PLATFORM_NAME "Android" + #define EA_ASM_STYLE_ATT 1 + #if defined(__arm__) + #define EA_ABI_ARM_LINUX 1 // a.k.a. "ARM eabi" + #define EA_PROCESSOR_ARM32 1 + #define EA_PLATFORM_DESCRIPTION "Android on ARM" + #elif defined(__aarch64__) + #define EA_PROCESSOR_ARM64 1 + #define EA_PLATFORM_DESCRIPTION "Android on ARM64" + #elif defined(__i386__) + #define EA_PROCESSOR_X86 1 + #define EA_PLATFORM_DESCRIPTION "Android on x86" + #elif defined(__x86_64) + #define EA_PROCESSOR_X86_64 1 + #define EA_PLATFORM_DESCRIPTION "Android on x64" + #else + #error Unknown processor + #endif + #if !defined(EA_SYSTEM_BIG_ENDIAN) && !defined(EA_SYSTEM_LITTLE_ENDIAN) + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #endif + #define EA_PLATFORM_MOBILE 1 + +// Samsung SMART TV - a Linux-based smart TV +#elif defined(EA_PLATFORM_SAMSUNG_TV) + #undef EA_PLATFORM_SAMSUNG_TV + #define EA_PLATFORM_SAMSUNG_TV 1 + #define EA_PLATFORM_LINUX 1 + #define EA_PLATFORM_UNIX 1 + #define EA_PLATFORM_POSIX 1 + #define EA_PLATFORM_NAME "SamsungTV" + #define EA_PLATFORM_DESCRIPTION "Samsung SMART TV on ARM" + #define EA_ASM_STYLE_ATT 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PROCESSOR_ARM32 1 + #define EA_ABI_ARM_LINUX 1 // a.k.a. 
"ARM eabi" + #define EA_PROCESSOR_ARM7 1 + +#elif defined(__APPLE__) && __APPLE__ + #include + + // Apple family of operating systems. + #define EA_PLATFORM_APPLE + #define EA_PLATFORM_POSIX 1 + + // iPhone + // TARGET_OS_IPHONE will be undefined on an unknown compiler, and will be defined on gcc. + #if defined(EA_PLATFORM_IPHONE) || defined(__IPHONE__) || (defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE) || (defined(TARGET_IPHONE_SIMULATOR) && TARGET_IPHONE_SIMULATOR) + #undef EA_PLATFORM_IPHONE + #define EA_PLATFORM_IPHONE 1 + #define EA_PLATFORM_NAME "iPhone" + #define EA_ASM_STYLE_ATT 1 + #define EA_POSIX_THREADS_AVAILABLE 1 + #if defined(__arm__) + #define EA_ABI_ARM_APPLE 1 + #define EA_PROCESSOR_ARM32 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "iPhone on ARM" + #elif defined(__aarch64__) || defined(__AARCH64) + #define EA_ABI_ARM64_APPLE 1 + #define EA_PROCESSOR_ARM64 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "iPhone on ARM64" + #elif defined(__i386__) + #define EA_PLATFORM_IPHONE_SIMULATOR 1 + #define EA_PROCESSOR_X86 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "iPhone simulator on x86" + #elif defined(__x86_64) || defined(__amd64) + #define EA_PROCESSOR_X86_64 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "iPhone simulator on x64" + #else + #error Unknown processor + #endif + #define EA_PLATFORM_MOBILE 1 + + // Macintosh OSX + // TARGET_OS_MAC is defined by the Metrowerks and older AppleC compilers. + // Howerver, TARGET_OS_MAC is defined to be 1 in all cases. + // __i386__ and __intel__ are defined by the GCC compiler. + // __dest_os is defined by the Metrowerks compiler. + // __MACH__ is defined by the Metrowerks and GCC compilers. + // powerc and __powerc are defined by the Metrowerks and GCC compilers. + #elif defined(EA_PLATFORM_OSX) || defined(__MACH__) || (defined(__MSL__) && (__dest_os == __mac_os_x)) + #undef EA_PLATFORM_OSX + #define EA_PLATFORM_OSX 1 + #define EA_PLATFORM_UNIX 1 + #define EA_PLATFORM_POSIX 1 + //#define EA_PLATFORM_BSD 1 We don't currently define this. OSX has some BSD history but a lot of the API is different. 
+ #define EA_PLATFORM_NAME "OSX" + #if defined(__i386__) || defined(__intel__) + #define EA_PROCESSOR_X86 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "OSX on x86" + #elif defined(__x86_64) || defined(__amd64) + #define EA_PROCESSOR_X86_64 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "OSX on x64" + #elif defined(__arm__) + #define EA_ABI_ARM_APPLE 1 + #define EA_PROCESSOR_ARM32 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "OSX on ARM" + #elif defined(__aarch64__) || defined(__AARCH64) + #define EA_ABI_ARM64_APPLE 1 + #define EA_PROCESSOR_ARM64 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "OSX on ARM64" + #elif defined(__POWERPC64__) || defined(__powerpc64__) + #define EA_PROCESSOR_POWERPC 1 + #define EA_PROCESSOR_POWERPC_64 1 + #define EA_SYSTEM_BIG_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "OSX on PowerPC 64" + #elif defined(__POWERPC__) || defined(__powerpc__) + #define EA_PROCESSOR_POWERPC 1 + #define EA_PROCESSOR_POWERPC_32 1 + #define EA_SYSTEM_BIG_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "OSX on PowerPC" + #else + #error Unknown processor + #endif + #if defined(__GNUC__) + #define EA_ASM_STYLE_ATT 1 + #else + #define EA_ASM_STYLE_MOTOROLA 1 + #endif + #define EA_PLATFORM_DESKTOP 1 + #else + #error Unknown Apple Platform + #endif + +// Linux +// __linux and __linux__ are defined by the GCC and Borland compiler. +// __i386__ and __intel__ are defined by the GCC compiler. +// __i386__ is defined by the Metrowerks compiler. +// _M_IX86 is defined by the Borland compiler. +// __sparc__ is defined by the GCC compiler. +// __powerpc__ is defined by the GCC compiler. +// __ARM_EABI__ is defined by GCC on an ARM v6l (Raspberry Pi 1) +// __ARM_ARCH_7A__ is defined by GCC on an ARM v7l (Raspberry Pi 2) +#elif defined(EA_PLATFORM_LINUX) || (defined(__linux) || defined(__linux__)) + #undef EA_PLATFORM_LINUX + #define EA_PLATFORM_LINUX 1 + #define EA_PLATFORM_UNIX 1 + #define EA_PLATFORM_POSIX 1 + #define EA_PLATFORM_NAME "Linux" + #if defined(__i386__) || defined(__intel__) || defined(_M_IX86) + #define EA_PROCESSOR_X86 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Linux on x86" + #elif defined(__ARM_ARCH_7A__) || defined(__ARM_EABI__) + #define EA_ABI_ARM_LINUX 1 + #define EA_PROCESSOR_ARM32 1 + #define EA_PLATFORM_DESCRIPTION "Linux on ARM 6/7 32-bits" + #elif defined(__aarch64__) || defined(__AARCH64) + #define EA_PROCESSOR_ARM64 1 + #define EA_PLATFORM_DESCRIPTION "Linux on ARM64" + #elif defined(__x86_64__) + #define EA_PROCESSOR_X86_64 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Linux on x64" + #elif defined(__powerpc64__) + #define EA_PROCESSOR_POWERPC 1 + #define EA_PROCESSOR_POWERPC_64 1 + #define EA_SYSTEM_BIG_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Linux on PowerPC 64" + #elif defined(__powerpc__) + #define EA_PROCESSOR_POWERPC 1 + #define EA_PROCESSOR_POWERPC_32 1 + #define EA_SYSTEM_BIG_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Linux on PowerPC" + #else + #error Unknown processor + #error Unknown endianness + #endif + #if defined(__GNUC__) + #define EA_ASM_STYLE_ATT 1 + #endif + #define EA_PLATFORM_DESKTOP 1 + + +#elif defined(EA_PLATFORM_BSD) || (defined(__BSD__) || defined(__FreeBSD__)) + #undef EA_PLATFORM_BSD + #define EA_PLATFORM_BSD 1 + #define EA_PLATFORM_UNIX 1 + #define EA_PLATFORM_POSIX 1 // BSD's posix complaince is not identical to Linux's + #define EA_PLATFORM_NAME "BSD Unix" + #if defined(__i386__) 
|| defined(__intel__) + #define EA_PROCESSOR_X86 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "BSD on x86" + #elif defined(__x86_64__) + #define EA_PROCESSOR_X86_64 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "BSD on x64" + #elif defined(__powerpc64__) + #define EA_PROCESSOR_POWERPC 1 + #define EA_PROCESSOR_POWERPC_64 1 + #define EA_SYSTEM_BIG_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "BSD on PowerPC 64" + #elif defined(__powerpc__) + #define EA_PROCESSOR_POWERPC 1 + #define EA_PROCESSOR_POWERPC_32 1 + #define EA_SYSTEM_BIG_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "BSD on PowerPC" + #else + #error Unknown processor + #error Unknown endianness + #endif + #if !defined(EA_PLATFORM_FREEBSD) && defined(__FreeBSD__) + #define EA_PLATFORM_FREEBSD 1 // This is a variation of BSD. + #endif + #if defined(__GNUC__) + #define EA_ASM_STYLE_ATT 1 + #endif + #define EA_PLATFORM_DESKTOP 1 + + +#elif defined(EA_PLATFORM_WINDOWS_PHONE) + #undef EA_PLATFORM_WINDOWS_PHONE + #define EA_PLATFORM_WINDOWS_PHONE 1 + #define EA_PLATFORM_NAME "Windows Phone" + #if defined(_M_AMD64) || defined(_AMD64_) || defined(__x86_64__) + #define EA_PROCESSOR_X86_64 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Windows Phone on x64" + #elif defined(_M_IX86) || defined(_X86_) + #define EA_PROCESSOR_X86 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Windows Phone on X86" + #elif defined(_M_ARM) + #define EA_ABI_ARM_WINCE 1 + #define EA_PROCESSOR_ARM32 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Windows Phone on ARM" + #else //Possibly other Windows Phone variants + #error Unknown processor + #error Unknown endianness + #endif + #define EA_PLATFORM_MICROSOFT 1 + + // WINAPI_FAMILY defines - mirrored from winapifamily.h + #define EA_WINAPI_FAMILY_APP 1 + #define EA_WINAPI_FAMILY_DESKTOP_APP 2 + #define EA_WINAPI_FAMILY_PHONE_APP 3 + + #if defined(WINAPI_FAMILY) + #include + #if WINAPI_FAMILY == WINAPI_FAMILY_PHONE_APP + #define EA_WINAPI_FAMILY EA_WINAPI_FAMILY_PHONE_APP + #else + #error Unsupported WINAPI_FAMILY for Windows Phone + #endif + #else + #error WINAPI_FAMILY should always be defined on Windows Phone. + #endif + + // Macro to determine if a partition is enabled. + #define EA_WINAPI_FAMILY_PARTITION(Partition) (Partition) + + // Enable the appropriate partitions for the current family + #if EA_WINAPI_FAMILY == EA_WINAPI_FAMILY_PHONE_APP + # define EA_WINAPI_PARTITION_CORE 1 + # define EA_WINAPI_PARTITION_PHONE 1 + # define EA_WINAPI_PARTITION_APP 1 + #else + # error Unsupported WINAPI_FAMILY for Windows Phone + #endif + + +// Windows +// _WIN32 is defined by the VC++, Intel and GCC compilers. +// _WIN64 is defined by the VC++, Intel and GCC compilers. +// __WIN32__ is defined by the Borland compiler. +// __INTEL__ is defined by the Metrowerks compiler. +// _M_IX86, _M_AMD64 and _M_IA64 are defined by the VC++, Intel, and Borland compilers. +// _X86_, _AMD64_, and _IA64_ are defined by the Metrowerks compiler. +// _M_ARM is defined by the VC++ compiler. +#elif (defined(EA_PLATFORM_WINDOWS) || (defined(_WIN32) || defined(__WIN32__) || defined(_WIN64))) && !defined(CS_UNDEFINED_STRING) + #undef EA_PLATFORM_WINDOWS + #define EA_PLATFORM_WINDOWS 1 + #define EA_PLATFORM_NAME "Windows" + #ifdef _WIN64 // VC++ defines both _WIN32 and _WIN64 when compiling for Win64. 
+ #define EA_PLATFORM_WIN64 1 + #else + #define EA_PLATFORM_WIN32 1 + #endif + #if defined(_M_AMD64) || defined(_AMD64_) || defined(__x86_64__) + #define EA_PROCESSOR_X86_64 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Windows on x64" + #elif defined(_M_IX86) || defined(_X86_) + #define EA_PROCESSOR_X86 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Windows on X86" + #elif defined(_M_IA64) || defined(_IA64_) + #define EA_PROCESSOR_IA64 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Windows on IA-64" + #elif defined(_M_ARM) + #define EA_ABI_ARM_WINCE 1 + #define EA_PROCESSOR_ARM32 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Windows on ARM" + #elif defined(_M_ARM64) + #define EA_PROCESSOR_ARM64 1 + #define EA_SYSTEM_LITTLE_ENDIAN 1 + #define EA_PLATFORM_DESCRIPTION "Windows on ARM64" + #else //Possibly other Windows CE variants + #error Unknown processor + #error Unknown endianness + #endif + #if defined(__GNUC__) + #define EA_ASM_STYLE_ATT 1 + #elif defined(_MSC_VER) || defined(__BORLANDC__) || defined(__ICL) + #define EA_ASM_STYLE_INTEL 1 + #endif + #define EA_PLATFORM_DESKTOP 1 + #define EA_PLATFORM_MICROSOFT 1 + + // WINAPI_FAMILY defines to support Windows 8 Metro Apps - mirroring winapifamily.h in the Windows 8 SDK + #define EA_WINAPI_FAMILY_APP 1000 + #define EA_WINAPI_FAMILY_DESKTOP_APP 1001 + #define EA_WINAPI_FAMILY_GAMES 1006 + + #if defined(WINAPI_FAMILY) + #if defined(_MSC_VER) + #pragma warning(push, 0) + #endif + #include + #if defined(_MSC_VER) + #pragma warning(pop) + #endif + #if defined(WINAPI_FAMILY_DESKTOP_APP) && WINAPI_FAMILY == WINAPI_FAMILY_DESKTOP_APP + #define EA_WINAPI_FAMILY EA_WINAPI_FAMILY_DESKTOP_APP + #elif defined(WINAPI_FAMILY_APP) && WINAPI_FAMILY == WINAPI_FAMILY_APP + #define EA_WINAPI_FAMILY EA_WINAPI_FAMILY_APP + #elif defined(WINAPI_FAMILY_GAMES) && WINAPI_FAMILY == WINAPI_FAMILY_GAMES + #define EA_WINAPI_FAMILY EA_WINAPI_FAMILY_GAMES + #else + #error Unsupported WINAPI_FAMILY + #endif + #else + #define EA_WINAPI_FAMILY EA_WINAPI_FAMILY_DESKTOP_APP + #endif + + #define EA_WINAPI_PARTITION_DESKTOP 1 + #define EA_WINAPI_PARTITION_APP 1 + #define EA_WINAPI_PARTITION_GAMES (EA_WINAPI_FAMILY == EA_WINAPI_FAMILY_GAMES) + + #define EA_WINAPI_FAMILY_PARTITION(Partition) (Partition) + + // EA_PLATFORM_WINRT + // This is a subset of Windows which is used for tablets and the "Metro" (restricted) Windows user interface. + // WinRT doesn't doesn't have access to the Windows "desktop" API, but WinRT can nevertheless run on + // desktop computers in addition to tablets. The Windows Phone API is a subset of WinRT and is not included + // in it due to it being only a part of the API. + #if defined(__cplusplus_winrt) + #define EA_PLATFORM_WINRT 1 + #endif + +// Sun (Solaris) +// __SUNPRO_CC is defined by the Sun compiler. +// __sun is defined by the GCC compiler. +// __i386 is defined by the Sun and GCC compilers. +// __sparc is defined by the Sun and GCC compilers. +#else + #error Unknown platform + #error Unknown processor + #error Unknown endianness +#endif + +#ifndef EA_PROCESSOR_ARM + #if defined(EA_PROCESSOR_ARM32) || defined(EA_PROCESSOR_ARM64) || defined(EA_PROCESSOR_ARM7) + #define EA_PROCESSOR_ARM + #endif +#endif + +// EA_PLATFORM_PTR_SIZE +// Platform pointer size; same as sizeof(void*). +// This is not the same as sizeof(int), as int is usually 32 bits on +// even 64 bit platforms. +// +// _WIN64 is defined by Win64 compilers, such as VC++. 
+// _M_IA64 is defined by VC++ and Intel compilers for IA64 processors. +// __LP64__ is defined by HP compilers for the LP64 standard. +// _LP64 is defined by the GCC and Sun compilers for the LP64 standard. +// __ia64__ is defined by the GCC compiler for IA64 processors. +// __arch64__ is defined by the Sparc compiler for 64 bit processors. +// __mips64__ is defined by the GCC compiler for MIPS processors. +// __powerpc64__ is defined by the GCC compiler for PowerPC processors. +// __64BIT__ is defined by the AIX compiler for 64 bit processors. +// __sizeof_ptr is defined by the ARM compiler (armcc, armcpp). +// +#ifndef EA_PLATFORM_PTR_SIZE + #if defined(__WORDSIZE) // Defined by some variations of GCC. + #define EA_PLATFORM_PTR_SIZE ((__WORDSIZE) / 8) + #elif defined(_WIN64) || defined(__LP64__) || defined(_LP64) || defined(_M_IA64) || defined(__ia64__) || defined(__arch64__) || defined(__aarch64__) || defined(__mips64__) || defined(__64BIT__) || defined(__Ptr_Is_64) + #define EA_PLATFORM_PTR_SIZE 8 + #elif defined(__CC_ARM) && (__sizeof_ptr == 8) + #define EA_PLATFORM_PTR_SIZE 8 + #else + #define EA_PLATFORM_PTR_SIZE 4 + #endif +#endif + + + +// EA_PLATFORM_WORD_SIZE +// This defines the size of a machine word. This will be the same as +// the size of registers on the machine but not necessarily the same +// as the size of pointers on the machine. A number of 64 bit platforms +// have 64 bit registers but 32 bit pointers. +// +#ifndef EA_PLATFORM_WORD_SIZE + #define EA_PLATFORM_WORD_SIZE EA_PLATFORM_PTR_SIZE +#endif + +// EA_PLATFORM_MIN_MALLOC_ALIGNMENT +// This defines the minimal alignment that the platform's malloc +// implementation will return. This should be used when writing custom +// allocators to ensure that the alignment matches that of malloc +#ifndef EA_PLATFORM_MIN_MALLOC_ALIGNMENT + #if defined(EA_PLATFORM_APPLE) + #define EA_PLATFORM_MIN_MALLOC_ALIGNMENT 16 + #elif defined(EA_PLATFORM_ANDROID) && defined(EA_PROCESSOR_ARM) + #define EA_PLATFORM_MIN_MALLOC_ALIGNMENT 8 + #elif defined(EA_PLATFORM_ANDROID) && defined(EA_PROCESSOR_X86_64) + #define EA_PLATFORM_MIN_MALLOC_ALIGNMENT 8 + #else + #define EA_PLATFORM_MIN_MALLOC_ALIGNMENT (EA_PLATFORM_PTR_SIZE * 2) + #endif +#endif + + +// EA_MISALIGNED_SUPPORT_LEVEL +// Specifies if the processor can read and write built-in types that aren't +// naturally aligned. +// 0 - not supported. Likely causes an exception. +// 1 - supported but slow. +// 2 - supported and fast. +// +#ifndef EA_MISALIGNED_SUPPORT_LEVEL + #if defined(EA_PROCESSOR_X86_64) + #define EA_MISALIGNED_SUPPORT_LEVEL 2 + #else + #define EA_MISALIGNED_SUPPORT_LEVEL 0 + #endif +#endif + +// Macro to determine if a Windows API partition is enabled. Always false on non Microsoft platforms. +#if !defined(EA_WINAPI_FAMILY_PARTITION) + #define EA_WINAPI_FAMILY_PARTITION(Partition) (0) +#endif + + +// EA_CACHE_LINE_SIZE +// Specifies the cache line size broken down by compile target. +// This the expected best guess values for the targets that we can make at compilation time. + +#ifndef EA_CACHE_LINE_SIZE + #if defined(EA_PROCESSOR_X86) + #define EA_CACHE_LINE_SIZE 32 // This is the minimum possible value. + #elif defined(EA_PROCESSOR_X86_64) + #define EA_CACHE_LINE_SIZE 64 // This is the minimum possible value + #elif defined(EA_PROCESSOR_ARM32) + #define EA_CACHE_LINE_SIZE 32 // This varies between implementations and is usually 32 or 64. 
+ #elif defined(EA_PROCESSOR_ARM64) + #define EA_CACHE_LINE_SIZE 64 // Cache line Cortex-A8 (64 bytes) http://shervinemami.info/armAssembly.html however this remains to be mostly an assumption at this stage + #elif (EA_PLATFORM_WORD_SIZE == 4) + #define EA_CACHE_LINE_SIZE 32 // This is the minimum possible value + #else + #define EA_CACHE_LINE_SIZE 64 // This is the minimum possible value + #endif +#endif + + +#endif // INCLUDED_eaplatform_H + + + + + + + + + diff --git a/libkram/eastl/include/EABase/eabase.h b/libkram/eastl/include/EABase/eabase.h new file mode 100644 index 00000000..dab9e467 --- /dev/null +++ b/libkram/eastl/include/EABase/eabase.h @@ -0,0 +1,1011 @@ +/*----------------------------------------------------------------------------- + * eabase.h + * + * Copyright (c) Electronic Arts Inc. All rights reserved. + *---------------------------------------------------------------------------*/ + + +#ifndef INCLUDED_eabase_H +#define INCLUDED_eabase_H + + +// Identify the compiler and declare the EA_COMPILER_xxxx defines +#include + +// Identify traits which this compiler supports, or does not support +#include + +// Identify the platform and declare the EA_xxxx defines +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + +// Always include version.h for backwards compatibility. +#include + +// Define common SI unit macros +#include + + +// ------------------------------------------------------------------------ +// The C++ standard defines size_t as a built-in type. Some compilers are +// not standards-compliant in this respect, so we need an additional include. +// The case is similar with wchar_t under C++. + +#if defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_MSVC) || defined(EA_WCHAR_T_NON_NATIVE) || defined(EA_PLATFORM_SONY) + #if defined(EA_COMPILER_MSVC) + #pragma warning(push, 0) + #pragma warning(disable: 4265 4365 4836 4574) + #endif + #include + #if defined(EA_COMPILER_MSVC) + #pragma warning(pop) + #endif +#endif + +// ------------------------------------------------------------------------ +// Include stddef.h on Apple's clang compiler to ensure the ptrdiff_t type +// is defined. +#if defined(EA_COMPILER_CLANG) && defined(EA_PLATFORM_APPLE) + #include +#endif + +// ------------------------------------------------------------------------ +// Include assert.h on C11 supported compilers so we may allow static_assert usage +// http://en.cppreference.com/w/c/error/static_assert +// C11 standard(ISO / IEC 9899:2011) : +// 7.2/3 Diagnostics (p : 186) +#if !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201100L + #include +#endif + + +// ------------------------------------------------------------------------ +// By default, GCC defines NULL as ((void*)0), which is the +// C definition. This causes all sort of problems for C++ code, so it is +// worked around by undefining NULL. + +#if defined(NULL) + #undef NULL +#endif + + +// ------------------------------------------------------------------------ +// Define the NULL pointer. This is normally defined in , but we +// don't want to force a global dependency on that header, so the definition +// is duplicated here. + +#if defined(__cplusplus) + #define NULL 0 +#else + #define NULL ((void*)0) +#endif + + +// ------------------------------------------------------------------------ +// C98/99 Standard typedefs. 
From the ANSI ISO/IEC 9899 standards document +// Most recent versions of the gcc-compiler come with these defined in +// inttypes.h or stddef.h. Determining if they are predefined can be +// tricky, so we expect some problems on non-standard compilers + +//#if (defined(_INTTYPES_H) || defined(_INTTYPES_H_)) && !defined(PRId64) +// #error " was #included before eabase.h, but without __STDC_FORMAT_MACROS #defined. You must #include eabase.h or an equivalent before #including C99 headers, or you must define __STDC_FORMAT_MACRO before #including system headrs." +//#endif + +// ------------------------------------------------------------------------ +// We need to test this after we potentially include stddef.h, otherwise we +// would have put this into the compilertraits header. +#if !defined(EA_COMPILER_HAS_INTTYPES) && (!defined(_MSC_VER) || (_MSC_VER > 1500)) && (defined(EA_COMPILER_IS_C99) || defined(INT8_MIN) || defined(EA_COMPILER_HAS_C99_TYPES) || defined(_SN_STDINT_H)) + #define EA_COMPILER_HAS_INTTYPES +#endif + +#ifdef EA_COMPILER_HAS_INTTYPES // If the compiler supports inttypes... + // ------------------------------------------------------------------------ + // Include the stdint header to define and derive the required types. + // Additionally include inttypes.h as many compilers, including variations + // of GCC define things in inttypes.h that the C99 standard says goes + // in stdint.h. + // + // The C99 standard specifies that inttypes.h only define printf/scanf + // format macros if __STDC_FORMAT_MACROS is defined before #including + // inttypes.h. For consistency, we do that here. + #ifndef __STDC_FORMAT_MACROS + #define __STDC_FORMAT_MACROS + #endif + // The GCC PSP compiler defines standard int types (e.g. uint32_t) but not PRId8, etc. + // MSVC added support for inttypes.h header in VS2013. + #if !defined(EA_COMPILER_MSVC) || (defined(EA_COMPILER_MSVC) && EA_COMPILER_VERSION >= 1800) + #include // PRId8, SCNd8, etc. + #endif + #if defined(_MSC_VER) + #pragma warning(push, 0) + #endif + #include // int32_t, INT64_C, UINT8_MAX, etc. + #include // float_t, double_t, etc. + #include // FLT_EVAL_METHOD. + #if defined(_MSC_VER) + #pragma warning(pop) + #endif + + #if !defined(FLT_EVAL_METHOD) && (defined(__FLT_EVAL_METHOD__) || defined(_FEVAL)) // GCC 3.x defines __FLT_EVAL_METHOD__ instead of the C99 standard FLT_EVAL_METHOD. + #ifdef __FLT_EVAL_METHOD__ + #define FLT_EVAL_METHOD __FLT_EVAL_METHOD__ + #else + #define FLT_EVAL_METHOD _FEVAL + #endif + #endif + + // MinGW GCC (up to at least v4.3.0-20080502) mistakenly neglects to define float_t and double_t. + // This appears to be an acknowledged bug as of March 2008 and is scheduled to be fixed. + // Similarly, Android uses a mix of custom standard library headers which prior to SDK API level 21 + // don't define float_t and double_t. + #if defined(__MINGW32__) || (defined(EA_PLATFORM_ANDROID) && !(defined(EA_ANDROID_SDK_LEVEL) && EA_ANDROID_SDK_LEVEL >= 21)) + #if defined(__FLT_EVAL_METHOD__) + #if(__FLT_EVAL_METHOD__== 0) + typedef float float_t; + typedef double double_t; + #elif(__FLT_EVAL_METHOD__ == 1) + typedef double float_t; + typedef double double_t; + #elif(__FLT_EVAL_METHOD__ == 2) + typedef long double float_t; + typedef long double double_t; + #endif + #else + typedef float float_t; + typedef double double_t; + #endif + #endif + + // The CodeSourcery definitions of PRIxPTR and SCNxPTR are broken for 32 bit systems. 
+ #if defined(__SIZEOF_SIZE_T__) && (__SIZEOF_SIZE_T__ == 4) && (defined(__have_long64) || defined(__have_longlong64)) + #undef PRIdPTR + #define PRIdPTR "d" + #undef PRIiPTR + #define PRIiPTR "i" + #undef PRIoPTR + #define PRIoPTR "o" + #undef PRIuPTR + #define PRIuPTR "u" + #undef PRIxPTR + #define PRIxPTR "x" + #undef PRIXPTR + #define PRIXPTR "X" + + #undef SCNdPTR + #define SCNdPTR "d" + #undef SCNiPTR + #define SCNiPTR "i" + #undef SCNoPTR + #define SCNoPTR "o" + #undef SCNuPTR + #define SCNuPTR "u" + #undef SCNxPTR + #define SCNxPTR "x" + #endif +#else // else we must implement types ourselves. + + #if !defined(__BIT_TYPES_DEFINED__) && !defined(__int8_t_defined) + typedef signed char int8_t; //< 8 bit signed integer + #endif + #if !defined( __int8_t_defined ) + typedef signed short int16_t; //< 16 bit signed integer + typedef signed int int32_t; //< 32 bit signed integer. This works for both 32 bit and 64 bit platforms, as we assume the LP64 is followed. + #define __int8_t_defined + #endif + typedef unsigned char uint8_t; //< 8 bit unsigned integer + typedef unsigned short uint16_t; //< 16 bit unsigned integer + #if !defined( __uint32_t_defined ) + typedef unsigned int uint32_t; //< 32 bit unsigned integer. This works for both 32 bit and 64 bit platforms, as we assume the LP64 is followed. + #define __uint32_t_defined + #endif + + // According to the C98/99 standard, FLT_EVAL_METHOD defines control the + // width used for floating point _t types. + #if defined(_MSC_VER) && _MSC_VER >= 1800 + // MSVC's math.h provides float_t, double_t under this condition. + #elif defined(FLT_EVAL_METHOD) + #if (FLT_EVAL_METHOD == 0) + typedef float float_t; + typedef double double_t; + #elif (FLT_EVAL_METHOD == 1) + typedef double float_t; + typedef double double_t; + #elif (FLT_EVAL_METHOD == 2) + typedef long double float_t; + typedef long double double_t; + #endif + #endif + + #if defined(EA_COMPILER_MSVC) + typedef signed __int64 int64_t; + typedef unsigned __int64 uint64_t; + + #else + typedef signed long long int64_t; + typedef unsigned long long uint64_t; + #endif +#endif + + +// ------------------------------------------------------------------------ +// macros for declaring constants in a portable way. +// +// e.g. int64_t x = INT64_C(1234567812345678); +// e.g. int64_t x = INT64_C(0x1111111122222222); +// e.g. uint64_t x = UINT64_C(0x1111111122222222); +// +// Microsoft VC++'s definitions of INT8_C/UINT8_C/INT16_C/UINT16_C are like so: +// #define INT8_C(x) (x) +// #define INT16_C(x) (x) +// #define UINT8_C(x) (x) +// #define UINT16_C(x) (x) +// To consider: undefine Microsoft's and use the casting versions below. +// ------------------------------------------------------------------------ + +#ifndef INT8_C_DEFINED // If the user hasn't already defined these... + #define INT8_C_DEFINED + + #ifndef INT8_C + #define INT8_C(x) int8_t(x) // For the majority of compilers and platforms, long is 32 bits and long long is 64 bits. + #endif + #ifndef UINT8_C + #define UINT8_C(x) uint8_t(x) + #endif + #ifndef INT16_C + #define INT16_C(x) int16_t(x) + #endif + #ifndef UINT16_C + #define UINT16_C(x) uint16_t(x) // Possibly we should make this be uint16_t(x##u). Let's see how compilers react before changing this. + #endif + #ifndef INT32_C + #define INT32_C(x) x##L + #endif + #ifndef UINT32_C + #define UINT32_C(x) x##UL + #endif + #ifndef INT64_C + #define INT64_C(x) x##LL // The way to deal with this is to compare ULONG_MAX to 0xffffffff and if not equal, then remove the L. 
+ #endif + #ifndef UINT64_C + #define UINT64_C(x) x##ULL // We need to follow a similar approach for LL. + #endif + #ifndef UINTMAX_C + #define UINTMAX_C(x) UINT64_C(x) + #endif +#endif + +// ------------------------------------------------------------------------ +// type sizes +#ifndef INT8_MAX_DEFINED // If the user hasn't already defined these... + #define INT8_MAX_DEFINED + + // The value must be 2^(n-1)-1 + #ifndef INT8_MAX + #define INT8_MAX 127 + #endif + #ifndef INT16_MAX + #define INT16_MAX 32767 + #endif + #ifndef INT32_MAX + #define INT32_MAX 2147483647 + #endif + #ifndef INT64_MAX + #define INT64_MAX INT64_C(9223372036854775807) + #endif + #ifndef INTMAX_MAX + #define INTMAX_MAX INT64_MAX + #endif + #ifndef INTPTR_MAX + #if EA_PLATFORM_PTR_SIZE == 4 + #define INTPTR_MAX INT32_MAX + #else + #define INTPTR_MAX INT64_MAX + #endif + #endif + + // The value must be either -2^(n-1) or 1-2(n-1). + #ifndef INT8_MIN + #define INT8_MIN -128 + #endif + #ifndef INT16_MIN + #define INT16_MIN -32768 + #endif + #ifndef INT32_MIN + #define INT32_MIN (-INT32_MAX - 1) // -2147483648 + #endif + #ifndef INT64_MIN + #define INT64_MIN (-INT64_MAX - 1) // -9223372036854775808 + #endif + #ifndef INTMAX_MIN + #define INTMAX_MIN INT64_MIN + #endif + #ifndef INTPTR_MIN + #if EA_PLATFORM_PTR_SIZE == 4 + #define INTPTR_MIN INT32_MIN + #else + #define INTPTR_MIN INT64_MIN + #endif + #endif + + // The value must be 2^n-1 + #ifndef UINT8_MAX + #define UINT8_MAX 0xffU // 255 + #endif + #ifndef UINT16_MAX + #define UINT16_MAX 0xffffU // 65535 + #endif + #ifndef UINT32_MAX + #define UINT32_MAX UINT32_C(0xffffffff) // 4294967295 + #endif + #ifndef UINT64_MAX + #define UINT64_MAX UINT64_C(0xffffffffffffffff) // 18446744073709551615 + #endif + #ifndef UINTMAX_MAX + #define UINTMAX_MAX UINT64_MAX + #endif + #ifndef UINTPTR_MAX + #if EA_PLATFORM_PTR_SIZE == 4 + #define UINTPTR_MAX UINT32_MAX + #else + #define UINTPTR_MAX UINT64_MAX + #endif + #endif +#endif + +#ifndef FLT_EVAL_METHOD + #define FLT_EVAL_METHOD 0 + typedef float float_t; + typedef double double_t; +#endif + +#if defined(EA_COMPILER_HAS_INTTYPES) && (!defined(EA_COMPILER_MSVC) || (defined(EA_COMPILER_MSVC) && EA_COMPILER_VERSION >= 1800)) + #define EA_COMPILER_HAS_C99_FORMAT_MACROS +#endif + +#ifndef EA_COMPILER_HAS_C99_FORMAT_MACROS + // ------------------------------------------------------------------------ + // sized printf and scanf format specifiers + // See the C99 standard, section 7.8.1 -- Macros for format specifiers. + // + // The C99 standard specifies that inttypes.h only define printf/scanf + // format macros if __STDC_FORMAT_MACROS is defined before #including + // inttypes.h. For consistency, we define both __STDC_FORMAT_MACROS and + // the printf format specifiers here. We also skip the "least/most" + // variations of these specifiers, as we've decided to do so with + // basic types. + // + // For 64 bit systems, we assume the LP64 standard is followed + // (as opposed to ILP64, etc.) For 32 bit systems, we assume the + // ILP32 standard is followed. See: + // http://www.opengroup.org/public/tech/aspen/lp64_wp.htm + // for information about this. Thus, on both 32 and 64 bit platforms, + // %l refers to 32 bit data while %ll refers to 64 bit data. + + #ifndef __STDC_FORMAT_MACROS + #define __STDC_FORMAT_MACROS + #endif + + #if defined(EA_COMPILER_MSVC) // VC++ 7.1+ understands long long as a data type but doesn't accept %ll as a printf specifier. 
+ #define EA_PRI_64_LENGTH_SPECIFIER "I64" + #define EA_SCN_64_LENGTH_SPECIFIER "I64" + #else + #define EA_PRI_64_LENGTH_SPECIFIER "ll" + #define EA_SCN_64_LENGTH_SPECIFIER "ll" + #endif // It turns out that some platforms use %q to represent a 64 bit value, but these are not relevant to us at this time. + + // Printf format specifiers + #if defined(EA_COMPILER_IS_C99) || defined(EA_COMPILER_GNUC) + #define PRId8 "hhd" + #define PRIi8 "hhi" + #define PRIo8 "hho" + #define PRIu8 "hhu" + #define PRIx8 "hhx" + #define PRIX8 "hhX" + #else // VC++, Borland, etc. which have no way to specify 8 bit values other than %c. + #define PRId8 "c" // This may not work properly but it at least will not crash. Try using 16 bit versions instead. + #define PRIi8 "c" // " + #define PRIo8 "o" // " + #define PRIu8 "u" // " + #define PRIx8 "x" // " + #define PRIX8 "X" // " + #endif + + #define PRId16 "hd" + #define PRIi16 "hi" + #define PRIo16 "ho" + #define PRIu16 "hu" + #define PRIx16 "hx" + #define PRIX16 "hX" + + #define PRId32 "d" // This works for both 32 bit and 64 bit systems, as we assume LP64 conventions. + #define PRIi32 "i" + #define PRIo32 "o" + #define PRIu32 "u" + #define PRIx32 "x" + #define PRIX32 "X" + + #define PRId64 EA_PRI_64_LENGTH_SPECIFIER "d" + #define PRIi64 EA_PRI_64_LENGTH_SPECIFIER "i" + #define PRIo64 EA_PRI_64_LENGTH_SPECIFIER "o" + #define PRIu64 EA_PRI_64_LENGTH_SPECIFIER "u" + #define PRIx64 EA_PRI_64_LENGTH_SPECIFIER "x" + #define PRIX64 EA_PRI_64_LENGTH_SPECIFIER "X" + + #if (EA_PLATFORM_PTR_SIZE == 4) + #define PRIdPTR PRId32 // Usage of pointer values will generate warnings with + #define PRIiPTR PRIi32 // some compilers because they are defined in terms of + #define PRIoPTR PRIo32 // integers. However, you can't simply use "p" because + #define PRIuPTR PRIu32 // 'p' is interpreted in a specific and often different + #define PRIxPTR PRIx32 // way by the library. + #define PRIXPTR PRIX32 + #elif (EA_PLATFORM_PTR_SIZE == 8) + #define PRIdPTR PRId64 + #define PRIiPTR PRIi64 + #define PRIoPTR PRIo64 + #define PRIuPTR PRIu64 + #define PRIxPTR PRIx64 + #define PRIXPTR PRIX64 + #endif + + // Scanf format specifiers + #if defined(EA_COMPILER_IS_C99) || defined(EA_COMPILER_GNUC) + #define SCNd8 "hhd" + #define SCNi8 "hhi" + #define SCNo8 "hho" + #define SCNu8 "hhu" + #define SCNx8 "hhx" + #else // VC++, Borland, etc. which have no way to specify 8 bit values other than %c. + #define SCNd8 "c" // This will not work properly but it at least will not crash. Try using 16 bit versions instead. + #define SCNi8 "c" // " + #define SCNo8 "c" // " + #define SCNu8 "c" // " + #define SCNx8 "c" // " + #endif + + #define SCNd16 "hd" + #define SCNi16 "hi" + #define SCNo16 "ho" + #define SCNu16 "hu" + #define SCNx16 "hx" + + #define SCNd32 "d" // This works for both 32 bit and 64 bit systems, as we assume LP64 conventions. 
+ #define SCNi32 "i" + #define SCNo32 "o" + #define SCNu32 "u" + #define SCNx32 "x" + + #define SCNd64 EA_SCN_64_LENGTH_SPECIFIER "d" + #define SCNi64 EA_SCN_64_LENGTH_SPECIFIER "i" + #define SCNo64 EA_SCN_64_LENGTH_SPECIFIER "o" + #define SCNu64 EA_SCN_64_LENGTH_SPECIFIER "u" + #define SCNx64 EA_SCN_64_LENGTH_SPECIFIER "x" + + #if defined(EA_COMPILER_MSVC) && (EA_COMPILER_VERSION >= 1900) + #define SCNdPTR PRIdPTR + #define SCNiPTR PRIiPTR + #define SCNoPTR PRIoPTR + #define SCNuPTR PRIuPTR + #define SCNxPTR PRIxPTR + #elif (EA_PLATFORM_PTR_SIZE == 4) + #define SCNdPTR SCNd32 // Usage of pointer values will generate warnings with + #define SCNiPTR SCNi32 // some compilers because they are defined in terms of + #define SCNoPTR SCNo32 // integers. However, you can't simply use "p" because + #define SCNuPTR SCNu32 // 'p' is interpreted in a specific and often different + #define SCNxPTR SCNx32 // way by the library. + #elif (EA_PLATFORM_PTR_SIZE == 8) + #define SCNdPTR SCNd64 + #define SCNiPTR SCNi64 + #define SCNoPTR SCNo64 + #define SCNuPTR SCNu64 + #define SCNxPTR SCNx64 + #endif +#endif + + +// ------------------------------------------------------------------------ +// bool8_t +// The definition of a bool8_t is controversial with some, as it doesn't +// act just like built-in bool. For example, you can assign -100 to it. +// +#ifndef BOOL8_T_DEFINED // If the user hasn't already defined this... + #define BOOL8_T_DEFINED + #if defined(EA_COMPILER_MSVC) || (defined(EA_COMPILER_INTEL) && defined(EA_PLATFORM_WINDOWS)) + #if defined(__cplusplus) + typedef bool bool8_t; + #else + typedef int8_t bool8_t; + #endif + #else // EA_COMPILER_GNUC generally uses 4 bytes per bool. + typedef int8_t bool8_t; + #endif +#endif + + +// ------------------------------------------------------------------------ +// intptr_t / uintptr_t +// Integer type guaranteed to be big enough to hold +// a native pointer ( intptr_t is defined in STDDEF.H ) +// +#if !defined(_INTPTR_T_DEFINED) && !defined(_intptr_t_defined) && !defined(EA_COMPILER_HAS_C99_TYPES) + #if (EA_PLATFORM_PTR_SIZE == 4) + typedef int32_t intptr_t; + #elif (EA_PLATFORM_PTR_SIZE == 8) + typedef int64_t intptr_t; + #endif + + #define _intptr_t_defined + #define _INTPTR_T_DEFINED +#endif + +#if !defined(_UINTPTR_T_DEFINED) && !defined(_uintptr_t_defined) && !defined(EA_COMPILER_HAS_C99_TYPES) + #if (EA_PLATFORM_PTR_SIZE == 4) + typedef uint32_t uintptr_t; + #elif (EA_PLATFORM_PTR_SIZE == 8) + typedef uint64_t uintptr_t; + #endif + + #define _uintptr_t_defined + #define _UINTPTR_T_DEFINED +#endif + +#if !defined(EA_COMPILER_HAS_INTTYPES) + #ifndef INTMAX_T_DEFINED + #define INTMAX_T_DEFINED + + // At this time, all supported compilers have int64_t as the max + // integer type. Some compilers support a 128 bit integer type, + // but in some cases it is not a true int128_t but rather a + // crippled data type. Also, it turns out that Unix 64 bit ABIs + // require that intmax_t be int64_t and nothing larger. So we + // play it safe here and set intmax_t to int64_t, even though + // an int128_t type may exist. + + typedef int64_t intmax_t; + typedef uint64_t uintmax_t; + #endif +#endif + + +// ------------------------------------------------------------------------ +// ssize_t +// signed equivalent to size_t. +// This is defined by GCC (except the QNX implementation of GCC) but not by other compilers. +// +#if !defined(__GNUC__) + // As of this writing, all non-GCC compilers significant to us implement + // uintptr_t the same as size_t. 
However, this isn't guaranteed to be + // so for all compilers, as size_t may be based on int, long, or long long. + #if !defined(_SSIZE_T_) && !defined(_SSIZE_T_DEFINED) + #define _SSIZE_T_ + #define _SSIZE_T_DEFINED + + #if defined(_MSC_VER) && (EA_PLATFORM_PTR_SIZE == 8) + typedef __int64 ssize_t; + #else + typedef long ssize_t; + #endif + #endif +#else + #include +#endif + + +// ------------------------------------------------------------------------ +// Character types +// +#if defined(EA_COMPILER_MSVC) + #if defined(EA_WCHAR_T_NON_NATIVE) + // In this case, wchar_t is not defined unless we include + // wchar.h or if the compiler makes it built-in. + #ifdef EA_COMPILER_MSVC + #pragma warning(push, 3) + #endif + #include + #ifdef EA_COMPILER_MSVC + #pragma warning(pop) + #endif + #endif +#endif + + +// ------------------------------------------------------------------------ +// char8_t -- Guaranteed to be equal to the compiler's char data type. +// Some compilers implement char8_t as unsigned, though char +// is usually set to be signed. +// +// char16_t -- This is set to be an unsigned 16 bit value. If the compiler +// has wchar_t as an unsigned 16 bit value, then char16_t is +// set to be the same thing as wchar_t in order to allow the +// user to use char16_t with standard wchar_t functions. +// +// char32_t -- This is set to be an unsigned 32 bit value. If the compiler +// has wchar_t as an unsigned 32 bit value, then char32_t is +// set to be the same thing as wchar_t in order to allow the +// user to use char32_t with standard wchar_t functions. +// +// EA_CHAR8_UNIQUE +// EA_CHAR16_NATIVE +// EA_CHAR32_NATIVE +// EA_WCHAR_UNIQUE +// +// VS2010 unilaterally defines char16_t and char32_t in its yvals.h header +// unless _HAS_CHAR16_T_LANGUAGE_SUPPORT or _CHAR16T are defined. +// However, VS2010 does not support the C++0x u"" and U"" string literals, +// which makes its definition of char16_t and char32_t somewhat useless. +// Until VC++ supports string literals, the build system should define +// _CHAR16T and let EABase define char16_t and EA_CHAR16. +// +// GCC defines char16_t and char32_t in the C compiler in -std=gnu99 mode, +// as __CHAR16_TYPE__ and __CHAR32_TYPE__, and for the C++ compiler +// in -std=c++0x and -std=gnu++0x modes, as char16_t and char32_t too. +// +// The EA_WCHAR_UNIQUE symbol is defined to 1 if wchar_t is distinct from +// char8_t, char16_t, and char32_t, and defined to 0 if not. In some cases, +// if the compiler does not support char16_t/char32_t, one of these two types +// is typically a typedef or define of wchar_t. For compilers that support +// the C++11 unicode character types often overloads must be provided to +// support existing code that passes a wide char string to a function that +// takes a unicode string. +// +// The EA_CHAR8_UNIQUE symbol is defined to 1 if char8_t is distinct type +// from char in the type system, and defined to 0 if otherwise. + +#if !defined(EA_CHAR16_NATIVE) + // To do: Change this to be based on EA_COMPILER_NO_NEW_CHARACTER_TYPES. 
+ #if defined(_MSC_VER) && (_MSC_VER >= 1600) && defined(_HAS_CHAR16_T_LANGUAGE_SUPPORT) && _HAS_CHAR16_T_LANGUAGE_SUPPORT // VS2010+ + #define EA_CHAR16_NATIVE 1 + #elif defined(EA_COMPILER_CLANG) && defined(EA_COMPILER_CPP11_ENABLED) + #if __has_feature(cxx_unicode_literals) + #define EA_CHAR16_NATIVE 1 + #elif (EA_COMPILER_VERSION >= 300) && !(defined(EA_PLATFORM_IPHONE) || defined(EA_PLATFORM_OSX)) + #define EA_CHAR16_NATIVE 1 + #elif defined(EA_PLATFORM_APPLE) + #define EA_CHAR16_NATIVE 1 + #else + #define EA_CHAR16_NATIVE 0 + #endif + #elif defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 404) && defined(__CHAR16_TYPE__) && defined(EA_COMPILER_CPP11_ENABLED)// EDG 4.4+. + #define EA_CHAR16_NATIVE 1 + #elif defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4004) && !defined(EA_COMPILER_EDG) && (defined(EA_COMPILER_CPP11_ENABLED) || defined(__STDC_VERSION__)) // g++ (C++ compiler) 4.4+ with -std=c++0x or gcc (C compiler) 4.4+ with -std=gnu99 + #define EA_CHAR16_NATIVE 1 + #else + #define EA_CHAR16_NATIVE 0 + #endif +#endif + +#if !defined(EA_CHAR32_NATIVE) // Microsoft currently ties char32_t language support to char16_t language support. So we use CHAR16_T here. + // To do: Change this to be based on EA_COMPILER_NO_NEW_CHARACTER_TYPES. + #if defined(_MSC_VER) && (_MSC_VER >= 1600) && defined(_HAS_CHAR16_T_LANGUAGE_SUPPORT) && _HAS_CHAR16_T_LANGUAGE_SUPPORT // VS2010+ + #define EA_CHAR32_NATIVE 1 + #elif defined(EA_COMPILER_CLANG) && defined(EA_COMPILER_CPP11_ENABLED) + #if __has_feature(cxx_unicode_literals) + #define EA_CHAR32_NATIVE 1 + #elif (EA_COMPILER_VERSION >= 300) && !(defined(EA_PLATFORM_IPHONE) || defined(EA_PLATFORM_OSX)) + #define EA_CHAR32_NATIVE 1 + #elif defined(EA_PLATFORM_APPLE) + #define EA_CHAR32_NATIVE 1 + #else + #define EA_CHAR32_NATIVE 0 + #endif + #elif defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 404) && defined(__CHAR32_TYPE__) && defined(EA_COMPILER_CPP11_ENABLED)// EDG 4.4+. + #define EA_CHAR32_NATIVE 1 + #elif defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4004) && !defined(EA_COMPILER_EDG) && (defined(EA_COMPILER_CPP11_ENABLED) || defined(__STDC_VERSION__)) // g++ (C++ compiler) 4.4+ with -std=c++0x or gcc (C compiler) 4.4+ with -std=gnu99 + #define EA_CHAR32_NATIVE 1 + #else + #define EA_CHAR32_NATIVE 0 + #endif +#endif + + +#if EA_CHAR16_NATIVE || EA_CHAR32_NATIVE + #define EA_WCHAR_UNIQUE 1 +#else + #define EA_WCHAR_UNIQUE 0 +#endif + + +// EA_CHAR8_UNIQUE +// +// Check for char8_t support in the cpp type system. Moving forward from c++20, +// the char8_t type allows users to overload function for character encoding. +// +// EA_CHAR8_UNIQUE is 1 when the type is a unique in the type system and +// can there be used as a valid overload. EA_CHAR8_UNIQUE is 0 otherwise. +// +// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0482r6.html +// +#ifdef __cpp_char8_t + #define CHAR8_T_DEFINED + #define EA_CHAR8_UNIQUE 1 +#else + #define EA_CHAR8_UNIQUE 0 +#endif + + +#ifndef CHAR8_T_DEFINED // If the user hasn't already defined these... + #define CHAR8_T_DEFINED + #if defined(EA_PLATFORM_APPLE) + #define char8_t char // The Apple debugger is too stupid to realize char8_t is typedef'd to char, so we #define it. + #else + typedef char char8_t; + #endif + + #if EA_CHAR16_NATIVE + // In C++, char16_t and char32_t are already defined by the compiler. + // In MS C, char16_t and char32_t are already defined by the compiler/standard library. 
+ // In GCC C, __CHAR16_TYPE__ and __CHAR32_TYPE__ are defined instead, and we must define char16_t and char32_t from these. + #if defined(__GNUC__) && !defined(__GXX_EXPERIMENTAL_CXX0X__) && defined(__CHAR16_TYPE__) // If using GCC and compiling in C... + typedef __CHAR16_TYPE__ char16_t; + typedef __CHAR32_TYPE__ char32_t; + #endif + #elif (EA_WCHAR_SIZE == 2) + #if (defined(_MSC_VER) && (_MSC_VER >= 1600)) // if VS2010+ or using platforms that use Dinkumware under a compiler that doesn't natively support C++11 char16_t. + #if !defined(_CHAR16T) + #define _CHAR16T + #endif + #if !defined(_HAS_CHAR16_T_LANGUAGE_SUPPORT) || !_HAS_CHAR16_T_LANGUAGE_SUPPORT + typedef wchar_t char16_t; + typedef uint32_t char32_t; + #endif + #else + typedef wchar_t char16_t; + typedef uint32_t char32_t; + #endif + #else + typedef uint16_t char16_t; + #if defined(__cplusplus) + typedef wchar_t char32_t; + #else + typedef uint32_t char32_t; + #endif + #endif +#endif + + +// CHAR8_MIN, CHAR8_MAX, etc. +// +#define EA_LIMITS_DIGITS_S(T) ((sizeof(T) * 8) - 1) +#define EA_LIMITS_DIGITS_U(T) ((sizeof(T) * 8)) +#define EA_LIMITS_DIGITS(T) ((EA_LIMITS_IS_SIGNED(T) ? EA_LIMITS_DIGITS_S(T) : EA_LIMITS_DIGITS_U(T))) +#define EA_LIMITS_IS_SIGNED(T) ((T)(-1) < 0) +#define EA_LIMITS_MIN_S(T) ((T)((T)1 << EA_LIMITS_DIGITS_S(T))) +#define EA_LIMITS_MIN_U(T) ((T)0) +#define EA_LIMITS_MIN(T) ((EA_LIMITS_IS_SIGNED(T) ? EA_LIMITS_MIN_S(T) : EA_LIMITS_MIN_U(T))) +#define EA_LIMITS_MAX_S(T) ((T)(((((T)1 << (EA_LIMITS_DIGITS(T) - 1)) - 1) << 1) + 1)) +#define EA_LIMITS_MAX_U(T) ((T)~(T)0) +#define EA_LIMITS_MAX(T) ((EA_LIMITS_IS_SIGNED(T) ? EA_LIMITS_MAX_S(T) : EA_LIMITS_MAX_U(T))) + +#if !defined(CHAR8_MIN) + #define CHAR8_MIN EA_LIMITS_MIN(char8_t) +#endif + +#if !defined(CHAR8_MAX) + #define CHAR8_MAX EA_LIMITS_MAX(char8_t) +#endif + +#if !defined(CHAR16_MIN) + #define CHAR16_MIN EA_LIMITS_MIN(char16_t) +#endif + +#if !defined(CHAR16_MAX) + #define CHAR16_MAX EA_LIMITS_MAX(char16_t) +#endif + +#if !defined(CHAR32_MIN) + #define CHAR32_MIN EA_LIMITS_MIN(char32_t) +#endif + +#if !defined(CHAR32_MAX) + #define CHAR32_MAX EA_LIMITS_MAX(char32_t) +#endif + + + +// EA_CHAR8 / EA_CHAR16 / EA_CHAR32 / EA_WCHAR +// +// Supports usage of portable string constants. +// +// Example usage: +// const char16_t* str = EA_CHAR16("Hello world"); +// const char32_t* str = EA_CHAR32("Hello world"); +// const char16_t c = EA_CHAR16('\x3001'); +// const char32_t c = EA_CHAR32('\x3001'); +// +#ifndef EA_CHAR8 + #if EA_CHAR8_UNIQUE + #define EA_CHAR8(s) u8 ## s + #else + #define EA_CHAR8(s) s + #endif +#endif + +#ifndef EA_WCHAR + #define EA_WCHAR_(s) L ## s + #define EA_WCHAR(s) EA_WCHAR_(s) +#endif + +#ifndef EA_CHAR16 + #if EA_CHAR16_NATIVE && !defined(_MSC_VER) // Microsoft doesn't support char16_t string literals. + #define EA_CHAR16_(s) u ## s + #define EA_CHAR16(s) EA_CHAR16_(s) + #elif (EA_WCHAR_SIZE == 2) + #if defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(__cplusplus) // VS2015 supports u"" string literals. + #define EA_CHAR16_(s) u ## s + #define EA_CHAR16(s) EA_CHAR16_(s) + #else + #define EA_CHAR16_(s) L ## s + #define EA_CHAR16(s) EA_CHAR16_(s) + #endif + #else + //#define EA_CHAR16(s) // Impossible to implement efficiently. + #endif +#endif + +#ifndef EA_CHAR32 + #if EA_CHAR32_NATIVE && !defined(_MSC_VER) // Microsoft doesn't support char32_t string literals. 
+ #define EA_CHAR32_(s) U ## s + #define EA_CHAR32(s) EA_CHAR32_(s) + #elif (EA_WCHAR_SIZE == 2) + #if defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(__cplusplus) // VS2015 supports u"" string literals. + #define EA_CHAR32_(s) U ## s + #define EA_CHAR32(s) EA_CHAR32_(s) + #else + //#define EA_CHAR32(s) // Impossible to implement. + #endif + #elif (EA_WCHAR_SIZE == 4) + #define EA_CHAR32_(s) L ## s + #define EA_CHAR32(s) EA_CHAR32_(s) + #else + #error Unexpected size of wchar_t + #endif +#endif + +// EAText8 / EAText16 +// +// Provided for backwards compatibility with older code. +// +#if defined(EABASE_ENABLE_EATEXT_MACROS) + #define EAText8(x) x + #define EAChar8(x) x + + #define EAText16(x) EA_CHAR16(x) + #define EAChar16(x) EA_CHAR16(x) +#endif + + + + +// ------------------------------------------------------------------------ +// EAArrayCount +// +// Returns the count of items in a built-in C array. This is a common technique +// which is often used to help properly calculate the number of items in an +// array at runtime in order to prevent overruns, etc. +// +// Example usage: +// int array[75]; +// size_t arrayCount = EAArrayCount(array); // arrayCount is 75. +// +#if defined(EA_COMPILER_NO_CONSTEXPR) + #ifndef EAArrayCount + #define EAArrayCount(x) (sizeof(x) / sizeof(x[0])) + #endif +#else + // This C++11 version is a little smarter than the macro version above; + // it can tell the difference between arrays and pointers. Other simpler + // templated versions have failed in various subtle ways. + + template + char (&EAArraySizeHelper(T (&x)[N]))[N]; + + template + char (&EAArraySizeHelper(T (&&x)[N]))[N]; + + #define EAArrayCount(x) (sizeof(EAArraySizeHelper(x))) +#endif + + +// ------------------------------------------------------------------------ +// static_assert +// +// C++11 static_assert (a.k.a. compile-time assert). +// +// Specification: +// void static_assert(bool const_expression, const char* description); +// +// Example usage: +// static_assert(sizeof(int) == 4, "int must be 32 bits"); +// +#if defined(_MSC_VER) && (_MSC_VER >= 1600) && defined(__cplusplus) + // static_assert is defined by the compiler for both C and C++. +#elif !defined(__cplusplus) && defined(EA_PLATFORM_ANDROID) && ((defined(__STDC_VERSION__) && __STDC_VERSION__ < 201100L) || !defined(__STDC_VERSION__)) + // AndroidNDK does not support static_assert despite claiming it's a C11 compiler + #define NEED_CUSTOM_STATIC_ASSERT +#elif defined(__clang__) && defined(__cplusplus) + // We need to separate these checks on a new line, as the pre-processor on other compilers will fail on the _has_feature macros + #if !(__has_feature(cxx_static_assert) || __has_extension(cxx_static_assert)) + #define NEED_CUSTOM_STATIC_ASSERT + #endif +#elif defined(__GNUC__) && (defined(__GXX_EXPERIMENTAL_CXX0X__) || (defined(__cplusplus) && (__cplusplus >= 201103L))) + // static_assert is defined by the compiler. +#elif defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 401) && defined(EA_COMPILER_CPP11_ENABLED) + // static_assert is defined by the compiler. +#elif !defined(__cplusplus) && defined(__GLIBC__) && defined(__USE_ISOC11) + // static_assert is defined by the compiler. +#elif !defined(__cplusplus) && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201100L + // static_assert is defined by the compiler. 
+#else + #define NEED_CUSTOM_STATIC_ASSERT +#endif + +#ifdef NEED_CUSTOM_STATIC_ASSERT + #ifdef __GNUC__ + // On GCC the 'unused' attribute can be used to indicate a typedef is not actually used + // (such as in the static_assert implementation below). New versions of GCC generate + // warnings for unused typedefs in function/method scopes. + #define EA_STATIC_ASSERT_UNUSED_ATTRIBUTE __attribute__((unused)) + #else + #define EA_STATIC_ASSERT_UNUSED_ATTRIBUTE + #endif + #define EA_STATIC_ASSERT_TOKEN_PASTE(a,b) a ## b + #define EA_STATIC_ASSERT_CONCATENATE_HELPER(a,b) EA_STATIC_ASSERT_TOKEN_PASTE(a,b) + + #if defined(__COUNTER__) // If this extension is available, which allows multiple statements per line... + #define static_assert(expression, description) typedef char EA_STATIC_ASSERT_CONCATENATE_HELPER(compileTimeAssert,__COUNTER__) [((expression) != 0) ? 1 : -1] EA_STATIC_ASSERT_UNUSED_ATTRIBUTE + #else + #define static_assert(expression, description) typedef char EA_STATIC_ASSERT_CONCATENATE_HELPER(compileTimeAssert,__LINE__) [((expression) != 0) ? 1 : -1] EA_STATIC_ASSERT_UNUSED_ATTRIBUTE + #endif + + #undef NEED_CUSTOM_STATIC_ASSERT +#endif + +// ------------------------------------------------------------------------ +// EA_IS_ENABLED +// +// EA_IS_ENABLED is intended to be used for detecting if compile time features are enabled or disabled. +// +// It has some advantages over using a standard #if or #ifdef tests: +// 1) Fails to compile when passes numeric macro values. Valid options are strictly enabled or disabled. +// 2) Fails to compile when passed undefined macro values rather than disabling by default +// 3) Fails to compile when the passed macro is defined to but empty +// +// To use the macro, the calling code should create a define for the feature to enable or disable. This feature define +// must be set to either EA_ENABLED or EA_DISABLED. (Do not try to set the feature define directly to some other +// value.) +// +// Note: These macros are analogous to the Frostbite macro FB_USING used in combination with FB_OFF / FB_ON and are +// designed to be compatible to support gradual migration. +// +// Example usage: +// +// // The USER_PROVIDED_FEATURE_DEFINE should be defined as either +// // EA_ENABLED or EA_DISABLED. +// #define USER_PROVIDED_FEATURE_DEFINE EA_ENABLED +// +// #if EA_IS_ENABLED(USER_PROVIDED_FEATURE_DEFINE) +// // USER_PROVIDED_FEATURE_DEFINE is enabled +// #else +// // USER_PROVIDED_FEATURE_DEFINE is disabled +// #endif +// +#define EA_ENABLED 111- +#define EA_DISABLED 333- +// NOTE: Numeric values for x will produce a parse error while empty values produce a divide by zero, and the test is a bool for proper negation behavior +#define EA_IS_ENABLED(x) (333 == 333 * 111 / ((x 0) * (((x 0) == 333 ? 1 : 0) + ((x 0) == 111 ? 1 : 0)))) + + + +// Define int128_t / uint128_t types. +// NOTE(rparolin): include file at the end because we want all the signed integral types defined. +#ifdef __cplusplus + #include +#endif + +#endif // Header include guard + + + + diff --git a/libkram/eastl/include/EABase/eahave.h b/libkram/eastl/include/EABase/eahave.h new file mode 100644 index 00000000..b0987be7 --- /dev/null +++ b/libkram/eastl/include/EABase/eahave.h @@ -0,0 +1,877 @@ +/*----------------------------------------------------------------------------- + * eahave.h + * + * Copyright (c) Electronic Arts Inc. All rights reserved. 
+ *---------------------------------------------------------------------------*/ + + +/*----------------------------------------------------------------------------- + This file's functionality is preliminary and won't be considered stable until + a future EABase version. + *---------------------------------------------------------------------------*/ + + +/*----------------------------------------------------------------------------- + This header identifies if the given facilities are available in the + standard build environment the current compiler/linker/standard library/ + operating system combination. This file may in some cases #include standard + headers in order to make availability determinations, such as to check + compiler or SDK version numbers. However, it cannot be perfect. + This header does not identify compiler features, as those are defined in + eacompiler.h and eacompilertraits.h. Rather this header is about library support. + This header does not identify platform or library conventions either, such + as whether the file paths use \ or / for directory separators. + + We provide three types of HAVE features here: + + - EA_HAVE_XXX_FEATURE - Have compiler feature. + Identifies if the compiler has or lacks some feature in the + current build. Sometimes you need to check to see if the + compiler is running in some mode in able to write portable code + against it. For example, some compilers (e.g. VC++) have a + mode in which all language extensions are disabled. If you want + to write code that works with that but still uses the extensions + when available then you can check #if defined(EA_HAVE_EXTENSIONS_FEATURE). + Features can be forcibly cancelled via EA_NO_HAVE_XXX_FEATURE. + EA_NO_HAVE is useful for a build system or user to override the + defaults because it happens to know better. + + - EA_HAVE_XXX_H - Have header file information. + Identifies if a given header file is available to the current + compile configuration. For example, some compilers provide a + malloc.h header, while others don't. For the former we define + EA_HAVE_MALLOC_H, while for the latter it remains undefined. + If a header is missing then it may still be that the functions + the header usually declares are declared in some other header. + EA_HAVE_XXX does not include the possibility that our own code + provides versions of these headers, and in fact a purpose of + EA_HAVE_XXX is to decide if we should be using our own because + the system doesn't provide one. + Header availability can be forcibly cancelled via EA_NO_HAVE_XXX_H. + EA_NO_HAVE is useful for a build system or user to override the + defaults because it happens to know better. + + - EA_HAVE_XXX_DECL - Have function declaration information. + Identifies if a given function declaration is provided by + the current compile configuration. For example, some compiler + standard libraries declare a wcslen function, while others + don't. For the former we define EA_HAVE_WCSLEN_DECL, while for + the latter it remains undefined. If a declaration of a function + is missing then we assume the implementation is missing as well. + EA_HAVE_XXX_DECL does not include the possibility that our + own code provides versions of these declarations, and in fact a + purpose of EA_HAVE_XXX_DECL is to decide if we should be using + our own because the system doesn't provide one. + Declaration availability can be forcibly cancelled via EA_NO_HAVE_XXX_DECL. 
+ EA_NO_HAVE is useful for a build system or user to override the + defaults because it happens to know better. + + - EA_HAVE_XXX_IMPL - Have function implementation information. + Identifies if a given function implementation is provided by + the current compile and link configuration. For example, it's + commonly the case that console platforms declare a getenv function + but don't provide a linkable implementation. + In this case the user needs to provide such a function manually + as part of the link. If the implementation is available then + we define EA_HAVE_GETENV_IMPL, otherwise it remains undefined. + Beware that sometimes a function may not seem to be present in + the Standard Library but in reality you need to link some auxiliary + provided library for it. An example of this is the Unix real-time + functions such as clock_gettime. + EA_HAVE_XXX_IMPL does not include the possibility that our + own code provides versions of these implementations, and in fact a + purpose of EA_HAVE_XXX_IMPL is to decide if we should be using + our own because the system doesn't provide one. + Implementation availability can be forcibly cancelled via EA_NO_HAVE_XXX_IMPL. + EA_NO_HAVE is useful for a build system or user to override the + defaults because it happens to know better. + + It's not practical to define EA_HAVE macros for every possible header, + declaration, and implementation, and so the user must simply know that + some headers, declarations, and implementations tend to require EA_HAVE + checking. Nearly every C Standard Library we've seen has a + header, a strlen declaration, and a linkable strlen implementation, + so there's no need to provide EA_HAVE support for this. On the other hand + it's commonly the case that the C Standard Library doesn't have a malloc.h + header or an inet_ntop declaration. + +---------------------------------------------------------------------------*/ + + +#ifndef INCLUDED_eahave_H +#define INCLUDED_eahave_H + + +#include + + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + +/* EA_HAVE_XXX_FEATURE */ + +#if !defined(EA_HAVE_EXTENSIONS_FEATURE) && !defined(EA_NO_HAVE_EXTENSIONS_FEATURE) + #define EA_HAVE_EXTENSIONS_FEATURE 1 +#endif + + +/* EA_HAVE_XXX_LIBRARY */ + +// Dinkumware +#if !defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && !defined(EA_NO_HAVE_DINKUMWARE_CPP_LIBRARY) + #if defined(__cplusplus) + EA_DISABLE_ALL_VC_WARNINGS() + #include // Need to trigger the compilation of yvals.h without directly using because it might not exist. + EA_RESTORE_ALL_VC_WARNINGS() + #endif + + #if defined(__cplusplus) && defined(_CPPLIB_VER) /* If using the Dinkumware Standard library... */ + #define EA_HAVE_DINKUMWARE_CPP_LIBRARY 1 + #else + #define EA_NO_HAVE_DINKUMWARE_CPP_LIBRARY 1 + #endif +#endif + +// GCC libstdc++ +#if !defined(EA_HAVE_LIBSTDCPP_LIBRARY) && !defined(EA_NO_HAVE_LIBSTDCPP_LIBRARY) + #if defined(__GLIBCXX__) /* If using libstdc++ ... */ + #define EA_HAVE_LIBSTDCPP_LIBRARY 1 + #else + #define EA_NO_HAVE_LIBSTDCPP_LIBRARY 1 + #endif +#endif + +// Clang libc++ +#if !defined(EA_HAVE_LIBCPP_LIBRARY) && !defined(EA_NO_HAVE_LIBCPP_LIBRARY) + #if EA_HAS_INCLUDE_AVAILABLE + #if EA_HAS_INCLUDE(<__config>) + #define EA_HAVE_LIBCPP_LIBRARY 1 // We could also #include and check if defined(_LIBCPP_VERSION). 
+ #endif + #endif + + #if !defined(EA_HAVE_LIBCPP_LIBRARY) + #define EA_NO_HAVE_LIBCPP_LIBRARY 1 + #endif +#endif + + +/* EA_HAVE_XXX_H */ + +// #include +#if !defined(EA_HAVE_SYS_TYPES_H) && !defined(EA_NO_HAVE_SYS_TYPES_H) + #define EA_HAVE_SYS_TYPES_H 1 +#endif + +// #include (and not sys/io.h or asm/io.h) +#if !defined(EA_HAVE_IO_H) && !defined(EA_NO_HAVE_IO_H) + // Unix doesn't have Microsoft's but has the same functionality in and . + #if defined(EA_PLATFORM_MICROSOFT) + #define EA_HAVE_IO_H 1 + #else + #define EA_NO_HAVE_IO_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_INTTYPES_H) && !defined(EA_NO_HAVE_INTTYPES_H) + #if !defined(EA_PLATFORM_MICROSOFT) + #define EA_HAVE_INTTYPES_H 1 + #else + #define EA_NO_HAVE_INTTYPES_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_UNISTD_H) && !defined(EA_NO_HAVE_UNISTD_H) + #if defined(EA_PLATFORM_UNIX) + #define EA_HAVE_UNISTD_H 1 + #else + #define EA_NO_HAVE_UNISTD_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_SYS_TIME_H) && !defined(EA_NO_HAVE_SYS_TIME_H) + #if !defined(EA_PLATFORM_MICROSOFT) && !defined(_CPPLIB_VER) /* _CPPLIB_VER indicates Dinkumware. */ + #define EA_HAVE_SYS_TIME_H 1 /* defines struct timeval */ + #else + #define EA_NO_HAVE_SYS_TIME_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_SYS_PTRACE_H) && !defined(EA_NO_HAVE_SYS_PTRACE_H) + #if defined(EA_PLATFORM_UNIX) && !defined(__CYGWIN__) && (defined(EA_PLATFORM_DESKTOP) || defined(EA_PLATFORM_SERVER)) + #define EA_HAVE_SYS_PTRACE_H 1 /* declares the ptrace function */ + #else + #define EA_NO_HAVE_SYS_PTRACE_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_SYS_STAT_H) && !defined(EA_NO_HAVE_SYS_STAT_H) + #if (defined(EA_PLATFORM_UNIX) && !(defined(EA_PLATFORM_SONY) && defined(EA_PLATFORM_CONSOLE))) || defined(__APPLE__) || defined(EA_PLATFORM_ANDROID) + #define EA_HAVE_SYS_STAT_H 1 /* declares the stat struct and function */ + #else + #define EA_NO_HAVE_SYS_STAT_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_LOCALE_H) && !defined(EA_NO_HAVE_LOCALE_H) + #define EA_HAVE_LOCALE_H 1 +#endif + +// #include +#if !defined(EA_HAVE_SIGNAL_H) && !defined(EA_NO_HAVE_SIGNAL_H) + #if !defined(EA_PLATFORM_BSD) && !defined(EA_PLATFORM_SONY) && !defined(CS_UNDEFINED_STRING) + #define EA_HAVE_SIGNAL_H 1 + #else + #define EA_NO_HAVE_SIGNAL_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_SYS_SIGNAL_H) && !defined(EA_NO_HAVE_SYS_SIGNAL_H) + #if defined(EA_PLATFORM_BSD) || defined(EA_PLATFORM_SONY) + #define EA_HAVE_SYS_SIGNAL_H 1 + #else + #define EA_NO_HAVE_SYS_SIGNAL_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_PTHREAD_H) && !defined(EA_NO_HAVE_PTHREAD_H) + #if defined(EA_PLATFORM_UNIX) || defined(EA_PLATFORM_APPLE) || defined(EA_PLATFORM_POSIX) + #define EA_HAVE_PTHREAD_H 1 /* It can be had under Microsoft/Windows with the http://sourceware.org/pthreads-win32/ library */ + #else + #define EA_NO_HAVE_PTHREAD_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_WCHAR_H) && !defined(EA_NO_HAVE_WCHAR_H) + #if defined(EA_PLATFORM_DESKTOP) && defined(EA_PLATFORM_UNIX) && defined(EA_PLATFORM_SONY) && defined(EA_PLATFORM_APPLE) + #define EA_HAVE_WCHAR_H 1 + #else + #define EA_NO_HAVE_WCHAR_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_MALLOC_H) && !defined(EA_NO_HAVE_MALLOC_H) + #if defined(_MSC_VER) || defined(__MINGW32__) + #define EA_HAVE_MALLOC_H 1 + #else + #define EA_NO_HAVE_MALLOC_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_ALLOCA_H) && !defined(EA_NO_HAVE_ALLOCA_H) + #if 
!defined(EA_HAVE_MALLOC_H) && !defined(EA_PLATFORM_SONY) + #define EA_HAVE_ALLOCA_H 1 + #else + #define EA_NO_HAVE_ALLOCA_H 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_EXECINFO_H) && !defined(EA_NO_HAVE_EXECINFO_H) + #if (defined(EA_PLATFORM_LINUX) || defined(EA_PLATFORM_OSX)) && !defined(EA_PLATFORM_ANDROID) + #define EA_HAVE_EXECINFO_H 1 + #else + #define EA_NO_HAVE_EXECINFO_H 1 + #endif +#endif + +// #include (Unix semaphore support) +#if !defined(EA_HAVE_SEMAPHORE_H) && !defined(EA_NO_HAVE_SEMAPHORE_H) + #if defined(EA_PLATFORM_UNIX) + #define EA_HAVE_SEMAPHORE_H 1 + #else + #define EA_NO_HAVE_SEMAPHORE_H 1 + #endif +#endif + +// #include (Unix semaphore support) +#if !defined(EA_HAVE_DIRENT_H) && !defined(EA_NO_HAVE_DIRENT_H) + #if defined(EA_PLATFORM_UNIX) && !defined(EA_PLATFORM_CONSOLE) + #define EA_HAVE_DIRENT_H 1 + #else + #define EA_NO_HAVE_DIRENT_H 1 + #endif +#endif + +// #include , , , +#if !defined(EA_HAVE_CPP11_CONTAINERS) && !defined(EA_NO_HAVE_CPP11_CONTAINERS) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 520) // Dinkumware. VS2010+ + #define EA_HAVE_CPP11_CONTAINERS 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4004) // Actually GCC 4.3 supports array and unordered_ + #define EA_HAVE_CPP11_CONTAINERS 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_CONTAINERS 1 + #else + #define EA_NO_HAVE_CPP11_CONTAINERS 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_ATOMIC) && !defined(EA_NO_HAVE_CPP11_ATOMIC) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 540) // Dinkumware. VS2012+ + #define EA_HAVE_CPP11_ATOMIC 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4007) + #define EA_HAVE_CPP11_ATOMIC 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_ATOMIC 1 + #else + #define EA_NO_HAVE_CPP11_ATOMIC 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_CONDITION_VARIABLE) && !defined(EA_NO_HAVE_CPP11_CONDITION_VARIABLE) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 540) // Dinkumware. VS2012+ + #define EA_HAVE_CPP11_CONDITION_VARIABLE 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4007) + #define EA_HAVE_CPP11_CONDITION_VARIABLE 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_CONDITION_VARIABLE 1 + #else + #define EA_NO_HAVE_CPP11_CONDITION_VARIABLE 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_MUTEX) && !defined(EA_NO_HAVE_CPP11_MUTEX) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 540) // Dinkumware. VS2012+ + #define EA_HAVE_CPP11_MUTEX 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4007) + #define EA_HAVE_CPP11_MUTEX 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_MUTEX 1 + #else + #define EA_NO_HAVE_CPP11_MUTEX 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_THREAD) && !defined(EA_NO_HAVE_CPP11_THREAD) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 540) // Dinkumware. 
VS2012+ + #define EA_HAVE_CPP11_THREAD 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4007) + #define EA_HAVE_CPP11_THREAD 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_THREAD 1 + #else + #define EA_NO_HAVE_CPP11_THREAD 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_FUTURE) && !defined(EA_NO_HAVE_CPP11_FUTURE) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 540) // Dinkumware. VS2012+ + #define EA_HAVE_CPP11_FUTURE 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4005) + #define EA_HAVE_CPP11_FUTURE 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_FUTURE 1 + #else + #define EA_NO_HAVE_CPP11_FUTURE 1 + #endif +#endif + + +// #include +#if !defined(EA_HAVE_CPP11_TYPE_TRAITS) && !defined(EA_NO_HAVE_CPP11_TYPE_TRAITS) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 540) // Dinkumware. VS2012+ + #define EA_HAVE_CPP11_TYPE_TRAITS 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4007) // Prior versions of libstdc++ have incomplete support for C++11 type traits. + #define EA_HAVE_CPP11_TYPE_TRAITS 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_TYPE_TRAITS 1 + #else + #define EA_NO_HAVE_CPP11_TYPE_TRAITS 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_TUPLES) && !defined(EA_NO_HAVE_CPP11_TUPLES) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 520) // Dinkumware. VS2010+ + #define EA_HAVE_CPP11_TUPLES 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4003) + #define EA_HAVE_CPP11_TUPLES 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_TUPLES 1 + #else + #define EA_NO_HAVE_CPP11_TUPLES 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_REGEX) && !defined(EA_NO_HAVE_CPP11_REGEX) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 540) && (defined(_HAS_EXCEPTIONS) && _HAS_EXCEPTIONS) // Dinkumware. VS2012+ + #define EA_HAVE_CPP11_REGEX 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4003) + #define EA_HAVE_CPP11_REGEX 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_REGEX 1 + #else + #define EA_NO_HAVE_CPP11_REGEX 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_RANDOM) && !defined(EA_NO_HAVE_CPP11_RANDOM) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 520) // Dinkumware. VS2010+ + #define EA_HAVE_CPP11_RANDOM 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4005) + #define EA_HAVE_CPP11_RANDOM 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_RANDOM 1 + #else + #define EA_NO_HAVE_CPP11_RANDOM 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_CHRONO) && !defined(EA_NO_HAVE_CPP11_CHRONO) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 540) // Dinkumware. 
VS2012+ + #define EA_HAVE_CPP11_CHRONO 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4007) // chrono was broken in glibc prior to 4.7. + #define EA_HAVE_CPP11_CHRONO 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_CHRONO 1 + #else + #define EA_NO_HAVE_CPP11_CHRONO 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_SCOPED_ALLOCATOR) && !defined(EA_NO_HAVE_CPP11_SCOPED_ALLOCATOR) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 540) // Dinkumware. VS2012+ + #define EA_HAVE_CPP11_SCOPED_ALLOCATOR 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4007) + #define EA_HAVE_CPP11_SCOPED_ALLOCATOR 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_SCOPED_ALLOCATOR 1 + #else + #define EA_NO_HAVE_CPP11_SCOPED_ALLOCATOR 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_INITIALIZER_LIST) && !defined(EA_NO_HAVE_CPP11_INITIALIZER_LIST) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 520) && !defined(EA_COMPILER_NO_INITIALIZER_LISTS) // Dinkumware. VS2010+ + #define EA_HAVE_CPP11_INITIALIZER_LIST 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_CLANG) && (EA_COMPILER_VERSION >= 301) && !defined(EA_COMPILER_NO_INITIALIZER_LISTS) && !defined(EA_PLATFORM_APPLE) + #define EA_HAVE_CPP11_INITIALIZER_LIST 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBCPP_LIBRARY) && defined(EA_COMPILER_CLANG) && (EA_COMPILER_VERSION >= 301) && !defined(EA_COMPILER_NO_INITIALIZER_LISTS) && !defined(EA_PLATFORM_APPLE) + #define EA_HAVE_CPP11_INITIALIZER_LIST 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4004) && !defined(EA_COMPILER_NO_INITIALIZER_LISTS) && !defined(EA_PLATFORM_APPLE) + #define EA_HAVE_CPP11_INITIALIZER_LIST 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) && !defined(EA_COMPILER_NO_INITIALIZER_LISTS) + #define EA_HAVE_CPP11_INITIALIZER_LIST 1 + #else + #define EA_NO_HAVE_CPP11_INITIALIZER_LIST 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_SYSTEM_ERROR) && !defined(EA_NO_HAVE_CPP11_SYSTEM_ERROR) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 520) && !(defined(_HAS_CPP0X) && _HAS_CPP0X) // Dinkumware. VS2010+ + #define EA_HAVE_CPP11_SYSTEM_ERROR 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_CLANG) && (EA_COMPILER_VERSION >= 301) && !defined(EA_PLATFORM_APPLE) + #define EA_HAVE_CPP11_SYSTEM_ERROR 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4004) && !defined(EA_PLATFORM_APPLE) + #define EA_HAVE_CPP11_SYSTEM_ERROR 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_SYSTEM_ERROR 1 + #else + #define EA_NO_HAVE_CPP11_SYSTEM_ERROR 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_CODECVT) && !defined(EA_NO_HAVE_CPP11_CODECVT) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 520) // Dinkumware. VS2010+ + #define EA_HAVE_CPP11_CODECVT 1 + // Future versions of libc++ may support this header. 
However, at the moment there isn't + // a reliable way of detecting if this header is available. + //#elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4008) + // #define EA_HAVE_CPP11_CODECVT 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_CODECVT 1 + #else + #define EA_NO_HAVE_CPP11_CODECVT 1 + #endif +#endif + +// #include +#if !defined(EA_HAVE_CPP11_TYPEINDEX) && !defined(EA_NO_HAVE_CPP11_TYPEINDEX) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 520) // Dinkumware. VS2010+ + #define EA_HAVE_CPP11_TYPEINDEX 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4006) + #define EA_HAVE_CPP11_TYPEINDEX 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_TYPEINDEX 1 + #else + #define EA_NO_HAVE_CPP11_TYPEINDEX 1 + #endif +#endif + + + + +/* EA_HAVE_XXX_DECL */ + +#if !defined(EA_HAVE_mkstemps_DECL) && !defined(EA_NO_HAVE_mkstemps_DECL) + #if defined(EA_PLATFORM_APPLE) || defined(CS_UNDEFINED_STRING) + #define EA_HAVE_mkstemps_DECL 1 + #else + #define EA_NO_HAVE_mkstemps_DECL 1 + #endif +#endif + +#if !defined(EA_HAVE_gettimeofday_DECL) && !defined(EA_NO_HAVE_gettimeofday_DECL) + #if defined(EA_PLATFORM_POSIX) /* Posix means Linux, Unix, and Macintosh OSX, among others (including Linux-based mobile platforms). */ + #define EA_HAVE_gettimeofday_DECL 1 + #else + #define EA_NO_HAVE_gettimeofday_DECL 1 + #endif +#endif + +#if !defined(EA_HAVE_strcasecmp_DECL) && !defined(EA_NO_HAVE_strcasecmp_DECL) + #if !defined(EA_PLATFORM_MICROSOFT) + #define EA_HAVE_strcasecmp_DECL 1 /* This is found as stricmp when not found as strcasecmp */ + #define EA_HAVE_strncasecmp_DECL 1 + #else + #define EA_HAVE_stricmp_DECL 1 + #define EA_HAVE_strnicmp_DECL 1 + #endif +#endif + +#if !defined(EA_HAVE_mmap_DECL) && !defined(EA_NO_HAVE_mmap_DECL) + #if defined(EA_PLATFORM_POSIX) + #define EA_HAVE_mmap_DECL 1 /* mmap functionality varies significantly between systems. */ + #else + #define EA_NO_HAVE_mmap_DECL 1 + #endif +#endif + +#if !defined(EA_HAVE_fopen_DECL) && !defined(EA_NO_HAVE_fopen_DECL) + #define EA_HAVE_fopen_DECL 1 /* C FILE functionality such as fopen */ +#endif + +#if !defined(EA_HAVE_ISNAN) && !defined(EA_NO_HAVE_ISNAN) + #if defined(EA_PLATFORM_MICROSOFT) && !defined(EA_PLATFORM_MINGW) + #define EA_HAVE_ISNAN(x) _isnan(x) /* declared in */ + #define EA_HAVE_ISINF(x) !_finite(x) + #elif defined(EA_PLATFORM_APPLE) + #define EA_HAVE_ISNAN(x) std::isnan(x) /* declared in */ + #define EA_HAVE_ISINF(x) std::isinf(x) + #elif defined(EA_PLATFORM_ANDROID) + #define EA_HAVE_ISNAN(x) __builtin_isnan(x) /* There are a number of standard libraries for Android and it's hard to tell them apart, so just go with builtins */ + #define EA_HAVE_ISINF(x) __builtin_isinf(x) + #elif defined(__GNUC__) && defined(__CYGWIN__) + #define EA_HAVE_ISNAN(x) __isnand(x) /* declared nowhere, it seems. 
*/ + #define EA_HAVE_ISINF(x) __isinfd(x) + #else + #define EA_HAVE_ISNAN(x) std::isnan(x) /* declared in */ + #define EA_HAVE_ISINF(x) std::isinf(x) + #endif +#endif + +#if !defined(EA_HAVE_itoa_DECL) && !defined(EA_NO_HAVE_itoa_DECL) + #if defined(EA_COMPILER_MSVC) + #define EA_HAVE_itoa_DECL 1 + #else + #define EA_NO_HAVE_itoa_DECL 1 + #endif +#endif + +#if !defined(EA_HAVE_nanosleep_DECL) && !defined(EA_NO_HAVE_nanosleep_DECL) + #if (defined(EA_PLATFORM_UNIX) && !defined(EA_PLATFORM_SONY)) || defined(EA_PLATFORM_IPHONE) || defined(EA_PLATFORM_OSX) || defined(EA_PLATFORM_SONY) || defined(CS_UNDEFINED_STRING) + #define EA_HAVE_nanosleep_DECL 1 + #else + #define EA_NO_HAVE_nanosleep_DECL 1 + #endif +#endif + +#if !defined(EA_HAVE_utime_DECL) && !defined(EA_NO_HAVE_utime_DECL) + #if defined(EA_PLATFORM_MICROSOFT) + #define EA_HAVE_utime_DECL _utime + #elif EA_PLATFORM_UNIX + #define EA_HAVE_utime_DECL utime + #else + #define EA_NO_HAVE_utime_DECL 1 + #endif +#endif + +#if !defined(EA_HAVE_ftruncate_DECL) && !defined(EA_NO_HAVE_ftruncate_DECL) + #if !defined(__MINGW32__) + #define EA_HAVE_ftruncate_DECL 1 + #else + #define EA_NO_HAVE_ftruncate_DECL 1 + #endif +#endif + +#if !defined(EA_HAVE_localtime_DECL) && !defined(EA_NO_HAVE_localtime_DECL) + #define EA_HAVE_localtime_DECL 1 +#endif + +#if !defined(EA_HAVE_pthread_getattr_np_DECL) && !defined(EA_NO_HAVE_pthread_getattr_np_DECL) + #if defined(EA_PLATFORM_LINUX) + #define EA_HAVE_pthread_getattr_np_DECL 1 + #else + #define EA_NO_HAVE_pthread_getattr_np_DECL 1 + #endif +#endif + + + +/* EA_HAVE_XXX_IMPL*/ + +#if !defined(EA_HAVE_WCHAR_IMPL) && !defined(EA_NO_HAVE_WCHAR_IMPL) + #if defined(EA_PLATFORM_DESKTOP) + #define EA_HAVE_WCHAR_IMPL 1 /* Specifies if wchar_t string functions are provided, such as wcslen, wprintf, etc. Implies EA_HAVE_WCHAR_H */ + #else + #define EA_NO_HAVE_WCHAR_IMPL 1 + #endif +#endif + +#if !defined(EA_HAVE_getenv_IMPL) && !defined(EA_NO_HAVE_getenv_IMPL) + #if (defined(EA_PLATFORM_DESKTOP) || defined(EA_PLATFORM_UNIX)) && !defined(EA_PLATFORM_WINRT) + #define EA_HAVE_getenv_IMPL 1 + #else + #define EA_NO_HAVE_getenv_IMPL 1 + #endif +#endif + +#if !defined(EA_HAVE_setenv_IMPL) && !defined(EA_NO_HAVE_setenv_IMPL) + #if defined(EA_PLATFORM_UNIX) && defined(EA_PLATFORM_POSIX) + #define EA_HAVE_setenv_IMPL 1 + #else + #define EA_NO_HAVE_setenv_IMPL 1 + #endif +#endif + +#if !defined(EA_HAVE_unsetenv_IMPL) && !defined(EA_NO_HAVE_unsetenv_IMPL) + #if defined(EA_PLATFORM_UNIX) && defined(EA_PLATFORM_POSIX) + #define EA_HAVE_unsetenv_IMPL 1 + #else + #define EA_NO_HAVE_unsetenv_IMPL 1 + #endif +#endif + +#if !defined(EA_HAVE_putenv_IMPL) && !defined(EA_NO_HAVE_putenv_IMPL) + #if (defined(EA_PLATFORM_DESKTOP) || defined(EA_PLATFORM_UNIX)) && !defined(EA_PLATFORM_WINRT) + #define EA_HAVE_putenv_IMPL 1 /* With Microsoft compilers you may need to use _putenv, as they have deprecated putenv. 
*/ + #else + #define EA_NO_HAVE_putenv_IMPL 1 + #endif +#endif + +#if !defined(EA_HAVE_time_IMPL) && !defined(EA_NO_HAVE_time_IMPL) + #define EA_HAVE_time_IMPL 1 + #define EA_HAVE_clock_IMPL 1 +#endif + +// fopen() +#if !defined(EA_HAVE_fopen_IMPL) && !defined(EA_NO_HAVE_fopen_IMPL) + #define EA_HAVE_fopen_IMPL 1 /* C FILE functionality such as fopen */ +#endif + +// inet_ntop() +#if !defined(EA_HAVE_inet_ntop_IMPL) && !defined(EA_NO_HAVE_inet_ntop_IMPL) + #if (defined(EA_PLATFORM_UNIX) || defined(EA_PLATFORM_POSIX)) && !defined(EA_PLATFORM_SONY) && !defined(CS_UNDEFINED_STRING) + #define EA_HAVE_inet_ntop_IMPL 1 /* This doesn't identify if the platform SDK has some alternative function that does the same thing; */ + #define EA_HAVE_inet_pton_IMPL 1 /* it identifies strictly the inet_ntop and inet_pton functions. For example, Microsoft has InetNtop in */ + #else + #define EA_NO_HAVE_inet_ntop_IMPL 1 + #define EA_NO_HAVE_inet_pton_IMPL 1 + #endif +#endif + +// clock_gettime() +#if !defined(EA_HAVE_clock_gettime_IMPL) && !defined(EA_NO_HAVE_clock_gettime_IMPL) + #if defined(EA_PLATFORM_LINUX) || defined(__CYGWIN__) || (defined(_POSIX_TIMERS) && (_POSIX_TIMERS > 0)) || (defined(EA_PLATFORM_POSIX) && defined(_CPPLIB_VER) /*Dinkumware*/) + #define EA_HAVE_clock_gettime_IMPL 1 /* You need to link the 'rt' library to get this */ + #else + #define EA_NO_HAVE_clock_gettime_IMPL 1 + #endif +#endif + +#if !defined(EA_HAVE_getcwd_IMPL) && !defined(EA_NO_HAVE_getcwd_IMPL) + #if (defined(EA_PLATFORM_DESKTOP) || defined(EA_PLATFORM_UNIX)) && !defined(EA_PLATFORM_ANDROID) && !defined(EA_PLATFORM_WINRT) + #define EA_HAVE_getcwd_IMPL 1 /* With Microsoft compilers you may need to use _getcwd, as they have deprecated getcwd. And in any case it's present at */ + #else + #define EA_NO_HAVE_getcwd_IMPL 1 + #endif +#endif + +#if !defined(EA_HAVE_tmpnam_IMPL) && !defined(EA_NO_HAVE_tmpnam_IMPL) + #if (defined(EA_PLATFORM_DESKTOP) || defined(EA_PLATFORM_UNIX)) && !defined(EA_PLATFORM_ANDROID) + #define EA_HAVE_tmpnam_IMPL 1 + #else + #define EA_NO_HAVE_tmpnam_IMPL 1 + #endif +#endif + +// nullptr, the built-in C++11 type. +// This EA_HAVE is deprecated, as EA_COMPILER_NO_NULLPTR is more appropriate, given that nullptr is a compiler-level feature and not a library feature. +#if !defined(EA_HAVE_nullptr_IMPL) && !defined(EA_NO_HAVE_nullptr_IMPL) + #if defined(EA_COMPILER_NO_NULLPTR) + #define EA_NO_HAVE_nullptr_IMPL 1 + #else + #define EA_HAVE_nullptr_IMPL 1 + #endif +#endif + +// std::nullptr_t +// Note that implements a portable nullptr implementation, but this +// EA_HAVE specifically refers to std::nullptr_t from the standard libraries. +#if !defined(EA_HAVE_nullptr_t_IMPL) && !defined(EA_NO_HAVE_nullptr_t_IMPL) + #if defined(EA_COMPILER_CPP11_ENABLED) + // VS2010+ with its default Dinkumware standard library. + #if defined(_MSC_VER) && (_MSC_VER >= 1600) && defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) + #define EA_HAVE_nullptr_t_IMPL 1 + + #elif defined(EA_HAVE_LIBCPP_LIBRARY) // clang/llvm libc++ + #define EA_HAVE_nullptr_t_IMPL 1 + + #elif defined(EA_HAVE_LIBSTDCPP_LIBRARY) // GNU libstdc++ + // Unfortunately __GLIBCXX__ date values don't go strictly in version ordering. + #if (__GLIBCXX__ >= 20110325) && (__GLIBCXX__ != 20120702) && (__GLIBCXX__ != 20110428) + #define EA_HAVE_nullptr_t_IMPL 1 + #else + #define EA_NO_HAVE_nullptr_t_IMPL 1 + #endif + + // We simply assume that the standard library (e.g. Dinkumware) provides std::nullptr_t. 
+ #elif defined(__clang__) + #define EA_HAVE_nullptr_t_IMPL 1 + + // With GCC compiler >= 4.6, std::nullptr_t is always defined in , in practice. + #elif defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4006) + #define EA_HAVE_nullptr_t_IMPL 1 + + // The EDG compiler provides nullptr, but uses an older standard library that doesn't support std::nullptr_t. + #elif defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 403) + #define EA_HAVE_nullptr_t_IMPL 1 + + #else + #define EA_NO_HAVE_nullptr_t_IMPL 1 + #endif + #else + #define EA_NO_HAVE_nullptr_t_IMPL 1 + #endif +#endif + +// std::terminate +#if !defined(EA_HAVE_std_terminate_IMPL) && !defined(EA_NO_HAVE_std_terminate_IMPL) + #if !defined(EA_PLATFORM_IPHONE) && !defined(EA_PLATFORM_ANDROID) + #define EA_HAVE_std_terminate_IMPL 1 /* iOS doesn't appear to provide an implementation for std::terminate under the armv6 target. */ + #else + #define EA_NO_HAVE_std_terminate_IMPL 1 + #endif +#endif + +// : std::begin, std::end, std::prev, std::next, std::move_iterator. +#if !defined(EA_HAVE_CPP11_ITERATOR_IMPL) && !defined(EA_NO_HAVE_CPP11_ITERATOR_IMPL) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 520) && !(defined(_HAS_CPP0X) && _HAS_CPP0X) // Dinkumware. VS2010+ + #define EA_HAVE_CPP11_ITERATOR_IMPL 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4006) + #define EA_HAVE_CPP11_ITERATOR_IMPL 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_ITERATOR_IMPL 1 + #else + #define EA_NO_HAVE_CPP11_ITERATOR_IMPL 1 + #endif +#endif + +// : std::weak_ptr, std::shared_ptr, std::unique_ptr, std::bad_weak_ptr, std::owner_less +#if !defined(EA_HAVE_CPP11_SMART_POINTER_IMPL) && !defined(EA_NO_HAVE_CPP11_SMART_POINTER_IMPL) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 520) && !(defined(_HAS_CPP0X) && _HAS_CPP0X) // Dinkumware. VS2010+ + #define EA_HAVE_CPP11_SMART_POINTER_IMPL 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4004) + #define EA_HAVE_CPP11_SMART_POINTER_IMPL 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_SMART_POINTER_IMPL 1 + #else + #define EA_NO_HAVE_CPP11_SMART_POINTER_IMPL 1 + #endif +#endif + +// : std::function, std::mem_fn, std::bad_function_call, std::is_bind_expression, std::is_placeholder, std::reference_wrapper, std::hash, std::bind, std::ref, std::cref. +#if !defined(EA_HAVE_CPP11_FUNCTIONAL_IMPL) && !defined(EA_NO_HAVE_CPP11_FUNCTIONAL_IMPL) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 520) && !(defined(_HAS_CPP0X) && _HAS_CPP0X) // Dinkumware. VS2010+ + #define EA_HAVE_CPP11_FUNCTIONAL_IMPL 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4004) + #define EA_HAVE_CPP11_FUNCTIONAL_IMPL 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_FUNCTIONAL_IMPL 1 + #else + #define EA_NO_HAVE_CPP11_FUNCTIONAL_IMPL 1 + #endif +#endif + +// std::current_exception, std::rethrow_exception, std::exception_ptr, std::make_exception_ptr +#if !defined(EA_HAVE_CPP11_EXCEPTION_IMPL) && !defined(EA_NO_HAVE_CPP11_EXCEPTION_IMPL) + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 520) && !(defined(_HAS_CPP0X) && _HAS_CPP0X) // Dinkumware. 
VS2010+ + #define EA_HAVE_CPP11_EXCEPTION_IMPL 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4004) + #define EA_HAVE_CPP11_EXCEPTION_IMPL 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EA_HAVE_CPP11_EXCEPTION_IMPL 1 + #else + #define EA_NO_HAVE_CPP11_EXCEPTION_IMPL 1 + #endif +#endif + + + + +/* Implementations that all platforms seem to have: */ +/* + alloca + malloc + calloc + strtoll + strtoull + vsprintf + vsnprintf +*/ + +/* Implementations that we don't care about: */ +/* + bcopy -- Just use memmove or some customized equivalent. bcopy offers no practical benefit. + strlcpy -- So few platforms have this built-in that we get no benefit from using it. Use EA::StdC::Strlcpy instead. + strlcat -- " +*/ + + + +/*----------------------------------------------------------------------------- + EABASE_USER_HAVE_HEADER + + This allows the user to define a header file to be #included after the + eahave.h's contents are compiled. A primary use of this is to override + the contents of this header file. You can define the overhead header + file name in-code or define it globally as part of your build file. + + Example usage: + #define EABASE_USER_HAVE_HEADER "MyHaveOverrides.h" + #include +---------------------------------------------------------------------------*/ + +#ifdef EABASE_USER_HAVE_HEADER + #include EABASE_USER_HAVE_HEADER +#endif + + +#endif /* Header include guard */ + + + diff --git a/libkram/eastl/include/EABase/earesult.h b/libkram/eastl/include/EABase/earesult.h new file mode 100644 index 00000000..d08b3460 --- /dev/null +++ b/libkram/eastl/include/EABase/earesult.h @@ -0,0 +1,62 @@ +/*----------------------------------------------------------------------------- + * earesult.h + * + * Copyright (c) Electronic Arts Inc. All rights reserved. + *---------------------------------------------------------------------------*/ + + +#ifndef INCLUDED_earesult_H +#define INCLUDED_earesult_H + + +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once /* Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. */ +#endif + + + +/* This result type is width-compatible with most systems. */ +typedef int32_t ea_result_type; + + +namespace EA +{ + typedef int32_t result_type; + + enum + { +#ifndef SUCCESS + // Deprecated + // Note: a public MS header has created a define of this name which causes a build error. Fortunately they + // define it to 0 which is compatible. + // see: WindowsSDK\8.1.51641-fb\installed\Include\um\RasError.h + SUCCESS = 0, +#endif + // Deprecated + FAILURE = -1, + + // These values are now the preferred constants + EA_SUCCESS = 0, + EA_FAILURE = -1, + }; +} + + +/* Macro to simplify testing for success. */ +#ifndef EA_SUCCEEDED + #define EA_SUCCEEDED(result) ((result) >= 0) +#endif + +/* Macro to simplfify testing for general failure. */ +#ifndef EA_FAILED + #define EA_FAILED(result) ((result) < 0) +#endif + + +#endif + + + + diff --git a/libkram/eastl/include/EABase/eastdarg.h b/libkram/eastl/include/EABase/eastdarg.h new file mode 100644 index 00000000..2c613eb8 --- /dev/null +++ b/libkram/eastl/include/EABase/eastdarg.h @@ -0,0 +1,99 @@ +/*----------------------------------------------------------------------------- + * eastdarg.h + * + * Copyright (c) Electronic Arts Inc. All rights reserved. 
+ *---------------------------------------------------------------------------*/ + + +#ifndef INCLUDED_eastdarg_H +#define INCLUDED_eastdarg_H + + +#include +#include + + +// VA_ARG_COUNT +// +// Returns the number of arguments passed to a macro's ... argument. +// This applies to macros only and not functions. +// +// Example usage: +// assert(VA_ARG_COUNT() == 0); +// assert(VA_ARG_COUNT(a) == 1); +// assert(VA_ARG_COUNT(a, b) == 2); +// assert(VA_ARG_COUNT(a, b, c) == 3); +// +#if !defined(VA_ARG_COUNT) + #define VA_ARG_COUNT(...) VA_ARG_COUNT_II((VA_ARG_COUNT_PREFIX_ ## __VA_ARGS__ ## _VA_ARG_COUNT_POSTFIX,32,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0)) + #define VA_ARG_COUNT_II(__args) VA_ARG_COUNT_I __args + #define VA_ARG_COUNT_PREFIX__VA_ARG_COUNT_POSTFIX ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0 + #define VA_ARG_COUNT_I(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11,_12,_13,_14,_15,_16,_17,_18,_19,_20,_21,_22,_23,_24,_25,_26,_27,_28,_29,_30,_31,N,...) N +#endif + + +// va_copy +// +// va_copy is required by C++11 +// C++11 and C99 require va_copy to be #defined and implemented. +// http://en.cppreference.com/w/cpp/utility/variadic/va_copy +// +// Example usage: +// void Func(char* p, ...){ +// va_list args, argsCopy; +// va_start(args, p); +// va_copy(argsCopy, args); +// (use args) +// (use argsCopy, which acts the same as args) +// va_end(args); +// va_end(argsCopy); +// } +// +#ifndef va_copy + #if defined(__va_copy) // GCC and others define this for non-C99 compatibility. + #define va_copy(dest, src) __va_copy((dest), (src)) + #else + // This may not work for some platforms, depending on their ABI. + // It works for Microsoft x86,x64, and PowerPC-based platforms. + #define va_copy(dest, src) memcpy(&(dest), &(src), sizeof(va_list)) + #endif +#endif + + + +// va_list_reference +// +// va_list_reference is not part of the C or C++ standards. +// It allows you to pass a va_list by reference to another +// function instead of by value. You cannot simply use va_list& +// as that won't work with many va_list implementations because +// they are implemented as arrays (which can't be passed by +// reference to a function without decaying to a pointer). +// +// Example usage: +// void Test(va_list_reference args){ +// printf("%d", va_arg(args, int)); +// } +// void Func(char* p, ...){ +// va_list args; +// va_start(args, p); +// Test(args); // Upon return args will be modified. +// va_end(args); +// } +#ifndef va_list_reference + #if defined(EA_PLATFORM_MICROSOFT) || (EA_PLATFORM_PTR_SIZE == 4) || (defined(EA_PLATFORM_APPLE) && defined(EA_PROCESSOR_ARM64)) || defined(CS_UNDEFINED_STRING) || (defined(EA_PLATFORM_ANDROID) && defined(EA_PROCESSOR_ARM64)) + // This is required for platform ABIs in which va_list is a struct or pointer. + #define va_list_reference va_list& + #else + // This is required for platform ABIs in which va_list is defined to be an array. + #define va_list_reference va_list + #endif +#endif + + + + +#endif /* Header include guard */ + + + diff --git a/libkram/eastl/include/EABase/eaunits.h b/libkram/eastl/include/EABase/eaunits.h new file mode 100644 index 00000000..22357234 --- /dev/null +++ b/libkram/eastl/include/EABase/eaunits.h @@ -0,0 +1,54 @@ +/*----------------------------------------------------------------------------- + * eaunits.h + * + * Copyright (c) Electronic Arts Inc. All rights reserved. 
+ *---------------------------------------------------------------------------*/
+
+
+#ifndef INCLUDED_eaunits_h
+#define INCLUDED_eaunits_h
+
+#include <EABase/eabase.h>
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result.
+#endif
+
+// Defining common SI unit macros.
+//
+// The mebibyte is a multiple of the unit byte for digital information. Technically a
+// megabyte (MB) is a power of ten, while a mebibyte (MiB) is a power of two,
+// appropriate for binary machines. Many Linux distributions use the unit, but it is
+// not widely acknowledged within the industry or media.
+// Reference: https://en.wikipedia.org/wiki/Mebibyte
+//
+// Examples:
+// auto size1 = EA_KILOBYTE(16);
+// auto size2 = EA_MEGABYTE(128);
+// auto size3 = EA_MEBIBYTE(8);
+// auto size4 = EA_GIBIBYTE(8);
+
+// define byte for completeness
+#define EA_BYTE(x) (x)
+
+// Decimal SI units
+#define EA_KILOBYTE(x) (size_t(x) * 1000)
+#define EA_MEGABYTE(x) (size_t(x) * 1000 * 1000)
+#define EA_GIGABYTE(x) (size_t(x) * 1000 * 1000 * 1000)
+#define EA_TERABYTE(x) (size_t(x) * 1000 * 1000 * 1000 * 1000)
+#define EA_PETABYTE(x) (size_t(x) * 1000 * 1000 * 1000 * 1000 * 1000)
+#define EA_EXABYTE(x) (size_t(x) * 1000 * 1000 * 1000 * 1000 * 1000 * 1000)
+
+// Binary SI units
+#define EA_KIBIBYTE(x) (size_t(x) * 1024)
+#define EA_MEBIBYTE(x) (size_t(x) * 1024 * 1024)
+#define EA_GIBIBYTE(x) (size_t(x) * 1024 * 1024 * 1024)
+#define EA_TEBIBYTE(x) (size_t(x) * 1024 * 1024 * 1024 * 1024)
+#define EA_PEBIBYTE(x) (size_t(x) * 1024 * 1024 * 1024 * 1024 * 1024)
+#define EA_EXBIBYTE(x) (size_t(x) * 1024 * 1024 * 1024 * 1024 * 1024 * 1024)
+
+#endif // INCLUDED_eaunits_h
+
+
+
+
diff --git a/libkram/eastl/include/EABase/int128.h b/libkram/eastl/include/EABase/int128.h
new file mode 100644
index 00000000..068d557a
--- /dev/null
+++ b/libkram/eastl/include/EABase/int128.h
@@ -0,0 +1,1268 @@
+/*-----------------------------------------------------------------------------
+ * eaint128_t.h
+ *
+ * Copyright (c) Electronic Arts Inc. All rights reserved.
+ *---------------------------------------------------------------------------*/ + + +#ifndef INCLUDED_int128_h +#define INCLUDED_int128_h + + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// EA_INT128_INTRINSIC_AVAILABLE +// +#if (EA_COMPILER_INTMAX_SIZE >= 16) && (defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) + // __int128_t/__uint128_t is supported + #define EA_INT128_INTRINSIC_AVAILABLE 1 +#else + #define EA_INT128_INTRINSIC_AVAILABLE 0 +#endif + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// EA_INT128_ALIGNAS +// +#if EA_INT128_INTRINSIC_AVAILABLE && !defined(EA_COMPILER_NO_ALIGNAS) + #define EA_INT128_ALIGNAS alignas(unsigned __int128) +#else + #define EA_INT128_ALIGNAS +#endif + + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// EA_HAVE_INT128 +// +// Indicates that EABase implements 128-bit integer types +// +#define EA_HAVE_INT128 1 + + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// uint128_t_base +// +struct EA_INT128_ALIGNAS int128_t_base +{ + // Constructors / destructors + int128_t_base() = default; + int128_t_base(uint32_t nPart0, uint32_t nPart1, uint32_t nPart2, uint32_t nPart3); + int128_t_base(uint64_t nPart0, uint64_t nPart1); + int128_t_base(uint8_t value); + int128_t_base(uint16_t value); + int128_t_base(uint32_t value); + int128_t_base(uint64_t value); + int128_t_base(const int128_t_base& value) = default; + + // Assignment operator + int128_t_base& operator=(const int128_t_base& value) = default; + + // Explicit operators to convert back to basic types + EA_CONSTEXPR explicit operator bool() const; + EA_CONSTEXPR explicit operator char() const; + EA_CONSTEXPR explicit operator int() const; + EA_CONSTEXPR explicit operator long() const; + EA_CONSTEXPR explicit operator long long() const; + EA_CONSTEXPR explicit operator short() const; + EA_CONSTEXPR explicit operator signed char() const; + EA_CONSTEXPR explicit operator unsigned char() const; + EA_CONSTEXPR explicit operator unsigned int() const; + EA_CONSTEXPR explicit operator unsigned long long() const; + EA_CONSTEXPR explicit operator unsigned long() const; + EA_CONSTEXPR explicit operator unsigned short() const; +#if EA_WCHAR_UNIQUE + // EA_CONSTEXPR explicit operator char16_t() const; + // EA_CONSTEXPR explicit operator char32_t() const; + // EA_CONSTEXPR explicit operator wchar_t() const; +#endif + EA_CONSTEXPR explicit operator float() const; + EA_CONSTEXPR explicit operator double() const; + EA_CONSTEXPR explicit operator long double() const; +#if EA_INT128_INTRINSIC_AVAILABLE + EA_CONSTEXPR explicit operator __int128() const; + EA_CONSTEXPR explicit operator unsigned __int128() const; +#endif + + // Math operators + static void OperatorPlus (const int128_t_base& value1, const int128_t_base& value2, int128_t_base& result); + static void OperatorMinus(const int128_t_base& value1, const int128_t_base& value2, int128_t_base& result); + static void OperatorMul (const int128_t_base& value1, const int128_t_base& value2, int128_t_base& result); + + // Shift operators + static void OperatorShiftRight(const int128_t_base& value, int nShift, int128_t_base& result); + static void OperatorShiftLeft (const int128_t_base& value, int nShift, int128_t_base& result); + + // Unary arithmetic/logic operators + bool operator!() const; + + // Logical operators + static void 
OperatorXOR(const int128_t_base& value1, const int128_t_base& value2, int128_t_base& result); + static void OperatorOR (const int128_t_base& value1, const int128_t_base& value2, int128_t_base& result); + static void OperatorAND(const int128_t_base& value1, const int128_t_base& value2, int128_t_base& result); + + bool IsZero() const; + void SetZero(); + void TwosComplement(); + void InverseTwosComplement(); + + int GetBit(int nIndex) const; + void SetBit(int nIndex, int value); + +protected: + void DoubleToUint128(double value); + + EA_CONSTEXPR uint64_t Low() const + { + return mPart0; + } + + EA_CONSTEXPR uint64_t High() const + { + return mPart1; + } + +protected: + #ifdef EA_SYSTEM_BIG_ENDIAN + uint64_t mPart1; // Most significant byte. + uint64_t mPart0; // Least significant byte. + #else + uint64_t mPart0; // Most significant byte. + uint64_t mPart1; // Least significant byte. + #endif +}; + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// int128_t +// +// Implements signed 128 bit integer. +// +struct int128_t : public int128_t_base +{ + // Constructors / destructors + using int128_t_base::int128_t_base; + + // Assignment operator + using int128_t_base::operator=; + + // Unary arithmetic/logic operators + int128_t operator-() const; + int128_t& operator++(); + int128_t& operator--(); + int128_t operator++(int); + int128_t operator--(int); + int128_t operator~() const; + int128_t operator+() const; + + // Math operators + int128_t operator+ (const int128_t& other); + int128_t operator- (const int128_t& other); + int128_t operator* (const int128_t& other); + int128_t operator/ (const int128_t& other); + int128_t operator% (const int128_t& other); + int128_t& operator+=(const int128_t& other); + int128_t& operator-=(const int128_t& other); + int128_t& operator*=(const int128_t& other); + int128_t& operator/=(const int128_t& other); + int128_t& operator%=(const int128_t& other); + + // Shift operators + int128_t operator>> (int nShift) const; + int128_t operator<< (int nShift) const; + int128_t& operator>>=(int nShift); + int128_t& operator<<=(int nShift); + + // Logical operators + int128_t operator^ (const int128_t& other) const; + int128_t operator| (const int128_t& other) const; + int128_t operator& (const int128_t& other) const; + int128_t& operator^=(const int128_t& other); + int128_t& operator|=(const int128_t& other); + int128_t& operator&=(const int128_t& other); + + // Equality operators + bool operator==(const int128_t& other) const; + bool operator!=(const int128_t& other) const; + bool operator> (const int128_t& other) const; + bool operator>=(const int128_t& other) const; + bool operator< (const int128_t& other) const; + bool operator<=(const int128_t& other) const; + +protected: + int compare(const int128_t& other) const; + void Negate(); + void Modulus(const int128_t& divisor, int128_t& quotient, int128_t& remainder) const; + bool IsNegative() const; // Returns true for value < 0 + bool IsPositive() const; // Returns true for value >= 0 +}; + + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// uint128_t +// +// Implements unsigned 128 bit integer. 
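// A brief layout and usage note (an illustrative aside with a hypothetical
// snippet, not from the original header): int128_t_base stores the value as two
// 64-bit halves, and the constructors, Low()/High(), and GetBit() all treat
// mPart0 as the least-significant half and mPart1 as the most-significant half
// (the per-member comments in the little-endian branch above appear transposed).
// A minimal sketch of how the derived types are meant to be used:
//
//     uint128_t x(0xFFFFFFFFFFFFFFFFull, 0x0ull); // (low, high) => 2^64 - 1
//     uint128_t y((uint32_t)1);
//     uint128_t sum = x + y;                      // carries into the high half: { mPart0 = 0, mPart1 = 1 }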
+// +struct uint128_t : public int128_t_base +{ + // Constructors / destructors + using int128_t_base::int128_t_base; + + // Assignment operator + using int128_t_base::operator=; + + // Unary arithmetic/logic operators + uint128_t operator-() const; + uint128_t& operator++(); + uint128_t& operator--(); + uint128_t operator++(int); + uint128_t operator--(int); + uint128_t operator~() const; + uint128_t operator+() const; + + // Math operators + uint128_t operator+ (const uint128_t& other); + uint128_t operator- (const uint128_t& other); + uint128_t operator* (const uint128_t& other); + uint128_t operator/ (const uint128_t& other); + uint128_t operator% (const uint128_t& other); + uint128_t& operator+=(const uint128_t& other); + uint128_t& operator-=(const uint128_t& other); + uint128_t& operator*=(const uint128_t& other); + uint128_t& operator/=(const uint128_t& other); + uint128_t& operator%=(const uint128_t& other); + + // Shift operators + uint128_t operator>> (int nShift) const; + uint128_t operator<< (int nShift) const; + uint128_t& operator>>=(int nShift); + uint128_t& operator<<=(int nShift); + + // Logical operators + uint128_t operator^ (const uint128_t& other) const; + uint128_t operator| (const uint128_t& other) const; + uint128_t operator& (const uint128_t& other) const; + uint128_t& operator^=(const uint128_t& other); + uint128_t& operator|=(const uint128_t& other); + uint128_t& operator&=(const uint128_t& other); + + // Equality operators + bool operator==(const uint128_t& other) const; + bool operator!=(const uint128_t& other) const; + bool operator> (const uint128_t& other) const; + bool operator>=(const uint128_t& other) const; + bool operator< (const uint128_t& other) const; + bool operator<=(const uint128_t& other) const; + +protected: + int compare(const uint128_t& other) const; + void Negate(); + void Modulus(const uint128_t& divisor, uint128_t& quotient, uint128_t& remainder) const; + bool IsNegative() const; // Returns true for value < 0 + bool IsPositive() const; // Returns true for value >= 0 +}; + + + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// uint128_t_base implementation +/////////////////////////////////////////////////////////////////////////////////////////////////////// +EA_CONSTEXPR inline int128_t_base::operator bool() const { return mPart0 || mPart1; } +EA_CONSTEXPR inline int128_t_base::operator char() const { return static_cast(Low()); } +#if EA_WCHAR_UNIQUE +// EA_CONSTEXPR inline int128_t_base::operator char16_t() const { return static_cast(Low()); } +// EA_CONSTEXPR inline int128_t_base::operator char32_t() const { return static_cast(Low()); } +// EA_CONSTEXPR inline int128_t_base::operator wchar_t() const { return static_cast(Low()); } +#endif +EA_CONSTEXPR inline int128_t_base::operator int() const { return static_cast(Low()); } +EA_CONSTEXPR inline int128_t_base::operator long() const { return static_cast(Low()); } +EA_CONSTEXPR inline int128_t_base::operator long long() const { return static_cast(Low()); } +EA_CONSTEXPR inline int128_t_base::operator short() const { return static_cast(Low()); } +EA_CONSTEXPR inline int128_t_base::operator signed char() const { return static_cast(Low()); } +EA_CONSTEXPR inline int128_t_base::operator unsigned char() const { return static_cast(Low()); } +EA_CONSTEXPR inline int128_t_base::operator unsigned int() const { return static_cast(Low()); } +EA_CONSTEXPR inline int128_t_base::operator unsigned long long() const { return static_cast(Low()); } 
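// An explanatory aside with a hypothetical example: each of the narrowing
// conversions above and below is defined purely in terms of Low(), so the
// most-significant 64 bits are silently discarded, including by the
// floating-point conversions further down.
//
//     uint128_t big(0x0ull, 0x1ull);                   // (low, high) => 2^64
//     unsigned long long n = (unsigned long long)big;  // 0, not 2^64
//     double d = (double)big;                          // 0.0 for the same reason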
+EA_CONSTEXPR inline int128_t_base::operator unsigned long() const { return static_cast(Low()); } +EA_CONSTEXPR inline int128_t_base::operator unsigned short() const { return static_cast(Low()); } +EA_CONSTEXPR inline int128_t_base::operator float() const { return static_cast(Low()); } +EA_CONSTEXPR inline int128_t_base::operator double() const { return static_cast(Low()); } +EA_CONSTEXPR inline int128_t_base::operator long double() const { return static_cast(Low()); } +#if EA_INT128_INTRINSIC_AVAILABLE +EA_CONSTEXPR inline int128_t_base::operator __int128() const { return static_cast<__int128>(Low()); } +EA_CONSTEXPR inline int128_t_base::operator unsigned __int128() const { return static_cast(Low()); } +#endif + +inline void int128_t_base::SetBit(int nIndex, int value) +{ + // EA_ASSERT((nIndex >= 0) && (nIndex < 128)); + + const uint64_t nBitMask = ((uint64_t)1 << (nIndex % 64)); + + if(nIndex < 64) + { + if(value) + mPart0 = mPart0 | nBitMask; + else + mPart0 = mPart0 & ~nBitMask; + } + else if(nIndex < 128) + { + if(value) + mPart1 = mPart1 | nBitMask; + else + mPart1 = mPart1 & ~nBitMask; + } +} + +inline int int128_t_base::GetBit(int nIndex) const +{ + // EA_ASSERT((nIndex >= 0) && (nIndex < 128)); + + const uint64_t nBitMask = ((uint64_t)1 << (nIndex % 64)); + + if(nIndex < 64) + return ((mPart0 & nBitMask) ? 1 : 0); + else if(nIndex < 128) + return ((mPart1 & nBitMask) ? 1 : 0); + return 0; +} + +inline int128_t_base::int128_t_base(uint32_t nPart0, uint32_t nPart1, uint32_t nPart2, uint32_t nPart3) +{ + mPart1 = ((uint64_t)nPart3 << 32) + nPart2; + mPart0 = ((uint64_t)nPart1 << 32) + nPart0; +} + +inline int128_t_base::int128_t_base(uint64_t nPart0, uint64_t nPart1) +{ + mPart1 = nPart1; + mPart0 = nPart0; +} + +inline int128_t_base::int128_t_base(uint8_t value) +{ + mPart1 = 0; + mPart0 = value; +} + +inline int128_t_base::int128_t_base(uint16_t value) +{ + mPart1 = 0; + mPart0 = value; +} + +inline int128_t_base::int128_t_base(uint32_t value) +{ + mPart1 = 0; + mPart0 = value; +} + +inline int128_t_base::int128_t_base(uint64_t value) +{ + mPart1 = 0; + mPart0 = value; +} + +/////////////////////////////////////////////////////////////////////////////// +// OperatorPlus +// +// Returns: (value1 + value2) into result. +// The output 'result' *is* allowed to point to the same memory as one of the inputs. +// To consider: Fix 'defect' of this function whereby it doesn't implement overflow wraparound. +// +inline void int128_t_base::OperatorPlus(const int128_t_base& value1, const int128_t_base& value2, int128_t_base& result) +{ + uint64_t t = value1.mPart0 + value2.mPart0; + uint64_t nCarry = (t < value1.mPart0) && (t < value2.mPart0); + result.mPart0 = t; + result.mPart1 = value1.mPart1 + value2.mPart1 + nCarry; +} + +/////////////////////////////////////////////////////////////////////////////// +// OperatorMinus +// +// Returns: (value1 - value2) into result. +// The output 'result' *is* allowed to point to the same memory as one of the inputs. +// To consider: Fix 'defect' of this function whereby it doesn't implement overflow wraparound. +// +inline void int128_t_base::OperatorMinus(const int128_t_base& value1, const int128_t_base& value2, int128_t_base& result) +{ + uint64_t t = (value1.mPart0 - value2.mPart0); + uint64_t nCarry = (value1.mPart0 < value2.mPart0) ? 
1u : 0u; + result.mPart0 = t; + result.mPart1 = (value1.mPart1 - value2.mPart1) - nCarry; +} + +/////////////////////////////////////////////////////////////////////////////// +// OperatorMul +// +// 64 bit systems: +// This is how it would be able to work if we could get a 128 bit result from +// two 64 bit values. None of the 64 bit systems that we are currently working +// with have C language support for multiplying two 64 bit numbers and retrieving +// the 128 bit result. However, many 64 bit platforms have support at the asm +// level for doing such a thing. +// Part 1 Part 0 +// 0000000000000002 0000000000000001 +// x 0000000000000002 0000000000000001 +// ------------------------------------------- +// | 0000000000000002 0000000000000001 +// + 0000000000000004 | 0000000000000002 (0000000000000000) +// ------------------------------------------------------------------------- +// +inline void int128_t_base::OperatorMul(const int128_t_base& a, const int128_t_base& b, int128_t_base& result) +{ + // To consider: Use compiler or OS-provided custom functionality here, such as + // Windows UnsignedMultiply128 and GCC's built-in int128_t. + + #if defined(DISABLED_PLATFORM_WIN64) + // To do: Implement x86-64 asm here. + + #else + // Else we are stuck doing something less efficient. In this case we + // fall back to doing 32 bit multiplies as with 32 bit platforms. + result = (a.mPart0 & 0xffffffff) * (b.mPart0 & 0xffffffff); + int128_t v01 = (a.mPart0 & 0xffffffff) * ((b.mPart0 >> 32) & 0xffffffff); + int128_t v02 = (a.mPart0 & 0xffffffff) * (b.mPart1 & 0xffffffff); + int128_t v03 = (a.mPart0 & 0xffffffff) * ((b.mPart1 >> 32) & 0xffffffff); + + int128_t v10 = ((a.mPart0 >> 32) & 0xffffffff) * (b.mPart0 & 0xffffffff); + int128_t v11 = ((a.mPart0 >> 32) & 0xffffffff) * ((b.mPart0 >> 32) & 0xffffffff); + int128_t v12 = ((a.mPart0 >> 32) & 0xffffffff) * (b.mPart1 & 0xffffffff); + + int128_t v20 = (a.mPart1 & 0xffffffff) * (b.mPart0 & 0xffffffff); + int128_t v21 = (a.mPart1 & 0xffffffff) * ((b.mPart0 >> 32) & 0xffffffff); + + int128_t v30 = ((a.mPart1 >> 32) & 0xffffffff) * (b.mPart0 & 0xffffffff); + + // Do row addition, shifting as needed. + OperatorPlus(result, v01 << 32, result); + OperatorPlus(result, v02 << 64, result); + OperatorPlus(result, v03 << 96, result); + + OperatorPlus(result, v10 << 32, result); + OperatorPlus(result, v11 << 64, result); + OperatorPlus(result, v12 << 96, result); + + OperatorPlus(result, v20 << 64, result); + OperatorPlus(result, v21 << 96, result); + + OperatorPlus(result, v30 << 96, result); + #endif +} + +/////////////////////////////////////////////////////////////////////////////// +// OperatorShiftRight +// +// Returns: value >> nShift into result +// The output 'result' may *not* be the same as one the input. +// With rightward shifts of negative numbers, shift in zero from the left side. 
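// A small worked example (illustrative values, following the code below): for
// 0 < nShift < 64 the low half is rebuilt from both halves, while for
// nShift >= 64 the old high half simply becomes the new low half.
//
//     value = { mPart1 = 0x1, mPart0 = 0x1 }                        // 2^64 + 1
//     value >> 1   =>  { mPart1 = 0x0, mPart0 = 0x8000000000000000 } // 2^63
//     value >> 64  =>  { mPart1 = 0x0, mPart0 = 0x1 }                // 1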
+// +inline void int128_t_base::OperatorShiftRight(const int128_t_base& value, int nShift, int128_t_base& result) +{ + if(nShift >= 0) + { + if(nShift < 64) + { // 0 - 63 + result.mPart1 = (value.mPart1 >> nShift); + + if(nShift == 0) + result.mPart0 = (value.mPart0 >> nShift); + else + result.mPart0 = (value.mPart0 >> nShift) | (value.mPart1 << (64 - nShift)); + } + else + { // 64+ + result.mPart1 = 0; + result.mPart0 = (value.mPart1 >> (nShift - 64)); + } + } + else // (nShift < 0) + OperatorShiftLeft(value, -nShift, result); +} + + +/////////////////////////////////////////////////////////////////////////////// +// OperatorShiftRight +// +// Returns: value << nShift into result +// The output 'result' may *not* be the same as one the input. +// With rightward shifts of negative numbers, shift in zero from the left side. +// +inline void int128_t_base::OperatorShiftLeft(const int128_t_base& value, int nShift, int128_t_base& result) +{ + if(nShift >= 0) + { + if(nShift < 64) + { + if(nShift) // We need to have a special case because CPUs convert a shift by 64 to a no-op. + { + // 1 - 63 + result.mPart0 = (value.mPart0 << nShift); + result.mPart1 = (value.mPart1 << nShift) | (value.mPart0 >> (64 - nShift)); + } + else + { + result.mPart0 = value.mPart0; + result.mPart1 = value.mPart1; + } + } + else + { // 64+ + result.mPart0 = 0; + result.mPart1 = (value.mPart0 << (nShift - 64)); + } + } + else // (nShift < 0) + OperatorShiftRight(value, -nShift, result); +} + + +inline bool int128_t_base::operator!() const +{ + return (mPart0 == 0) && (mPart1 == 0); +} + + +/////////////////////////////////////////////////////////////////////////////// +// OperatorXOR +// +// Returns: value1 ^ value2 into result +// The output 'result' may be the same as one the input. +// +inline void int128_t_base::OperatorXOR(const int128_t_base& value1, const int128_t_base& value2, int128_t_base& result) +{ + result.mPart0 = (value1.mPart0 ^ value2.mPart0); + result.mPart1 = (value1.mPart1 ^ value2.mPart1); +} + + +/////////////////////////////////////////////////////////////////////////////// +// OperatorOR +// +// Returns: value1 | value2 into result +// The output 'result' may be the same as one the input. +// +inline void int128_t_base::OperatorOR(const int128_t_base& value1, const int128_t_base& value2, int128_t_base& result) +{ + result.mPart0 = (value1.mPart0 | value2.mPart0); + result.mPart1 = (value1.mPart1 | value2.mPart1); +} + + +/////////////////////////////////////////////////////////////////////////////// +// OperatorAND +// +// Returns: value1 & value2 into result +// The output 'result' may be the same as one the input. +// +inline void int128_t_base::OperatorAND(const int128_t_base& value1, const int128_t_base& value2, int128_t_base& result) +{ + result.mPart0 = (value1.mPart0 & value2.mPart0); + result.mPart1 = (value1.mPart1 & value2.mPart1); +} + + +inline bool int128_t_base::IsZero() const +{ + return (mPart0 == 0) && // Check mPart0 first as this will likely yield faster execution. 
+ (mPart1 == 0); +} + + +inline void int128_t_base::SetZero() +{ + mPart1 = 0; + mPart0 = 0; +} + + +inline void int128_t_base::TwosComplement() +{ + mPart1 = ~mPart1; + mPart0 = ~mPart0; + + // What we want to do, but isn't available at this level: + // operator++(); + // Alternative: + int128_t_base one((uint32_t)1); + OperatorPlus(*this, one, *this); +} + + +inline void int128_t_base::InverseTwosComplement() +{ + // What we want to do, but isn't available at this level: + // operator--(); + // Alternative: + int128_t_base one((uint32_t)1); + OperatorMinus(*this, one, *this); + + mPart1 = ~mPart1; + mPart0 = ~mPart0; +} + + +inline void int128_t_base::DoubleToUint128(double value) +{ + // Currently this function is limited to 64 bits of integer input. + // We need to make a better version of this function. Perhaps we should implement + // it via dissecting the IEEE floating point format (sign, exponent, matissa). + // EA_ASSERT(fabs(value) < 18446744073709551616.0); // Assert that the input is <= 64 bits of integer. + + mPart1 = 0; + mPart0 = (value >= 0 ? (uint64_t)value : (uint64_t)-value); +} + + + + + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// uint128_t implementation +/////////////////////////////////////////////////////////////////////////////////////////////////////// + +inline uint128_t uint128_t::operator^(const uint128_t& other) const +{ + uint128_t temp; + uint128_t::OperatorXOR(*this, other, temp); + return temp; +} + +inline uint128_t uint128_t::operator|(const uint128_t& other) const +{ + uint128_t temp; + uint128_t::OperatorOR(*this, other, temp); + return temp; +} + +inline uint128_t uint128_t::operator&(const uint128_t& other) const +{ + uint128_t temp; + uint128_t::OperatorAND(*this, other, temp); + return temp; +} + +inline uint128_t& uint128_t::operator^=(const uint128_t& value) +{ + OperatorXOR(*this, value, *this); + return *this; +} + +inline uint128_t& uint128_t::operator|=(const uint128_t& value) +{ + OperatorOR(*this, value, *this); + return *this; +} + +inline uint128_t& uint128_t::operator&=(const uint128_t& value) +{ + OperatorAND(*this, value, *this); + return *this; +} + +// With rightward shifts of negative numbers, shift in zero from the left side. +inline uint128_t uint128_t::operator>>(int nShift) const +{ + uint128_t temp; + OperatorShiftRight(*this, nShift, temp); + return temp; +} + +// With rightward shifts of negative numbers, shift in zero from the left side. 
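// A matching sketch for the leftward direction (hypothetical caller, using
// OperatorShiftLeft above): shifting by 64 or more moves the low half into the
// high half and zero-fills the low half.
//
//     uint128_t one((uint32_t)1);
//     uint128_t big  = one << 64;   // { mPart1 = 1, mPart0 = 0 }, i.e. 2^64
//     uint128_t back = big >> 64;   // equal to one again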
+inline uint128_t uint128_t::operator<<(int nShift) const +{ + uint128_t temp; + OperatorShiftLeft(*this, nShift, temp); + return temp; +} + +inline uint128_t& uint128_t::operator>>=(int nShift) +{ + uint128_t temp; + OperatorShiftRight(*this, nShift, temp); + *this = temp; + return *this; +} + +inline uint128_t& uint128_t::operator<<=(int nShift) +{ + uint128_t temp; + OperatorShiftLeft(*this, nShift, temp); + *this = temp; + return *this; +} + +inline uint128_t& uint128_t::operator+=(const uint128_t& value) +{ + OperatorPlus(*this, value, *this); + return *this; +} + +inline uint128_t& uint128_t::operator-=(const uint128_t& value) +{ + OperatorMinus(*this, value, *this); + return *this; +} + +inline uint128_t& uint128_t::operator*=(const uint128_t& value) +{ + *this = *this * value; + return *this; +} + +inline uint128_t& uint128_t::operator/=(const uint128_t& value) +{ + *this = *this / value; + return *this; +} + +inline uint128_t& uint128_t::operator%=(const uint128_t& value) +{ + *this = *this % value; + return *this; +} + +inline uint128_t uint128_t::operator+(const uint128_t& other) +{ + uint128_t temp; + uint128_t::OperatorPlus(*this, other, temp); + return temp; +} + +inline uint128_t uint128_t::operator-(const uint128_t& other) +{ + uint128_t temp; + uint128_t::OperatorMinus(*this, other, temp); + return temp; +} + +inline uint128_t uint128_t::operator*(const uint128_t& other) +{ + uint128_t returnValue; + int128_t_base::OperatorMul(*this, other, returnValue); + return returnValue; +} + +inline uint128_t uint128_t::operator/(const uint128_t& other) +{ + uint128_t remainder; + uint128_t quotient; + this->Modulus(other, quotient, remainder); + return quotient; +} + +inline uint128_t uint128_t::operator%(const uint128_t& other) +{ + uint128_t remainder; + uint128_t quotient; + this->Modulus(other, quotient, remainder); + return remainder; +} + +inline uint128_t uint128_t::operator+() const +{ + return *this; +} + +inline uint128_t uint128_t::operator~() const +{ + return uint128_t(~mPart0, ~mPart1); +} + +inline uint128_t& uint128_t::operator--() +{ + int128_t_base one((uint32_t)1); + OperatorMinus(*this, one, *this); + return *this; +} + +inline uint128_t uint128_t::operator--(int) +{ + uint128_t temp((uint32_t)1); + OperatorMinus(*this, temp, temp); + return temp; +} + +inline uint128_t uint128_t::operator++(int) +{ + uint128_t prev = *this; + uint128_t temp((uint32_t)1); + OperatorPlus(*this, temp, *this); + return prev; +} + +inline uint128_t& uint128_t::operator++() +{ + int128_t_base one((uint32_t)1); + OperatorPlus(*this, one, *this); + return *this; +} + +inline void uint128_t::Negate() +{ + TwosComplement(); +} + +inline uint128_t uint128_t::operator-() const +{ + uint128_t returnValue(*this); + returnValue.Negate(); + return returnValue; +} + +// This function forms the basis of all logical comparison functions. +// If value1 < value2, the return value is -1. +// If value1 == value2, the return value is 0. +// If value1 > value2, the return value is 1. +inline int uint128_t::compare(const uint128_t& other) const +{ + // Compare individual parts. At this point, the two numbers have the same sign. + if(mPart1 == other.mPart1) + { + if(mPart0 == other.mPart0) + return 0; + else if(mPart0 > other.mPart0) + return 1; + // return -1; //Just fall through to the end. 
+ } + else if(mPart1 > other.mPart1) + return 1; + return -1; +} + +EA_DISABLE_VC_WARNING(4723) // warning C4723: potential divide by 0 +inline void uint128_t::Modulus(const uint128_t& divisor, uint128_t& quotient, uint128_t& remainder) const +{ + uint128_t tempDividend(*this); + uint128_t tempDivisor(divisor); + + if(tempDivisor.IsZero()) + { + // Force a divide by zero exception. + // We know that tempDivisor.mPart0 is zero. + quotient.mPart0 /= tempDivisor.mPart0; + } + else if(tempDividend.IsZero()) + { + quotient = uint128_t((uint32_t)0); + remainder = uint128_t((uint32_t)0); + } + else + { + remainder.SetZero(); + + for(int i(0); i < 128; i++) + { + remainder += (uint32_t)tempDividend.GetBit(127 - i); + const bool bBit(remainder >= tempDivisor); + quotient.SetBit(127 - i, bBit); + + if(bBit) + remainder -= tempDivisor; + + if((i != 127) && !remainder.IsZero()) + remainder <<= 1; + } + } +} +EA_RESTORE_VC_WARNING() + +inline bool uint128_t::operator==(const uint128_t& other) const +{ + return (mPart0 == other.mPart0) && // Check mPart0 first as this will likely yield faster execution. + (mPart1 == other.mPart1); +} + +inline bool uint128_t::operator< (const uint128_t& other) const { return (compare(other) < 0); } +inline bool uint128_t::operator!=(const uint128_t& other) const { return !(*this == other); } +inline bool uint128_t::operator> (const uint128_t& other) const { return other < *this; } +inline bool uint128_t::operator>=(const uint128_t& other) const { return !(*this < other); } +inline bool uint128_t::operator<=(const uint128_t& other) const { return !(other < *this); } + +inline bool uint128_t::IsNegative() const +{ // True if value < 0 + return false; +} + +inline bool uint128_t::IsPositive() const +{ + // True of value >= 0 + return true; +} + + + + + + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// int128_t implementation +/////////////////////////////////////////////////////////////////////////////////////////////////////// + +inline void int128_t::Negate() +{ + if (IsPositive()) + TwosComplement(); + else + InverseTwosComplement(); +} + +inline int128_t int128_t::operator-() const +{ + int128_t returnValue(*this); + returnValue.Negate(); + return returnValue; +} + +inline int128_t& int128_t::operator++() +{ + int128_t_base one((uint32_t)1); + OperatorPlus(*this, one, *this); + return *this; +} + +inline int128_t& int128_t::operator--() +{ + int128_t_base one((uint32_t)1); + OperatorMinus(*this, one, *this); + return *this; +} + +inline int128_t int128_t::operator++(int) +{ + int128_t prev = *this; + int128_t temp((uint32_t)1); + OperatorPlus(*this, temp, *this); + return prev; +} + +inline int128_t int128_t::operator--(int) +{ + int128_t temp((uint32_t)1); + OperatorMinus(*this, temp, temp); + return temp; +} + +inline int128_t int128_t::operator+() const +{ + return *this; +} + +inline int128_t int128_t::operator~() const +{ + return int128_t(~mPart0, ~mPart1); +} + +inline int128_t int128_t::operator+(const int128_t& other) +{ + int128_t temp; + int128_t::OperatorPlus(*this, other, temp); + return temp; +} + +inline int128_t int128_t::operator-(const int128_t& other) +{ + int128_t temp; + int128_t::OperatorMinus(*this, other, temp); + return temp; +} + +// This function forms the basis of all logical comparison functions. +// If value1 < value2, the return value is -1. +// If value1 == value2, the return value is 0. +// If value1 > value2, the return value is 1. 
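+// Editor's note: the lines below are an illustrative sketch (not part of the
+// original source) showing how compare() drives the relational operators:
+//     int128_t a((uint32_t)2), b((uint32_t)3);
+//     a.compare(b);    // returns -1, so (a < b) is true and (a >= b) is false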
+inline int int128_t::compare(const int128_t& other) const +{ + // Cache some values. Positive means >= 0. Negative means < 0 and thus means '!positive'. + const bool bValue1IsPositive( IsPositive()); + const bool bValue2IsPositive(other.IsPositive()); + + // Do positive/negative tests. + if(bValue1IsPositive != bValue2IsPositive) + return bValue1IsPositive ? 1 : -1; + + // Compare individual parts. At this point, the two numbers have the same sign. + if(mPart1 == other.mPart1) + { + if(mPart0 == other.mPart0) + return 0; + else if(mPart0 > other.mPart0) + return 1; + // return -1; //Just fall through to the end. + } + else if(mPart1 > other.mPart1) + return 1; + return -1; +} + +inline bool int128_t::operator==(const int128_t& other) const +{ + return (mPart0 == other.mPart0) && // Check mPart0 first as this will likely yield faster execution. + (mPart1 == other.mPart1); +} + +inline bool int128_t::operator!=(const int128_t& other) const +{ + return (mPart0 != other.mPart0) || // Check mPart0 first as this will likely yield faster execution. + (mPart1 != other.mPart1); +} + +inline bool int128_t::operator>(const int128_t& other) const +{ + return (compare(other) > 0); +} + +inline bool int128_t::operator>=(const int128_t& other) const +{ + return (compare(other) >= 0); +} + +inline bool int128_t::operator<(const int128_t& other) const +{ + return (compare(other) < 0); +} + +inline bool int128_t::operator<=(const int128_t& other) const +{ + return (compare(other) <= 0); +} + +inline bool int128_t::IsNegative() const +{ // True if value < 0 + return ((mPart1 & UINT64_C(0x8000000000000000)) != 0); +} + +inline bool int128_t::IsPositive() const +{ // True of value >= 0 + return ((mPart1 & UINT64_C(0x8000000000000000)) == 0); +} + +inline int128_t int128_t::operator*(const int128_t& other) +{ + int128_t a(*this); + int128_t b(other); + int128_t returnValue; + + // Correctly handle negative values + bool bANegative(false); + bool bBNegative(false); + + if(a.IsNegative()) + { + bANegative = true; + a.Negate(); + } + + if(b.IsNegative()) + { + bBNegative = true; + b.Negate(); + } + + int128_t_base::OperatorMul(a, b, returnValue); + + // Do negation as needed. 
+ if(bANegative != bBNegative) + returnValue.Negate(); + + return returnValue; +} + +inline int128_t int128_t::operator/(const int128_t& other) +{ + int128_t remainder; + int128_t quotient; + this->Modulus(other, quotient, remainder); + return quotient; +} + +inline int128_t int128_t::operator<<(int nShift) const +{ + int128_t temp; + OperatorShiftLeft(*this, nShift, temp); + return temp; +} + +inline int128_t& int128_t::operator+=(const int128_t& value) +{ + OperatorPlus(*this, value, *this); + return *this; +} + +inline int128_t& int128_t::operator-=(const int128_t& value) +{ + OperatorMinus(*this, value, *this); + return *this; +} + +inline int128_t& int128_t::operator<<=(int nShift) +{ + int128_t temp; + OperatorShiftLeft(*this, nShift, temp); + *this = temp; + return *this; +} + +inline int128_t& int128_t::operator*=(const int128_t& value) +{ + *this = *this * value; + return *this; +} + +inline int128_t& int128_t::operator%=(const int128_t& value) +{ + *this = *this % value; + return *this; +} + +inline int128_t int128_t::operator%(const int128_t& other) +{ + int128_t remainder; + int128_t quotient; + this->Modulus(other, quotient, remainder); + return remainder; +} + +inline int128_t& int128_t::operator/=(const int128_t& value) +{ + *this = *this / value; + return *this; +} + +// With rightward shifts of negative numbers, shift in zero from the left side. +inline int128_t int128_t::operator>>(int nShift) const +{ + int128_t temp; + OperatorShiftRight(*this, nShift, temp); + return temp; +} + +inline int128_t& int128_t::operator>>=(int nShift) +{ + int128_t temp; + OperatorShiftRight(*this, nShift, temp); + *this = temp; + return *this; +} + +inline int128_t int128_t::operator^(const int128_t& other) const +{ + int128_t temp; + int128_t::OperatorXOR(*this, other, temp); + return temp; +} + +inline int128_t int128_t::operator|(const int128_t& other) const +{ + int128_t temp; + int128_t::OperatorOR(*this, other, temp); + return temp; +} + + +inline int128_t int128_t::operator&(const int128_t& other) const +{ + int128_t temp; + int128_t::OperatorAND(*this, other, temp); + return temp; +} + +inline int128_t& int128_t::operator^=(const int128_t& value) +{ + OperatorXOR(*this, value, *this); + return *this; +} + +inline int128_t& int128_t::operator|=(const int128_t& value) +{ + OperatorOR(*this, value, *this); + return *this; +} + +inline int128_t& int128_t::operator&=(const int128_t& value) +{ + OperatorAND(*this, value, *this); + return *this; +} + +EA_DISABLE_VC_WARNING(4723) // warning C4723: potential divide by 0 +inline void int128_t::Modulus(const int128_t& divisor, int128_t& quotient, int128_t& remainder) const +{ + int128_t tempDividend(*this); + int128_t tempDivisor(divisor); + + bool bDividendNegative = false; + bool bDivisorNegative = false; + + if(tempDividend.IsNegative()) + { + bDividendNegative = true; + tempDividend.Negate(); + } + if(tempDivisor.IsNegative()) + { + bDivisorNegative = true; + tempDivisor.Negate(); + } + + // Handle the special cases + if(tempDivisor.IsZero()) + { + // Force a divide by zero exception. + // We know that tempDivisor.mPart0 is zero. 
+ quotient.mPart0 /= tempDivisor.mPart0; + } + else if(tempDividend.IsZero()) + { + quotient = int128_t((uint32_t)0); + remainder = int128_t((uint32_t)0); + } + else + { + remainder.SetZero(); + + for(int i(0); i < 128; i++) + { + remainder += (uint32_t)tempDividend.GetBit(127 - i); + const bool bBit(remainder >= tempDivisor); + quotient.SetBit(127 - i, bBit); + + if(bBit) + remainder -= tempDivisor; + + if((i != 127) && !remainder.IsZero()) + remainder <<= 1; + } + } + + if((bDividendNegative && !bDivisorNegative) || (!bDividendNegative && bDivisorNegative)) + { + // Ensure the following formula applies for negative dividends + // dividend = divisor * quotient + remainder + quotient.Negate(); + } +} +EA_RESTORE_VC_WARNING() + + + + + + +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// INT128_C / UINT128_C +// +// The C99 language defines macros for portably defining constants of +// sized numeric types. For example, there might be: +// #define UINT64_C(x) x##ULL +// Since our int128 data type is not a built-in type, we can't define a +// UINT128_C macro as something that pastes ULLL at the end of the digits. +// Instead we define it to create a temporary that is constructed from a +// string of the digits. This will work in most cases that suffix pasting +// would work. +// +/* EA_CONSTEXPR */ inline uint128_t UINT128_C(uint64_t nPart1, uint64_t nPart0) { return uint128_t(nPart0, nPart1); } +/* EA_CONSTEXPR */ inline int128_t INT128_C(int64_t nPart1, int64_t nPart0) { return int128_t(static_cast(nPart0), static_cast(nPart1)); } + + + + +#endif // INCLUDED_int128_h + diff --git a/libkram/eastl/include/EABase/nullptr.h b/libkram/eastl/include/EABase/nullptr.h new file mode 100644 index 00000000..d6629d50 --- /dev/null +++ b/libkram/eastl/include/EABase/nullptr.h @@ -0,0 +1,102 @@ +/*----------------------------------------------------------------------------- + * nullptr.h + * + * Copyright (c) Electronic Arts Inc. All rights reserved. + *---------------------------------------------------------------------------*/ + + +#include +#include + + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once /* Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. */ +#endif + + +#if defined(EA_COMPILER_CPP11_ENABLED) && !defined(EA_COMPILER_NO_NULLPTR) && !defined(EA_HAVE_nullptr_t_IMPL) + // The compiler supports nullptr, but the standard library doesn't implement a declaration for std::nullptr_t. So we provide one. + namespace std { typedef decltype(nullptr) nullptr_t; } +#endif + + + +#if defined(EA_COMPILER_NO_NULLPTR) // If the compiler lacks a native version... + + namespace std + { + class nullptr_t + { + public: + template // When tested a pointer, acts as 0. + operator T*() const + { return 0; } + + template // When tested as a member pointer, acts as 0. + operator T C::*() const + { return 0; } + + typedef void* (nullptr_t::*bool_)() const; + operator bool_() const // An rvalue of type std::nullptr_t can be converted to an rvalue of type bool; the resulting value is false. + { return false; } // We can't use operator bool(){ return false; } because bool is convertable to int which breaks other required functionality. + + // We can't enable this without generating warnings about nullptr being uninitialized after being used when created without "= {}". + //void* mSizeofVoidPtr; // sizeof(nullptr_t) == sizeof(void*). 
Needs to be public if nullptr_t is to be a POD. + + private: + void operator&() const; // Address cannot be taken. + }; + + inline nullptr_t nullptr_get() + { + nullptr_t n = { }; // std::nullptr exists. + return n; + } + + #if !defined(nullptr) // If somebody hasn't already defined nullptr in a custom way... + #define nullptr nullptr_get() + #endif + + } // namespace std + + + template + inline bool operator==(T* p, const std::nullptr_t) + { return p == 0; } + + template + inline bool operator==(const std::nullptr_t, T* p) + { return p == 0; } + + template + inline bool operator==(T U::* p, const std::nullptr_t) + { return p == 0; } + + template + inline bool operator==(const std::nullptr_t, T U::* p) + { return p == 0; } + + inline bool operator==(const std::nullptr_t, const std::nullptr_t) + { return true; } + + inline bool operator!=(const std::nullptr_t, const std::nullptr_t) + { return false; } + + inline bool operator<(const std::nullptr_t, const std::nullptr_t) + { return false; } + + inline bool operator>(const std::nullptr_t, const std::nullptr_t) + { return false; } + + inline bool operator<=(const std::nullptr_t, const std::nullptr_t) + { return true; } + + inline bool operator>=(const std::nullptr_t, const std::nullptr_t) + { return true; } + + + using std::nullptr_t; // exported to global namespace. + using std::nullptr_get; // exported to global namespace. + +#endif // EA_COMPILER_NO_NULLPTR + diff --git a/libkram/eastl/include/EABase/version.h b/libkram/eastl/include/EABase/version.h new file mode 100644 index 00000000..b6e1b665 --- /dev/null +++ b/libkram/eastl/include/EABase/version.h @@ -0,0 +1,36 @@ +/*----------------------------------------------------------------------------- + * version.h + * + * Copyright (c) Electronic Arts Inc. All rights reserved. + *---------------------------------------------------------------------------*/ + +#ifndef INCLUDED_EABASE_VERSION_H +#define INCLUDED_EABASE_VERSION_H + +/////////////////////////////////////////////////////////////////////////////// +// EABASE_VERSION +// +// We more or less follow the conventional EA packaging approach to versioning +// here. A primary distinction here is that minor versions are defined as two +// digit entities (e.g. .03") instead of minimal digit entities ".3"). The logic +// here is that the value is a counter and not a floating point fraction. +// Note that the major version doesn't have leading zeros. +// +// Example version strings: +// "0.91.00" // Major version 0, minor version 91, patch version 0. +// "1.00.00" // Major version 1, minor and patch version 0. +// "3.10.02" // Major version 3, minor version 10, patch version 02. +// "12.03.01" // Major version 12, minor version 03, patch version +// +// Example usage: +// printf("EABASE version: %s", EABASE_VERSION); +// printf("EABASE version: %d.%d.%d", EABASE_VERSION_N / 10000 % 100, EABASE_VERSION_N / 100 % 100, EABASE_VERSION_N % 100); +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EABASE_VERSION + #define EABASE_VERSION "2.09.12" + #define EABASE_VERSION_N 20912 +#endif + +#endif diff --git a/libkram/eastl/include/EASTL/algorithm.h b/libkram/eastl/include/EASTL/algorithm.h new file mode 100644 index 00000000..da35c2e2 --- /dev/null +++ b/libkram/eastl/include/EASTL/algorithm.h @@ -0,0 +1,4221 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+/////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////
+// This file implements some of the primary algorithms from the C++ STL
+// algorithm library. These versions are just like the STL versions and so
+// are redundant. They are provided solely for the purpose of projects that
+// either cannot use standard C++ STL or want algorithms that have guaranteed
+// identical behaviour across platforms.
+///////////////////////////////////////////////////////////////////////////////
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Definitions
+//
+// You will notice that we are very particular about the templated typenames
+// we use here. You will notice that we follow the C++ standard closely in
+// these respects. Each of these typenames has a specific meaning;
+// this is why we don't just label templated arguments with just letters
+// such as T, U, V, A, B. Here we provide a quick reference for the typenames
+// we use. See the C++ standard, section 25-8 for more details.
+// --------------------------------------------------------------
+// typename                 Meaning
+// --------------------------------------------------------------
+// T                        The value type.
+// Compare                  A function which takes two arguments and returns the lesser of the two.
+// Predicate                A function which takes one argument and returns true if the argument meets some criteria.
+// BinaryPredicate          A function which takes two arguments and returns true if some criterion is met (e.g. they are equal).
+// StrictWeakOrdering       A BinaryPredicate that compares two objects, returning true if the first precedes the second. Like Compare but has additional requirements. Used for sorting routines.
+// Function                 A function which takes one argument and applies some operation to the target.
+// Size                     A count or size.
+// Generator                A function which takes no arguments and returns a value (which will usually be assigned to an object).
+// UnaryOperation           A function which takes one argument and returns a value (which will usually be assigned to a second object).
+// BinaryOperation          A function which takes two arguments and returns a value (which will usually be assigned to a third object).
+// InputIterator            An input iterator (iterator you read from) which allows reading each element only once and only in a forward direction.
+// ForwardIterator          An input iterator which is like InputIterator except it can be reset back to the beginning.
+// BidirectionalIterator    An input iterator which is like ForwardIterator except it can be read in a backward direction as well.
+// RandomAccessIterator     An input iterator which can be addressed like an array. It is a superset of all other input iterators.
+// OutputIterator           An output iterator (iterator you write to) which allows writing each element only once and only in a forward direction.
+//
+// Note that a function which takes an InputIterator will also work with a
+// ForwardIterator, BidirectionalIterator, or RandomAccessIterator.
+// The given iterator type is merely the -minimum- functionality the
+// iterator must support.
+///////////////////////////////////////////////////////////////////////////////
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Optimizations
+//
+// There are a number of opportunities for optimizations that we take here
+// in this library. The most obvious kinds are those that substitute memcpy
+// in the place of a conventional loop for data types with which this is
+// possible. The algorithms here are optimized to a higher level than currently
+// available C++ STL algorithms from vendors such as Microsoft. This is especially
+// so for game programming on console devices, as we do things such as reduce
+// branching relative to other STL algorithm implementations. However, the
+// proper implementation of these algorithm optimizations is a fairly tricky
+// thing.
+//
+// The various things we look to take advantage of in order to implement
+// optimizations include:
+//    - Taking advantage of random access iterators.
+//    - Taking advantage of POD (plain old data) data types.
+//    - Taking advantage of type_traits in general.
+//    - Reducing branching and taking advantage of likely branch predictions.
+//    - Taking advantage of issues related to pointer and reference aliasing.
+//    - Improving cache coherency during memory accesses.
+//    - Making code more likely to be inlinable by the compiler.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Supported Algorithms
+//
+// Algorithms that we implement are listed here. Note that these items are not
+// all within this header file, as we split up the header files in order to
+// improve compilation performance. Items marked with '+' are extensions which
+// don't exist in the C++ standard.
+//
+// -------------------------------------------------------------------------------
+// Algorithm Notes
+// -------------------------------------------------------------------------------
+// adjacent_find
+// adjacent_find
+// all_of C++11
+// any_of C++11
+// none_of C++11
+// binary_search
+// binary_search
+// +binary_search_i
+// +binary_search_i
+// +change_heap Found in heap.h
+// +change_heap Found in heap.h
+// clamp
+// copy
+// copy_if C++11
+// copy_n C++11
+// copy_backward
+// count
+// count_if
+// equal
+// equal
+// equal_range
+// equal_range
+// fill
+// fill_n
+// find
+// find_end
+// find_end
+// find_first_of
+// find_first_of
+// +find_first_not_of
+// +find_first_not_of
+// +find_last_of
+// +find_last_of
+// +find_last_not_of
+// +find_last_not_of
+// find_if
+// find_if_not
+// for_each
+// generate
+// generate_n
+// +identical
+// +identical
+// iter_swap
+// lexicographical_compare
+// lexicographical_compare
+// lower_bound
+// lower_bound
+// make_heap Found in heap.h
+// make_heap Found in heap.h
+// min
+// min
+// max
+// max
+// +min_alt Exists to work around the problem of conflicts with min/max #defines on some systems.
+// +min_alt +// +max_alt +// +max_alt +// +median +// +median +// merge Found in sort.h +// merge Found in sort.h +// min_element +// min_element +// max_element +// max_element +// mismatch +// mismatch +// move +// move_backward +// nth_element Found in sort.h +// nth_element Found in sort.h +// partial_sort Found in sort.h +// partial_sort Found in sort.h +// push_heap Found in heap.h +// push_heap Found in heap.h +// pop_heap Found in heap.h +// pop_heap Found in heap.h +// random_shuffle +// remove +// remove_if +// remove_copy +// remove_copy_if +// +remove_heap Found in heap.h +// +remove_heap Found in heap.h +// replace +// replace_if +// replace_copy +// replace_copy_if +// reverse_copy +// reverse +// random_shuffle +// rotate +// rotate_copy +// search +// search +// search_n +// set_difference +// set_difference +// set_difference_2 +// set_difference_2 +// set_decomposition +// set_decomposition +// set_intersection +// set_intersection +// set_symmetric_difference +// set_symmetric_difference +// set_union +// set_union +// sort Found in sort.h +// sort Found in sort.h +// sort_heap Found in heap.h +// sort_heap Found in heap.h +// stable_sort Found in sort.h +// stable_sort Found in sort.h +// swap +// swap_ranges +// transform +// transform +// unique +// unique +// upper_bound +// upper_bound +// is_permutation +// is_permutation +// next_permutation +// next_permutation +// +// Algorithms from the C++ standard that we don't implement are listed here. +// Most of these items are absent because they aren't used very often. +// They also happen to be the more complicated than other algorithms. +// However, we can implement any of these functions for users that might +// need them. +// includes +// includes +// inplace_merge +// inplace_merge +// partial_sort_copy +// partial_sort_copy +// paritition +// prev_permutation +// prev_permutation +// search_n +// stable_partition +// unique_copy +// unique_copy +// +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ALGORITHM_H +#define EASTL_ALGORITHM_H + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +EA_DISABLE_ALL_VC_WARNINGS(); + + #if defined(EA_COMPILER_MSVC) && (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)) + #include + #endif + + #include + #include // memcpy, memcmp, memmove + +EA_RESTORE_ALL_VC_WARNINGS(); + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// min/max workaround +// +// MSVC++ has #defines for min/max which collide with the min/max algorithm +// declarations. The following may still not completely resolve some kinds of +// problems with MSVC++ #defines, though it deals with most cases in production +// game code. +// +#if EASTL_NOMINMAX + #ifdef min + #undef min + #endif + #ifdef max + #undef max + #endif +#endif + + + + +namespace eastl +{ + /// min_element + /// + /// min_element finds the smallest element in the range [first, last). + /// It returns the first iterator i in [first, last) such that no other + /// iterator in [first, last) points to a value smaller than *i. + /// The return value is last if and only if [first, last) is an empty range. 
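+	/// Example usage (editor's illustrative sketch, not part of the original comment):
+	///     int a[] = { 3, 1, 2 };
+	///     int* p = eastl::min_element(a, a + 3);   // p points to the 1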
+ /// + /// Returns: The first iterator i in the range [first, last) such that + /// for any iterator j in the range [first, last) the following corresponding + /// condition holds: !(*j < *i). + /// + /// Complexity: Exactly 'max((last - first) - 1, 0)' applications of the + /// corresponding comparisons. + /// + template + ForwardIterator min_element(ForwardIterator first, ForwardIterator last) + { + if(first != last) + { + ForwardIterator currentMin = first; + + while(++first != last) + { + if(*first < *currentMin) + currentMin = first; + } + return currentMin; + } + return first; + } + + + /// min_element + /// + /// min_element finds the smallest element in the range [first, last). + /// It returns the first iterator i in [first, last) such that no other + /// iterator in [first, last) points to a value smaller than *i. + /// The return value is last if and only if [first, last) is an empty range. + /// + /// Returns: The first iterator i in the range [first, last) such that + /// for any iterator j in the range [first, last) the following corresponding + /// conditions hold: compare(*j, *i) == false. + /// + /// Complexity: Exactly 'max((last - first) - 1, 0)' applications of the + /// corresponding comparisons. + /// + template + ForwardIterator min_element(ForwardIterator first, ForwardIterator last, Compare compare) + { + if(first != last) + { + ForwardIterator currentMin = first; + + while(++first != last) + { + if(compare(*first, *currentMin)) + currentMin = first; + } + return currentMin; + } + return first; + } + + + /// max_element + /// + /// max_element finds the largest element in the range [first, last). + /// It returns the first iterator i in [first, last) such that no other + /// iterator in [first, last) points to a value greater than *i. + /// The return value is last if and only if [first, last) is an empty range. + /// + /// Returns: The first iterator i in the range [first, last) such that + /// for any iterator j in the range [first, last) the following corresponding + /// condition holds: !(*i < *j). + /// + /// Complexity: Exactly 'max((last - first) - 1, 0)' applications of the + /// corresponding comparisons. + /// + template + ForwardIterator max_element(ForwardIterator first, ForwardIterator last) + { + if(first != last) + { + ForwardIterator currentMax = first; + + while(++first != last) + { + if(*currentMax < *first) + currentMax = first; + } + return currentMax; + } + return first; + } + + + /// max_element + /// + /// max_element finds the largest element in the range [first, last). + /// It returns the first iterator i in [first, last) such that no other + /// iterator in [first, last) points to a value greater than *i. + /// The return value is last if and only if [first, last) is an empty range. + /// + /// Returns: The first iterator i in the range [first, last) such that + /// for any iterator j in the range [first, last) the following corresponding + /// condition holds: compare(*i, *j) == false. + /// + /// Complexity: Exactly 'max((last - first) - 1, 0)' applications of the + /// corresponding comparisons. 
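+	/// Example usage (editor's illustrative sketch, using eastl::less as the comparator):
+	///     int a[] = { 3, 1, 2 };
+	///     int* p = eastl::max_element(a, a + 3, eastl::less<int>());   // p points to the 3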
+ /// + template + ForwardIterator max_element(ForwardIterator first, ForwardIterator last, Compare compare) + { + if(first != last) + { + ForwardIterator currentMax = first; + + while(++first != last) + { + if(compare(*currentMax, *first)) + currentMax = first; + } + return currentMax; + } + return first; + } + + + #if EASTL_MINMAX_ENABLED + + /// min + /// + /// Min returns the lesser of its two arguments; it returns the first + /// argument if neither is less than the other. The two arguments are + /// compared with operator <. + /// + /// This min and our other min implementations are defined as returning: + /// b < a ? b : a + /// which for example may in practice result in something different than: + /// b <= a ? b : a + /// in the case where b is different from a (though they compare as equal). + /// We choose the specific ordering here because that's the ordering + /// done by other STL implementations. + /// + /// Some compilers (e.g. VS20003 - VS2013) generate poor code for the case of + /// scalars returned by reference, so we provide a specialization for those cases. + /// The specialization returns T by value instead of reference, which is + /// not that the Standard specifies. The Standard allows you to use + /// an expression like &max(x, y), which would be impossible in this case. + /// However, we have found no actual code that uses min or max like this and + /// this specialization causes no problems in practice. Microsoft has acknowledged + /// the problem and may fix it for a future VS version. + /// + template + inline EA_CONSTEXPR typename eastl::enable_if::value, T>::type + min(T a, T b) + { + return b < a ? b : a; + } + + template + inline EA_CONSTEXPR typename eastl::enable_if::value, const T&>::type + min(const T& a, const T& b) + { + return b < a ? b : a; + } + + inline EA_CONSTEXPR float min(float a, float b) { return b < a ? b : a; } + inline EA_CONSTEXPR double min(double a, double b) { return b < a ? b : a; } + inline EA_CONSTEXPR long double min(long double a, long double b) { return b < a ? b : a; } + + #endif // EASTL_MINMAX_ENABLED + + + /// min_alt + /// + /// This is an alternative version of min that avoids any possible + /// collisions with Microsoft #defines of min and max. + /// + /// See min(a, b) for detailed specifications. + /// + template + inline EA_CONSTEXPR typename eastl::enable_if::value, T>::type + min_alt(T a, T b) + { + return b < a ? b : a; + } + + template + inline typename eastl::enable_if::value, const T&>::type + min_alt(const T& a, const T& b) + { + return b < a ? b : a; + } + + inline EA_CONSTEXPR float min_alt(float a, float b) { return b < a ? b : a; } + inline EA_CONSTEXPR double min_alt(double a, double b) { return b < a ? b : a; } + inline EA_CONSTEXPR long double min_alt(long double a, long double b) { return b < a ? b : a; } + + + #if EASTL_MINMAX_ENABLED + + /// min + /// + /// Min returns the lesser of its two arguments; it returns the first + /// argument if neither is less than the other. The two arguments are + /// compared with the Compare function (or function object), which + /// takes two arguments and returns true if the first is less than + /// the second. + /// + /// See min(a, b) for detailed specifications. 
+ /// + /// Example usage: + /// struct A{ int a; }; + /// struct Struct{ bool operator()(const A& a1, const A& a2){ return a1.a < a2.a; } }; + /// + /// A a1, a2, a3; + /// a3 = min(a1, a2, Struct()); + /// + /// Example usage: + /// struct B{ int b; }; + /// inline bool Function(const B& b1, const B& b2){ return b1.b < b2.b; } + /// + /// B b1, b2, b3; + /// b3 = min(b1, b2, Function); + /// + template + inline const T& + min(const T& a, const T& b, Compare compare) + { + return compare(b, a) ? b : a; + } + + #endif // EASTL_MINMAX_ENABLED + + + /// min_alt + /// + /// This is an alternative version of min that avoids any possible + /// collisions with Microsoft #defines of min and max. + /// + /// See min(a, b) for detailed specifications. + /// + template + inline const T& + min_alt(const T& a, const T& b, Compare compare) + { + return compare(b, a) ? b : a; + } + + + #if EASTL_MINMAX_ENABLED + + /// max + /// + /// Max returns the greater of its two arguments; it returns the first + /// argument if neither is greater than the other. The two arguments are + /// compared with operator < (and not operator >). + /// + /// This min and our other min implementations are defined as returning: + /// a < b ? b : a + /// which for example may in practice result in something different than: + /// a <= b ? b : a + /// in the case where b is different from a (though they compare as equal). + /// We choose the specific ordering here because that's the ordering + /// done by other STL implementations. + /// + template + inline EA_CONSTEXPR typename eastl::enable_if::value, T>::type + max(T a, T b) + { + return a < b ? b : a; + } + + template + inline EA_CONSTEXPR typename eastl::enable_if::value, const T&>::type + max(const T& a, const T& b) + { + return a < b ? b : a; + } + + inline EA_CONSTEXPR float max(float a, float b) { return a < b ? b : a; } + inline EA_CONSTEXPR double max(double a, double b) { return a < b ? b : a; } + inline EA_CONSTEXPR long double max(long double a, long double b) { return a < b ? b : a; } + + #endif // EASTL_MINMAX_ENABLED + + + /// max_alt + /// + /// This is an alternative version of max that avoids any possible + /// collisions with Microsoft #defines of min and max. + /// + template + inline EA_CONSTEXPR typename eastl::enable_if::value, T>::type + max_alt(T a, T b) + { + return a < b ? b : a; + } + + template + inline EA_CONSTEXPR typename eastl::enable_if::value, const T&>::type + max_alt(const T& a, const T& b) + { + return a < b ? b : a; + } + + inline EA_CONSTEXPR float max_alt(float a, float b) { return a < b ? b : a; } + inline EA_CONSTEXPR double max_alt(double a, double b) { return a < b ? b : a; } + inline EA_CONSTEXPR long double max_alt(long double a, long double b) { return a < b ? b : a; } + + + #if EASTL_MINMAX_ENABLED + /// max + /// + /// Min returns the lesser of its two arguments; it returns the first + /// argument if neither is less than the other. The two arguments are + /// compared with the Compare function (or function object), which + /// takes two arguments and returns true if the first is less than + /// the second. + /// + template + inline const T& + max(const T& a, const T& b, Compare compare) + { + return compare(a, b) ? b : a; + } + #endif + + + /// max_alt + /// + /// This is an alternative version of max that avoids any possible + /// collisions with Microsoft #defines of min and max. + /// + template + inline const T& + max_alt(const T& a, const T& b, Compare compare) + { + return compare(a, b) ? 
b : a; + } + + + /// min(std::initializer_list) + /// + template + T min(std::initializer_list ilist) + { + return *eastl::min_element(ilist.begin(), ilist.end()); + } + + /// min(std::initializer_list, Compare) + /// + template + T min(std::initializer_list ilist, Compare compare) + { + return *eastl::min_element(ilist.begin(), ilist.end(), compare); + } + + + /// max(std::initializer_list) + /// + template + T max(std::initializer_list ilist) + { + return *eastl::max_element(ilist.begin(), ilist.end()); + } + + /// max(std::initializer_list, Compare) + /// + template + T max(std::initializer_list ilist, Compare compare) + { + return *eastl::max_element(ilist.begin(), ilist.end(), compare); + } + + + /// minmax_element + /// + /// Returns: make_pair(first, first) if [first, last) is empty, otherwise make_pair(m, M), + /// where m is the first iterator in [first,last) such that no iterator in the range + /// refers to a smaller element, and where M is the last iterator in [first,last) such + /// that no iterator in the range refers to a larger element. + /// + /// Complexity: At most max([(3/2)*(N - 1)], 0) applications of the corresponding predicate, + /// where N is distance(first, last). + /// + template + eastl::pair + minmax_element(ForwardIterator first, ForwardIterator last, Compare compare) + { + eastl::pair result(first, first); + + if(!(first == last) && !(++first == last)) + { + if(compare(*first, *result.first)) + { + result.second = result.first; + result.first = first; + } + else + result.second = first; + + while(++first != last) + { + ForwardIterator i = first; + + if(++first == last) + { + if(compare(*i, *result.first)) + result.first = i; + else if(!compare(*i, *result.second)) + result.second = i; + break; + } + else + { + if(compare(*first, *i)) + { + if(compare(*first, *result.first)) + result.first = first; + + if(!compare(*i, *result.second)) + result.second = i; + } + else + { + if(compare(*i, *result.first)) + result.first = i; + + if(!compare(*first, *result.second)) + result.second = first; + } + } + } + } + + return result; + } + + + template + eastl::pair + minmax_element(ForwardIterator first, ForwardIterator last) + { + typedef typename eastl::iterator_traits::value_type value_type; + + return eastl::minmax_element(first, last, eastl::less()); + } + + + + /// minmax + /// + /// Requires: Type T shall be LessThanComparable. + /// Returns: pair(b, a) if b is smaller than a, and pair(a, b) otherwise. + /// Remarks: Returns pair(a, b) when the arguments are equivalent. + /// Complexity: Exactly one comparison. + /// + + // The following optimization is a problem because it changes the return value in a way that would break + // users unless they used auto (e.g. auto result = minmax(17, 33); ) + // + // template + // inline EA_CONSTEXPR typename eastl::enable_if::value, eastl::pair >::type + // minmax(T a, T b) + // { + // return (b < a) ? eastl::make_pair(b, a) : eastl::make_pair(a, b); + // } + // + // template + // inline typename eastl::enable_if::value, eastl::pair >::type + // minmax(const T& a, const T& b) + // { + // return (b < a) ? eastl::make_pair(b, a) : eastl::make_pair(a, b); + // } + + // It turns out that the following conforming definition of minmax generates a warning when used with VC++ up + // to at least VS2012. The VS2012 version of minmax is a broken and non-conforming definition, and we don't + // want to do that. 
We could do it for scalars alone, though we'd have to decide if we are going to do that + // for all compilers, because it changes the return value from a pair of references to a pair of values. + template + inline eastl::pair + minmax(const T& a, const T& b) + { + return (b < a) ? eastl::make_pair(b, a) : eastl::make_pair(a, b); + } + + + template + eastl::pair + minmax(const T& a, const T& b, Compare compare) + { + return compare(b, a) ? eastl::make_pair(b, a) : eastl::make_pair(a, b); + } + + + + template + eastl::pair + minmax(std::initializer_list ilist) + { + typedef typename std::initializer_list::iterator iterator_type; + eastl::pair iteratorPair = eastl::minmax_element(ilist.begin(), ilist.end()); + return eastl::make_pair(*iteratorPair.first, *iteratorPair.second); + } + + template + eastl::pair + minmax(std::initializer_list ilist, Compare compare) + { + typedef typename std::initializer_list::iterator iterator_type; + eastl::pair iteratorPair = eastl::minmax_element(ilist.begin(), ilist.end(), compare); + return eastl::make_pair(*iteratorPair.first, *iteratorPair.second); + } + + template + inline T&& median_impl(T&& a, T&& b, T&& c) + { + if(a < b) + { + if(b < c) + return eastl::forward(b); + else if(a < c) + return eastl::forward(c); + else + return eastl::forward(a); + } + else if(a < c) + return eastl::forward(a); + else if(b < c) + return eastl::forward(c); + return eastl::forward(b); + } + + /// median + /// + /// median finds which element of three (a, b, d) is in-between the other two. + /// If two or more elements are equal, the first (e.g. a before b) is chosen. + /// + /// Complexity: Either two or three comparisons will be required, depending + /// on the values. + /// + template + inline const T& median(const T& a, const T& b, const T& c) + { + return median_impl(a, b, c); + } + + /// median + /// + /// median finds which element of three (a, b, d) is in-between the other two. + /// If two or more elements are equal, the first (e.g. a before b) is chosen. + /// + /// Complexity: Either two or three comparisons will be required, depending + /// on the values. + /// + template + inline T&& median(T&& a, T&& b, T&& c) + { + return eastl::forward(median_impl(eastl::forward(a), eastl::forward(b), eastl::forward(c))); + } + + + template + inline T&& median_impl(T&& a, T&& b, T&& c, Compare compare) + { + if(compare(a, b)) + { + if(compare(b, c)) + return eastl::forward(b); + else if(compare(a, c)) + return eastl::forward(c); + else + return eastl::forward(a); + } + else if(compare(a, c)) + return eastl::forward(a); + else if(compare(b, c)) + return eastl::forward(c); + return eastl::forward(b); + } + + + /// median + /// + /// median finds which element of three (a, b, d) is in-between the other two. + /// If two or more elements are equal, the first (e.g. a before b) is chosen. + /// + /// Complexity: Either two or three comparisons will be required, depending + /// on the values. + /// + template + inline const T& median(const T& a, const T& b, const T& c, Compare compare) + { + return median_impl(a, b, c, compare); + } + + /// median + /// + /// median finds which element of three (a, b, d) is in-between the other two. + /// If two or more elements are equal, the first (e.g. a before b) is chosen. + /// + /// Complexity: Either two or three comparisons will be required, depending + /// on the values. 
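+	/// Example usage (editor's illustrative sketch):
+	///     int m = eastl::median(3, 9, 5, eastl::less<int>());   // m == 5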
+ /// + template + inline T&& median(T&& a, T&& b, T&& c, Compare compare) + { + return eastl::forward(median_impl(eastl::forward(a), eastl::forward(b), eastl::forward(c), compare)); + } + + + + + /// all_of + /// + /// Returns: true if the unary predicate p returns true for all elements in the range [first, last) + /// + template + inline bool all_of(InputIterator first, InputIterator last, Predicate p) + { + for(; first != last; ++first) + { + if(!p(*first)) + return false; + } + return true; + } + + + /// any_of + /// + /// Returns: true if the unary predicate p returns true for any of the elements in the range [first, last) + /// + template + inline bool any_of(InputIterator first, InputIterator last, Predicate p) + { + for(; first != last; ++first) + { + if(p(*first)) + return true; + } + return false; + } + + + /// none_of + /// + /// Returns: true if the unary predicate p returns true for none of the elements in the range [first, last) + /// + template + inline bool none_of(InputIterator first, InputIterator last, Predicate p) + { + for(; first != last; ++first) + { + if(p(*first)) + return false; + } + return true; + } + + + /// adjacent_find + /// + /// Returns: The first iterator i such that both i and i + 1 are in the range + /// [first, last) for which the following corresponding conditions hold: *i == *(i + 1). + /// Returns last if no such iterator is found. + /// + /// Complexity: Exactly 'find(first, last, value) - first' applications of the corresponding predicate. + /// + template + inline ForwardIterator + adjacent_find(ForwardIterator first, ForwardIterator last) + { + if(first != last) + { + ForwardIterator i = first; + + for(++i; i != last; ++i) + { + if(*first == *i) + return first; + first = i; + } + } + return last; + } + + + + /// adjacent_find + /// + /// Returns: The first iterator i such that both i and i + 1 are in the range + /// [first, last) for which the following corresponding conditions hold: predicate(*i, *(i + 1)) != false. + /// Returns last if no such iterator is found. + /// + /// Complexity: Exactly 'find(first, last, value) - first' applications of the corresponding predicate. + /// + template + inline ForwardIterator + adjacent_find(ForwardIterator first, ForwardIterator last, BinaryPredicate predicate) + { + if(first != last) + { + ForwardIterator i = first; + + for(++i; i != last; ++i) + { + if(predicate(*first, *i)) + return first; + first = i; + } + } + return last; + } + + + /// shuffle + /// + /// New for C++11 + /// Randomizes a sequence of values via a user-supplied UniformRandomNumberGenerator. + /// The difference between this and the original random_shuffle function is that this uses the more + /// advanced and flexible UniformRandomNumberGenerator interface as opposed to the more + /// limited RandomNumberGenerator interface of random_shuffle. + /// + /// Effects: Shuffles the elements in the range [first, last) with uniform distribution. + /// + /// Complexity: Exactly '(last - first) - 1' swaps. + /// + /// Example usage: + /// struct Rand{ eastl_size_t operator()(eastl_size_t n) { return (eastl_size_t)(rand() % n); } }; // Note: The C rand function is poor and slow. + /// Rand randInstance; + /// shuffle(pArrayBegin, pArrayEnd, randInstance); + /// + // See the C++11 Standard, 26.5.1.3, Uniform random number generator requirements. 
+ // Also http://en.cppreference.com/w/cpp/numeric/random/uniform_int_distribution + + template + void shuffle(RandomAccessIterator first, RandomAccessIterator last, UniformRandomNumberGenerator&& urng) + { + if(first != last) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + typedef typename eastl::make_unsigned::type unsigned_difference_type; + typedef typename eastl::uniform_int_distribution uniform_int_distribution; + typedef typename uniform_int_distribution::param_type uniform_int_distribution_param_type; + + uniform_int_distribution uid; + + for(RandomAccessIterator i = first + 1; i != last; ++i) + iter_swap(i, first + uid(urng, uniform_int_distribution_param_type(0, i - first))); + } + } + + + /// random_shuffle + /// + /// Randomizes a sequence of values. + /// + /// Effects: Shuffles the elements in the range [first, last) with uniform distribution. + /// + /// Complexity: Exactly '(last - first) - 1' swaps. + /// + /// Example usage: + /// eastl_size_t Rand(eastl_size_t n) { return (eastl_size_t)(rand() % n); } // Note: The C rand function is poor and slow. + /// pointer_to_unary_function randInstance(Rand); + /// random_shuffle(pArrayBegin, pArrayEnd, randInstance); + /// + /// Example usage: + /// struct Rand{ eastl_size_t operator()(eastl_size_t n) { return (eastl_size_t)(rand() % n); } }; // Note: The C rand function is poor and slow. + /// Rand randInstance; + /// random_shuffle(pArrayBegin, pArrayEnd, randInstance); + /// + template + inline void random_shuffle(RandomAccessIterator first, RandomAccessIterator last, RandomNumberGenerator&& rng) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + + // We must do 'rand((i - first) + 1)' here and cannot do 'rand(last - first)', + // as it turns out that the latter results in unequal distribution probabilities. + // http://www.cigital.com/papers/download/developer_gambling.php + + for(RandomAccessIterator i = first + 1; i < last; ++i) + iter_swap(i, first + (difference_type)rng((eastl_size_t)((i - first) + 1))); + } + + + /// random_shuffle + /// + /// Randomizes a sequence of values. + /// + /// Effects: Shuffles the elements in the range [first, last) with uniform distribution. + /// + /// Complexity: Exactly '(last - first) - 1' swaps. + /// + /// Example usage: + /// random_shuffle(pArrayBegin, pArrayEnd); + /// + /// *** Disabled until we decide if we want to get into the business of writing random number generators. *** + /// + /// template + /// inline void random_shuffle(RandomAccessIterator first, RandomAccessIterator last) + /// { + /// for(RandomAccessIterator i = first + 1; i < last; ++i) + /// iter_swap(i, first + SomeRangedRandomNumberGenerator((i - first) + 1)); + /// } + + + + + + + /// move_n + /// + /// Same as move(InputIterator, InputIterator, OutputIterator) except based on count instead of iterator range. + /// + template + inline OutputIterator + move_n_impl(InputIterator first, Size n, OutputIterator result, EASTL_ITC_NS::input_iterator_tag) + { + for(; n > 0; --n) + *result++ = eastl::move(*first++); + return result; + } + + template + inline OutputIterator + move_n_impl(RandomAccessIterator first, Size n, OutputIterator result, EASTL_ITC_NS::random_access_iterator_tag) + { + return eastl::move(first, first + n, result); // Take advantage of the optimizations present in the move algorithm. 
+ } + + + template + inline OutputIterator + move_n(InputIterator first, Size n, OutputIterator result) + { + typedef typename eastl::iterator_traits::iterator_category IC; + return eastl::move_n_impl(first, n, result, IC()); + } + + + + /// copy_n + /// + /// Same as copy(InputIterator, InputIterator, OutputIterator) except based on count instead of iterator range. + /// Effects: Copies exactly count values from the range beginning at first to the range beginning at result, if count > 0. Does nothing otherwise. + /// Returns: Iterator in the destination range, pointing past the last element copied if count>0 or first otherwise. + /// Complexity: Exactly count assignments, if count > 0. + /// + template + inline OutputIterator + copy_n_impl(InputIterator first, Size n, OutputIterator result, EASTL_ITC_NS::input_iterator_tag) + { + for(; n > 0; --n) + *result++ = *first++; + return result; + } + + template + inline OutputIterator + copy_n_impl(RandomAccessIterator first, Size n, OutputIterator result, EASTL_ITC_NS::random_access_iterator_tag) + { + return eastl::copy(first, first + n, result); // Take advantage of the optimizations present in the copy algorithm. + } + + + template + inline OutputIterator + copy_n(InputIterator first, Size n, OutputIterator result) + { + typedef typename eastl::iterator_traits::iterator_category IC; + return eastl::copy_n_impl(first, n, result, IC()); + } + + + /// copy_if + /// + /// Effects: Assigns to the result iterator only if the predicate is true. + /// + template + inline OutputIterator + copy_if(InputIterator first, InputIterator last, OutputIterator result, Predicate predicate) + { + // This implementation's performance could be improved by taking a more complicated approach like with the copy algorithm. + for(; first != last; ++first) + { + if(predicate(*first)) + *result++ = *first; + } + + return result; + } + + + + + // Implementation moving copying both trivial and non-trivial data via a lesser iterator than random-access. + template + struct move_and_copy_backward_helper + { + template + static BidirectionalIterator2 move_or_copy_backward(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) + { + while(first != last) + *--resultEnd = *--last; + return resultEnd; // resultEnd now points to the beginning of the destination sequence instead of the end. + } + }; + + // Specialization for moving non-trivial data via a lesser iterator than random-access. + template + struct move_and_copy_backward_helper + { + template + static BidirectionalIterator2 move_or_copy_backward(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) + { + while(first != last) + *--resultEnd = eastl::move(*--last); + return resultEnd; // resultEnd now points to the beginning of the destination sequence instead of the end. + } + }; + + // Specialization for moving non-trivial data via a random-access iterator. It's theoretically faster because the compiler can see the count when its a compile-time const. + template<> + struct move_and_copy_backward_helper + { + template + static BidirectionalIterator2 move_or_copy_backward(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + + for(difference_type n = (last - first); n > 0; --n) + *--resultEnd = eastl::move(*--last); + return resultEnd; // resultEnd now points to the beginning of the destination sequence instead of the end. 
+ } + }; + + // Specialization for copying non-trivial data via a random-access iterator. It's theoretically faster because the compiler can see the count when its a compile-time const. + // This specialization converts the random access BidirectionalIterator1 last-first to an integral type. There's simple way for us to take advantage of a random access output iterator, + // as the range is specified by the input instead of the output, and distance(first, last) for a non-random-access iterator is potentially slow. + template <> + struct move_and_copy_backward_helper + { + template + static BidirectionalIterator2 move_or_copy_backward(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + + for(difference_type n = (last - first); n > 0; --n) + *--resultEnd = *--last; + return resultEnd; // resultEnd now points to the beginning of the destination sequence instead of the end. + } + }; + + // Specialization for when we can use memmove/memcpy. See the notes above for what conditions allow this. + template + struct move_and_copy_backward_helper + { + template + static T* move_or_copy_backward(const T* first, const T* last, T* resultEnd) + { + return (T*)memmove(resultEnd - (last - first), first, (size_t)((uintptr_t)last - (uintptr_t)first)); + // We could use memcpy here if there's no range overlap, but memcpy is rarely much faster than memmove. + } + }; + + template + inline BidirectionalIterator2 move_and_copy_backward_chooser(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) + { + typedef typename eastl::iterator_traits::iterator_category IIC; + typedef typename eastl::iterator_traits::iterator_category OIC; + typedef typename eastl::iterator_traits::value_type value_type_input; + typedef typename eastl::iterator_traits::value_type value_type_output; + + const bool canBeMemmoved = eastl::is_trivially_copyable::value && + eastl::is_same::value && + (eastl::is_pointer::value || eastl::is_same::value) && + (eastl::is_pointer::value || eastl::is_same::value); + + return eastl::move_and_copy_backward_helper::move_or_copy_backward(first, last, resultEnd); // Need to chose based on the input iterator tag and not the output iterator tag, because containers accept input ranges of iterator types different than self. + } + + + // We have a second layer of unwrap_iterator calls because the original iterator might be something like move_iterator > (i.e. doubly-wrapped). + template + inline BidirectionalIterator2 move_and_copy_backward_unwrapper(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) + { + return BidirectionalIterator2(eastl::move_and_copy_backward_chooser(eastl::unwrap_iterator(first), eastl::unwrap_iterator(last), eastl::unwrap_iterator(resultEnd))); // Have to convert to BidirectionalIterator2 because result.base() could be a T* + } + + + /// move_backward + /// + /// The elements are moved in reverse order (the last element is moved first), but their relative order is preserved. + /// After this operation the elements in the moved-from range will still contain valid values of the + /// appropriate type, but not necessarily the same values as before the move. + /// Returns the beginning of the result range. + /// Note: When moving between containers, the dest range must be valid; this function doesn't resize containers. 
+ /// Note: If result is within [first, last), move must be used instead of move_backward. + /// + /// Example usage: + /// eastl::move_backward(myArray.begin(), myArray.end(), myDestArray.end()); + /// + /// Reference implementation: + /// template + /// BidirectionalIterator2 move_backward(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) + /// { + /// while(last != first) + /// *--resultEnd = eastl::move(*--last); + /// return resultEnd; + /// } + /// + template + inline BidirectionalIterator2 move_backward(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) + { + return eastl::move_and_copy_backward_unwrapper(eastl::unwrap_iterator(first), eastl::unwrap_iterator(last), resultEnd); + } + + + /// copy_backward + /// + /// copies memory in the range of [first, last) to the range *ending* with result. + /// + /// Effects: Copies elements in the range [first, last) into the range + /// [result - (last - first), result) starting from last 1 and proceeding to first. + /// For each positive integer n <= (last - first), performs *(result n) = *(last - n). + /// + /// Requires: result shall not be in the range [first, last). + /// + /// Returns: result - (last - first). That is, returns the beginning of the result range. + /// + /// Complexity: Exactly 'last - first' assignments. + /// + template + inline BidirectionalIterator2 copy_backward(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 resultEnd) + { + const bool isMove = eastl::is_move_iterator::value; EA_UNUSED(isMove); + + return eastl::move_and_copy_backward_unwrapper(eastl::unwrap_iterator(first), eastl::unwrap_iterator(last), resultEnd); + } + + + /// count + /// + /// Counts the number of items in the range of [first, last) which equal the input value. + /// + /// Effects: Returns the number of iterators i in the range [first, last) for which the + /// following corresponding conditions hold: *i == value. + /// + /// Complexity: At most 'last - first' applications of the corresponding predicate. + /// + /// Note: The predicate version of count is count_if and not another variation of count. + /// This is because both versions would have three parameters and there could be ambiguity. + /// + template + inline typename eastl::iterator_traits::difference_type + count(InputIterator first, InputIterator last, const T& value) + { + typename eastl::iterator_traits::difference_type result = 0; + + for(; first != last; ++first) + { + if(*first == value) + ++result; + } + return result; + } + + + // C++ doesn't define a count with predicate, as it can effectively be synthesized via count_if + // with an appropriate predicate. However, it's often simpler to just have count with a predicate. + template + inline typename eastl::iterator_traits::difference_type + count(InputIterator first, InputIterator last, const T& value, Predicate predicate) + { + typename eastl::iterator_traits::difference_type result = 0; + + for(; first != last; ++first) + { + if(predicate(*first, value)) + ++result; + } + return result; + } + + + /// count_if + /// + /// Counts the number of items in the range of [first, last) which match + /// the input value as defined by the input predicate function. + /// + /// Effects: Returns the number of iterators i in the range [first, last) for which the + /// following corresponding conditions hold: predicate(*i) != false. + /// + /// Complexity: At most 'last - first' applications of the corresponding predicate. 
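+ /// Example usage (an illustrative sketch; 'intArray' is a hypothetical eastl::vector<int>):
+ ///     auto evenCount = eastl::count_if(intArray.begin(), intArray.end(),
+ ///                                      [](int x) { return (x % 2) == 0; });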
+ /// + /// Note: The non-predicate version of count_if is count and not another variation of count_if. + /// This is because both versions would have three parameters and there could be ambiguity. + /// + template + inline typename eastl::iterator_traits::difference_type + count_if(InputIterator first, InputIterator last, Predicate predicate) + { + typename eastl::iterator_traits::difference_type result = 0; + + for(; first != last; ++first) + { + if(predicate(*first)) + ++result; + } + return result; + } + + + /// find + /// + /// finds the value within the unsorted range of [first, last). + /// + /// Returns: The first iterator i in the range [first, last) for which + /// the following corresponding conditions hold: *i == value. + /// Returns last if no such iterator is found. + /// + /// Complexity: At most 'last - first' applications of the corresponding predicate. + /// This is a linear search and not a binary one. + /// + /// Note: The predicate version of find is find_if and not another variation of find. + /// This is because both versions would have three parameters and there could be ambiguity. + /// + template + inline InputIterator + find(InputIterator first, InputIterator last, const T& value) + { + while((first != last) && !(*first == value)) // Note that we always express value comparisons in terms of < or ==. + ++first; + return first; + } + + + // C++ doesn't define a find with predicate, as it can effectively be synthesized via find_if + // with an appropriate predicate. However, it's often simpler to just have find with a predicate. + template + inline InputIterator + find(InputIterator first, InputIterator last, const T& value, Predicate predicate) + { + while((first != last) && !predicate(*first, value)) + ++first; + return first; + } + + + + /// find_if + /// + /// finds the value within the unsorted range of [first, last). + /// + /// Returns: The first iterator i in the range [first, last) for which + /// the following corresponding conditions hold: pred(*i) != false. + /// Returns last if no such iterator is found. + /// If the sequence of elements to search for (i.e. first2 - last2) is empty, + /// the find always fails and last1 will be returned. + /// + /// Complexity: At most 'last - first' applications of the corresponding predicate. + /// + /// Note: The non-predicate version of find_if is find and not another variation of find_if. + /// This is because both versions would have three parameters and there could be ambiguity. + /// + template + inline InputIterator + find_if(InputIterator first, InputIterator last, Predicate predicate) + { + while((first != last) && !predicate(*first)) + ++first; + return first; + } + + + + /// find_if_not + /// + /// find_if_not works the same as find_if except it tests for if the predicate + /// returns false for the elements instead of true. + /// + template + inline InputIterator + find_if_not(InputIterator first, InputIterator last, Predicate predicate) + { + for(; first != last; ++first) + { + if(!predicate(*first)) + return first; + } + return last; + } + + + + + /// find_first_of + /// + /// find_first_of is similar to find in that it performs linear search through + /// a range of ForwardIterators. The difference is that while find searches + /// for one particular value, find_first_of searches for any of several values. + /// Specifically, find_first_of searches for the first occurrance in the + /// range [first1, last1) of any of the elements in [first2, last2). 
+ /// This function is thus similar to the strpbrk standard C string function. + /// If the sequence of elements to search for (i.e. first2-last2) is empty, + /// the find always fails and last1 will be returned. + /// + /// Effects: Finds an element that matches one of a set of values. + /// + /// Returns: The first iterator i in the range [first1, last1) such that for some + /// integer j in the range [first2, last2) the following conditions hold: *i == *j. + /// Returns last1 if no such iterator is found. + /// + /// Complexity: At most '(last1 - first1) * (last2 - first2)' applications of the + /// corresponding predicate. + /// + template + ForwardIterator1 + find_first_of(ForwardIterator1 first1, ForwardIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2) + { + for(; first1 != last1; ++first1) + { + for(ForwardIterator2 i = first2; i != last2; ++i) + { + if(*first1 == *i) + return first1; + } + } + return last1; + } + + + /// find_first_of + /// + /// find_first_of is similar to find in that it performs linear search through + /// a range of ForwardIterators. The difference is that while find searches + /// for one particular value, find_first_of searches for any of several values. + /// Specifically, find_first_of searches for the first occurrance in the + /// range [first1, last1) of any of the elements in [first2, last2). + /// This function is thus similar to the strpbrk standard C string function. + /// + /// Effects: Finds an element that matches one of a set of values. + /// + /// Returns: The first iterator i in the range [first1, last1) such that for some + /// integer j in the range [first2, last2) the following conditions hold: pred(*i, *j) != false. + /// Returns last1 if no such iterator is found. + /// + /// Complexity: At most '(last1 - first1) * (last2 - first2)' applications of the + /// corresponding predicate. + /// + template + ForwardIterator1 + find_first_of(ForwardIterator1 first1, ForwardIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2, + BinaryPredicate predicate) + { + for(; first1 != last1; ++first1) + { + for(ForwardIterator2 i = first2; i != last2; ++i) + { + if(predicate(*first1, *i)) + return first1; + } + } + return last1; + } + + + /// find_first_not_of + /// + /// Searches through first range for the first element that does not belong the second input range. + /// This is very much like the C++ string find_first_not_of function. + /// + /// Returns: The first iterator i in the range [first1, last1) such that for some + /// integer j in the range [first2, last2) the following conditions hold: !(*i == *j). + /// Returns last1 if no such iterator is found. + /// + /// Complexity: At most '(last1 - first1) * (last2 - first2)' applications of the + /// corresponding predicate. + /// + template + ForwardIterator1 + find_first_not_of(ForwardIterator1 first1, ForwardIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2) + { + for(; first1 != last1; ++first1) + { + if(eastl::find(first2, last2, *first1) == last2) + break; + } + + return first1; + } + + + + /// find_first_not_of + /// + /// Searches through first range for the first element that does not belong the second input range. + /// This is very much like the C++ string find_first_not_of function. + /// + /// Returns: The first iterator i in the range [first1, last1) such that for some + /// integer j in the range [first2, last2) the following conditions hold: pred(*i, *j) == false. + /// Returns last1 if no such iterator is found. 
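+ /// Example usage (an illustrative sketch; 'text' and 'whitespace' are hypothetical eastl::string objects):
+ ///     eastl::string::iterator it = eastl::find_first_not_of(text.begin(), text.end(),
+ ///                                                            whitespace.begin(), whitespace.end(),
+ ///                                                            eastl::equal_to<char>());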
+ /// + /// Complexity: At most '(last1 - first1) * (last2 - first2)' applications of the + /// corresponding predicate. + /// + template + inline ForwardIterator1 + find_first_not_of(ForwardIterator1 first1, ForwardIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2, + BinaryPredicate predicate) + { + typedef typename eastl::iterator_traits::value_type value_type; + + for(; first1 != last1; ++first1) + { + if(eastl::find_if(first2, last2, eastl::bind1st(predicate, *first1)) == last2) + break; + } + + return first1; + } + + + template + inline BidirectionalIterator1 + find_last_of(BidirectionalIterator1 first1, BidirectionalIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2) + { + if((first1 != last1) && (first2 != last2)) + { + BidirectionalIterator1 it1(last1); + + while((--it1 != first1) && (eastl::find(first2, last2, *it1) == last2)) + ; // Do nothing + + if((it1 != first1) || (eastl::find(first2, last2, *it1) != last2)) + return it1; + } + + return last1; + } + + + template + BidirectionalIterator1 + find_last_of(BidirectionalIterator1 first1, BidirectionalIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2, + BinaryPredicate predicate) + { + typedef typename eastl::iterator_traits::value_type value_type; + + if((first1 != last1) && (first2 != last2)) + { + BidirectionalIterator1 it1(last1); + + while((--it1 != first1) && (eastl::find_if(first2, last2, eastl::bind1st(predicate, *it1)) == last2)) + ; // Do nothing + + if((it1 != first1) || (eastl::find_if(first2, last2, eastl::bind1st(predicate, *it1)) != last2)) + return it1; + } + + return last1; + } + + + template + inline BidirectionalIterator1 + find_last_not_of(BidirectionalIterator1 first1, BidirectionalIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2) + { + if((first1 != last1) && (first2 != last2)) + { + BidirectionalIterator1 it1(last1); + + while((--it1 != first1) && (eastl::find(first2, last2, *it1) != last2)) + ; // Do nothing + + if((it1 != first1) || (eastl::find( first2, last2, *it1) == last2)) + return it1; + } + + return last1; + } + + + template + inline BidirectionalIterator1 + find_last_not_of(BidirectionalIterator1 first1, BidirectionalIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2, + BinaryPredicate predicate) + { + typedef typename eastl::iterator_traits::value_type value_type; + + if((first1 != last1) && (first2 != last2)) + { + BidirectionalIterator1 it1(last1); + + while((--it1 != first1) && (eastl::find_if(first2, last2, eastl::bind1st(predicate, *it1)) != last2)) + ; // Do nothing + + if((it1 != first1) || (eastl::find_if(first2, last2, eastl::bind1st(predicate, *it1))) != last2) + return it1; + } + + return last1; + } + + + + + /// for_each + /// + /// Calls the Function function for each value in the range [first, last). + /// Function takes a single parameter: the current value. + /// + /// Effects: Applies function to the result of dereferencing every iterator in + /// the range [first, last), starting from first and proceeding to last 1. + /// + /// Returns: function. + /// + /// Complexity: Applies function exactly 'last - first' times. + /// + /// Note: If function returns a result, the result is ignored. + /// + template + inline Function + for_each(InputIterator first, InputIterator last, Function function) + { + for(; first != last; ++first) + function(*first); + return function; + } + + /// for_each_n + /// + /// Calls the Function function for each value in the range [first, first + n). 
+ /// Function takes a single parameter: the current value. + /// + /// Effects: Applies function to the result of dereferencing every iterator in + /// the range [first, first + n), starting from first and proceeding to last 1. + /// + /// Returns: first + n. + /// + /// Complexity: Applies function exactly 'first + n' times. + /// + /// Note: + //// * If function returns a result, the result is ignored. + //// * If n < 0, behaviour is undefined. + /// + template + EA_CPP14_CONSTEXPR inline InputIterator + for_each_n(InputIterator first, Size n, Function function) + { + for (Size i = 0; i < n; ++first, i++) + function(*first); + return first; + } + + + /// generate + /// + /// Iterates the range of [first, last) and assigns to each element the + /// result of the function generator. Generator is a function which takes + /// no arguments. + /// + /// Complexity: Exactly 'last - first' invocations of generator and assignments. + /// + template + inline void + generate(ForwardIterator first, ForwardIterator last, Generator generator) + { + for(; first != last; ++first) // We cannot call generate_n(first, last-first, generator) + *first = generator(); // because the 'last-first' might not be supported by the + } // given iterator. + + + /// generate_n + /// + /// Iterates an interator n times and assigns the result of generator + /// to each succeeding element. Generator is a function which takes + /// no arguments. + /// + /// Complexity: Exactly n invocations of generator and assignments. + /// + template + inline OutputIterator + generate_n(OutputIterator first, Size n, Generator generator) + { + for(; n > 0; --n, ++first) + *first = generator(); + return first; + } + + + /// transform + /// + /// Iterates the input range of [first, last) and the output iterator result + /// and assigns the result of unaryOperation(input) to result. + /// + /// Effects: Assigns through every iterator i in the range [result, result + (last1 - first1)) + /// a new corresponding value equal to unaryOperation(*(first1 + (i - result)). + /// + /// Requires: op shall not have any side effects. + /// + /// Returns: result + (last1 - first1). That is, returns the end of the output range. + /// + /// Complexity: Exactly 'last1 - first1' applications of unaryOperation. + /// + /// Note: result may be equal to first. + /// + template + inline OutputIterator + transform(InputIterator first, InputIterator last, OutputIterator result, UnaryOperation unaryOperation) + { + for(; first != last; ++first, ++result) + *result = unaryOperation(*first); + return result; + } + + + /// transform + /// + /// Iterates the input range of [first, last) and the output iterator result + /// and assigns the result of binaryOperation(input1, input2) to result. + /// + /// Effects: Assigns through every iterator i in the range [result, result + (last1 - first1)) + /// a new corresponding value equal to binaryOperation(*(first1 + (i - result), *(first2 + (i - result))). + /// + /// Requires: binaryOperation shall not have any side effects. + /// + /// Returns: result + (last1 - first1). That is, returns the end of the output range. + /// + /// Complexity: Exactly 'last1 - first1' applications of binaryOperation. + /// + /// Note: result may be equal to first1 or first2. 
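+ /// Example usage (an illustrative sketch; 'a', 'b', and 'sums' are hypothetical eastl::vector<int>
+ /// objects of equal size):
+ ///     eastl::transform(a.begin(), a.end(), b.begin(), sums.begin(), eastl::plus<int>());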
+ /// + template + inline OutputIterator + transform(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, OutputIterator result, BinaryOperation binaryOperation) + { + for(; first1 != last1; ++first1, ++first2, ++result) + *result = binaryOperation(*first1, *first2); + return result; + } + + + /// equal + /// + /// Returns: true if for every iterator i in the range [first1, last1) the + /// following corresponding conditions hold: predicate(*i, *(first2 + (i - first1))) != false. + /// Otherwise, returns false. + /// + /// Complexity: At most last1 first1 applications of the corresponding predicate. + /// + /// To consider: Make specializations of this for scalar types and random access + /// iterators that uses memcmp or some trick memory comparison function. + /// We should verify that such a thing results in an improvement. + /// + template + EA_CPP14_CONSTEXPR inline bool equal(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2) + { + for(; first1 != last1; ++first1, ++first2) + { + if(!(*first1 == *first2)) // Note that we always express value comparisons in terms of < or ==. + return false; + } + return true; + } + + /* Enable the following if there was shown to be some benefit. A glance and Microsoft VC++ memcmp + shows that it is not optimized in any way, much less one that would benefit us here. + + inline bool equal(const bool* first1, const bool* last1, const bool* first2) + { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } + + inline bool equal(const char* first1, const char* last1, const char* first2) + { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } + + inline bool equal(const unsigned char* first1, const unsigned char* last1, const unsigned char* first2) + { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } + + inline bool equal(const signed char* first1, const signed char* last1, const signed char* first2) + { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } + + #ifndef EA_WCHAR_T_NON_NATIVE + inline bool equal(const wchar_t* first1, const wchar_t* last1, const wchar_t* first2) + { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } + #endif + + inline bool equal(const int16_t* first1, const int16_t* last1, const int16_t* first2) + { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } + + inline bool equal(const uint16_t* first1, const uint16_t* last1, const uint16_t* first2) + { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } + + inline bool equal(const int32_t* first1, const int32_t* last1, const int32_t* first2) + { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } + + inline bool equal(const uint32_t* first1, const uint32_t* last1, const uint32_t* first2) + { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } + + inline bool equal(const int64_t* first1, const int64_t* last1, const int64_t* first2) + { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } + + inline bool equal(const uint64_t* first1, const uint64_t* last1, const uint64_t* first2) + { return (memcmp(first1, first2, (size_t)((uintptr_t)last1 - (uintptr_t)first1)) == 0); } + */ + + + + /// equal + /// + /// Returns: true if for every iterator i in the range [first1, last1) the + /// following 
corresponding conditions hold: pred(*i, *(first2 + (i first1))) != false. + /// Otherwise, returns false. + /// + /// Complexity: At most last1 first1 applications of the corresponding predicate. + /// + template + inline bool + equal(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, BinaryPredicate predicate) + { + for(; first1 != last1; ++first1, ++first2) + { + if(!predicate(*first1, *first2)) + return false; + } + return true; + } + + + + /// identical + /// + /// Returns true if the two input ranges are equivalent. + /// There is a subtle difference between this algorithm and + /// the 'equal' algorithm. The equal algorithm assumes the + /// two ranges are of equal length. This algorithm efficiently + /// compares two ranges for both length equality and for + /// element equality. There is no other standard algorithm + /// that can do this. + /// + /// Returns: true if the sequence of elements defined by the range + /// [first1, last1) is of the same length as the sequence of + /// elements defined by the range of [first2, last2) and if + /// the elements in these ranges are equal as per the + /// equal algorithm. + /// + /// Complexity: At most 'min((last1 - first1), (last2 - first2))' applications + /// of the corresponding comparison. + /// + template + bool identical(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2) + { + while((first1 != last1) && (first2 != last2) && (*first1 == *first2)) + { + ++first1; + ++first2; + } + return (first1 == last1) && (first2 == last2); + } + + + /// identical + /// + template + bool identical(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, BinaryPredicate predicate) + { + while((first1 != last1) && (first2 != last2) && predicate(*first1, *first2)) + { + ++first1; + ++first2; + } + return (first1 == last1) && (first2 == last2); + } + + + + /// lexicographical_compare + /// + /// Returns: true if the sequence of elements defined by the range + /// [first1, last1) is lexicographically less than the sequence of + /// elements defined by the range [first2, last2). Returns false otherwise. + /// + /// Complexity: At most 'min((last1 - first1), (last2 - first2))' applications + /// of the corresponding comparison. + /// + /// Note: If two sequences have the same number of elements and their + /// corresponding elements are equivalent, then neither sequence is + /// lexicographically less than the other. If one sequence is a prefix + /// of the other, then the shorter sequence is lexicographically less + /// than the longer sequence. Otherwise, the lexicographical comparison + /// of the sequences yields the same result as the comparison of the first + /// corresponding pair of elements that are not equivalent. + /// + template + inline bool + lexicographical_compare(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2) + { + for(; (first1 != last1) && (first2 != last2); ++first1, ++first2) + { + if(*first1 < *first2) + return true; + if(*first2 < *first1) + return false; + } + return (first1 == last1) && (first2 != last2); + } + + inline bool // Specialization for const char*. + lexicographical_compare(const char* first1, const char* last1, const char* first2, const char* last2) + { + const ptrdiff_t n1(last1 - first1), n2(last2 - first2); + const int result = memcmp(first1, first2, (size_t)eastl::min_alt(n1, n2)); + return result ? (result < 0) : (n1 < n2); + } + + inline bool // Specialization for char*. 
+ lexicographical_compare(char* first1, char* last1, char* first2, char* last2) + { + const ptrdiff_t n1(last1 - first1), n2(last2 - first2); + const int result = memcmp(first1, first2, (size_t)eastl::min_alt(n1, n2)); + return result ? (result < 0) : (n1 < n2); + } + + inline bool // Specialization for const unsigned char*. + lexicographical_compare(const unsigned char* first1, const unsigned char* last1, const unsigned char* first2, const unsigned char* last2) + { + const ptrdiff_t n1(last1 - first1), n2(last2 - first2); + const int result = memcmp(first1, first2, (size_t)eastl::min_alt(n1, n2)); + return result ? (result < 0) : (n1 < n2); + } + + inline bool // Specialization for unsigned char*. + lexicographical_compare(unsigned char* first1, unsigned char* last1, unsigned char* first2, unsigned char* last2) + { + const ptrdiff_t n1(last1 - first1), n2(last2 - first2); + const int result = memcmp(first1, first2, (size_t)eastl::min_alt(n1, n2)); + return result ? (result < 0) : (n1 < n2); + } + + inline bool // Specialization for const signed char*. + lexicographical_compare(const signed char* first1, const signed char* last1, const signed char* first2, const signed char* last2) + { + const ptrdiff_t n1(last1 - first1), n2(last2 - first2); + const int result = memcmp(first1, first2, (size_t)eastl::min_alt(n1, n2)); + return result ? (result < 0) : (n1 < n2); + } + + inline bool // Specialization for signed char*. + lexicographical_compare(signed char* first1, signed char* last1, signed char* first2, signed char* last2) + { + const ptrdiff_t n1(last1 - first1), n2(last2 - first2); + const int result = memcmp(first1, first2, (size_t)eastl::min_alt(n1, n2)); + return result ? (result < 0) : (n1 < n2); + } + + #if defined(_MSC_VER) // If using the VC++ compiler (and thus bool is known to be a single byte)... + //Not sure if this is a good idea. + //inline bool // Specialization for const bool*. + //lexicographical_compare(const bool* first1, const bool* last1, const bool* first2, const bool* last2) + //{ + // const ptrdiff_t n1(last1 - first1), n2(last2 - first2); + // const int result = memcmp(first1, first2, (size_t)eastl::min_alt(n1, n2)); + // return result ? (result < 0) : (n1 < n2); + //} + // + //inline bool // Specialization for bool*. + //lexicographical_compare(bool* first1, bool* last1, bool* first2, bool* last2) + //{ + // const ptrdiff_t n1(last1 - first1), n2(last2 - first2); + // const int result = memcmp(first1, first2, (size_t)eastl::min_alt(n1, n2)); + // return result ? (result < 0) : (n1 < n2); + //} + #endif + + + + /// lexicographical_compare + /// + /// Returns: true if the sequence of elements defined by the range + /// [first1, last1) is lexicographically less than the sequence of + /// elements defined by the range [first2, last2). Returns false otherwise. + /// + /// Complexity: At most 'min((last1 -first1), (last2 - first2))' applications + /// of the corresponding comparison. + /// + /// Note: If two sequences have the same number of elements and their + /// corresponding elements are equivalent, then neither sequence is + /// lexicographically less than the other. If one sequence is a prefix + /// of the other, then the shorter sequence is lexicographically less + /// than the longer sequence. Otherwise, the lexicographical comparison + /// of the sequences yields the same result as the comparison of the first + /// corresponding pair of elements that are not equivalent. + /// + /// Note: False is always returned if range 1 is exhausted before range 2. 
+ /// The result of this is that you can't do a successful reverse compare + /// (e.g. use greater<> as the comparison instead of less<>) unless the + /// two sequences are of identical length. What you want to do is reverse + /// the order of the arguments in order to get the desired effect. + /// + template + inline bool + lexicographical_compare(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, Compare compare) + { + for(; (first1 != last1) && (first2 != last2); ++first1, ++first2) + { + if(compare(*first1, *first2)) + return true; + if(compare(*first2, *first1)) + return false; + } + return (first1 == last1) && (first2 != last2); + } + + + /// mismatch + /// + /// Finds the first position where the two ranges [first1, last1) and + /// [first2, first2 + (last1 - first1)) differ. The two versions of + /// mismatch use different tests for whether elements differ. + /// + /// Returns: A pair of iterators i and j such that j == first2 + (i - first1) + /// and i is the first iterator in the range [first1, last1) for which the + /// following corresponding condition holds: !(*i == *(first2 + (i - first1))). + /// Returns the pair last1 and first2 + (last1 - first1) if such an iterator + /// i is not found. + /// + /// Complexity: At most last1 first1 applications of the corresponding predicate. + /// + template + inline eastl::pair + mismatch(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2) // , InputIterator2 last2) + { + while((first1 != last1) && (*first1 == *first2)) // && (first2 != last2) <- C++ standard mismatch function doesn't check first2/last2. + { + ++first1; + ++first2; + } + + return eastl::pair(first1, first2); + } + + + /// mismatch + /// + /// Finds the first position where the two ranges [first1, last1) and + /// [first2, first2 + (last1 - first1)) differ. The two versions of + /// mismatch use different tests for whether elements differ. + /// + /// Returns: A pair of iterators i and j such that j == first2 + (i - first1) + /// and i is the first iterator in the range [first1, last1) for which the + /// following corresponding condition holds: pred(*i, *(first2 + (i - first1))) == false. + /// Returns the pair last1 and first2 + (last1 - first1) if such an iterator + /// i is not found. + /// + /// Complexity: At most last1 first1 applications of the corresponding predicate. + /// + template + inline eastl::pair + mismatch(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, // InputIterator2 last2, + BinaryPredicate predicate) + { + while((first1 != last1) && predicate(*first1, *first2)) // && (first2 != last2) <- C++ standard mismatch function doesn't check first2/last2. + { + ++first1; + ++first2; + } + + return eastl::pair(first1, first2); + } + + + /// lower_bound + /// + /// Finds the position of the first element in a sorted range that has a value + /// greater than or equivalent to a specified value. + /// + /// Effects: Finds the first position into which value can be inserted without + /// violating the ordering. + /// + /// Returns: The furthermost iterator i in the range [first, last) such that + /// for any iterator j in the range [first, i) the following corresponding + /// condition holds: *j < value. + /// + /// Complexity: At most 'log(last - first) + 1' comparisons. + /// + /// Optimizations: We have no need to specialize this implementation for random + /// access iterators (e.g. contiguous array), as the code below will already + /// take advantage of them. 
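+ /// Example usage (an illustrative sketch; 'sortedInts' is a hypothetical eastl::vector<int> sorted ascending):
+ ///     eastl::vector<int>::iterator it = eastl::lower_bound(sortedInts.begin(), sortedInts.end(), 42);
+ ///     // 42 is present in sortedInts if and only if ((it != sortedInts.end()) && !(42 < *it)).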
+ /// + template + ForwardIterator + lower_bound(ForwardIterator first, ForwardIterator last, const T& value) + { + typedef typename eastl::iterator_traits::difference_type DifferenceType; + + DifferenceType d = eastl::distance(first, last); // This will be efficient for a random access iterator such as an array. + + while(d > 0) + { + ForwardIterator i = first; + DifferenceType d2 = d >> 1; // We use '>>1' here instead of '/2' because MSVC++ for some reason generates significantly worse code for '/2'. Go figure. + + eastl::advance(i, d2); // This will be efficient for a random access iterator such as an array. + + if(*i < value) + { + // Disabled because std::lower_bound doesn't specify (23.3.3.3, p3) this can be done: EASTL_VALIDATE_COMPARE(!(value < *i)); // Validate that the compare function is sane. + first = ++i; + d -= d2 + 1; + } + else + d = d2; + } + return first; + } + + + /// lower_bound + /// + /// Finds the position of the first element in a sorted range that has a value + /// greater than or equivalent to a specified value. The input Compare function + /// takes two arguments and returns true if the first argument is less than + /// the second argument. + /// + /// Effects: Finds the first position into which value can be inserted without + /// violating the ordering. + /// + /// Returns: The furthermost iterator i in the range [first, last) such that + /// for any iterator j in the range [first, i) the following corresponding + /// condition holds: compare(*j, value) != false. + /// + /// Complexity: At most 'log(last - first) + 1' comparisons. + /// + /// Optimizations: We have no need to specialize this implementation for random + /// access iterators (e.g. contiguous array), as the code below will already + /// take advantage of them. + /// + template + ForwardIterator + lower_bound(ForwardIterator first, ForwardIterator last, const T& value, Compare compare) + { + typedef typename eastl::iterator_traits::difference_type DifferenceType; + + DifferenceType d = eastl::distance(first, last); // This will be efficient for a random access iterator such as an array. + + while(d > 0) + { + ForwardIterator i = first; + DifferenceType d2 = d >> 1; // We use '>>1' here instead of '/2' because MSVC++ for some reason generates significantly worse code for '/2'. Go figure. + + eastl::advance(i, d2); // This will be efficient for a random access iterator such as an array. + + if(compare(*i, value)) + { + // Disabled because std::lower_bound doesn't specify (23.3.3.1, p3) this can be done: EASTL_VALIDATE_COMPARE(!compare(value, *i)); // Validate that the compare function is sane. + first = ++i; + d -= d2 + 1; + } + else + d = d2; + } + return first; + } + + + + /// upper_bound + /// + /// Finds the position of the first element in a sorted range that has a + /// value that is greater than a specified value. + /// + /// Effects: Finds the furthermost position into which value can be inserted + /// without violating the ordering. + /// + /// Returns: The furthermost iterator i in the range [first, last) such that + /// for any iterator j in the range [first, i) the following corresponding + /// condition holds: !(value < *j). + /// + /// Complexity: At most 'log(last - first) + 1' comparisons. 
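+ /// Example usage (an illustrative sketch; 'sortedInts' is a hypothetical eastl::vector<int> sorted ascending):
+ ///     eastl::vector<int>::iterator it = eastl::upper_bound(sortedInts.begin(), sortedInts.end(), 42);
+ ///     // The range [eastl::lower_bound(sortedInts.begin(), sortedInts.end(), 42), it)
+ ///     // bounds the run of elements equivalent to 42.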
+ /// + template + ForwardIterator + upper_bound(ForwardIterator first, ForwardIterator last, const T& value) + { + typedef typename eastl::iterator_traits::difference_type DifferenceType; + + DifferenceType len = eastl::distance(first, last); + + while(len > 0) + { + ForwardIterator i = first; + DifferenceType len2 = len >> 1; // We use '>>1' here instead of '/2' because MSVC++ for some reason generates significantly worse code for '/2'. Go figure. + + eastl::advance(i, len2); + + if(!(value < *i)) // Note that we always express value comparisons in terms of < or ==. + { + first = ++i; + len -= len2 + 1; + } + else + { + // Disabled because std::upper_bound doesn't specify (23.3.3.2, p3) this can be done: EASTL_VALIDATE_COMPARE(!(*i < value)); // Validate that the compare function is sane. + len = len2; + } + } + return first; + } + + + /// upper_bound + /// + /// Finds the position of the first element in a sorted range that has a + /// value that is greater than a specified value. The input Compare function + /// takes two arguments and returns true if the first argument is less than + /// the second argument. + /// + /// Effects: Finds the furthermost position into which value can be inserted + /// without violating the ordering. + /// + /// Returns: The furthermost iterator i in the range [first, last) such that + /// for any iterator j in the range [first, i) the following corresponding + /// condition holds: compare(value, *j) == false. + /// + /// Complexity: At most 'log(last - first) + 1' comparisons. + /// + template + ForwardIterator + upper_bound(ForwardIterator first, ForwardIterator last, const T& value, Compare compare) + { + typedef typename eastl::iterator_traits::difference_type DifferenceType; + + DifferenceType len = eastl::distance(first, last); + + while(len > 0) + { + ForwardIterator i = first; + DifferenceType len2 = len >> 1; // We use '>>1' here instead of '/2' because MSVC++ for some reason generates significantly worse code for '/2'. Go figure. + + eastl::advance(i, len2); + + if(!compare(value, *i)) + { + first = ++i; + len -= len2 + 1; + } + else + { + // Disabled because std::upper_bound doesn't specify (23.3.3.2, p3) this can be done: EASTL_VALIDATE_COMPARE(!compare(*i, value)); // Validate that the compare function is sane. + len = len2; + } + } + return first; + } + + + /// equal_range + /// + /// Effects: Finds the largest subrange [i, j) such that the value can be inserted + /// at any iterator k in it without violating the ordering. k satisfies the + /// corresponding conditions: !(*k < value) && !(value < *k). + /// + /// Complexity: At most '2 * log(last - first) + 1' comparisons. + /// + template + pair + equal_range(ForwardIterator first, ForwardIterator last, const T& value) + { + typedef pair ResultType; + typedef typename eastl::iterator_traits::difference_type DifferenceType; + + DifferenceType d = eastl::distance(first, last); + + while(d > 0) + { + ForwardIterator i(first); + DifferenceType d2 = d >> 1; // We use '>>1' here instead of '/2' because MSVC++ for some reason generates significantly worse code for '/2'. Go figure. + + eastl::advance(i, d2); + + if(*i < value) + { + EASTL_VALIDATE_COMPARE(!(value < *i)); // Validate that the compare function is sane. + first = ++i; + d -= d2 + 1; + } + else if(value < *i) + { + EASTL_VALIDATE_COMPARE(!(*i < value)); // Validate that the compare function is sane. 
+ d = d2; + last = i; + } + else + { + ForwardIterator j(i); + + return ResultType(eastl::lower_bound(first, i, value), + eastl::upper_bound(++j, last, value)); + } + } + return ResultType(first, first); + } + + + /// equal_range + /// + /// Effects: Finds the largest subrange [i, j) such that the value can be inserted + /// at any iterator k in it without violating the ordering. k satisfies the + /// corresponding conditions: compare(*k, value) == false && compare(value, *k) == false. + /// + /// Complexity: At most '2 * log(last - first) + 1' comparisons. + /// + template + pair + equal_range(ForwardIterator first, ForwardIterator last, const T& value, Compare compare) + { + typedef pair ResultType; + typedef typename eastl::iterator_traits::difference_type DifferenceType; + + DifferenceType d = eastl::distance(first, last); + + while(d > 0) + { + ForwardIterator i(first); + DifferenceType d2 = d >> 1; // We use '>>1' here instead of '/2' because MSVC++ for some reason generates significantly worse code for '/2'. Go figure. + + eastl::advance(i, d2); + + if(compare(*i, value)) + { + EASTL_VALIDATE_COMPARE(!compare(value, *i)); // Validate that the compare function is sane. + first = ++i; + d -= d2 + 1; + } + else if(compare(value, *i)) + { + EASTL_VALIDATE_COMPARE(!compare(*i, value)); // Validate that the compare function is sane. + d = d2; + last = i; + } + else + { + ForwardIterator j(i); + + return ResultType(eastl::lower_bound(first, i, value, compare), + eastl::upper_bound(++j, last, value, compare)); + } + } + return ResultType(first, first); + } + + + /// replace + /// + /// Effects: Substitutes elements referred by the iterator i in the range [first, last) + /// with new_value, when the following corresponding conditions hold: *i == old_value. + /// + /// Complexity: Exactly 'last - first' applications of the corresponding predicate. + /// + /// Note: The predicate version of replace is replace_if and not another variation of replace. + /// This is because both versions would have the same parameter count and there could be ambiguity. + /// + template + inline void + replace(ForwardIterator first, ForwardIterator last, const T& old_value, const T& new_value) + { + for(; first != last; ++first) + { + if(*first == old_value) + *first = new_value; + } + } + + + /// replace_if + /// + /// Effects: Substitutes elements referred by the iterator i in the range [first, last) + /// with new_value, when the following corresponding conditions hold: predicate(*i) != false. + /// + /// Complexity: Exactly 'last - first' applications of the corresponding predicate. + /// + /// Note: The predicate version of replace_if is replace and not another variation of replace_if. + /// This is because both versions would have the same parameter count and there could be ambiguity. + /// + template + inline void + replace_if(ForwardIterator first, ForwardIterator last, Predicate predicate, const T& new_value) + { + for(; first != last; ++first) + { + if(predicate(*first)) + *first = new_value; + } + } + + + /// remove_copy + /// + /// Effects: Copies all the elements referred to by the iterator i in the range + /// [first, last) for which the following corresponding condition does not hold: + /// *i == value. + /// + /// Requires: The ranges [first, last) and [result, result + (last - first)) shall not overlap. + /// + /// Returns: The end of the resulting range. + /// + /// Complexity: Exactly 'last - first' applications of the corresponding predicate. 
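+ /// Example usage (an illustrative sketch; 'src' and 'dst' are hypothetical eastl::vector<int>
+ /// objects, with 'dst' already resized to src.size() so the destination range is valid):
+ ///     eastl::vector<int>::iterator dstEnd =
+ ///         eastl::remove_copy(src.begin(), src.end(), dst.begin(), 4); // Copies everything except 4s.
+ ///     dst.erase(dstEnd, dst.end()); // Trim the unused tail.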
+ /// + template + inline OutputIterator + remove_copy(InputIterator first, InputIterator last, OutputIterator result, const T& value) + { + for(; first != last; ++first) + { + if(!(*first == value)) // Note that we always express value comparisons in terms of < or ==. + { + *result = eastl::move(*first); + ++result; + } + } + return result; + } + + + /// remove_copy_if + /// + /// Effects: Copies all the elements referred to by the iterator i in the range + /// [first, last) for which the following corresponding condition does not hold: + /// predicate(*i) != false. + /// + /// Requires: The ranges [first, last) and [result, result + (last - first)) shall not overlap. + /// + /// Returns: The end of the resulting range. + /// + /// Complexity: Exactly 'last - first' applications of the corresponding predicate. + /// + template + inline OutputIterator + remove_copy_if(InputIterator first, InputIterator last, OutputIterator result, Predicate predicate) + { + for(; first != last; ++first) + { + if(!predicate(*first)) + { + *result = eastl::move(*first); + ++result; + } + } + return result; + } + + + /// remove + /// + /// Effects: Eliminates all the elements referred to by iterator i in the + /// range [first, last) for which the following corresponding condition + /// holds: *i == value. + /// + /// Returns: The end of the resulting range. + /// + /// Complexity: Exactly 'last - first' applications of the corresponding predicate. + /// + /// Note: The predicate version of remove is remove_if and not another variation of remove. + /// This is because both versions would have the same parameter count and there could be ambiguity. + /// + /// Note: Since this function moves the element to the back of the heap and + /// doesn't actually remove it from the given container, the user must call + /// the container erase function if the user wants to erase the element + /// from the container. + /// + /// Example usage: + /// vector intArray; + /// ... + /// intArray.erase(remove(intArray.begin(), intArray.end(), 4), intArray.end()); // Erase all elements of value 4. + /// + template + inline ForwardIterator + remove(ForwardIterator first, ForwardIterator last, const T& value) + { + first = eastl::find(first, last, value); + if(first != last) + { + ForwardIterator i(first); + return eastl::remove_copy(++i, last, first, value); + } + return first; + } + + + /// remove_if + /// + /// Effects: Eliminates all the elements referred to by iterator i in the + /// range [first, last) for which the following corresponding condition + /// holds: predicate(*i) != false. + /// + /// Returns: The end of the resulting range. + /// + /// Complexity: Exactly 'last - first' applications of the corresponding predicate. + /// + /// Note: The predicate version of remove_if is remove and not another variation of remove_if. + /// This is because both versions would have the same parameter count and there could be ambiguity. + /// + /// Note: Since this function moves the element to the back of the heap and + /// doesn't actually remove it from the given container, the user must call + /// the container erase function if the user wants to erase the element + /// from the container. + /// + /// Example usage: + /// vector intArray; + /// ... + /// intArray.erase(remove(intArray.begin(), intArray.end(), bind2nd(less(), (int)3)), intArray.end()); // Erase all elements less than 3. 
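+ /// Equivalent example with a lambda predicate (an illustrative sketch):
+ ///     intArray.erase(eastl::remove_if(intArray.begin(), intArray.end(),
+ ///                                     [](int x) { return x < 3; }),
+ ///                    intArray.end());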
+ /// + template + inline ForwardIterator + remove_if(ForwardIterator first, ForwardIterator last, Predicate predicate) + { + first = eastl::find_if(first, last, predicate); + if(first != last) + { + ForwardIterator i(first); + return eastl::remove_copy_if(++i, last, first, predicate); + } + return first; + } + + + /// replace_copy + /// + /// Effects: Assigns to every iterator i in the range [result, result + (last - first)) + /// either new_value or *(first + (i - result)) depending on whether the following + /// corresponding conditions hold: *(first + (i - result)) == old_value. + /// + /// Requires: The ranges [first, last) and [result, result + (last - first)) shall not overlap. + /// + /// Returns: result + (last - first). + /// + /// Complexity: Exactly 'last - first' applications of the corresponding predicate. + /// + /// Note: The predicate version of replace_copy is replace_copy_if and not another variation of replace_copy. + /// This is because both versions would have the same parameter count and there could be ambiguity. + /// + template + inline OutputIterator + replace_copy(InputIterator first, InputIterator last, OutputIterator result, const T& old_value, const T& new_value) + { + for(; first != last; ++first, ++result) + *result = (*first == old_value) ? new_value : *first; + return result; + } + + + /// replace_copy_if + /// + /// Effects: Assigns to every iterator i in the range [result, result + (last - first)) + /// either new_value or *(first + (i - result)) depending on whether the following + /// corresponding conditions hold: predicate(*(first + (i - result))) != false. + /// + /// Requires: The ranges [first, last) and [result, result+(lastfirst)) shall not overlap. + /// + /// Returns: result + (last - first). + /// + /// Complexity: Exactly 'last - first' applications of the corresponding predicate. + /// + /// Note: The predicate version of replace_copy_if is replace_copy and not another variation of replace_copy_if. + /// This is because both versions would have the same parameter count and there could be ambiguity. + /// + template + inline OutputIterator + replace_copy_if(InputIterator first, InputIterator last, OutputIterator result, Predicate predicate, const T& new_value) + { + for(; first != last; ++first, ++result) + *result = predicate(*first) ? new_value : *first; + return result; + } + + + + + // reverse + // + // We provide helper functions which allow reverse to be implemented more + // efficiently for some types of iterators and types. + // + template + inline void reverse_impl(BidirectionalIterator first, BidirectionalIterator last, EASTL_ITC_NS::bidirectional_iterator_tag) + { + for(; (first != last) && (first != --last); ++first) // We are not allowed to use operator <, <=, >, >= with a + eastl::iter_swap(first, last); // generic (bidirectional or otherwise) iterator. + } + + template + inline void reverse_impl(RandomAccessIterator first, RandomAccessIterator last, EASTL_ITC_NS::random_access_iterator_tag) + { + if(first != last) + { + for(; first < --last; ++first) // With a random access iterator, we can use operator < to more efficiently implement + eastl::iter_swap(first, last); // this algorithm. A generic iterator doesn't necessarily have an operator < defined. + } + } + + /// reverse + /// + /// Reverses the values within the range [first, last). + /// + /// Effects: For each nonnegative integer i <= (last - first) / 2, + /// applies swap to all pairs of iterators first + i, (last i) - 1. 
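+ /// Example usage (an illustrative sketch; 'intArray' is a hypothetical eastl::vector<int>):
+ ///     eastl::reverse(intArray.begin(), intArray.end());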
+ /// + /// Complexity: Exactly '(last - first) / 2' swaps. + /// + template + inline void reverse(BidirectionalIterator first, BidirectionalIterator last) + { + typedef typename eastl::iterator_traits::iterator_category IC; + eastl::reverse_impl(first, last, IC()); + } + + + + /// reverse_copy + /// + /// Copies the range [first, last) in reverse order to the result. + /// + /// Effects: Copies the range [first, last) to the range + /// [result, result + (last - first)) such that for any nonnegative + /// integer i < (last - first) the following assignment takes place: + /// *(result + (last - first) - i) = *(first + i) + /// + /// Requires: The ranges [first, last) and [result, result + (last - first)) + /// shall not overlap. + /// + /// Returns: result + (last - first). That is, returns the end of the output range. + /// + /// Complexity: Exactly 'last - first' assignments. + /// + template + inline OutputIterator + reverse_copy(BidirectionalIterator first, BidirectionalIterator last, OutputIterator result) + { + for(; first != last; ++result) + *result = *--last; + return result; + } + + + + /// search + /// + /// Search finds a subsequence within the range [first1, last1) that is identical to [first2, last2) + /// when compared element-by-element. It returns an iterator pointing to the beginning of that + /// subsequence, or else last1 if no such subsequence exists. As such, it is very much like + /// the C strstr function, with the primary difference being that strstr uses 0-terminated strings + /// whereas search uses an end iterator to specify the end of a string. + /// + /// Returns: The first iterator i in the range [first1, last1 - (last2 - first2)) such that for + /// any nonnegative integer n less than 'last2 - first2' the following corresponding condition holds: + /// *(i + n) == *(first2 + n). Returns last1 if no such iterator is found. + /// + /// Complexity: At most (last1 first1) * (last2 first2) applications of the corresponding predicate. + /// + template + ForwardIterator1 + search(ForwardIterator1 first1, ForwardIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2) + { + if(first2 != last2) // If there is anything to search for... + { + // We need to make a special case for a pattern of one element, + // as the logic below prevents one element patterns from working. + ForwardIterator2 temp2(first2); + ++temp2; + + if(temp2 != last2) // If what we are searching for has a length > 1... + { + ForwardIterator1 cur1(first1); + ForwardIterator2 p2; + + while(first1 != last1) + { + // The following loop is the equivalent of eastl::find(first1, last1, *first2) + while((first1 != last1) && !(*first1 == *first2)) + ++first1; + + if(first1 != last1) + { + p2 = temp2; + cur1 = first1; + + if(++cur1 != last1) + { + while(*cur1 == *p2) + { + if(++p2 == last2) + return first1; + + if(++cur1 == last1) + return last1; + } + + ++first1; + continue; + } + } + return last1; + } + + // Fall through to the end. + } + else + return eastl::find(first1, last1, *first2); + } + + return first1; + + + #if 0 + /* Another implementation which is a little more simpler but executes a little slower on average. 
+ typedef typename eastl::iterator_traits::difference_type difference_type_1; + typedef typename eastl::iterator_traits::difference_type difference_type_2; + + const difference_type_2 d2 = eastl::distance(first2, last2); + + for(difference_type_1 d1 = eastl::distance(first1, last1); d1 >= d2; ++first1, --d1) + { + ForwardIterator1 temp1 = first1; + + for(ForwardIterator2 temp2 = first2; ; ++temp1, ++temp2) + { + if(temp2 == last2) + return first1; + if(!(*temp1 == *temp2)) + break; + } + } + + return last1; + */ + #endif + } + + + /// search + /// + /// Search finds a subsequence within the range [first1, last1) that is identical to [first2, last2) + /// when compared element-by-element. It returns an iterator pointing to the beginning of that + /// subsequence, or else last1 if no such subsequence exists. As such, it is very much like + /// the C strstr function, with the only difference being that strstr uses 0-terminated strings + /// whereas search uses an end iterator to specify the end of a string. + /// + /// Returns: The first iterator i in the range [first1, last1 - (last2 - first2)) such that for + /// any nonnegative integer n less than 'last2 - first2' the following corresponding condition holds: + /// predicate(*(i + n), *(first2 + n)) != false. Returns last1 if no such iterator is found. + /// + /// Complexity: At most (last1 first1) * (last2 first2) applications of the corresponding predicate. + /// + template + ForwardIterator1 + search(ForwardIterator1 first1, ForwardIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2, + BinaryPredicate predicate) + { + typedef typename eastl::iterator_traits::difference_type difference_type_1; + typedef typename eastl::iterator_traits::difference_type difference_type_2; + + difference_type_2 d2 = eastl::distance(first2, last2); + + if(d2 != 0) + { + ForwardIterator1 i(first1); + eastl::advance(i, d2); + + for(difference_type_1 d1 = eastl::distance(first1, last1); d1 >= d2; --d1) + { + if(eastl::equal(first1, i, first2, predicate)) + return first1; + if(d1 > d2) // To do: Find a way to make the algorithm more elegant. + { + ++first1; + ++i; + } + } + return last1; + } + return first1; // Just like with strstr, we return first1 if the match string is empty. + } + + + + // search_n helper functions + // + template + ForwardIterator // Generic implementation. + search_n_impl(ForwardIterator first, ForwardIterator last, Size count, const T& value, EASTL_ITC_NS::forward_iterator_tag) + { + if(count <= 0) + return first; + + Size d1 = (Size)eastl::distance(first, last); // Should d1 be of type Size, ptrdiff_t, or iterator_traits::difference_type? + // The problem with using iterator_traits::difference_type is that + if(count > d1) // ForwardIterator may not be a true iterator but instead something like a pointer. + return last; + + for(; d1 >= count; ++first, --d1) + { + ForwardIterator i(first); + + for(Size n = 0; n < count; ++n, ++i, --d1) + { + if(!(*i == value)) // Note that we always express value comparisons in terms of < or ==. + goto not_found; + } + return first; + + not_found: + first = i; + } + return last; + } + + template inline + RandomAccessIterator // Random access iterator implementation. Much faster than generic implementation. 
+ search_n_impl(RandomAccessIterator first, RandomAccessIterator last, Size count, const T& value, EASTL_ITC_NS::random_access_iterator_tag) + { + if(count <= 0) + return first; + else if(count == 1) + return eastl::find(first, last, value); + else if(last > first) + { + RandomAccessIterator lookAhead; + RandomAccessIterator backTrack; + + Size skipOffset = (count - 1); + Size tailSize = (Size)(last - first); + Size remainder; + Size prevRemainder; + + for(lookAhead = first + skipOffset; tailSize >= count; lookAhead += count) + { + tailSize -= count; + + if(*lookAhead == value) + { + remainder = skipOffset; + + for(backTrack = lookAhead - 1; *backTrack == value; --backTrack) + { + if(--remainder == 0) + return (lookAhead - skipOffset); // success + } + + if(remainder <= tailSize) + { + prevRemainder = remainder; + + while(*(++lookAhead) == value) + { + if(--remainder == 0) + return (backTrack + 1); // success + } + tailSize -= (prevRemainder - remainder); + } + else + return last; // failure + } + + // lookAhead here is always pointing to the element of the last mismatch. + } + } + + return last; // failure + } + + + /// search_n + /// + /// Returns: The first iterator i in the range [first, last count) such that + /// for any nonnegative integer n less than count the following corresponding + /// conditions hold: *(i + n) == value, pred(*(i + n),value) != false. + /// Returns last if no such iterator is found. + /// + /// Complexity: At most '(last1 - first1) * count' applications of the corresponding predicate. + /// + template + ForwardIterator + search_n(ForwardIterator first, ForwardIterator last, Size count, const T& value) + { + typedef typename eastl::iterator_traits::iterator_category IC; + return eastl::search_n_impl(first, last, count, value, IC()); + } + + + /// binary_search + /// + /// Returns: true if there is an iterator i in the range [first last) that + /// satisfies the corresponding conditions: !(*i < value) && !(value < *i). + /// + /// Complexity: At most 'log(last - first) + 2' comparisons. + /// + /// Note: The reason binary_search returns bool instead of an iterator is + /// that search_n, lower_bound, or equal_range already return an iterator. + /// However, there are arguments that binary_search should return an iterator. + /// Note that we provide binary_search_i (STL extension) to return an iterator. + /// + /// To use search_n to find an item, do this: + /// iterator i = search_n(begin, end, 1, value); + /// To use lower_bound to find an item, do this: + /// iterator i = lower_bound(begin, end, value); + /// if((i != last) && !(value < *i)) + /// + /// It turns out that the above lower_bound method is as fast as binary_search + /// would be if it returned an iterator. + /// + template + inline bool + binary_search(ForwardIterator first, ForwardIterator last, const T& value) + { + // To do: This can be made slightly faster by not using lower_bound. + ForwardIterator i(eastl::lower_bound(first, last, value)); + return ((i != last) && !(value < *i)); // Note that we always express value comparisons in terms of < or ==. + } + + + /// binary_search + /// + /// Returns: true if there is an iterator i in the range [first last) that + /// satisfies the corresponding conditions: compare(*i, value) == false && + /// compare(value, *i) == false. + /// + /// Complexity: At most 'log(last - first) + 2' comparisons. + /// + /// Note: See comments above regarding the bool return value of binary_search. 
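+ /// Example usage (an illustrative sketch; 'sortedInts' is a hypothetical eastl::vector<int> sorted
+ /// ascending by the same comparison passed to binary_search):
+ ///     const bool found = eastl::binary_search(sortedInts.begin(), sortedInts.end(), 42, eastl::less<int>());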
+ /// + template + inline bool + binary_search(ForwardIterator first, ForwardIterator last, const T& value, Compare compare) + { + // To do: This can be made slightly faster by not using lower_bound. + ForwardIterator i(eastl::lower_bound(first, last, value, compare)); + return ((i != last) && !compare(value, *i)); + } + + + /// binary_search_i + /// + /// Returns: iterator if there is an iterator i in the range [first last) that + /// satisfies the corresponding conditions: !(*i < value) && !(value < *i). + /// Returns last if the value is not found. + /// + /// Complexity: At most 'log(last - first) + 2' comparisons. + /// + template + inline ForwardIterator + binary_search_i(ForwardIterator first, ForwardIterator last, const T& value) + { + // To do: This can be made slightly faster by not using lower_bound. + ForwardIterator i(eastl::lower_bound(first, last, value)); + if((i != last) && !(value < *i)) // Note that we always express value comparisons in terms of < or ==. + return i; + return last; + } + + + /// binary_search_i + /// + /// Returns: iterator if there is an iterator i in the range [first last) that + /// satisfies the corresponding conditions: !(*i < value) && !(value < *i). + /// Returns last if the value is not found. + /// + /// Complexity: At most 'log(last - first) + 2' comparisons. + /// + template + inline ForwardIterator + binary_search_i(ForwardIterator first, ForwardIterator last, const T& value, Compare compare) + { + // To do: This can be made slightly faster by not using lower_bound. + ForwardIterator i(eastl::lower_bound(first, last, value, compare)); + if((i != last) && !compare(value, *i)) + return i; + return last; + } + + + /// unique + /// + /// Given a sorted range, this function removes duplicated items. + /// Note that if you have a container then you will probably want + /// to call erase on the container with the return value if your + /// goal is to remove the duplicated items from the container. + /// + /// Effects: Eliminates all but the first element from every consecutive + /// group of equal elements referred to by the iterator i in the range + /// [first, last) for which the following corresponding condition holds: + /// *i == *(i - 1). + /// + /// Returns: The end of the resulting range. + /// + /// Complexity: If the range (last - first) is not empty, exactly (last - first) + /// applications of the corresponding predicate, otherwise no applications of the predicate. + /// + /// Example usage: + /// vector intArray; + /// ... + /// intArray.erase(unique(intArray.begin(), intArray.end()), intArray.end()); + /// + template + ForwardIterator unique(ForwardIterator first, ForwardIterator last) + { + first = eastl::adjacent_find(first, last); + + if(first != last) // We expect that there are duplicated items, else the user wouldn't be calling this function. + { + ForwardIterator dest(first); + + for(++first; first != last; ++first) + { + if(!(*dest == *first)) // Note that we always express value comparisons in terms of < or ==. + *++dest = *first; + } + return ++dest; + } + return last; + } + + + /// unique + /// + /// Given a sorted range, this function removes duplicated items. + /// Note that if you have a container then you will probably want + /// to call erase on the container with the return value if your + /// goal is to remove the duplicated items from the container. 
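+	/// Example usage with a predicate (illustrative sketch, not part of the original
+	/// EASTL sources; assumes <EASTL/vector.h> is included):
+	///     eastl::vector<int> v = { 1, 1, 2, 2, 2, 3 };   // consecutive equal groups
+	///     auto sameParity = [](int a, int b) { return (a & 1) == (b & 1); };
+	///     v.erase(eastl::unique(v.begin(), v.end(), sameParity), v.end());
+	///     // v == { 1, 2, 3 }: consecutive elements the predicate deems equal are collapsed.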
+ /// + /// Effects: Eliminates all but the first element from every consecutive + /// group of equal elements referred to by the iterator i in the range + /// [first, last) for which the following corresponding condition holds: + /// predicate(*i, *(i - 1)) != false. + /// + /// Returns: The end of the resulting range. + /// + /// Complexity: If the range (last - first) is not empty, exactly (last - first) + /// applications of the corresponding predicate, otherwise no applications of the predicate. + /// + template + ForwardIterator unique(ForwardIterator first, ForwardIterator last, BinaryPredicate predicate) + { + first = eastl::adjacent_find(first, last, predicate); + + if(first != last) // We expect that there are duplicated items, else the user wouldn't be calling this function. + { + ForwardIterator dest(first); + + for(++first; first != last; ++first) + { + if(!predicate(*dest, *first)) + *++dest = *first; + } + return ++dest; + } + return last; + } + + + + // find_end + // + // We provide two versions here, one for a bidirectional iterators and one for + // regular forward iterators. Given that we are searching backward, it's a bit + // more efficient if we can use backwards iteration to implement our search, + // though this requires an iterator that can be reversed. + // + template + ForwardIterator1 + find_end_impl(ForwardIterator1 first1, ForwardIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2, + EASTL_ITC_NS::forward_iterator_tag, EASTL_ITC_NS::forward_iterator_tag) + { + if(first2 != last2) // We have to do this check because the search algorithm below will return first1 (and not last1) if the first2/last2 range is empty. + { + for(ForwardIterator1 result(last1); ; ) + { + const ForwardIterator1 resultNext(eastl::search(first1, last1, first2, last2)); + + if(resultNext != last1) // If another sequence was found... + { + first1 = result = resultNext; + ++first1; + } + else + return result; + } + } + return last1; + } + + template + BidirectionalIterator1 + find_end_impl(BidirectionalIterator1 first1, BidirectionalIterator1 last1, + BidirectionalIterator2 first2, BidirectionalIterator2 last2, + EASTL_ITC_NS::bidirectional_iterator_tag, EASTL_ITC_NS::bidirectional_iterator_tag) + { + typedef eastl::reverse_iterator reverse_iterator1; + typedef eastl::reverse_iterator reverse_iterator2; + + reverse_iterator1 rresult(eastl::search(reverse_iterator1(last1), reverse_iterator1(first1), + reverse_iterator2(last2), reverse_iterator2(first2))); + if(rresult.base() != first1) // If we found something... + { + BidirectionalIterator1 result(rresult.base()); + + eastl::advance(result, -eastl::distance(first2, last2)); // We have an opportunity to optimize this, as the + return result; // search function already calculates this distance. + } + return last1; + } + + /// find_end + /// + /// Finds the last occurrence of the second sequence in the first sequence. + /// As such, this function is much like the C string function strrstr and it + /// is also the same as a reversed version of 'search'. It is called find_end + /// instead of the possibly more consistent search_end simply because the C++ + /// standard algorithms have such naming. + /// + /// Returns an iterator between first1 and last1 if the sequence is found. + /// returns last1 (the end of the first seqence) if the sequence is not found. 
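+	/// Example usage (illustrative sketch, not part of the original EASTL sources;
+	/// assumes <EASTL/vector.h> is included):
+	///     eastl::vector<int> haystack = { 1, 2, 3, 1, 2, 3, 4 };
+	///     eastl::vector<int> needle   = { 1, 2, 3 };
+	///     auto it = eastl::find_end(haystack.begin(), haystack.end(), needle.begin(), needle.end());
+	///     // it points at the last occurrence (index 3); haystack.end() is returned if there is none.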
+ /// + template + inline ForwardIterator1 + find_end(ForwardIterator1 first1, ForwardIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2) + { + typedef typename eastl::iterator_traits::iterator_category IC1; + typedef typename eastl::iterator_traits::iterator_category IC2; + + return eastl::find_end_impl(first1, last1, first2, last2, IC1(), IC2()); + } + + + + + // To consider: Fold the predicate and non-predicate versions of + // this algorithm into a single function. + template + ForwardIterator1 + find_end_impl(ForwardIterator1 first1, ForwardIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2, + BinaryPredicate predicate, + EASTL_ITC_NS::forward_iterator_tag, EASTL_ITC_NS::forward_iterator_tag) + { + if(first2 != last2) // We have to do this check because the search algorithm below will return first1 (and not last1) if the first2/last2 range is empty. + { + for(ForwardIterator1 result = last1; ; ) + { + const ForwardIterator1 resultNext(eastl::search(first1, last1, first2, last2, predicate)); + + if(resultNext != last1) // If another sequence was found... + { + first1 = result = resultNext; + ++first1; + } + else + return result; + } + } + return last1; + } + + template + BidirectionalIterator1 + find_end_impl(BidirectionalIterator1 first1, BidirectionalIterator1 last1, + BidirectionalIterator2 first2, BidirectionalIterator2 last2, + BinaryPredicate predicate, + EASTL_ITC_NS::bidirectional_iterator_tag, EASTL_ITC_NS::bidirectional_iterator_tag) + { + typedef eastl::reverse_iterator reverse_iterator1; + typedef eastl::reverse_iterator reverse_iterator2; + + reverse_iterator1 rresult(eastl::search + (reverse_iterator1(last1), reverse_iterator1(first1), + reverse_iterator2(last2), reverse_iterator2(first2), + predicate)); + if(rresult.base() != first1) // If we found something... + { + BidirectionalIterator1 result(rresult.base()); + eastl::advance(result, -eastl::distance(first2, last2)); + return result; + } + return last1; + } + + + /// find_end + /// + /// Effects: Finds a subsequence of equal values in a sequence. + /// + /// Returns: The last iterator i in the range [first1, last1 - (last2 - first2)) + /// such that for any nonnegative integer n < (last2 - first2), the following + /// corresponding conditions hold: pred(*(i+n),*(first2+n)) != false. Returns + /// last1 if no such iterator is found. + /// + /// Complexity: At most (last2 - first2) * (last1 - first1 - (last2 - first2) + 1) + /// applications of the corresponding predicate. + /// + template + inline ForwardIterator1 + find_end(ForwardIterator1 first1, ForwardIterator1 last1, + ForwardIterator2 first2, ForwardIterator2 last2, + BinaryPredicate predicate) + { + typedef typename eastl::iterator_traits::iterator_category IC1; + typedef typename eastl::iterator_traits::iterator_category IC2; + + return eastl::find_end_impl + (first1, last1, first2, last2, predicate, IC1(), IC2()); + } + + + /// set_difference + /// + /// set_difference iterates over both input ranges and copies elements present + /// in the first range but not the second to the output range. + /// + /// Effects: Copies the elements of the range [first1, last1) which are not + /// present in the range [first2, last2) to the range beginning at result. + /// The elements in the constructed range are sorted. + /// + /// Requires: The input ranges must be sorted. + /// Requires: The output range shall not overlap with either of the original ranges. + /// + /// Returns: The end of the output range. 
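+	/// Example usage (illustrative sketch, not part of the original EASTL sources;
+	/// assumes <EASTL/vector.h> and <EASTL/iterator.h> are included):
+	///     eastl::vector<int> a = { 1, 2, 3, 4, 5 };   // both inputs must be sorted
+	///     eastl::vector<int> b = { 2, 4 };
+	///     eastl::vector<int> out;
+	///     eastl::set_difference(a.begin(), a.end(), b.begin(), b.end(), eastl::back_inserter(out));
+	///     // out == { 1, 3, 5 }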
+ /// + /// Complexity: At most (2 * ((last1 - first1) + (last2 - first2)) - 1) comparisons. + /// + template + OutputIterator set_difference(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result) + { + while((first1 != last1) && (first2 != last2)) + { + if(*first1 < *first2) + { + *result = *first1; + ++first1; + ++result; + } + else if(*first2 < *first1) + ++first2; + else + { + ++first1; + ++first2; + } + } + + return eastl::copy(first1, last1, result); + } + + + template + OutputIterator set_difference(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result, Compare compare) + { + while((first1 != last1) && (first2 != last2)) + { + if(compare(*first1, *first2)) + { + EASTL_VALIDATE_COMPARE(!compare(*first2, *first1)); // Validate that the compare function is sane. + *result = *first1; + ++first1; + ++result; + } + else if(compare(*first2, *first1)) + { + EASTL_VALIDATE_COMPARE(!compare(*first1, *first2)); // Validate that the compare function is sane. + ++first2; + } + else + { + ++first1; + ++first2; + } + } + + return eastl::copy(first1, last1, result); + } + + + /// set_difference_2 + /// + /// set_difference_2 iterates over both input ranges and copies elements present + /// in the first range but not the second to the first output range and copies + /// elements present in the second range but not in the first to the second output + /// range. + /// + /// Effects: Copies the elements of the range [first1, last1) which are not + /// present in the range [first2, last2) to the first output range beginning at + /// result1 AND copies the element of range [first2, last2) which are not present + /// in the range [first1, last) to the second output range beginning at result2. + /// The elements in the constructed range are sorted. + /// + /// Requires: The input ranges must be sorted. + /// Requires: The output ranges shall not overlap with either of the original ranges. + /// + /// Returns: Nothing. + /// + /// Complexity: At most (2 * ((last1 - first1) + (last2 - first2)) - 1) comparisons. + /// + template + void set_difference_2(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result1, OutputIterator result2, Compare compare) + { + while ((first1 != last1) && (first2 != last2)) + { + if (compare(*first1, *first2)) + { + EASTL_VALIDATE_COMPARE(!compare(*first2, *first1)); // Validate that the compare function is sane. + *result1++ = *first1++; + } + else if (compare(*first2, *first1)) + { + EASTL_VALIDATE_COMPARE(!compare(*first1, *first2)); // Validate that the compare function is sane. + *result2++ = *first2++; + } + else + { + ++first1; + ++first2; + } + } + + eastl::copy(first2, last2, result2); + eastl::copy(first1, last1, result1); + } + + /// set_difference_2 + /// + /// set_difference_2 with the default comparison object is eastl::less<>. + /// + template + void set_difference_2(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result1, OutputIterator result2) + { + eastl::set_difference_2(first1, last1, first2, last2, result1, result2, eastl::less<>{}); + } + + + /// set_symmetric_difference + /// + /// set_difference iterates over both input ranges and copies elements present + /// in the either range but not the other to the output range. 
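+	/// Example usage (illustrative sketch, not part of the original EASTL sources;
+	/// assumes <EASTL/vector.h> and <EASTL/iterator.h> are included):
+	///     eastl::vector<int> a = { 1, 2, 3, 4 };   // both inputs must be sorted
+	///     eastl::vector<int> b = { 3, 4, 5, 6 };
+	///     eastl::vector<int> out;
+	///     eastl::set_symmetric_difference(a.begin(), a.end(), b.begin(), b.end(), eastl::back_inserter(out));
+	///     // out == { 1, 2, 5, 6 }: elements present in exactly one of the two ranges.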
+ /// + /// Effects: Copies the elements of the range [first1, last1) which are not + /// present in the range [first2, last2), and the elements of the range [first2, last2) + /// which are not present in the range [first1, last1) to the range beginning at result. + /// The elements in the constructed range are sorted. + /// + /// Requires: The input ranges must be sorted. + /// Requires: The resulting range shall not overlap with either of the original ranges. + /// + /// Returns: The end of the constructed range. + /// + /// Complexity: At most (2 * ((last1 - first1) + (last2 - first2)) - 1) comparisons. + /// + template + OutputIterator set_symmetric_difference(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result) + { + while((first1 != last1) && (first2 != last2)) + { + if(*first1 < *first2) + { + *result = *first1; + ++first1; + ++result; + } + else if(*first2 < *first1) + { + *result = *first2; + ++first2; + ++result; + } + else + { + ++first1; + ++first2; + } + } + + return eastl::copy(first2, last2, eastl::copy(first1, last1, result)); + } + + + template + OutputIterator set_symmetric_difference(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result, Compare compare) + { + while((first1 != last1) && (first2 != last2)) + { + if(compare(*first1, *first2)) + { + EASTL_VALIDATE_COMPARE(!compare(*first2, *first1)); // Validate that the compare function is sane. + *result = *first1; + ++first1; + ++result; + } + else if(compare(*first2, *first1)) + { + EASTL_VALIDATE_COMPARE(!compare(*first1, *first2)); // Validate that the compare function is sane. + *result = *first2; + ++first2; + ++result; + } + else + { + ++first1; + ++first2; + } + } + + return eastl::copy(first2, last2, eastl::copy(first1, last1, result)); + } + + + /// set_intersection + /// + /// set_intersection over both ranges and copies elements present in + /// both ranges to the output range. + /// + /// Effects: Constructs a sorted intersection of the elements from the + /// two ranges; that is, the set of elements that are present in both of the ranges. + /// + /// Requires: The input ranges must be sorted. + /// Requires: The resulting range shall not overlap with either of the original ranges. + /// + /// Returns: The end of the constructed range. + /// + /// Complexity: At most 2 * ((last1 - first1) + (last2 - first2)) - 1) comparisons. + /// + /// Note: The copying operation is stable; if an element is present in both ranges, + /// the one from the first range is copied. + /// + template + OutputIterator set_intersection(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result) + { + while((first1 != last1) && (first2 != last2)) + { + if(*first1 < *first2) + ++first1; + else if(*first2 < *first1) + ++first2; + else + { + *result = *first1; + ++first1; + ++first2; + ++result; + } + } + + return result; + } + + + template + OutputIterator set_intersection(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result, Compare compare) + { + while((first1 != last1) && (first2 != last2)) + { + if(compare(*first1, *first2)) + { + EASTL_VALIDATE_COMPARE(!compare(*first2, *first1)); // Validate that the compare function is sane. + ++first1; + } + else if(compare(*first2, *first1)) + { + EASTL_VALIDATE_COMPARE(!compare(*first1, *first2)); // Validate that the compare function is sane. 
+ ++first2; + } + else + { + *result = *first1; + ++first1; + ++first2; + ++result; + } + } + + return result; + } + + + + /// set_union + /// + /// set_union iterates over both ranges and copies elements present in + /// both ranges to the output range. + /// + /// Effects: Constructs a sorted union of the elements from the two ranges; + /// that is, the set of elements that are present in one or both of the ranges. + /// + /// Requires: The input ranges must be sorted. + /// Requires: The resulting range shall not overlap with either of the original ranges. + /// + /// Returns: The end of the constructed range. + /// + /// Complexity: At most (2 * ((last1 - first1) + (last2 - first2)) - 1) comparisons. + /// + /// Note: The copying operation is stable; if an element is present in both ranges, + /// the one from the first range is copied. + /// + template + OutputIterator set_union(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result) + { + while((first1 != last1) && (first2 != last2)) + { + if(*first1 < *first2) + { + *result = *first1; + ++first1; + } + else if(*first2 < *first1) + { + *result = *first2; + ++first2; + } + else + { + *result = *first1; + ++first1; + ++first2; + } + ++result; + } + + return eastl::copy(first2, last2, eastl::copy(first1, last1, result)); + } + + + template + OutputIterator set_union(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator result, Compare compare) + { + while((first1 != last1) && (first2 != last2)) + { + if(compare(*first1, *first2)) + { + EASTL_VALIDATE_COMPARE(!compare(*first2, *first1)); // Validate that the compare function is sane. + *result = *first1; + ++first1; + } + else if(compare(*first2, *first1)) + { + EASTL_VALIDATE_COMPARE(!compare(*first1, *first2)); // Validate that the compare function is sane. + *result = *first2; + ++first2; + } + else + { + *result = *first1; + ++first1; + ++first2; + } + ++result; + } + + return eastl::copy(first2, last2, eastl::copy(first1, last1, result)); + } + + + /// set_decomposition + /// + /// set_decomposition iterates over both ranges and copies elements to one of the three + /// categories of output ranges. + /// + /// Effects: Constructs three sorted containers of the elements from the two ranges. + /// * OutputIterator1 is elements only in Container1. + /// * OutputIterator2 is elements only in Container2. + /// * OutputIterator3 is elements that are in both Container1 and Container2. + /// + /// Requires: The input ranges must be sorted. + /// Requires: The resulting ranges shall not overlap with either of the original ranges. + /// + /// Returns: The end of the constructed range of elements in both Container1 and Container2. + /// + /// Complexity: At most (2 * ((last1 - first1) + (last2 - first2)) - 1) comparisons. + /// + template + OutputIterator3 set_decomposition(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + OutputIterator1 result1, OutputIterator2 result2, OutputIterator3 result3, Compare compare) + { + while ((first1 != last1) && (first2 != last2)) + { + if (compare(*first1, *first2)) + { + EASTL_VALIDATE_COMPARE(!compare(*first2, *first1)); // Validate that the compare function is sane. + *result1++ = *first1++; + } + else if (compare(*first2, *first1)) + { + EASTL_VALIDATE_COMPARE(!compare(*first1, *first2)); // Validate that the compare function is sane. 
+ *result2++ = *first2++; + } + else + { + *result3++ = *first1++; + ++first2; + } + } + + eastl::copy(first1, last1, result1); + eastl::copy(first2, last2, result2); + + return result3; + } + + /// set_decomposition + /// + /// set_decomposition with the default comparison object is eastl::less<>. + /// + template + OutputIterator3 set_decomposition(InputIterator1 first1, InputIterator1 last1, InputIterator2 first2, InputIterator2 last2, + OutputIterator1 result1, OutputIterator2 result2, OutputIterator3 result3) + { + return eastl::set_decomposition(first1, last1, first2, last2, result1, result2, result3, eastl::less<>{}); + } + + + /// is_permutation + /// + template + bool is_permutation(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + + // Skip past any equivalent initial elements. + while((first1 != last1) && (*first1 == *first2)) + { + ++first1; + ++first2; + } + + if(first1 != last1) + { + const difference_type first1Size = eastl::distance(first1, last1); + ForwardIterator2 last2 = first2; + eastl::advance(last2, first1Size); + + for(ForwardIterator1 i = first1; i != last1; ++i) + { + if(i == eastl::find(first1, i, *i)) + { + const difference_type c = eastl::count(first2, last2, *i); + + if((c == 0) || (c != eastl::count(i, last1, *i))) + return false; + } + } + } + + return true; + } + + /// is_permutation + /// + template + bool is_permutation(ForwardIterator1 first1, ForwardIterator1 last1, ForwardIterator2 first2, BinaryPredicate predicate) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + + // Skip past any equivalent initial elements. + while((first1 != last1) && predicate(*first1, *first2)) + { + ++first1; + ++first2; + } + + if(first1 != last1) + { + const difference_type first1Size = eastl::distance(first1, last1); + ForwardIterator2 last2 = first2; + eastl::advance(last2, first1Size); + + for(ForwardIterator1 i = first1; i != last1; ++i) + { + if(i == eastl::find(first1, i, *i, predicate)) + { + const difference_type c = eastl::count(first2, last2, *i, predicate); + + if((c == 0) || (c != eastl::count(i, last1, *i, predicate))) + return false; + } + } + } + + return true; + } + + + /// next_permutation + /// + /// mutates the range [first, last) to the next permutation. Returns true if the + /// new range is not the final permutation (sorted like the starting permutation). + /// Permutations start with a sorted range, and false is returned when next_permutation + /// results in the initial sorted range, or if the range has <= 1 element. + /// Note that elements are compared by operator < (as usual) and that elements deemed + /// equal via this are not rearranged. + /// + /// http://marknelson.us/2002/03/01/next-permutation/ + /// Basically we start with an ordered range and reverse it's order one specifically + /// chosen swap and reverse at a time. It happens that this require going through every + /// permutation of the range. We use the same variable names as the document above. 
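+	/// Example usage with a comparison object (illustrative sketch, not part of the
+	/// original EASTL sources; assumes <EASTL/vector.h>, <EASTL/sort.h> and
+	/// <EASTL/functional.h> are included):
+	///     eastl::vector<int> v = { 1, 2, 3 };
+	///     eastl::sort(v.begin(), v.end(), eastl::greater<int>());   // start from { 3, 2, 1 }
+	///     do {
+	///         // visits { 3, 2, 1 }, { 3, 1, 2 }, { 2, 3, 1 }, { 2, 1, 3 }, { 1, 3, 2 }, { 1, 2, 3 }
+	///     } while(eastl::next_permutation(v.begin(), v.end(), eastl::greater<int>()));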
+ /// + /// To consider: Significantly improved permutation/combination functionality: + /// http://home.roadrunner.com/~hinnant/combinations.html + /// + /// Example usage: + /// vector intArray; + /// // + /// sort(intArray.begin(), intArray.end()); + /// do { + /// // + /// } while(next_permutation(intArray.begin(), intArray.end())); + /// + + template + bool next_permutation(BidirectionalIterator first, BidirectionalIterator last, Compare compare) + { + if(first != last) // If there is anything in the range... + { + BidirectionalIterator i = last; + + if(first != --i) // If the range has more than one item... + { + for(;;) + { + BidirectionalIterator ii(i), j; + + if(compare(*--i, *ii)) // Find two consecutive values where the first is less than the second. + { + j = last; + while(!compare(*i, *--j)) // Find the final value that's greater than the first (it may be equal to the second). + {} + eastl::iter_swap(i, j); // Swap the first and the final. + eastl::reverse(ii, last); // Reverse the ranget from second to last. + return true; + } + + if(i == first) // There are no two consecutive values where the first is less than the second, meaning the range is in reverse order. The reverse ordered range is always the last permutation. + { + eastl::reverse(first, last); + break; // We are done. + } + } + } + } + + return false; + } + + template + bool next_permutation(BidirectionalIterator first, BidirectionalIterator last) + { + typedef typename eastl::iterator_traits::value_type value_type; + + return eastl::next_permutation(first, last, eastl::less()); + } + + + + /// rotate + /// + /// Effects: For each non-negative integer i < (last - first), places the element from the + /// position first + i into position first + (i + (last - middle)) % (last - first). + /// + /// Returns: first + (last - middle). That is, returns where first went to. + /// + /// Remarks: This is a left rotate. + /// + /// Requires: [first,middle) and [middle,last) shall be valid ranges. ForwardIterator shall + /// satisfy the requirements of ValueSwappable (17.6.3.2). The type of *first shall satisfy + /// the requirements of MoveConstructible (Table 20) and the requirements of MoveAssignable. + /// + /// Complexity: At most last - first swaps. + /// + /// Note: While rotate works on ForwardIterators (e.g. slist) and BidirectionalIterators (e.g. list), + /// you can get much better performance (O(1) instead of O(n)) with slist and list rotation by + /// doing splice operations on those lists instead of calling this rotate function. + /// + /// http://www.cs.bell-labs.com/cm/cs/pearls/s02b.pdf / http://books.google.com/books?id=kse_7qbWbjsC&pg=PA14&lpg=PA14&dq=Programming+Pearls+flipping+hands + /// http://books.google.com/books?id=tjOlkl7ecVQC&pg=PA189&lpg=PA189&dq=stepanov+Elements+of+Programming+rotate + /// http://stackoverflow.com/questions/21160875/why-is-stdrotate-so-fast + /// + /// Strategy: + /// - We handle the special case of (middle == first) and (middle == last) no-ops + /// up front in the main rotate entry point. + /// - There's a basic ForwardIterator implementation (rotate_general_impl) which is + /// a fallback implementation that's not as fast as others but works for all cases. + /// - There's a slightly better BidirectionalIterator implementation. + /// - We have specialized versions for rotating elements that are is_trivially_move_assignable. + /// These versions will use memmove for when we have a RandomAccessIterator. 
+ /// - We have a specialized version for rotating by only a single position, as that allows us + /// (with any iterator type) to avoid a lot of logic involved with algorithms like "flipping hands" + /// and achieve near optimal O(n) behavior. it turns out that rotate-by-one is a common use + /// case in practice. + /// + namespace Internal + { + template + ForwardIterator rotate_general_impl(ForwardIterator first, ForwardIterator middle, ForwardIterator last) + { + using eastl::swap; + + ForwardIterator current = middle; + + do { + swap(*first++, *current++); + + if(first == middle) + middle = current; + } while(current != last); + + ForwardIterator result = first; + current = middle; + + while(current != last) + { + swap(*first++, *current++); + + if(first == middle) + middle = current; + else if(current == last) + current = middle; + } + + return result; // result points to first + (last - middle). + } + + + template + ForwardIterator move_rotate_left_by_one(ForwardIterator first, ForwardIterator last) + { + typedef typename eastl::iterator_traits::value_type value_type; + + value_type temp(eastl::move(*first)); + ForwardIterator result = eastl::move(eastl::next(first), last, first); // Note that while our template type is BidirectionalIterator, if the actual + *result = eastl::move(temp); // iterator is a RandomAccessIterator then this move will be a memmove for trivial types. + + return result; // result points to the final element in the range. + } + + + template + BidirectionalIterator move_rotate_right_by_one(BidirectionalIterator first, BidirectionalIterator last) + { + typedef typename eastl::iterator_traits::value_type value_type; + + BidirectionalIterator beforeLast = eastl::prev(last); + value_type temp(eastl::move(*beforeLast)); + BidirectionalIterator result = eastl::move_backward(first, beforeLast, last); // Note that while our template type is BidirectionalIterator, if the actual + *first = eastl::move(temp); // iterator is a RandomAccessIterator then this move will be a memmove for trivial types. + + return result; // result points to the first element in the range. + } + + template + struct rotate_helper + { + template + static ForwardIterator rotate_impl(ForwardIterator first, ForwardIterator middle, ForwardIterator last) + { return Internal::rotate_general_impl(first, middle, last); } + }; + + template <> + struct rotate_helper + { + template + static ForwardIterator rotate_impl(ForwardIterator first, ForwardIterator middle, ForwardIterator last) + { + if(eastl::next(first) == middle) // If moving trivial types by a single element, memcpy is fast for that case. + return Internal::move_rotate_left_by_one(first, last); + return Internal::rotate_general_impl(first, middle, last); + } + }; + + template <> + struct rotate_helper + { + template + static BidirectionalIterator rotate_impl(BidirectionalIterator first, BidirectionalIterator middle, BidirectionalIterator last) + { return Internal::rotate_general_impl(first, middle, last); } // rotate_general_impl outperforms the flipping hands algorithm. + + /* + // Simplest "flipping hands" implementation. Disabled because it's slower on average than rotate_general_impl. + template + static BidirectionalIterator rotate_impl(BidirectionalIterator first, BidirectionalIterator middle, BidirectionalIterator last) + { + eastl::reverse(first, middle); + eastl::reverse(middle, last); + eastl::reverse(first, last); + return first + (last - middle); // This can be slow for large ranges because operator + and - are O(n). 
+ } + + // Smarter "flipping hands" implementation, but still disabled because benchmarks are showing it to be slower than rotate_general_impl. + template + static BidirectionalIterator rotate_impl(BidirectionalIterator first, BidirectionalIterator middle, BidirectionalIterator last) + { + // This is the "flipping hands" algorithm. + eastl::reverse_impl(first, middle, EASTL_ITC_NS::bidirectional_iterator_tag()); // Reverse the left side. + eastl::reverse_impl(middle, last, EASTL_ITC_NS::bidirectional_iterator_tag()); // Reverse the right side. + + // Reverse the entire range. + while((first != middle) && (middle != last)) + { + eastl::iter_swap(first, --last); + ++first; + } + + if(first == middle) // Finish reversing the entire range. + { + eastl::reverse_impl(middle, last, bidirectional_iterator_tag()); + return last; + } + else + { + eastl::reverse_impl(first, middle, bidirectional_iterator_tag()); + return first; + } + } + */ + }; + + template <> + struct rotate_helper + { + template + static BidirectionalIterator rotate_impl(BidirectionalIterator first, BidirectionalIterator middle, BidirectionalIterator last) + { + if(eastl::next(first) == middle) // If moving trivial types by a single element, memcpy is fast for that case. + return Internal::move_rotate_left_by_one(first, last); + if(eastl::next(middle) == last) + return Internal::move_rotate_right_by_one(first, last); + return Internal::rotate_general_impl(first, middle, last); + } + }; + + template + inline Integer greatest_common_divisor(Integer x, Integer y) + { + do { + Integer t = (x % y); + x = y; + y = t; + } while(y); + + return x; + } + + template <> + struct rotate_helper + { + // This is the juggling algorithm, using move operations. + // In practice this implementation is about 25% faster than rotate_general_impl. We may want to + // consider sticking with just rotate_general_impl and avoid the code generation of this function. + template + static RandomAccessIterator rotate_impl(RandomAccessIterator first, RandomAccessIterator middle, RandomAccessIterator last) + { + typedef typename iterator_traits::difference_type difference_type; + typedef typename iterator_traits::value_type value_type; + + const difference_type m1 = (middle - first); + const difference_type m2 = (last - middle); + const difference_type g = Internal::greatest_common_divisor(m1, m2); + value_type temp; + + for(RandomAccessIterator p = first + g; p != first;) + { + temp = eastl::move(*--p); + RandomAccessIterator p1 = p; + RandomAccessIterator p2 = p + m1; + do + { + *p1 = eastl::move(*p2); + p1 = p2; + const difference_type d = (last - p2); + + if(m1 < d) + p2 += m1; + else + p2 = first + (m1 - d); + } while(p2 != p); + + *p1 = eastl::move(temp); + } + + return first + m2; + } + }; + + template <> + struct rotate_helper + { + // Experiments were done which tested the performance of using an intermediate buffer + // to do memcpy's to as opposed to executing a swapping algorithm. It turns out this is + // actually slower than even rotate_general_impl, partly because the average case involves + // memcpy'ing a quarter of the element range twice. Experiments were done with various kinds + // of PODs with various element counts. + + template + static RandomAccessIterator rotate_impl(RandomAccessIterator first, RandomAccessIterator middle, RandomAccessIterator last) + { + if(eastl::next(first) == middle) // If moving trivial types by a single element, memcpy is fast for that case. 
+ return Internal::move_rotate_left_by_one(first, last); + if(eastl::next(middle) == last) + return Internal::move_rotate_right_by_one(first, last); + if((last - first) < 32) // For small ranges rotate_general_impl is faster. + return Internal::rotate_general_impl(first, middle, last); + return Internal::rotate_helper::rotate_impl(first, middle, last); + } + }; + + } // namespace Internal + + + template + ForwardIterator rotate(ForwardIterator first, ForwardIterator middle, ForwardIterator last) + { + if(middle != first) + { + if(middle != last) + { + typedef typename eastl::iterator_traits::iterator_category IC; + typedef typename eastl::iterator_traits::value_type value_type; + + return Internal::rotate_helper::value || // This is the best way of telling if we can move types via memmove, but without a conforming C++11 compiler it usually returns false. + eastl::is_pod::value || // This is a more conservative way of telling if we can move types via memmove, and most compilers support it, but it doesn't have as full of coverage as is_trivially_move_assignable. + eastl::is_scalar::value> // This is the most conservative means and works with all compilers, but works only for scalars. + ::rotate_impl(first, middle, last); + } + + return first; + } + + return last; + } + + + + /// rotate_copy + /// + /// Similar to rotate except writes the output to the OutputIterator and + /// returns an OutputIterator to the element past the last element copied + /// (i.e. result + (last - first)) + /// + template + OutputIterator rotate_copy(ForwardIterator first, ForwardIterator middle, ForwardIterator last, OutputIterator result) + { + return eastl::copy(first, middle, eastl::copy(middle, last, result)); + } + + + + /// clamp + /// + /// Returns a reference to a clamped value within the range of [lo, hi]. + /// + /// http://en.cppreference.com/w/cpp/algorithm/clamp + /// + template + EA_CONSTEXPR const T& clamp(const T& v, const T& lo, const T& hi, Compare comp) + { + // code collapsed to a single line due to constexpr requirements + return [&] { EASTL_ASSERT(!comp(hi, lo)); }(), + comp(v, lo) ? lo : comp(hi, v) ? hi : v; + } + + template + EA_CONSTEXPR const T& clamp(const T& v, const T& lo, const T& hi) + { + return eastl::clamp(v, lo, hi, eastl::less<>()); + } + + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/allocator.h b/libkram/eastl/include/EASTL/allocator.h new file mode 100644 index 00000000..ad20e4d8 --- /dev/null +++ b/libkram/eastl/include/EASTL/allocator.h @@ -0,0 +1,395 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ALLOCATOR_H +#define EASTL_ALLOCATOR_H + + +#include +#include +#include + + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// alloc_flags + /// + /// Defines allocation flags. + /// + enum alloc_flags + { + MEM_TEMP = 0, // Low memory, not necessarily actually temporary. + MEM_PERM = 1 // High memory, for things that won't be unloaded. 
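+		// Illustrative note (not part of the original EASTL sources): these values are
+		// intended to be passed as the 'flags' argument of allocator::allocate, e.g.
+		//     pAllocator->allocate(256, MEM_PERM);
+		// In the non-DLL build the default allocator forwards the flags to the
+		// application-supplied operator new[] overloads, which decide what they mean.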
+ }; + + + /// allocator + /// + /// In this allocator class, note that it is not templated on any type and + /// instead it simply allocates blocks of memory much like the C malloc and + /// free functions. It can be thought of as similar to C++ std::allocator. + /// The flags parameter has meaning that is specific to the allocation + /// + /// C++11's std::allocator (20.6.9) doesn't have a move constructor or assignment + /// operator. This is possibly because std::allocators are associated with types + /// instead of as instances. The potential non-equivalance of C++ std::allocator + /// instances has been a source of some acknowledged design problems. + /// We don't implement support for move construction or assignment in eastl::allocator, + /// but users can define their own allocators which do have move functions and + /// the eastl containers are compatible with such allocators (i.e. nothing unexpected + /// will happen). + /// + class EASTL_API allocator + { + public: + EASTL_ALLOCATOR_EXPLICIT allocator(const char* pName = EASTL_NAME_VAL(EASTL_ALLOCATOR_DEFAULT_NAME)); + allocator(const allocator& x); + allocator(const allocator& x, const char* pName); + + allocator& operator=(const allocator& x); + + void* allocate(size_t n, int flags = 0); + void* allocate(size_t n, size_t alignment, size_t offset, int flags = 0); + void deallocate(void* p, size_t n); + + const char* get_name() const; + void set_name(const char* pName); + + protected: + #if EASTL_NAME_ENABLED + const char* mpName; // Debug name, used to track memory. + #endif + }; + + bool operator==(const allocator& a, const allocator& b); + bool operator!=(const allocator& a, const allocator& b); + + + + /// dummy_allocator + /// + /// Defines an allocator which does nothing. It returns NULL from allocate calls. + /// + class EASTL_API dummy_allocator + { + public: + EASTL_ALLOCATOR_EXPLICIT dummy_allocator(const char* = NULL) { } + dummy_allocator(const dummy_allocator&) { } + dummy_allocator(const dummy_allocator&, const char*) { } + + dummy_allocator& operator=(const dummy_allocator&) { return *this; } + + void* allocate(size_t, int = 0) { return NULL; } + void* allocate(size_t, size_t, size_t, int = 0) { return NULL; } + void deallocate(void*, size_t) { } + + const char* get_name() const { return ""; } + void set_name(const char*) { } + }; + + inline bool operator==(const dummy_allocator&, const dummy_allocator&) { return true; } + inline bool operator!=(const dummy_allocator&, const dummy_allocator&) { return false; } + + + + /// Defines a static default allocator which is constant across all types. + /// This is different from get_default_allocator, which is is bound at + /// compile-time and expected to differ per allocator type. + /// Currently this Default Allocator applies only to CoreAllocatorAdapter. + /// To consider: This naming of this function is too similar to get_default_allocator + /// and instead should be named something like GetStaticDefaultAllocator. + EASTL_API allocator* GetDefaultAllocator(); + EASTL_API allocator* SetDefaultAllocator(allocator* pAllocator); + + + /// get_default_allocator + /// + /// This templated function allows the user to implement a default allocator + /// retrieval function that any part of EASTL can use. EASTL containers take + /// an Allocator parameter which identifies an Allocator class to use. 
But + /// different kinds of allocators have different mechanisms for retrieving + /// a default allocator instance, and some don't even intrinsically support + /// such functionality. The user can override this get_default_allocator + /// function in order to provide the glue between EASTL and whatever their + /// system's default allocator happens to be. + /// + /// Example usage: + /// MyAllocatorType* gpSystemAllocator; + /// + /// MyAllocatorType* get_default_allocator(const MyAllocatorType*) + /// { return gpSystemAllocator; } + /// + template + Allocator* get_default_allocator(const Allocator*); + + EASTLAllocatorType* get_default_allocator(const EASTLAllocatorType*); + + + /// default_allocfreemethod + /// + /// Implements a default allocfreemethod which uses the default global allocator. + /// This version supports only default alignment. + /// + void* default_allocfreemethod(size_t n, void* pBuffer, void* /*pContext*/); + + + /// allocate_memory + /// + /// This is a memory allocation dispatching function. + /// To do: Make aligned and unaligned specializations. + /// Note that to do this we will need to use a class with a static + /// function instead of a standalone function like below. + /// + template + void* allocate_memory(Allocator& a, size_t n, size_t alignment, size_t alignmentOffset); + + +} // namespace eastl + + + + + + +#ifndef EASTL_USER_DEFINED_ALLOCATOR // If the user hasn't declared that he has defined a different allocator implementation elsewhere... + + EA_DISABLE_ALL_VC_WARNINGS() + #include + EA_RESTORE_ALL_VC_WARNINGS() + + #if !EASTL_DLL // If building a regular library and not building EASTL as a DLL... + // It is expected that the application define the following + // versions of operator new for the application. Either that or the + // user needs to override the implementation of the allocator class. + void* operator new[](size_t size, const char* pName, int flags, unsigned debugFlags, const char* file, int line); + void* operator new[](size_t size, size_t alignment, size_t alignmentOffset, const char* pName, int flags, unsigned debugFlags, const char* file, int line); + #endif + + namespace eastl + { + inline allocator::allocator(const char* EASTL_NAME(pName)) + { + #if EASTL_NAME_ENABLED + mpName = pName ? pName : EASTL_ALLOCATOR_DEFAULT_NAME; + #endif + } + + + inline allocator::allocator(const allocator& EASTL_NAME(alloc)) + { + #if EASTL_NAME_ENABLED + mpName = alloc.mpName; + #endif + } + + + inline allocator::allocator(const allocator&, const char* EASTL_NAME(pName)) + { + #if EASTL_NAME_ENABLED + mpName = pName ? 
pName : EASTL_ALLOCATOR_DEFAULT_NAME; + #endif + } + + + inline allocator& allocator::operator=(const allocator& EASTL_NAME(alloc)) + { + #if EASTL_NAME_ENABLED + mpName = alloc.mpName; + #endif + return *this; + } + + + inline const char* allocator::get_name() const + { + #if EASTL_NAME_ENABLED + return mpName; + #else + return EASTL_ALLOCATOR_DEFAULT_NAME; + #endif + } + + + inline void allocator::set_name(const char* EASTL_NAME(pName)) + { + #if EASTL_NAME_ENABLED + mpName = pName; + #endif + } + + + inline void* allocator::allocate(size_t n, int flags) + { + #if EASTL_NAME_ENABLED + #define pName mpName + #else + #define pName EASTL_ALLOCATOR_DEFAULT_NAME + #endif + + #if EASTL_DLL + return allocate(n, EASTL_SYSTEM_ALLOCATOR_MIN_ALIGNMENT, 0, flags); + #elif (EASTL_DEBUGPARAMS_LEVEL <= 0) + return ::new((char*)0, flags, 0, (char*)0, 0) char[n]; + #elif (EASTL_DEBUGPARAMS_LEVEL == 1) + return ::new( pName, flags, 0, (char*)0, 0) char[n]; + #else + return ::new( pName, flags, 0, __FILE__, __LINE__) char[n]; + #endif + } + + + inline void* allocator::allocate(size_t n, size_t alignment, size_t offset, int flags) + { + #if EASTL_DLL + // We currently have no support for implementing flags when + // using the C runtime library operator new function. The user + // can use SetDefaultAllocator to override the default allocator. + EA_UNUSED(offset); EA_UNUSED(flags); + + size_t adjustedAlignment = (alignment > EA_PLATFORM_PTR_SIZE) ? alignment : EA_PLATFORM_PTR_SIZE; + + void* p = new char[n + adjustedAlignment + EA_PLATFORM_PTR_SIZE]; + void* pPlusPointerSize = (void*)((uintptr_t)p + EA_PLATFORM_PTR_SIZE); + void* pAligned = (void*)(((uintptr_t)pPlusPointerSize + adjustedAlignment - 1) & ~(adjustedAlignment - 1)); + + void** pStoredPtr = (void**)pAligned - 1; + EASTL_ASSERT(pStoredPtr >= p); + *(pStoredPtr) = p; + + EASTL_ASSERT(((size_t)pAligned & ~(alignment - 1)) == (size_t)pAligned); + + return pAligned; + #elif (EASTL_DEBUGPARAMS_LEVEL <= 0) + return ::new(alignment, offset, (char*)0, flags, 0, (char*)0, 0) char[n]; + #elif (EASTL_DEBUGPARAMS_LEVEL == 1) + return ::new(alignment, offset, pName, flags, 0, (char*)0, 0) char[n]; + #else + return ::new(alignment, offset, pName, flags, 0, __FILE__, __LINE__) char[n]; + #endif + + #undef pName // See above for the definition of this. + } + + + inline void allocator::deallocate(void* p, size_t) + { + #if EASTL_DLL + if (p != nullptr) + { + void* pOriginalAllocation = *((void**)p - 1); + delete[](char*)pOriginalAllocation; + } + #else + delete[](char*)p; + #endif + } + + + inline bool operator==(const allocator&, const allocator&) + { + return true; // All allocators are considered equal, as they merely use global new/delete. + } + + + inline bool operator!=(const allocator&, const allocator&) + { + return false; // All allocators are considered equal, as they merely use global new/delete. + } + + + } // namespace eastl + + +#endif // EASTL_USER_DEFINED_ALLOCATOR + + + +namespace eastl +{ + + template + inline Allocator* get_default_allocator(const Allocator*) + { + return NULL; // By default we return NULL; the user must make specialization of this function in order to provide their own implementation. + } + + + inline EASTLAllocatorType* get_default_allocator(const EASTLAllocatorType*) + { + return EASTLAllocatorDefault(); // For the built-in allocator EASTLAllocatorType, we happen to already have a function for returning the default allocator instance, so we provide it. 
+ } + + + inline void* default_allocfreemethod(size_t n, void* pBuffer, void* /*pContext*/) + { + EASTLAllocatorType* const pAllocator = EASTLAllocatorDefault(); + + if(pBuffer) // If freeing... + { + EASTLFree(*pAllocator, pBuffer, n); + return NULL; // The return value is meaningless for the free. + } + else // allocating + return EASTLAlloc(*pAllocator, n); + } + + + /// allocate_memory + /// + /// This is a memory allocation dispatching function. + /// To do: Make aligned and unaligned specializations. + /// Note that to do this we will need to use a class with a static + /// function instead of a standalone function like below. + /// + template + inline void* allocate_memory(Allocator& a, size_t n, size_t alignment, size_t alignmentOffset) + { + void *result; + if (alignment <= EASTL_ALLOCATOR_MIN_ALIGNMENT) + { + result = EASTLAlloc(a, n); + // Ensure the result is correctly aligned. An assertion likely indicates a mismatch between EASTL_ALLOCATOR_MIN_ALIGNMENT and the minimum alignment + // of EASTLAlloc. If there is a mismatch it may be necessary to define EASTL_ALLOCATOR_MIN_ALIGNMENT to be the minimum alignment of EASTLAlloc, or + // to increase the alignment of EASTLAlloc to match EASTL_ALLOCATOR_MIN_ALIGNMENT. + EASTL_ASSERT((reinterpret_cast(result)& ~(alignment - 1)) == reinterpret_cast(result)); + } + else + { + result = EASTLAllocAligned(a, n, alignment, alignmentOffset); + // Ensure the result is correctly aligned. An assertion here may indicate a bug in the allocator. + auto resultMinusOffset = (char*)result - alignmentOffset; + EA_UNUSED(resultMinusOffset); + EASTL_ASSERT((reinterpret_cast(resultMinusOffset)& ~(alignment - 1)) == reinterpret_cast(resultMinusOffset)); + } + return result; + } + +} + + +#endif // Header include guard + + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/allocator_malloc.h b/libkram/eastl/include/EASTL/allocator_malloc.h new file mode 100644 index 00000000..a13d1165 --- /dev/null +++ b/libkram/eastl/include/EASTL/allocator_malloc.h @@ -0,0 +1,130 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ALLOCATOR_MALLOC_H +#define EASTL_ALLOCATOR_MALLOC_H + + +#include +#include +#include + + +// EASTL_ALIGNED_MALLOC_AVAILABLE +// +// Identifies if the standard library provides a built-in aligned version of malloc. +// Defined as 0 or 1, depending on the standard library or platform availability. +// None of the viable C functions provides for an aligned malloc with offset, so we +// don't consider that supported in any case. +// +// Options for aligned allocations: +// C11 aligned_alloc http://linux.die.net/man/3/aligned_alloc +// glibc memalign http://linux.die.net/man/3/posix_memalign +// Posix posix_memalign http://pubs.opengroup.org/onlinepubs/000095399/functions/posix_memalign.html +// VC++ _aligned_malloc http://msdn.microsoft.com/en-us/library/8z34s9c6%28VS.80%29.aspx This is not suitable, since it has a limitation that you need to free via _aligned_free. +// +#if !defined EASTL_ALIGNED_MALLOC_AVAILABLE + #if defined(EA_PLATFORM_POSIX) // && !defined(EA_PLATFORM_APPLE) + // memalign is more consistently available than posix_memalign, though its location isn't consistent across + // platforms and compiler libraries. 
Typically it's declared in one of three headers: stdlib.h, malloc.h, or malloc/malloc.h + #include // memalign, posix_memalign. + #define EASTL_ALIGNED_MALLOC_AVAILABLE 1 + + #if EA_HAS_INCLUDE_AVAILABLE + #if EA_HAS_INCLUDE() + #include + #elif EA_HAS_INCLUDE() + #include + #endif + #elif defined(EA_PLATFORM_BSD) + #include + #elif defined(EA_COMPILER_CLANG) + #if __has_include() + #include + #elif __has_include() + #include + #endif + #else + #include + #endif + #else + #define EASTL_ALIGNED_MALLOC_AVAILABLE 0 + #endif +#endif + + +namespace eastl +{ + + /////////////////////////////////////////////////////////////////////////////// + // allocator_malloc + // + // Implements an EASTL allocator that uses malloc/free as opposed to + // new/delete or PPMalloc Malloc/Free. + // + // Example usage: + // vector intVector; + // + class allocator_malloc + { + public: + allocator_malloc(const char* = NULL) + { } + + allocator_malloc(const allocator_malloc&) + { } + + allocator_malloc(const allocator_malloc&, const char*) + { } + + allocator_malloc& operator=(const allocator_malloc&) + { return *this; } + + bool operator==(const allocator_malloc&) + { return true; } + + bool operator!=(const allocator_malloc&) + { return false; } + + void* allocate(size_t n, int /*flags*/ = 0) + { return malloc(n); } + + void* allocate(size_t n, size_t alignment, size_t alignmentOffset, int /*flags*/ = 0) + { + #if EASTL_ALIGNED_MALLOC_AVAILABLE + if((alignmentOffset % alignment) == 0) // We check for (offset % alignmnent == 0) instead of (offset == 0) because any block which is aligned on e.g. 64 also is aligned at an offset of 64 by definition. + return memalign(alignment, n); // memalign is more consistently available than posix_memalign. + #else + if((alignment <= EASTL_SYSTEM_ALLOCATOR_MIN_ALIGNMENT) && ((alignmentOffset % alignment) == 0)) + return malloc(n); + #endif + return NULL; + } + + void deallocate(void* p, size_t /*n*/) + { free(p); } + + const char* get_name() const + { return "allocator_malloc"; } + + void set_name(const char*) + { } + }; + + +} // namespace eastl + + + +#endif // Header include guard + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/any.h b/libkram/eastl/include/EASTL/any.h new file mode 100644 index 00000000..c2ef6388 --- /dev/null +++ b/libkram/eastl/include/EASTL/any.h @@ -0,0 +1,652 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +/////////////////////////////////////////////////////////////////////////////// +// This file implements the eastl::any which is part of the C++ standard STL +// library specification. +// +// eastl::any is a type-safe container for single values of any type. Our +// implementation makes use of the "small local buffer" optimization to avoid +// unnecessary dynamic memory allocation if the specified type is eligible to +// be stored in its local buffer. The user type must satisfy the size +// requirements and must be no-throw move-constructible to qualify for the local +// buffer optimization. +// +// To consider: Implement a fixed_any variant to allow users to customize +// the size of the "small local buffer" optimization. 
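+// Example usage (illustrative sketch, not part of the original EASTL sources; the
+// eastl::string lines assume <EASTL/string.h> is included):
+//     eastl::any a = 42;                                       // small type: stored in the local buffer
+//     int i = eastl::any_cast<int>(a);                         // i == 42
+//     a = eastl::string("hello");                              // rebinds the held type
+//     eastl::string* p = eastl::any_cast<eastl::string>(&a);   // returns nullptr if the type doesn't match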
+// +// http://en.cppreference.com/w/cpp/utility/any +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ANY_H +#define EASTL_ANY_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + +#include +#include +#if EASTL_RTTI_ENABLED + #include +#endif +#if EASTL_EXCEPTIONS_ENABLED + #include +#endif + + +namespace eastl +{ + /////////////////////////////////////////////////////////////////////////////// + // bad_any_cast + // + // The type thrown by any_cast on failure. + // + // http://en.cppreference.com/w/cpp/utility/any/bad_any_cast + // + #if EASTL_EXCEPTIONS_ENABLED + struct bad_cast : std::exception + { + const char* what() const EA_NOEXCEPT EA_OVERRIDE + { return "bad cast"; } + }; + + struct bad_any_cast : public bad_cast + { + const char* what() const EA_NOEXCEPT EA_OVERRIDE + { return "bad_any_cast"; } + }; + #endif + + namespace Internal + { + // utility to switch between exceptions and asserts + inline void DoBadAnyCast() + { + #if EASTL_EXCEPTIONS_ENABLED + throw bad_any_cast(); + #else + EASTL_ASSERT_MSG(false, "bad_any_cast\n"); + + // NOTE(rparolin): CRASH! + // You crashed here because you requested a type that was not contained in the object. + // We choose to intentionally crash here instead of returning invalid data to the calling + // code which could cause hard to track down bugs. + *((volatile int*)0) = 0xDEADC0DE; + #endif + } + + template + void* DefaultConstruct(Args&&... args) + { + auto* pMem = EASTLAllocatorDefault()->allocate(sizeof(T), alignof(T), 0); + + return ::new(pMem) T(eastl::forward(args)...); + } + + template + void DefaultDestroy(T* p) + { + p->~T(); + + EASTLAllocatorDefault()->deallocate(static_cast(p), sizeof(T)); + } + } + + + /////////////////////////////////////////////////////////////////////////////// + // 20.7.3, class any + // + class any + { + ////////////////////////////////////////////////////////////////////////////////////////// + // storage_operation + // + // operations supported by the storage handler + // + enum class storage_operation + { + GET, + DESTROY, + COPY, + MOVE, + TYPE_INFO + }; + + + ////////////////////////////////////////////////////////////////////////////////////////// + // storage + // + // the underlying storage type which enables the switching between objects stored in + // the heap and objects stored within the any type. 
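+		// Illustrative note (not part of the original EASTL sources): the internal buffer
+		// below is sized to hold 4 pointers (e.g. 32 bytes on a 64-bit platform), so a type
+		// that fits, is suitably aligned and is nothrow-move-constructible is stored in
+		// place; anything else is heap-allocated through the default EASTL allocator.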
+ // + union storage + { + typedef aligned_storage_t<4 * sizeof(void*), alignment_of::value> internal_storage_t; + + void* external_storage = nullptr; + internal_storage_t internal_storage; + }; + + + ////////////////////////////////////////////////////////////////////////////////////////// + // use_internal_storage + // + // determines when the "local buffer optimization" is used + // + template + using use_internal_storage = bool_constant + < + is_nothrow_move_constructible::value + && (sizeof(T) <= sizeof(storage)) && + (alignment_of::value % alignment_of::value == 0) + >; + + + ////////////////////////////////////////////////////////////////////////////////////////// + // non-member friend functions + // + template friend const ValueType* any_cast(const any* pAny) EA_NOEXCEPT; + template friend ValueType* any_cast(any* pAny) EA_NOEXCEPT; + template friend ValueType any_cast(const any& operand); + template friend ValueType any_cast(any& operand); + template friend ValueType any_cast(any&& operand); + + //Adding Unsafe any cast operations + template friend const ValueType* unsafe_any_cast(const any* pAny) EA_NOEXCEPT; + template friend ValueType* unsafe_any_cast(any* pAny) EA_NOEXCEPT; + + + ////////////////////////////////////////////////////////////////////////////////////////// + // internal storage handler + // + template + struct storage_handler_internal + { + template + static void construct(storage& s, V&& v) + { + ::new(&s.internal_storage) T(eastl::forward(v)); + } + + template + static void construct_inplace(storage& s, Args... args) + { + ::new(&s.internal_storage) T(eastl::forward(args)...); + } + + template + static void construct_inplace(storage& s, std::initializer_list il, Args&&... args) + { + ::new(&s.internal_storage) NT(il, eastl::forward(args)...); + } + + static inline void destroy(any& refAny) + { + T& t = *static_cast(static_cast(&refAny.m_storage.internal_storage)); + EA_UNUSED(t); + t.~T(); + + refAny.m_handler = nullptr; + } + + static void* handler_func(storage_operation op, const any* pThis, any* pOther) + { + switch (op) + { + case storage_operation::GET: + { + EASTL_ASSERT(pThis); + return (void*)(&pThis->m_storage.internal_storage); + } + break; + + case storage_operation::DESTROY: + { + EASTL_ASSERT(pThis); + destroy(const_cast(*pThis)); + } + break; + + case storage_operation::COPY: + { + EASTL_ASSERT(pThis); + EASTL_ASSERT(pOther); + construct(pOther->m_storage, *(T*)(&pThis->m_storage.internal_storage)); + } + break; + + case storage_operation::MOVE: + { + EASTL_ASSERT(pThis); + EASTL_ASSERT(pOther); + construct(pOther->m_storage, eastl::move(*(T*)(&pThis->m_storage.internal_storage))); + destroy(const_cast(*pThis)); + } + break; + + case storage_operation::TYPE_INFO: + { + #if EASTL_RTTI_ENABLED + return (void*)&typeid(T); + #endif + } + break; + + default: + { + EASTL_ASSERT_MSG(false, "unknown storage operation\n"); + } + break; + }; + + return nullptr; + } + }; + + + + ////////////////////////////////////////////////////////////////////////////////////////// + // external storage handler + // + template + struct storage_handler_external + { + template + static inline void construct(storage& s, V&& v) + { + s.external_storage = Internal::DefaultConstruct(eastl::forward(v)); + } + + template + static inline void construct_inplace(storage& s, Args... args) + { + s.external_storage = Internal::DefaultConstruct(eastl::forward(args)...); + } + + template + static inline void construct_inplace(storage& s, std::initializer_list il, Args&&... 
args) + { + s.external_storage = Internal::DefaultConstruct(il, eastl::forward(args)...); + } + + static inline void destroy(any& refAny) + { + Internal::DefaultDestroy(static_cast(refAny.m_storage.external_storage)); + + refAny.m_handler = nullptr; + } + + static void* handler_func(storage_operation op, const any* pThis, any* pOther) + { + switch (op) + { + case storage_operation::GET: + { + EASTL_ASSERT(pThis); + EASTL_ASSERT(pThis->m_storage.external_storage); + return static_cast(pThis->m_storage.external_storage); + } + break; + + case storage_operation::DESTROY: + { + EASTL_ASSERT(pThis); + destroy(*const_cast(pThis)); + } + break; + + case storage_operation::COPY: + { + EASTL_ASSERT(pThis); + EASTL_ASSERT(pOther); + construct(pOther->m_storage, *static_cast(pThis->m_storage.external_storage)); + } + break; + + case storage_operation::MOVE: + { + EASTL_ASSERT(pThis); + EASTL_ASSERT(pOther); + construct(pOther->m_storage, eastl::move(*(T*)(pThis->m_storage.external_storage))); + destroy(const_cast(*pThis)); + } + break; + + case storage_operation::TYPE_INFO: + { + #if EASTL_RTTI_ENABLED + return (void*)&typeid(T); + #endif + } + break; + + default: + { + EASTL_ASSERT_MSG(false, "unknown storage operation\n"); + } + break; + }; + + return nullptr; + } + }; + + + ////////////////////////////////////////////////////////////////////////////////////////// + // storage_handler_ptr + // + // defines the function signature of the storage handler that both the internal and + // external storage handlers must implement to retrieve the underlying type of the any + // object. + // + using storage_handler_ptr = void* (*)(storage_operation, const any*, any*); + + + ////////////////////////////////////////////////////////////////////////////////////////// + // storage_handler + // + // based on the specified type T we select the appropriate underlying storage handler + // based on the 'use_internal_storage' trait. + // + template + using storage_handler = typename conditional::value, + storage_handler_internal, + storage_handler_external>::type; + + + ////////////////////////////////////////////////////////////////////////////////////////// + // data layout + // + storage m_storage; + storage_handler_ptr m_handler; + + public: + #ifndef EA_COMPILER_GNUC + // TODO(rparolin): renable constexpr for GCC + EA_CONSTEXPR + #endif + any() EA_NOEXCEPT + : m_storage(), m_handler(nullptr) {} + + any(const any& other) : m_handler(nullptr) + { + if (other.m_handler) + { + // NOTE(rparolin): You can not simply copy the underlying + // storage because it could hold a pointer to an object on the + // heap which breaks the copy semantics of the language. + other.m_handler(storage_operation::COPY, &other, this); + m_handler = other.m_handler; + } + } + + any(any&& other) EA_NOEXCEPT : m_handler(nullptr) + { + if(other.m_handler) + { + // NOTE(rparolin): You can not simply move the underlying + // storage because because the storage class has effectively + // type erased user type so we have to defer to the handler + // function to get the type back and pass on the move request. 
+ m_handler = eastl::move(other.m_handler); + other.m_handler(storage_operation::MOVE, &other, this); + } + } + + ~any() { reset(); } + + template + any(ValueType&& value, + typename eastl::enable_if::type, any>::value>::type* = 0) + { + typedef decay_t DecayedValueType; + static_assert(is_copy_constructible::value, "ValueType must be copy-constructible"); + storage_handler::construct(m_storage, eastl::forward(value)); + m_handler = &storage_handler::handler_func; + } + + template + explicit any(in_place_type_t, Args&&... args) + { + typedef storage_handler> StorageHandlerT; + static_assert(eastl::is_constructible::value, "T must be constructible with Args..."); + + StorageHandlerT::construct_inplace(m_storage, eastl::forward(args)...); + m_handler = &StorageHandlerT::handler_func; + } + + template + explicit any(in_place_type_t, + std::initializer_list il, + Args&&... args, + typename eastl::enable_if&, Args...>::value, + void>::type* = 0) + { + typedef storage_handler> StorageHandlerT; + + StorageHandlerT::construct_inplace(m_storage, il, eastl::forward(args)...); + m_handler = &StorageHandlerT::handler_func; + } + + // 20.7.3.2, assignments + template + any& operator=(ValueType&& value) + { + static_assert(is_copy_constructible>::value, "ValueType must be copy-constructible"); + any(eastl::forward(value)).swap(*this); + return *this; + } + + any& operator=(const any& other) + { + any(other).swap(*this); + return *this; + } + + any& operator=(any&& other) EA_NOEXCEPT + { + any(eastl::move(other)).swap(*this); + return *this; + } + + // 20.7.3.3, modifiers + #if EASTL_VARIADIC_TEMPLATES_ENABLED + template + void emplace(Args&&... args) + { + typedef storage_handler> StorageHandlerT; + static_assert(eastl::is_constructible::value, "T must be constructible with Args..."); + + reset(); + StorageHandlerT::construct_inplace(m_storage, eastl::forward(args)...); + m_handler = &StorageHandlerT::handler_func; + } + + template + typename eastl::enable_if&, Args...>::value, void>::type + emplace(std::initializer_list il, Args&&... 
args) + { + typedef storage_handler> StorageHandlerT; + + reset(); + StorageHandlerT::construct_inplace(m_storage, il, eastl::forward(args)...); + m_handler = &StorageHandlerT::handler_func; + } + #endif + + void reset() EA_NOEXCEPT + { + if(m_handler) + m_handler(storage_operation::DESTROY, this, nullptr); + } + + void swap(any& other) EA_NOEXCEPT + { + if(this == &other) + return; + + if(m_handler && other.m_handler) + { + any tmp; + tmp.m_handler = other.m_handler; + other.m_handler(storage_operation::MOVE, &other, &tmp); + + other.m_handler = m_handler; + m_handler(storage_operation::MOVE, this, &other); + + m_handler = tmp.m_handler; + tmp.m_handler(storage_operation::MOVE, &tmp, this); + } + else if (m_handler == nullptr && other.m_handler) + { + eastl::swap(m_handler, other.m_handler); + m_handler(storage_operation::MOVE, &other, this); + } + else if(m_handler && other.m_handler == nullptr) + { + eastl::swap(m_handler, other.m_handler); + other.m_handler(storage_operation::MOVE, this, &other); + } + //else if (m_handler == nullptr && other.m_handler == nullptr) + //{ + // // nothing to swap + //} + } + + // 20.7.3.4, observers + bool has_value() const EA_NOEXCEPT { return m_handler != nullptr; } + + #if EASTL_RTTI_ENABLED + inline const std::type_info& type() const EA_NOEXCEPT + { + if(m_handler) + { + auto* pTypeInfo = m_handler(storage_operation::TYPE_INFO, this, nullptr); + return *static_cast(pTypeInfo); + } + else + { + return typeid(void); + } + } + #endif + }; + + + + ////////////////////////////////////////////////////////////////////////////////////////// + // 20.7.4, non-member functions + // + inline void swap(any& rhs, any& lhs) EA_NOEXCEPT { rhs.swap(lhs); } + + + ////////////////////////////////////////////////////////////////////////////////////////// + // 20.7.4, The non-member any_cast functions provide type-safe access to the contained object. + // + template + inline ValueType any_cast(const any& operand) + { + static_assert(eastl::is_reference::value || eastl::is_copy_constructible::value, + "ValueType must be a reference or copy constructible"); + + auto* p = any_cast::type>::type>(&operand); + + if(p == nullptr) + Internal::DoBadAnyCast(); + + return *p; + } + + template + inline ValueType any_cast(any& operand) + { + static_assert(eastl::is_reference::value || eastl::is_copy_constructible::value, + "ValueType must be a reference or copy constructible"); + + auto* p = any_cast::type>(&operand); + + if(p == nullptr) + Internal::DoBadAnyCast(); + + return *p; + } + + template + inline ValueType any_cast(any&& operand) + { + static_assert(eastl::is_reference::value || eastl::is_copy_constructible::value, + "ValueType must be a reference or copy constructible"); + + auto* p = any_cast::type>(&operand); + + if (p == nullptr) + Internal::DoBadAnyCast(); + + return *p; + } + + // NOTE(rparolin): The runtime type check was commented out because in DLL builds the templated function pointer + // value will be different -- completely breaking the validation mechanism. Due to the fact that eastl::any uses + // type erasure we can't refresh (on copy/move) the cached function pointer to the internal handler function because + // we don't statically know the type. + template + inline const ValueType* any_cast(const any* pAny) EA_NOEXCEPT + { + return (pAny && pAny->m_handler EASTL_IF_NOT_DLL(== &any::storage_handler>::handler_func) + #if EASTL_RTTI_ENABLED + && pAny->type() == typeid(typename remove_reference::type) + #endif + ) ? 
+ static_cast(pAny->m_handler(any::storage_operation::GET, pAny, nullptr)) : + nullptr; + } + + template + inline ValueType* any_cast(any* pAny) EA_NOEXCEPT + { + return (pAny && pAny->m_handler EASTL_IF_NOT_DLL(== &any::storage_handler>::handler_func) + #if EASTL_RTTI_ENABLED + && pAny->type() == typeid(typename remove_reference::type) + #endif + ) ? + static_cast(pAny->m_handler(any::storage_operation::GET, pAny, nullptr)) : + nullptr; + } + + //Unsafe operations - use with caution + template + inline const ValueType* unsafe_any_cast(const any* pAny) EA_NOEXCEPT + { + return unsafe_any_cast(const_cast(pAny)); + } + + template + inline ValueType* unsafe_any_cast(any* pAny) EA_NOEXCEPT + { + return static_cast(pAny->m_handler(any::storage_operation::GET, pAny, nullptr)); + } + + ////////////////////////////////////////////////////////////////////////////////////////// + // make_any + // + #if EASTL_VARIADIC_TEMPLATES_ENABLED + template + inline any make_any(Args&&... args) + { + return any(eastl::in_place, eastl::forward(args)...); + } + + template + inline any make_any(std::initializer_list il, Args&&... args) + { + return any(eastl::in_place, il, eastl::forward(args)...); + } + #endif + +} // namespace eastl + +#endif // EASTL_ANY_H diff --git a/libkram/eastl/include/EASTL/array.h b/libkram/eastl/include/EASTL/array.h new file mode 100644 index 00000000..590aa94b --- /dev/null +++ b/libkram/eastl/include/EASTL/array.h @@ -0,0 +1,530 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +/////////////////////////////////////////////////////////////////////////////// +// Implements a templated array class as per the C++ standard TR1 (technical +// report 1, which is a list of proposed C++ library amendments). +// The primary distinctions between this array and TR1 array are: +// - array::size_type is defined as eastl_size_t instead of size_t in order +// to save memory and run faster on 64 bit systems. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ARRAY_H +#define EASTL_ARRAY_H + + +#include +#include +#include +#include +#include + +#if EASTL_EXCEPTIONS_ENABLED + EA_DISABLE_ALL_VC_WARNINGS() + #include // std::out_of_range, std::length_error. + EA_RESTORE_ALL_VC_WARNINGS() +#endif + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /////////////////////////////////////////////////////////////////////// + /// array + /// + /// Implements a templated array class as per the C++ standard TR1. + /// This class allows you to use a built-in C style array like an STL vector. + /// It does not let you change its size, as it is just like a C built-in array. + /// Our implementation here strives to remove function call nesting, as that + /// makes it hard for us to profile debug builds due to function call overhead. + /// Note that this is intentionally a struct with public data, as per the + /// C++ standard update proposal requirements. + /// + /// Example usage: + /// array a = { { 0, 1, 2, 3, 4 } }; // Strict compilers such as GCC require the double brackets. 
+ /// a[2] = 4; + /// for(array::iterator i = a.begin(); i < a.end(); ++i) + /// *i = 0; + /// + template + struct array + { + public: + typedef array this_type; + typedef T value_type; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef value_type* iterator; + typedef const value_type* const_iterator; + typedef eastl::reverse_iterator reverse_iterator; + typedef eastl::reverse_iterator const_reverse_iterator; + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. + typedef ptrdiff_t difference_type; + + public: + enum + { + count = N + }; + + // Note that the member data is intentionally public. + // This allows for aggregate initialization of the + // object (e.g. array a = { 0, 3, 2, 4 }; ) + value_type mValue[N ? N : 1]; + + public: + // We intentionally provide no constructor, destructor, or assignment operator. + + void fill(const value_type& value); + + // Unlike the swap function for other containers, array::swap takes linear time, + // may exit via an exception, and does not cause iterators to become associated with the other container. + void swap(this_type& x) EA_NOEXCEPT_IF(eastl::is_nothrow_swappable::value); + + EA_CPP14_CONSTEXPR iterator begin() EA_NOEXCEPT; + EA_CPP14_CONSTEXPR const_iterator begin() const EA_NOEXCEPT; + EA_CPP14_CONSTEXPR const_iterator cbegin() const EA_NOEXCEPT; + + EA_CPP14_CONSTEXPR iterator end() EA_NOEXCEPT; + EA_CPP14_CONSTEXPR const_iterator end() const EA_NOEXCEPT; + EA_CPP14_CONSTEXPR const_iterator cend() const EA_NOEXCEPT; + + EA_CPP14_CONSTEXPR reverse_iterator rbegin() EA_NOEXCEPT; + EA_CPP14_CONSTEXPR const_reverse_iterator rbegin() const EA_NOEXCEPT; + EA_CPP14_CONSTEXPR const_reverse_iterator crbegin() const EA_NOEXCEPT; + + EA_CPP14_CONSTEXPR reverse_iterator rend() EA_NOEXCEPT; + EA_CPP14_CONSTEXPR const_reverse_iterator rend() const EA_NOEXCEPT; + EA_CPP14_CONSTEXPR const_reverse_iterator crend() const EA_NOEXCEPT; + + EA_CPP14_CONSTEXPR bool empty() const EA_NOEXCEPT; + EA_CPP14_CONSTEXPR size_type size() const EA_NOEXCEPT; + EA_CPP14_CONSTEXPR size_type max_size() const EA_NOEXCEPT; + + EA_CPP14_CONSTEXPR T* data() EA_NOEXCEPT; + EA_CPP14_CONSTEXPR const T* data() const EA_NOEXCEPT; + + EA_CPP14_CONSTEXPR reference operator[](size_type i); + EA_CPP14_CONSTEXPR const_reference operator[](size_type i) const; + EA_CPP14_CONSTEXPR const_reference at(size_type i) const; + EA_CPP14_CONSTEXPR reference at(size_type i); + + EA_CPP14_CONSTEXPR reference front(); + EA_CPP14_CONSTEXPR const_reference front() const; + + EA_CPP14_CONSTEXPR reference back(); + EA_CPP14_CONSTEXPR const_reference back() const; + + bool validate() const; + int validate_iterator(const_iterator i) const; + + }; // class array + + + /////////////////////////////////////////////////////////////////////////// + // template deduction guides + /////////////////////////////////////////////////////////////////////////// + #ifdef __cpp_deduction_guides + template array(T, U...) 
-> array; + #endif + + + /////////////////////////////////////////////////////////////////////// + // array + /////////////////////////////////////////////////////////////////////// + + + template + inline void array::fill(const value_type& value) + { + eastl::fill_n(&mValue[0], N, value); + } + + + template + inline void array::swap(this_type& x) EA_NOEXCEPT_IF(eastl::is_nothrow_swappable::value) + { + eastl::swap_ranges(&mValue[0], &mValue[N], &x.mValue[0]); + } + + + template + EA_CPP14_CONSTEXPR inline typename array::iterator + array::begin() EA_NOEXCEPT + { + return &mValue[0]; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_iterator + array::begin() const EA_NOEXCEPT + { + return &mValue[0]; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_iterator + array::cbegin() const EA_NOEXCEPT + { + return &mValue[0]; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::iterator + array::end() EA_NOEXCEPT + { + return &mValue[N]; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_iterator + array::end() const EA_NOEXCEPT + { + return &mValue[N]; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_iterator + array::cend() const EA_NOEXCEPT + { + return &mValue[N]; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::reverse_iterator + array::rbegin() EA_NOEXCEPT + { + return reverse_iterator(&mValue[N]); + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_reverse_iterator + array::rbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(&mValue[N]); + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_reverse_iterator + array::crbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(&mValue[N]); + } + + + template + EA_CPP14_CONSTEXPR inline typename array::reverse_iterator + array::rend() EA_NOEXCEPT + { + return reverse_iterator(&mValue[0]); + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_reverse_iterator + array::rend() const EA_NOEXCEPT + { + return const_reverse_iterator(static_cast(&mValue[0])); + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_reverse_iterator + array::crend() const EA_NOEXCEPT + { + return const_reverse_iterator(static_cast(&mValue[0])); + } + + + template + EA_CPP14_CONSTEXPR inline typename array::size_type + array::size() const EA_NOEXCEPT + { + return (size_type)N; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::size_type + array::max_size() const EA_NOEXCEPT + { + return (size_type)N; + } + + + template + EA_CPP14_CONSTEXPR inline bool array::empty() const EA_NOEXCEPT + { + return (N == 0); + } + + + template + EA_CPP14_CONSTEXPR inline typename array::reference + array::operator[](size_type i) + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(i >= N)) + EASTL_FAIL_MSG("array::operator[] -- out of range"); + #endif + + EA_ANALYSIS_ASSUME(i < N); + return mValue[i]; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_reference + array::operator[](size_type i) const + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(i >= N)) + EASTL_FAIL_MSG("array::operator[] -- out of range"); + + #endif + + EA_ANALYSIS_ASSUME(i < N); + return mValue[i]; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::reference + array::front() + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(empty())) // We don't allow the user to reference an empty container. 
+ EASTL_FAIL_MSG("array::front -- empty array"); + #endif + + return mValue[0]; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_reference + array::front() const + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(empty())) // We don't allow the user to reference an empty container. + EASTL_FAIL_MSG("array::front -- empty array"); + #endif + + return mValue[0]; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::reference + array::back() + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(empty())) // We don't allow the user to reference an empty container. + EASTL_FAIL_MSG("array::back -- empty array"); + #endif + + return mValue[N - 1]; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_reference + array::back() const + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(empty())) // We don't allow the user to reference an empty container. + EASTL_FAIL_MSG("array::back -- empty array"); + #endif + + return mValue[N - 1]; + } + + + template + EA_CPP14_CONSTEXPR inline T* array::data() EA_NOEXCEPT + { + return mValue; + } + + + template + EA_CPP14_CONSTEXPR inline const T* array::data() const EA_NOEXCEPT + { + return mValue; + } + + + template + EA_CPP14_CONSTEXPR inline typename array::const_reference array::at(size_type i) const + { + #if EASTL_EXCEPTIONS_ENABLED + if(EASTL_UNLIKELY(i >= N)) + throw std::out_of_range("array::at -- out of range"); + #elif EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(i >= N)) + EASTL_FAIL_MSG("array::at -- out of range"); + #endif + + EA_ANALYSIS_ASSUME(i < N); + return static_cast(mValue[i]); + } + + + template + EA_CPP14_CONSTEXPR inline typename array::reference array::at(size_type i) + { + #if EASTL_EXCEPTIONS_ENABLED + if(EASTL_UNLIKELY(i >= N)) + throw std::out_of_range("array::at -- out of range"); + #elif EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(i >= N)) + EASTL_FAIL_MSG("array::at -- out of range"); + #endif + + EA_ANALYSIS_ASSUME(i < N); + return static_cast(mValue[i]); + } + + + template + inline bool array::validate() const + { + return true; // There is nothing to do. 
+ } + + + template + inline int array::validate_iterator(const_iterator i) const + { + if(i >= mValue) + { + if(i < (mValue + N)) + return (isf_valid | isf_current | isf_can_dereference); + + if(i <= (mValue + N)) + return (isf_valid | isf_current); + } + + return isf_none; + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + EA_CPP14_CONSTEXPR inline bool operator==(const array& a, const array& b) + { + return eastl::equal(&a.mValue[0], &a.mValue[N], &b.mValue[0]); + } + + + template + EA_CPP14_CONSTEXPR inline bool operator<(const array& a, const array& b) + { + return eastl::lexicographical_compare(&a.mValue[0], &a.mValue[N], &b.mValue[0], &b.mValue[N]); + } + + + template + EA_CPP14_CONSTEXPR inline bool operator!=(const array& a, const array& b) + { + return !eastl::equal(&a.mValue[0], &a.mValue[N], &b.mValue[0]); + } + + + template + EA_CPP14_CONSTEXPR inline bool operator>(const array& a, const array& b) + { + return eastl::lexicographical_compare(&b.mValue[0], &b.mValue[N], &a.mValue[0], &a.mValue[N]); + } + + + template + EA_CPP14_CONSTEXPR inline bool operator<=(const array& a, const array& b) + { + return !eastl::lexicographical_compare(&b.mValue[0], &b.mValue[N], &a.mValue[0], &a.mValue[N]); + } + + + template + EA_CPP14_CONSTEXPR inline bool operator>=(const array& a, const array& b) + { + return !eastl::lexicographical_compare(&a.mValue[0], &a.mValue[N], &b.mValue[0], &b.mValue[N]); + } + + + template + inline void swap(array& a, array& b) + { + eastl::swap_ranges(&a.mValue[0], &a.mValue[N], &b.mValue[0]); + } + + + /////////////////////////////////////////////////////////////////////// + // to_array + /////////////////////////////////////////////////////////////////////// + namespace internal + { + template + EA_CONSTEXPR auto to_array(T (&a)[N], index_sequence) + { + return eastl::array, N>{{a[I]...}}; + } + + template + EA_CONSTEXPR auto to_array(T (&&a)[N], index_sequence) + { + return eastl::array, N>{{eastl::move(a[I])...}}; + } + } + + template + EA_CONSTEXPR eastl::array, N> to_array(T (&a)[N]) + { + static_assert(eastl::is_constructible_v, "element type T must be copy-initializable"); + static_assert(!eastl::is_array_v, "passing multidimensional arrays to to_array is ill-formed"); + return internal::to_array(a, eastl::make_index_sequence{}); + } + + template + EA_CONSTEXPR eastl::array, N> to_array(T (&&a)[N]) + { + static_assert(eastl::is_move_constructible_v, "element type T must be move-constructible"); + static_assert(!eastl::is_array_v, "passing multidimensional arrays to to_array is ill-formed"); + return internal::to_array(eastl::move(a), eastl::make_index_sequence{}); + } + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/atomic.h b/libkram/eastl/include/EASTL/atomic.h new file mode 100644 index 00000000..27117e9c --- /dev/null +++ b/libkram/eastl/include/EASTL/atomic.h @@ -0,0 +1,1772 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_H +#define EASTL_ATOMIC_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// Below is the documentation of the API of the eastl::atomic library. +// This includes class and free functions. +// Anything marked with a '+' in front of the name is an extension to the std API. +// + + +///////////////////////////////////////////////////////////////////////////////// +// +// eastl::atomic memory_order API +// +// See below for full explanations on the memory orders and their guarantees. +// +// - eastl::memory_order_relaxed +// - eastl::memory_order_acquire +// - eastl::memory_order_release +// - eastl::memory_order_acq_rel +// - eastl::memory_order_seq_cst +// - +eastl::memory_order_read_depends +// + + +///////////////////////////////////////////////////////////////////////////////// +// +// eastl::atomic class API +// +// All jargon and prerequisite knowledge is explained below. +// +// Unless otherwise specified all orders except read_depends is a valid order +// on the given operation. +// Unless otherwise specified all operations are valid on all types T. +// If no order is provided, seq_cst memory ordering is used for the operation. +// +// - atomic() : Value-initializes the underlying object as T{}. +// +// - atomic(T) : Initializes the underlying object with a copy of T. +// +// - T operator=(T) : Atomically assigns T as store(T, seq_cst). +// +// - is_lock_free() : true if the operations are lockfree. Always true for eastl. +// +// - store(T, order) : Atomically stores T affecting memory according to order. +// : Valid orders are relaxed, release, and seq_cst. +// +// - T load(order) : Atomically loads T affecting memory according to order. +// : Valid orders are relaxed, acquire, and seq_cst. +// : If T is a pointer type, read_depends is another valid order. +// +// - operator T() : Atomically loads T as load(T, seq_cst). +// +// - T exchange(T, order) : Atomically performs a RMW that replaces the current value with T. +// : Memory is affected according to order. +// : Returns the previous value stored before the RMW operation. +// +// - bool compare_exchange_weak(T&, T, successOrder, failOrder) +// : Atomically compares the value stored with that of T& and if equal replaces it with T. +// : This is a RMW operation. +// : If the comparison fails, loads the observed value into T&. This is a load operation. +// : Memory is affected in the RMW operation according to successOrder. +// : Memory is affected in the load operation according to failOrder. +// : failOrder cannot be a stronger order than successOrder. +// : Returns true or false if the comparison succeeded and T was stored into the atomic object. +// : +// : The weak variant may fail even if the observed value of the atomic object equals T&. +// : This can yield performance gains on platforms with ld/str exclusive pair instructions especially +// : when the compare_exchange operation is done in a loop. +// : Only the bool return value can be used to determine if the operation was successful. +// +// - bool compare_exchange_weak(T&, T, order) +// : Same as the above except that order is used for both the RMW and the load operation. +// : If order == acq_rel then the order of the load operation equals acquire. +// : If order == release then the order of the load operation equals relaxed. 
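+//
+//      Example (illustrative sketch only; 'gCounter' and 'IncrementClamped' are hypothetical names,
+//      not part of the API): a typical compare_exchange_weak retry loop. On failure the observed
+//      value is reloaded into the first argument, so the desired value is recomputed each iteration.
+//
+//          eastl::atomic<int> gCounter{0};
+//
+//          // Saturating increment: never advance gCounter past maxValue.
+//          void IncrementClamped(int maxValue)
+//          {
+//              int observed = gCounter.load(eastl::memory_order_relaxed);
+//              do
+//              {
+//                  if (observed >= maxValue)
+//                      return; // already at the cap; nothing to store
+//              } while (!gCounter.compare_exchange_weak(observed, observed + 1, eastl::memory_order_relaxed));
+//          }
+//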
+// +// - bool compare_exchange_strong(T&, T, successOrder, failOrder) +// - bool compare_exchange_strong(T&, T, order) +// : This operation is the same as the above weak variants +// : expect that it will not fail spuriously if the value stored equals T&. +// +// The below operations are only valid for Integral types. +// +// - T fetch_add(T, order) +// : Atomically performs a RMW that increments the value stored with T. +// : Returns the previous value stored before the RMW operation. +// - T fetch_sub(T, order) +// : Atomically performs a RMW that decrements the value stored with T. +// : Returns the previous value stored before the RMW operation. +// - T fetch_and(T, order) +// : Atomically performs a RMW that bit-wise and's the value stored with T. +// : Returns the previous value stored before the RMW operation. +// - T fetch_or(T, order) +// : Atomically performs a RMW that bit-wise or's the value stored with T. +// : Returns the previous value stored before the RMW operation. +// - T fetch_xor(T, order) +// : Atomically performs a RMW that bit-wise xor's the value stored with T. +// : Returns the previous value stored before the RMW operation. +// +// - +T add_fetch(T, order) +// : Atomically performs a RMW that increments the value stored with T. +// : Returns the new updated value after the operation. +// - +T sub_fetch(T, order) +// : Atomically performs a RMW that decrements the value stored with T. +// : Returns the new updated value after the operation. +// - +T and_fetch(T, order) +// : Atomically performs a RMW that bit-wise and's the value stored with T. +// : Returns the new updated value after the operation. +// - +T or_fetch(T, order) +// : Atomically performs a RMW that bit-wise or's the value stored with T. +// : Returns the new updated value after the operation. +// - +T xor_fetch(T, order) +// : Atomically performs a RMW that bit-wise xor's the value stored with T. +// : Returns the new updated value after the operation. +// +// - T operator++/--() +// : Atomically increments or decrements the atomic value by one. +// : Returns the previous value stored before the RMW operation. +// : Memory is affected according to seq_cst ordering. +// +// - T ++/--operator() +// : Atomically increments or decrements the atomic value by one. +// : Returns the new updated value after the RMW operation. +// : Memory is affected according to seq_cst ordering. +// +// - T operator+=/-=/&=/|=/^=(T) +// : Atomically adds, subtracts, bitwise and/or/xor the atomic object with T. +// : Returns the new updated value after the operation. +// : Memory is affected according to seq_cst ordering. +// +// +// The below operations are only valid for Pointer types +// +// - T* fetch_add(ptrdiff_t val, order) +// : Atomically performs a RMW that increments the value store with sizeof(T) * val +// : Returns the previous value stored before the RMW operation. +// - T* fetch_sub(ptrdiff_t val, order) +// : Atomically performs a RMW that decrements the value store with sizeof(T) * val +// : Returns the previous value stored before the RMW operation. +// +// - +T* add_fetch(ptrdiff_t val, order) +// : Atomically performs a RMW that increments the value store with sizeof(T) * val +// : Returns the new updated value after the operation. +// - +T* sub_fetch(ptrdiff_t val, order) +// : Atomically performs a RMW that decrements the value store with sizeof(T) * val +// : Returns the new updated value after the operation. 
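+//
+//      Example (illustrative sketch only; 'gSlots', 'gCursor' and 'ClaimSlot' are hypothetical names,
+//      and bounds checking is omitted): pointer arithmetic through fetch_add.
+//
+//          static uint32_t gSlots[64];
+//          eastl::atomic<uint32_t*> gCursor{&gSlots[0]};
+//
+//          uint32_t* ClaimSlot()
+//          {
+//              // fetch_add on a pointer advances the stored value by sizeof(uint32_t) * 1 and
+//              // returns the previous pointer, i.e. the slot this caller now owns.
+//              return gCursor.fetch_add(1, eastl::memory_order_relaxed);
+//          }
+//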
+// +// - T* operator++/--() +// : Atomically increments or decrements the atomic value by sizeof(T) * 1. +// : Returns the previous value stored before the RMW operation. +// : Memory is affected according to seq_cst ordering. +// +// - T* ++/--operator() +// : Atomically increments or decrements the atomic value by sizeof(T) * 1. +// : Returns the new updated value after the RMW operation. +// : Memory is affected according to seq_cst ordering. +// +// +// - +EASTL_ATOMIC_HAS_[len]BIT Macro Definitions +// These macros provide the ability to compile-time switch on the availability of support for the specific +// bit width of an atomic object. +// Example: +// +// #if defined(EASTL_ATOMIC_HAS_128BIT) +// #endif +// +// Indicates the support for 128-bit atomic operations on an eastl::atomic object. +// + + +///////////////////////////////////////////////////////////////////////////////// +// +// eastl::atomic_flag class API +// +// Unless otherwise specified all orders except read_depends is a valid order +// on the given operation. +// +// - atomic_flag() : Initializes the flag to false. +// +// - clear(order) +// : Atomically stores the value false to the flag. +// : Valid orders are relaxed, release, and seq_cst. +// +// - bool test_and_set(order) +// : Atomically exchanges flag with true and returns the previous value that was held. +// +// - bool test(order) +// : Atomically loads the flag value. +// : Valid orders are relaxed, acquire, and seq_cst. +// + + +///////////////////////////////////////////////////////////////////////////////// +// +// eastl::atomic standalone free function API +// +// All class methods have a standalone free function that takes a pointer to the +// atomic object as the first argument. These functions just call the correct method +// on the atomic object for the given operation. +// These functions come in two variants, a non-explicit and an explicit variant +// that take on the form atomic_op() and atomic_op_explicit() respectively. +// The non-explicit variants take no order arguments and thus are all seq_cst. +// The explicit variants take an order argument. +// Only the standalone functions that do not have a class method equivalent pair will be +// documented here which includes all new extensions to the std API. +// +// - +compiler_barrier() +// : Read-Write Compiler Barrier. +// - +compiler_barrier_data_dependency(const T&) +// : Read-Write Compiler Barrier. +// : Applies a fake input dependency on const T& so the compiler believes said variable is used. +// : Useful for example when writing benchmark or testing code with local variables that must not get dead-store eliminated. +// - +cpu_pause() +// : Prevents speculative memory order violations in spin-wait loops. +// : Allows giving up core resources, execution units, to other threads while in spin-wait loops. +// - atomic_thread_fence(order) +// : Read docs below. +// - atomic_signal_fence(order) +// : Prevents reordering with a signal handler. +// - +atomic_load_cond(const eastl::atomic*, Predicate) +// : continuously loads the atomic object until Predicate is true +// : will properly ensure the spin-wait loop is optimal +// : very useful when needing to spin-wait for some condition to be true which is common is many lock-free algorithms +// : Memory is affected according to seq_cst ordering. 
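+//
+//      Example (illustrative sketch of atomic_load_cond above; 'gReady' is a hypothetical flag and the
+//      predicate is assumed to receive the loaded value):
+//
+//          eastl::atomic<bool> gReady{false};
+//
+//          // Consumer: spin-wait until a producer stores true, letting the implementation
+//          // handle the cpu_pause()/backoff behaviour inside the loop.
+//          eastl::atomic_load_cond(&gReady, [](bool value) { return value; });
+//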
+// - +atomic_load_cond_explicit(const eastl::atomic*, Predicate, Order) +// : Same as above but takes an order for how memory is affected +// + + +///////////////////////////////////////////////////////////////////////////////// +// +// Deviations from the standard. This does not include new features added: +// +// 1. +// Description: Atomics are always lock free +// Reasoning : We don't want people to fall into performance traps where implicit locking +// is done. If your user defined type is large enough to not support atomic +// instructions then your user code should do the locking. +// +// 2. +// Description: Atomic objects can not be volatile +// Reasoning : Volatile objects do not make sense in the context of eastl::atomic. +// Use the given memory orders to get the ordering you need. +// Atomic objects have to become visible on the bus. See below for details. +// +// 3. +// Description: Consume memory order is not supported +// Reasoning : See below for the reasoning. +// +// 4. +// Description: ATOMIC_INIT() macros and the ATOMIC_LOCK_FREE macros are not implemented +// Reasoning : Use the is_lock_free() method instead of the macros. +// ATOMIC_INIT() macros aren't needed since the default constructor value initializes. +// +// 5. +// Description: compare_exchange failure memory order cannot be stronger than success memory order +// Reasoning : Besides the argument that it ideologically does not make sense that a failure +// of the atomic operation shouldn't have a stricter ordering guarantee than the +// success of it; if that is required then just make the whole operation stronger. +// This ability was added and allowed in C++17 only which makes supporting multiple +// C++ versions harder when using the compiler provided intrinsics since their behaviour +// is reliant on the C++ version being compiled. Also makes it harder to reason about code +// using these atomic ops since C++ versions vary the behaviour. We have also noticed +// that versions of compilers that say they support C++17 do not properly adhere to this +// new requirement in their intrinsics. Thus we will not support this. +// +// 6. +// Description: All memory orders are distinct types instead of enum values +// Reasoning : This will not affect how the API is used in user code. +// It allows us to statically assert on invalid memory orders since they are compile-time types +// instead of potentially runtime enum values. +// Allows for more efficient code gen without the use of switch statements or if-else conditionals +// on the memory order enum values on compilers that do not provide intrinsics that take in a +// memory order, such as MSVC, especially in debug and debug-opt builds. +// + + +///////////////////////////////////////////////////////////////////////////////// +// +// ******** DISCLAIMER ******** +// +// This documentation is not meant to provide rigorous proofs on the memory models +// of specific architectures or the C++ memory model introduced in C++11. It is not +// meant to provide formal mathematical definitions and logic that shows that a given +// implementation adheres to the C++ memory model. This isn't meant to be some infallible +// oracle on memory models, barriers, observers, and architecture implementation details. +// What I do hope a reader gets out of this is the following. An understanding of the C++ +// memory model and how that relates to implementations on various architectures. 
Various +// phenomena and ways that compilers and architectures can steer away from a sequentially +// consistent system. To provide examples on how to use this library with common patterns +// that will be seen in many code bases. Lastly I would like to provide insight and +// further readings into the lesser known topics that aren't shared outside people +// who live in this space and why certain things are done the way they are +// such as cumulativity of memory barriers as one example. Sometimes specifying barriers +// as LDLD/LDST/STST/STLD doesn't actually cut it, and finer grain semantics are needed +// to describe cumulativity of memory barriers. +// +// ******** Layout of the Documentation ******** +// +// This document will first go through a variety of different hardware architectures with examples of the various kinds of +// reordering that is allowed by these architectures. We will use the memory barriers provided by the hardware to "fix" these +// examples. +// Then we will introduce the C++ memory model and revisit the examples using the platform agnostic abstract memory model to "fix" +// them. +// The hope here is that we get a sense of the various types of architectures and weak memory consistency provided by them and thus +// an appreciation for the design of the C++ abstract memory model. +// +// ******** REFERENCES ******** +// [1] Dekker's mutual exclusion algorithm made RW-safe +// [2] Handling Memory Ordering in Multithreaded Applications with Oracle Solaris +// [3] Evaluating the Cost of Atomic Operations on Modern Architectures +// [4] A Tutorial Introduction to the ARM and POWER Relaxed Memory Models +// [5] Memory Barriers: a Hardware View for Software Hackers +// [6] Memory Model = Instruction Reordering + Store Atomicity +// [7] ArMOR: Defending Against Memory Consistency Model Mismatches in Heterogeneous Architectures +// [8] Weak Memory Models: Balancing Definitional Simplicity and Implementation Flexibility +// [9] Repairing Sequential Consistency in C/C++11 +// [10] A high-level operational semantics for hardware weak memory models +// [11] x86-TSO: A Rigorous and Usable Programmer's Model for x86 Multiprocessors +// [12] Simplifying ARM Concurrency: Multicopy-Atomic Axiomatic and Operational Models for ARMv8 +// [13] Mixed-size Concurrency: ARM, POWER, C/C++11, and SC +// [14] P0668R4: Revising the C++ memory model +// [15] Constructing a Weak Memory Model +// [16] The Superfluous Load Queue +// [17] P0190R1: Proposal for New memory_order_consume Definition +// +// ******** What does it mean to be Atomic? ******** +// +// The word atomic has been overloaded and can mean a lot of different things depending on the context, +// so let's digest it. +// +// The first attribute for something to be atomic is that concurrent stores and loads +// must not tear or shear. This means if two threads write 0x01 and 0x02 at the same time +// then the only values that should ever be observed is 0x01 or 0x02. We can only see +// the whole write of 0x01 or 0x02, not 0x03 as an example. Many algorithms rely on +// this property; only very few such a Dekker's algorithm for mutual exclusion don't. +// Well actually a recent paper, [1], showed that Dekker's isn't safe without atomic +// loads and stores so this property is pretty fundamental and also hard to prove that +// your algorithm is safe without this property on loads and stores. +// +// We need to ensure the compiler emits a single load instruction. 
+// If we are doing 64-bit loads on a 32-bit platform, we need to ensure the load is one +// instruction instead of 2 32-bit loads into two registers. +// Another example is if we have this struct, struct { int32_t i; int32_t k; }, even on +// a 64-bit system we have to ensure the compiler does one 64-bit load and not two +// 32-bit loads for each individual member. +// +// We also need to ensure the correct instruction is emitted. A general load instruction +// to do a 64-bit load on a 32-bit platform may perform a 64-bit load but it may not +// be atomic, it may be turned into two 32-bit loads behind the scenes in the cpu. +// For example on ARMv7 we would have to use ldrexd not ldrd for 64-bit loads +// on a 32-bit ARMv7 core. +// +// An operation may be considered atomic if multiple sub-operations are done as one +// transactional unit. This is commonly known as a Read-Modify-Write, RMW, operation. +// Take a simple add operation; it is actually a load from memory into a register, +// a modification of said register and then a store back to memory. If two threads +// concurrently execute this add operation on the same memory location; any interleaving +// of the 3 sub-operations is possible. It is possible that if the initial value is 0, +// the result may be 1 because each thread executed in lockstep both loading 0, adding 1 +// and then storing 1. A RMW operation may be considered atomic if the whole sequence of +// sub-operations are serialized as one transactional unit. +// +// Atomicity may also refer to the order in which memory operations are observed and the +// dependencies between memory operations to different memory locations. As a quick example +// into the very thing we will be deep diving into that is not very intuitive. If I do, [STORE(A, 2); STORE(B, 1);], +// in one thread and another thread does, [r0 = LOAD(B); r1 = LOAD(A);]; if r0 == 1, thus we observed +// the store to B, will we observe r1 == 2. Our intuition tells us that well A was stored +// first and then B, so if I read the new value of B then I must also read the new value +// of A since the store to A happened before B so if I can see B then I must be able to +// see everything before B which includes A. +// This highlights the ordering of memory operations and why memory barriers and memory +// models are so heavily attached to atomic operations because one could classify something +// is atomic if the dependency highlighted in the above example is allowed to be maintained. +// +// This is what people mean when you hear that volatile does NOT mean atomicity of the operation. +// Usually people imply a lot of implicit assumptions when they mark a variable as volatile. +// All volatile gives us is the ability to tell the compiler it may not assume anything +// about the state of that memory location. This means the compiler must always emit a load +// or store instruction, cannot perform constant folding, dead-store elimination, or +// do any sort of code movement on volatile variables. +// +// ******** Preliminary Basics ******** +// +// It is expected that the reader understands what a cache is, how it is organized and how data +// is chunked into cachelines. It is helpful if the reader understands basic cache coherency +// protocols such as MSI or MESI. +// It is expected the reader understands alignment, especially natural alignment +// of the processor and why alignment is important for data access. 
+// The reader should have some understanding of how a processor executes instructions, +// basics of what Out-of-Order execution means and basics of what speculative execution means. +// It is expected that the reader has an understanding of threading, multi-threaded programming +// and the use of concurrency primitives such as mutexes. +// Memory Barrier, Barrier, Memory Fence and Fence are all interchangeable synonyms. +// +// Independent memory operations can be performed or observed, depending on your perspective, +// in any order as long as the local cpu thinks its execution is happening in program order. +// This can be a problem for inter-cpu communications and thus we need some way to enforce +// that the compiler does not reorder instructions and that the cpu also does not reorder +// instructions. This is what a barrier is, it is an enforcement of ordering on memory instructions, +// so as the name suggests a barrier. Barriers can be one-sided or both-sided which means +// the barrier enforces a partial order above or below or on both sides of said barrier. +// +// Processors will use tricks such as out-of-order execution, memory instruction buffering and +// combining, speculative loads and speculative execution, branch prediction and many types of caching even +// in various interconnects from the cpu to the memory itself. One key thing to note is that cpus +// do not physically reorder the instruction stream. Instructions are dispatched and retired +// in-order but executed out-of-order. Memory barriers will prevent these tricks from happening +// by controlling the interaction of multiple cpus. +// +// Compilers will morph your code and physically move instructions around as long as the program +// has the same observed behaviour. This is becoming increasingly true with more optimization techniques +// such as Link Time Optimization becoming the norm where once people assumed compilers couldn't assume +// something outside the given TU and now because they have the whole program view they know everything. +// This means the compiler does indeed alter the instruction stream +// and compiler barriers are a way to tell them to not move any memory instructions across the barrier. +// This does not prevent a compiler from doing optimizations such as constant folding, merging of +// overlapping loads, or even dead store elimination. Compiler barriers are also very cheap and +// have zero impact on anything that the compiler knows isn't visible in memory such as local variables +// whose addresses do not escape the function even if their address is taken. You can think of it +// in terms of a sequence point as used with "volatile" qualified variables to denote a place in code where +// things must be stable and the compiler doesn't cache any variables in registers or do any reordering. +// +// Memory Barriers come in many flavours that instill a partial or full ordering on memory operations. +// Some memory operations themselves have implicit ordering guarantees already, for example +// Total-Store Order, TSO, architectures like x86 guarantee that a store operation cannot be reordered with a +// previous store operation thus a memory barrier that only orders stores is not needed +// on this architecture other than ensuring the compiler doesn't do any shenanigans. +// Considering we have 4 permutations of memory operations; a common way to describe an ordering +// is via Load-Load/LDLD, Load-Store/LDST, Store-Store/STST or Store-Load/STLD notation. 
You read this +// notation as follows; STLD memory barrier means a load cannot be reordered with a previous store. +// For example, on TSO architecture we can say all stores provide a STST memory barrier, +// since a store cannot be reordered with a previous store. +// +// Memory Barriers in itself are not a magic bullet, they come with caveats that must be known. +// Each cpu architecture also has its own flavours and guarantees provided by said memory barriers. +// There is no guarantee that memory instructions specified before a memory barrier will complete, +// be written to memory or fully propagated throughout the rest of the system, when the memory barrier +// instruction completes. The memory barrier creates a point in that local cpus queue of memory instructions +// whereby they must not cross. There is no guarantee that using a memory barrier on one cpu will have +// any effect at all on another remote cpu's observed view of memory. This also implies that executing +// a memory barrier does not hinder, incur, stall or enforce any other cpus to serialize with each other cpu. +// In order for a remote cpu to observe the correct effects it must also use a matching memory barrier. +// This means code communicating in 2 threads through memory must both be employing the use of memory barriers. +// For example, a store memory barrier that only orders stores, STST, in one thread must be paired with a load memory barrier +// that only orders loads, LDLD, in the other thread trying to observe those stores in the correct order. +// +// ******** Memory Types && Devices ******** +// +// eastl::atomic and accompanying memory barriers ONLY ORDER MEMORY to cpu-to-cpu communication through whatever the +// processor designates as normal cacheable memory. It does not order memory to devices. It does not provide any DMA ordering guarantees. +// It does not order memory with other memory types such as Write Combining. It strictly orders memory only to shared memory that is used +// to communicate between cpus only. +// +// ******** Sequentially Consistent Machine ******** +// +// The most intuitive as well as the model people naturally expect a concurrent system to have is Sequential Consistency. +// You may have or definitely have heard this term if you dealt with any type of distributed system. Lamport's definition +// articulates this consistency model the best. +// Leslie Lamport: "the result of any execution is the same as if the operations of all the processors were executed in some +// sequential order, and the operations of each individual processor appear in this sequence in the order +// specified by its program". +// +// A Sequentially Consistent machine is modelled as follows: +// +// ------------ ------------ +// | Thread 0 | ... | Thread N | +// ------------ ------------ +// | | | | +// | | | | +// ---------------------------------------- +// | | +// | Shared Memory | +// | | +// ---------------------------------------- +// +// This is a sequentially consistent machine. Each thread is executing instructions in program order which does loads and stores +// that are serialized in some order to the shared memory. This means all communication is done through the shared memory with one cpu +// doing one access at a time. This system has a couple key properties. +// +// 1. There is no local cpu memory reordering. 
Each cpu executes instructions in program order and all loads and stores must complete, +// be visible in the shared memory or be visible in a register before starting the next instruction. +// 2. Each memory operation becomes visible to all cpus at the same time. If a store hits the shared memory, then all subsequent loads +// from every other cpu will always see the latest store. +// +// A Sequentially Consistent machine has, Single-Copy Store Atomicity: All stores must become visible to all cores in the system at the same time. +// +// ******** Adding Caches ******** +// +// Caches by nature implicitly add the potential for memory reordering. A centralized shared snoopy bus that we all learned in school +// makes it easy to implement sequential consistency with caches. Writes and reads are all serialized in a total order via the cache bus transaction +// ordering. Every modern day bus is not inorder, and most certainly not a shared centralized bus. Cache coherency guarantees that all memory operations +// will be propagated eventually to all parties, but it doesn't guarantee in what order or in what time frame. Once you add +// caches, various levels of caching and various interconnects between remote cpus, you inevitably run into the issue where +// some cpus observe the effects of a store before other cpus. Obviously we have weakly-ordered and strongly-ordered cpus with +// caches so why is that? The short answer is, where is the onus put, is it on the programmer or the hardware. Does the hardware +// have dependency tracking, is it able to determine when a memory order violation occurs such as rolling back its speculative execution +// and also how far along the chain of interconnects does the hardware wait before it determines that the memory operation has +// been acknowledged or is considered to satisfy its memory ordering guarantees. Again this is a very high level view of the system +// as a whole, but the takeaway is yes; caches do add the potential for reordering but other supporting hardware determines whether +// that is observable by the programmer. There is also some debate whether weakly-ordered processors are actually more performant +// than strongly-ordered cpus eluding to the fact that the hardware has a better picture of what is a violation versus the programmer +// having to emit far more barriers on weakly-ordered architectures in multi-threaded code which may actually not be needed because the +// hardware didn't commit a violation but it may have and we as the programmer cannot rely on may haves. +// +// ******** Store Buffers ******** +// +// Obviously having all stores serialize results in unnecessary stalls. Store buffers alleviate this issue. +// Store buffers are simple fixed size structures that sit between the cpu and the memory hierarchy. This allows +// each cpu to record its write in the store buffer and then move onto the next instruction. The store buffer will +// eventually be flushed to the resulting memory hierarchy in FIFO order. How and when this flushing occurs is irrelevant to the +// understanding of a store buffer. A read from an address will grab the most recent write to the same address in the store buffer. +// +// The introduction of a store buffer is our first dive into weaker memory consistency. The addition of this hardware turns the consistency model weaker, +// into one that is commonly known as TSO, Total-Store Order. 
This is the exact model used by x86 cpus and we will see what this means +// and what new effects are observed with the addition of the store buffer. Below is a diagram of how the machine may now look. +// This type of store buffer is known as a FIFO store buffer, FIFO write buffer, or Load/Store Queue in some literature. This type of +// store buffer introduces STLD reordering but still prevents STST reordering. We will take a look at another type of store buffer later. +// Even with this store buffer, stores to the same address can still be merged so that only the latest store is written to the cache assuming +// no other intermediary stores happen. x86 cpus do write merging even for consecutive stores, i.e. storing to A and A+1 can be merged into one two-byte store. +// +// ------------ ------------ +// | Thread 0 | ... | Thread N | +// ------------ ------------ +// | | | | +// | | | | +// | Store | | Store | +// | Buffer | | Buffer | +// | | | | +// ---------------------------------------- +// | | +// | Shared Memory | +// | | +// ---------------------------------------- +// +// ---- Store-Buffering / Dekker's Example ---- +// This is a very common litmus test that showcases the introduction of STLD reordering. It is called Store-Buffering example because it is the only weaker +// behaviour observed under TSO and also called Dekker's Example as it famously breaks Dekker's mutual exclusion algorithm. +// +// --------------------------- +// Initial State: +// x = 0; y = 0; +// --------------------------- +// Thread 0 | Thread 1 +// --------------------------- +// STORE(x, 1) | STORE(y, 1) +// r0 = LOAD(y) | r1 = LOAD(x) +// --------------------------- +// Observed: r0 = 0 && r1 = 0 +// --------------------------- +// +// We would normally assume that any interleaving of the two threads cannot possibly end up with both loads reading 0. We assume that the observed outcome +// of r0 = 0 && r1 = 0 to be impossible, clearly that is not the case. Let's start by understanding the example with no reordering possible. Both threads +// run and their first instruction is to write the value 1 into either x or y, the next instruction then loads from the opposite variable. This means no +// matter the interleaving, one of the loads always executes after the other thread's store to that variable. +// We could observe r0 = 1 && r1 = 1 if both threads execute in lockstep. +// We could observe r0 = 0 && r1 = 1 if thread 0 executes and then thread 1 executes. +// We could observe r0 = 1 && r1 = 0 if thread 1 executes and then thread 0 executes. +// Since the stores always execute before that load in the other thread, one thread must always at least observe a store, so let's see why store buffers break this. +// +// What will happen is that STORE(x, 1) is stored to the store buffer but not made globally visible yet. +// STORE(y, 1) is written to the store buffer and also is not made globally visible yet. +// Both loads now read the initial state of x and y which is 0. We got the r0 = 0 && r1 = 0 outcome and just observed a Store-Load reordering. +// It has appeared as if the loads have been reordered with the previous stores and thus executed before the stores. +// Notice even if we execute the instructions in order, a series of other hardware side effects made it appear as if the instructions have been reordered. +// We can solve this by placing a Store-Load barrier after the store and before the load as follows. 
+// +// --------------------------- +// Thread 0 | Thread 1 +// --------------------------- +// STORE(x, 1) | STORE(y, 1) +// STLD BARRIER | STLD BARRIER +// r0 = LOAD(y) | r1 = LOAD(x) +// --------------------------- +// +// This STLD barrier effectively will flush the store buffer into the memory hierarchy ensuring all stores in the buffer are visible to all other cpus at the same time +// before executing the load instruction. Again nothing prevents a potential hardware from speculatively executing the load even with the STLD barrier, the hardware will have to do +// a proper rollback if it detected a memory order violation otherwise it can continue on with its speculative load. The barrier just delimits a stability point. +// +// Most hardware does not provide granular barrier semantics such as STLD. Most provide a write memory barrier which only orders stores, STST, a read memory barrier +// which only orders loads, LDLD, and then a full memory barrier which is all 4 permutations. So on x86 we will have to use the mfence, memory fence, instruction +// which is a full memory barrier to get our desired STLD requirements. +// +// TSO also has the property that we call, Multi-Copy Store Atomicity. This means a cpu sees its own stores before they become visible to other cpus, +// by forwarding them from the store buffer, but a store becomes visible to all other cpus at the same time when flushed from the store buffer. +// +// +// Let's look at a non-FIFO store buffer now as seen in ARM cpus as an example and we will use a standard Message Passing example to see how it manifests in even weaker consistency. +// A store buffer on ARM as an example allows write merging even with adjacent stores, is not a FIFO queue, any stores in the small hardware hash table may be ejected at any point +// due to a collision eviction or the availability of cachelines in the cache hierarchy meaning that stores may bypass the buffer entirely if that cacheline is already owned by that cpu. +// There is no guarantee that stores will be completed in order as in the FIFO case. +// +// --------------------------- +// Initial State: +// x = 0; y = 0; +// --------------------------- +// Thread 0 | Thread 1 +// --------------------------- +// STORE(x, 1) | while(LOAD(y) == 0); +// STORE(y, 1) | r0 = LOAD(x) +// --------------------------- +// Observed: r0 = 0 +// --------------------------- +// +// This is a classic Message Passing example that is very commonly used in production code. We store some values and then set a flag, STORE(y, 1) in this case. +// The other thread waits until the flag is observed and then reads the value out of x. If we observed the flag then we should obviously see all stores before the flag was set. +// Given our familiarity with TSO consistency above we know this definitely works on TSO and it is impossible to observe the load of x returning 0 under that consistency model. +// Let's see how this breaks with a non-FIFO store buffer. +// +// Thread 0 executes the STORE(x, 1) but the cacheline for x is not in thread 0's cache so we write to the store buffer and wait for the cacheline. +// Thread 1 executes the LOAD(y) and it also does not have y in its cacheline so it waits before completing the load. +// Thread 0 moves on to STORE(y, 1). It owns this cacheline, hypothetically, so it may bypass the store buffer and store directly to the cache. +// Thread 0 receives a message that Thread 1 needs y's cacheline, so it transfers the now modified cacheline to Thread 1. 
+// Thread 1 completes the load with the updated value of y = 1 and branches out of the while loop since we saw the new value of y. +// Thread 1 executes LOAD(x) which will return 0 since Thread 0 still hasn't flushed its store buffer waiting for x's cacheline. +// Thread 0 receives x's cacheline and now flushes x = 1 to the cache. Thread 1 will also have invalidated its cacheline for x that it brought in via the previous load. +// +// We have now fallen victim to STST reordering, allowing Thread 1 to observe a load of x returning 0. Not only does this store buffer allow STLD reordering due to the nature of +// buffering stores, but it also allows another reordering; that of Store-Store reordering. It was observed as if Thread 0 executed STORE(y, 1) before STORE(x, 1) which completely +// broke our simple message passing scenario. +// +// --------------------------- +// Thread 0 | Thread 1 +// --------------------------- +// STORE(x, 1) | while(LOAD(y) == 0); +// STST BARRIER | +// STORE(y, 1) | r0 = LOAD(x) +// --------------------------- +// +// The STST memory barrier effectively ensures that the cpu will flush its store buffer before executing any subsequent stores. That is not entirely true, the cpu is still allowed +// to continue and execute stores to the store buffer as long as it doesn't flush them to the cache before the previous stores are flushed to the cache. If nothing becomes +// globally visible out of order then we are good. +// The example above will change how the processor executes due to the STST memory barrier. Thread 0 will execute STORE(y, 1), write to the store buffer and mark all current entries. Even though it owns the cacheline +// it cannot write the store to the cache until all marked entries, which are all the previous stores, are flushed to the cache. We have now fixed the message passing code by adding +// a STST or write memory barrier and thus it is no longer possible to observe the load of x returning 0. +// +// ******** Invalidation Queues ******** +// +// Due to the cache coherency protocol in play, a write to a cacheline will have to send invalidation messages to all other cpus that may have that cacheline as well. +// Immediately executing and responding to invalidation messages can cause quite a stall especially if the cache is busy at the moment with other requests. +// The longer we wait to invalidate the cacheline, the longer the remote cpu doing the write is stalled waiting on us. We don't like this very much. +// Invalidation Queues are just that, we queue up the action of actually invalidating the cacheline but immediately respond to the request saying we did it anyway. +// Now the remote cpu thinks we invalidated said cacheline but actually it may very well still be in our cache ready to be read from. We just got weaker again, let's +// see how this manifests in code by starting from the end of our previous example. +// +// --------------------------- +// Initial State: +// x = 0; y = 0; +// --------------------------- +// Thread 0 | Thread 1 +// --------------------------- +// STORE(x, 1) | while(LOAD(y) == 0); +// STST BARRIER | +// STORE(y, 1) | r0 = LOAD(x) +// --------------------------- +// Observed: r0 = 0 +// --------------------------- +// +// Thread 1 receives the invalidate x's cacheline message and queues it because it is busy. +// Thread 1 receives the invalidate y's cacheline message, but we don't have that cacheline so acknowledge immediately. +// Thread 1 executes LOAD(y), loads in y's cacheline and branches out of the loop. 
+// Thread 1 executes LOAD(x), and loads from the cache the old value of x because the invalidation message is still sitting in the invalidation queue. +// +// We have just again observed the load of x returning 0 but from a different type of reordering now on the reader side. +// This is a form of LDLD, Load-Load, reordering as it appears as if LOAD(x) was executed before LOAD(y). This can be fixed as follows. +// +// --------------------------- +// Thread 0 | Thread 1 +// --------------------------- +// STORE(x, 1) | while(LOAD(y) == 0); +// STST BARRIER | LDLD BARRIER +// STORE(y, 1) | r0 = LOAD(x) +// --------------------------- +// +// The LDLD memory barrier essentially marks all entries currently in the invalidation queue. Any subsequent load must wait until all the marked entries have been +// processed. This ensures once we observe y = 1, we process all entries that came before y and that way we observe all the stores that happened before y. +// The insertion of the read memory barrier creates the required memory barrier pairing as discussed above and ensures that now our code executes as expected. +// +// It must be made clear that these are not the only hardware structure additions or ways that can relax STST, STLD and LDLD orderings. These are merely +// 2 structures that are common and ones that I choose to use as examples of how hardware can reduce ordering guarantees. Knowing how the hardware does this +// isn't always entirely clear but having a model that tells us what operations can be reordered is all we need to be able to reason about our code when executing on that hardware. +// +// ******** Load Buffering ******** +// +// The analog of the Store Buffering example, this litmus test has two threads read from two different locations and then write to the other locations. +// The outcome of having LDST reordering is allowed and observable on many processors such as ARM. +// +// --------------------------- +// Initial State: +// x = 0; y = 0; +// --------------------------- +// Thread 0 | Thread 1 +// --------------------------- +// r0 = LOAD(x) | r1 = LOAD(y) +// STORE(y, 1) | STORE(x, 1) +// --------------------------- +// Observed: r0 = 1 && r1 = 1 +// --------------------------- +// +// This is possible because the processor does not have to wait for the other cpu's cacheline to arrive before storing into the cache. +// Assume Thread 0 owns y's cacheline and Thread 1 owns x's cacheline. +// The processor may execute the load and thus buffer the load waiting for the cacheline to arrive. +// The processor may continue onto the store and since each cpu owns their respective cacheline, store the result into the cache. +// The cpus now receive the cachelines for x and y with the now modified value. +// We have just observed the loads returning 1 and thus observed LDST reordering. +// +// To forbid such outcome it suffices to add any full memory barrier to both threads or a local Read-After-Write/Read-To-Write dependency or a control dependency. 
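+//
+// Expressed against the C++/eastl::atomic interface, the "any full memory barrier" option is a seq_cst fence between the load and the
+// store in each thread. A minimal sketch, shown with std::atomic for familiarity (eastl::atomic and eastl::atomic_thread_fence mirror
+// this interface); with these fences the outcome r0 = 1 && r1 = 1 is forbidden:
+//
+// #include <atomic>
+//
+// std::atomic<int> x{0};
+// std::atomic<int> y{0};
+//
+// void thread0(int& r0)
+// {
+//     r0 = x.load(std::memory_order_relaxed);
+//     std::atomic_thread_fence(std::memory_order_seq_cst);  // the full memory barrier option from above
+//     y.store(1, std::memory_order_relaxed);
+// }
+//
+// void thread1(int& r1)
+// {
+//     r1 = y.load(std::memory_order_relaxed);
+//     std::atomic_thread_fence(std::memory_order_seq_cst);
+//     x.store(1, std::memory_order_relaxed);
+// }
+//
+// The pseudo-code tables below show the same outcome being forbidden with a plain control dependency and with an artificial
+// address dependency instead.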
+//
+// -------------------------------
+// Thread 0                 | Thread 1
+// -------------------------------
+// r0 = LOAD(x)             | r1 = LOAD(y)
+// if (r0 == 1)             | if (r1 == 1)
+//   STORE(y, 1)            |   STORE(x, 1)
+// -------------------------------
+//
+// -----------------------------------------------------
+// Thread 0                 | Thread 1
+// -----------------------------------------------------
+// r0 = LOAD(x)             | r1 = LOAD(y)
+// STORE(&(y + r0 - r0), 1) | STORE(&(x + r1 - r1), 1)
+// -----------------------------------------------------
+//
+// Both fixes above ensure that neither write can be committed, made globally visible, until the read preceding it in program source
+// code order has been fully satisfied.
+//
+// ******** Compiler Barriers ********
+//
+// Compiler barriers are both-sided barriers that prevent loads and stores from moving down past the compiler barrier and
+// loads and stores from moving up above the compiler barrier. Here we will see the various ways our code may be subject
+// to compiler optimizations and why compiler barriers are needed. Note as stated above, compiler barriers may not
+// prevent all compiler optimizations or transformations. Compiler barriers are usually implemented by reloading all
+// variables that are currently cached in registers and flushing all stores in registers back to memory.
+// This list isn't exhaustive but will hopefully outline what compiler barriers protect against and what they don't.
+//
+// The compiler may reorder loads.
+// LOAD A; LOAD B; -> LOAD B; LOAD A;
+// LOAD A; operation on A; LOAD B; operation on B; -> LOAD A; LOAD B; operation on A; operation on B;
+//
+// Insert a compiler barrier in between the two loads to guarantee that they are kept in order.
+// LOAD A; COMPILER_BARRIER; LOAD B;
+// LOAD A; operation on A; COMPILER_BARRIER; LOAD B; operation on B;
+//
+// The same goes for stores.
+// STORE(A, 1); STORE(B, 1); -> STORE(B, 1); STORE(A, 1);
+// operations and STORE result into A; operations and STORE result into B; -> all operations; STORE result into B; STORE result into A;
+//
+// Insert a compiler barrier in between the two stores to guarantee that they are kept in order.
+// The compiler is still free to merge multiple stores to A before the barrier into one final store.
+// It is also not required that the store to B after the barrier be written to memory; it may be cached in a register for some
+// indeterminate amount of time, as an example.
+// STORE(A, 1); COMPILER_BARRIER; STORE(B, 1);
+//
+// The compiler is allowed to merge overlapping loads and stores.
+// Inserting a compiler barrier here will not prevent the compiler from doing this optimization as doing one wider load/store is
+// technically still abiding by the guarantee that the loads/stores are not reordered with each other.
+// LOAD A[0]; LOAD A[1]; -> A single wider LOAD instruction
+// STORE(A[0], 1); STORE(A[1], 2); -> A single wider STORE instruction
+//
+// Compilers do not have to reload the values pointers point to. This is especially common on RISC architectures with lots
+// of general purpose registers, or with compiler optimizations such as inlining or Link-Time Optimization.
+// int i = *ptr; Do bunch of operations; if (*ptr) { do more; }
+// It is entirely possible the compiler may remove the last if statement because it can keep *ptr in a register
+// and it may infer from the operations done on i that i is never 0.
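+//
+// COMPILER_BARRIER in these examples is just a placeholder. For reference, on the major compilers a compiler-only barrier is
+// commonly spelled roughly as follows; this is illustrative and not necessarily what EASTL uses internally:
+//
+// #if defined(_MSC_VER)
+//     #include <intrin.h>
+//     #define COMPILER_BARRIER() _ReadWriteBarrier()            // MSVC: compiler fence only, no instruction emitted
+// #else
+//     #define COMPILER_BARRIER() asm volatile("" ::: "memory")  // GCC/Clang: "memory" clobber, no instruction emitted
+// #endif
+//
+// With that placeholder in mind, the fixed version of the pointer reload example reads: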
+// +// int i = *ptr; Do bunch of operations; COMPILER_BARRIER; if (*ptr) { do more; } +// Inserting a compiler barrier at that location will cause the compiler to have reload *ptr thus keeping the if statement assuming +// no other optimizations take place, such as the compiler knowing that *ptr is always greater than 0. +// +// The compiler is within its rights to also merge and reload loads as much as it pleases. +// +// while (int tmp = LOAD(A)) +// process_tmp(tmp) +// +// Will be merged and transformed to +// +// if (int tmp = LOAD(A)) +// for (;;) process_tmp(tmp) +// +// Inserting a compiler barrier will ensure that LOAD(A) is always reloaded and thus the unwanted transformation is avoided. +// +// while (int tmp = LOAD(A)) +// { +// process_tmp(tmp) +// COMPILER_BARRIER +// } +// +// Under heavy register pressure scenarios, say the loop body was larger, the compiler may reload A as follows. +// Compiler barriers cannot prevent this from happening, even if we put it after process_tmp as above; +// the compiler still kept those loads above the barrier so it satisfied its contract even though it reloaded +// from A more than once. +// +// while (int tmp = LOAD(A)) +// process_tmp(LOAD(A)) +// +// In the above transformation it is possible that another cpu stores 0 into A. When we reload A for process_tmp, we pass 0 +// to process_tmp() which it would actually never expect to observe. Because if we observed 0, the while loop condition +// would never be satisfied. If the compiler under register pressure instead stored and loaded tmp from its stack slot, that is fine +// because we are just storing and loading the original observed value from A. Obviously that is slower than just reloading from +// A again so an optimizing compiler may not do the stack slot store. This is an unwanted transformation which eastl::atomic prevents +// even on relaxed loads. +// +// The compiler is allowed to do dead-store elimination if it knows that value has already been stored, or that only the last store +// needs to be stored. The compiler does not assume or know that these variables are shared variables. +// +// STORE(A, 1); STORE(A, 1); +// OPERATIONS; -> OPERATIONS; +// STORE(A, 1); +// +// The compiler is well within its rights to omit the second store to A. Assuming we are doing some fancy lockfree communication +// with another cpu and the last store is meant to ensure the ending value is 1 even if another cpu changed A in between; that +// assumption will not be satisfied. A compiler barrier will not prevent the last store from being dead-store removed. +// +// STORE(A, 1); +// OPERATIONS; +// STORE(A, 2); +// +// Assuming these stores are meant to denote some state changes to communicate with a remote cpu. The compiler is allowed to +// transform this as follows without a compiler barrier. Insert a compiler barrier between the two stores to prevent the transformation. +// Something like this will also require memory barriers, but that is not the point of this section. +// +// STORE(A, 2); +// OPERATIONS; +// +// The compiler is also allowed to invent stores as it may please. +// First on many RISC architectures storing an immediate value either involves loading the immediate from the .data section +// or combing a variety of load upper immediate and add or or immediate instructions to get our constant in a register and then +// doing a single 32-bit store instruction from said register. 
Some ISAs have 16-bit store-immediate instructions, so a store
+// may be broken into two 16-bit immediate stores, causing shearing. To reduce instruction dependencies it may also decide
+// to do two add immediates and then two 16-bit stores, again causing shearing.
+//
+// lui $t0, 1             # t0 == 0x00010000
+// ori $t0, $t0, 8        # t0 == 0x00010008
+// strw $t0, 0($a1)       # store t0 into address at a1
+// ->
+// ori $t0, $zero, 8      # t0 == 0x00000008
+// ori $t1, $zero, 1      # t1 == 0x00000001
+// strhw $t0, 0($a1)      # store t0 into the lower half at a1
+// strhw $t1, 2($a1)      # store t1 into the upper half at a1 + 2
+//
+// The above shows a potential transformation that a compiler barrier cannot solve for us.
+//
+// A compiler may also introduce stores to save on branching. Let's see.
+//
+// if (a)
+//   STORE(X, 10);
+// else
+//   STORE(X, 20);
+//
+// STORE(X, 20);
+// if (a)
+//   STORE(X, 10);
+//
+// This is a very common optimization as it saves a potentially more expensive branch instruction but breaks multi-threaded code.
+// This is also another case where a compiler barrier doesn't give us the granularity we need.
+// The branches may even be completely removed with the compiler instead choosing to use conditional move operations, which would
+// actually be compliant since only one store would be done and no extra store would have been added.
+//
+// You are now probably thinking that compiler barriers are useful and are definitely needed to tell the compiler to calm down
+// and to guarantee that the hardware guarantees remain valid because the code we wrote is the code that was actually emitted.
+// But there are definitely lots of caveats where compiler barriers do not at all provide the guarantees we still need.
+// This is where eastl::atomic comes into play, and under the relaxed memory ordering section it will be explained
+// what the standard guarantees and how we achieve those guarantees, like ensuring the compiler never does dead-store elimination or reloads.
+//
+// ******** Control Dependencies ********
+//
+// Control dependencies are implicit local cpu ordering of memory instructions due to branching instructions, specifically
+// only conditional branches. The problem is that compilers do not understand control dependencies, and control dependencies
+// are incredibly hard to understand. This is meant to make the reader aware they exist and to never use them
+// because they shouldn't be needed at all with eastl::atomic. Also, control dependencies are categorized as LDLD or LDST;
+// store control dependencies inherently do not make sense since the conditional branch loads and compares two values.
+//
+// A LDLD control dependency is an anti-pattern since it is not guaranteed that any architecture will detect the memory-order violation.
+// r0 = LOAD(A);
+// if (r0)
+//   r1 = LOAD(B)
+//
+// Given that sequence of instructions, it is entirely possible that a cpu attempts to speculatively predict and load the value of B
+// before the branch instruction has finished executing. It is entirely allowed that the cpu loads from B, assuming B is in cache and A
+// is not in cache, before A. Even if the cpu's prediction turned out to be correct, it is allowed to keep the speculatively loaded B
+// rather than reload it; it simply got lucky.
+//
+// This is also what the x86 pause instruction inserted into spin wait loops is meant to solve.
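+//
+// As a concrete illustration, a typical spin-wait on x86 uses the _mm_pause intrinsic from <immintrin.h> (other architectures have
+// equivalents, such as ARM's yield hint). A sketch of the idiom with made-up names:
+//
+// #include <immintrin.h>
+// #include <atomic>
+//
+// void spinWaitUntilSet(const std::atomic<int>& flag)
+// {
+//     while (flag.load(std::memory_order_acquire) == 0)
+//         _mm_pause();   // hint to the cpu: do not speculate ahead with more loads of flag
+// }
+//
+// The pseudo-code below walks through what that pause hint buys us at the pipeline level.
+//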
+// LOOP: +// r0 = LOAD(A); +// if (!r0) pause; goto LOOP; +// +// In the above spin loop, after a couple of iterations the processor will fill the pipeline with speculated cmp and load instructions. +// x86 will catch a memory order violation if it sees that an external store was done to A and thus must flush the entire +// pipeline of all the speculated load A. Pause instruction tells the cpu to not do speculative loads so that the pipeline is not +// filled with all said speculative load instructions. This ensures we do not incur the costly pipeline flushes from memory order +// violations which are likely to occur in tight spin wait loops. This also allows other threads on the same physical core to use the +// core's resources better since our speculative nature won't be hogging it all. +// +// A LDST control dependency is a true dependency in which the cpu cannot make a store visible to the system and other cpus until it +// knows its prediction is correct. Thus a LDST ordering is guaranteed and can be always relied upon as in the following example. +// +// r0 = LOAD(A); +// if (r0) +// STORE(B, 1); +// +// The fun part comes in with how does the compiler actually break all of this. +// First is that if the compiler can ensure that the value of A in the LDST example is always not zero, then it is always within its +// rights to completely remove the if statement which would lend us with no control dependency. +// +// Things get more fun when we deal with conditionals with else and else if statements where the compiler might be able to employ +// invariant code motion optimizations. Take this example. +// +// r0 = LOAD(A); +// r1 = LOAD(B); +// if (r0) +// STORE(B, 1); +// /* MORE CODE */ +// else if (r1) +// STORE(B, 1); +// /* MORE CODE */ +// else +// STORE(B, 1); +// /* MORE CODE */ +// +// If we were trying to be smart and entirely rely on the control dependency to ensure order, ya well just don't the compiler +// is always smarter. The compiler is well within its rights to move all the STORE(B, 1) up and above all the conditionals breaking +// our reliance on the LDST control dependency. +// +// Things can get even more complicated especially in C++ when values may come from constexpr, inline, inline constexpr, static const, etc, +// variables and thus the compiler will do all sorts of transformations to reduce, remove, augment and change all your conditional code since +// it knows the values of the expressions or even parts of it at compile time. Even more aggressive optimizations like LTO might break code that was being cautious. +// Even adding simple short circuiting logic or your classic likely/unlikely macros can alter conditionals in ways you didn't expect. +// In short know enough about control dependencies to know not to ever use them. +// +// ******** Multi-Copy Store Atomicity && Barrier Cumulativity ******** +// +// Single-Copy Store Atomicity: All stores must become visible to all cores in the system at the same time. +// +// Multi-Copy Store Atomicity : This means a cpu sees its own stores before they become visible to other cpus, by forwarding them from the store buffer, +// but a store becomes visible to all other cpus at the same time when flushed from the store buffer. +// +// Non-Atomic Store Atomicity : A store becomes visible to different cpus at different times. +// +// Those are the above variations of Store Atomicity. Most processors have Non-Atomic Store Atomicity and thus you must program to that lowest common denominator. 
+// We can use barriers, with some caveats, to restore Multi-Copy Store Atomicity to a Non-Atomic system though we need to define a new granular definition for +// memory barriers to define this behaviour. Simple LDLD/LDST/STST/STLD definition is not enough to categorize memory barriers at this level. Let's start off +// with a simple example that breaks under a Non-Atomic Store Atomicity system and what potential hardware features allow this behaviour to be observed. +// +// NOTE: For all the below examples we assume no compile reordering and that the processor also executes the instructions with no local reorderings to make the examples simpler, +// to only show off the effects of Multi-Copy Store Atomicity. This is why we don't add any address dependencies, or mark explicit LDLD/LDST memory barriers. +// Thus you may assume all LDLD and LDST pairs have an address dependency between them, so that they are not reordered by the compiler or the local cpu. +// +// --------------------------------------------------------------------------------------------------------- +// Write-To-Read Causality, WRC, Litmus Test +// --------------------------------------------------------------------------------------------------------- +// Initial State: +// X = 0; Y = 0; +// --------------------------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 | Thread 2 +// --------------------------------------------------------------------------------------------------------- +// STORE(X, 1) | r0 = LOAD(X) | r1 = LOAD(Y) +// | STORE(Y, r0) | r2 = LOAD(X) +// --------------------------------------------------------------------------------------------------------- +// Observed: r0 = 1 && r1 = 1 && r2 = 0 +// --------------------------------------------------------------------------------------------------------- +// +// Let's go over this example in detail and whether the outcome shown above can be observed. In this example Thread 0 stores 1 into X. If Thread 1 observes the write to X, +// it stores the observed value into Y. Thread 2 loads from Y then X. This means if the load from Y returns 1, then we intuitively know the global store order +// was 1 to X and then 1 to Y. So is it possible then that the load from X in Thread 2 can return 0 in that case? Under a Multi-Copy Store Atomicity system, that would be +// impossible because once 1 was stored to X all cpus see that store so if Thread 2 saw the store to Y which can only happen after the store to X was observed, then +// Thread 2 must also have observed the store to X and return 1. As you may well have figured out, it is possible under a Non-Atomic Store Atomicity system to still +// observe the load from X returning 0 even if the above load from Y returned 1 in Thread 2. This completely breaks our intuition of causality. Let's now understand what hardware may cause this. +// +// This is possible on cpus that have Simultaneous Multi-Threading, SMT or HyperThreading in Intel parlance, which share resources such as store buffers or L1 cache. +// We are accustomed to the x86 way of SMT where each logical core shares Execution Units on the physical core but each logical core has their own statically partitioned +// cache and store buffer that is not visible to the other cpus. 
It is possible on cpus like ARMv7 or POWER, POWER9 supports 4 and even 8 threads per physical core, so +// to save on die space though yet enable this large number of threads per physical core it is common for these logical cores to all use the same store buffer or L1 cache +// per physical core on these processors. Let's take the above example and rerun it with this knowledge to get the observed behaviour outlined above. +// +// Assume Thread 0, Thread 1, and Thread 2 run on cpu 0, cpu 1, and cpu 2 respectively. Assume that cpu 0 and cpu 1 are two logical cores on the same physical core so this processor +// has an SMT value of 2. Thread 0 will store 1 into X. This store may be in the store buffer or in the L1 cache that cpu 1 also shares with cpu 0, thus cpu 1 has early access to cpu 0's stores. +// Thread 1 loads X which it observed as 1 early and then stores 1 into Y. Thread 2 may see the load from Y returning 1 but now the load from X returning 0 all because cpu 1 got early +// access to cpu 0 store due to sharing a L1 cache or store buffer. +// We will come back on how to fix this example with the proper memory barriers for the Non-Atomic Store Atomicity systems, but we need to detour first. +// +// We need to take a deeper dive into memory barriers to understand how to restore Multi-Copy Store Atomicity from a Non-Atomic Store Atomicity system. +// Let's start with a motivating example and we will be using the POWER architecture throughout this example because it encompasses all the possible observable behaviour. +// ARMv7 technically allows Non-Atomic Store Atomicity behaviour but no consumer ARMv7 chip actually observes this behaviour. +// ARMv8 reworked its model to specifically say it is a Multi-Copy Store Atomicity system. +// POWER is one of the last few popular consumer architectures that are guaranteed to have Non-Atomic Store Atomicity observable behaviour, thus we will be using it for the following examples. +// +// To preface, POWER has two types of memory barriers called lwsync and sync. The following table lists the guarantees provided by TSO, x86, and the lwsync instruction. +// The table gives a hint as to why using our previous definition of LDLD/LDST/STST/STLD isn't granular enough to categorize memory barrier instructions. +// +// TSO: | POWER lwsync memory barrier: +// LDLD : YES | LDLD : YES +// LDST : YES | LDST : YES +// STST : YES | STST : YES +// STLD : NO | STLD : NO +// A cumulative : YES | A cumulative : YES +// B cumulative : YES | B cumulative : YES +// IRIW : YES | IRIW : NO +// +// The TSO memory model provided by x86 seems to be exactly the same as POWER if we add lwsync memory barrier instructions in between each of the memory instructions. +// This provides us the exact same ordering guarantees as the TSO memory model. If we just looked at the 4 permutations of reorderings we would be inclined to assume that +// TSO has the exact same ordering as sprinkling lwsync in our code in between every pair of memory instructions. That is not the case because memory barrier causality and cumulativity differ in subtle ways. +// In this case they differ by the implicit guarantees from the TSO memory model versus those provided by the POWER lwsync memory barrier. +// So the lwsync memory barrier prevents reordering with instructions that have causality but does not prevent reordering with instructions that are completely independent. +// Let's dive into these concepts a bit more. 
+// +// Non-Atomic Store Atomicity architectures are prone to behaviours such as the non-causal outcome of the WRC test above. Architectures such as POWER defines memory barriers to enforce +// ordering with respect to memory accesses in remote cpus other than the cpu actually issuing the memory barrier. This is known as memory barrier cumulativity. +// How does the memory barrier issued on my cpu affect the view of memory accesses done by remote cpuss. +// +// Cumulative memory barriers are defined as follows - Take your time this part is very non-trivial: +// A-Cumulative: We denote group A as the set of memory instructions in this cpu or other cpus that are ordered before the memory barrier in this cpu. +// A-Cumulativity requires that memory instructions from any cpu that have performed prior to a memory load before the memory barrier on this cpu are also members of group A. +// B-Cumulative: We denote group B as the set of memory instructions in this cpu or other cpus that are ordered after the memory barrier in this cpu. +// B-Cumulativity requires that memory instructions from any cpu that perform after a load and including the load in that cpu that returns the value of a store in group B are +// also members of group B. +// IRIW : enforces a global ordering even for memory instructions that have no causality. The memory instructions are completely independent. +// +// --------------------------------------------------------------------------------------------------------- +// WRC Litmus Test +// --------------------------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 | Thread 2 +// --------------------------------------------------------------------------------------------------------- +// {i} : STORE(X, 1) | {ii} : r0 = LOAD(X) | {v} : r1 = LOAD(Y) +// | {iii} : lwsync | +// | {iv} : STORE(Y, r0) | {vi} : r2 = LOAD(X) +// --------------------------------------------------------------------------------------------------------- +// Outcome: r0 = 1 && r1 = 1 && r2 = 1 +// +// Group A of {iii} : {i} && {ii} +// +// Group B of {iii} : {iv} && {v} && {vi} +// --------------------------------------------------------------------------------------------------------- +// +// Using the WRC test again and inserting a POWER lwsync, don't concern yourself with why the memory barrier was inserted at that spot right now, we now see the distinctions of group A and group B. +// It demonstrates the A and B Cumulative nature of the lwsync instruction, {iii}. First group A, initially consists of {ii} and group B initially consists of {iv} from the local cpu that issued the lwsync. +// Since {ii} reads from {i} and assume {i} happens before {ii}, by definition of A-Cumulativity {i} is included in group A. +// Similarly {v} reads from {iv} and assume {iv} happens before {v}, then {v} is included in group B by definition of B-Cumulativity. +// {vi} is also included in group B since it happens after {v} by definition of B-Cumulativity. +// +// WRC litmus test represents a scenario where only a A-Cumulative memory barrier is needed. The lwsync not only provides the needed local LDST memory barrier for the local thread but also ensures +// that any write Thread 1 has read from before the memory barrier is kept in order with any write Thread 1 does after the memory barrier as far as any other thread observes. 
+// In other words it ensures that any write that has propagated to Thread 1 before the memory barrier is propagated to any other thread before the second store after the memory barrier in Thread 1 +// can propagate to other threads in the system. This is exactly the definition of A-Cumulativity and what we need to ensure that causality is maintained in the WRC Litmus Test example. +// With that lwsync in place it is now impossible to observe r0 = 1 && r1 = 1 && r2 = 0. The lwsync has restored causal ordering. Let's look at an example that requires B-Cumulativity. +// +// --------------------------------------------------------------------------------------------------------- +// Example 2 from POWER manual +// --------------------------------------------------------------------------------------------------------- +// Initial State: +// X = 0; Y = 0; Z = 0 +// --------------------------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 | Thread 2 +// --------------------------------------------------------------------------------------------------------- +// STORE(X, 1) | r0 = LOAD(Y) | r1 = LOAD(Z) +// STORE(Y, 1) | STORE(Z, r0) | r2 = LOAD(X) +// --------------------------------------------------------------------------------------------------------- +// Observed: r0 = 1 && r1 = 1 && r2 = 0 +// --------------------------------------------------------------------------------------------------------- +// +// This example is very similar to WRC except that we kinda extended the Message Passing through an additional shared variable instead. +// Think of this as Thread 0 writing some data into X, setting flag Y, Thread 1 waiting for flag Y then writing flag Z, and finally Thread 2 waiting for flag Z before reading the data. +// Take a minute to digest the above example and think about where a memory barrier, lwsync, should be placed. Don't peek at the solution below. +// +// --------------------------------------------------------------------------------------------------------- +// Example 2 from POWER manual +// --------------------------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 | Thread 2 +// --------------------------------------------------------------------------------------------------------- +// STORE(X, 1) | r0 = LOAD(Y) | r1 = LOAD(Z) +// lwsync | | +// STORE(Y, 1) | STORE(Z, r0) | r2 = LOAD(X) +// --------------------------------------------------------------------------------------------------------- +// +// First the lwsync provides the needed local STST memory barrier for the local thread, thus the lwsync here ensures that the store to X propagates to Thread 1 before the store to Y. +// B-Cumulativity applied to all operations after the memory barrier ensure that the store to X is +// kept in order with respect to the store to Z as far as all other threads participating in the dependency chain are concerned. This is the exact definition of B-Cumulativity. +// With this one lwsync the outcome outlined above is impossible to observe. If r0 = 1 && r1 = 1 then r2 must be properly observed to be 1. +// +// We know that lwsync only provides A-Cumulativity and B-Cumulativity. Now we will look at examples that have no causality constraints thus we need to grab heavier memory barriers +// that ensures in short we will say makes a store become visible to all processors, even those not on the dependency chains. Let's get to the first example. 
+// +// --------------------------------------------------------------------------------------------------------- +// Independent Reads of Independent Writes, IRIW, coined by Doug Lea +// --------------------------------------------------------------------------------------------------------- +// Initial State: +// X = 0; Y = 0; +// --------------------------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 | Thread 2 | Thread 3 +// --------------------------------------------------------------------------------------------------------- +// STORE(X, 1) | r0 = LOAD(X) | STORE(Y, 1) | r2 = LOAD(Y) +// | r1 = LOAD(Y) | | r3 = LOAD(X) +// --------------------------------------------------------------------------------------------------------- +// Observed: r0 = 1 && r1 = 0 && r2 = 1 && r3 = 0 +// --------------------------------------------------------------------------------------------------------- +// +// The IRIW example above clearly shows that writes can be propagated to different cpus in completely different orders. +// Thread 1 sees the store to X but not the store to Y while Thread 3 sees the store to Y but not the store to X, the complete opposite. +// Also to the keen eye you may have noticed this example is a slight modification of the Store Buffer example so try to guess where the memory barriers would go. +// +// --------------------------------------------------------------------------------------------------------- +// Independent Reads of Independent Writes, IRIW, coined by Doug Lea +// --------------------------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 | Thread 2 | Thread 3 +// --------------------------------------------------------------------------------------------------------- +// STORE(X, 1) | r0 = LOAD(X) | STORE(Y, 1) | r2 = LOAD(Y) +// | sync | | sync +// | r1 = LOAD(Y) | | r3 = LOAD(X) +// --------------------------------------------------------------------------------------------------------- +// +// To ensure that the above observation is forbidden we need to add a full sync memory barrier on both the reading threads. Think of sync as restoring sequential consistency. +// The sync memory barrier ensures that any writes that Thread 1 has read from before the memory barrier are fully propagated to all threads before the reads are satisfied after the memory barrier. +// The same can be said for Thread 3. This is why the sync memory barrier is needed because there is no partial causal ordering here or anything that can be considered for our A and B Cumulativity definitions. +// We must ensure that all writes have been propagated to all cpus before proceeding. This gives way to the difference between sync and lwsync with regards to visibility of writes and cumulativity. +// sync guarantees that all program-order previous stores must have been propagated to all other cpus before the memory instructions after the memory barrier. +// lwsync does not ensure that stores before the memory barrier have actually propagated to any other cpu before memory instructions after the memory barrier, but it will keep stores before and after the +// lwsync in order as far as other cpus are concerned that are within the dependency chain. +// +// Fun fact while ARMv7 claims to be Non-Atomic Store Atomicity no mainstream ARM implementation that I have seen has shown cases of Non-Atomic Store Atomicity. 
+// It's allowed by the ARMv7 memory model and thus you have to program to that. ARMv8 changes this and states that it has Multi-Copy Store Atomicity. +// +// ******** Release-Acquire Semantics ******** +// +// The most useful and common cases where Release-Acquire Semantics are used in every day code is in message passing and mutexes. Let's get onto some examples and the C++ definition of Release-Acquire. +// +// ACQUIRE: +// An Acquire operation is a one-way memory barrier whereby all loads and stores after the acquire operation cannot move up and above the acquire operation. +// Loads and stores before the acquire operation can move down past the acquire operation. An acquire operation should always be paired with a Release operation on the SAME atomic object. +// +// RELEASE: +// A Release operation is a one-way memory barrier whereby all loads and stores before the release operation cannot move down and below the release operation. +// Loads and stores after the release operation can move up and above the release operation. A release operation should always be paired with an Acquire operation on the SAME atomic object. +// +// Release-Acquire pair does not create a full memory barrier but it guarantees that all memory instructions before a Release operation on an atomic object M are visible after an Acquire +// operation on that same atomic object M. Thus these semantics usually are enough to preclude the need for any other memory barriers. +// The synchronization is established only between the threads Releasing and Acquiring the same atomic object M. +// +// --------------------------------------------------- +// Critical Section +// --------------------------------------------------- +// Thread 0 | Thread 1 +// --------------------------------------------------- +// mtx.lock() - Acquire | mtx.lock() - Acquire +// STORE(X, 1) | r0 = LOAD(X) +// mtx.unlock() - Release | mtx.unlock() - Release +// --------------------------------------------------- +// +// A mutex only requires Release-Acquire semantics to protect the critical section. We do not care if operations above the lock leak into the critical section or that operations below the unlock leak into the +// critical section because they are outside the protected region of the lock()/unlock() pair. Release-Acquire semantics does guarantee that everything inside the critical section cannot leak out. +// Thus all accesses of all previous critical sections for the mutex are guaranteed to have completed and be visible when the mutex is handed off to the next party due to the Release-Acquire chaining. +// This also means that mutexes do not provide or restore Multi-Copy Store Atomicity to any memory instructions outside the mutex, like the IRIW example since it does not emit full memory barriers. +// +// ------------------------------------------------------ +// Message Passing +// ------------------------------------------------------ +// Thread 0 | Thread 1 +// ------------------------------------------------------ +// STORE(DATA, 1) | while (!LOAD_ACQUIRE(FLAG)) +// | +// STORE_RELEASE(FLAG, 1) | r0 = LOAD(DATA) +// ------------------------------------------------------ +// +// This is a common message passing idiom that also shows the use of Release-Acquire semantics. It should be obvious by the definitions outlined above why this works. +// An Acquire operation attached to a load needs to provide a LDLD and LDST memory barrier according to our definition of acquire. 
This is provided by default on x86 TSO thus no memory barrier is emitted. +// A Release operation attached to a store needs to provide a STST and LDST memory barrier according to our definition of release. This is provided by default on x86 TSO thus no memory barrier is emitted. +// +// A couple of things of note here. One is that by attaching the semantics of a memory model directly to the memory instruction/operation itself we can take advantage of the fact the some processors +// already provide guarantees between memory instructions and thus we do not have to emit memory barriers. Another thing of note is that the memory model is directly attached to the operation, +// so you must do the Release-Acquire pairing on the SAME object which in this case is the FLAG variable. Doing an Acquire or Release on a separate object has no guarantee to observe an Acquire or Release on a different object. +// This better encapsulates the meaning of the code and also allows the processor to potentially do more optimizations since a stand alone memory barrier will order all memory instructions of a given type before and after the barrier. +// Where as the memory ordering attached to the load or store tells the processor that it only has to order memory instructions in relation to that specific load or store with the given memory order. +// +// +// --------------------------------------------------------------------------------------------------------- +// Release Attached to a Store VS. Standalone Fence +// --------------------------------------------------------------------------------------------------------- +// STORE(DATA, 1) | STORE(DATA, 1) +// | ATOMIC_THREAD_FENCE_RELEASE() +// STORE_RELEASE(FLAG, 1) | STORE_RELAXED(FLAG, 1) +// STORE_RELAXED(VAR, 2) | STORE_RELAXED(VAR, 2) +// --------------------------------------------------------------------------------------------------------- +// ARMv8 Assembly +// --------------------------------------------------------------------------------------------------------- +// str 1, DATA | str 1, DATA +// | dmb ish +// stlr 1, FLAG | str 1, FLAG +// str 2, VAR | str 2, VAR +// --------------------------------------------------------------------------------------------------------- +// +// In the above example the release is attached to the FLAG variable, thus synchronization only needs to be guaranteed for that atomic variable. +// It is entirely possible for the VAR relaxed store to be reordered above the release store. +// In the fence version, since the fence is standalone, there is no notion where the release is meant to be attached to thus the fence must prevent all subsequent relaxed stores +// from being reordered above the fence. The fence provides a stronger guarantee whereby now the VAR relaxed store cannot be moved up and above the release operation. +// Also notice the ARMv8 assembly is different, the release fence must use the stronger dmb ish barrier instead of the dedicated release store instruction. +// We dive more into fences provided by eastl::atomic below. +// +// Release-Acquire semantics also have the property that it must chain through multiple dependencies which is where our knowledge from the previous section comes into play. +// Everything on the Release-Acquire dependency chain must be visible to the next hop in the chain. 
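+//
+// Written against the C++/eastl::atomic interface, such a chained hand-off looks roughly like this; std::atomic is shown for
+// familiarity and the names are made up. The litmus tables below then recast the earlier POWER examples in the pseudo-code
+// notation used so far.
+//
+// #include <atomic>
+// #include <cassert>
+//
+// std::atomic<int> x{0}, y{0}, z{0};
+//
+// void thread0()
+// {
+//     x.store(1, std::memory_order_relaxed);
+//     y.store(1, std::memory_order_release);            // releases the store to x to whoever acquires y
+// }
+//
+// void thread1()
+// {
+//     while (y.load(std::memory_order_acquire) == 0) {} // acquire y; the store to x is now visible here
+//     z.store(1, std::memory_order_release);            // pass the chain along; x is released again via z
+// }
+//
+// void thread2()
+// {
+//     while (z.load(std::memory_order_acquire) == 0) {} // acquire z; the whole chain is now visible here
+//     assert(x.load(std::memory_order_relaxed) == 1);   // must hold; the Release-Acquire chain made x visible
+// }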
+// +// --------------------------------------------------------------------------------------------------------- +// Example 2 from POWER manual +// --------------------------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 | Thread 2 +// --------------------------------------------------------------------------------------------------------- +// STORE(X, 1) | r0 = LOAD_ACQUIRE(Y) | r1 = LOAD_ACQUIRE(Z) +// STORE_RELEASE(Y, 1) | STORE_RELEASE(Z, r0) | r2 = LOAD(X) +// --------------------------------------------------------------------------------------------------------- +// +// --------------------------------------------------------------------------------------------------------- +// Write-To-Read Causality, WRC, Litmus Test +// --------------------------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 | Thread 2 +// --------------------------------------------------------------------------------------------------------- +// STORE(X, 1) | r0 = LOAD(X) | r1 = LOAD_ACQUIRE(Y) +// | STORE_RELEASE(Y, r0) | r2 = LOAD(X) +// --------------------------------------------------------------------------------------------------------- +// +// You may notice both of these examples from the previous section. We replaced the standalone POWER memory barrier instructions with Release-Acquire semantics attached directly to the operations where we want causality preserved. +// We have transformed those examples to use the eastl::atomic memory model. +// Take a moment to digest these examples in relation to the definition of Release-Acquire semantics. +// +// The Acquire chain can be satisfied by reading the value from the store release or any later stored headed by that release operation. The following examples will make this clearer. +// +// ------------------------------------------------------ +// Release Sequence Headed +// ------------------------------------------------------ +// Initial State: +// DATA = 0; FLAG = 0; +// ------------------------------------------------------ +// Thread 0 | Thread 1 +// ------------------------------------------------------ +// STORE(DATA, 1) | r0 = LOAD_ACQUIRE(FLAG) +// | +// STORE_RELEASE(FLAG, 1) | r1 = LOAD(DATA) +// STORE_RELAXED(FLAG, 3) | +// ------------------------------------------------------ +// Observed: r0 = 3 && r1 = 0 +// ------------------------------------------------------ +// +// In the above example we may read the value 3 from FLAG which was not the release store, but it was headed by that release store. Thus we observed a later store and therefore it is still valid to then observe r1 = 1. +// The stores to FLAG from the STORE_RELEASE up to but not including the next STORE_RELEASE operation make up the release sequence headed by the first release store operation. Any store on that sequence can be used to enforce +// causality on the load acquire. +// +// ******** Consume is currently not useful ******** +// +// Consume is a weaker form of an acquire barrier and creates the Release-Consume barrier pairing. +// Consume states that a load operation on an atomic object M cannot allow any loads or stores dependent on the value loaded by the operation to be reordered before the operation. +// To understand consume we must first understand dependent loads. +// You might encounter this being called a data dependency or an address dependency in some literature. 
+// +// -------------------------------------------------------------- +// Address Dependency +// -------------------------------------------------------------- +// Initial State: +// DATA = 0; PTR = nullptr; +// -------------------------------------------------------------- +// Thread 0 | Thread 1 +// -------------------------------------------------------------- +// STORE(DATA, 1) | r0 = LOAD(PTR) - typeof(r0) = int* +// | +// STORE(PTR, &DATA) | r1 = LOAD(r0) - typeof(r1) = int +// -------------------------------------------------------------- +// +// There is a clear dependency here where we cannot load from *int until we actually read the int* from memory. +// Now it is possible for Thread 1's load from *ptr to be observed before the store to DATA, therefore it can lead to r0 = &DATA && r1 = 0. +// While this is a failure of causality, it is allowed by some cpus such as the DEC Alpha and I believe Blackfin as well. +// Thus a data dependency memory barrier must be inserted between the data dependent loads in Thread 1. Note that this would equate to a nop on any processor other than the DEC Alpha. +// +// This can occur for a variety of hardware reasons. We learned about invalidation queues. It is possible that the invalidation for DATA gets buffered in Thread 1. DEC Alpha allows the Thread 1 +// load from PTR to continue without marking the entries in its invalidation queue. Thus the subsequent load is allowed to return the old cached value of DATA instead of waiting for the +// marked entries in the invalidation queue to be processed. It is a design decision of the processor not to do proper dependency tracking here and instead relying on the programmer to insert memory barriers. +// +// This data dependent ordering guarantee is useful because in places where we were using an Acquire memory barrier we can reduce it to this Consume memory barrier without any hardware barriers actually emitted on every modern processor. +// Let's take the above example, translate it to Acquire and Consume memory barriers and then translate it to the ARMv7 assembly and see the difference. +// +// --------------------------------------------------------------- --------------------------------------------------------------- +// Address Dependency - Release-Acquire Address Dependency - Release-Acquire - ARMv7 Assembly +// --------------------------------------------------------------- --------------------------------------------------------------- +// Thread 0 | Thread 1 Thread 0 | Thread 1 +// --------------------------------------------------------------- --------------------------------------------------------------- +// STORE(DATA, 1) | r0 = LOAD_ACQUIRE(PTR) STORE(DATA, 1) | r0 = LOAD(PTR) +// | dmb ish | dmb ish +// STORE_RELEASE(PTR, &DATA) | r1 = LOAD(r0) STORE(PTR, &DATA) | r1 = LOAD(r0) +// --------------------------------------------------------------- --------------------------------------------------------------- +// +// To get Release-Acquire semantics on ARMv7 we need to emit dmb ish; memory barriers. 
+// +// --------------------------------------------------------------- --------------------------------------------------------------- +// Address Dependency - Release-Consume Address Dependency - Release-Consume - ARMv7 Assembly +// --------------------------------------------------------------- --------------------------------------------------------------- +// Thread 0 | Thread 1 Thread 0 | Thread 1 +// --------------------------------------------------------------- --------------------------------------------------------------- +// STORE(DATA, 1) | r0 = LOAD_CONSUME(PTR) STORE(DATA, 1) | r0 = LOAD(PTR) +// | dmb ish | +// STORE_RELEASE(PTR, &DATA) | r1 = LOAD(r0) STORE(PTR, &DATA) | r1 = LOAD(r0) +// --------------------------------------------------------------- --------------------------------------------------------------- +// +// Data Dependencies can not only be created by read-after-write/RAW on registers, but also by RAW on memory locations too. Let's look at some more elaborate examples. +// +// --------------------------------------------------------------- --------------------------------------------------------------- +// Address Dependency on Registers - Release-Consume - ARMv7 Address Dependency on Memory - Release-Consume - ARMv7 +// --------------------------------------------------------------- --------------------------------------------------------------- +// Thread 0 | Thread 1 Thread 0 | Thread 1 +// --------------------------------------------------------------- --------------------------------------------------------------- +// STORE(DATA, 1) | r0 = LOAD(PTR) STORE(DATA, 1) | r0 = LOAD(PTR) +// | r1 = r0 + 0 | STORE(TEMP, r0) +// dmb ish | r2 = r1 - 0 dmb ish | r1 = LOAD(TEMP) +// STORE(PTR, &DATA) | r3 = LOAD(r2) STORE(PTR, &DATA) | r2 = LOAD(r1) +// --------------------------------------------------------------- --------------------------------------------------------------- +// +// The above shows a more elaborate example of how data dependent dependencies flow through RAW chains either through memory or through registers. +// +// Notice by identifying that this is a data dependent operation and asking for a consume ordering, we can completely eliminate the memory barrier on Thread 1 since we know ARMv7 does not reorder data dependent loads. Neat. +// Unfortunately every major compiler upgrades a consume to an acquire ordering, because the consume ordering in the standard has a stronger guarantee and requires the compiler to do complicated dependency tracking. +// Dependency chains in source code must be mapped to dependency chains at the machine instruction level until a std::kill_dependency in the source code. +// +// ---------------------------------------------------------------- +// Non-Address Dependency && Multiple Chains +// ---------------------------------------------------------------- +// Initial State: +// std::atomic FLAG; int DATA[1] = 0; +// ---------------------------------------------------------------- +// Thread 0 | Thread 1 +// ---------------------------------------------------------------- +// STORE(DATA[0], 1) | int f = LOAD_CONSUME(FLAG) +// | int x = f +// | if (x) return Func(x); +// | +// STORE_RELEASE(FLAG, 1) | Func(int y) return DATA[y - y] +// ---------------------------------------------------------------- +// +// This example is really concise but there is a lot going on. Let's digest it. 
+// First is that the standard allows consume ordering even on what we will call not true machine level dependencies like a ptr load and then a load from that ptr as shown in the previous examples. +// Here the dependency is between two ints, and the dependency chain on Thread 1 is as follows. f -> x -> y -> DATA[y - y]. The standard requires that source code dependencies on the loaded value +// from consume flow thru assignments and even thru function calls. Also notice we added a dependency on the dereference of DATA with the value loaded from consume which while it does nothing actually abides by the standard +// by enforcing a source code data dependent load on the consume operation. You may see this referred to as artificial data dependencies in other texts. +// If we assume the compiler is able to track all these dependencies, the question is how do we enforce these dependencies at the machine instruction level. Let's go back to our ptr dependent load example. +// +// ---------------------------------------------------------------- +// addi r0, pc, offset; +// ldr r1, 0(r0); +// ldr r2, 0(r1); +// ---------------------------------------------------------------- +// +// The above pseudo assembly does a pc relative calculation to find the address of ptr. We then load ptr and then continue the dependency chain by loading the int from the loaded ptr. +// Thus r0 has type of int**, which we use to load r1 an int* which we use to load our final value of r2 which is the int. +// The key observation here is that most instructions provided by most architectures only allow moving from a base register + offset into a destination register. +// This allows for trivial capturing of data dependent loads through pointers. But how do we capture the data dependency of DATA[y - y]. We would need something like this. +// +// ---------------------------------------------------------------- +// sub r1, r0, r0; // Assume r0 holds y from the Consume Operation +// add r3, r1, r2; // Assume r2 holds the address of DATA[0] +// ldr r4, 0(r3); +// ---------------------------------------------------------------- +// +// We cannot use two registers as both arguments to the load instruction. Thus to accomplish this you noticed we had to add indirect data dependencies through registers to compute the final address from the consume +// load of y and then load from the final computed address. The compiler would have to recognize all these dependencies and enforce that they be maintained in the generated assembly. +// The compiler must ensure the entire syntactic, source code, data-dependency chain is enforced in the generated assembly, no matter how long such chain may be. +// Because of this and other issues, every major compiler unilaterally promotes consume to an acquire operation across the board. Read reference [15] for more information. +// This completely removes the actual usefulness of consume for the pointer dependent case which is used quite heavily in concurrent read heavy data structures where updates are published via pointer swaps. +// +// ******** read_depends use case - Release-ReadDepends Semantics ******** +// +// eastl::atomic provides a weaker read_depends operation that only encapsulates the pointer dependency case above. Loading from a pointer and then loading the value from the loaded pointer. +// The read_depends operation can be used on loads from only an eastl::atomic type. The return pointer of the load must and can only be used to then further load values. And that is it. 
+// If you are unsure, upgrade this load to an acquire operation.
+//
+// MyStruct* ptr = gAtomicPtr.load(memory_order_read_depends);
+// int a = ptr->a;
+// int b = ptr->b;
+// return a + b;
+//
+// The loads from ptr after the gAtomicPtr load ensure that the correct values of a and b are observed. This pairs with a Release operation on the writer side by releasing gAtomicPtr.
+//
+//
+// As said above the returned pointer from a .load(memory_order_read_depends) can only be used to then further load values.
+// Dereferencing(*) and Arrow Dereferencing(->) are valid operations on return values from .load(memory_order_read_depends).
+//
+// MyStruct* ptr = gAtomicPtr.load(memory_order_read_depends);
+// int a = ptr->a; - VALID
+// int a = *ptr; - VALID
+//
+// Since dereferencing is just indexing via some offset from some base address, this also means addition and subtraction of constants is ok.
+//
+// int* ptr = gAtomicPtr.load(memory_order_read_depends);
+// int a = *(ptr + 1) - VALID
+// int a = *(ptr - 1) - VALID
+//
+// Casts also work correctly since casting is just offsetting a pointer depending on the inheritance hierarchy or if using intrusive containers.
+//
+// ReadDependsIntrusive** intrusivePtr = gAtomicPtr.load(memory_order_read_depends);
+// ReadDependsIntrusive* ptr = ((ReadDependsIntrusive*)(((char*)intrusivePtr) - offsetof(ReadDependsIntrusive, next)));
+//
+// Base* basePtr = gAtomicPtr.load(memory_order_read_depends);
+// Derived* derivedPtr = static_cast<Derived*>(basePtr);
+//
+// Both of the above castings from the result of the load are valid for this memory order.
+//
+// You can reinterpret_cast the returned pointer value to a uintptr_t to set bits, clear bits, or xor bits but the pointer must be casted back before doing anything else.
+//
+// int* ptr = gAtomicPtr.load(memory_order_read_depends);
+// ptr = reinterpret_cast<int*>(reinterpret_cast<uintptr_t>(ptr) & ~3);
+//
+// Do not use the results of any equality or relational operator (==, !=, >, <, >=, <=) in the computation of offsets before dereferencing.
+// As we learned above in the Control Dependencies section, CPUs will not order Load-Load Control Dependencies. Relational and equality operators are often compiled using branches.
+// They don't have to be compiled to branches; conditional instructions could be used. Or some architectures provide comparison instructions such as set less than which do not need
+// branches when using the result of the relational operator in arithmetic statements. Then again short circuiting may need to introduce branches since C++ guarantees the
+// rest of the expression must not be evaluated.
+// The following odd code is forbidden.
+//
+// int* ptr = gAtomicPtr.load(memory_order_read_depends);
+// int* ptr2 = ptr + (ptr >= 0);
+// int a = *ptr2;
+//
+// Only equality comparisons against nullptr are allowed. This is because the compiler cannot assume that the address of the loaded value is some known address and substitute our loaded value.
+// int* ptr = gAtomicPtr.load(memory_order_read_depends);
+// if (ptr == nullptr); - VALID
+// if (ptr != nullptr); - VALID
+//
+// Thus the above sentence that states:
+// The return pointer of the load must and can only be used to then further load values. And that is it.
+// must be respected by the programmer. This memory order is an optimization added for efficient read heavy pointer swapping data structures. If you are unsure, use memory_order_acquire.
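+//
+// Putting the rules above together, a minimal sketch of the intended pattern (MyStruct and gAtomicPtr are the
+// illustrative names already used above; the writer publishes with a release store, the reader only loads through
+// the returned pointer):
+//
+// struct MyStruct { int a; int b; };
+// static eastl::atomic<MyStruct*> gAtomicPtr = nullptr;
+//
+// // Writer thread: fully construct the object, then publish it.
+// MyStruct* p = new MyStruct{1, 2};
+// gAtomicPtr.store(p, memory_order_release);
+//
+// // Reader thread: read_depends load, then only data-dependent loads through the pointer.
+// MyStruct* ptr = gAtomicPtr.load(memory_order_read_depends);
+// if (ptr != nullptr)               // nullptr equality comparison is allowed
+// {
+//     int sum = ptr->a + ptr->b;    // ordered after the gAtomicPtr load via the data dependency
+// }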
+// +// ******** Relaxed && eastl::atomic guarantees ******** +// +// We saw various ways that compiler barriers do not help us and that we need something more granular to make sure accesses are not mangled by the compiler to be considered atomic. +// Ensuring these guarantees like preventing dead-store elimination or the splitting of stores into smaller sub stores is where the C/C++11 +// standard comes into play to define what it means to operate on an atomic object. +// These basic guarantees are provided via new compiler intrinsics on gcc/clang that provide explicit indication to the compiler. +// Or on msvc by casting the underlying atomic T to a volatile T*, providing stronger compiler guarantees than the standard requires. +// Essentially volatile turns off all possible optimizations on that variable access and ensures all volatile variables cannot be +// reordered across sequence points. Again we are not using volatile here to guarantee atomicity, we are using it in its very intended purpose +// to tell the compiler it cannot assume anything about the contents of that variable. Now let's dive into the base guarantees of eastl::atomic. +// +// The standard defines the following for all operations on an atomic object M. +// +// Write-Write Coherence: +// If an operation A modifies an atomic object M(store), happens before an operation B that modifies M(store), then A shall be earlier than B in the modification order of M. +// +// Read-Read Coherence: +// If a value computation A on an atomic object M(load), happens before a value computation B on M(load), and A takes its value from a side effect X on M(from a previous store to M), then the value +// computed by B shall either be the value stored by X or some later side effect Y on M, where Y follows X in the modification order of M. +// +// Read-Write Coherence: +// If a value computation A on an atomic object M(load), happens before an operation B that modifies M(store), then A shall take its value from a side effect X on M, where X precedes B in the modification +// order of M. +// +// Write-Read Coherence: +// If a side effect X on an atomic object M(store), happens before a value computation B on M(load), then the evaluation of B must take its value from X or from some side effect Y that follows X in the +// modification order of M. +// +// What does all this mean. This is just a pedantic way of saying that the preceding coherence requirements disallow compiler reordering of atomic operations to a single atomic object. +// This means all operations must be emitted by the compiler. Stores cannot be dead-store eliminated even if they are the only stores. +// Loads cannot have common subexpression elimination performed on them even if they are the only loads. +// Loads and Stores to the same atomic object cannot be reordered by the compiler. +// Compiler cannot introduce extra loads or stores to the atomic object. +// Compiler also cannot reload from an atomic object, it must save and store to a stack slot. +// Essentially this provides all the necessary guarantees needed when treating an object as atomic from the compilers point of view. +// +// ******** Same Address LoadLoad Reordering ******** +// +// It is expected that same address operations cannot and are not reordered with each other. It is expected that operations to the same address have sequential consistency because +// they are to the same address. 
If you picture a cpu executing instructions, how is it possible to reorder instructions to the same address and yet keep program behaviour the same?
+// Same Address LoadLoad Reordering is one weakening that can be done while keeping observed program behaviour the same for a single-threaded program.
+// More formally, A and B are two memory instructions onto the same address P, where A is program ordered before B. If A and B are both loads then their order need not be maintained.
+// If B is a store then it cannot retire the store before instruction A completes. If A is a store and B is a load, then B must get its value forwarded from the store buffer or observe a later store
+// from the cache. Thus Same Address LDST, STST, STLD cannot be reordered but Same Address LDLD can be reordered.
+// Intel Itanium and SPARC RMO cpus allow and do Same Address LoadLoad Reordering.
+// Let's look at an example.
+//
+// ---------------------------
+// Same Address LoadLoad
+// ---------------------------
+// Initial State:
+// x = 0;
+// ---------------------------
+// Thread 0    | Thread 1
+// ---------------------------
+// STORE(x, 1) | r0 = LOAD(x)
+//             | r1 = LOAD(x)
+// ---------------------------
+// Observed: r0 = 1 && r1 = 0
+// ---------------------------
+//
+// Notice in the above example it has appeared as if the two loads from the same address have been reordered. If we first observed the new store of 1, then the next load should not observe a value in the past.
+// Many programmers expect same address sequential consistency: all accesses to a single address appear to execute in a sequential order.
+// Notice this violates the Read-Read Coherence for all atomic objects defined by the std and thus provided by eastl::atomic.
+//
+// All operations on eastl::atomic, regardless of the memory ordering of the operation, provide Same Address Sequential Consistency since they must abide by the coherence rules above.
+//
+// ******** eastl::atomic_thread_fence ********
+//
+// eastl::atomic_thread_fence(relaxed) : Provides no ordering guarantees
+// eastl::atomic_thread_fence(acquire) : Prevents all prior loads from being reordered with all later loads and stores, LDLD && LDST memory barrier
+// eastl::atomic_thread_fence(release) : Prevents all prior loads and stores from being reordered with all later stores, STST && LDST memory barrier
+// eastl::atomic_thread_fence(acq_rel) : Union of acquire and release, LDLD && STST && LDST memory barrier
+// eastl::atomic_thread_fence(seq_cst) : Full memory barrier that provides a single total order
+//
+// See Reference [9] and Fence-Fence, Atomic-Fence, Fence-Atomic Synchronization, Atomics Order and Consistency in the C++ std.
+//
+// ******** Atomic && Fence Synchronization ********
+//
+// ---------------------------
+// Fence-Fence Synchronization
+// ---------------------------
+// A release fence A synchronizes-with an acquire fence B if there exist operations X and Y on the same atomic object M, such that fence A is sequenced-before operation X and X modifies M,
+// operation Y is sequenced-before B and Y reads the value written by X.
+// In this case all non-atomic and relaxed atomic stores that are sequenced-before fence A will happen-before all non-atomic and relaxed atomic loads after fence B.
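+//
+// Written out with eastl::atomic, a minimal sketch of Fence-Fence synchronization looks as follows (gFlag and
+// gData are illustrative names; the relaxed flag store and load are the X and Y operations the fences pair through):
+//
+// static int gData = 0;
+// static eastl::atomic<int> gFlag = 0;
+//
+// // Thread 0 - writer
+// gData = 42;
+// atomic_thread_fence(memory_order_release);
+// gFlag.store(1, memory_order_relaxed);          // X: modifies gFlag
+//
+// // Thread 1 - reader
+// if (gFlag.load(memory_order_relaxed) == 1)     // Y: reads the value written by X
+// {
+//     atomic_thread_fence(memory_order_acquire);
+//     int r = gData;                             // guaranteed to observe 42
+// }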
+// +// ---------------------------- +// Atomic-Fence Synchronization +// ---------------------------- +// An atomic release operation A on atomic object M synchronizes-with an acquire fence B if there exists some atomic operation X on atomic object M, such that X is sequenced-before B and reads +// the value written by A. +// In this case all non-atomic and relaxed atomic stores that are sequenced-before atomic release operation A will happen-before all non-atomic and relaxed atomic loads after fence B. +// +// ---------------------------- +// Fence-Atomic Synchronization +// ---------------------------- +// A release fence A synchronizes-with an atomic acquire operation B on an atomic object M if there exists an atomic operation X such that A is sequenced-before X, X modifies M and B reads the +// value written by X. +// In this case all non-atomic and relaxed atomic stores that are sequenced-before fence A will happen-before all non-atomic and relaxed atomic loads after atomic acquire operation B. +// +// This can be used to add synchronization to a series of several relaxed atomic operations, as in the following trivial example. +// +// ---------------------------------------------------------------------------------------- +// Initial State: +// x = 0; +// eastl::atomic y = 0; +// z = 0; +// eastl::atomic w = 0; +// ---------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 +// ---------------------------------------------------------------------------------------- +// x = 2 | r0 = y.load(memory_order_relaxed); +// z = 2 | r1 = w.load(memory_order_relaxed); +// atomic_thread_fence(memory_order_release); | atomic_thread_fence(memory_order_acquire); +// y.store(1, memory_order_relaxed); | r2 = x +// w.store(1, memory_order_relaxed); | r3 = z +// ---------------------------------------------------------------------------------------- +// Observed: r0 = 1 && r1 = 1 && r2 = 0 && r3 = 0 +// ---------------------------------------------------------------------------------------- +// +// ******** Atomic vs Standalone Fence ******** +// +// A sequentially consistent fence is stronger than a sequentially consistent operation because it is not tied to a specific atomic object. +// An atomic fence must provide synchronization with ANY atomic object whereas the ordering on the atomic object itself must only provide +// that ordering on that SAME atomic object. Thus this can provide cheaper guarantees on architectures with dependency tracking hardware. +// Let's look at a concrete example that will make this all clear. 
+// +// ---------------------------------------------------------------------------------------- +// Initial State: +// eastl::atomic y = 0; +// eastl::atomic z = 0; +// ---------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 +// ---------------------------------------------------------------------------------------- +// z.store(2, memory_order_relaxed); | r0 = y.load(memory_order_relaxed); +// atomic_thread_fence(memory_order_seq_cst); | atomic_thread_fence(memory_order_seq_cst); +// y.store(1, memory_order_relaxed); | r1 = z.load(memory_order_relaxed); +// ---------------------------------------------------------------------------------------- +// Observed: r0 = 1 && r1 = 0 +// ---------------------------------------------------------------------------------------- +// +// Here the two sequentially consistent fences synchronize-with each other thus ensuring that if we observe r0 = 1 then we also observe that r1 = 2. +// In the above example if we observe r0 = 1 it is impossible to observe r1 = 0. +// +// ---------------------------------------------------------------------------------------- +// Initial State: +// eastl::atomic x = 0; +// eastl::atomic y = 0; +// eastl::atomic z = 0; +// ---------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 +// ---------------------------------------------------------------------------------------- +// z.store(2, memory_order_relaxed); | r0 = y.load(memory_order_relaxed); +// x.fetch_add(1, memory_order_seq_cst); | x.fetch_add(1, memory_order_seq_cst); +// y.store(1, memory_order_relaxed); | r1 = z.load(memory_order_relaxed); +// ---------------------------------------------------------------------------------------- +// Observed: r0 = 1 && r1 = 0 +// ---------------------------------------------------------------------------------------- +// +// Here the two fetch_add sequentially consistent operations on x synchronize-with each other ensuring that if we observe r0 = 1 then we cannot observer r1 = 0; +// The thing to take note here is that we synchronized on the SAME atomic object, that being the atomic object x. +// Note that replacing the x.fetch_add() in Thread 1 with a sequentially consistent operation on another atomic object or a sequentially consistent fence can lead to +// observing r1 = 0 even if we observe r0 = 1. For example the following code may fail. 
+// +// ---------------------------------------------------------------------------------------- +// Initial State: +// eastl::atomic x = 0; +// eastl::atomic y = 0; +// eastl::atomic z = 0; +// ---------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 +// ---------------------------------------------------------------------------------------- +// z.store(2, memory_order_relaxed); | r0 = y.load(memory_order_relaxed); +// | x.fetch_add(1, memory_order_seq_cst); +// y.fetch_add(1, memory_order_seq_cst); | r1 = z.load(memory_order_relaxed); +// ---------------------------------------------------------------------------------------- +// Observed: r0 = 1 && r1 = 0 +// ---------------------------------------------------------------------------------------- +// +// ---------------------------------------------------------------------------------------- +// Initial State: +// eastl::atomic x = 0; +// eastl::atomic y = 0; +// eastl::atomic z = 0; +// ---------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 +// ---------------------------------------------------------------------------------------- +// z.store(2, memory_order_relaxed); | r0 = y.load(memory_order_relaxed); +// x.fetch_add(1, memory_order_seq_cst); | atomic_thread_fence(memory_order_seq_cst); +// y.store(1, memory_order_relaxed); | r1 = z.load(memory_order_relaxed); +// ---------------------------------------------------------------------------------------- +// Observed: r0 = 1 && r1 = 0 +// ---------------------------------------------------------------------------------------- +// +// In this example it is entirely possible that we observe r0 = 1 && r1 = 0 even though we have source code causality and sequentially consistent operations. +// Observability is tied to the atomic object on which the operation was performed and the thread fence doesn't synchronize-with the fetch_add because +// there is no load above the fence that reads the value from the fetch_add. +// +// ******** Sequential Consistency Semantics ******** +// +// See section, Order and consistency, in the C++ std and Reference [9]. +// +// A load with memory_order_seq_cst performs an acquire operation +// A store with memory_order_seq_cst performs a release operation +// A RMW with memory_order_seq_cst performs both an acquire and a release operation +// +// All memory_order_seq_cst operations exhibit the below single total order in which all threads observe all modifications in the same order +// +// Paraphrasing, there is a single total order on all memory_order_seq_cst operations, S, such that each sequentially consistent operation B that loads a value from +// atomic object M observes either the result of the last sequentially consistent modification A on M, or some modification on M that isn't memory_order_seq_cst. +// For atomic modifications A and B on an atomic object M, B occurs after A in the total order of M if: +// there is a memory_order_seq_cst fence X whereby A is sequenced before X, and X precedes B, +// there is a memory_order_seq_cst fence Y whereby Y is sequenced before B, and A precedes Y, +// there are memory_order_seq_cst fences X and Y such that A is sequenced before X, Y is sequenced before B, and X precedes Y. +// +// Let's look at some examples using memory_order_seq_cst. 
+// +// ------------------------------------------------------------ +// Store-Buffer +// ------------------------------------------------------------ +// Initial State: +// x = 0; y = 0; +// ------------------------------------------------------------ +// Thread 0 | Thread 1 +// ------------------------------------------------------------ +// STORE_RELAXED(x, 1) | STORE_RELAXED(y, 1) +// ATOMIC_THREAD_FENCE(SEQ_CST) | ATOMIC_THREAD_FENCE(SEQ_CST) +// r0 = LOAD_RELAXED(y) | r1 = LOAD_RELAXED(x) +// ------------------------------------------------------------ +// Observed: r0 = 0 && r1 = 0 +// ------------------------------------------------------------ +// +// ------------------------------------------------------------ +// Store-Buffer +// ------------------------------------------------------------ +// Initial State: +// x = 0; y = 0; +// ------------------------------------------------------------ +// Thread 0 | Thread 1 +// ------------------------------------------------------------ +// STORE_SEQ_CST(x, 1) | STORE_SEQ_CST(y, 1) +// r0 = LOAD_SEQ_CST(y) | r1 = LOAD_SEQ_CST(x) +// ------------------------------------------------------------ +// Observed: r0 = 0 && r1 = 0 +// ------------------------------------------------------------ +// +// Both solutions above are correct to ensure that the end results cannot lead to both r0 and r1 returning 0. Notice that the second one requires memory_order_seq_cst on both +// operations to ensure they are in the total order, S, for all memory_order_seq_cst operations. The other example uses the stronger guarantee provided by a sequentially consistent fence. +// +// ------------------------------------------------------------------------------------------------ +// Read-To-Write Causality +// ------------------------------------------------------------------------------------------------ +// Initial State: +// x = 0; y = 0; +// ------------------------------------------------------------------------------------------------ +// Thread 0 | Thread 1 | Thread 2 +// ------------------------------------------------------------------------------------------------ +// STORE_SEQ_CST(x, 1) | r0 = LOAD_RELAXED(x) | STORE_RELAXED(y, 1) +// | ATOMIC_THREAD_FENCE(SEQ_CST) | ATOMIC_THREAD_FENCE(SEQ_CST) +// | r1 = LOAD_RELAXED(y) | r2 = LOAD_RELAXED(x) +// ------------------------------------------------------------------------------------------------ +// Observed: r0 = 1 && r1 = 0 && r2 = 0 +// ------------------------------------------------------------------------------------------------ +// +// You'll notice this example is an in between example of the Store-Buffer and IRIW examples we have seen earlier. The store in Thread 0 needs to be sequentially consistent so it synchronizes with the +// thread fence in Thread 1. C++20 due to Reference [9], increased the strength of sequentially consistent fences has been increased to allow for the following. 
+//
+// ------------------------------------------------------------------------------------------------
+// Read-To-Write Causality - C++20
+// ------------------------------------------------------------------------------------------------
+// Initial State:
+// x = 0; y = 0;
+// ------------------------------------------------------------------------------------------------
+// Thread 0            | Thread 1                        | Thread 2
+// ------------------------------------------------------------------------------------------------
+// STORE_RELAXED(x, 1) | r0 = LOAD_RELAXED(x)            | STORE_RELAXED(y, 1)
+//                     | ATOMIC_THREAD_FENCE(SEQ_CST)    | ATOMIC_THREAD_FENCE(SEQ_CST)
+//                     | r1 = LOAD_RELAXED(y)            | r2 = LOAD_RELAXED(x)
+// ------------------------------------------------------------------------------------------------
+// Observed: r0 = 1 && r1 = 0 && r2 = 0
+// ------------------------------------------------------------------------------------------------
+//
+// Notice we were able to turn the store in Thread 0 into a relaxed store and still guarantee that either r1 or r2 returns 1.
+// Note that all implementations of the C++11 standard for every architecture already allow the C++20 behaviour.
+// The C++20 standard memory model was updated to recognize that all current implementations already provide this stronger guarantee.
+//
+// ******** False Sharing ********
+//
+// As we know, operations work at the granularity of a cacheline. A RMW operation obviously must have some help from the cache to ensure the entire operation
+// is seen as one whole unit. Conceptually we can think of this as the cpu's cache taking a lock on the cacheline, the cpu doing the read-modify-write operation on the
+// locked cacheline, and then releasing the lock on the cacheline. This means during that time any other cpu needing that cacheline must wait for the lock to be released.
+//
+// If we have two atomic objects doing RMW operations and they are within the same cacheline, they are unintentionally contending and serializing with each other even
+// though they are two completely separate objects. This is the phenomenon commonly called false sharing.
+// You can cacheline align your structure or the eastl::atomic object to prevent false sharing.
+//
+// ******** union of eastl::atomic ********
+//
+// union { eastl::atomic<uint8_t> atomic8; eastl::atomic<uint32_t> atomic32; };
+//
+// We know that operations operate at the granularity of a processor's cacheline size, so we may expect that storing to and loading
+// from different width atomic variables at the same address would not cause weird observable behaviour, but it may.
+// Store Buffers allow smaller stores to replace parts of larger loads that are forwarded from a store buffer.
+// This means if there are 2 bytes of modified data in the store buffer that overlap with a 4 byte load, the 2 bytes will be forwarded
+// from the store buffer. This is even documented behaviour of the x86 store buffer in the x86 architecture manual.
+// This behaviour can cause processors to observe values that have never and will never be visible on the bus to other processors.
+// The use of a union with eastl::atomic is not wrong but your code must be able to withstand these effects.
+//
+// Assume everything starts out initially as zero.
+// +// ------------------------------------------------------------------------------------------------------- +// Thread 0 | Thread 1 | Thread 2 +// -------------------------------------------------------------------------------------------------------- +// cmpxchg 0 -> 0x11111111 | cmpxchg 0x11111111 -> 0x22222222 | mov byte 0x33; mov 4 bytes into register; +// --------------------------------------------------------------------------------------------------------- +// +// After all operations complete, the value in memory at that location is, 0x22222233. +// It is possible that the 4 byte load in thread 2 actually returns 0x11111133. +// Now 0x11111133 is an observed value that no other cpu could observe because it was never globally visible on the data bus. +// +// If the value in memory is 0x22222233 then the first cmpxchg succeeded, then the second cmpxchg succeeded and finally our +// byte to memory was stored, yet our load returned 0x11111133. This is because store buffer contents can be forwarded to overlapping loads. +// It is possible that the byte store got put in the store buffer. Our load happened after the first cmpxchg with the byte forwarded. +// This behaviour is fine as long as your algorithm is able to cope with this kind of store buffer forwarding effects. +// +// Reference [13] is a great read on more about this topic of mixed-size concurrency. +// + + +///////////////////////////////////////////////////////////////////////////////// + + +#include +#include +#include +#include + + +#endif /* EASTL_ATOMIC_H */ diff --git a/libkram/eastl/include/EASTL/bitset.h b/libkram/eastl/include/EASTL/bitset.h new file mode 100644 index 00000000..d9261050 --- /dev/null +++ b/libkram/eastl/include/EASTL/bitset.h @@ -0,0 +1,2232 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements a bitset much like the C++ std::bitset class. +// The primary distinctions between this list and std::bitset are: +// - bitset is more efficient than some other std::bitset implementations, +// notably the bitset that comes with Microsoft and other 1st party platforms. +// - bitset is savvy to an environment that doesn't have exception handling, +// as is sometimes the case with console or embedded environments. +// - bitset is savvy to environments in which 'unsigned long' is not the +// most efficient integral data type. std::bitset implementations use +// unsigned long, even if it is an inefficient integer type. +// - bitset removes as much function calls as practical, in order to allow +// debug builds to run closer in speed and code footprint to release builds. +// - bitset doesn't support string functionality. We can add this if +// it is deemed useful. +// +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_BITSET_H +#define EASTL_BITSET_H + + +#include +#include + +EA_DISABLE_ALL_VC_WARNINGS(); + +#include +#include + +EA_RESTORE_ALL_VC_WARNINGS(); + +#if EASTL_EXCEPTIONS_ENABLED + EA_DISABLE_ALL_VC_WARNINGS(); + + #include // std::out_of_range, std::length_error. + + EA_RESTORE_ALL_VC_WARNINGS(); +#endif + +EA_DISABLE_VC_WARNING(4127); // Conditional expression is constant + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. 
VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + // To consider: Enable this for backwards compatibility with any user code that might be using BitsetWordType: + // #define BitsetWordType EASTL_BITSET_WORD_TYPE_DEFAULT + + + /// BITSET_WORD_COUNT + /// + /// Defines the number of words we use, based on the number of bits. + /// nBitCount refers to the number of bits in a bitset. + /// WordType refers to the type of integer word which stores bitet data. By default it is BitsetWordType. + /// + #if !defined(__GNUC__) || (__GNUC__ >= 3) // GCC 2.x can't handle the simpler declaration below. + #define BITSET_WORD_COUNT(nBitCount, WordType) (nBitCount == 0 ? 1 : ((nBitCount - 1) / (8 * sizeof(WordType)) + 1)) + #else + #define BITSET_WORD_COUNT(nBitCount, WordType) ((nBitCount - 1) / (8 * sizeof(WordType)) + 1) + #endif + + + /// EASTL_DISABLE_BITSET_ARRAYBOUNDS_WARNING + /// Before GCC 4.7 the '-Warray-bounds' buggy and was very likely to issue false positives for loops that are + /// difficult to evaluate. + /// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=45978 + /// + #if defined(__GNUC__) && (EA_COMPILER_VERSION > 4007) && defined(EA_PLATFORM_ANDROID) // Earlier than GCC 4.7 + #define EASTL_DISABLE_BITSET_ARRAYBOUNDS_WARNING 1 + #else + #define EASTL_DISABLE_BITSET_ARRAYBOUNDS_WARNING 0 + #endif + + + + /// BitsetBase + /// + /// This is a default implementation that works for any number of words. + /// + template // Templated on the number of words used to hold the bitset and the word type. + struct BitsetBase + { + typedef WordType word_type; + typedef BitsetBase this_type; + #if EASTL_BITSET_SIZE_T + typedef size_t size_type; + #else + typedef eastl_size_t size_type; + #endif + + enum { + kBitsPerWord = (8 * sizeof(word_type)), + kBitsPerWordMask = (kBitsPerWord - 1), + kBitsPerWordShift = ((kBitsPerWord == 8) ? 3 : ((kBitsPerWord == 16) ? 4 : ((kBitsPerWord == 32) ? 5 : (((kBitsPerWord == 64) ? 6 : 7))))) + }; + + public: + word_type mWord[NW]; + + public: + BitsetBase(); + BitsetBase(uint32_t value); // This exists only for compatibility with std::bitset, which has a 'long' constructor. + //BitsetBase(uint64_t value); // Disabled because it causes conflicts with the 32 bit version with existing user code. Use from_uint64 to init from a uint64_t instead. + + void operator&=(const this_type& x); + void operator|=(const this_type& x); + void operator^=(const this_type& x); + + void operator<<=(size_type n); + void operator>>=(size_type n); + + void flip(); + void set(); + void set(size_type i, bool value); + void reset(); + + bool operator==(const this_type& x) const; + + bool any() const; + size_type count() const; + + void from_uint32(uint32_t value); + void from_uint64(uint64_t value); + + unsigned long to_ulong() const; + uint32_t to_uint32() const; + uint64_t to_uint64() const; + + word_type& DoGetWord(size_type i); + word_type DoGetWord(size_type i) const; + + size_type DoFindFirst() const; + size_type DoFindNext(size_type last_find) const; + + size_type DoFindLast() const; // Returns NW * kBitsPerWord (the bit count) if no bits are set. + size_type DoFindPrev(size_type last_find) const; // Returns NW * kBitsPerWord (the bit count) if no bits are set. + + }; // class BitsetBase + + + + /// BitsetBase<1, WordType> + /// + /// This is a specialization for a bitset that fits within one word. 
+ /// + template + struct BitsetBase<1, WordType> + { + typedef WordType word_type; + typedef BitsetBase<1, WordType> this_type; + #if EASTL_BITSET_SIZE_T + typedef size_t size_type; + #else + typedef eastl_size_t size_type; + #endif + + enum { + kBitsPerWord = (8 * sizeof(word_type)), + kBitsPerWordMask = (kBitsPerWord - 1), + kBitsPerWordShift = ((kBitsPerWord == 8) ? 3 : ((kBitsPerWord == 16) ? 4 : ((kBitsPerWord == 32) ? 5 : (((kBitsPerWord == 64) ? 6 : 7))))) + }; + + public: + word_type mWord[1]; // Defined as an array of 1 so that bitset can treat this BitsetBase like others. + + public: + BitsetBase(); + BitsetBase(uint32_t value); + //BitsetBase(uint64_t value); // Disabled because it causes conflicts with the 32 bit version with existing user code. Use from_uint64 instead. + + void operator&=(const this_type& x); + void operator|=(const this_type& x); + void operator^=(const this_type& x); + + void operator<<=(size_type n); + void operator>>=(size_type n); + + void flip(); + void set(); + void set(size_type i, bool value); + void reset(); + + bool operator==(const this_type& x) const; + + bool any() const; + size_type count() const; + + void from_uint32(uint32_t value); + void from_uint64(uint64_t value); + + unsigned long to_ulong() const; + uint32_t to_uint32() const; + uint64_t to_uint64() const; + + word_type& DoGetWord(size_type); + word_type DoGetWord(size_type) const; + + size_type DoFindFirst() const; + size_type DoFindNext(size_type last_find) const; + + size_type DoFindLast() const; // Returns 1 * kBitsPerWord (the bit count) if no bits are set. + size_type DoFindPrev(size_type last_find) const; // Returns 1 * kBitsPerWord (the bit count) if no bits are set. + + }; // BitsetBase<1, WordType> + + + + /// BitsetBase<2, WordType> + /// + /// This is a specialization for a bitset that fits within two words. + /// The difference here is that we avoid branching (ifs and loops). + /// + template + struct BitsetBase<2, WordType> + { + typedef WordType word_type; + typedef BitsetBase<2, WordType> this_type; + #if EASTL_BITSET_SIZE_T + typedef size_t size_type; + #else + typedef eastl_size_t size_type; + #endif + + enum { + kBitsPerWord = (8 * sizeof(word_type)), + kBitsPerWordMask = (kBitsPerWord - 1), + kBitsPerWordShift = ((kBitsPerWord == 8) ? 3 : ((kBitsPerWord == 16) ? 4 : ((kBitsPerWord == 32) ? 5 : (((kBitsPerWord == 64) ? 6 : 7))))) + }; + + public: + word_type mWord[2]; + + public: + BitsetBase(); + BitsetBase(uint32_t value); + //BitsetBase(uint64_t value); // Disabled because it causes conflicts with the 32 bit version with existing user code. Use from_uint64 instead. + + void operator&=(const this_type& x); + void operator|=(const this_type& x); + void operator^=(const this_type& x); + + void operator<<=(size_type n); + void operator>>=(size_type n); + + void flip(); + void set(); + void set(size_type i, bool value); + void reset(); + + bool operator==(const this_type& x) const; + + bool any() const; + size_type count() const; + + void from_uint32(uint32_t value); + void from_uint64(uint64_t value); + + unsigned long to_ulong() const; + uint32_t to_uint32() const; + uint64_t to_uint64() const; + + word_type& DoGetWord(size_type); + word_type DoGetWord(size_type) const; + + size_type DoFindFirst() const; + size_type DoFindNext(size_type last_find) const; + + size_type DoFindLast() const; // Returns 2 * kBitsPerWord (the bit count) if no bits are set. + size_type DoFindPrev(size_type last_find) const; // Returns 2 * kBitsPerWord (the bit count) if no bits are set. 
+ + }; // BitsetBase<2, WordType> + + + + + /// bitset + /// + /// Implements a bitset much like the C++ std::bitset. + /// + /// As of this writing we don't implement a specialization of bitset<0>, + /// as it is deemed an academic exercise that nobody would actually + /// use and it would increase code space and provide little practical + /// benefit. Note that this doesn't mean bitset<0> isn't supported; + /// it means that our version of it isn't as efficient as it would be + /// if a specialization was made for it. + /// + /// - N can be any unsigned (non-zero) value, though memory usage is + /// linear with respect to N, so large values of N use large amounts of memory. + /// - WordType must be one of [uint16_t, uint32_t, uint64_t, uint128_t] + /// and the compiler must support the type. By default the WordType is + /// the largest native register type that the target platform supports. + /// + template + class bitset : private BitsetBase + { + public: + typedef BitsetBase base_type; + typedef bitset this_type; + typedef WordType word_type; + typedef typename base_type::size_type size_type; + + enum + { + kBitsPerWord = (8 * sizeof(word_type)), + kBitsPerWordMask = (kBitsPerWord - 1), + kBitsPerWordShift = ((kBitsPerWord == 8) ? 3 : ((kBitsPerWord == 16) ? 4 : ((kBitsPerWord == 32) ? 5 : (((kBitsPerWord == 64) ? 6 : 7))))), + kSize = N, // The number of bits the bitset holds + kWordSize = sizeof(word_type), // The size of individual words the bitset uses to hold the bits. + kWordCount = BITSET_WORD_COUNT(N, WordType) // The number of words the bitset uses to hold the bits. sizeof(bitset) == kWordSize * kWordCount. + }; + + using base_type::mWord; + using base_type::DoGetWord; + using base_type::DoFindFirst; + using base_type::DoFindNext; + using base_type::DoFindLast; + using base_type::DoFindPrev; + using base_type::to_ulong; + using base_type::to_uint32; + using base_type::to_uint64; + using base_type::count; + using base_type::any; + + public: + /// reference + /// + /// A reference is a reference to a specific bit in the bitset. + /// The C++ standard specifies that this be a nested class, + /// though it is not clear if a non-nested reference implementation + /// would be non-conforming. + /// + class reference + { + protected: + friend class bitset; + + word_type* mpBitWord; + size_type mnBitIndex; + + reference(){} // The C++ standard specifies that this is private. + + public: + reference(const bitset& x, size_type i); + + reference& operator=(bool value); + reference& operator=(const reference& x); + + bool operator~() const; + operator bool() const // Defined inline because CodeWarrior fails to be able to compile it outside. + { return (*mpBitWord & (static_cast(1) << (mnBitIndex & kBitsPerWordMask))) != 0; } + + reference& flip(); + }; + + public: + friend class reference; + + bitset(); + bitset(uint32_t value); + //bitset(uint64_t value); // Disabled because it causes conflicts with the 32 bit version with existing user code. Use from_uint64 instead. + + // We don't define copy constructor and operator= because + // the compiler-generated versions will suffice. 
+ + this_type& operator&=(const this_type& x); + this_type& operator|=(const this_type& x); + this_type& operator^=(const this_type& x); + + this_type& operator<<=(size_type n); + this_type& operator>>=(size_type n); + + this_type& set(); + this_type& set(size_type i, bool value = true); + + this_type& reset(); + this_type& reset(size_type i); + + this_type& flip(); + this_type& flip(size_type i); + this_type operator~() const; + + reference operator[](size_type i); + bool operator[](size_type i) const; + + const word_type* data() const; + word_type* data(); + + void from_uint32(uint32_t value); + void from_uint64(uint64_t value); + + //unsigned long to_ulong() const; // We inherit this from the base class. + //uint32_t to_uint32() const; + //uint64_t to_uint64() const; + + //size_type count() const; // We inherit this from the base class. + size_type size() const; + + bool operator==(const this_type& x) const; + bool operator!=(const this_type& x) const; + + bool test(size_type i) const; + //bool any() const; // We inherit this from the base class. + bool all() const; + bool none() const; + + this_type operator<<(size_type n) const; + this_type operator>>(size_type n) const; + + // Finds the index of the first "on" bit, returns kSize if none are set. + size_type find_first() const; + + // Finds the index of the next "on" bit after last_find, returns kSize if none are set. + size_type find_next(size_type last_find) const; + + // Finds the index of the last "on" bit, returns kSize if none are set. + size_type find_last() const; + + // Finds the index of the last "on" bit before last_find, returns kSize if none are set. + size_type find_prev(size_type last_find) const; + + }; // bitset + + + + + + + + /// BitsetCountBits + /// + /// This is a fast trick way to count bits without branches nor memory accesses. + /// + inline uint32_t BitsetCountBits(uint64_t x) + { + // GCC 3.x's implementation of UINT64_C is broken and fails to deal with + // the code below correctly. So we make a workaround for it. Earlier and + // later versions of GCC don't have this bug. 
+ + #if defined(__GNUC__) && (__GNUC__ == 3) + x = x - ((x >> 1) & 0x5555555555555555ULL); + x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); + x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FULL; + return (uint32_t)((x * 0x0101010101010101ULL) >> 56); + #else + x = x - ((x >> 1) & UINT64_C(0x5555555555555555)); + x = (x & UINT64_C(0x3333333333333333)) + ((x >> 2) & UINT64_C(0x3333333333333333)); + x = (x + (x >> 4)) & UINT64_C(0x0F0F0F0F0F0F0F0F); + return (uint32_t)((x * UINT64_C(0x0101010101010101)) >> 56); + #endif + } + + inline uint32_t BitsetCountBits(uint32_t x) + { + x = x - ((x >> 1) & 0x55555555); + x = (x & 0x33333333) + ((x >> 2) & 0x33333333); + x = (x + (x >> 4)) & 0x0F0F0F0F; + return (uint32_t)((x * 0x01010101) >> 24); + } + + inline uint32_t BitsetCountBits(uint16_t x) + { + return BitsetCountBits((uint32_t)x); + } + + inline uint32_t BitsetCountBits(uint8_t x) + { + return BitsetCountBits((uint32_t)x); + } + + + // const static char kBitsPerUint16[16] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 }; + #define EASTL_BITSET_COUNT_STRING "\0\1\1\2\1\2\2\3\1\2\2\3\2\3\3\4" + + + inline uint32_t GetFirstBit(uint8_t x) + { + if(x) + { + uint32_t n = 1; + + if((x & 0x0000000F) == 0) { n += 4; x >>= 4; } + if((x & 0x00000003) == 0) { n += 2; x >>= 2; } + + return (uint32_t)(n - (x & 1)); + } + + return 8; + } + + inline uint32_t GetFirstBit(uint16_t x) // To do: Update this to use VC++ _BitScanForward, _BitScanForward64; GCC __builtin_ctz, __builtin_ctzl. VC++ __lzcnt16, __lzcnt, __lzcnt64 requires recent CPUs (2013+) and probably can't be used. http://en.wikipedia.org/wiki/Haswell_%28microarchitecture%29#New_features + { + if(x) + { + uint32_t n = 1; + + if((x & 0x000000FF) == 0) { n += 8; x >>= 8; } + if((x & 0x0000000F) == 0) { n += 4; x >>= 4; } + if((x & 0x00000003) == 0) { n += 2; x >>= 2; } + + return (uint32_t)(n - (x & 1)); + } + + return 16; + } + + inline uint32_t GetFirstBit(uint32_t x) + { + if(x) + { + uint32_t n = 1; + + if((x & 0x0000FFFF) == 0) { n += 16; x >>= 16; } + if((x & 0x000000FF) == 0) { n += 8; x >>= 8; } + if((x & 0x0000000F) == 0) { n += 4; x >>= 4; } + if((x & 0x00000003) == 0) { n += 2; x >>= 2; } + + return (n - (x & 1)); + } + + return 32; + } + + inline uint32_t GetFirstBit(uint64_t x) + { + if(x) + { + uint32_t n = 1; + + if((x & 0xFFFFFFFF) == 0) { n += 32; x >>= 32; } + if((x & 0x0000FFFF) == 0) { n += 16; x >>= 16; } + if((x & 0x000000FF) == 0) { n += 8; x >>= 8; } + if((x & 0x0000000F) == 0) { n += 4; x >>= 4; } + if((x & 0x00000003) == 0) { n += 2; x >>= 2; } + + return (n - ((uint32_t)x & 1)); + } + + return 64; + } + + + #if EASTL_INT128_SUPPORTED + inline uint32_t GetFirstBit(eastl_uint128_t x) + { + if(x) + { + uint32_t n = 1; + + if((x & UINT64_C(0xFFFFFFFFFFFFFFFF)) == 0) { n += 64; x >>= 64; } + if((x & 0xFFFFFFFF) == 0) { n += 32; x >>= 32; } + if((x & 0x0000FFFF) == 0) { n += 16; x >>= 16; } + if((x & 0x000000FF) == 0) { n += 8; x >>= 8; } + if((x & 0x0000000F) == 0) { n += 4; x >>= 4; } + if((x & 0x00000003) == 0) { n += 2; x >>= 2; } + + return (n - ((uint32_t)x & 1)); + } + + return 128; + } + #endif + + inline uint32_t GetLastBit(uint8_t x) + { + if(x) + { + uint32_t n = 0; + + if(x & 0xFFF0) { n += 4; x >>= 4; } + if(x & 0xFFFC) { n += 2; x >>= 2; } + if(x & 0xFFFE) { n += 1; } + + return n; + } + + return 8; + } + + inline uint32_t GetLastBit(uint16_t x) + { + if(x) + { + uint32_t n = 0; + + if(x & 0xFF00) { n += 8; x >>= 8; } + if(x & 0xFFF0) { n += 4; x >>= 4; } + if(x & 0xFFFC) { n += 2; x >>= 2; } + if(x 
& 0xFFFE) { n += 1; } + + return n; + } + + return 16; + } + + inline uint32_t GetLastBit(uint32_t x) + { + if(x) + { + uint32_t n = 0; + + if(x & 0xFFFF0000) { n += 16; x >>= 16; } + if(x & 0xFFFFFF00) { n += 8; x >>= 8; } + if(x & 0xFFFFFFF0) { n += 4; x >>= 4; } + if(x & 0xFFFFFFFC) { n += 2; x >>= 2; } + if(x & 0xFFFFFFFE) { n += 1; } + + return n; + } + + return 32; + } + + inline uint32_t GetLastBit(uint64_t x) + { + if(x) + { + uint32_t n = 0; + + if(x & UINT64_C(0xFFFFFFFF00000000)) { n += 32; x >>= 32; } + if(x & 0xFFFF0000) { n += 16; x >>= 16; } + if(x & 0xFFFFFF00) { n += 8; x >>= 8; } + if(x & 0xFFFFFFF0) { n += 4; x >>= 4; } + if(x & 0xFFFFFFFC) { n += 2; x >>= 2; } + if(x & 0xFFFFFFFE) { n += 1; } + + return n; + } + + return 64; + } + + #if EASTL_INT128_SUPPORTED + inline uint32_t GetLastBit(eastl_uint128_t x) + { + if(x) + { + uint32_t n = 0; + + eastl_uint128_t mask(UINT64_C(0xFFFFFFFF00000000)); // There doesn't seem to exist compiler support for INT128_C() by any compiler. EAStdC's int128_t supports it though. + mask <<= 64; + + if(x & mask) { n += 64; x >>= 64; } + if(x & UINT64_C(0xFFFFFFFF00000000)) { n += 32; x >>= 32; } + if(x & UINT64_C(0x00000000FFFF0000)) { n += 16; x >>= 16; } + if(x & UINT64_C(0x00000000FFFFFF00)) { n += 8; x >>= 8; } + if(x & UINT64_C(0x00000000FFFFFFF0)) { n += 4; x >>= 4; } + if(x & UINT64_C(0x00000000FFFFFFFC)) { n += 2; x >>= 2; } + if(x & UINT64_C(0x00000000FFFFFFFE)) { n += 1; } + + return n; + } + + return 128; + } + #endif + + + + + /////////////////////////////////////////////////////////////////////////// + // BitsetBase + // + // We tried two forms of array access here: + // for(word_type *pWord(mWord), *pWordEnd(mWord + NW); pWord < pWordEnd; ++pWord) + // *pWord = ... + // and + // for(size_t i = 0; i < NW; i++) + // mWord[i] = ... + // + // For our tests (~NW < 16), the latter (using []) access resulted in faster code. + /////////////////////////////////////////////////////////////////////////// + + template + inline BitsetBase::BitsetBase() + { + reset(); + } + + + template + inline BitsetBase::BitsetBase(uint32_t value) + { + // This implementation assumes that sizeof(value) <= sizeof(word_type). + //EASTL_CT_ASSERT(sizeof(value) <= sizeof(word_type)); Disabled because we now have support for uint8_t and uint16_t word types. It would be nice to have a runtime assert that tested this. + + reset(); + mWord[0] = static_cast(value); + } + + + /* + template + inline BitsetBase::BitsetBase(uint64_t value) + { + reset(); + + #if(EA_PLATFORM_WORD_SIZE == 4) + mWord[0] = static_cast(value); + + EASTL_CT_ASSERT(NW > 2); // We can assume this because we have specializations of BitsetBase for <1> and <2>. + //if(NW > 1) // NW is a template constant, but it would be a little messy to take advantage of it's const-ness. 
+ mWord[1] = static_cast(value >> 32); + #else + mWord[0] = static_cast(value); + #endif + } + */ + + + template + inline void BitsetBase::operator&=(const this_type& x) + { + for(size_t i = 0; i < NW; i++) + mWord[i] &= x.mWord[i]; + } + + + template + inline void BitsetBase::operator|=(const this_type& x) + { + for(size_t i = 0; i < NW; i++) + mWord[i] |= x.mWord[i]; + } + + + template + inline void BitsetBase::operator^=(const this_type& x) + { + for(size_t i = 0; i < NW; i++) + mWord[i] ^= x.mWord[i]; + } + + + template + inline void BitsetBase::operator<<=(size_type n) + { + const size_type nWordShift = (size_type)(n >> kBitsPerWordShift); + + if(nWordShift) + { + for(int i = (int)(NW - 1); i >= 0; --i) + mWord[i] = (nWordShift <= (size_type)i) ? mWord[i - nWordShift] : (word_type)0; + } + + if(n &= kBitsPerWordMask) + { + for(size_t i = (NW - 1); i > 0; --i) + mWord[i] = (word_type)((mWord[i] << n) | (mWord[i - 1] >> (kBitsPerWord - n))); + mWord[0] <<= n; + } + + // We let the parent class turn off any upper bits. + } + + + template + inline void BitsetBase::operator>>=(size_type n) + { + const size_type nWordShift = (size_type)(n >> kBitsPerWordShift); + + if(nWordShift) + { + for(size_t i = 0; i < NW; ++i) + mWord[i] = ((nWordShift < (NW - i)) ? mWord[i + nWordShift] : (word_type)0); + } + + if(n &= kBitsPerWordMask) + { + for(size_t i = 0; i < (NW - 1); ++i) + mWord[i] = (word_type)((mWord[i] >> n) | (mWord[i + 1] << (kBitsPerWord - n))); + mWord[NW - 1] >>= n; + } + } + + + template + inline void BitsetBase::flip() + { + for(size_t i = 0; i < NW; i++) + mWord[i] = ~mWord[i]; + // We let the parent class turn off any upper bits. + } + + + template + inline void BitsetBase::set() + { + for(size_t i = 0; i < NW; i++) + mWord[i] = static_cast(~static_cast(0)); + // We let the parent class turn off any upper bits. + } + + + template + inline void BitsetBase::set(size_type i, bool value) + { + if(value) + mWord[i >> kBitsPerWordShift] |= (static_cast(1) << (i & kBitsPerWordMask)); + else + mWord[i >> kBitsPerWordShift] &= ~(static_cast(1) << (i & kBitsPerWordMask)); + } + + + template + inline void BitsetBase::reset() + { + if(NW > 16) // This is a constant expression and should be optimized away. + { + // This will be fastest if compiler intrinsic function optimizations are enabled. + memset(mWord, 0, sizeof(mWord)); + } + else + { + for(size_t i = 0; i < NW; i++) + mWord[i] = 0; + } + } + + + template + inline bool BitsetBase::operator==(const this_type& x) const + { + for(size_t i = 0; i < NW; i++) + { + if(mWord[i] != x.mWord[i]) + return false; + } + return true; + } + + + template + inline bool BitsetBase::any() const + { + for(size_t i = 0; i < NW; i++) + { + if(mWord[i]) + return true; + } + return false; + } + + + template + inline typename BitsetBase::size_type + BitsetBase::count() const + { + size_type n = 0; + + for(size_t i = 0; i < NW; i++) + { + #if defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 304) && !defined(EA_PLATFORM_ANDROID) // GCC 3.4 or later + #if(EA_PLATFORM_WORD_SIZE == 4) + n += (size_type)__builtin_popcountl(mWord[i]); + #else + n += (size_type)__builtin_popcountll(mWord[i]); + #endif + #elif defined(__GNUC__) && (__GNUC__ < 3) + n += BitsetCountBits(mWord[i]); // GCC 2.x compiler inexplicably blows up on the code below. 
+ #else + // todo: use __popcnt16, __popcnt, __popcnt64 for msvc builds + // https://msdn.microsoft.com/en-us/library/bb385231(v=vs.140).aspx + for(word_type w = mWord[i]; w; w >>= 4) + n += EASTL_BITSET_COUNT_STRING[w & 0xF]; + + // Version which seems to run slower in benchmarks: + // n += BitsetCountBits(mWord[i]); + #endif + + } + return n; + } + + + template + inline void BitsetBase::from_uint32(uint32_t value) + { + reset(); + mWord[0] = static_cast(value); + } + + + template + inline void BitsetBase::from_uint64(uint64_t value) + { + reset(); + + #if(EA_PLATFORM_WORD_SIZE == 4) + mWord[0] = static_cast(value); + + EASTL_CT_ASSERT(NW > 2); // We can assume this because we have specializations of BitsetBase for <1> and <2>. + //if(NW > 1) // NW is a template constant, but it would be a little messy to take advantage of it's const-ness. + mWord[1] = static_cast(value >> 32); + #else + mWord[0] = static_cast(value); + #endif + } + + + template + inline unsigned long BitsetBase::to_ulong() const + { + #if EASTL_EXCEPTIONS_ENABLED + for(size_t i = 1; i < NW; ++i) + { + if(mWord[i]) + throw std::overflow_error("BitsetBase::to_ulong"); + } + #endif + return (unsigned long)mWord[0]; // Todo: We need to deal with the case whereby sizeof(word_type) < sizeof(unsigned long) + } + + + template + inline uint32_t BitsetBase::to_uint32() const + { + #if EASTL_EXCEPTIONS_ENABLED + // Verify that high words or bits are not set and thus that to_uint32 doesn't lose information. + for(size_t i = 1; i < NW; ++i) + { + if(mWord[i]) + throw std::overflow_error("BitsetBase::to_uint32"); + } + + #if(EA_PLATFORM_WORD_SIZE > 4) // if we have 64 bit words... + if(mWord[0] >> 32) + throw std::overflow_error("BitsetBase::to_uint32"); + #endif + #endif + + return (uint32_t)mWord[0]; + } + + + template + inline uint64_t BitsetBase::to_uint64() const + { + #if EASTL_EXCEPTIONS_ENABLED + // Verify that high words are not set and thus that to_uint64 doesn't lose information. + + EASTL_CT_ASSERT(NW > 2); // We can assume this because we have specializations of BitsetBase for <1> and <2>. + for(size_t i = 2; i < NW; ++i) + { + if(mWord[i]) + throw std::overflow_error("BitsetBase::to_uint64"); + } + #endif + + #if(EA_PLATFORM_WORD_SIZE == 4) + EASTL_CT_ASSERT(NW > 2); // We can assume this because we have specializations of BitsetBase for <1> and <2>. + return (mWord[1] << 32) | mWord[0]; + #else + return (uint64_t)mWord[0]; + #endif + } + + + template + inline typename BitsetBase::word_type& + BitsetBase::DoGetWord(size_type i) + { + return mWord[i >> kBitsPerWordShift]; + } + + + template + inline typename BitsetBase::word_type + BitsetBase::DoGetWord(size_type i) const + { + return mWord[i >> kBitsPerWordShift]; + } + + + template + inline typename BitsetBase::size_type + BitsetBase::DoFindFirst() const + { + for(size_type word_index = 0; word_index < NW; ++word_index) + { + const size_type fbiw = GetFirstBit(mWord[word_index]); + + if(fbiw != kBitsPerWord) + return (word_index * kBitsPerWord) + fbiw; + } + + return (size_type)NW * kBitsPerWord; + } + + +#if EASTL_DISABLE_BITSET_ARRAYBOUNDS_WARNING +EA_DISABLE_GCC_WARNING(-Warray-bounds) +#endif + + template + inline typename BitsetBase::size_type + BitsetBase::DoFindNext(size_type last_find) const + { + // Start looking from the next bit. + ++last_find; + + // Set initial state based on last find. 
+ size_type word_index = static_cast(last_find >> kBitsPerWordShift); + size_type bit_index = static_cast(last_find & kBitsPerWordMask); + + // To do: There probably is a more elegant way to write looping below. + if(word_index < NW) + { + // Mask off previous bits of the word so our search becomes a "find first". + word_type this_word = mWord[word_index] & (~static_cast(0) << bit_index); + + for(;;) + { + const size_type fbiw = GetFirstBit(this_word); + + if(fbiw != kBitsPerWord) + return (word_index * kBitsPerWord) + fbiw; + + if(++word_index < NW) + this_word = mWord[word_index]; + else + break; + } + } + + return (size_type)NW * kBitsPerWord; + } + +#if EASTL_DISABLE_BITSET_ARRAYBOUNDS_WARNING +EA_RESTORE_GCC_WARNING() +#endif + + + + template + inline typename BitsetBase::size_type + BitsetBase::DoFindLast() const + { + for(size_type word_index = (size_type)NW; word_index > 0; --word_index) + { + const size_type lbiw = GetLastBit(mWord[word_index - 1]); + + if(lbiw != kBitsPerWord) + return ((word_index - 1) * kBitsPerWord) + lbiw; + } + + return (size_type)NW * kBitsPerWord; + } + + + template + inline typename BitsetBase::size_type + BitsetBase::DoFindPrev(size_type last_find) const + { + if(last_find > 0) + { + // Set initial state based on last find. + size_type word_index = static_cast(last_find >> kBitsPerWordShift); + size_type bit_index = static_cast(last_find & kBitsPerWordMask); + + // Mask off subsequent bits of the word so our search becomes a "find last". + word_type mask = (~static_cast(0) >> (kBitsPerWord - 1 - bit_index)) >> 1; // We do two shifts here because many CPUs ignore requests to shift 32 bit integers by 32 bits, which could be the case above. + word_type this_word = mWord[word_index] & mask; + + for(;;) + { + const size_type lbiw = GetLastBit(this_word); + + if(lbiw != kBitsPerWord) + return (word_index * kBitsPerWord) + lbiw; + + if(word_index > 0) + this_word = mWord[--word_index]; + else + break; + } + } + + return (size_type)NW * kBitsPerWord; + } + + + + /////////////////////////////////////////////////////////////////////////// + // BitsetBase<1, WordType> + /////////////////////////////////////////////////////////////////////////// + + template + inline BitsetBase<1, WordType>::BitsetBase() + { + mWord[0] = 0; + } + + + template + inline BitsetBase<1, WordType>::BitsetBase(uint32_t value) + { + // This implementation assumes that sizeof(value) <= sizeof(word_type). + //EASTL_CT_ASSERT(sizeof(value) <= sizeof(word_type)); Disabled because we now have support for uint8_t and uint16_t word types. It would be nice to have a runtime assert that tested this. + + mWord[0] = static_cast(value); + } + + + /* + template + inline BitsetBase<1, WordType>::BitsetBase(uint64_t value) + { + #if(EA_PLATFORM_WORD_SIZE == 4) + EASTL_ASSERT(value <= 0xffffffff); + mWord[0] = static_cast(value); // This potentially loses data, but that's what the user is requesting. + #else + mWord[0] = static_cast(value); + #endif + } + */ + + + template + inline void BitsetBase<1, WordType>::operator&=(const this_type& x) + { + mWord[0] &= x.mWord[0]; + } + + + template + inline void BitsetBase<1, WordType>::operator|=(const this_type& x) + { + mWord[0] |= x.mWord[0]; + } + + + template + inline void BitsetBase<1, WordType>::operator^=(const this_type& x) + { + mWord[0] ^= x.mWord[0]; + } + + + template + inline void BitsetBase<1, WordType>::operator<<=(size_type n) + { + mWord[0] <<= n; + // We let the parent class turn off any upper bits. 
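+ // Note: bitset::operator<<= forwards here only when n < N, and N fits in a single word
+ // for this specialization, so the shift amount stays below kBitsPerWord.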
+ } + + + template + inline void BitsetBase<1, WordType>::operator>>=(size_type n) + { + mWord[0] >>= n; + } + + + template + inline void BitsetBase<1, WordType>::flip() + { + mWord[0] = ~mWord[0]; + // We let the parent class turn off any upper bits. + } + + + template + inline void BitsetBase<1, WordType>::set() + { + mWord[0] = static_cast(~static_cast(0)); + // We let the parent class turn off any upper bits. + } + + + template + inline void BitsetBase<1, WordType>::set(size_type i, bool value) + { + if(value) + mWord[0] |= (static_cast(1) << i); + else + mWord[0] &= ~(static_cast(1) << i); + } + + + template + inline void BitsetBase<1, WordType>::reset() + { + mWord[0] = 0; + } + + + template + inline bool BitsetBase<1, WordType>::operator==(const this_type& x) const + { + return mWord[0] == x.mWord[0]; + } + + + template + inline bool BitsetBase<1, WordType>::any() const + { + return mWord[0] != 0; + } + + + template + inline typename BitsetBase<1, WordType>::size_type + BitsetBase<1, WordType>::count() const + { + #if defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 304) && !defined(EA_PLATFORM_ANDROID) // GCC 3.4 or later + #if(EA_PLATFORM_WORD_SIZE == 4) + return (size_type)__builtin_popcountl(mWord[0]); + #else + return (size_type)__builtin_popcountll(mWord[0]); + #endif + #elif defined(__GNUC__) && (__GNUC__ < 3) + return BitsetCountBits(mWord[0]); // GCC 2.x compiler inexplicably blows up on the code below. + #else + size_type n = 0; + for(word_type w = mWord[0]; w; w >>= 4) + n += EASTL_BITSET_COUNT_STRING[w & 0xF]; + return n; + #endif + } + + + template + inline void BitsetBase<1, WordType>::from_uint32(uint32_t value) + { + mWord[0] = static_cast(value); + } + + + template + inline void BitsetBase<1, WordType>::from_uint64(uint64_t value) + { + #if(EA_PLATFORM_WORD_SIZE == 4) + EASTL_ASSERT(value <= 0xffffffff); + mWord[0] = static_cast(value); // This potentially loses data, but that's what the user is requesting. + #else + mWord[0] = static_cast(value); + #endif + } + + + template + inline unsigned long BitsetBase<1, WordType>::to_ulong() const + { + #if EASTL_EXCEPTIONS_ENABLED + #if((EA_PLATFORM_WORD_SIZE > 4) && defined(EA_PLATFORM_MICROSOFT)) // If we are using 64 bit words but ulong is less than 64 bits... Microsoft platforms alone use a 32 bit long under 64 bit platforms. + // Verify that high bits are not set and thus that to_ulong doesn't lose information. + if(mWord[0] >> 32) + throw std::overflow_error("BitsetBase::to_ulong"); + #endif + #endif + + return static_cast(mWord[0]); + } + + + template + inline uint32_t BitsetBase<1, WordType>::to_uint32() const + { + #if EASTL_EXCEPTIONS_ENABLED + #if(EA_PLATFORM_WORD_SIZE > 4) // If we are using 64 bit words... + // Verify that high bits are not set and thus that to_uint32 doesn't lose information. + if(mWord[0] >> 32) + throw std::overflow_error("BitsetBase::to_uint32"); + #endif + #endif + + return static_cast(mWord[0]); + } + + + template + inline uint64_t BitsetBase<1, WordType>::to_uint64() const + { + // This implementation is the same regardless of the word size, and there is no possibility of overflow_error. 
+ return static_cast(mWord[0]); + } + + + template + inline typename BitsetBase<1, WordType>::word_type& + BitsetBase<1, WordType>::DoGetWord(size_type) + { + return mWord[0]; + } + + + template + inline typename BitsetBase<1, WordType>::word_type + BitsetBase<1, WordType>::DoGetWord(size_type) const + { + return mWord[0]; + } + + + template + inline typename BitsetBase<1, WordType>::size_type + BitsetBase<1, WordType>::DoFindFirst() const + { + return GetFirstBit(mWord[0]); + } + + + template + inline typename BitsetBase<1, WordType>::size_type + BitsetBase<1, WordType>::DoFindNext(size_type last_find) const + { + if(++last_find < kBitsPerWord) + { + // Mask off previous bits of word so our search becomes a "find first". + const word_type this_word = mWord[0] & ((~static_cast(0)) << last_find); + + return GetFirstBit(this_word); + } + + return kBitsPerWord; + } + + + template + inline typename BitsetBase<1, WordType>::size_type + BitsetBase<1, WordType>::DoFindLast() const + { + return GetLastBit(mWord[0]); + } + + + template + inline typename BitsetBase<1, WordType>::size_type + BitsetBase<1, WordType>::DoFindPrev(size_type last_find) const + { + if(last_find > 0) + { + // Mask off previous bits of word so our search becomes a "find first". + const word_type this_word = mWord[0] & ((~static_cast(0)) >> (kBitsPerWord - last_find)); + + return GetLastBit(this_word); + } + + return kBitsPerWord; + } + + + + + /////////////////////////////////////////////////////////////////////////// + // BitsetBase<2, WordType> + /////////////////////////////////////////////////////////////////////////// + + template + inline BitsetBase<2, WordType>::BitsetBase() + { + mWord[0] = 0; + mWord[1] = 0; + } + + + template + inline BitsetBase<2, WordType>::BitsetBase(uint32_t value) + { + // This implementation assumes that sizeof(value) <= sizeof(word_type). + //EASTL_CT_ASSERT(sizeof(value) <= sizeof(word_type)); Disabled because we now have support for uint8_t and uint16_t word types. It would be nice to have a runtime assert that tested this. + + mWord[0] = static_cast(value); + mWord[1] = 0; + } + + + /* + template + inline BitsetBase<2, WordType>::BitsetBase(uint64_t value) + { + #if(EA_PLATFORM_WORD_SIZE == 4) + mWord[0] = static_cast(value); + mWord[1] = static_cast(value >> 32); + #else + mWord[0] = static_cast(value); + mWord[1] = 0; + #endif + } + */ + + + template + inline void BitsetBase<2, WordType>::operator&=(const this_type& x) + { + mWord[0] &= x.mWord[0]; + mWord[1] &= x.mWord[1]; + } + + + template + inline void BitsetBase<2, WordType>::operator|=(const this_type& x) + { + mWord[0] |= x.mWord[0]; + mWord[1] |= x.mWord[1]; + } + + + template + inline void BitsetBase<2, WordType>::operator^=(const this_type& x) + { + mWord[0] ^= x.mWord[0]; + mWord[1] ^= x.mWord[1]; + } + + + template + inline void BitsetBase<2, WordType>::operator<<=(size_type n) + { + if(n) // to avoid a shift by kBitsPerWord, which is undefined + { + if(EASTL_UNLIKELY(n >= kBitsPerWord)) // parent expected to handle high bits and n >= 64 + { + mWord[1] = mWord[0]; + mWord[0] = 0; + n -= kBitsPerWord; + } + + mWord[1] = (mWord[1] << n) | (mWord[0] >> (kBitsPerWord - n)); // Intentionally use | instead of +. + mWord[0] <<= n; + // We let the parent class turn off any upper bits. 
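+ // e.g. with 8-bit words and n == 3: mWord[1] keeps its own bits shifted left by 3 and
+ // picks up the top 3 bits of mWord[0] in its low positions.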
+ } + } + + + template + inline void BitsetBase<2, WordType>::operator>>=(size_type n) + { + if(n) // to avoid a shift by kBitsPerWord, which is undefined + { + if(EASTL_UNLIKELY(n >= kBitsPerWord)) // parent expected to handle n >= 64 + { + mWord[0] = mWord[1]; + mWord[1] = 0; + n -= kBitsPerWord; + } + + mWord[0] = (mWord[0] >> n) | (mWord[1] << (kBitsPerWord - n)); // Intentionally use | instead of +. + mWord[1] >>= n; + } + } + + + template + inline void BitsetBase<2, WordType>::flip() + { + mWord[0] = ~mWord[0]; + mWord[1] = ~mWord[1]; + // We let the parent class turn off any upper bits. + } + + + template + inline void BitsetBase<2, WordType>::set() + { + mWord[0] = ~static_cast(0); + mWord[1] = ~static_cast(0); + // We let the parent class turn off any upper bits. + } + + + template + inline void BitsetBase<2, WordType>::set(size_type i, bool value) + { + if(value) + mWord[i >> kBitsPerWordShift] |= (static_cast(1) << (i & kBitsPerWordMask)); + else + mWord[i >> kBitsPerWordShift] &= ~(static_cast(1) << (i & kBitsPerWordMask)); + } + + + template + inline void BitsetBase<2, WordType>::reset() + { + mWord[0] = 0; + mWord[1] = 0; + } + + + template + inline bool BitsetBase<2, WordType>::operator==(const this_type& x) const + { + return (mWord[0] == x.mWord[0]) && (mWord[1] == x.mWord[1]); + } + + + template + inline bool BitsetBase<2, WordType>::any() const + { + // Or with two branches: { return (mWord[0] != 0) || (mWord[1] != 0); } + return (mWord[0] | mWord[1]) != 0; + } + + template + inline typename BitsetBase<2, WordType>::size_type + BitsetBase<2, WordType>::count() const + { + #if defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 304) // GCC 3.4 or later + #if(EA_PLATFORM_WORD_SIZE == 4) + return (size_type)__builtin_popcountl(mWord[0]) + (size_type)__builtin_popcountl(mWord[1]); + #else + return (size_type)__builtin_popcountll(mWord[0]) + (size_type)__builtin_popcountll(mWord[1]); + #endif + + #else + return BitsetCountBits(mWord[0]) + BitsetCountBits(mWord[1]); + #endif + } + + + template + inline void BitsetBase<2, WordType>::from_uint32(uint32_t value) + { + mWord[0] = static_cast(value); + mWord[1] = 0; + } + + + template + inline void BitsetBase<2, WordType>::from_uint64(uint64_t value) + { + #if(EA_PLATFORM_WORD_SIZE == 4) + mWord[0] = static_cast(value); + mWord[1] = static_cast(value >> 32); + #else + mWord[0] = static_cast(value); + mWord[1] = 0; + #endif + } + + + template + inline unsigned long BitsetBase<2, WordType>::to_ulong() const + { + #if EASTL_EXCEPTIONS_ENABLED + if(mWord[1]) + throw std::overflow_error("BitsetBase::to_ulong"); + #endif + return (unsigned long)mWord[0]; // Todo: We need to deal with the case whereby sizeof(word_type) < sizeof(unsigned long) + } + + + template + inline uint32_t BitsetBase<2, WordType>::to_uint32() const + { + #if EASTL_EXCEPTIONS_ENABLED + // Verify that high words or bits are not set and thus that to_uint32 doesn't lose information. + + #if(EA_PLATFORM_WORD_SIZE == 4) + if(mWord[1]) + throw std::overflow_error("BitsetBase::to_uint32"); + #else + if(mWord[1] || (mWord[0] >> 32)) + throw std::overflow_error("BitsetBase::to_uint32"); + #endif + #endif + + return (uint32_t)mWord[0]; + } + + + template + inline uint64_t BitsetBase<2, WordType>::to_uint64() const + { + #if(EA_PLATFORM_WORD_SIZE == 4) + // There can't possibly be an overflow_error here. 
+ + return ((uint64_t)mWord[1] << 32) | mWord[0]; + #else + #if EASTL_EXCEPTIONS_ENABLED + if(mWord[1]) + throw std::overflow_error("BitsetBase::to_uint64"); + #endif + + return (uint64_t)mWord[0]; + #endif + } + + + template + inline typename BitsetBase<2, WordType>::word_type& + BitsetBase<2, WordType>::DoGetWord(size_type i) + { + return mWord[i >> kBitsPerWordShift]; + } + + + template + inline typename BitsetBase<2, WordType>::word_type + BitsetBase<2, WordType>::DoGetWord(size_type i) const + { + return mWord[i >> kBitsPerWordShift]; + } + + + template + inline typename BitsetBase<2, WordType>::size_type + BitsetBase<2, WordType>::DoFindFirst() const + { + size_type fbiw = GetFirstBit(mWord[0]); + + if(fbiw != kBitsPerWord) + return fbiw; + + fbiw = GetFirstBit(mWord[1]); + + if(fbiw != kBitsPerWord) + return kBitsPerWord + fbiw; + + return 2 * kBitsPerWord; + } + + + template + inline typename BitsetBase<2, WordType>::size_type + BitsetBase<2, WordType>::DoFindNext(size_type last_find) const + { + // If the last find was in the first word, we must check it and then possibly the second. + if(++last_find < (size_type)kBitsPerWord) + { + // Mask off previous bits of word so our search becomes a "find first". + word_type this_word = mWord[0] & ((~static_cast(0)) << last_find); + + // Step through words. + size_type fbiw = GetFirstBit(this_word); + + if(fbiw != kBitsPerWord) + return fbiw; + + fbiw = GetFirstBit(mWord[1]); + + if(fbiw != kBitsPerWord) + return kBitsPerWord + fbiw; + } + else if(last_find < (size_type)(2 * kBitsPerWord)) + { + // The last find was in the second word, remove the bit count of the first word from the find. + last_find -= kBitsPerWord; + + // Mask off previous bits of word so our search becomes a "find first". + word_type this_word = mWord[1] & ((~static_cast(0)) << last_find); + + const size_type fbiw = GetFirstBit(this_word); + + if(fbiw != kBitsPerWord) + return kBitsPerWord + fbiw; + } + + return 2 * kBitsPerWord; + } + + + template + inline typename BitsetBase<2, WordType>::size_type + BitsetBase<2, WordType>::DoFindLast() const + { + size_type lbiw = GetLastBit(mWord[1]); + + if(lbiw != kBitsPerWord) + return kBitsPerWord + lbiw; + + lbiw = GetLastBit(mWord[0]); + + if(lbiw != kBitsPerWord) + return lbiw; + + return 2 * kBitsPerWord; + } + + + template + inline typename BitsetBase<2, WordType>::size_type + BitsetBase<2, WordType>::DoFindPrev(size_type last_find) const + { + // If the last find was in the second word, we must check it and then possibly the first. + if(last_find > (size_type)kBitsPerWord) + { + // This has the same effect as last_find %= kBitsPerWord in our case. + last_find -= kBitsPerWord; + + // Mask off previous bits of word so our search becomes a "find first". + word_type this_word = mWord[1] & ((~static_cast(0)) >> (kBitsPerWord - last_find)); + + // Step through words. + size_type lbiw = GetLastBit(this_word); + + if(lbiw != kBitsPerWord) + return kBitsPerWord + lbiw; + + lbiw = GetLastBit(mWord[0]); + + if(lbiw != kBitsPerWord) + return lbiw; + } + else if(last_find != 0) + { + // Mask off previous bits of word so our search becomes a "find first". 
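+ // (The mask below keeps only bits 0..last_find-1 of mWord[0], so GetLastBit effectively
+ // performs a 'find last' restricted to positions before last_find.)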
+ word_type this_word = mWord[0] & ((~static_cast(0)) >> (kBitsPerWord - last_find)); + + const size_type lbiw = GetLastBit(this_word); + + if(lbiw != kBitsPerWord) + return lbiw; + } + + return 2 * kBitsPerWord; + } + + + + /////////////////////////////////////////////////////////////////////////// + // bitset::reference + /////////////////////////////////////////////////////////////////////////// + + template + inline bitset::reference::reference(const bitset& x, size_type i) + : mpBitWord(&const_cast(x).DoGetWord(i)), + mnBitIndex(i & kBitsPerWordMask) + { // We have an issue here because the above is casting away the const-ness of the source bitset. + // Empty + } + + + template + inline typename bitset::reference& + bitset::reference::operator=(bool value) + { + if(value) + *mpBitWord |= (static_cast(1) << (mnBitIndex & kBitsPerWordMask)); + else + *mpBitWord &= ~(static_cast(1) << (mnBitIndex & kBitsPerWordMask)); + return *this; + } + + + template + inline typename bitset::reference& + bitset::reference::operator=(const reference& x) + { + if(*x.mpBitWord & (static_cast(1) << (x.mnBitIndex & kBitsPerWordMask))) + *mpBitWord |= (static_cast(1) << (mnBitIndex & kBitsPerWordMask)); + else + *mpBitWord &= ~(static_cast(1) << (mnBitIndex & kBitsPerWordMask)); + return *this; + } + + + template + inline bool bitset::reference::operator~() const + { + return (*mpBitWord & (static_cast(1) << (mnBitIndex & kBitsPerWordMask))) == 0; + } + + + //Defined inline in the class because Metrowerks fails to be able to compile it here. + //template + //inline bitset::reference::operator bool() const + //{ + // return (*mpBitWord & (static_cast(1) << (mnBitIndex & kBitsPerWordMask))) != 0; + //} + + + template + inline typename bitset::reference& + bitset::reference::flip() + { + *mpBitWord ^= static_cast(1) << (mnBitIndex & kBitsPerWordMask); + return *this; + } + + + + + /////////////////////////////////////////////////////////////////////////// + // bitset + /////////////////////////////////////////////////////////////////////////// + + template + inline bitset::bitset() + : base_type() + { + // Empty. The base class will set all bits to zero. + } + + EA_DISABLE_VC_WARNING(6313) + template + inline bitset::bitset(uint32_t value) + : base_type(value) + { + if((N & kBitsPerWordMask) || (N == 0)) // If there are any high bits to clear... (If we didn't have this check, then the code below would do the wrong thing when N == 32. + mWord[kWordCount - 1] &= ~(static_cast(~static_cast(0)) << (N & kBitsPerWordMask)); // This clears any high unused bits. + } + EA_RESTORE_VC_WARNING() + + /* + template + inline bitset::bitset(uint64_t value) + : base_type(value) + { + if((N & kBitsPerWordMask) || (N == 0)) // If there are any high bits to clear... + mWord[kWordCount - 1] &= ~(~static_cast(0) << (N & kBitsPerWordMask)); // This clears any high unused bits. 
+ } + */ + + + template + inline typename bitset::this_type& + bitset::operator&=(const this_type& x) + { + base_type::operator&=(x); + return *this; + } + + + template + inline typename bitset::this_type& + bitset::operator|=(const this_type& x) + { + base_type::operator|=(x); + return *this; + } + + + template + inline typename bitset::this_type& + bitset::operator^=(const this_type& x) + { + base_type::operator^=(x); + return *this; + } + + + template + inline typename bitset::this_type& + bitset::operator<<=(size_type n) + { + if(EASTL_LIKELY((intptr_t)n < (intptr_t)N)) + { + EA_DISABLE_VC_WARNING(6313) + base_type::operator<<=(n); + if((N & kBitsPerWordMask) || (N == 0)) // If there are any high bits to clear... (If we didn't have this check, then the code below would do the wrong thing when N == 32. + mWord[kWordCount - 1] &= ~(static_cast(~static_cast(0)) << (N & kBitsPerWordMask)); // This clears any high unused bits. We need to do this so that shift operations proceed correctly. + EA_RESTORE_VC_WARNING() + } + else + base_type::reset(); + return *this; + } + + + template + inline typename bitset::this_type& + bitset::operator>>=(size_type n) + { + if(EASTL_LIKELY(n < N)) + base_type::operator>>=(n); + else + base_type::reset(); + return *this; + } + + + template + inline typename bitset::this_type& + bitset::set() + { + base_type::set(); // This sets all bits. + if((N & kBitsPerWordMask) || (N == 0)) // If there are any high bits to clear... (If we didn't have this check, then the code below would do the wrong thing when N == 32. + mWord[kWordCount - 1] &= ~(static_cast(~static_cast(0)) << (N & kBitsPerWordMask)); // This clears any high unused bits. We need to do this so that shift operations proceed correctly. + return *this; + } + + + template + inline typename bitset::this_type& + bitset::set(size_type i, bool value) + { + if(i < N) + base_type::set(i, value); + else + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(!(i < N))) + EASTL_FAIL_MSG("bitset::set -- out of range"); + #endif + + #if EASTL_EXCEPTIONS_ENABLED + throw std::out_of_range("bitset::set"); + #endif + } + + return *this; + } + + + template + inline typename bitset::this_type& + bitset::reset() + { + base_type::reset(); + return *this; + } + + + template + inline typename bitset::this_type& + bitset::reset(size_type i) + { + if(EASTL_LIKELY(i < N)) + DoGetWord(i) &= ~(static_cast(1) << (i & kBitsPerWordMask)); + else + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(!(i < N))) + EASTL_FAIL_MSG("bitset::reset -- out of range"); + #endif + + #if EASTL_EXCEPTIONS_ENABLED + throw std::out_of_range("bitset::reset"); + #endif + } + + return *this; + } + + + template + inline typename bitset::this_type& + bitset::flip() + { + EA_DISABLE_VC_WARNING(6313) + base_type::flip(); + if((N & kBitsPerWordMask) || (N == 0)) // If there are any high bits to clear... (If we didn't have this check, then the code below would do the wrong thing when N == 32. + mWord[kWordCount - 1] &= ~(static_cast(~static_cast(0)) << (N & kBitsPerWordMask)); // This clears any high unused bits. We need to do this so that shift operations proceed correctly. 
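+ // e.g. N == 70 with 64-bit words: N & kBitsPerWordMask == 6, so ~(~0 << 6) keeps only
+ // the low 6 valid bits of the last word and clears the 58 unused ones.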
+ return *this; + EA_RESTORE_VC_WARNING() + } + + + template + inline typename bitset::this_type& + bitset::flip(size_type i) + { + if(EASTL_LIKELY(i < N)) + DoGetWord(i) ^= (static_cast(1) << (i & kBitsPerWordMask)); + else + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(!(i < N))) + EASTL_FAIL_MSG("bitset::flip -- out of range"); + #endif + + #if EASTL_EXCEPTIONS_ENABLED + throw std::out_of_range("bitset::flip"); + #endif + } + return *this; + } + + + template + inline typename bitset::this_type + bitset::operator~() const + { + return this_type(*this).flip(); + } + + + template + inline typename bitset::reference + bitset::operator[](size_type i) + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(!(i < N))) + EASTL_FAIL_MSG("bitset::operator[] -- out of range"); + #endif + + return reference(*this, i); + } + + + template + inline bool bitset::operator[](size_type i) const + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(!(i < N))) + EASTL_FAIL_MSG("bitset::operator[] -- out of range"); + #endif + + return (DoGetWord(i) & (static_cast(1) << (i & kBitsPerWordMask))) != 0; + } + + + template + inline const typename bitset::word_type* bitset::data() const + { + return base_type::mWord; + } + + + template + inline typename bitset::word_type* bitset::data() + { + return base_type::mWord; + } + + + template + inline void bitset::from_uint32(uint32_t value) + { + base_type::from_uint32(value); + + if((N & kBitsPerWordMask) || (N == 0)) // If there are any high bits to clear... (If we didn't have this check, then the code below would do the wrong thing when N == 32. + mWord[kWordCount - 1] &= ~(static_cast(~static_cast(0)) << (N & kBitsPerWordMask)); // This clears any high unused bits. We need to do this so that shift operations proceed correctly. + } + + + template + inline void bitset::from_uint64(uint64_t value) + { + base_type::from_uint64(value); + + if((N & kBitsPerWordMask) || (N == 0)) // If there are any high bits to clear... (If we didn't have this check, then the code below would do the wrong thing when N == 32. + mWord[kWordCount - 1] &= ~(static_cast(~static_cast(0)) << (N & kBitsPerWordMask)); // This clears any high unused bits. We need to do this so that shift operations proceed correctly. 
+ } + + + // template + // inline unsigned long bitset::to_ulong() const + // { + // return base_type::to_ulong(); + // } + + + // template + // inline uint32_t bitset::to_uint32() const + // { + // return base_type::to_uint32(); + // } + + + // template + // inline uint64_t bitset::to_uint64() const + // { + // return base_type::to_uint64(); + // } + + + // template + // inline typename bitset::size_type + // bitset::count() const + // { + // return base_type::count(); + // } + + + template + inline typename bitset::size_type + bitset::size() const + { + return (size_type)N; + } + + + template + inline bool bitset::operator==(const this_type& x) const + { + return base_type::operator==(x); + } + + + template + inline bool bitset::operator!=(const this_type& x) const + { + return !base_type::operator==(x); + } + + + template + inline bool bitset::test(size_type i) const + { + if(EASTL_UNLIKELY(i < N)) + return (DoGetWord(i) & (static_cast(1) << (i & kBitsPerWordMask))) != 0; + + #if EASTL_ASSERT_ENABLED + EASTL_FAIL_MSG("bitset::test -- out of range"); + #endif + + #if EASTL_EXCEPTIONS_ENABLED + throw std::out_of_range("bitset::test"); + #else + return false; + #endif + } + + + // template + // inline bool bitset::any() const + // { + // return base_type::any(); + // } + + + template + inline bool bitset::all() const + { + return count() == size(); + } + + + template + inline bool bitset::none() const + { + return !base_type::any(); + } + + + template + inline typename bitset::this_type + bitset::operator<<(size_type n) const + { + return this_type(*this).operator<<=(n); + } + + + template + inline typename bitset::this_type + bitset::operator>>(size_type n) const + { + return this_type(*this).operator>>=(n); + } + + + template + inline typename bitset::size_type + bitset::find_first() const + { + const size_type i = base_type::DoFindFirst(); + + if(i < kSize) + return i; + // Else i could be the base type bit count, so we clamp it to our size. + + return kSize; + } + + + template + inline typename bitset::size_type + bitset::find_next(size_type last_find) const + { + const size_type i = base_type::DoFindNext(last_find); + + if(i < kSize) + return i; + // Else i could be the base type bit count, so we clamp it to our size. + + return kSize; + } + + + template + inline typename bitset::size_type + bitset::find_last() const + { + const size_type i = base_type::DoFindLast(); + + if(i < kSize) + return i; + // Else i could be the base type bit count, so we clamp it to our size. + + return kSize; + } + + + template + inline typename bitset::size_type + bitset::find_prev(size_type last_find) const + { + const size_type i = base_type::DoFindPrev(last_find); + + if(i < kSize) + return i; + // Else i could be the base type bit count, so we clamp it to our size. + + return kSize; + } + + + + /////////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////////// + + template + inline bitset operator&(const bitset& a, const bitset& b) + { + // We get betting inlining when we don't declare temporary variables. 
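+ // (i.e. the copy of 'a' is constructed directly in the return expression rather than as a named local.)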
+ return bitset(a).operator&=(b); + } + + + template + inline bitset operator|(const bitset& a, const bitset& b) + { + return bitset(a).operator|=(b); + } + + + template + inline bitset operator^(const bitset& a, const bitset& b) + { + return bitset(a).operator^=(b); + } + + +} // namespace eastl + + +EA_RESTORE_VC_WARNING(); + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/bitvector.h b/libkram/eastl/include/EASTL/bitvector.h new file mode 100644 index 00000000..ade67823 --- /dev/null +++ b/libkram/eastl/include/EASTL/bitvector.h @@ -0,0 +1,1474 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// Implements a bit vector, which is essentially a vector of bool but which +// uses bits instead of bytes. It is thus similar to the original std::vector. +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// Note: This code is not yet complete: it isn't tested and doesn't yet +// support containers other than vector. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_BITVECTOR_H +#define EASTL_BITVECTOR_H + + +#include +#include +#include +#include + +EA_DISABLE_VC_WARNING(4480); // nonstandard extension used: specifying underlying type for enum + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// EASTL_BITVECTOR_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_BITVECTOR_DEFAULT_NAME + #define EASTL_BITVECTOR_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " bitvector" // Unless the user overrides something, this is "EASTL bitvector". + #endif + + /// EASTL_BITVECTOR_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_BITVECTOR_DEFAULT_ALLOCATOR + #define EASTL_BITVECTOR_DEFAULT_ALLOCATOR allocator_type(EASTL_BITVECTOR_DEFAULT_NAME) + #endif + + + + /// BitvectorWordType + /// Defines the integral data type used by bitvector. + typedef EASTL_BITSET_WORD_TYPE_DEFAULT BitvectorWordType; + + + template + class bitvector_const_iterator; + + + template + class bitvector_reference + { + public: + typedef eastl_size_t size_type; + bitvector_reference(Element* ptr, eastl_size_t i); + + bitvector_reference& operator=(bool value); + bitvector_reference& operator=(const bitvector_reference& rhs); + + operator bool() const // Defined here because some compilers fail otherwise. + { return (*mpBitWord & (Element(1) << mnBitIndex)) != 0; } + + protected: + friend class bitvector_const_iterator; + + Element* mpBitWord; + size_type mnBitIndex; + + bitvector_reference() {} + void CopyFrom(const bitvector_reference& rhs); + }; + + + + template + class bitvector_const_iterator + { + public: + typedef EASTL_ITC_NS::random_access_iterator_tag iterator_category; + typedef bitvector_const_iterator this_type; + typedef bool value_type; + typedef bitvector_reference reference_type; + typedef ptrdiff_t difference_type; + typedef Element element_type; + typedef element_type* pointer; // This is wrong. It needs to be someting that acts as a pointer to a bit. 
+ typedef element_type& reference; // This is not right. It needs to be someting that acts as a pointer to a bit. + typedef eastl_size_t size_type; + + protected: + reference_type mReference; + + enum + { + kBitCount = (8 * sizeof(Element)) + }; + + public: + bool operator*() const; + bool operator[](difference_type n) const; + + bitvector_const_iterator(); + bitvector_const_iterator(const element_type* p, eastl_size_t i); + bitvector_const_iterator(const reference_type& referenceType); + + bitvector_const_iterator& operator++(); + bitvector_const_iterator operator++(int); + bitvector_const_iterator& operator--(); + bitvector_const_iterator operator--(int); + + bitvector_const_iterator& operator+=(difference_type dist); + bitvector_const_iterator& operator-=(difference_type dist); + bitvector_const_iterator operator+ (difference_type dist) const; + bitvector_const_iterator operator- (difference_type dist) const; + + difference_type operator-(const this_type& rhs) const; + + bitvector_const_iterator& operator= (const this_type& rhs); + + bool operator==(const this_type& rhs) const; + bool operator!=(const this_type& rhs) const; + + bool operator< (const this_type& rhs) const; + bool operator<=(const this_type& rhs) const; + bool operator> (const this_type& rhs) const; + bool operator>=(const this_type& rhs) const; + + int validate(const element_type* pStart, const element_type* pEnd, eastl_size_t nExtraBits) const; + + protected: + template + friend class bitvector; + + reference_type& get_reference_type() { return mReference; } + }; + + + + template + class bitvector_iterator : public bitvector_const_iterator + { + public: + typedef EASTL_ITC_NS::random_access_iterator_tag iterator_category; + typedef bitvector_iterator this_type; + typedef bitvector_const_iterator base_type; + typedef bool value_type; + typedef bitvector_reference reference_type; + typedef ptrdiff_t difference_type; + typedef Element element_type; + typedef element_type* pointer; // This is wrong. It needs to be someting that acts as a pointer to a bit. + typedef element_type& reference; // This is not right. It needs to be someting that acts as a pointer to a bit. + + public: + reference_type operator*() const; + reference_type operator[](difference_type n) const; + + bitvector_iterator(); + bitvector_iterator(element_type* p, eastl_size_t i); + bitvector_iterator(reference_type& referenceType); + + bitvector_iterator& operator++() { base_type::operator++(); return *this; } + bitvector_iterator& operator--() { base_type::operator--(); return *this; } + bitvector_iterator operator++(int); + bitvector_iterator operator--(int); + + bitvector_iterator& operator+=(difference_type dist) { base_type::operator+=(dist); return *this; } + bitvector_iterator& operator-=(difference_type dist) { base_type::operator-=(dist); return *this; } + bitvector_iterator operator+ (difference_type dist) const; + bitvector_iterator operator- (difference_type dist) const; + + // We need this here because we are overloading operator-, so for some reason the + // other overload of the function can't be found unless it's explicitly specified. + difference_type operator-(const base_type& rhs) const { return base_type::operator-(rhs); } + }; + + + + /// bitvector + /// + /// Implements an array of bits treated as boolean values. + /// bitvector is similar to vector but uses bits instead of bytes and + /// allows the user to use other containers such as deque instead of vector. 
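+ /// For example, with the default vector-backed container, usage is roughly:
+ /// bitvector<> bv; bv.push_back(true); bv.set(10, true); bool b = bv.test(10, false);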
+ /// bitvector is different from bitset in that bitset is less flexible but + /// uses less memory and has higher performance. + /// + /// To consider: Rename the Element template parameter to WordType, for + /// consistency with bitset. + /// + template > + class bitvector + { + public: + typedef bitvector this_type; + typedef bool value_type; + typedef bitvector_reference reference; + typedef bool const_reference; + typedef bitvector_iterator iterator; + typedef bitvector_const_iterator const_iterator; + typedef eastl::reverse_iterator reverse_iterator; + typedef eastl::reverse_iterator const_reverse_iterator; + typedef Allocator allocator_type; + typedef Element element_type; + typedef Container container_type; + typedef eastl_size_t size_type; + typedef ptrdiff_t difference_type; + + #if defined(_MSC_VER) && (_MSC_VER >= 1400) && (_MSC_VER <= 1600) && !EASTL_STD_CPP_ONLY // _MSC_VER of 1400 means VS2005, 1600 means VS2010. VS2012 generates errors with usage of enum:size_type. + enum : size_type { // Use Microsoft enum language extension, allowing for smaller debug symbols than using a static const. Users have been affected by this. + npos = container_type::npos, + kMaxSize = container_type::kMaxSize + }; + #else + static const size_type npos = container_type::npos; /// 'npos' means non-valid position or simply non-position. + static const size_type kMaxSize = container_type::kMaxSize; /// -1 is reserved for 'npos'. It also happens to be slightly beneficial that kMaxSize is a value less than -1, as it helps us deal with potential integer wraparound issues. + #endif + + enum + { + kBitCount = 8 * sizeof(Element) + }; + + protected: + container_type mContainer; + size_type mFreeBitCount; // Unused bits in the last word of mContainer. + + public: + bitvector(); + explicit bitvector(const allocator_type& allocator); + explicit bitvector(size_type n, const allocator_type& allocator = EASTL_BITVECTOR_DEFAULT_ALLOCATOR); + bitvector(size_type n, value_type value, const allocator_type& allocator = EASTL_BITVECTOR_DEFAULT_ALLOCATOR); + bitvector(const bitvector& copy); + + template + bitvector(InputIterator first, InputIterator last); + + bitvector& operator=(const bitvector& x); + void swap(this_type& x); + + template + void assign(InputIterator first, InputIterator last); + + iterator begin() EA_NOEXCEPT; + const_iterator begin() const EA_NOEXCEPT; + const_iterator cbegin() const EA_NOEXCEPT; + + iterator end() EA_NOEXCEPT; + const_iterator end() const EA_NOEXCEPT; + const_iterator cend() const EA_NOEXCEPT; + + reverse_iterator rbegin() EA_NOEXCEPT; + const_reverse_iterator rbegin() const EA_NOEXCEPT; + const_reverse_iterator crbegin() const EA_NOEXCEPT; + + reverse_iterator rend() EA_NOEXCEPT; + const_reverse_iterator rend() const EA_NOEXCEPT; + const_reverse_iterator crend() const EA_NOEXCEPT; + + bool empty() const EA_NOEXCEPT; + size_type size() const EA_NOEXCEPT; + size_type capacity() const EA_NOEXCEPT; + + void resize(size_type n, value_type value); + void resize(size_type n); + void reserve(size_type n); + void set_capacity(size_type n = npos); // Revises the capacity to the user-specified value. Resizes the container to match the capacity if the requested capacity n is less than the current size. If n == npos then the capacity is reallocated (if necessary) such that capacity == size. 
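+ // Note: size(), capacity(), resize(), reserve() and set_capacity() are all measured in
+ // bits; the underlying container allocates whole Element words of kBitCount bits each.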
+ + void push_back(); + void push_back(value_type value); + void pop_back(); + + reference front(); + const_reference front() const; + reference back(); + const_reference back() const; + + bool test(size_type n, bool defaultValue) const; // Returns true if the bit index is < size() and set. Returns defaultValue if the bit is >= size(). + void set(size_type n, bool value); // Resizes the container to accomodate n if necessary. + + reference at(size_type n); // throws an out_of_range exception if n is invalid. + const_reference at(size_type n) const; + + reference operator[](size_type n); // behavior is undefined if n is invalid. + const_reference operator[](size_type n) const; + + /* + Work in progress: + template iterator find_first(); // Finds the lowest "on" bit. + template iterator find_next(const_iterator it); // Finds the next lowest "on" bit after it. + template iterator find_last(); // Finds the index of the last "on" bit, returns size if none are set. + template iterator find_prev(const_iterator it); // Finds the index of the last "on" bit before last_find, returns size if none are set. + + template const_iterator find_first() const; // Finds the lowest "on" bit. + template const_iterator find_next(const_iterator it) const; // Finds the next lowest "on" bit after it. + template const_iterator find_last() const; // Finds the index of the last "on" bit, returns size if none are set. + template const_iterator find_prev(const_iterator it) const; // Finds the index of the last "on" bit before last_find, returns size if none are set. + */ + + element_type* data() EA_NOEXCEPT; + const element_type* data() const EA_NOEXCEPT; + + iterator insert(const_iterator position, value_type value); + void insert(const_iterator position, size_type n, value_type value); + + // template Not yet implemented. See below for disabled definition. + // void insert(const_iterator position, InputIterator first, InputIterator last); + + iterator erase(const_iterator position); + iterator erase(const_iterator first, const_iterator last); + + reverse_iterator erase(const_reverse_iterator position); + reverse_iterator erase(const_reverse_iterator first, const_reverse_iterator last); + + void clear(); + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. 
+ + container_type& get_container(); + const container_type& get_container() const; + + bool validate() const; + int validate_iterator(const_iterator i) const; + }; + + + + + /////////////////////////////////////////////////////////////////////// + // bitvector_reference + /////////////////////////////////////////////////////////////////////// + + template + bitvector_reference::bitvector_reference(Element* p, eastl_size_t i) + : mpBitWord(p), + mnBitIndex(i) + { + } + + + template + bitvector_reference& + bitvector_reference::operator=(bool value) + { + const Element mask = (Element)(Element(1) << mnBitIndex); + + if(value) + *mpBitWord |= mask; + else + *mpBitWord &= ~mask; + + return *this; + } + + + template + bitvector_reference& + bitvector_reference::operator=(const bitvector_reference& rhs) + { + return (*this = (bool)rhs); + } + + + template + void bitvector_reference::CopyFrom(const bitvector_reference& rhs) + { + mpBitWord = rhs.mpBitWord; + mnBitIndex = rhs.mnBitIndex; + } + + + + + /////////////////////////////////////////////////////////////////////// + // bitvector_const_iterator + /////////////////////////////////////////////////////////////////////// + + template + bitvector_const_iterator::bitvector_const_iterator() + : mReference(0, 0) + { + } + + + template + bitvector_const_iterator::bitvector_const_iterator(const Element* p, eastl_size_t i) + : mReference(const_cast(p), i) // const_cast is safe here because we never let mReference leak and we don't modify it. + { + } + + + template + bitvector_const_iterator::bitvector_const_iterator(const reference_type& reference) + : mReference(reference) + { + } + + + template + bitvector_const_iterator& + bitvector_const_iterator::operator++() + { + ++mReference.mnBitIndex; + + if(mReference.mnBitIndex == kBitCount) + { + ++mReference.mpBitWord; + mReference.mnBitIndex = 0; + } + + return *this; + } + + + template + bitvector_const_iterator& + bitvector_const_iterator::operator--() + { + if(mReference.mnBitIndex == 0) + { + --mReference.mpBitWord; + mReference.mnBitIndex = kBitCount; + } + + --mReference.mnBitIndex; + return *this; + } + + + template + bitvector_const_iterator + bitvector_const_iterator::operator++(int) + { + bitvector_const_iterator copy(*this); + ++*this; + return copy; + } + + + template + bitvector_const_iterator + bitvector_const_iterator::operator--(int) + { + bitvector_const_iterator copy(*this); + --*this; + return copy; + } + + + template + bitvector_const_iterator& + bitvector_const_iterator::operator+=(difference_type n) + { + n += mReference.mnBitIndex; + + if(n >= difference_type(0)) + { + mReference.mpBitWord += n / kBitCount; + mReference.mnBitIndex = (size_type)(n % kBitCount); + } + else + { + // backwards is tricky + // figure out how many full words backwards we need to move + // n = [-1..-32] => 1 + // n = [-33..-64] => 2 + const size_type backwards = (size_type)(-n + kBitCount - 1); + mReference.mpBitWord -= backwards / kBitCount; + + // -1 => 31; backwards = 32; 31 - (backwards % 32) = 31 + // -2 => 30; backwards = 33; 31 - (backwards % 32) = 30 + // -3 => 29; backwards = 34 + // .. 
+ // -32 => 0; backwards = 63; 31 - (backwards % 32) = 0 + // -33 => 31; backwards = 64; 31 - (backwards % 32) = 31 + mReference.mnBitIndex = (kBitCount - 1) - (backwards % kBitCount); + } + + return *this; + } + + + template + bitvector_const_iterator& + bitvector_const_iterator::operator-=(difference_type n) + { + return (*this += -n); + } + + + template + bitvector_const_iterator + bitvector_const_iterator::operator+(difference_type n) const + { + bitvector_const_iterator copy(*this); + copy += n; + return copy; + } + + + template + bitvector_const_iterator + bitvector_const_iterator::operator-(difference_type n) const + { + bitvector_const_iterator copy(*this); + copy -= n; + return copy; + } + + + template + typename bitvector_const_iterator::difference_type + bitvector_const_iterator::operator-(const this_type& rhs) const + { + return ((mReference.mpBitWord - rhs.mReference.mpBitWord) * kBitCount) + mReference.mnBitIndex - rhs.mReference.mnBitIndex; + } + + + template + bool bitvector_const_iterator::operator==(const this_type& rhs) const + { + return (mReference.mpBitWord == rhs.mReference.mpBitWord) && (mReference.mnBitIndex == rhs.mReference.mnBitIndex); + } + + + template + bool bitvector_const_iterator::operator!=(const this_type& rhs) const + { + return !(*this == rhs); + } + + + template + bool bitvector_const_iterator::operator<(const this_type& rhs) const + { + return (mReference.mpBitWord < rhs.mReference.mpBitWord) || + ((mReference.mpBitWord == rhs.mReference.mpBitWord) && (mReference.mnBitIndex < rhs.mReference.mnBitIndex)); + } + + + template + bool bitvector_const_iterator::operator<=(const this_type& rhs) const + { + return (mReference.mpBitWord < rhs.mReference.mpBitWord) || + ((mReference.mpBitWord == rhs.mReference.mpBitWord) && (mReference.mnBitIndex <= rhs.mReference.mnBitIndex)); + } + + + template + bool bitvector_const_iterator::operator>(const this_type& rhs) const + { + return !(*this <= rhs); + } + + + template + bool bitvector_const_iterator::operator>=(const this_type& rhs) const + { + return !(*this < rhs); + } + + + template + bool bitvector_const_iterator::operator*() const + { + return mReference; + } + + + template + bool bitvector_const_iterator::operator[](difference_type n) const + { + return *(*this + n); + } + + + template + bitvector_const_iterator& bitvector_const_iterator::operator= (const this_type& rhs) + { + mReference.CopyFrom(rhs.mReference); + return *this; + } + + + template + int bitvector_const_iterator::validate(const Element* pStart, const Element* pEnd, eastl_size_t nExtraBits) const + { + const Element* const pCurrent = mReference.mpBitWord; + + if(pCurrent >= pStart) + { + if(nExtraBits == 0) + { + if(pCurrent == pEnd && mReference) + return eastl::isf_valid | eastl::isf_current; + else if(pCurrent < pEnd) + return eastl::isf_valid | eastl::isf_current | eastl::isf_can_dereference; + } + else if(pCurrent == (pEnd - 1)) + { + const size_type bit = mReference.mnBitIndex; + const size_type lastbit = kBitCount - nExtraBits; + + if(bit == lastbit) + return eastl::isf_valid | eastl::isf_current; + else if(bit < lastbit) + return eastl::isf_valid | eastl::isf_current | eastl::isf_can_dereference; + } + else if(pCurrent < pEnd) + { + return eastl::isf_valid | eastl::isf_current | eastl::isf_can_dereference; + } + } + + return eastl::isf_none; + } + + + + /////////////////////////////////////////////////////////////////////// + // bitvector_iterator + /////////////////////////////////////////////////////////////////////// + + template + 
bitvector_iterator::bitvector_iterator() + : base_type() + { + } + + template + bitvector_iterator::bitvector_iterator(Element* p, eastl_size_t i) + : base_type(p, i) + { + } + + + template + bitvector_iterator::bitvector_iterator(reference_type& reference) + : base_type(reference) + { + } + + + template + typename bitvector_iterator::reference_type + bitvector_iterator::operator*() const + { + return base_type::mReference; + } + + + template + typename bitvector_iterator::reference_type + bitvector_iterator::operator[](difference_type n) const + { + return *(*this + n); + } + + + template + void MoveBits(bitvector_iterator start, + bitvector_iterator end, + bitvector_iterator dest) + { + // Slow implemenation; could optimize by moving a word at a time. + if(dest <= start) + { + while(start != end) + { + *dest = *start; + ++dest; + ++start; + } + } + else + { + // Need to move backwards + dest += (end - start); + + while(start != end) + { + --dest; + --end; + *dest = *end; + } + } + } + + + template + bitvector_iterator + bitvector_iterator::operator++(int) + { + bitvector_iterator copy(*this); + ++*this; + return copy; + } + + + template + bitvector_iterator + bitvector_iterator::operator--(int) + { + bitvector_iterator copy(*this); + --*this; + return copy; + } + + + template + bitvector_iterator + bitvector_iterator::operator+(difference_type n) const + { + bitvector_iterator copy(*this); + copy += n; + return copy; + } + + + template + bitvector_iterator + bitvector_iterator::operator-(difference_type n) const + { + bitvector_iterator copy(*this); + copy -= n; + return copy; + } + + + + + /////////////////////////////////////////////////////////////////////// + // bitvector + /////////////////////////////////////////////////////////////////////// + + template + template + void bitvector::assign(InputIterator first, InputIterator last) + { + // To consider: We can maybe specialize this on bitvector_iterator to do a fast bitwise copy. + // We can also specialize for random access iterators to figure out the size & reserve first. 
+ + clear(); + + while(first != last) + { + push_back(*first); + ++first; + } + } + + + template + typename bitvector::iterator + bitvector::begin() EA_NOEXCEPT + { + return iterator(mContainer.begin(), 0); + } + + + template + typename bitvector::const_iterator + bitvector::begin() const EA_NOEXCEPT + { + return const_iterator(mContainer.begin(), 0); + } + + + template + typename bitvector::const_iterator + bitvector::cbegin() const EA_NOEXCEPT + { + return const_iterator(mContainer.begin(), 0); + } + + + template + typename bitvector::iterator + bitvector::end() EA_NOEXCEPT + { + return iterator(mContainer.end(), 0) - mFreeBitCount; + } + + + template + typename bitvector::const_iterator + bitvector::end() const EA_NOEXCEPT + { + return const_iterator(mContainer.end(), 0) - mFreeBitCount; + } + + + template + typename bitvector::const_iterator + bitvector::cend() const EA_NOEXCEPT + { + return const_iterator(mContainer.end(), 0) - mFreeBitCount; + } + + + template + bool bitvector::empty() const EA_NOEXCEPT + { + return mContainer.empty(); + } + + + template + typename bitvector::size_type + bitvector::size() const EA_NOEXCEPT + { + return (mContainer.size() * kBitCount) - mFreeBitCount; + } + + + template + typename bitvector::size_type + bitvector::capacity() const EA_NOEXCEPT + { + return mContainer.capacity() * kBitCount; + } + + + template + void bitvector::set_capacity(size_type n) + { + if(n == npos) + mContainer.set_capacity(npos); + else + mContainer.set_capacity((n + kBitCount - 1) / kBitCount); + } + + + template + typename bitvector::reverse_iterator + bitvector::rbegin() EA_NOEXCEPT + { + return reverse_iterator(end()); + } + + + template + typename bitvector::const_reverse_iterator + bitvector::rbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(end()); + } + + + template + typename bitvector::const_reverse_iterator + bitvector::crbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(end()); + } + + + template + typename bitvector::reverse_iterator + bitvector::rend() EA_NOEXCEPT + { + return reverse_iterator(begin()); + } + + + template + typename bitvector::const_reverse_iterator + bitvector::rend() const EA_NOEXCEPT + { + return const_reverse_iterator(begin()); + } + + + template + typename bitvector::const_reverse_iterator + bitvector::crend() const EA_NOEXCEPT + { + return const_reverse_iterator(begin()); + } + + + template + typename bitvector::reference + bitvector::front() + { + EASTL_ASSERT(!empty()); + return reference(&mContainer[0], 0); + } + + + template + typename bitvector::const_reference + bitvector::front() const + { + EASTL_ASSERT(!empty()); + + // To consider: make a better solution to this than const_cast. 
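+ // (The temporary reference converts straight to bool for the const_reference return
+ // value, so the const_cast below cannot be used to modify the container.)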
+ return reference(const_cast(&mContainer[0]), 0); + } + + + template + typename bitvector::reference + bitvector::back() + { + EASTL_ASSERT(!empty()); + return *(--end()); + } + + + template + typename bitvector::const_reference + bitvector::back() const + { + EASTL_ASSERT(!empty()); + return *(--end()); + } + + + template + void bitvector::push_back() + { + if(!mFreeBitCount) + { + mContainer.push_back(); + mFreeBitCount = kBitCount; + } + + --mFreeBitCount; + } + + + template + void bitvector::push_back(value_type value) + { + push_back(); + *--end() = value; + } + + + template + void bitvector::pop_back() + { + EASTL_ASSERT(!empty()); + + if(++mFreeBitCount == kBitCount) + { + mContainer.pop_back(); + mFreeBitCount = 0; + } + } + + + template + void bitvector::reserve(size_type n) + { + const size_type wordCount = (n + kBitCount - 1) / kBitCount; + mContainer.reserve(wordCount); + } + + + template + void bitvector::resize(size_type n) + { + const size_type wordCount = (n + kBitCount - 1) / kBitCount; + const size_type extra = (wordCount * kBitCount) - n; + + mContainer.resize(wordCount); + mFreeBitCount = extra; + } + + + template + void bitvector::resize(size_type n, value_type value) + { + const size_type s = size(); + if(n < s) + resize(n); + + // Fill up to the end of a word + size_type newbits = n - s; + + while(mFreeBitCount && newbits) + { + push_back(value); + --newbits; + } + + // Fill the rest a word at a time + if(newbits) + { + element_type element(0); + if(value) + element = ~element; + + const size_type words = (n + kBitCount - 1) / kBitCount; + const size_type extra = words * kBitCount - n; + mContainer.resize(words, element); + mFreeBitCount = extra; + } + } + + + template + bool bitvector::test(size_type n, bool defaultValue) const + { + if(n < size()) + return *(begin() + (difference_type)n); + + return defaultValue; + } + + + template + void bitvector::set(size_type n, bool value) + { + if(EASTL_UNLIKELY(n >= size())) + resize(n + 1); + + *(begin() + (difference_type)n) = value; + } + + + template + typename bitvector::reference + bitvector::at(size_type n) + { + // The difference between at and operator[] is that at signals + // if the requested position is out of range by throwing an + // out_of_range exception. 
+ + #if EASTL_EXCEPTIONS_ENABLED + if(EASTL_UNLIKELY(n >= size())) + throw std::out_of_range("bitvector::at -- out of range"); + #elif EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(n >= size())) + EASTL_FAIL_MSG("bitvector::at -- out of range"); + #endif + + return *(begin() + (difference_type)n); + } + + + template + typename bitvector::const_reference + bitvector::at(size_type n) const + { + #if EASTL_EXCEPTIONS_ENABLED + if(EASTL_UNLIKELY(n >= size())) + throw std::out_of_range("bitvector::at -- out of range"); + #elif EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(n >= size())) + EASTL_FAIL_MSG("bitvector::at -- out of range"); + #endif + + return *(begin() + (difference_type)n); + } + + + template + typename bitvector::reference + bitvector::operator[](size_type n) + { + return *(begin() + (difference_type)n); + } + + + template + typename bitvector::const_reference + bitvector::operator[](size_type n) const + { + return *(begin() + (difference_type)n); + } + + +/* + template + template + typename bitvector::iterator + bitvector::find_first() + { + return begin(); + } + + template iterator find_next(const_iterator it); + template iterator find_last(); + template iterator find_prev(const_iterator it); + + template const_iterator find_first() const; + template const_iterator find_next(const_iterator it) const; + template const_iterator find_last() const; + template const_iterator find_prev(const_iterator it) const; +*/ + + + + + template + inline typename bitvector::container_type& + bitvector::get_container() + { + return mContainer; + } + + + template + inline const typename bitvector::container_type& + bitvector::get_container() const + { + return mContainer; + } + + + template + bool bitvector::validate() const + { + if(!mContainer.validate()) + return false; + + if((unsigned)mFreeBitCount >= kBitCount) + return false; + + return true; + } + + + template + int bitvector::validate_iterator(const_iterator i) const + { + return i.validate(mContainer.begin(), mContainer.end(), mFreeBitCount); + } + + + template + typename bitvector::element_type* + bitvector::data() EA_NOEXCEPT + { + return mContainer.data(); + } + + + template + const typename bitvector::element_type* + bitvector::data() const EA_NOEXCEPT + { + return mContainer.data(); + } + + + template + typename bitvector::iterator + bitvector::insert(const_iterator position, value_type value) + { + iterator iPosition(position.get_reference_type()); // This is just a non-const version of position. + + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(validate_iterator(iPosition) & eastl::isf_valid) == 0) + EASTL_FAIL_MSG("bitvector::insert -- invalid iterator"); + #endif + + // Save because we might reallocate + const typename iterator::difference_type n = iPosition - begin(); + push_back(); + iPosition = begin() + n; + + MoveBits(iPosition, --end(), ++iterator(iPosition)); + *iPosition = value; + + return iPosition; + } + + + template + void bitvector::insert(const_iterator position, size_type n, value_type value) + { + iterator iPosition(position.get_reference_type()); // This is just a non-const version of position. + + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(validate_iterator(iPosition) & eastl::isf_valid) == 0) + EASTL_FAIL_MSG("bitvector::insert -- invalid iterator"); + #endif + + // Save because we might reallocate. 
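+ // (iPosition wraps a raw pointer into mContainer's storage, so it is recomputed from
+ // the saved bit offset after the resize below.)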
+ const typename iterator::difference_type p = iPosition - begin(); + resize(size() + n); + iPosition = begin() + p; + + iterator insert_end = iPosition + n; + MoveBits(iPosition, end() - n, insert_end); + + // To do: Optimize this to word-at-a-time for large inserts + while(iPosition != insert_end) + { + *iPosition = value; + ++iPosition; + } + } + + + /* + The following is a placeholder for a future implementation. It turns out that a correct implementation of + insert(pos, first, last) is a non-trivial exercise that would take a few hours to implement and test. + The reasons why involve primarily the problem of handling the case where insertion source comes from + within the container itself, and the case that first and last (note they are templated) might not refer + to iterators might refer to a value/count pair. The C++ Standard requires you to handle this case and + I (Paul Pedriana) believe that it applies even for a bitvector, given that bool is an integral type. + So you have to set up a compile-time type traits function chooser. See vector, for example. + + template + template + void bitvector::insert(const_iterator position, InputIterator first, InputIterator last) + { + iterator iPosition(position.get_reference_type()); // This is just a non-const version of position. + + // This implementation is probably broken due to not handling insertion into self. + // To do: Make a more efficient version of this. + difference_type distance = (iPosition - begin()); + + while(first != last) + { + insert(iPosition, *first); + iPosition = begin() + ++distance; + ++first; + } + } + */ + + + template + typename bitvector::iterator + bitvector::erase(const_iterator position) + { + iterator iPosition(position.get_reference_type()); // This is just a non-const version of position. + + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(validate_iterator(iPosition) & eastl::isf_can_dereference) == 0) + EASTL_FAIL_MSG("bitvector::erase -- invalid iterator"); + #endif + + MoveBits(++iterator(iPosition), end(), iPosition); + resize(size() - 1); + + // Verify that no reallocation occurred. + EASTL_ASSERT(validate_iterator(iPosition) & eastl::isf_valid); + return iPosition; + } + + + template + typename bitvector::iterator + bitvector::erase(const_iterator first, const_iterator last) + { + iterator iFirst(first.get_reference_type()); // This is just a non-const version of first. + iterator iLast(last.get_reference_type()); // This is just a non-const version of last. + + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(validate_iterator(iLast) & eastl::isf_valid) == 0) + EASTL_FAIL_MSG("bitvector::erase -- invalid iterator"); + #endif + + if(!(iFirst == iLast)) + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(validate_iterator(iFirst) & eastl::isf_can_dereference) == 0) + EASTL_FAIL_MSG("bitvector::erase -- invalid iterator"); + #endif + + const size_type eraseCount = (size_type)(iLast - iFirst); + MoveBits(iLast, end(), iFirst); + resize(size() - eraseCount); + + // Verify that no reallocation occurred. 
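// Editorial note (not part of this patch): the resize() above only shrinks the
// container, so the underlying word storage is not reallocated and iFirst is
// expected to remain valid; the assertion below documents that assumption.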
+ #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(validate_iterator(iFirst) & eastl::isf_valid) == 0) + EASTL_FAIL_MSG("bitvector::erase -- invalid iterator"); + #endif + } + + return iFirst; + } + + + template + typename bitvector::reverse_iterator + bitvector::erase(const_reverse_iterator position) + { + return reverse_iterator(erase((++position).base())); + } + + + template + typename bitvector::reverse_iterator + bitvector::erase(const_reverse_iterator first, const_reverse_iterator last) + { + // Version which erases in order from first to last. + // difference_type i(first.base() - last.base()); + // while(i--) + // first = erase(first); + // return first; + + // Version which erases in order from last to first, but is slightly more efficient: + return reverse_iterator(erase(last.base(), first.base())); + } + + + template + void bitvector::swap(this_type& rhs) + { + mContainer.swap(rhs.mContainer); + eastl::swap(mFreeBitCount, rhs.mFreeBitCount); + } + + + template + void bitvector::reset_lose_memory() + { + mContainer.reset_lose_memory(); // intentional memory leak. + mFreeBitCount = 0; + } + + + template + void bitvector::clear() + { + mContainer.clear(); + mFreeBitCount = 0; + } + + + template + bitvector& + bitvector::operator=(const bitvector& rhs) + { + // The following is OK if (&rhs == this) + mContainer = rhs.mContainer; + mFreeBitCount = rhs.mFreeBitCount; + + return *this; + } + + + template + bitvector::bitvector() + : mContainer(), + mFreeBitCount(0) + { + } + + + template + bitvector::bitvector(const allocator_type& allocator) + : mContainer(allocator), + mFreeBitCount(0) + { + } + + + template + bitvector::bitvector(size_type n, const allocator_type& allocator) + : mContainer((n + kBitCount - 1) / kBitCount, allocator) + { + mFreeBitCount = kBitCount - (n % kBitCount); + + if(mFreeBitCount == kBitCount) + mFreeBitCount = 0; + } + + + template + bitvector::bitvector(size_type n, value_type value, const allocator_type& allocator) + : mContainer((n + kBitCount - 1) / kBitCount, value ? ~element_type(0) : element_type(0), allocator) + { + mFreeBitCount = kBitCount - (n % kBitCount); + + if(mFreeBitCount == kBitCount) + mFreeBitCount = 0; + } + + + template + bitvector::bitvector(const bitvector& copy) + : mContainer(copy.mContainer), + mFreeBitCount(copy.mFreeBitCount) + { + } + + + template + template + bitvector::bitvector(InputIterator first, InputIterator last) + : mContainer(), + mFreeBitCount(0) + { + assign(first, last); + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const bitvector& a, + const bitvector& b) + { + // To do: Replace this with a smart compare implementation. This is much slower than it needs to be. + return ((a.size() == b.size()) && eastl::equal(a.begin(), a.end(), b.begin())); + } + + + template + inline bool operator!=(const bitvector& a, + const bitvector& b) + { + return !operator==(a, b); + } + + + template + inline bool operator<(const bitvector& a, + const bitvector& b) + { + // To do: Replace this with a smart compare implementation. This is much slower than it needs to be. 
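// Editorial note (not part of this patch): the comparison below is bit-wise
// lexicographical starting at index 0: the first position at which the two
// bitvectors differ decides the result (false < true), and a proper prefix
// compares less than the longer sequence.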
+ return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); + } + + + template + inline bool operator>(const bitvector& a, + const bitvector& b) + { + return b < a; + } + + + template + inline bool operator<=(const bitvector& a, + const bitvector& b) + { + return !(b < a); + } + + + template + inline bool operator>=(const bitvector& a, + const bitvector& b) + { + return !(a < b); + } + + template + inline void swap(bitvector& a, + bitvector& b) + { + a.swap(b); + } + + +} // namespace eastl + + +EA_RESTORE_VC_WARNING(); + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/bonus/adaptors.h b/libkram/eastl/include/EASTL/bonus/adaptors.h new file mode 100644 index 00000000..423cacdd --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/adaptors.h @@ -0,0 +1,88 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ADAPTORS_H +#define EASTL_ADAPTORS_H + + +#include +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + +EA_DISABLE_VC_WARNING(4512 4626) +#if defined(_MSC_VER) && (_MSC_VER >= 1900) // VS2015+ + EA_DISABLE_VC_WARNING(5027) // move assignment operator was implicitly defined as deleted +#endif + + +namespace eastl +{ + /// reverse + /// + /// This adaptor allows reverse iteration of a container in ranged base for-loops. + /// + /// for (auto& i : reverse(c)) { ... } + /// + template + struct reverse_wrapper + { + template + reverse_wrapper(C&& c) + : mContainer(eastl::forward(c)) + { + /** + * NOTE: + * + * Due to reference collapsing rules of universal references Container type is either + * + * const C& if the input is a const lvalue + * C& if the input is a non-const lvalue + * C if the input is an rvalue + * const C if the input is a const rvalue thus the object will have to be copied and the copy-ctor will be called + * + * + * Thus we either move the whole container into this object or take a reference to the lvalue avoiding the copy. + * The static_assert below ensures this. 
+ */ + static_assert(eastl::is_same_v, "Reference collapsed deduced type must be the same as the deduced Container type!"); + } + + Container mContainer; + }; + + template + auto begin(const reverse_wrapper& w) -> decltype(eastl::rbegin(w.mContainer)) + { + return eastl::rbegin(w.mContainer); + } + + template + auto end(const reverse_wrapper& w) -> decltype(eastl::rend(w.mContainer)) + { + return eastl::rend(w.mContainer); + } + + template + reverse_wrapper reverse(Container&& c) + { + return reverse_wrapper(eastl::forward(c)); + } + +} // namespace eastl + +#if defined(_MSC_VER) && (_MSC_VER >= 1900) // VS2015+ + EA_RESTORE_VC_WARNING() +#endif +EA_RESTORE_VC_WARNING() + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/bonus/call_traits.h b/libkram/eastl/include/EASTL/bonus/call_traits.h new file mode 100644 index 00000000..0995d051 --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/call_traits.h @@ -0,0 +1,117 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// The design for call_traits here is very similar to that found in template +// metaprogramming libraries such as Boost, GCC, and Metrowerks, given that +// these libraries have established this interface as a defacto standard for +// solving this problem. Also, these are described in various books on the +// topic of template metaprogramming, such as "Modern C++ Design". +// +// See http://www.boost.org/libs/utility/call_traits.htm or search for +// call_traits in Google for a description of call_traits. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_CALL_TRAITS_H +#define EASTL_CALL_TRAITS_H + + +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. 
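// Editorial note (illustration only, not part of this patch): a rough sketch of
// what call_traits<T>::param_type resolves to for a few representative types,
// per the specializations below; the exact "small enough to pass by value"
// cut-off is an implementation detail, and some_class stands in for any
// hypothetical non-trivial class type.
//
//     call_traits<int>::param_type         -> const int         (by value)
//     call_traits<some_class>::param_type  -> const some_class&
//     call_traits<int&>::param_type        -> int&
//     call_traits<int[4]>::param_type      -> const int* const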
+#endif + + + +namespace eastl +{ + + + template + struct ct_imp2 { typedef const T& param_type; }; + + template + struct ct_imp2 { typedef const T param_type; }; + + template + struct ct_imp { typedef const T& param_type; }; + + template + struct ct_imp { typedef typename ct_imp2::param_type param_type; }; + + template + struct ct_imp { typedef T const param_type; }; + + + + template + struct call_traits + { + public: + typedef T value_type; + typedef T& reference; + typedef const T& const_reference; + typedef typename ct_imp::value, is_arithmetic::value>::param_type param_type; + }; + + + template + struct call_traits + { + typedef T& value_type; + typedef T& reference; + typedef const T& const_reference; + typedef T& param_type; + }; + + + template + struct call_traits + { + private: + typedef T array_type[N]; + + public: + typedef const T* value_type; + typedef array_type& reference; + typedef const array_type& const_reference; + typedef const T* const param_type; + }; + + + template + struct call_traits + { + private: + typedef const T array_type[N]; + + public: + typedef const T* value_type; + typedef array_type& reference; + typedef const array_type& const_reference; + typedef const T* const param_type; + }; + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/bonus/compressed_pair.h b/libkram/eastl/include/EASTL/bonus/compressed_pair.h new file mode 100644 index 00000000..379642ba --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/compressed_pair.h @@ -0,0 +1,460 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// The compressed pair class is very similar to std::pair, but if either of the +// template arguments are empty classes, then the "empty base-class optimization" +// is applied to compress the size of the pair. +// +// The design for compressed_pair here is very similar to that found in template +// metaprogramming libraries such as Boost, GCC, and Metrowerks, given that +// these libraries have established this interface as a defacto standard for +// solving this problem. Also, these are described in various books on the +// topic of template metaprogramming, such as "Modern C++ Design". 
+// +// template +// class compressed_pair +// { +// public: +// typedef T1 first_type; +// typedef T2 second_type; +// typedef typename call_traits::param_type first_param_type; +// typedef typename call_traits::param_type second_param_type; +// typedef typename call_traits::reference first_reference; +// typedef typename call_traits::reference second_reference; +// typedef typename call_traits::const_reference first_const_reference; +// typedef typename call_traits::const_reference second_const_reference; +// +// compressed_pair() : base() {} +// compressed_pair(first_param_type x, second_param_type y); +// explicit compressed_pair(first_param_type x); +// explicit compressed_pair(second_param_type y); +// +// compressed_pair& operator=(const compressed_pair&); +// +// first_reference first(); +// first_const_reference first() const; +// +// second_reference second(); +// second_const_reference second() const; +// +// void swap(compressed_pair& y); +// }; +// +// The two members of the pair can be accessed using the member functions first() +// and second(). Note that not all member functions can be instantiated for all +// template parameter types. In particular compressed_pair can be instantiated for +// reference and array types, however in these cases the range of constructors that +// can be used are limited. If types T1 and T2 are the same type, then there is +// only one version of the single-argument constructor, and this constructor +// initialises both values in the pair to the passed value. +// +// Note that compressed_pair can not be instantiated if either of the template +// arguments is a union type, unless there is compiler support for is_union, +// or if is_union is specialised for the union type. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_COMPRESSED_PAIR_H +#define EASTL_COMPRESSED_PAIR_H + + +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. 
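// Editorial note (illustration only, not part of this patch): a minimal sketch
// of the empty-base optimization described above, using a hypothetical
// stateless functor; exact sizes depend on the ABI, but the empty member is
// folded into a base class rather than stored.
//
//     struct empty_deleter { void operator()(void*) const {} };
//
//     eastl::compressed_pair<int*, empty_deleter> p;
//     p.first() = nullptr;   // members are reached via first() / second()
//     // sizeof(p) is typically sizeof(int*), whereas a plain struct holding
//     // both an int* and an empty_deleter would usually be larger.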
+#endif + +#if defined(_MSC_VER) && (_MSC_VER >= 1900) // VS2015 or later + EA_DISABLE_VC_WARNING(4626 5027) // warning C4626: 'eastl::compressed_pair_imp': assignment operator was implicitly defined as deleted because a base class assignment operator is inaccessible or deleted +#endif + +namespace eastl +{ + + template + class compressed_pair; + + + template + struct compressed_pair_switch; + + template + struct compressed_pair_switch{ static const int value = 0; }; + + template + struct compressed_pair_switch { static const int value = 1; }; + + template + struct compressed_pair_switch { static const int value = 2; }; + + template + struct compressed_pair_switch { static const int value = 3; }; + + template + struct compressed_pair_switch { static const int value = 4; }; + + template + struct compressed_pair_switch { static const int value = 5; }; + + template + class compressed_pair_imp; + + + + template + inline void cp_swap(T& t1, T& t2) + { + T tTemp = t1; + t1 = t2; + t2 = tTemp; + } + + + // Derive from neither + template + class compressed_pair_imp + { + public: + typedef T1 first_type; + typedef T2 second_type; + typedef typename call_traits::param_type first_param_type; + typedef typename call_traits::param_type second_param_type; + typedef typename call_traits::reference first_reference; + typedef typename call_traits::reference second_reference; + typedef typename call_traits::const_reference first_const_reference; + typedef typename call_traits::const_reference second_const_reference; + + compressed_pair_imp() {} + + compressed_pair_imp(first_param_type x, second_param_type y) + : mFirst(x), mSecond(y) {} + + compressed_pair_imp(first_param_type x) + : mFirst(x) {} + + compressed_pair_imp(second_param_type y) + : mSecond(y) {} + + first_reference first() { return mFirst; } + first_const_reference first() const { return mFirst; } + + second_reference second() { return mSecond; } + second_const_reference second() const { return mSecond; } + + void swap(compressed_pair& y) + { + cp_swap(mFirst, y.first()); + cp_swap(mSecond, y.second()); + } + + private: + first_type mFirst; + second_type mSecond; + }; + + + // Derive from T1 + template + class compressed_pair_imp : private T1 + { + public: + typedef T1 first_type; + typedef T2 second_type; + typedef typename call_traits::param_type first_param_type; + typedef typename call_traits::param_type second_param_type; + typedef typename call_traits::reference first_reference; + typedef typename call_traits::reference second_reference; + typedef typename call_traits::const_reference first_const_reference; + typedef typename call_traits::const_reference second_const_reference; + + compressed_pair_imp() {} + + compressed_pair_imp(first_param_type x, second_param_type y) + : first_type(x), mSecond(y) {} + + compressed_pair_imp(first_param_type x) + : first_type(x) {} + + compressed_pair_imp(second_param_type y) + : mSecond(y) {} + + first_reference first() { return *this; } + first_const_reference first() const { return *this; } + + second_reference second() { return mSecond; } + second_const_reference second() const { return mSecond; } + + void swap(compressed_pair& y) + { + // No need to swap empty base class + cp_swap(mSecond, y.second()); + } + + private: + second_type mSecond; + }; + + + + // Derive from T2 + template + class compressed_pair_imp : private T2 + { + public: + typedef T1 first_type; + typedef T2 second_type; + typedef typename call_traits::param_type first_param_type; + typedef typename call_traits::param_type 
second_param_type; + typedef typename call_traits::reference first_reference; + typedef typename call_traits::reference second_reference; + typedef typename call_traits::const_reference first_const_reference; + typedef typename call_traits::const_reference second_const_reference; + + compressed_pair_imp() {} + + compressed_pair_imp(first_param_type x, second_param_type y) + : second_type(y), mFirst(x) {} + + compressed_pair_imp(first_param_type x) + : mFirst(x) {} + + compressed_pair_imp(second_param_type y) + : second_type(y) {} + + first_reference first() { return mFirst; } + first_const_reference first() const { return mFirst; } + + second_reference second() { return *this; } + second_const_reference second() const { return *this; } + + void swap(compressed_pair& y) + { + // No need to swap empty base class + cp_swap(mFirst, y.first()); + } + + private: + first_type mFirst; + }; + + + + // Derive from T1 and T2 + template + class compressed_pair_imp : private T1, private T2 + { + public: + typedef T1 first_type; + typedef T2 second_type; + typedef typename call_traits::param_type first_param_type; + typedef typename call_traits::param_type second_param_type; + typedef typename call_traits::reference first_reference; + typedef typename call_traits::reference second_reference; + typedef typename call_traits::const_reference first_const_reference; + typedef typename call_traits::const_reference second_const_reference; + + compressed_pair_imp() {} + + compressed_pair_imp(first_param_type x, second_param_type y) + : first_type(x), second_type(y) {} + + compressed_pair_imp(first_param_type x) + : first_type(x) {} + + compressed_pair_imp(second_param_type y) + : second_type(y) {} + + first_reference first() { return *this; } + first_const_reference first() const { return *this; } + + second_reference second() { return *this; } + second_const_reference second() const { return *this; } + + // No need to swap empty bases + void swap(compressed_pair&) + { } + }; + + + // T1 == T2, T1 and T2 are both empty + // Note does not actually store an instance of T2 at all; + // but reuses T1 base class for both first() and second(). 
+ template + class compressed_pair_imp : private T1 + { + public: + typedef T1 first_type; + typedef T2 second_type; + typedef typename call_traits::param_type first_param_type; + typedef typename call_traits::param_type second_param_type; + typedef typename call_traits::reference first_reference; + typedef typename call_traits::reference second_reference; + typedef typename call_traits::const_reference first_const_reference; + typedef typename call_traits::const_reference second_const_reference; + + compressed_pair_imp() {} + + compressed_pair_imp(first_param_type x, second_param_type) + : first_type(x) {} + + compressed_pair_imp(first_param_type x) + : first_type(x) {} + + first_reference first() { return *this; } + first_const_reference first() const { return *this; } + + second_reference second() { return *this; } + second_const_reference second() const { return *this; } + + void swap(compressed_pair&) { } + }; + + + // T1 == T2 and are not empty + template + class compressed_pair_imp + { + public: + typedef T1 first_type; + typedef T2 second_type; + typedef typename call_traits::param_type first_param_type; + typedef typename call_traits::param_type second_param_type; + typedef typename call_traits::reference first_reference; + typedef typename call_traits::reference second_reference; + typedef typename call_traits::const_reference first_const_reference; + typedef typename call_traits::const_reference second_const_reference; + + compressed_pair_imp() {} + + compressed_pair_imp(first_param_type x, second_param_type y) + : mFirst(x), mSecond(y) {} + + compressed_pair_imp(first_param_type x) + : mFirst(x), mSecond(x) {} + + first_reference first() { return mFirst; } + first_const_reference first() const { return mFirst; } + + second_reference second() { return mSecond; } + second_const_reference second() const { return mSecond; } + + void swap(compressed_pair& y) + { + cp_swap(mFirst, y.first()); + cp_swap(mSecond, y.second()); + } + + private: + first_type mFirst; + second_type mSecond; + }; + + + + template + class compressed_pair + : private compressed_pair_imp::type, typename remove_cv::type>::value, + is_empty::value, + is_empty::value>::value> + { + private: + typedef compressed_pair_imp::type, typename remove_cv::type>::value, + is_empty::value, + is_empty::value>::value> base; + public: + typedef T1 first_type; + typedef T2 second_type; + typedef typename call_traits::param_type first_param_type; + typedef typename call_traits::param_type second_param_type; + typedef typename call_traits::reference first_reference; + typedef typename call_traits::reference second_reference; + typedef typename call_traits::const_reference first_const_reference; + typedef typename call_traits::const_reference second_const_reference; + + compressed_pair() : base() {} + compressed_pair(first_param_type x, second_param_type y) : base(x, y) {} + explicit compressed_pair(first_param_type x) : base(x) {} + explicit compressed_pair(second_param_type y) : base(y) {} + + first_reference first() { return base::first(); } + first_const_reference first() const { return base::first(); } + + second_reference second() { return base::second(); } + second_const_reference second() const { return base::second(); } + + void swap(compressed_pair& y) { base::swap(y); } + }; + + + // Partial specialisation for case where T1 == T2: + template + class compressed_pair + : private compressed_pair_imp::type, typename remove_cv::type>::value, + is_empty::value, + is_empty::value>::value> + { + private: + typedef 
compressed_pair_imp::type, typename remove_cv::type>::value, + is_empty::value, + is_empty::value>::value> base; + public: + typedef T first_type; + typedef T second_type; + typedef typename call_traits::param_type first_param_type; + typedef typename call_traits::param_type second_param_type; + typedef typename call_traits::reference first_reference; + typedef typename call_traits::reference second_reference; + typedef typename call_traits::const_reference first_const_reference; + typedef typename call_traits::const_reference second_const_reference; + + compressed_pair() : base() {} + compressed_pair(first_param_type x, second_param_type y) : base(x, y) {} + explicit compressed_pair(first_param_type x) : base(x) {} + + first_reference first() { return base::first(); } + first_const_reference first() const { return base::first(); } + + second_reference second() { return base::second(); } + second_const_reference second() const { return base::second(); } + + void swap(compressed_pair& y) { base::swap(y); } + }; + + + template + inline void swap(compressed_pair& x, compressed_pair& y) + { + x.swap(y); + } + + +} // namespace eastl + +#if defined(_MSC_VER) && (_MSC_VER >= 1900) // VS2015 or later + EA_RESTORE_VC_WARNING() +#endif + +#endif // Header include guard + + + diff --git a/libkram/eastl/include/EASTL/bonus/fixed_ring_buffer.h b/libkram/eastl/include/EASTL/bonus/fixed_ring_buffer.h new file mode 100644 index 00000000..2bb54e47 --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/fixed_ring_buffer.h @@ -0,0 +1,50 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_FIXED_RING_BUFFER_H +#define EASTL_FIXED_RING_BUFFER_H + +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + +namespace eastl +{ + + /// fixed_ring_buffer + /// + /// This is a convenience template alias for creating a fixed-sized + /// ring_buffer using eastl::fixed_vector as its storage container. This has + /// been tricky for users to get correct due to the constructor requirements + /// of eastl::ring_buffer leaking the implementation detail of the sentinel + /// value being used internally. In addition, it was not obvious what the + /// correct allocator_type template parameter should be used for containers + /// providing both a default allocator type and an overflow allocator type. + /// + /// We are over-allocating the fixed_vector container to accommodate the + /// ring_buffer sentinel to prevent that implementation detail leaking into + /// user code. 
+ /// + /// Example usage: + /// + /// fixed_ring_buffer rb = {0, 1, 2, 3, 4, 5, 6, 7}; + /// or + /// fixed_ring_buffer rb(8); // capacity doesn't need to respect sentinel + /// rb.push_back(0); + /// + /// +#if !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + template + using fixed_ring_buffer = + ring_buffer, typename fixed_vector::overflow_allocator_type>; +#endif + +} // namespace eastl + +#endif // Header include guard + diff --git a/libkram/eastl/include/EASTL/bonus/fixed_tuple_vector.h b/libkram/eastl/include/EASTL/bonus/fixed_tuple_vector.h new file mode 100644 index 00000000..e9ce0ec0 --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/fixed_tuple_vector.h @@ -0,0 +1,210 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_FIXEDTUPLEVECTOR_H +#define EASTL_FIXEDTUPLEVECTOR_H + +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + +namespace eastl +{ + + /// EASTL_FIXED_TUPLE_VECTOR_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// In the case of fixed-size containers, the allocator name always refers + /// to overflow allocations. + /// + #ifndef EASTL_FIXED_TUPLE_VECTOR_DEFAULT_NAME + #define EASTL_FIXED_TUPLE_VECTOR_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_tuple_vector" // Unless the user overrides something, this is "EASTL fixed_vector". + #endif + + + /// EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR + #define EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_TUPLE_VECTOR_DEFAULT_NAME) + #endif + +// External interface of fixed_tuple_vector +template +class fixed_tuple_vector : public TupleVecInternal::TupleVecImpl::GetTotalAllocationSize(nodeCount, 0), 1, + TupleVecInternal::TupleRecurser::GetTotalAlignment(), 0, + bEnableOverflow, EASTLAllocatorType>, make_index_sequence, Ts...> +{ +public: + typedef fixed_vector_allocator< + TupleVecInternal::TupleRecurser::GetTotalAllocationSize(nodeCount, 0), 1, + TupleVecInternal::TupleRecurser::GetTotalAlignment(), 0, + bEnableOverflow, EASTLAllocatorType> fixed_allocator_type; + typedef aligned_buffer aligned_buffer_type; + typedef fixed_tuple_vector this_type; + typedef EASTLAllocatorType overflow_allocator_type; + + typedef TupleVecInternal::TupleVecImpl, Ts...> base_type; + typedef typename base_type::size_type size_type; + +private: + aligned_buffer_type mBuffer; + +public: + fixed_tuple_vector() + : base_type(fixed_allocator_type(mBuffer.buffer), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { } + + fixed_tuple_vector(const overflow_allocator_type& allocator) + : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { } + + fixed_tuple_vector(this_type&& x) + : base_type(fixed_allocator_type(mBuffer.buffer), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::get_allocator().copy_overflow_allocator(x.get_allocator()); + base_type::DoInitFromIterator(make_move_iterator(x.begin()), make_move_iterator(x.end())); + x.clear(); + } + + fixed_tuple_vector(this_type&& x, const overflow_allocator_type& allocator) + : 
base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::DoInitFromIterator(make_move_iterator(x.begin()), make_move_iterator(x.end())); + x.clear(); + } + + fixed_tuple_vector(const this_type& x) + : base_type(fixed_allocator_type(mBuffer.buffer), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::get_allocator().copy_overflow_allocator(x.get_allocator()); + base_type::DoInitFromIterator(x.begin(), x.end()); + } + + fixed_tuple_vector(const this_type& x, const overflow_allocator_type& allocator) + : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::DoInitFromIterator(x.begin(), x.end()); + } + + template + fixed_tuple_vector(move_iterator begin, move_iterator end, const overflow_allocator_type& allocator = EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::DoInitFromIterator(begin, end); + } + + template + fixed_tuple_vector(Iterator begin, Iterator end, const overflow_allocator_type& allocator = EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::DoInitFromIterator(begin, end); + } + + fixed_tuple_vector(size_type n, const overflow_allocator_type& allocator = EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::DoInitDefaultFill(n); + } + + fixed_tuple_vector(size_type n, const Ts&... args) + : base_type(fixed_allocator_type(mBuffer.buffer), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::DoInitFillArgs(n, args...); + } + + fixed_tuple_vector(size_type n, const Ts&... 
args, const overflow_allocator_type& allocator) + : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::DoInitFillArgs(n, args...); + } + + fixed_tuple_vector(size_type n, + typename base_type::const_reference_tuple tup, + const overflow_allocator_type& allocator = EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::DoInitFillTuple(n, tup); + } + + fixed_tuple_vector(const typename base_type::value_tuple* first, const typename base_type::value_tuple* last, + const overflow_allocator_type& allocator = EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::DoInitFromTupleArray(first, last); + } + + fixed_tuple_vector(std::initializer_list iList, + const overflow_allocator_type& allocator = EASTL_FIXED_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : base_type(fixed_allocator_type(mBuffer.buffer, allocator), mBuffer.buffer, nodeCount, fixed_allocator_type::kNodeSize) + { + base_type::DoInitFromTupleArray(iList.begin(), iList.end()); + } + + this_type& operator=(const this_type& other) + { + base_type::operator=(other); + return *this; + } + + this_type& operator=(this_type&& other) + { + base_type::clear(); + // OK to call DoInitFromIterator in a non-ctor scenario because clear() reset everything, more-or-less + base_type::DoInitFromIterator(make_move_iterator(other.begin()), make_move_iterator(other.end())); + other.clear(); + return *this; + } + + this_type& operator=(std::initializer_list iList) + { + base_type::operator=(iList); + return *this; + } + + void swap(this_type& x) + { + // If both containers are using the heap instead of local memory + // then we can do a fast pointer swap instead of content swap. + if ((has_overflowed() && x.has_overflowed()) && (get_overflow_allocator() == x.get_overflow_allocator())) + { + base_type::swap(x); + } + else + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(*this, x); + } + } + + // Returns the max fixed size, which is the user-supplied nodeCount parameter. + size_type max_size() const { return nodeCount; } + // Returns true if the fixed space has been fully allocated. Note that if overflow is enabled, + // the container size can be greater than nodeCount but full() could return true because the + // fixed space may have a recently freed slot. + bool full() const { return (base_type::mNumElements >= nodeCount) || ((void*)base_type::mpData != (void*)mBuffer.buffer); } + // Returns true if the allocations spilled over into the overflow allocator. Meaningful + // only if overflow is enabled. + bool has_overflowed() const { return ((void*)base_type::mpData != (void*)mBuffer.buffer); } + // Returns the value of the bEnableOverflow template parameter. 
+ bool can_overflow() const { return bEnableOverflow; } + + const overflow_allocator_type& get_overflow_allocator() const { return base_type::get_allocator().get_overflow_allocator(); } +}; + + +template +inline void swap(fixed_tuple_vector& a, + fixed_tuple_vector& b) +{ + a.swap(b); +} + + +} // namespace eastl + +#endif // EASTL_TUPLEVECTOR_H diff --git a/libkram/eastl/include/EASTL/bonus/intrusive_sdlist.h b/libkram/eastl/include/EASTL/bonus/intrusive_sdlist.h new file mode 100644 index 00000000..1b126d43 --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/intrusive_sdlist.h @@ -0,0 +1,694 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// intrusive_sdlist is a special kind of intrusive list which we say is +// "singly-doubly" linked. Instead of having a typical intrusive list node +// which looks like this: +// +// struct intrusive_sdlist_node { +// intrusive_sdlist_node *mpNext; +// intrusive_sdlist_node *mpPrev; +// }; +// +// We instead have one that looks like this: +// +// struct intrusive_sdlist_node { +// intrusive_sdlist_node* mpNext; +// intrusive_sdlist_node** mppPrevNext; +// }; +// +// This may seem to be suboptimal, but it has one specific advantage: it allows +// the intrusive_sdlist class to be the size of only one pointer instead of two. +// This may seem like a minor optimization, but some users have wanted to create +// thousands of empty instances of these. +// This is because while an intrusive_list class looks like this: +// +// class intrusive_list { +// intrusive_list_node mBaseNode; +// }; +// +// an intrusive_sdlist class looks like this: +// +// class intrusive_sdlist { +// intrusive_sdlist_node* mpNext; +// }; +// +// So here we make a list of plusses and minuses of intrusive sdlists +// compared to intrusive_lists and intrusive_slists: +// +// | list | slist | sdlist +// --------------------------------------------------------- +// min size | 8 | 4 | 4 +// node size | 8 | 4 | 8 +// anonymous erase | yes | no | yes +// reverse iteration | yes | no | no +// +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTRUSIVE_SDLIST_H +#define EASTL_INTRUSIVE_SDLIST_H + + +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + + /// intrusive_sdlist_node + /// + struct intrusive_sdlist_node + { + intrusive_sdlist_node* mpNext; + intrusive_sdlist_node** mppPrevNext; + }; + + + /// IntrusiveSDListIterator + /// + template + struct IntrusiveSDListIterator + { + typedef IntrusiveSDListIterator this_type; + typedef IntrusiveSDListIterator iterator; + typedef IntrusiveSDListIterator const_iterator; + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. 
+ typedef ptrdiff_t difference_type; + typedef T value_type; + typedef T node_type; + typedef Pointer pointer; + typedef Reference reference; + typedef EASTL_ITC_NS::forward_iterator_tag iterator_category; + + public: + pointer mpNode; + + public: + IntrusiveSDListIterator(); + explicit IntrusiveSDListIterator(pointer pNode); // Note that you can also construct an iterator from T via this, since value_type == node_type. + IntrusiveSDListIterator(const iterator& x); + + reference operator*() const; + pointer operator->() const; + + this_type& operator++(); + this_type operator++(int); + + }; // struct IntrusiveSDListIterator + + + + + /// intrusive_sdlist_base + /// + /// Provides a template-less base class for intrusive_sdlist. + /// + class intrusive_sdlist_base + { + public: + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. + typedef ptrdiff_t difference_type; + + protected: + intrusive_sdlist_node* mpNext; + + public: + intrusive_sdlist_base(); + + bool empty() const; ///< Returns true if the container is empty. + size_type size() const; ///< Returns the number of elements in the list; O(n). + + void clear(); ///< Clears the list; O(1). No deallocation occurs. + void pop_front(); ///< Removes an element from the front of the list; O(1). The element must be present, but is not deallocated. + void reverse(); ///< Reverses a list so that front and back are swapped; O(n). + + //bool validate() const; ///< Scans a list for linkage inconsistencies; O(n) time, O(1) space. Returns false if errors are detected, such as loops or branching. + + }; // class intrusive_sdlist_base + + + + /// intrusive_sdlist + /// + template + class intrusive_sdlist : public intrusive_sdlist_base + { + public: + typedef intrusive_sdlist this_type; + typedef intrusive_sdlist_base base_type; + typedef T node_type; + typedef T value_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::difference_type difference_type; + typedef T& reference; + typedef const T& const_reference; + typedef T* pointer; + typedef const T* const_pointer; + typedef IntrusiveSDListIterator iterator; + typedef IntrusiveSDListIterator const_iterator; + typedef eastl::reverse_iterator reverse_iterator; + typedef eastl::reverse_iterator const_reverse_iterator; + + public: + intrusive_sdlist(); ///< Creates an empty list. + intrusive_sdlist(const this_type& x); ///< Creates an empty list; ignores the argument. + this_type& operator=(const this_type& x); ///< Clears the list; ignores the argument. + + iterator begin(); ///< Returns an iterator pointing to the first element in the list. + const_iterator begin() const; ///< Returns a const_iterator pointing to the first element in the list. + const_iterator cbegin() const; ///< Returns a const_iterator pointing to the first element in the list. + + iterator end(); ///< Returns an iterator pointing one-after the last element in the list. + const_iterator end() const; ///< Returns a const_iterator pointing one-after the last element in the list. + const_iterator cend() const; ///< Returns a const_iterator pointing one-after the last element in the list. + + reference front(); ///< Returns a reference to the first element. The list must be empty. + const_reference front() const; ///< Returns a const reference to the first element. The list must be empty. + + void push_front(value_type& value); ///< Adds an element to the front of the list; O(1). The element is not copied. 
The element must not be in any other list. + void push_back(value_type& value); ///< Adds an element to the back of the list; O(N). The element is not copied. The element must not be in any other list. + void pop_back(); ///< Removes an element from the back of the list; O(N). The element must be present, but is not deallocated. + + bool contains(const value_type& value) const; ///< Returns true if the given element is in the list; O(n). Equivalent to (locate(x) != end()). + + iterator locate(value_type& value); ///< Converts a reference to an object in the list back to an iterator, or returns end() if it is not part of the list. O(n) + const_iterator locate(const value_type& value) const; ///< Converts a const reference to an object in the list back to a const iterator, or returns end() if it is not part of the list. O(n) + + iterator insert(iterator position, value_type& value); ///< Inserts an element before the element pointed to by the iterator. O(1) + iterator erase(iterator position); ///< Erases the element pointed to by the iterator. O(1) + iterator erase(iterator first, iterator last); ///< Erases elements within the iterator range [first, last). O(1). + void swap(intrusive_sdlist& x); ///< Swaps the contents of two intrusive lists; O(1). + + static void remove(value_type& value); ///< Erases an element from a list; O(1). Note that this is static so you don't need to know which list the element, although it must be in some list. + + void splice(iterator position, value_type& value); ///< Moves the given element into this list before the element pointed to by position; O(1). + ///< Required: x must be in some list or have first/next pointers that point it itself. + + void splice(iterator position, this_type& x); ///< Moves the contents of a list into this list before the element pointed to by position; O(1). + ///< Required: &x != this (same as std::list). + + void splice(iterator position, this_type& x, iterator xPosition); ///< Moves the given element pointed to i within the list x into the current list before + ///< the element pointed to by position; O(1). + + void splice(iterator position, this_type& x, iterator first, iterator last); ///< Moves the range of elements [first, last) from list x into the current list before + ///< the element pointed to by position; O(1). + ///< Required: position must not be in [first, last). (same as std::list). 
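// Editorial note (illustration only, not part of this patch): a minimal usage
// sketch, assuming a caller-defined node type; the container never allocates
// or copies nodes, it only links objects that the caller owns (see the
// push_front/remove notes above). Packet is a hypothetical example type.
//
//     struct Packet : public eastl::intrusive_sdlist_node { int id; };
//
//     eastl::intrusive_sdlist<Packet> list;
//     Packet p0, p1;
//     list.push_front(p0);                          // O(1), p0 is not copied
//     list.push_front(p1);
//     eastl::intrusive_sdlist<Packet>::remove(p1);  // O(1), static: no list needed
//     // p0 and p1 must outlive their membership in any list.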
+ bool validate() const; + int validate_iterator(const_iterator i) const; + + }; // intrusive_sdlist + + + + + /////////////////////////////////////////////////////////////////////// + // IntrusiveSDListIterator functions + /////////////////////////////////////////////////////////////////////// + + template + inline IntrusiveSDListIterator::IntrusiveSDListIterator() + { + #if EASTL_DEBUG + mpNode = NULL; + #endif + } + + template + inline IntrusiveSDListIterator::IntrusiveSDListIterator(pointer pNode) + : mpNode(pNode) + { + } + + template + inline IntrusiveSDListIterator::IntrusiveSDListIterator(const iterator& x) + : mpNode(x.mpNode) + { + } + + template + inline typename IntrusiveSDListIterator::reference + IntrusiveSDListIterator::operator*() const + { + return *mpNode; + } + + template + inline typename IntrusiveSDListIterator::pointer + IntrusiveSDListIterator::operator->() const + { + return mpNode; + } + + template + inline typename IntrusiveSDListIterator::this_type& + IntrusiveSDListIterator::operator++() + { + mpNode = static_cast(mpNode->mpNext); + return *this; + } + + template + inline typename IntrusiveSDListIterator::this_type + IntrusiveSDListIterator::operator++(int) + { + this_type temp = *this; + mpNode = static_cast(mpNode->mpNext); + return temp; + } + + // The C++ defect report #179 requires that we support comparisons between const and non-const iterators. + // Thus we provide additional template paremeters here to support this. The defect report does not + // require us to support comparisons between reverse_iterators and const_reverse_iterators. + template + inline bool operator==(const IntrusiveSDListIterator& a, + const IntrusiveSDListIterator& b) + { + return a.mpNode == b.mpNode; + } + + + template + inline bool operator!=(const IntrusiveSDListIterator& a, + const IntrusiveSDListIterator& b) + { + return a.mpNode != b.mpNode; + } + + + // We provide a version of operator!= for the case where the iterators are of the + // same type. This helps prevent ambiguity errors in the presence of rel_ops. + template + inline bool operator!=(const IntrusiveSDListIterator& a, + const IntrusiveSDListIterator& b) + { + return a.mpNode != b.mpNode; + } + + + + /////////////////////////////////////////////////////////////////////// + // intrusive_sdlist_base + /////////////////////////////////////////////////////////////////////// + + inline intrusive_sdlist_base::intrusive_sdlist_base() + { mpNext = NULL; } + + + inline bool intrusive_sdlist_base::empty() const + { return mpNext == NULL; } + + + inline intrusive_sdlist_base::size_type intrusive_sdlist_base::size() const + { + size_type n = 0; + for(const intrusive_sdlist_node* pCurrent = mpNext; pCurrent; pCurrent = pCurrent->mpNext) + n++; + return n; + } + + + inline void intrusive_sdlist_base::clear() + { mpNext = NULL; } // Note that we don't do anything with the list nodes. + + + inline void intrusive_sdlist_base::pop_front() + { + // To consider: Set mpNext's pointers to NULL in debug builds. + mpNext = mpNext->mpNext; + mpNext->mppPrevNext = &mpNext; + } + + + + /////////////////////////////////////////////////////////////////////// + // intrusive_sdlist + /////////////////////////////////////////////////////////////////////// + + template + inline intrusive_sdlist::intrusive_sdlist() + { + } + + + template + inline intrusive_sdlist::intrusive_sdlist(const this_type& /*x*/) + : intrusive_sdlist_base() + { + // We intentionally ignore argument x. 
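// Editorial note (not part of this patch): copying is intentionally a no-op:
// an intrusive container does not own its nodes, so duplicating the link
// pointers would leave two lists threading through the same caller-owned
// objects. The newly constructed list therefore simply starts out empty.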
+ } + + + template + inline typename intrusive_sdlist::this_type& intrusive_sdlist::operator=(const this_type& /*x*/) + { + return *this; // We intentionally ignore argument x. + } + + + template + inline typename intrusive_sdlist::iterator intrusive_sdlist::begin() + { return iterator(static_cast(mpNext)); } + + + template + inline typename intrusive_sdlist::const_iterator intrusive_sdlist::begin() const + { return const_iterator(static_cast(const_cast(mpNext))); } + + + template + inline typename intrusive_sdlist::const_iterator intrusive_sdlist::cbegin() const + { return const_iterator(static_cast(const_cast(mpNext))); } + + + template + inline typename intrusive_sdlist::iterator intrusive_sdlist::end() + { return iterator(static_cast(NULL)); } + + + template + inline typename intrusive_sdlist::const_iterator intrusive_sdlist::end() const + { return const_iterator(static_cast(NULL)); } + + + template + inline typename intrusive_sdlist::const_iterator intrusive_sdlist::cend() const + { return const_iterator(static_cast(NULL)); } + + + template + inline typename intrusive_sdlist::reference intrusive_sdlist::front() + { return *static_cast(mpNext); } + + + template + inline typename intrusive_sdlist::const_reference intrusive_sdlist::front() const + { return *static_cast(mpNext); } + + + template + inline void intrusive_sdlist::push_front(value_type& value) + { + value.mpNext = mpNext; + value.mppPrevNext = &mpNext; + if(mpNext) + mpNext->mppPrevNext = &value.mpNext; + mpNext = &value; + } + + + template + inline void intrusive_sdlist::push_back(value_type& value) + { + intrusive_sdlist_node* pNext = mpNext; + intrusive_sdlist_node** ppPrevNext = &mpNext; + + while(pNext) + { + ppPrevNext = &pNext->mpNext; + pNext = pNext->mpNext; + } + + *ppPrevNext = &value; + value.mppPrevNext = ppPrevNext; + value.mpNext = NULL; + } + + + template + inline void intrusive_sdlist::pop_back() + { + node_type* pCurrent = static_cast(mpNext); + + while(pCurrent->mpNext) + pCurrent = static_cast(pCurrent->mpNext); + + *pCurrent->mppPrevNext = NULL; + } + + template + inline bool intrusive_sdlist::contains(const value_type& value) const + { + const intrusive_sdlist_node* pCurrent; + + for(pCurrent = mpNext; pCurrent; pCurrent = pCurrent->mpNext) + { + if(pCurrent == &value) + break; + } + + return (pCurrent != NULL); + } + + + template + inline typename intrusive_sdlist::iterator intrusive_sdlist::locate(value_type& value) + { + intrusive_sdlist_node* pCurrent; + + for(pCurrent = static_cast(mpNext); pCurrent; pCurrent = pCurrent->mpNext) + { + if(pCurrent == &value) + break; + } + + return iterator(static_cast(pCurrent)); + } + + + template + inline typename intrusive_sdlist::const_iterator intrusive_sdlist::locate(const T& value) const + { + const intrusive_sdlist_node* pCurrent; + + for(pCurrent = static_cast(mpNext); pCurrent; pCurrent = pCurrent->mpNext) + { + if(pCurrent == &value) + break; + } + + return const_iterator(static_cast(const_cast(pCurrent))); + } + + + template + inline typename intrusive_sdlist::iterator + intrusive_sdlist::insert(iterator position, value_type& value) + { + value.mppPrevNext = position.mpNode->mppPrevNext; + value.mpNext = position.mpNode; + *value.mppPrevNext = &value; + position.mpNode->mppPrevNext = &value.mpNext; + + return iterator(&value); + } + + + template + inline typename intrusive_sdlist::iterator + intrusive_sdlist::erase(iterator position) + { + *position.mpNode->mppPrevNext = position.mpNode->mpNext; + position.mpNode->mpNext->mppPrevNext = 
position.mpNode->mppPrevNext; + + return iterator(position.mpNode); + } + + + template + inline typename intrusive_sdlist::iterator + intrusive_sdlist::erase(iterator first, iterator last) + { + if(first.mpNode) // If not erasing the end... + { + *first.mpNode->mppPrevNext = last.mpNode; + + if(last.mpNode) // If not erasing to the end... + last.mpNode->mppPrevNext = first.mpNode->mppPrevNext; + } + + return last; + } + + + template + inline void intrusive_sdlist::remove(value_type& value) + { + *value.mppPrevNext = value.mpNext; + if(value.mpNext) + value.mpNext->mppPrevNext = value.mppPrevNext; + } + + + template + void intrusive_sdlist::swap(intrusive_sdlist& x) + { + // swap anchors + intrusive_sdlist_node* const temp(mpNext); + mpNext = x.mpNext; + x.mpNext = temp; + + if(x.mpNext) + x.mpNext->mppPrevNext = &mpNext; + + if(mpNext) + mpNext->mppPrevNext = &x.mpNext; + } + + + + + + // To do: Complete these splice functions. Might want to look at intrusive_sdlist for help. + + template + void intrusive_sdlist::splice(iterator /*position*/, value_type& /*value*/) + { + EASTL_ASSERT(false); // If you need this working, ask Paul Pedriana or submit a working version for inclusion. + } + + + template + void intrusive_sdlist::splice(iterator /*position*/, intrusive_sdlist& /*x*/) + { + EASTL_ASSERT(false); // If you need this working, ask Paul Pedriana or submit a working version for inclusion. + } + + + template + void intrusive_sdlist::splice(iterator /*position*/, intrusive_sdlist& /*x*/, iterator /*xPosition*/) + { + EASTL_ASSERT(false); // If you need this working, ask Paul Pedriana or submit a working version for inclusion. + } + + + template + void intrusive_sdlist::splice(iterator /*position*/, intrusive_sdlist& /*x*/, iterator /*first*/, iterator /*last*/) + { + EASTL_ASSERT(false); // If you need this working, ask Paul Pedriana or submit a working version for inclusion. + } + + + template + inline bool intrusive_sdlist::validate() const + { + return true; // To do. + } + + + template + inline int intrusive_sdlist::validate_iterator(const_iterator i) const + { + // To do: Come up with a more efficient mechanism of doing this. + + for(const_iterator temp = begin(), tempEnd = end(); temp != tempEnd; ++temp) + { + if(temp == i) + return (isf_valid | isf_current | isf_can_dereference); + } + + if(i == end()) + return (isf_valid | isf_current); + + return isf_none; + } + + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + bool operator==(const intrusive_sdlist& a, const intrusive_sdlist& b) + { + // If we store an mSize member for intrusive_sdlist, we want to take advantage of it here. 
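// Editorial note (not part of this patch): with no cached size, the two lists
// are walked in lock-step below; equality is O(n), and the size check is
// implicit in whether both iterators reach their end at the same time.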
+ typename intrusive_sdlist::const_iterator ia = a.begin(); + typename intrusive_sdlist::const_iterator ib = b.begin(); + typename intrusive_sdlist::const_iterator enda = a.end(); + typename intrusive_sdlist::const_iterator endb = b.end(); + + while((ia != enda) && (ib != endb) && (*ia == *ib)) + { + ++ia; + ++ib; + } + return (ia == enda) && (ib == endb); + } + + template + bool operator<(const intrusive_sdlist& a, const intrusive_sdlist& b) + { + return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); + } + + template + bool operator!=(const intrusive_sdlist& a, const intrusive_sdlist& b) + { + return !(a == b); + } + + template + bool operator>(const intrusive_sdlist& a, const intrusive_sdlist& b) + { + return b < a; + } + + template + bool operator<=(const intrusive_sdlist& a, const intrusive_sdlist& b) + { + return !(b < a); + } + + template + bool operator>=(const intrusive_sdlist& a, const intrusive_sdlist& b) + { + return !(a < b); + } + + template + void swap(intrusive_sdlist& a, intrusive_sdlist& b) + { + a.swap(b); + } + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/bonus/intrusive_slist.h b/libkram/eastl/include/EASTL/bonus/intrusive_slist.h new file mode 100644 index 00000000..28d445d9 --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/intrusive_slist.h @@ -0,0 +1,321 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +/////////////////////////////////////////////////////////////////////////////// +// *** Note *** +// This implementation is incomplete. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTRUSIVE_SLIST_H +#define EASTL_INTRUSIVE_SLIST_H + + +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// intrusive_slist_node + /// + struct intrusive_slist_node + { + intrusive_slist_node* mpNext; + }; + + + /// IntrusiveSListIterator + /// + template + struct IntrusiveSListIterator + { + typedef IntrusiveSListIterator this_type; + typedef IntrusiveSListIterator iterator; + typedef IntrusiveSListIterator const_iterator; + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. + typedef ptrdiff_t difference_type; + typedef T value_type; + typedef T node_type; + typedef Pointer pointer; + typedef Reference reference; + typedef EASTL_ITC_NS::forward_iterator_tag iterator_category; + + public: + node_type* mpNode; + + public: + IntrusiveSListIterator(); + explicit IntrusiveSListIterator(pointer pNode); // Note that you can also construct an iterator from T via this, since value_type == node_type. + IntrusiveSListIterator(const iterator& x); + + reference operator*() const; + pointer operator->() const; + + this_type& operator++(); + this_type operator++(int); + + }; // struct IntrusiveSListIterator + + + + /// intrusive_slist_base + /// + /// Provides a template-less base class for intrusive_slist. + /// + class intrusive_slist_base + { + public: + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. 
+ typedef ptrdiff_t difference_type; + + protected: + intrusive_slist_node* mpNext; + + public: + intrusive_slist_base(); + + bool empty() const; ///< Returns true if the container is empty. + size_type size() const; ///< Returns the number of elements in the list; O(n). + + void clear(); ///< Clears the list; O(1). No deallocation occurs. + void pop_front(); ///< Removes an element from the front of the list; O(1). The element must be present, but is not deallocated. + void reverse(); ///< Reverses a list so that front and back are swapped; O(n). + + //bool validate() const; ///< Scans a list for linkage inconsistencies; O(n) time, O(1) space. Returns false if errors are detected, such as loops or branching. + + }; // class intrusive_slist_base + + + + /// intrusive_slist + /// + template + class intrusive_slist : public intrusive_slist_base + { + public: + typedef intrusive_slist this_type; + typedef intrusive_slist_base base_type; + typedef T node_type; + typedef T value_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::difference_type difference_type; + typedef T& reference; + typedef const T& const_reference; + typedef T* pointer; + typedef const T* const_pointer; + typedef IntrusiveSListIterator iterator; + typedef IntrusiveSListIterator const_iterator; + + public: + intrusive_slist(); ///< Creates an empty list. + //intrusive_slist(const this_type& x); ///< Creates an empty list; ignores the argument. To consider: Is this a useful function? + //this_type& operator=(const this_type& x); ///< Clears the list; ignores the argument. To consider: Is this a useful function? + + iterator begin(); ///< Returns an iterator pointing to the first element in the list. O(1). + const_iterator begin() const; ///< Returns a const_iterator pointing to the first element in the list. O(1). + const_iterator cbegin() const; ///< Returns a const_iterator pointing to the first element in the list. O(1). + iterator end(); ///< Returns an iterator pointing one-after the last element in the list. O(1). + const_iterator end() const; ///< Returns a const_iterator pointing one-after the last element in the list. O(1). + const_iterator cend() const; ///< Returns a const_iterator pointing one-after the last element in the list. O(1). + iterator before_begin(); ///< Returns iterator to position before begin. O(1). + const_iterator before_begin() const; ///< Returns iterator to previous position. O(1). + const_iterator cbefore_begin() const; ///< Returns iterator to previous position. O(1). + + iterator previous(const_iterator position); ///< Returns iterator to previous position. O(n). + const_iterator previous(const_iterator position) const; ///< Returns iterator to previous position. O(n). + + reference front(); ///< Returns a reference to the first element. The list must be empty. + const_reference front() const; ///< Returns a const reference to the first element. The list must be empty. + + void push_front(value_type& value); ///< Adds an element to the front of the list; O(1). The element is not copied. The element must not be in any other list. + void pop_front(); ///< Removes an element from the back of the list; O(n). The element must be present, but is not deallocated. + + bool contains(const value_type& value) const; ///< Returns true if the given element is in the list; O(n). Equivalent to (locate(x) != end()). 
+ + iterator locate(value_type& value); ///< Converts a reference to an object in the list back to an iterator, or returns end() if it is not part of the list. O(n) + const_iterator locate(const value_type& value) const; ///< Converts a const reference to an object in the list back to a const iterator, or returns end() if it is not part of the list. O(n) + + iterator insert(iterator position, value_type& value); ///< Inserts an element before the element pointed to by the iterator. O(n) + iterator insert_after(iterator position, value_type& value); ///< Inserts an element after the element pointed to by the iterator. O(1) + + iterator erase(iterator position); ///< Erases the element pointed to by the iterator. O(n) + iterator erase_after(iterator position); ///< Erases the element after the element pointed to by the iterator. O(1) + + iterator erase(iterator first, iterator last); ///< Erases elements within the iterator range [first, last). O(n). + iterator erase_after(iterator before_first, iterator last); ///< Erases elements within the iterator range [before_first, last). O(1). + + void swap(this_type& x); ///< Swaps the contents of two intrusive lists; O(1). + + + void splice(iterator position, value_type& value); ///< Moves the given element into this list before the element pointed to by position; O(n). + ///< Required: x must be in some list or have first/next pointers that point it itself. + + void splice(iterator position, this_type& x); ///< Moves the contents of a list into this list before the element pointed to by position; O(n). + ///< Required: &x != this (same as std::list). + + void splice(iterator position, this_type& x, iterator xPosition); ///< Moves the given element pointed to i within the list x into the current list before + ///< the element pointed to by position; O(n). + + void splice(iterator position, this_type& x, iterator first, iterator last); ///< Moves the range of elements [first, last) from list x into the current list before + ///< the element pointed to by position; O(n). + ///< Required: position must not be in [first, last). (same as std::list). + + void splice_after(iterator position, value_type& value); ///< Moves the given element into this list after the element pointed to by position; O(1). + ///< Required: x must be in some list or have first/next pointers that point it itself. + + void splice_after(iterator position, this_type& x); ///< Moves the contents of a list into this list after the element pointed to by position; O(n). + ///< Required: &x != this (same as std::list). + + void splice_after(iterator position, this_type& x, iterator xPrevious); ///< Moves the element after xPrevious to be after position. O(1). + ///< Required: &x != this (same as std::list). + + void splice_after(iterator position, this_type& x, iterator before_first, iterator before_last); ///< Moves the elements in the range of [before_first+1, before_last+1) to be after position. O(1). 
+ + bool validate() const; + int validate_iterator(const_iterator i) const; + + }; // intrusive_slist + + + + + /////////////////////////////////////////////////////////////////////// + // IntrusiveSListIterator + /////////////////////////////////////////////////////////////////////// + + template + inline IntrusiveSListIterator::IntrusiveSListIterator() + { + #if EASTL_DEBUG + mpNode = NULL; + #endif + } + + template + inline IntrusiveSListIterator::IntrusiveSListIterator(pointer pNode) + : mpNode(pNode) + { + } + + template + inline IntrusiveSListIterator::IntrusiveSListIterator(const iterator& x) + : mpNode(x.mpNode) + { + } + + + /////////////////////////////////////////////////////////////////////// + // intrusive_slist_base + /////////////////////////////////////////////////////////////////////// + + // To do. + + + /////////////////////////////////////////////////////////////////////// + // intrusive_slist + /////////////////////////////////////////////////////////////////////// + + // To do. + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + bool operator==(const intrusive_slist& a, const intrusive_slist& b) + { + // If we store an mSize member for intrusive_slist, we want to take advantage of it here. + typename intrusive_slist::const_iterator ia = a.begin(); + typename intrusive_slist::const_iterator ib = b.begin(); + typename intrusive_slist::const_iterator enda = a.end(); + typename intrusive_slist::const_iterator endb = b.end(); + + while((ia != enda) && (ib != endb) && (*ia == *ib)) + { + ++ia; + ++ib; + } + return (ia == enda) && (ib == endb); + } + + template + bool operator<(const intrusive_slist& a, const intrusive_slist& b) + { + return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); + } + + template + bool operator!=(const intrusive_slist& a, const intrusive_slist& b) + { + return !(a == b); + } + + template + bool operator>(const intrusive_slist& a, const intrusive_slist& b) + { + return b < a; + } + + template + bool operator<=(const intrusive_slist& a, const intrusive_slist& b) + { + return !(b < a); + } + + template + bool operator>=(const intrusive_slist& a, const intrusive_slist& b) + { + return !(a < b); + } + + template + void swap(intrusive_slist& a, intrusive_slist& b) + { + a.swap(b); + } + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/bonus/list_map.h b/libkram/eastl/include/EASTL/bonus/list_map.h new file mode 100644 index 00000000..8a080d6d --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/list_map.h @@ -0,0 +1,932 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_LIST_MAP_H +#define EASTL_LIST_MAP_H + + +#include + + +namespace eastl +{ + + /// EASTL_MAP_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_LIST_MAP_DEFAULT_NAME + #define EASTL_LIST_MAP_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " list_map" // Unless the user overrides something, this is "EASTL list_map". 
+ #endif + + /// EASTL_MAP_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_LIST_MAP_DEFAULT_ALLOCATOR + #define EASTL_LIST_MAP_DEFAULT_ALLOCATOR allocator_type(EASTL_LIST_MAP_DEFAULT_NAME) + #endif + + + /// list_map_data_base + /// + /// We define a list_map_data_base separately from list_map_data (below), because it + /// allows us to have non-templated operations, and it makes it so that the + /// list_map anchor node doesn't carry a T with it, which would waste space and + /// possibly lead to surprising the user due to extra Ts existing that the user + /// didn't explicitly create. The downside to all of this is that it makes debug + /// viewing of an list_map harder, given that the node pointers are of type + /// list_map_data_base and not list_map_data. + /// + struct list_map_data_base + { + list_map_data_base* mpNext; + list_map_data_base* mpPrev; + }; + + + /// list_map_data + /// + template + struct list_map_data : public list_map_data_base + { + typedef Value value_type; + + list_map_data(const value_type& value); + + value_type mValue; // This is a pair of key/value. + }; + + + /// list_map_iterator + /// + template + struct list_map_iterator + { + typedef list_map_iterator this_type; + typedef list_map_iterator iterator; + typedef list_map_iterator const_iterator; + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. + typedef ptrdiff_t difference_type; + typedef T value_type; + typedef list_map_data_base base_node_type; + typedef list_map_data node_type; + typedef Pointer pointer; + typedef Reference reference; + typedef EASTL_ITC_NS::bidirectional_iterator_tag iterator_category; + + public: + node_type* mpNode; + + public: + list_map_iterator(); + list_map_iterator(const base_node_type* pNode); + list_map_iterator(const iterator& x); + + reference operator*() const; + pointer operator->() const; + + this_type& operator++(); + this_type operator++(int); + + this_type& operator--(); + this_type operator--(int); + + }; // list_map_iterator + + + /// use_value_first + /// + /// operator()(x) simply returns x.mValue.first. Used in list_map. + /// This is similar to eastl::use_first, however it assumes that the input type is an object + /// whose mValue is an eastl::pair, and the first value in the pair is the desired return. + /// + template + struct use_value_first + { + typedef Object argument_type; + typedef typename Object::value_type::first_type result_type; + + const result_type& operator()(const Object& x) const + { return x.mValue.first; } + }; + + + /// list_map + /// + /// Implements a map like container, which also provides functionality similar to a list. + /// + /// Note: Like a map, keys must still be unique. As such, push_back() and push_front() operations + /// return a bool indicating success, or failure if the entry's key is already in use. + /// + /// list_map is designed to improve performance for situations commonly implemented as: + /// A map, which must be iterated over to find the oldest entry, or purge expired entries. + /// A list, which must be iterated over to remove a player's record when they sign off. + /// + /// list_map requires a little more memory per node than either a list or map alone, + /// and many of list_map's functions have a higher operational cost (CPU time) than their + /// counterparts in list and map. However, as the node count increases, list_map quickly outperforms + /// either a list or a map when find [by-index] and front/back type operations are required. 
+ /// + /// In essence, list_map avoids O(n) iterations at the expense of additional costs to quick (O(1) and O(log n) operations: + /// push_front(), push_back(), pop_front() and pop_back() have O(log n) operation time, similar to map::insert(), rather than O(1) time like a list, + /// however, front() and back() maintain O(1) operation time. + /// + /// As a canonical example, consider a large backlog of player group invites, which are removed when either: + /// The invitation times out - in main loop: while( !listMap.empty() && listMap.front().IsExpired() ) { listMap.pop_front(); } + /// The player rejects the outstanding invitation - on rejection: iter = listMap.find(playerId); if (iter != listMap.end()) { listMap.erase(iter); } + /// + /// For a similar example, consider a high volume pending request container which must: + /// Time out old requests (similar to invites timing out above) + /// Remove requests once they've been handled (similar to rejecting invites above) + /// + /// For such usage patterns, the performance benefits of list_map become dramatic with + /// common O(n) operations once the node count rises to hundreds or more. + /// + /// When high performance is a priority, Containers with thousands of nodes or more + /// can quickly result in unacceptable performance when executing even infrequenty O(n) operations. + /// + /// In order to maintain strong performance, avoid iterating over list_map whenever possible. + /// + /////////////////////////////////////////////////////////////////////// + /// find_as + /// In order to support the ability to have a tree of strings but + /// be able to do efficiently lookups via char pointers (i.e. so they + /// aren't converted to string objects), we provide the find_as + /// function. This function allows you to do a find with a key of a + /// type other than the tree's key type. See the find_as function + /// for more documentation on this. + /// + /////////////////////////////////////////////////////////////////////// + /// Pool allocation + /// If you want to make a custom memory pool for a list_map container, your pool + /// needs to contain items of type list_map::node_type. So if you have a memory + /// pool that has a constructor that takes the size of pool items and the + /// count of pool items, you would do this (assuming that MemoryPool implements + /// the Allocator interface): + /// typedef list_map, MemoryPool> WidgetMap; // Delare your WidgetMap type. + /// MemoryPool myPool(sizeof(WidgetMap::node_type), 100); // Make a pool of 100 Widget nodes. + /// WidgetMap myMap(&myPool); // Create a map that uses the pool. + /// + template , typename Allocator = EASTLAllocatorType> + class list_map + : protected rbtree >, Compare, Allocator, eastl::use_value_first > >, true, true> + { + public: + typedef rbtree >, Compare, Allocator, + eastl::use_value_first > >, true, true> base_type; + typedef list_map this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::key_type key_type; + typedef T mapped_type; + typedef typename eastl::pair value_type; // This is intentionally different from base_type::value_type + typedef value_type& reference; + typedef const value_type& const_reference; + typedef typename base_type::node_type node_type; // Despite the internal and external values being different, we're keeping the node type the same as the base + // in order to allow for pool allocation. See EASTL/map.h for more information. 
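For reference, a minimal usage sketch of the list_map interface declared in this header, following the invite-expiry pattern described in the comments above. The Invite struct, the player ids, and PruneInvites are hypothetical names used only for illustration, and the sketch assumes the application supplies the usual EASTL allocator hooks (the named operator new[] overloads):

    #include <EASTL/bonus/list_map.h>
    #include <stdint.h>

    struct Invite { uint64_t expiresAt; };   // hypothetical payload, for illustration only

    void PruneInvites(uint64_t now)
    {
        // Keyed by player id; iteration order is insertion order, so the
        // oldest entry is always at front().
        eastl::list_map<uint32_t, Invite> invites;

        invites.push_back(1u, Invite{now + 30});   // returns false if the key already exists
        invites.push_back(2u, Invite{now + 60});

        // Expire the oldest invites without walking the whole container.
        while (!invites.empty() && invites.front().second.expiresAt <= now)
            invites.pop_front();

        // Player 2 rejects the invite: O(log n) find, O(1) unlink from the list.
        if (invites.find(2u) != invites.end())
            invites.erase(2u);
    }
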
+ typedef typename eastl::list_map_iterator iterator; // This is intentionally different from base_type::iterator + typedef typename eastl::list_map_iterator const_iterator; // This is intentionally different from base_type::const_iterator + typedef eastl::reverse_iterator reverse_iterator; + typedef eastl::reverse_iterator const_reverse_iterator; + typedef typename base_type::allocator_type allocator_type; + typedef typename eastl::pair insert_return_type; // This is intentionally removed, as list_map doesn't support insert() functions, in favor of list like push_back and push_front + typedef typename eastl::use_first extract_key; // This is intentionally different from base_type::extract_key + + using base_type::get_allocator; + using base_type::set_allocator; + using base_type::key_comp; + using base_type::empty; + using base_type::size; + + protected: + typedef typename eastl::list_map_data > internal_value_type; + + protected: + // internal base node, acting as the sentinel for list like behaviors + list_map_data_base mNode; + + public: + list_map(const allocator_type& allocator = EASTL_LIST_MAP_DEFAULT_ALLOCATOR); + list_map(const Compare& compare, const allocator_type& allocator = EASTL_MAP_DEFAULT_ALLOCATOR); + + // To do: Implement the following: + + //list_map(const this_type& x); + //list_map(this_type&& x); + //list_map(this_type&& x, const allocator_type& allocator); + //list_map(std::initializer_list ilist, const Compare& compare = Compare(), const allocator_type& allocator = EASTL_LIST_MAP_DEFAULT_ALLOCATOR); + + //template + //list_map(Iterator itBegin, Iterator itEnd); + + //this_type& operator=(const this_type& x); + //this_type& operator=(std::initializer_list ilist); + //this_type& operator=(this_type&& x); + + //void swap(this_type& x); + + public: + // iterators + iterator begin() EA_NOEXCEPT; + const_iterator begin() const EA_NOEXCEPT; + const_iterator cbegin() const EA_NOEXCEPT; + + iterator end() EA_NOEXCEPT; + const_iterator end() const EA_NOEXCEPT; + const_iterator cend() const EA_NOEXCEPT; + + reverse_iterator rbegin() EA_NOEXCEPT; + const_reverse_iterator rbegin() const EA_NOEXCEPT; + const_reverse_iterator crbegin() const EA_NOEXCEPT; + + reverse_iterator rend() EA_NOEXCEPT; + const_reverse_iterator rend() const EA_NOEXCEPT; + const_reverse_iterator crend() const EA_NOEXCEPT; + + public: + // List like methods + reference front(); + const_reference front() const; + + reference back(); + const_reference back() const; + + // push_front and push_back which takes in a key/value pair + bool push_front(const value_type& value); + bool push_back(const value_type& value); + + // push_front and push_back which take key and value separately, for convenience + bool push_front(const key_type& key, const mapped_type& value); + bool push_back(const key_type& key, const mapped_type& value); + + void pop_front(); + void pop_back(); + + public: + // Map like methods + iterator find(const key_type& key); + const_iterator find(const key_type& key) const; + + template + iterator find_as(const U& u, Compare2 compare2); + template + const_iterator find_as(const U& u, Compare2 compare2) const; + + size_type count(const key_type& key) const; + size_type erase(const key_type& key); + + public: + // Shared methods which are common to list and map + iterator erase(const_iterator position); + reverse_iterator erase(const_reverse_iterator position); + + void clear(); + void reset_lose_memory(); + + bool validate() const; + int validate_iterator(const_iterator i) const; + + public: + // 
list like functionality which is in consideration for implementation: + // iterator insert(const_iterator position, const value_type& value); + // void remove(const mapped_type& x); + + public: + // list like functionality which may be implemented, but is discouraged from implementation: + // due to the liklihood that they would require O(n) time to execute. + // template + // void remove_if(Predicate); + // void reverse(); + // void sort(); + // template + // void sort(Compare compare); + + public: + // map like functionality which list_map does not support, due to abmiguity with list like functionality: + #if !defined(EA_COMPILER_NO_DELETED_FUNCTIONS) + template + list_map(InputIterator first, InputIterator last, const Compare& compare, const allocator_type& allocator = EASTL_RBTREE_DEFAULT_ALLOCATOR) = delete; + + insert_return_type insert(const value_type& value) = delete; + iterator insert(const_iterator position, const value_type& value) = delete; + + template + void insert(InputIterator first, InputIterator last) = delete; + + insert_return_type insert(const key_type& key) = delete; + + iterator erase(const_iterator first, const_iterator last) = delete; + reverse_iterator erase(reverse_iterator first, reverse_iterator last) = delete; + + void erase(const key_type* first, const key_type* last) = delete; + + iterator lower_bound(const key_type& key) = delete; + const_iterator lower_bound(const key_type& key) const = delete; + + iterator upper_bound(const key_type& key) = delete; + const_iterator upper_bound(const key_type& key) const = delete; + + eastl::pair equal_range(const key_type& key) = delete; + eastl::pair equal_range(const key_type& key) const = delete; + + mapped_type& operator[](const key_type& key) = delete; // Of map, multimap, set, and multimap, only map has operator[]. + #endif + + public: + // list like functionality which list_map does not support, due to ambiguity with map like functionality: + #if 0 + reference push_front() = delete; + void* push_front_uninitialized() = delete; + + reference push_back() = delete; + void* push_back_uninitialized() = delete; + + iterator insert(const_iterator position) = delete; + + void insert(const_iterator position, size_type n, const value_type& value) = delete; + + template + void insert(const_iterator position, InputIterator first, InputIterator last) = delete; + + iterator erase(const_iterator first, const_iterator last) = delete; + reverse_iterator erase(const_reverse_iterator first, const_reverse_iterator last) = delete; + + void splice(const_iterator position, this_type& x) = delete + void splice(const_iterator position, this_type& x, const_iterator i) = delete; + void splice(const_iterator position, this_type& x, const_iterator first, const_iterator last) = delete; + + void merge(this_type& x) = delete; + + template + void merge(this_type& x, Compare compare) = delete; + + void unique() = delete; // Uniqueness is enforced by map functionality + + template + void unique(BinaryPredicate) = delete; // Uniqueness is enforced by map functionality + #endif + + }; // list_map + + + /////////////////////////////////////////////////////////////////////// + // list_map_data + /////////////////////////////////////////////////////////////////////// + + template + inline list_map_data::list_map_data(const Value& value) + : mValue(value) + { + mpNext = NULL; // GCC 4.8 is generating warnings about referencing these values in list_map::push_front unless we + mpPrev = NULL; // initialize them here. 
The compiler seems to be mistaken, as our code isn't actually using them unintialized. + } + + + /////////////////////////////////////////////////////////////////////// + // list_map_iterator + /////////////////////////////////////////////////////////////////////// + + template + inline list_map_iterator::list_map_iterator() + : mpNode(NULL) + { + // Empty + } + + + template + inline list_map_iterator::list_map_iterator(const base_node_type* pNode) + : mpNode(static_cast(const_cast(pNode))) + { + // Empty + } + + + template + inline list_map_iterator::list_map_iterator(const iterator& x) + : mpNode(const_cast(x.mpNode)) + { + // Empty + } + + + template + inline typename list_map_iterator::reference + list_map_iterator::operator*() const + { + return mpNode->mValue; + } + + + template + inline typename list_map_iterator::pointer + list_map_iterator::operator->() const + { + return &mpNode->mValue; + } + + + template + inline typename list_map_iterator::this_type& + list_map_iterator::operator++() + { + mpNode = static_cast(mpNode->mpNext); + return *this; + } + + + template + inline typename list_map_iterator::this_type + list_map_iterator::operator++(int) + { + this_type temp(*this); + mpNode = static_cast(mpNode->mpNext); + return temp; + } + + + template + inline typename list_map_iterator::this_type& + list_map_iterator::operator--() + { + mpNode = static_cast(mpNode->mpPrev); + return *this; + } + + + template + inline typename list_map_iterator::this_type + list_map_iterator::operator--(int) + { + this_type temp(*this); + mpNode = static_cast(mpNode->mpPrev); + return temp; + } + + + // We provide additional template paremeters here to support comparisons between const and non-const iterators. + // See C++ defect report #179, or EASTL/list.h for more information. + template + inline bool operator==(const list_map_iterator& a, + const list_map_iterator& b) + { + return a.mpNode == b.mpNode; + } + + + template + inline bool operator!=(const list_map_iterator& a, + const list_map_iterator& b) + { + return a.mpNode != b.mpNode; + } + + + // We provide a version of operator!= for the case where the iterators are of the + // same type. This helps prevent ambiguity errors in the presence of rel_ops. 
+ template + inline bool operator!=(const list_map_iterator& a, + const list_map_iterator& b) + { + return a.mpNode != b.mpNode; + } + + + /////////////////////////////////////////////////////////////////////// + // list_map + /////////////////////////////////////////////////////////////////////// + + template + inline list_map::list_map(const allocator_type& allocator) + : base_type(allocator) + { + mNode.mpNext = &mNode; + mNode.mpPrev = &mNode; + } + + template + inline list_map::list_map(const Compare& compare, const allocator_type& allocator) + : base_type(compare, allocator) + { + mNode.mpNext = &mNode; + mNode.mpPrev = &mNode; + } + + template + inline typename list_map::iterator + list_map::begin() EA_NOEXCEPT + { + return iterator(mNode.mpNext); + } + + template + inline typename list_map::const_iterator + list_map::begin() const EA_NOEXCEPT + { + return const_iterator(mNode.mpNext); + } + + template + inline typename list_map::const_iterator + list_map::cbegin() const EA_NOEXCEPT + { + return const_iterator(mNode.mpNext); + } + + template + inline typename list_map::iterator + list_map::end() EA_NOEXCEPT + { + return iterator(&mNode); + } + + template + inline typename list_map::const_iterator + list_map::end() const EA_NOEXCEPT + { + return const_iterator(&mNode); + } + + template + inline typename list_map::const_iterator + list_map::cend() const EA_NOEXCEPT + { + return const_iterator(&mNode); + } + + template + inline typename list_map::reverse_iterator + list_map::rbegin() EA_NOEXCEPT + { + return reverse_iterator(&mNode); + } + + template + inline typename list_map::const_reverse_iterator + list_map::rbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(&mNode); + } + + template + inline typename list_map::const_reverse_iterator + list_map::crbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(&mNode); + } + + template + inline typename list_map::reverse_iterator + list_map::rend() EA_NOEXCEPT + { + return reverse_iterator(mNode.mpNext); + } + + template + inline typename list_map::const_reverse_iterator + list_map::rend() const EA_NOEXCEPT + { + return const_reverse_iterator(mNode.mpNext); + } + + template + inline typename list_map::const_reverse_iterator + list_map::crend() const EA_NOEXCEPT + { + return const_reverse_iterator(mNode.mpNext); + } + + template + inline typename list_map::reference + list_map::front() + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(static_cast(mNode.mpNext) == &mNode)) + EASTL_FAIL_MSG("list_map::front -- empty container"); + #else + // We allow the user to reference an empty container. + #endif + + return static_cast(mNode.mpNext)->mValue; + } + + template + inline typename list_map::const_reference + list_map::front() const + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(static_cast(mNode.mpNext) == &mNode)) + EASTL_FAIL_MSG("list_map::front -- empty container"); + #else + // We allow the user to reference an empty container. + #endif + + return static_cast(mNode.mpNext)->mValue; + } + + template + inline typename list_map::reference + list_map::back() + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(static_cast(mNode.mpNext) == &mNode)) + EASTL_FAIL_MSG("list_map::back -- empty container"); + #else + // We allow the user to reference an empty container. 
+ #endif + + return static_cast(mNode.mpPrev)->mValue; + } + + template + inline typename list_map::const_reference + list_map::back() const + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(static_cast(mNode.mpNext) == &mNode)) + EASTL_FAIL_MSG("list_map::back -- empty container"); + #else + // We allow the user to reference an empty container. + #endif + + return static_cast(mNode.mpPrev)->mValue; + } + + template + bool list_map::push_front(const value_type& value) + { + internal_value_type tempValue(value); + typename base_type::insert_return_type baseReturn = base_type::insert(tempValue); + + // Did the insert succeed? + if (baseReturn.second) + { + internal_value_type* pNode = &(*baseReturn.first); + + pNode->mpNext = mNode.mpNext; + pNode->mpPrev = &mNode; + + mNode.mpNext->mpPrev = pNode; + mNode.mpNext = pNode; + + return true; + } + else + { + return false; + } + } + + template + bool list_map::push_back(const value_type& value) + { + internal_value_type tempValue(value); + typename base_type::insert_return_type baseReturn = base_type::insert(tempValue); + + // Did the insert succeed? + if (baseReturn.second) + { + internal_value_type* pNode = &(*baseReturn.first); + + pNode->mpPrev = mNode.mpPrev; + pNode->mpNext = &mNode; + + mNode.mpPrev->mpNext = pNode; + mNode.mpPrev = pNode; + + return true; + } + else + { + return false; + } + } + + template + bool list_map::push_front(const key_type& key, const mapped_type& value) + { + return push_front(eastl::make_pair(key, value)); + } + + template + bool list_map::push_back(const key_type& key, const mapped_type& value) + { + return push_back(eastl::make_pair(key, value)); + } + + template + void list_map::pop_front() + { + #if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(empty())) + EASTL_FAIL_MSG("list_map::pop_front -- empty container"); + #endif + + erase(static_cast(mNode.mpNext)->mValue.first); + } + + template + void list_map::pop_back() + { + #if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(empty())) + EASTL_FAIL_MSG("list_map::pop_back -- empty container"); + #endif + + erase(static_cast(mNode.mpPrev)->mValue.first); + } + + template + inline typename list_map::iterator + list_map::find(const key_type& key) + { + typename base_type::iterator baseIter = base_type::find(key); + if (baseIter != base_type::end()) + { + return iterator(&(*baseIter)); + } + else + { + return end(); + } + } + + template + inline typename list_map::const_iterator + list_map::find(const key_type& key) const + { + typename base_type::const_iterator baseIter = base_type::find(key); + if (baseIter != base_type::end()) + { + return const_iterator(&(*baseIter)); + } + else + { + return end(); + } + } + + template + template + inline typename list_map::iterator + list_map::find_as(const U& u, Compare2 compare2) + { + typename base_type::iterator baseIter = base_type::find_as(u, compare2); + if (baseIter != base_type::end()) + { + return iterator(&(*baseIter)); + } + else + { + return end(); + } + } + + template + template + inline typename list_map::const_iterator + list_map::find_as(const U& u, Compare2 compare2) const + { + typename base_type::const_iterator baseIter = base_type::find_as(u, compare2); + if (baseIter != base_type::end()) + { + return const_iterator(&(*baseIter)); + } + else + { + return end(); + } + } + + template + inline typename list_map::size_type + list_map::count(const key_type& key) const + { + const typename base_type::const_iterator it = base_type::find(key); + return (it != base_type::end()) ? 
1 : 0; + } + + template + inline typename list_map::size_type + list_map::erase(const key_type& key) + { + typename base_type::iterator baseIter = base_type::find(key); + if (baseIter != base_type::end()) + { + internal_value_type* node = &(*baseIter); + + node->mpNext->mpPrev = node->mpPrev; + node->mpPrev->mpNext = node->mpNext; + + base_type::erase(baseIter); + + return 1; + } + return 0; + } + + template + inline typename list_map::iterator + list_map::erase(const_iterator position) + { + iterator posIter(position.mpNode); // Convert from const. + iterator eraseIter(posIter++); + erase(eraseIter->first); + return posIter; + } + + template + inline typename list_map::reverse_iterator + list_map::erase(const_reverse_iterator position) + { + return reverse_iterator(erase((++position).base())); + } + + template + void list_map::clear() + { + base_type::clear(); + + mNode.mpNext = &mNode; + mNode.mpPrev = &mNode; + } + + template + void list_map::reset_lose_memory() + { + base_type::reset_lose_memory(); + + mNode.mpNext = &mNode; + mNode.mpPrev = &mNode; + } + + template + bool list_map::validate() const + { + if (!base_type::validate()) + { + return false; + } + + size_type nodeCount(0); + list_map_data_base* node = mNode.mpNext; + while (node != &mNode) + { + internal_value_type* data = static_cast(node); + if (base_type::find(data->mValue.first) == base_type::end()) + { + return false; + } + node = node->mpNext; + ++nodeCount; + } + if (nodeCount != size()) + { + return false; + } + nodeCount = 0; + node = mNode.mpPrev; + while (node != &mNode) + { + internal_value_type* data = static_cast(node); + if (base_type::find(data->mValue.first) == base_type::end()) + { + return false; + } + node = node->mpPrev; + ++nodeCount; + } + if (nodeCount != size()) + { + return false; + } + + return true; + } + + template + int list_map::validate_iterator(const_iterator iter) const + { + for (const_iterator temp = begin(), tempEnd = end(); temp != tempEnd; ++temp) + { + if (temp == iter) + { + return (isf_valid | isf_current | isf_can_dereference); + } + } + + if (iter == end()) + return (isf_valid | isf_current); + + return isf_none; + } + + +} // namespace eastl + + +#endif // Header include guard + + + + diff --git a/libkram/eastl/include/EASTL/bonus/lru_cache.h b/libkram/eastl/include/EASTL/bonus/lru_cache.h new file mode 100644 index 00000000..46d053dc --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/lru_cache.h @@ -0,0 +1,424 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// lru_cache is a container that simplifies caching of objects in a map. +// Basically, you give the container a key, like a string, and the data you want. +// The container provides callback mechanisms to generate data if it's missing +// as well as delete data when it's purged from the cache. This container +// uses a least recently used method: whatever the oldest item is will be +// replaced with a new entry. +// +// Algorithmically, the container is a combination of a map and a list. +// The list stores the age of the entries by moving the entry to the head +// of the list on each access, either by a call to get() or to touch(). +// The map is just the map as one would expect. 
+// +// This is useful for caching off data that is expensive to generate, +// for example text to speech wave files that are dynamically generated, +// but that will need to be reused, as is the case in narration of menu +// entries as a user scrolls through the entries. +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_LRUCACHE_H +#define EASTL_LRUCACHE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) +#pragma once +#endif + +#include +#include +#include + +namespace eastl +{ + /// EASTL_LRUCACHE_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_LRUCACHE_DEFAULT_NAME + #define EASTL_LRUCACHE_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " lru_cache" // Unless the user overrides something, this is "EASTL lru_cache". + #endif + + + /// EASTL_LRUCACHE_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_LRUCACHE_DEFAULT_ALLOCATOR + #define EASTL_LRUCACHE_DEFAULT_ALLOCATOR allocator_type(EASTL_LRUCACHE_DEFAULT_NAME) + #endif + + /// lru_cache + /// + /// Implements a caching map based off of a key and data. + /// LRUList parameter is any container that guarantees the validity of its iterator even after a modification (e.g. list) + /// LRUMap is any mapping container that can map a key to some data. By default, we use unordered_set, but it might be better + /// to use hash_map or some other structure depending on your key/data combination. For example, you may want to swap the + /// map backing if using strings as keys or if the data objects are small. In any case, unordered_set is a good default and should + /// work well enough since the purpose of this class is to cache results of expensive, order of milliseconds, operations + /// + /// Algorithmic Performance (default data structures): + /// touch() -> O(1) + /// insert() / update(), get() / operator[] -> equivalent to unordered_set (O(1) on average, O(n) worst) + /// size() -> O(1) + /// + /// All accesses to a given key (insert, update, get) will push that key to most recently used. + /// If the data objects are shared between threads, it would be best to use a smartptr to manage the lifetime of the data. + /// as it could be removed from the cache while in use by another thread. + template , + typename map_type = eastl::unordered_map, + eastl::hash, + eastl::equal_to, + Allocator>> + class lru_cache + { + public: + using key_type = Key; + using value_type = Value; + using allocator_type = Allocator; + using size_type = eastl_size_t; + using list_iterator = typename list_type::iterator; + using map_iterator = typename map_type::iterator; + using data_container_type = eastl::pair; + using iterator = typename map_type::iterator; + using const_iterator = typename map_type::const_iterator; + using this_type = lru_cache; + using create_callback_type = eastl::function; + using delete_callback_type = eastl::function; + + /// lru_cache constructor + /// + /// Creates a Key / Value map that only stores size Value objects until it deletes them. + /// For complex objects or operations, the creator and deletor callbacks can be used. + /// This works just like a regular map object: on access, the Value will be created if it doesn't exist, returned otherwise. 
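As an illustration of the constructor and callback hooks documented above, a minimal sketch using integer keys and eastl::string values; CacheDemo and the lambdas are hypothetical stand-ins for whatever generation/cleanup the caller actually needs, and the usual EASTL allocator hooks are assumed:

    #include <EASTL/bonus/lru_cache.h>
    #include <EASTL/string.h>

    void CacheDemo()
    {
        // Cache at most 4 generated strings, keyed by an integer id.
        eastl::lru_cache<int, eastl::string> cache(4);

        // Optional hooks: build a value on a miss, observe values as they are evicted.
        cache.setCreateCallback([](int) { return eastl::string("generated"); });
        cache.setDeleteCallback([](eastl::string) { /* release external resources here */ });

        cache.insert(1, "one");              // false if the key already exists
        eastl::string& two = cache[2];       // miss: created via the callback, now most recent
        cache.touch(1);                      // mark key 1 as most recently used
        (void)two;

        if (cache.contains(2))
            cache.erase(2);                  // the deletor runs for the erased value

        cache.resize(2);                     // evicts oldest entries if size exceeds the new capacity
    }
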
+ explicit lru_cache(size_type size, + const allocator_type& allocator = EASTL_LRUCACHE_DEFAULT_ALLOCATOR, + create_callback_type creator = nullptr, + delete_callback_type deletor = nullptr) + : m_list(allocator) + , m_map(allocator) + , m_capacity(size) + , m_create_callback(creator) + , m_delete_callback(deletor) + { + } + + /// lru_cache destructor + /// + /// Iterates across every entry in the map and calls the deletor before calling the standard destructors + ~lru_cache() + { + // Destruct everything we have cached + for (auto& iter : m_map) + { + if (m_delete_callback) + m_delete_callback(iter.second.first); + } + } + + lru_cache(std::initializer_list> il) + : lru_cache(il.size()) + { + for(auto& p : il) + insert_or_assign(p.first, p.second); + } + + // TODO(rparolin): Why do we prevent copies? And what about moves? + lru_cache(const this_type&) = delete; + this_type &operator=(const this_type&) = delete; + + /// insert + /// + /// insert key k with value v. + /// If key already exists, no change is made and the return value is false. + /// If the key doesn't exist, the data is added to the map and the return value is true. + bool insert(const key_type& k, const value_type& v) + { + if (m_map.find(k) == m_map.end()) + { + make_space(); + + m_list.push_front(k); + m_map[k] = data_container_type(v, m_list.begin()); + + return true; + } + else + { + return false; + } + } + + /// emplace + /// + /// Places a new object in place k created with args + /// If the key already exists, it is replaced. + template + void emplace(const key_type& k, Args&&... args) + { + make_space(); + + m_list.push_front(k); + m_map.emplace(k, data_container_type(eastl::forward(args)..., m_list.begin())); + } + + /// insert_or_assign + /// + /// Same as add, but replaces the data at key k, if it exists, with the new entry v + /// Note that the deletor for the old v will be called before it's replaced with the new value of v + void insert_or_assign(const key_type& k, const value_type& v) + { + auto iter = m_map.find(k); + + if (m_map.find(k) != m_map.end()) + { + assign(iter, v); + } + else + { + insert(k, v); + } + } + + /// contains + /// + /// Returns true if key k exists in the cache + bool contains(const key_type& k) const + { + return m_map.find(k) != m_map.end(); + } + + /// at + /// + /// Retrives the data for key k, not valid if k does not exist + eastl::optional at(const key_type& k) + { + auto iter = m_map.find(k); + + if (iter != m_map.end()) + { + return iter->second.first; + } + else + { + return eastl::nullopt; + } + } + + /// get + /// + /// Retrives the data for key k. If no data exists, it will be created by calling the + /// creator. + value_type& get(const key_type& k) + { + auto iter = m_map.find(k); + + // The entry exists in the cache + if (iter != m_map.end()) + { + touch(k); + return iter->second.first; + } + else // The entry doesn't exist in the cache, so create one + { + // Add the entry to the map + insert(k, m_create_callback ? m_create_callback(k) : value_type()); + + // return the new data + return m_map[k].first; + } + } + + /// Equivalent to get(k) + value_type& operator[](const key_type& k) { return get(k); } + + /// erase + /// + /// erases key k from the cache. + /// If k does not exist, returns false. If k exists, returns true. 
+ bool erase(const key_type& k) + { + auto iter = m_map.find(k); + + if (iter != m_map.end()) + { + m_list.erase(iter->second.second); + + // Delete the actual entry + map_erase(iter); + + return true; + } + + return false; + } + + /// erase_oldest + /// + /// Removes the oldest entry from the cache. + void erase_oldest() + { + auto key = m_list.back(); + m_list.pop_back(); + + // Delete the actual entry + auto iter = m_map.find(key); + map_erase(iter); + } + + /// touch + /// + /// Touches key k, marking it as most recently used. + /// If k does not exist, returns false. If the touch was successful, returns true. + bool touch(const key_type& k) + { + auto iter = m_map.find(k); + + if (iter != m_map.end()) + { + touch(iter); + return true; + } + + return false; + } + + /// touch + /// + /// Touches key at iterator iter, moving it to most recently used position + void touch(iterator& iter) + { + auto listRef = iter->second.second; + + m_list.erase(listRef); + m_list.push_front(iter->first); + iter->second.second = m_list.begin(); + } + + /// assign + /// + /// Updates key k with data v. + /// If key k does not exist, returns false and no changes are made. + /// If key k exists, existing data has its deletor called and key k's data is replaced with new v data + bool assign(const key_type& k, const value_type& v) + { + auto iter = m_map.find(k); + + if (iter != m_map.end()) + { + assign(iter, v); + return true; + } + + return false; + } + + /// assign + /// + /// Updates data at spot iter with data v. + void assign(iterator& iter, const value_type& v) + { + if (m_delete_callback) + m_delete_callback(iter->second.first); + touch(iter); + iter->second.first = v; + } + + // standard container functions + iterator begin() EA_NOEXCEPT { return m_map.begin(); } + iterator end() EA_NOEXCEPT { return m_map.end(); } + iterator rbegin() EA_NOEXCEPT { return m_map.rbegin(); } + iterator rend() EA_NOEXCEPT { return m_map.rend(); } + const_iterator begin() const EA_NOEXCEPT { return m_map.begin(); } + const_iterator cbegin() const EA_NOEXCEPT { return m_map.cbegin(); } + const_iterator crbegin() const EA_NOEXCEPT { return m_map.crbegin(); } + const_iterator end() const EA_NOEXCEPT { return m_map.end(); } + const_iterator cend() const EA_NOEXCEPT { return m_map.cend(); } + const_iterator crend() const EA_NOEXCEPT { return m_map.crend(); } + + bool empty() const EA_NOEXCEPT { return m_map.empty(); } + size_type size() const EA_NOEXCEPT { return m_map.size(); } + size_type capacity() const EA_NOEXCEPT { return m_capacity; } + + void clear() EA_NOEXCEPT + { + // Since we have a delete callback, we want to reuse the trim function by cheating the max + // size to clear all the entries to avoid duplicating code. + auto old_max = m_capacity; + + m_capacity = 0; + trim(); + m_capacity = old_max; + } + + /// resize + /// + /// Resizes the cache. Can be used to either expand or contract the cache. + /// In the case of a contraction, the oldest entries will be evicted with their respective + /// deletors called before completing. 
+ void resize(size_type newSize) + { + m_capacity = newSize; + trim(); + } + + void setCreateCallback(create_callback_type callback) { m_create_callback = callback; } + void setDeleteCallback(delete_callback_type callback) { m_delete_callback = callback; } + + // EASTL extensions + const allocator_type& get_allocator() const EA_NOEXCEPT { return m_map.get_allocator(); } + allocator_type& get_allocator() EA_NOEXCEPT { return m_map.get_allocator(); } + void set_allocator(const allocator_type& allocator) { m_map.set_allocator(allocator); m_list.set_allocator(allocator); } + + /// Does not reset the callbacks + void reset_lose_memory() EA_NOEXCEPT { m_map.reset_lose_memory(); m_list.reset_lose_memory(); } + + private: + inline void map_erase(map_iterator pos) + { + if (m_delete_callback) + m_delete_callback(pos->second.first); + m_map.erase(pos); + } + + bool trim() + { + if (size() <= m_capacity) + { + return false; // No trim necessary + } + + // We need to trim + do + { + erase_oldest(); + } while (m_list.size() > m_capacity); + + return true; + } + + void make_space() + { + if (size() == m_capacity) + { + erase_oldest(); + } + } + + private: + list_type m_list; + map_type m_map; + size_type m_capacity; + create_callback_type m_create_callback; + delete_callback_type m_delete_callback; + }; +} + + + +#endif diff --git a/libkram/eastl/include/EASTL/bonus/ring_buffer.h b/libkram/eastl/include/EASTL/bonus/ring_buffer.h new file mode 100644 index 00000000..fcd8fd2c --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/ring_buffer.h @@ -0,0 +1,1581 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// A ring buffer is a FIFO (first-in, first-out) container which acts +// much like a queue. The difference is that a ring buffer is implemented +// via chasing pointers around a given container instead of like queue +// adds to the writes to the end of the container are reads from the begin. +// The benefit of a ring buffer is that memory allocations don't occur +// and new elements are neither added nor removed from the container. +// Elements in the container are simply assigned values in circles around +// the container. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_RING_BUFFER_H +#define EASTL_RING_BUFFER_H + + +#include +#include +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + /// EASTL_RING_BUFFER_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_RING_BUFFER_DEFAULT_NAME + #define EASTL_RING_BUFFER_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " ring_buffer" // Unless the user overrides something, this is "EASTL ring_buffer". + #endif + + /// EASTL_RING_BUFFER_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_RING_BUFFER_DEFAULT_ALLOCATOR + #define EASTL_RING_BUFFER_DEFAULT_ALLOCATOR allocator_type(EASTL_RING_BUFFER_DEFAULT_NAME) + #endif + + + /// ring_buffer_iterator + /// + /// We force this iterator to act like a random access iterator even if + /// the underlying container doesn't support random access iteration. 
+ /// Any BidirectionalIterator can be a RandomAccessIterator; it just + /// might be inefficient in some cases. + /// + template + struct ring_buffer_iterator + { + public: + typedef ring_buffer_iterator this_type; + typedef T value_type; + typedef Pointer pointer; + typedef Reference reference; + typedef typename Container::size_type size_type; + typedef typename Container::difference_type difference_type; + typedef typename Container::iterator container_iterator; + typedef typename Container::const_iterator container_const_iterator; + typedef ring_buffer_iterator iterator; + typedef ring_buffer_iterator const_iterator; + typedef EASTL_ITC_NS::random_access_iterator_tag iterator_category; + + public: + Container* mpContainer; + container_iterator mContainerIterator; + + public: + ring_buffer_iterator(); + ring_buffer_iterator(Container* pContainer, const container_iterator& containerIterator); + ring_buffer_iterator(const iterator& x); + + ring_buffer_iterator& operator=(const iterator& x); + + reference operator*() const; + pointer operator->() const; + + this_type& operator++(); + this_type operator++(int); + + this_type& operator--(); + this_type operator--(int); + + this_type& operator+=(difference_type n); + this_type& operator-=(difference_type n); + + this_type operator+(difference_type n) const; + this_type operator-(difference_type n) const; + + protected: + void increment(difference_type n, EASTL_ITC_NS::input_iterator_tag); + void increment(difference_type n, EASTL_ITC_NS::random_access_iterator_tag); + + }; // struct ring_buffer_iterator + + + + /// ring_buffer + /// + /// Implements a ring buffer via a given container type, which would + /// typically be a vector or array, though any container which supports + /// bidirectional iteration would work. + /// + /// A ring buffer is a FIFO (first-in, first-out) container which acts + /// much like a queue. The difference is that a ring buffer is implemented + /// via chasing pointers around a container and moving the read and write + /// positions forward (and possibly wrapping around) as the container is + /// read and written via pop_front and push_back. + /// + /// The benefit of a ring buffer is that memory allocations don't occur + /// and new elements are neither added nor removed from the container. + /// Elements in the container are simply assigned values in circles around + /// the container. + /// + /// ring_buffer is different from other containers -- including adapter + /// containers -- in how iteration is done. Iteration of a ring buffer + /// starts at the current begin position, proceeds to the end of the underlying + /// container, and continues at the begin of the underlying container until + /// the ring buffer's current end position. Thus a ring_buffer does + /// indeed have a begin and an end, though the values of begin and end + /// chase each other around the container. An empty ring_buffer is one + /// in which end == begin, and a full ring_buffer is one in which + /// end + 1 == begin. + /// + /// Example of a ring buffer layout, where + indicates queued items: + /// ++++++++++--------------------------------+++++++++ + /// ^ ^ + /// end begin + /// + /// Empty ring buffer: + /// --------------------------------------------------- + /// ^ + /// begin / end + /// + /// Full ring buffer. 
Note that one item is necessarily unused; it is + /// analagous to a '\0' at the end of a C string: + /// +++++++++++++++++++++++++++++++++++++++++-+++++++++ + /// ^^ + /// end begin + /// + /// A push_back operation on a ring buffer assigns the new value to end. + /// If there is no more space in the buffer, this will result in begin + /// being overwritten and the begin position being moved foward one position. + /// The user can use the full() function to detect this condition. + /// Note that elements in a ring buffer are not created or destroyed as + /// their are added and removed; they are merely assigned. Only on + /// container construction and destruction are any elements created and + /// destroyed. + /// + /// The ring buffer can be used in either direction. By this we mean that + /// you can use push_back to add items and pop_front to remove them; or you can + /// use push_front to add items and pop_back to remove them. You aren't + /// limited to these operations; you can push or pop from either side + /// arbitrarily and you can insert or erase anywhere in the container. + /// + /// The ring buffer requires the user to specify a Container type, which + /// by default is vector. However, any container with bidirectional iterators + /// will work, such as list, deque, string or any of the fixed_* versions + /// of these containers, such as fixed_string. Since ring buffer works via copying + /// elements instead of allocating and freeing nodes, inserting in the middle + /// of a ring buffer based on list (instead of vector) is no more efficient. + /// + /// To use the ring buffer, its container must be resized to the desired + /// ring buffer size. Changing the size of a ring buffer may cause ring + /// buffer iterators to invalidate. + /// + /// An alternative to using a ring buffer is to use a list with a user-created + /// node pool and custom allocator. There are various tradeoffs that result from this. + /// + /// Example usage: + /// ring_buffer< int, list > rb(100); + /// rb.push_back(1); + /// + /// Example usage: + /// // Example of creating an on-screen debug log that shows 16 + /// // strings at a time and scrolls older strings away. + /// + /// // Create ring buffer of 16 strings. + /// ring_buffer< string, vector > debugLogText(16); + /// + /// // Reserve 128 chars for each line. This can make it so that no + /// // runtime memory allocations occur. + /// for(vector::iterator it = debugLogText.get_container().begin(), + /// itEnd = debugLogText.get_container().end(); it != itEnd; ++it) + /// { + /// (*it).reserve(128); + /// } + /// + /// // Add a new string, using push_front() and front() instead of + /// // push_front(str) in order to avoid creating a temporary str. 
+ /// debugLogText.push_front(); + /// debugLogText.front() = "Player fired weapon"; + /// + template , typename Allocator = typename Container::allocator_type> + class ring_buffer + { + public: + typedef ring_buffer this_type; + typedef Container container_type; + typedef Allocator allocator_type; + + typedef typename Container::value_type value_type; + typedef typename Container::reference reference; + typedef typename Container::const_reference const_reference; + typedef typename Container::size_type size_type; + typedef typename Container::difference_type difference_type; + typedef typename Container::iterator container_iterator; + typedef typename Container::const_iterator container_const_iterator; + typedef ring_buffer_iterator iterator; + typedef ring_buffer_iterator const_iterator; + typedef eastl::reverse_iterator reverse_iterator; + typedef eastl::reverse_iterator const_reverse_iterator; + + public: // We declare public so that global comparison operators can be implemented without adding an inline level and without tripping up GCC 2.x friend declaration failures. GCC (through at least v4.0) is poor at inlining and performance wins over correctness. + Container c; // We follow the naming convention established for stack, queue, priority_queue and name this 'c'. This variable must always have a size of at least 1, as even an empty ring_buffer has an unused terminating element. + + protected: + container_iterator mBegin; // We keep track of where our begin and end are by using Container iterators. + container_iterator mEnd; + size_type mSize; + + public: + // There currently isn't a ring_buffer constructor that specifies an initial size, unlike other containers. + explicit ring_buffer(size_type cap = 0); // Construct with an initial capacity (but size of 0). + explicit ring_buffer(size_type cap, const allocator_type& allocator); + explicit ring_buffer(const Container& x); + explicit ring_buffer(const allocator_type& allocator); + ring_buffer(const this_type& x); + ring_buffer(this_type&& x); + ring_buffer(this_type&& x, const allocator_type& allocator); + ring_buffer(std::initializer_list ilist, const allocator_type& allocator = EASTL_RING_BUFFER_DEFAULT_ALLOCATOR); // This function sets the capacity to be equal to the size of the initializer list. + + // No destructor necessary. Default will do. + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + template + void assign(InputIterator first, InputIterator last); + + void swap(this_type& x); + + iterator begin() EA_NOEXCEPT; + const_iterator begin() const EA_NOEXCEPT; + const_iterator cbegin() const EA_NOEXCEPT; + + iterator end() EA_NOEXCEPT; + const_iterator end() const EA_NOEXCEPT; + const_iterator cend() const EA_NOEXCEPT; + + reverse_iterator rbegin() EA_NOEXCEPT; + const_reverse_iterator rbegin() const EA_NOEXCEPT; + const_reverse_iterator crbegin() const EA_NOEXCEPT; + + reverse_iterator rend() EA_NOEXCEPT; + const_reverse_iterator rend() const EA_NOEXCEPT; + const_reverse_iterator crend() const EA_NOEXCEPT; + + bool empty() const EA_NOEXCEPT; + bool full() const EA_NOEXCEPT; + size_type size() const EA_NOEXCEPT; + size_type capacity() const EA_NOEXCEPT; + + void resize(size_type n); + void set_capacity(size_type n); // Sets the capacity to the given value, including values less than the current capacity. Adjusts the size downward if n < size, by throwing out the oldest elements in the buffer. 
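To make the push/pop and wrap-around behaviour described in the class comment concrete, a minimal sketch against the interface declared here; RecentSum and the sample values are hypothetical, and the usual EASTL allocator hooks are assumed:

    #include <EASTL/bonus/ring_buffer.h>
    #include <EASTL/vector.h>

    int RecentSum()
    {
        // Keep only the 8 most recent samples; no allocation occurs after construction.
        eastl::ring_buffer<int, eastl::vector<int>> samples(8);

        for (int i = 0; i < 20; ++i)
        {
            if (samples.full())
                samples.pop_front();   // drop the oldest explicitly; a push_back on a full
                                       // buffer would otherwise overwrite it and advance begin
            samples.push_back(i);
        }

        // front()/back() are the oldest and newest queued values (12 and 19 here).
        int range = samples.back() - samples.front();

        // Iteration runs from the current begin to the current end, wrapping
        // around the underlying vector as needed.
        int sum = 0;
        for (int v : samples)
            sum += v;

        return sum + range;
    }
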
+ void reserve(size_type n); // Reserve a given capacity. Doesn't decrease the capacity; it only increases it (for compatibility with other containers' behavior). + + reference front(); + const_reference front() const; + + reference back(); + const_reference back() const; + + void push_back(const value_type& value); + reference push_back(); + + void push_front(const value_type& value); + reference push_front(); + + void pop_back(); + void pop_front(); + + reference operator[](size_type n); + const_reference operator[](size_type n) const; + + // To consider: + // size_type read(value_type* pDestination, size_type nCount); + // size_type read(iterator** pPosition1, iterator** pPosition2, size_type& nCount1, size_type& nCount2); + + /* To do: + template + void emplace_front(Args&&... args); + + template + void emplace_back(Args&&... args); + + template + iterator emplace(const_iterator position, Args&&... args); + */ + + iterator insert(const_iterator position, const value_type& value); + void insert(const_iterator position, size_type n, const value_type& value); + void insert(const_iterator position, std::initializer_list ilist); + + template + void insert(const_iterator position, InputIterator first, InputIterator last); + + iterator erase(const_iterator position); + iterator erase(const_iterator first, const_iterator last); + reverse_iterator erase(const_reverse_iterator position); + reverse_iterator erase(const_reverse_iterator first, const_reverse_iterator last); + + void clear(); + + container_type& get_container(); + const container_type& get_container() const; + + bool validate() const; + int validate_iterator(const_iterator i) const; + + protected: + //size_type DoGetSize(EASTL_ITC_NS::input_iterator_tag) const; + //size_type DoGetSize(EASTL_ITC_NS::random_access_iterator_tag) const; + + }; // class ring_buffer + + + + + /////////////////////////////////////////////////////////////////////// + // ring_buffer_iterator + /////////////////////////////////////////////////////////////////////// + + template + ring_buffer_iterator::ring_buffer_iterator() + : mpContainer(NULL), mContainerIterator() + { + } + + + template + ring_buffer_iterator::ring_buffer_iterator(Container* pContainer, const container_iterator& containerIterator) + : mpContainer(pContainer), mContainerIterator(containerIterator) + { + } + + + template + ring_buffer_iterator::ring_buffer_iterator(const iterator& x) + : mpContainer(x.mpContainer), mContainerIterator(x.mContainerIterator) + { + } + + + template + ring_buffer_iterator& + ring_buffer_iterator::operator=(const iterator& x) + { + mpContainer = x.mpContainer; + mContainerIterator = x.mContainerIterator; + return *this; + } + + template + typename ring_buffer_iterator::reference + ring_buffer_iterator::operator*() const + { + return *mContainerIterator; + } + + + template + typename ring_buffer_iterator::pointer + ring_buffer_iterator::operator->() const + { + return &*mContainerIterator; + } + + + template + typename ring_buffer_iterator::this_type& + ring_buffer_iterator::operator++() + { + if(EASTL_UNLIKELY(++mContainerIterator == mpContainer->end())) + mContainerIterator = mpContainer->begin(); + return *this; + } + + + template + typename ring_buffer_iterator::this_type + ring_buffer_iterator::operator++(int) + { + const this_type temp(*this); + if(EASTL_UNLIKELY(++mContainerIterator == mpContainer->end())) + mContainerIterator = mpContainer->begin(); + return temp; + } + + + template + typename ring_buffer_iterator::this_type& + 
ring_buffer_iterator::operator--() + { + if(EASTL_UNLIKELY(mContainerIterator == mpContainer->begin())) + mContainerIterator = mpContainer->end(); + --mContainerIterator; + return *this; + } + + + template + typename ring_buffer_iterator::this_type + ring_buffer_iterator::operator--(int) + { + const this_type temp(*this); + if(EASTL_UNLIKELY(mContainerIterator == mpContainer->begin())) + mContainerIterator = mpContainer->end(); + --mContainerIterator; + return temp; + } + + + template + typename ring_buffer_iterator::this_type& + ring_buffer_iterator::operator+=(difference_type n) + { + typedef typename eastl::iterator_traits::iterator_category IC; + increment(n, IC()); + return *this; + } + + + template + typename ring_buffer_iterator::this_type& + ring_buffer_iterator::operator-=(difference_type n) + { + typedef typename eastl::iterator_traits::iterator_category IC; + increment(-n, IC()); + return *this; + } + + + template + typename ring_buffer_iterator::this_type + ring_buffer_iterator::operator+(difference_type n) const + { + return this_type(*this).operator+=(n); + } + + + template + typename ring_buffer_iterator::this_type + ring_buffer_iterator::operator-(difference_type n) const + { + return this_type(*this).operator+=(-n); + } + + + template + void ring_buffer_iterator::increment(difference_type n, EASTL_ITC_NS::input_iterator_tag) + { + // n cannot be negative, as input iterators don't support reverse iteration. + while(n-- > 0) + operator++(); + } + + + template + void ring_buffer_iterator::increment(difference_type n, EASTL_ITC_NS::random_access_iterator_tag) + { + // We make the assumption here that the user is incrementing from a valid + // starting position to a valid ending position. Thus *this + n yields a + // valid iterator, including if n happens to be a negative value. + + if(n >= 0) + { + const difference_type d = mpContainer->end() - mContainerIterator; + + if(n < d) + mContainerIterator += n; + else + mContainerIterator = mpContainer->begin() + (n - d); + } + else + { + // Recall that n and d here will be negative and so the logic here works as intended. + const difference_type d = mpContainer->begin() - mContainerIterator; + + if(n >= d) + mContainerIterator += n; + else + mContainerIterator = mpContainer->end() + (n - d); + } + } + + + // Random access iterators must support operator + and operator -. + // You can only add an integer to an iterator, and you cannot add two iterators. + template + inline ring_buffer_iterator + operator+(ptrdiff_t n, const ring_buffer_iterator& x) + { + return x + n; // Implement (n + x) in terms of (x + n). + } + + + // You can only add an integer to an iterator, but you can subtract two iterators. + template + inline typename ring_buffer_iterator::difference_type + operator-(const ring_buffer_iterator& a, + const ring_buffer_iterator& b) + { + typedef typename ring_buffer_iterator::difference_type difference_type; + + // To do: If container_iterator is a random access iterator, then do a simple calculation. + // Otherwise, we have little choice but to iterate from a to b and count as we go. + // See the ring_buffer::size function for an implementation of this. + + // Iteration implementation: + difference_type d = 0; + + for(ring_buffer_iterator temp(b); temp != a; ++temp) + ++d; + + return d; + } + + + // The C++ defect report #179 requires that we support comparisons between const and non-const iterators. + // Thus we provide additional template paremeters here to support this. 
The defect report does not + // require us to support comparisons between reverse_iterators and const_reverse_iterators. + template + inline bool operator==(const ring_buffer_iterator& a, + const ring_buffer_iterator& b) + { + // Perhaps we should compare the container pointer as well. + // However, for valid iterators this shouldn't be necessary. + return a.mContainerIterator == b.mContainerIterator; + } + + + template + inline bool operator!=(const ring_buffer_iterator& a, + const ring_buffer_iterator& b) + { + // Perhaps we should compare the container pointer as well. + // However, for valid iterators this shouldn't be necessary. + return !(a.mContainerIterator == b.mContainerIterator); + } + + + // We provide a version of operator!= for the case where the iterators are of the + // same type. This helps prevent ambiguity errors in the presence of rel_ops. + template + inline bool operator!=(const ring_buffer_iterator& a, + const ring_buffer_iterator& b) + { + return !(a.mContainerIterator == b.mContainerIterator); + } + + + + + /////////////////////////////////////////////////////////////////////// + // ring_buffer + /////////////////////////////////////////////////////////////////////// + + template + ring_buffer::ring_buffer(size_type cap) + : c() // Default construction with default allocator for the container. + { + // To do: This code needs to be amended to deal with possible exceptions + // that could occur during the resize call below. + + // We add one because the element at mEnd is necessarily unused. + c.resize(cap + 1); // Possibly we could construct 'c' with size, but c may not have such a ctor, though we rely on it having a resize function. + mBegin = c.begin(); + mEnd = mBegin; + mSize = 0; + } + + + template + ring_buffer::ring_buffer(size_type cap, const allocator_type& allocator) + : c(allocator) + { + // To do: This code needs to be amended to deal with possible exceptions + // that could occur during the resize call below. + + // We add one because the element at mEnd is necessarily unused. + c.resize(cap + 1); // Possibly we could construct 'c' with size, but c may not have such a ctor, though we rely on it having a resize function. + mBegin = c.begin(); + mEnd = mBegin; + mSize = 0; + } + + + template + ring_buffer::ring_buffer(const Container& x) + : c(x) // This copies elements from x, but unless the user is doing some tricks, the only thing that matters is that c.size() == x.size(). + { + // To do: This code needs to be amended to deal with possible exceptions + // that could occur during the resize call below. + if(c.empty()) + c.resize(1); + mBegin = c.begin(); + mEnd = mBegin; + mSize = 0; + } + + + template + ring_buffer::ring_buffer(const allocator_type& allocator) + : c(allocator) + { + // To do: This code needs to be amended to deal with possible exceptions + // that could occur during the resize call below. + + // We add one because the element at mEnd is necessarily unused. + c.resize(1); // Possibly we could construct 'c' with size, but c may not have such a ctor, though we rely on it having a resize function. + mBegin = c.begin(); + mEnd = mBegin; + mSize = 0; + } + + + template + ring_buffer::ring_buffer(const this_type& x) + : c(x.c) + { + mBegin = c.begin(); + mEnd = mBegin; + mSize = x.mSize; + + eastl::advance(mBegin, eastl::distance(const_cast(x).c.begin(), x.mBegin)); // We can do a simple distance algorithm here, as there will be no wraparound. 
+ eastl::advance(mEnd, eastl::distance(const_cast(x).c.begin(), x.mEnd)); + } + + template + ring_buffer::ring_buffer(this_type&& x) + : c() // Default construction with default allocator for the container. + { + c.resize(1); // Possibly we could construct 'c' with size, but c may not have such a ctor, though we rely on it having a resize function. + mBegin = c.begin(); + mEnd = mBegin; + mSize = 0; + + swap(x); // We are leaving x in an unusual state by swapping default-initialized members with it, as it won't be usable and can be only destructible. + } + + template + ring_buffer::ring_buffer(this_type&& x, const allocator_type& allocator) + : c(allocator) + { + c.resize(1); // Possibly we could construct 'c' with size, but c may not have such a ctor, though we rely on it having a resize function. + mBegin = c.begin(); + mEnd = mBegin; + mSize = 0; + + if(c.get_allocator() == x.c.get_allocator()) + swap(x); // We are leaving x in an unusual state by swapping default-initialized members with it, as it won't be usable and can be only destructible. + else + operator=(x); + } + + + template + ring_buffer::ring_buffer(std::initializer_list ilist, const allocator_type& allocator) + : c(allocator) + { + c.resize((eastl_size_t)ilist.size() + 1); + mBegin = c.begin(); + mEnd = mBegin; + mSize = 0; + + assign(ilist.begin(), ilist.end()); + } + + + template + typename ring_buffer::this_type& + ring_buffer::operator=(const this_type& x) + { + if(&x != this) + { + c = x.c; + + mBegin = c.begin(); + mEnd = mBegin; + mSize = x.mSize; + + eastl::advance(mBegin, eastl::distance(const_cast(x).c.begin(), x.mBegin)); // We can do a simple distance algorithm here, as there will be no wraparound. + eastl::advance(mEnd, eastl::distance(const_cast(x).c.begin(), x.mEnd)); + } + + return *this; + } + + + template + typename ring_buffer::this_type& + ring_buffer::operator=(this_type&& x) + { + swap(x); + return *this; + } + + + template + typename ring_buffer::this_type& + ring_buffer::operator=(std::initializer_list ilist) + { + assign(ilist.begin(), ilist.end()); + return *this; + } + + + template + template + void ring_buffer::assign(InputIterator first, InputIterator last) + { + // To consider: We can make specializations of this for pointer-based + // iterators to PODs and turn the action into a memcpy. + clear(); + + for(; first != last; ++first) + push_back(*first); + } + + + template + void ring_buffer::swap(this_type& x) + { + if(&x != this) + { + const difference_type dBegin = eastl::distance(c.begin(), mBegin); // We can do a simple distance algorithm here, as there will be no wraparound. + const difference_type dEnd = eastl::distance(c.begin(), mEnd); + + const difference_type dxBegin = eastl::distance(x.c.begin(), x.mBegin); + const difference_type dxEnd = eastl::distance(x.c.begin(), x.mEnd); + + eastl::swap(c, x.c); + eastl::swap(mSize, x.mSize); + + mBegin = c.begin(); + eastl::advance(mBegin, dxBegin); // We can do a simple advance algorithm here, as there will be no wraparound. + + mEnd = c.begin(); + eastl::advance(mEnd, dxEnd); + + x.mBegin = x.c.begin(); + eastl::advance(x.mBegin, dBegin); + + x.mEnd = x.c.begin(); + eastl::advance(x.mEnd, dEnd); + } + } + + + template + typename ring_buffer::iterator + ring_buffer::begin() EA_NOEXCEPT + { + return iterator(&c, mBegin); + } + + + template + typename ring_buffer::const_iterator + ring_buffer::begin() const EA_NOEXCEPT + { + return const_iterator(const_cast(&c), mBegin); // We trust that the const_iterator will respect const-ness. 
+ } + + + template + typename ring_buffer::const_iterator + ring_buffer::cbegin() const EA_NOEXCEPT + { + return const_iterator(const_cast(&c), mBegin); // We trust that the const_iterator will respect const-ness. + } + + + template + typename ring_buffer::iterator + ring_buffer::end() EA_NOEXCEPT + { + return iterator(&c, mEnd); + } + + + template + typename ring_buffer::const_iterator + ring_buffer::end() const EA_NOEXCEPT + { + return const_iterator(const_cast(&c), mEnd); // We trust that the const_iterator will respect const-ness. + } + + + template + typename ring_buffer::const_iterator + ring_buffer::cend() const EA_NOEXCEPT + { + return const_iterator(const_cast(&c), mEnd); // We trust that the const_iterator will respect const-ness. + } + + + template + typename ring_buffer::reverse_iterator + ring_buffer::rbegin() EA_NOEXCEPT + { + return reverse_iterator(iterator(&c, mEnd)); + } + + + template + typename ring_buffer::const_reverse_iterator + ring_buffer::rbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(const_iterator(const_cast(&c), mEnd)); + } + + + template + typename ring_buffer::const_reverse_iterator + ring_buffer::crbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(const_iterator(const_cast(&c), mEnd)); + } + + + template + typename ring_buffer::reverse_iterator + ring_buffer::rend() EA_NOEXCEPT + { + return reverse_iterator(iterator(&c, mBegin)); + } + + + template + typename ring_buffer::const_reverse_iterator + ring_buffer::rend() const EA_NOEXCEPT + { + return const_reverse_iterator(const_iterator(const_cast(&c), mBegin)); + } + + + template + typename ring_buffer::const_reverse_iterator + ring_buffer::crend() const EA_NOEXCEPT + { + return const_reverse_iterator(const_iterator(const_cast(&c), mBegin)); + } + + + template + bool ring_buffer::empty() const EA_NOEXCEPT + { + return mBegin == mEnd; + } + + + template + bool ring_buffer::full() const EA_NOEXCEPT + { + // Implementation that relies on c.size() being a fast operation: + // return mSize == (c.size() - 1); // (c.size() - 1) == capacity(); we are attempting to reduce function calls. + + // Version that has constant speed guarantees, but is still pretty fast. + const_iterator afterEnd(end()); + ++afterEnd; + return afterEnd.mContainerIterator == mBegin; + } + + + template + typename ring_buffer::size_type + ring_buffer::size() const EA_NOEXCEPT + { + return mSize; + + // Alternatives: + // return eastl::distance(begin(), end()); + // return end() - begin(); // This is more direct than using distance(). + //typedef typename eastl::iterator_traits::iterator_category IC; + //return DoGetSize(IC()); // This is more direct than using iterator math. + } + + + /* + template + typename ring_buffer::size_type + ring_buffer::DoGetSize(EASTL_ITC_NS::input_iterator_tag) const + { + // We could alternatively just use eastl::distance() here, but we happen to + // know that such code would boil down to what we have here, and we might + // as well remove function calls where possible. 
+ difference_type d = 0; + + for(const_iterator temp(begin()), tempEnd(end()); temp != tempEnd; ++temp) + ++d; + + return (size_type)d; + } + */ + + /* + template + typename ring_buffer::size_type + ring_buffer::DoGetSize(EASTL_ITC_NS::random_access_iterator_tag) const + { + // A simpler but less efficient implementation fo this function would be: + // return eastl::distance(mBegin, mEnd); + // + // The calculation of distance here takes advantage of the fact that random + // access iterators' distances can be calculated by simple pointer calculation. + // Thus the code below boils down to a few subtractions when using a vector, + // string, or array as the Container type. + // + const difference_type dBegin = eastl::distance(const_cast(c).begin(), mBegin); // const_cast here solves a little compiler + const difference_type dEnd = eastl::distance(const_cast(c).begin(), mEnd); // argument matching problem. + + if(dEnd >= dBegin) + return dEnd - dBegin; + + return c.size() - (dBegin - dEnd); + } + */ + + + namespace Internal + { + /////////////////////////////////////////////////////////////// + // has_overflow_allocator + // + // returns true_type when the specified container type is an + // eastl::fixed_* container and therefore has an overflow + // allocator type. + // + template + struct has_overflow_allocator : false_type {}; + + template + struct has_overflow_allocator().get_overflow_allocator())>> : true_type {}; + + + /////////////////////////////////////////////////////////////// + // GetFixedContainerCtorAllocator + // + // eastl::fixed_* containers are only constructible via their + // overflow allocator type. This helper select the appropriate + // allocator from the specified container. + // + template ()()> + struct GetFixedContainerCtorAllocator + { + auto& operator()(Container& c) { return c.get_overflow_allocator(); } + }; + + template + struct GetFixedContainerCtorAllocator + { + auto& operator()(Container& c) { return c.get_allocator(); } + }; + } // namespace Internal + + + /////////////////////////////////////////////////////////////// + // ContainerTemporary + // + // Helper type which prevents utilizing excessive stack space + // when creating temporaries when swapping/copying the underlying + // ring_buffer container type. + // + template = EASTL_MAX_STACK_USAGE)> + struct ContainerTemporary + { + Container mContainer; + + ContainerTemporary(Container& parentContainer) + : mContainer(Internal::GetFixedContainerCtorAllocator{}(parentContainer)) + { + } + + Container& get() { return mContainer; } + }; + + template + struct ContainerTemporary + { + typename Container::allocator_type* mAllocator; + Container* mContainer; + + ContainerTemporary(Container& parentContainer) + : mAllocator(&parentContainer.get_allocator()) + , mContainer(new (mAllocator->allocate(sizeof(Container))) Container) + { + } + + ~ContainerTemporary() + { + mContainer->~Container(); + mAllocator->deallocate(mContainer, sizeof(Container)); + } + + Container& get() { return *mContainer; } + }; + + + template + void ring_buffer::resize(size_type n) + { + // Note that if n > size(), we just move the end position out to + // the begin + n, with the data being the old end and the new end + // being stale values from the past. This is by design, as the concept + // of arbitrarily resizing a ring buffer like this is currently deemed + // to be vague in what it intends to do. We can only assume that the + // user knows what he is doing and will deal with the stale values. 
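+		// Editor's sketch (an assumption for illustration, not original source): growing the size
+		// exposes whatever values were last stored in the underlying container.
+		//     ring_buffer<int> rb(4);
+		//     rb.push_back(1); rb.push_back(2);   // size() == 2
+		//     rb.resize(4);                       // size() == 4; rb[2] and rb[3] hold stale values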
+ EASTL_ASSERT(c.size() >= 1); + const size_type cap = (c.size() - 1); + + mSize = n; + + if(n > cap) // If we need to grow in capacity... + { + // Given that a growing operation will always result in memory allocation, + // we currently implement this function via the usage of a temp container. + // This makes for a simple implementation, but in some cases it is less + // efficient. In particular, if the container is a node-based container like + // a (linked) list, this function would be faster if we simply added nodes + // to ourself. We would do this by inserting the nodes to be after end() + // and adjusting the begin() position if it was after end(). + + // To do: This code needs to be amended to deal with possible exceptions + // that could occur during the resize call below. + + ContainerTemporary cTemp(c); + cTemp.get().resize(n + 1); + eastl::copy(begin(), end(), cTemp.get().begin()); + eastl::swap(c, cTemp.get()); + + mBegin = c.begin(); + mEnd = mBegin; + eastl::advance(mEnd, n); // We can do a simple advance algorithm on this because we know that mEnd will not wrap around. + } + else // We could do a check here for n != size(), but that would be costly and people don't usually resize things to their same size. + { + mEnd = mBegin; + + // eastl::advance(mEnd, n); // We *cannot* use this because there may be wraparound involved. + + // To consider: Possibly we should implement some more detailed logic to optimize the code here. + // We'd need to do different behaviour dending on whether the container iterator type is a + // random access iterator or otherwise. + + while(n--) + { + if(EASTL_UNLIKELY(++mEnd == c.end())) + mEnd = c.begin(); + } + } + } + + + template + typename ring_buffer::size_type + ring_buffer::capacity() const EA_NOEXCEPT + { + EASTL_ASSERT(c.size() >= 1); // This is required because even an empty ring_buffer has one unused termination element, somewhat like a \0 at the end of a C string. + + return (c.size() - 1); // Need to subtract one because the position at mEnd is unused. + } + + + template + void ring_buffer::set_capacity(size_type n) + { + const size_type capacity = (c.size() - 1); + + if(n != capacity) // If we need to change capacity... + { + ContainerTemporary cTemp(c); + cTemp.get().resize(n + 1); + + iterator itCopyBegin = begin(); + + if(n < mSize) // If we are shrinking the capacity, to less than our size... + { + eastl::advance(itCopyBegin, mSize - n); + mSize = n; + } + + eastl::copy(itCopyBegin, end(), cTemp.get().begin()); // The begin-end range may in fact be larger than n, in which case values will be overwritten. + eastl::swap(c, cTemp.get()); + + mBegin = c.begin(); + mEnd = mBegin; + eastl::advance(mEnd, mSize); // We can do a simple advance algorithm on this because we know that mEnd will not wrap around. + } + } + + + template + void ring_buffer::reserve(size_type n) + { + // We follow the pattern of vector and only do something if n > capacity. + EASTL_ASSERT(c.size() >= 1); + + if(n > (c.size() - 1)) // If we need to grow in capacity... // (c.size() - 1) == capacity(); we are attempting to reduce function calls. + { + ContainerTemporary cTemp(c); + cTemp.get().resize(n + 1); + eastl::copy(begin(), end(), cTemp.get().begin()); + eastl::swap(c, cTemp.get()); + + mBegin = c.begin(); + mEnd = mBegin; + eastl::advance(mEnd, mSize); // We can do a simple advance algorithm on this because we know that mEnd will not wrap around. 
+ } + } + + + template + typename ring_buffer::reference + ring_buffer::front() + { + return *mBegin; + } + + + template + typename ring_buffer::const_reference + ring_buffer::front() const + { + return *mBegin; + } + + + template + typename ring_buffer::reference + ring_buffer::back() + { + // return *(end() - 1); // Can't use this because not all iterators support operator-. + + iterator temp(end()); // To do: Find a way to construct this temporary in the return statement. + return *(--temp); // We can do it by making all our containers' iterators support operator-. + } + + + template + typename ring_buffer::const_reference + ring_buffer::back() const + { + // return *(end() - 1); // Can't use this because not all iterators support operator-. + + const_iterator temp(end()); // To do: Find a way to construct this temporary in the return statement. + return *(--temp); // We can do it by making all our containers' iterators support operator-. + } + + + /// A push_back operation on a ring buffer assigns the new value to end. + /// If there is no more space in the buffer, this will result in begin + /// being overwritten and the begin position being moved foward one position. + template + void ring_buffer::push_back(const value_type& value) + { + *mEnd = value; + + if(++mEnd == c.end()) + mEnd = c.begin(); + + if(mEnd == mBegin) + { + if(++mBegin == c.end()) + mBegin = c.begin(); + } + else + ++mSize; + } + + + /// A push_back operation on a ring buffer assigns the new value to end. + /// If there is no more space in the buffer, this will result in begin + /// being overwritten and the begin position being moved foward one position. + template + typename ring_buffer::reference + ring_buffer::push_back() + { + // We don't do the following assignment, as the value at mEnd is already constructed; + // it is merely possibly not default-constructed. However, the spirit of push_back + // is that the user intends to do an assignment or data modification after the + // push_back call. The user can always execute *back() = value_type() if he wants. + //*mEnd = value_type(); + + if(++mEnd == c.end()) + mEnd = c.begin(); + + if(mEnd == mBegin) + { + if(++mBegin == c.end()) + mBegin = c.begin(); + } + else + ++mSize; + + return back(); + } + + + template + void ring_buffer::pop_back() + { + EASTL_ASSERT(mEnd != mBegin); // We assume that size() > 0 and thus that there is something to pop. + + if(EASTL_UNLIKELY(mEnd == c.begin())) + mEnd = c.end(); + --mEnd; + --mSize; + } + + + template + void ring_buffer::push_front(const value_type& value) + { + if(EASTL_UNLIKELY(mBegin == c.begin())) + mBegin = c.end(); + + if(--mBegin == mEnd) + { + if(EASTL_UNLIKELY(mEnd == c.begin())) + mEnd = c.end(); + --mEnd; + } + else + ++mSize; + + *mBegin = value; + } + + + template + typename ring_buffer::reference + ring_buffer::push_front() + { + if(EASTL_UNLIKELY(mBegin == c.begin())) + mBegin = c.end(); + + if(--mBegin == mEnd) + { + if(EASTL_UNLIKELY(mEnd == c.begin())) + mEnd = c.end(); + --mEnd; + } + else + ++mSize; + + // See comments above in push_back for why we don't execute this: + // *mBegin = value_type(); + + return *mBegin; // Same as return front(); + } + + + template + void ring_buffer::pop_front() + { + EASTL_ASSERT(mBegin != mEnd); // We assume that mEnd > mBegin and thus that there is something to pop. 
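+		// Editor's usage note (an assumption, not from the original header): popping from an empty
+		// ring_buffer is only caught by the assert above, so callers are expected to guard it, e.g.
+		//     if(!rb.empty())
+		//         rb.pop_front();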
+ + if(++mBegin == c.end()) + mBegin = c.begin(); + --mSize; + } + + + template + typename ring_buffer::reference + ring_buffer::operator[](size_type n) + { + // return *(begin() + n); // Can't use this because not all iterators support operator+. + + // This should compile to code that is nearly as efficient as that above. + // The primary difference is the possible generation of a temporary in this case. + iterator temp(begin()); + eastl::advance(temp, n); + return *(temp.mContainerIterator); + } + + + template + typename ring_buffer::const_reference + ring_buffer::operator[](size_type n) const + { + // return *(begin() + n); // Can't use this because not all iterators support operator+. + + // This should compile to code that is nearly as efficient as that above. + // The primary difference is the possible generation of a temporary in this case. + const_iterator temp(begin()); + eastl::advance(temp, n); + return *(temp.mContainerIterator); + } + + + template + typename ring_buffer::iterator + ring_buffer::insert(const_iterator position, const value_type& value) + { + // To consider: It would be faster if we could tell that position was in the first + // half of the container and instead of moving things after the position back, + // we could move things before the position forward. + + iterator afterEnd(end()); + iterator beforeEnd(afterEnd); + + ++afterEnd; + + if(afterEnd.mContainerIterator == mBegin) // If we are at full capacity... + --beforeEnd; + else + push_back(); + + iterator itPosition(position.mpContainer, position.mContainerIterator); // We merely copy from const_iterator to iterator. + eastl::copy_backward(itPosition, beforeEnd, end()); + *itPosition = value; + + return itPosition; + } + + + template + void ring_buffer::insert(const_iterator position, size_type n, const value_type& value) + { + // To do: This can be improved with a smarter version. However, + // this is a little tricky because we need to deal with the case + // whereby n is greater than the size of the container itself. + while(n--) + insert(position, value); + } + + + template + void ring_buffer::insert(const_iterator position, std::initializer_list ilist) + { + insert(position, ilist.begin(), ilist.end()); + } + + + template + template + void ring_buffer::insert(const_iterator position, InputIterator first, InputIterator last) + { + // To do: This can possibly be improved with a smarter version. + // However, this can be tricky if distance(first, last) is greater + // than the size of the container itself. + for(; first != last; ++first, ++position) + insert(position, *first); + } + + + template + typename ring_buffer::iterator + ring_buffer::erase(const_iterator position) + { + iterator itPosition(position.mpContainer, position.mContainerIterator); // We merely copy from const_iterator to iterator. + iterator iNext(itPosition); + + eastl::copy(++iNext, end(), itPosition); + pop_back(); + + return itPosition; + } + + + template + typename ring_buffer::iterator + ring_buffer::erase(const_iterator first, const_iterator last) + { + iterator itFirst(first.mpContainer, first.mContainerIterator); // We merely copy from const_iterator to iterator. + iterator itLast(last.mpContainer, last.mContainerIterator); + + typename iterator::difference_type d = eastl::distance(itFirst, itLast); + + eastl::copy(itLast, end(), itFirst); + + while(d--) // To do: improve this implementation. 
+ pop_back(); + + return itFirst; + } + + + template + typename ring_buffer::reverse_iterator + ring_buffer::erase(const_reverse_iterator position) + { + return reverse_iterator(erase((++position).base())); + } + + + template + typename ring_buffer::reverse_iterator + ring_buffer::erase(const_reverse_iterator first, const_reverse_iterator last) + { + // Version which erases in order from first to last. + // difference_type i(first.base() - last.base()); + // while(i--) + // first = erase(first); + // return first; + + // Version which erases in order from last to first, but is slightly more efficient: + return reverse_iterator(erase((++last).base(), (++first).base())); + } + + + template + void ring_buffer::clear() + { + // Don't clear the container; we use its valid data for our elements. + mBegin = c.begin(); + mEnd = c.begin(); + mSize = 0; + } + + + template + typename ring_buffer::container_type& + ring_buffer::get_container() + { + return c; + } + + + template + const typename ring_buffer::container_type& + ring_buffer::get_container() const + { + return c; + } + + + template + inline bool ring_buffer::validate() const + { + if(!c.validate()) // This requires that the container implement the validate function. That pretty much + return false; // means that the container is an EASTL container and not a std STL container. + + if(c.empty()) // c must always have a size of at least 1, as even an empty ring_buffer has an unused terminating element. + return false; + + if(size() > capacity()) + return false; + + if((validate_iterator(begin()) & (isf_valid | isf_current)) != (isf_valid | isf_current)) + return false; + + if((validate_iterator(end()) & (isf_valid | isf_current)) != (isf_valid | isf_current)) + return false; + + // Verify that the size calculation is consistent. + size_type n = 0; + for(const_iterator i(begin()), iEnd(end()); i != iEnd; ++i) + ++n; + if(n != mSize) + return false; + + return true; + } + + + template + inline int ring_buffer::validate_iterator(const_iterator i) const + { + // To do: Replace this with a more efficient implementation if possible. 
+ + for(const_iterator temp = begin(), tempEnd = end(); temp != tempEnd; ++temp) + { + if(temp == i) + return (isf_valid | isf_current | isf_can_dereference); + } + + if(i == end()) + return (isf_valid | isf_current); + + return isf_none; + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const ring_buffer& a, const ring_buffer& b) + { + return (a.size() == b.size()) && (a.c == b.c); + } + + + template + inline bool operator<(const ring_buffer& a, const ring_buffer& b) + { + const typename ring_buffer::size_type sizeA = a.size(); + const typename ring_buffer::size_type sizeB = b.size(); + + if(sizeA == sizeB) + return (a.c < b.c); + return sizeA < sizeB; + } + + + template + inline bool operator!=(const ring_buffer& a, const ring_buffer& b) + { + return !(a == b); + } + + + template + inline bool operator>(const ring_buffer& a, const ring_buffer& b) + { + return (b < a); + } + + + template + inline bool operator<=(const ring_buffer& a, const ring_buffer& b) + { + return !(b < a); + } + + + template + inline bool operator>=(const ring_buffer& a, const ring_buffer& b) + { + return !(a < b); + } + + + template + inline void swap(ring_buffer& a, ring_buffer& b) + { + a.swap(b); + } + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/bonus/sort_extra.h b/libkram/eastl/include/EASTL/bonus/sort_extra.h new file mode 100644 index 00000000..5f9a0c46 --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/sort_extra.h @@ -0,0 +1,204 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +////////////////////////////////////////////////////////////////////////////// +// This file implements additional sort algorithms beyond the basic set. +// Included here are: +// selection_sort -- Unstable. +// shaker_sort -- Stable. +// bucket_sort -- Stable. +// +////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_SORT_EXTRA_H +#define EASTL_SORT_EXTRA_H + + +#include +#include +#include +#include +#include +#include // For backwards compatibility due to sorts moved from here to sort.h. +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + /// selection_sort + /// + /// Implements the SelectionSort algorithm. + /// + template + void selection_sort(ForwardIterator first, ForwardIterator last, StrictWeakOrdering compare) + { + ForwardIterator iCurrent, iMin; + + for(; first != last; ++first) + { + iCurrent = first; + iMin = iCurrent; + + for(++iCurrent; iCurrent != last; ++iCurrent) + { + if(compare(*iCurrent, *iMin)) + { + EASTL_VALIDATE_COMPARE(!compare(*iMin, *iCurrent)); // Validate that the compare function is sane. 
+ iMin = iCurrent; + } + } + + if(first != iMin) + eastl::iter_swap(first, iMin); + } + } // selection_sort + + template + inline void selection_sort(ForwardIterator first, ForwardIterator last) + { + typedef eastl::less::value_type> Less; + + eastl::selection_sort(first, last, Less()); + } + + + + /// shaker_sort + /// + /// Implements the ShakerSort algorithm, which is a sorting algorithm which + /// improves on bubble_sort by sweeping both from left to right and right + /// to left, resulting in less iteration. + /// + template + void shaker_sort(BidirectionalIterator first, BidirectionalIterator last, StrictWeakOrdering compare) + { + if(first != last) + { + BidirectionalIterator iCurrent, iNext, iLastModified; + + --last; + + while(first != last) + { + iLastModified = first; + + for(iCurrent = first; iCurrent != last; iCurrent = iNext) + { + iNext = iCurrent; + ++iNext; + + if(compare(*iNext, *iCurrent)) + { + EASTL_VALIDATE_COMPARE(!compare(*iCurrent, *iNext)); // Validate that the compare function is sane. + iLastModified = iCurrent; + eastl::iter_swap(iCurrent, iNext); + } + } + + last = iLastModified; + + if(first != last) + { + for(iCurrent = last; iCurrent != first; iCurrent = iNext) + { + iNext = iCurrent; + --iNext; + + if(compare(*iCurrent, *iNext)) + { + EASTL_VALIDATE_COMPARE(!compare(*iNext, *iCurrent)); // Validate that the compare function is sane. + iLastModified = iCurrent; + eastl::iter_swap(iNext, iCurrent); + } + } + first = iLastModified; + } + } + } + } // shaker_sort + + template + inline void shaker_sort(BidirectionalIterator first, BidirectionalIterator last) + { + typedef eastl::less::value_type> Less; + + eastl::shaker_sort(first, last, Less()); + } + + + + /// bucket_sort + /// + /// Implements the BucketSort algorithm. + /// + /// Example usage: + /// const size_t kElementRange = 32; + /// vector intArray(1000); + /// + /// for(int i = 0; i < 1000; i++) + /// intArray[i] = rand() % kElementRange; + /// + /// vector< vector > bucketArray(kElementRange); + /// bucket_sort(intArray.begin(), intArray.end(), bucketArray, eastl::hash_use_self()); + /// + template + struct hash_use_self + { + T operator()(const T& x) const + { return x; } + }; + + // Requires buckeyArray to be an array of arrays with a size equal to the range of values + // returned by the hash function. The hash function is required to return a unique value + // for each uniquely sorted element. Usually the way this is done is the elements are + // integers of a limited range (e.g. 0-64) and the hash function returns the element value + // itself. If you had a case where all elements were always even numbers (e.g. 0-128), + // you could use a custom hash function that returns (element value / 2). + // + // The user is required to provide an empty bucketArray to this function. This function returns + // with the bucketArray non-empty. This function doesn't clear the bucketArray because that takes + // time and the user might not need it to be cleared, at least at that time. 
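+	// Editor's sketch (hypothetical, not part of the original header) of such a custom hash
+	// functor for the even-numbers case described above (inputs 0..128, all even):
+	//     struct hash_halve { eastl_size_t operator()(int x) const { return (eastl_size_t)(x / 2); } };
+	//     vector< vector<int> > buckets(65);   // hash values 0..64
+	//     bucket_sort(intArray.begin(), intArray.end(), buckets, hash_halve());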
+ // + template + void bucket_sort(ForwardIterator first, ForwardIterator last, ContainerArray& bucketArray, HashFunction hash /*= hash_use_self*/) + { + for(ForwardIterator iInput = first; iInput != last; ++iInput) + bucketArray[hash(*iInput)].push_back(*iInput); + + for(typename ContainerArray::const_iterator iBucket = bucketArray.begin(); iBucket != bucketArray.end(); ++iBucket) + first = eastl::copy((*iBucket).begin(), (*iBucket).end(), first); + } + + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/bonus/tuple_vector.h b/libkram/eastl/include/EASTL/bonus/tuple_vector.h new file mode 100644 index 00000000..7123c57f --- /dev/null +++ b/libkram/eastl/include/EASTL/bonus/tuple_vector.h @@ -0,0 +1,1592 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// tuple_vector is a data container that is designed to abstract and simplify +// the handling of a "structure of arrays" layout of data in memory. In +// particular, it mimics the interface of vector, including functionality to do +// inserts, erases, push_backs, and random-access. It also provides a +// RandomAccessIterator and corresponding functionality, making it compatible +// with most STL (and STL-esque) algorithms such as ranged-for loops, find_if, +// remove_if, or sort. + +// When used or applied properly, this container can improve performance of +// some algorithms through cache-coherent data accesses or allowing for +// sensible SIMD programming, while keeping the structure of a single +// container, to permit a developer to continue to use existing algorithms in +// STL and the like. +// +// Consult doc/Bonus/tuple_vector_readme.md for more information. +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_TUPLEVECTOR_H +#define EASTL_TUPLEVECTOR_H + +#include +#include +#include +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + +EA_DISABLE_VC_WARNING(4244) // warning C4244: 'conversion from '___' to '___', possible loss of data +EA_DISABLE_VC_WARNING(4623) // warning C4623: default constructor was implicitly defined as deleted +EA_DISABLE_VC_WARNING(4625) // warning C4625: copy constructor was implicitly defined as deleted +EA_DISABLE_VC_WARNING(4510) // warning C4510: default constructor could not be generated + +namespace eastl +{ + /// EASTL_TUPLE_VECTOR_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_TUPLE_VECTOR_DEFAULT_NAME + #define EASTL_TUPLE_VECTOR_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " tuple-vector" // Unless the user overrides something, this is "EASTL tuple-vector". 
+ #endif + + + /// EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR + #define EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR allocator_type(EASTL_TUPLE_VECTOR_DEFAULT_NAME) + #endif + +namespace TupleVecInternal +{ + +// forward declarations +template +struct tuplevec_element; + +template +using tuplevec_element_t = typename tuplevec_element::type; + +template +struct TupleTypes {}; + +template +class TupleVecImpl; + +template +struct TupleRecurser; + +template +struct TupleIndexRecurser; + +template +struct TupleVecLeaf; + +template +struct TupleVecIter; + +// tuplevec_element helper to be able to isolate a type given an index +template +struct tuplevec_element +{ + static_assert(I != I, "tuplevec_element index out of range"); +}; + +template +struct tuplevec_element<0, T, Ts...> +{ + tuplevec_element() = delete; // tuplevec_element should only be used for compile-time assistance, and never be instantiated + typedef T type; +}; + +template +struct tuplevec_element +{ + typedef tuplevec_element_t type; +}; + +// attempt to isolate index given a type +template +struct tuplevec_index +{ +}; + +template +struct tuplevec_index> +{ + typedef void DuplicateTypeCheck; + tuplevec_index() = delete; // tuplevec_index should only be used for compile-time assistance, and never be instantiated + static const eastl_size_t index = 0; +}; + +template +struct tuplevec_index> +{ + typedef int DuplicateTypeCheck; + static_assert(is_void>::DuplicateTypeCheck>::value, "duplicate type T in tuple_vector::get(); unique types must be provided in declaration, or only use get()"); + + static const eastl_size_t index = 0; +}; + +template +struct tuplevec_index> +{ + typedef typename tuplevec_index>::DuplicateTypeCheck DuplicateTypeCheck; + static const eastl_size_t index = tuplevec_index>::index + 1; +}; + +template +struct tuplevec_index> : public tuplevec_index> +{ +}; + + +// helper to calculate the layout of the allocations for the tuple of types (esp. to take alignment into account) +template <> +struct TupleRecurser<> +{ + typedef eastl_size_t size_type; + + // This class should never be instantiated. This is just a helper for working with static functions when anonymous functions don't work + // and provide some other utilities + TupleRecurser() = delete; + + static EA_CONSTEXPR size_type GetTotalAlignment() + { + return 0; + } + + static EA_CONSTEXPR size_type GetTotalAllocationSize(size_type capacity, size_type offset) + { + EA_UNUSED(capacity); + return offset; + } + + template + static pair DoAllocate(TupleVecImpl &vec, void** ppNewLeaf, size_type capacity, size_type offset) + { + EA_UNUSED(ppNewLeaf); + + // If n is zero, then we allocate no memory and just return NULL. + // This is fine, as our default ctor initializes with NULL pointers. + size_type alignment = TupleRecurser::GetTotalAlignment(); + void* ptr = capacity ? 
allocate_memory(vec.get_allocator(), offset, alignment, 0) : nullptr; + + #if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY((size_t)ptr & (alignment - 1)) != 0) + { + EASTL_FAIL_MSG("tuple_vector::DoAllocate -- memory not alignment at requested alignment"); + } + #endif + + return make_pair(ptr, offset); + } + + template + static void SetNewData(TupleVecImplType &vec, void* pData, size_type capacity, size_type offset) + { + EA_UNUSED(vec); + EA_UNUSED(pData); + EA_UNUSED(capacity); + EA_UNUSED(offset); + } +}; + +template +struct TupleRecurser : TupleRecurser +{ + typedef eastl_size_t size_type; + + static EA_CONSTEXPR size_type GetTotalAlignment() + { + return max(static_cast(alignof(T)), TupleRecurser::GetTotalAlignment()); + } + + static EA_CONSTEXPR size_type GetTotalAllocationSize(size_type capacity, size_type offset) + { + return TupleRecurser::GetTotalAllocationSize(capacity, CalculateAllocationSize(offset, capacity)); + } + + template + static pair DoAllocate(TupleVecImpl &vec, void** ppNewLeaf, size_type capacity, size_type offset) + { + size_type allocationOffset = CalculatAllocationOffset(offset); + size_type allocationSize = CalculateAllocationSize(offset, capacity); + pair allocation = TupleRecurser::template DoAllocate( + vec, ppNewLeaf, capacity, allocationSize); + ppNewLeaf[I] = (void*)((uintptr_t)(allocation.first) + allocationOffset); + return allocation; + } + + template + static void SetNewData(TupleVecImplType &vec, void* pData, size_type capacity, size_type offset) + { + size_type allocationOffset = CalculatAllocationOffset(offset); + size_type allocationSize = CalculateAllocationSize(offset, capacity); + vec.TupleVecLeaf::mpData = (T*)((uintptr_t)pData + allocationOffset); + TupleRecurser::template SetNewData(vec, pData, capacity, allocationSize); + } + +private: + static EA_CONSTEXPR size_type CalculateAllocationSize(size_type offset, size_type capacity) + { + return CalculatAllocationOffset(offset) + sizeof(T) * capacity; + } + + static EA_CONSTEXPR size_type CalculatAllocationOffset(size_type offset) { return (offset + alignof(T) - 1) & (~alignof(T) + 1); } +}; + +template +struct TupleVecLeaf +{ + typedef eastl_size_t size_type; + + void DoUninitializedMoveAndDestruct(const size_type begin, const size_type end, T* pDest) + { + T* pBegin = mpData + begin; + T* pEnd = mpData + end; + eastl::uninitialized_move_ptr_if_noexcept(pBegin, pEnd, pDest); + eastl::destruct(pBegin, pEnd); + } + + void DoInsertAndFill(size_type pos, size_type n, size_type numElements, const T& arg) + { + T* pDest = mpData + pos; + T* pDataEnd = mpData + numElements; + const T temp = arg; + const size_type nExtra = (numElements - pos); + if (n < nExtra) // If the inserted values are entirely within initialized memory (i.e. are before mpEnd)... + { + eastl::uninitialized_move_ptr(pDataEnd - n, pDataEnd, pDataEnd); + eastl::move_backward(pDest, pDataEnd - n, pDataEnd); // We need move_backward because of potential overlap issues. 
+ eastl::fill(pDest, pDest + n, temp); + } + else + { + eastl::uninitialized_fill_n_ptr(pDataEnd, n - nExtra, temp); + eastl::uninitialized_move_ptr(pDest, pDataEnd, pDataEnd + n - nExtra); + eastl::fill(pDest, pDataEnd, temp); + } + } + + void DoInsertRange(T* pSrcBegin, T* pSrcEnd, T* pDestBegin, size_type numDataElements) + { + size_type pos = pDestBegin - mpData; + size_type n = pSrcEnd - pSrcBegin; + T* pDataEnd = mpData + numDataElements; + const size_type nExtra = numDataElements - pos; + if (n < nExtra) // If the inserted values are entirely within initialized memory (i.e. are before mpEnd)... + { + eastl::uninitialized_move_ptr(pDataEnd - n, pDataEnd, pDataEnd); + eastl::move_backward(pDestBegin, pDataEnd - n, pDataEnd); // We need move_backward because of potential overlap issues. + eastl::copy(pSrcBegin, pSrcEnd, pDestBegin); + } + else + { + eastl::uninitialized_copy(pSrcEnd - (n - nExtra), pSrcEnd, pDataEnd); + eastl::uninitialized_move_ptr(pDestBegin, pDataEnd, pDataEnd + n - nExtra); + eastl::copy(pSrcBegin, pSrcEnd - (n - nExtra), pDestBegin); + } + } + + void DoInsertValue(size_type pos, size_type numElements, T&& arg) + { + T* pDest = mpData + pos; + T* pDataEnd = mpData + numElements; + + eastl::uninitialized_move_ptr(pDataEnd - 1, pDataEnd, pDataEnd); + eastl::move_backward(pDest, pDataEnd - 1, pDataEnd); // We need move_backward because of potential overlap issues. + eastl::destruct(pDest); + ::new (pDest) T(eastl::forward(arg)); + } + + T* mpData = nullptr; +}; + +// swallow allows for parameter pack expansion of arguments as means of expanding operations performed +// if a void function is used for operation expansion, it should be wrapped in (..., 0) so that the compiler +// thinks it has a parameter to pass into the function +template +void swallow(Ts&&...) { } + +inline bool variadicAnd(bool cond) { return cond; } + +inline bool variadicAnd(bool cond, bool conds...) { return cond && variadicAnd(conds); } + +// Helper struct to check for strict compatibility between two iterators, whilst still allowing for +// conversion between TupleVecImpl::iterator and TupleVecImpl::const_iterator. +template +struct TupleVecIterCompatibleImpl : public false_type { }; + +template<> +struct TupleVecIterCompatibleImpl, TupleTypes<>> : public true_type { }; + +template +struct TupleVecIterCompatibleImpl, TupleTypes> : public integral_constant, TupleTypes>::value && + is_same::type, typename remove_const::type>::value > +{ }; + +template +struct TupleVecIterCompatible; + +template +struct TupleVecIterCompatible, TupleTypes> : + public TupleVecIterCompatibleImpl, TupleTypes> +{ }; + +// The Iterator operates by storing a persistent index internally, +// and resolving the tuple of pointers to the various parts of the original tupleVec when dereferenced. 
+// While resolving the tuple is a non-zero operation, it consistently generated better code than the alternative of +// storing - and harmoniously updating on each modification - a full tuple of pointers to the tupleVec's data +template +struct TupleVecIter, Ts...> + : public iterator, eastl_size_t, tuple, tuple> +{ +private: + typedef TupleVecIter, Ts...> this_type; + typedef eastl_size_t size_type; + + typedef iterator, eastl_size_t, tuple, tuple> iter_type; + + template + friend struct TupleVecIter; + + template + friend class TupleVecImpl; + + template + friend class move_iterator; +public: + typedef typename iter_type::iterator_category iterator_category; + typedef typename iter_type::value_type value_type; + typedef typename iter_type::difference_type difference_type; + typedef typename iter_type::pointer pointer; + typedef typename iter_type::reference reference; + + TupleVecIter() = default; + + template + TupleVecIter(VecImplType* tupleVec, size_type index) + : mIndex(index) + , mpData{(void*)tupleVec->TupleVecLeaf::mpData...} + { } + + template , TupleTypes>::value, bool>::type> + TupleVecIter(const TupleVecIter& other) + : mIndex(other.mIndex) + , mpData{other.mpData[Indices]...} + { + } + + bool operator==(const TupleVecIter& other) const { return mIndex == other.mIndex && mpData[0] == other.mpData[0]; } + bool operator!=(const TupleVecIter& other) const { return mIndex != other.mIndex || mpData[0] != other.mpData[0]; } + reference operator*() const { return MakeReference(); } + + this_type& operator++() { ++mIndex; return *this; } + this_type operator++(int) + { + this_type temp = *this; + ++mIndex; + return temp; + } + + this_type& operator--() { --mIndex; return *this; } + this_type operator--(int) + { + this_type temp = *this; + --mIndex; + return temp; + } + + this_type& operator+=(difference_type n) { mIndex += n; return *this; } + this_type operator+(difference_type n) const + { + this_type temp = *this; + return temp += n; + } + friend this_type operator+(difference_type n, const this_type& rhs) + { + this_type temp = rhs; + return temp += n; + } + + this_type& operator-=(difference_type n) { mIndex -= n; return *this; } + this_type operator-(difference_type n) const + { + this_type temp = *this; + return temp -= n; + } + friend this_type operator-(difference_type n, const this_type& rhs) + { + this_type temp = rhs; + return temp -= n; + } + + difference_type operator-(const this_type& rhs) const { return mIndex - rhs.mIndex; } + bool operator<(const this_type& rhs) const { return mIndex < rhs.mIndex; } + bool operator>(const this_type& rhs) const { return mIndex > rhs.mIndex; } + bool operator>=(const this_type& rhs) const { return mIndex >= rhs.mIndex; } + bool operator<=(const this_type& rhs) const { return mIndex <= rhs.mIndex; } + + reference operator[](const size_type n) const + { + return *(*this + n); + } + +private: + + value_type MakeValue() const + { + return value_type(((Ts*)mpData[Indices])[mIndex]...); + } + + reference MakeReference() const + { + return reference(((Ts*)mpData[Indices])[mIndex]...); + } + + pointer MakePointer() const + { + return pointer(&((Ts*)mpData[Indices])[mIndex]...); + } + + size_type mIndex = 0; + const void* mpData[sizeof...(Ts)]; +}; + +// TupleVecImpl +template +class TupleVecImpl, Ts...> : public TupleVecLeaf... 
+{ + typedef Allocator allocator_type; + typedef index_sequence index_sequence_type; + typedef TupleVecImpl this_type; + typedef TupleVecImpl const_this_type; + +public: + typedef TupleVecInternal::TupleVecIter iterator; + typedef TupleVecInternal::TupleVecIter const_iterator; + typedef eastl::reverse_iterator reverse_iterator; + typedef eastl::reverse_iterator const_reverse_iterator; + typedef eastl_size_t size_type; + typedef eastl::tuple value_tuple; + typedef eastl::tuple reference_tuple; + typedef eastl::tuple const_reference_tuple; + typedef eastl::tuple ptr_tuple; + typedef eastl::tuple const_ptr_tuple; + typedef eastl::tuple rvalue_tuple; + + TupleVecImpl() + : mDataSizeAndAllocator(0, EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + {} + + TupleVecImpl(const allocator_type& allocator) + : mDataSizeAndAllocator(0, allocator) + {} + + TupleVecImpl(this_type&& x) + : mDataSizeAndAllocator(0, eastl::move(x.get_allocator())) + { + swap(x); + } + + TupleVecImpl(this_type&& x, const Allocator& allocator) + : mDataSizeAndAllocator(0, allocator) + { + if (get_allocator() == x.get_allocator()) // If allocators are equivalent, then we can safely swap member-by-member + { + swap(x); + } + else + { + this_type temp(eastl::move(*this)); + temp.swap(x); + } + } + + TupleVecImpl(const this_type& x) + : mDataSizeAndAllocator(0, x.get_allocator()) + { + DoInitFromIterator(x.begin(), x.end()); + } + + template + TupleVecImpl(const TupleVecImpl& x, const Allocator& allocator) + : mDataSizeAndAllocator(0, allocator) + { + DoInitFromIterator(x.begin(), x.end()); + } + + template + TupleVecImpl(move_iterator begin, move_iterator end, const allocator_type& allocator = EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : mDataSizeAndAllocator(0, allocator) + { + DoInitFromIterator(begin, end); + } + + TupleVecImpl(const_iterator begin, const_iterator end, const allocator_type& allocator = EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : mDataSizeAndAllocator(0, allocator ) + { + DoInitFromIterator(begin, end); + } + + TupleVecImpl(size_type n, const allocator_type& allocator = EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : mDataSizeAndAllocator(0, allocator) + { + DoInitDefaultFill(n); + } + + TupleVecImpl(size_type n, const Ts&... args) + : mDataSizeAndAllocator(0, EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + { + DoInitFillArgs(n, args...); + } + + TupleVecImpl(size_type n, const Ts&... 
args, const allocator_type& allocator) + : mDataSizeAndAllocator(0, allocator) + { + DoInitFillArgs(n, args...); + } + + TupleVecImpl(size_type n, const_reference_tuple tup, const allocator_type& allocator = EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : mDataSizeAndAllocator(0, allocator) + { + DoInitFillTuple(n, tup); + } + + TupleVecImpl(const value_tuple* first, const value_tuple* last, const allocator_type& allocator = EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : mDataSizeAndAllocator(0, allocator) + { + DoInitFromTupleArray(first, last); + } + + TupleVecImpl(std::initializer_list iList, const allocator_type& allocator = EASTL_TUPLE_VECTOR_DEFAULT_ALLOCATOR) + : mDataSizeAndAllocator(0, allocator) + { + DoInitFromTupleArray(iList.begin(), iList.end()); + } + +protected: + // ctor to provide a pre-allocated field of data that the container will own, specifically for fixed_tuple_vector + TupleVecImpl(const allocator_type& allocator, void* pData, size_type capacity, size_type dataSize) + : mpData(pData), mNumCapacity(capacity), mDataSizeAndAllocator(dataSize, allocator) + { + TupleRecurser::template SetNewData(*this, mpData, mNumCapacity, 0); + } + +public: + ~TupleVecImpl() + { + swallow((eastl::destruct(TupleVecLeaf::mpData, TupleVecLeaf::mpData + mNumElements), 0)...); + if (mpData) + EASTLFree(get_allocator(), mpData, internalDataSize()); + } + + void assign(size_type n, const Ts&... args) + { + if (n > mNumCapacity) + { + this_type temp(n, args..., get_allocator()); // We have little choice but to reallocate with new memory. + swap(temp); + } + else if (n > mNumElements) // If n > mNumElements ... + { + size_type oldNumElements = mNumElements; + swallow((eastl::fill(TupleVecLeaf::mpData, TupleVecLeaf::mpData + oldNumElements, args), 0)...); + swallow((eastl::uninitialized_fill_ptr(TupleVecLeaf::mpData + oldNumElements, + TupleVecLeaf::mpData + n, args), 0)...); + mNumElements = n; + } + else // else 0 <= n <= mNumElements + { + swallow((eastl::fill(TupleVecLeaf::mpData, TupleVecLeaf::mpData + n, args), 0)...); + erase(begin() + n, end()); + } + } + + void assign(const_iterator first, const_iterator last) + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(!validate_iterator_pair(first, last))) + EASTL_FAIL_MSG("tuple_vector::assign -- invalid iterator pair"); +#endif + size_type newNumElements = last - first; + if (newNumElements > mNumCapacity) + { + this_type temp(first, last, get_allocator()); + swap(temp); + } + else + { + const void* ppOtherData[sizeof...(Ts)] = {first.mpData[Indices]...}; + size_type firstIdx = first.mIndex; + size_type lastIdx = last.mIndex; + if (newNumElements > mNumElements) // If n > mNumElements ... 
+ { + size_type oldNumElements = mNumElements; + swallow((eastl::copy((Ts*)(ppOtherData[Indices]) + firstIdx, + (Ts*)(ppOtherData[Indices]) + firstIdx + oldNumElements, + TupleVecLeaf::mpData), 0)...); + swallow((eastl::uninitialized_copy_ptr((Ts*)(ppOtherData[Indices]) + firstIdx + oldNumElements, + (Ts*)(ppOtherData[Indices]) + lastIdx, + TupleVecLeaf::mpData + oldNumElements), 0)...); + mNumElements = newNumElements; + } + else // else 0 <= n <= mNumElements + { + swallow((eastl::copy((Ts*)(ppOtherData[Indices]) + firstIdx, (Ts*)(ppOtherData[Indices]) + lastIdx, + TupleVecLeaf::mpData), 0)...); + erase(begin() + newNumElements, end()); + } + } + } + + void assign(const value_tuple* first, const value_tuple* last) + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(first > last || first == nullptr || last == nullptr)) + EASTL_FAIL_MSG("tuple_vector::assign from tuple array -- invalid ptrs"); +#endif + size_type newNumElements = last - first; + if (newNumElements > mNumCapacity) + { + this_type temp(first, last, get_allocator()); + swap(temp); + } + else + { + if (newNumElements > mNumElements) // If n > mNumElements ... + { + size_type oldNumElements = mNumElements; + + DoCopyFromTupleArray(begin(), begin() + oldNumElements, first); + DoUninitializedCopyFromTupleArray(begin() + oldNumElements, begin() + newNumElements, first + oldNumElements); + mNumElements = newNumElements; + } + else // else 0 <= n <= mNumElements + { + DoCopyFromTupleArray(begin(), begin() + newNumElements, first); + erase(begin() + newNumElements, end()); + } + } + } + + reference_tuple push_back() + { + size_type oldNumElements = mNumElements; + size_type newNumElements = oldNumElements + 1; + size_type oldNumCapacity = mNumCapacity; + mNumElements = newNumElements; + DoGrow(oldNumElements, oldNumCapacity, newNumElements); + swallow(::new(TupleVecLeaf::mpData + oldNumElements) Ts()...); + return back(); + } + + void push_back(const Ts&... args) + { + size_type oldNumElements = mNumElements; + size_type newNumElements = oldNumElements + 1; + size_type oldNumCapacity = mNumCapacity; + mNumElements = newNumElements; + DoGrow(oldNumElements, oldNumCapacity, newNumElements); + swallow(::new(TupleVecLeaf::mpData + oldNumElements) Ts(args)...); + } + + void push_back_uninitialized() + { + size_type oldNumElements = mNumElements; + size_type newNumElements = oldNumElements + 1; + size_type oldNumCapacity = mNumCapacity; + mNumElements = newNumElements; + DoGrow(oldNumElements, oldNumCapacity, newNumElements); + } + + reference_tuple emplace_back(Ts&&... args) + { + size_type oldNumElements = mNumElements; + size_type newNumElements = oldNumElements + 1; + size_type oldNumCapacity = mNumCapacity; + mNumElements = newNumElements; + DoGrow(oldNumElements, oldNumCapacity, newNumElements); + swallow(::new(TupleVecLeaf::mpData + oldNumElements) Ts(eastl::forward(args))...); + return back(); + } + + iterator emplace(const_iterator pos, Ts&&... 
args) + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(validate_iterator(pos) == isf_none)) + EASTL_FAIL_MSG("tuple_vector::emplace -- invalid iterator"); +#endif + size_type firstIdx = pos - cbegin(); + size_type oldNumElements = mNumElements; + size_type newNumElements = mNumElements + 1; + size_type oldNumCapacity = mNumCapacity; + mNumElements = newNumElements; + if (newNumElements > oldNumCapacity || firstIdx != oldNumElements) + { + if (newNumElements > oldNumCapacity) + { + const size_type newCapacity = eastl::max(GetNewCapacity(oldNumCapacity), newNumElements); + + void* ppNewLeaf[sizeof...(Ts)]; + pair allocation = TupleRecurser::template DoAllocate( + *this, ppNewLeaf, newCapacity, 0); + + swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( + 0, firstIdx, (Ts*)ppNewLeaf[Indices]), 0)...); + swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( + firstIdx, oldNumElements, (Ts*)ppNewLeaf[Indices] + firstIdx + 1), 0)...); + swallow(::new ((Ts*)ppNewLeaf[Indices] + firstIdx) Ts(eastl::forward(args))...); + swallow(TupleVecLeaf::mpData = (Ts*)ppNewLeaf[Indices]...); + + EASTLFree(get_allocator(), mpData, internalDataSize()); + mpData = allocation.first; + mNumCapacity = newCapacity; + internalDataSize() = allocation.second; + } + else + { + swallow((TupleVecLeaf::DoInsertValue(firstIdx, oldNumElements, eastl::forward(args)), 0)...); + } + } + else + { + swallow(::new (TupleVecLeaf::mpData + oldNumElements) Ts(eastl::forward(args))...); + } + return begin() + firstIdx; + } + + iterator insert(const_iterator pos, size_type n, const Ts&... args) + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(validate_iterator(pos) == isf_none)) + EASTL_FAIL_MSG("tuple_vector::insert -- invalid iterator"); +#endif + size_type firstIdx = pos - cbegin(); + size_type lastIdx = firstIdx + n; + size_type oldNumElements = mNumElements; + size_type newNumElements = mNumElements + n; + size_type oldNumCapacity = mNumCapacity; + mNumElements = newNumElements; + if (newNumElements > oldNumCapacity || firstIdx != oldNumElements) + { + if (newNumElements > oldNumCapacity) + { + const size_type newCapacity = eastl::max(GetNewCapacity(oldNumCapacity), newNumElements); + + void* ppNewLeaf[sizeof...(Ts)]; + pair allocation = TupleRecurser::template DoAllocate( + *this, ppNewLeaf, newCapacity, 0); + + swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( + 0, firstIdx, (Ts*)ppNewLeaf[Indices]), 0)...); + swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( + firstIdx, oldNumElements, (Ts*)ppNewLeaf[Indices] + lastIdx), 0)...); + swallow((eastl::uninitialized_fill_ptr((Ts*)ppNewLeaf[Indices] + firstIdx, (Ts*)ppNewLeaf[Indices] + lastIdx, args), 0)...); + swallow(TupleVecLeaf::mpData = (Ts*)ppNewLeaf[Indices]...); + + EASTLFree(get_allocator(), mpData, internalDataSize()); + mpData = allocation.first; + mNumCapacity = newCapacity; + internalDataSize() = allocation.second; + } + else + { + swallow((TupleVecLeaf::DoInsertAndFill(firstIdx, n, oldNumElements, args), 0)...); + } + } + else + { + swallow((eastl::uninitialized_fill_ptr(TupleVecLeaf::mpData + oldNumElements, + TupleVecLeaf::mpData + newNumElements, args), 0)...); + } + return begin() + firstIdx; + } + + iterator insert(const_iterator pos, const_iterator first, const_iterator last) + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(validate_iterator(pos) == isf_none)) + EASTL_FAIL_MSG("tuple_vector::insert -- invalid iterator"); + if (EASTL_UNLIKELY(!validate_iterator_pair(first, last))) + EASTL_FAIL_MSG("tuple_vector::insert -- invalid iterator pair"); 
+#endif + size_type posIdx = pos - cbegin(); + size_type firstIdx = first.mIndex; + size_type lastIdx = last.mIndex; + size_type numToInsert = last - first; + size_type oldNumElements = mNumElements; + size_type newNumElements = oldNumElements + numToInsert; + size_type oldNumCapacity = mNumCapacity; + mNumElements = newNumElements; + const void* ppOtherData[sizeof...(Ts)] = {first.mpData[Indices]...}; + if (newNumElements > oldNumCapacity || posIdx != oldNumElements) + { + if (newNumElements > oldNumCapacity) + { + const size_type newCapacity = eastl::max(GetNewCapacity(oldNumCapacity), newNumElements); + + void* ppNewLeaf[sizeof...(Ts)]; + pair allocation = TupleRecurser::template DoAllocate( + *this, ppNewLeaf, newCapacity, 0); + + swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( + 0, posIdx, (Ts*)ppNewLeaf[Indices]), 0)...); + swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( + posIdx, oldNumElements, (Ts*)ppNewLeaf[Indices] + posIdx + numToInsert), 0)...); + swallow((eastl::uninitialized_copy_ptr((Ts*)(ppOtherData[Indices]) + firstIdx, + (Ts*)(ppOtherData[Indices]) + lastIdx, + (Ts*)ppNewLeaf[Indices] + posIdx), 0)...); + swallow(TupleVecLeaf::mpData = (Ts*)ppNewLeaf[Indices]...); + + EASTLFree(get_allocator(), mpData, internalDataSize()); + mpData = allocation.first; + mNumCapacity = newCapacity; + internalDataSize() = allocation.second; + } + else + { + swallow((TupleVecLeaf::DoInsertRange( + (Ts*)(ppOtherData[Indices]) + firstIdx, (Ts*)(ppOtherData[Indices]) + lastIdx, + TupleVecLeaf::mpData + posIdx, oldNumElements), 0)...); + } + } + else + { + swallow((eastl::uninitialized_copy_ptr((Ts*)(ppOtherData[Indices]) + firstIdx, + (Ts*)(ppOtherData[Indices]) + lastIdx, + TupleVecLeaf::mpData + posIdx), 0)...); + } + return begin() + posIdx; + } + + iterator insert(const_iterator pos, const value_tuple* first, const value_tuple* last) + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(validate_iterator(pos) == isf_none)) + EASTL_FAIL_MSG("tuple_vector::insert -- invalid iterator"); + if (EASTL_UNLIKELY(first > last || first == nullptr || last == nullptr)) + EASTL_FAIL_MSG("tuple_vector::insert -- invalid source pointers"); +#endif + size_type posIdx = pos - cbegin(); + size_type numToInsert = last - first; + size_type oldNumElements = mNumElements; + size_type newNumElements = oldNumElements + numToInsert; + size_type oldNumCapacity = mNumCapacity; + mNumElements = newNumElements; + if (newNumElements > oldNumCapacity || posIdx != oldNumElements) + { + if (newNumElements > oldNumCapacity) + { + const size_type newCapacity = eastl::max(GetNewCapacity(oldNumCapacity), newNumElements); + + void* ppNewLeaf[sizeof...(Ts)]; + pair allocation = TupleRecurser::template DoAllocate( + *this, ppNewLeaf, newCapacity, 0); + + swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( + 0, posIdx, (Ts*)ppNewLeaf[Indices]), 0)...); + swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct( + posIdx, oldNumElements, (Ts*)ppNewLeaf[Indices] + posIdx + numToInsert), 0)...); + + swallow(TupleVecLeaf::mpData = (Ts*)ppNewLeaf[Indices]...); + + // Do this after mpData is updated so that we can use new iterators + DoUninitializedCopyFromTupleArray(begin() + posIdx, begin() + posIdx + numToInsert, first); + + EASTLFree(get_allocator(), mpData, internalDataSize()); + mpData = allocation.first; + mNumCapacity = newCapacity; + internalDataSize() = allocation.second; + } + else + { + const size_type nExtra = oldNumElements - posIdx; + void* ppDataEnd[sizeof...(Ts)] = { (void*)(TupleVecLeaf::mpData + 
oldNumElements)... }; + void* ppDataBegin[sizeof...(Ts)] = { (void*)(TupleVecLeaf::mpData + posIdx)... }; + if (numToInsert < nExtra) // If the inserted values are entirely within initialized memory (i.e. are before mpEnd)... + { + swallow((eastl::uninitialized_move_ptr((Ts*)ppDataEnd[Indices] - numToInsert, + (Ts*)ppDataEnd[Indices], (Ts*)ppDataEnd[Indices]), 0)...); + // We need move_backward because of potential overlap issues. + swallow((eastl::move_backward((Ts*)ppDataBegin[Indices], + (Ts*)ppDataEnd[Indices] - numToInsert, (Ts*)ppDataEnd[Indices]), 0)...); + + DoCopyFromTupleArray(pos, pos + numToInsert, first); + } + else + { + size_type numToInitialize = numToInsert - nExtra; + swallow((eastl::uninitialized_move_ptr((Ts*)ppDataBegin[Indices], + (Ts*)ppDataEnd[Indices], (Ts*)ppDataEnd[Indices] + numToInitialize), 0)...); + + DoCopyFromTupleArray(pos, begin() + oldNumElements, first); + DoUninitializedCopyFromTupleArray(begin() + oldNumElements, pos + numToInsert, first + nExtra); + } + } + } + else + { + DoUninitializedCopyFromTupleArray(pos, pos + numToInsert, first); + } + return begin() + posIdx; + } + + iterator erase(const_iterator first, const_iterator last) + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(validate_iterator(first) == isf_none || validate_iterator(last) == isf_none)) + EASTL_FAIL_MSG("tuple_vector::erase -- invalid iterator"); + if (EASTL_UNLIKELY(!validate_iterator_pair(first, last))) + EASTL_FAIL_MSG("tuple_vector::erase -- invalid iterator pair"); +#endif + if (first != last) + { + size_type firstIdx = first - cbegin(); + size_type lastIdx = last - cbegin(); + size_type oldNumElements = mNumElements; + size_type newNumElements = oldNumElements - (lastIdx - firstIdx); + mNumElements = newNumElements; + swallow((eastl::move(TupleVecLeaf::mpData + lastIdx, + TupleVecLeaf::mpData + oldNumElements, + TupleVecLeaf::mpData + firstIdx), 0)...); + swallow((eastl::destruct(TupleVecLeaf::mpData + newNumElements, + TupleVecLeaf::mpData + oldNumElements), 0)...); + } + return begin() + first.mIndex; + } + + iterator erase_unsorted(const_iterator pos) + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(validate_iterator(pos) == isf_none)) + EASTL_FAIL_MSG("tuple_vector::erase_unsorted -- invalid iterator"); +#endif + size_type oldNumElements = mNumElements; + size_type newNumElements = oldNumElements - 1; + mNumElements = newNumElements; + swallow((eastl::move(TupleVecLeaf::mpData + newNumElements, + TupleVecLeaf::mpData + oldNumElements, + TupleVecLeaf::mpData + (pos - begin())), 0)...); + swallow((eastl::destruct(TupleVecLeaf::mpData + newNumElements, + TupleVecLeaf::mpData + oldNumElements), 0)...); + return begin() + pos.mIndex; + } + + void resize(size_type n) + { + size_type oldNumElements = mNumElements; + size_type oldNumCapacity = mNumCapacity; + mNumElements = n; + if (n > oldNumElements) + { + if (n > oldNumCapacity) + { + DoReallocate(oldNumElements, eastl::max(GetNewCapacity(oldNumCapacity), n)); + } + swallow((eastl::uninitialized_default_fill_n(TupleVecLeaf::mpData + oldNumElements, n - oldNumElements), 0)...); + } + else + { + swallow((eastl::destruct(TupleVecLeaf::mpData + n, + TupleVecLeaf::mpData + oldNumElements), 0)...); + } + } + + void resize(size_type n, const Ts&... 
args) + { + size_type oldNumElements = mNumElements; + size_type oldNumCapacity = mNumCapacity; + mNumElements = n; + if (n > oldNumElements) + { + if (n > oldNumCapacity) + { + DoReallocate(oldNumElements, eastl::max(GetNewCapacity(oldNumCapacity), n)); + } + swallow((eastl::uninitialized_fill_ptr(TupleVecLeaf::mpData + oldNumElements, + TupleVecLeaf::mpData + n, args), 0)...); + } + else + { + swallow((eastl::destruct(TupleVecLeaf::mpData + n, + TupleVecLeaf::mpData + oldNumElements), 0)...); + } + } + + void reserve(size_type n) + { + DoConditionalReallocate(mNumElements, mNumCapacity, n); + } + + void shrink_to_fit() + { + this_type temp(move_iterator(begin()), move_iterator(end()), get_allocator()); + swap(temp); + } + + void clear() EA_NOEXCEPT + { + size_type oldNumElements = mNumElements; + mNumElements = 0; + swallow((eastl::destruct(TupleVecLeaf::mpData, TupleVecLeaf::mpData + oldNumElements), 0)...); + } + + void pop_back() + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(mNumElements <= 0)) + EASTL_FAIL_MSG("tuple_vector::pop_back -- container is empty"); +#endif + size_type oldNumElements = mNumElements--; + swallow((eastl::destruct(TupleVecLeaf::mpData + oldNumElements - 1, + TupleVecLeaf::mpData + oldNumElements), 0)...); + } + + void swap(this_type& x) + { + swallow((eastl::swap(TupleVecLeaf::mpData, x.TupleVecLeaf::mpData), 0)...); + eastl::swap(mpData, x.mpData); + eastl::swap(mNumElements, x.mNumElements); + eastl::swap(mNumCapacity, x.mNumCapacity); + eastl::swap(get_allocator(), x.get_allocator()); + eastl::swap(internalDataSize(), x.internalDataSize()); + } + + void assign(size_type n, const_reference_tuple tup) { assign(n, eastl::get(tup)...); } + void assign(std::initializer_list iList) { assign(iList.begin(), iList.end()); } + + void push_back(Ts&&... args) { emplace_back(eastl::forward(args)...); } + void push_back(const_reference_tuple tup) { push_back(eastl::get(tup)...); } + void push_back(rvalue_tuple tup) { emplace_back(eastl::forward(eastl::get(tup))...); } + + void emplace_back(rvalue_tuple tup) { emplace_back(eastl::forward(eastl::get(tup))...); } + void emplace(const_iterator pos, rvalue_tuple tup) { emplace(pos, eastl::forward(eastl::get(tup))...); } + + iterator insert(const_iterator pos, const Ts&... args) { return insert(pos, 1, args...); } + iterator insert(const_iterator pos, Ts&&... 
args) { return emplace(pos, eastl::forward(args)...); } + iterator insert(const_iterator pos, rvalue_tuple tup) { return emplace(pos, eastl::forward(eastl::get(tup))...); } + iterator insert(const_iterator pos, const_reference_tuple tup) { return insert(pos, eastl::get(tup)...); } + iterator insert(const_iterator pos, size_type n, const_reference_tuple tup) { return insert(pos, n, eastl::get(tup)...); } + iterator insert(const_iterator pos, std::initializer_list iList) { return insert(pos, iList.begin(), iList.end()); } + + iterator erase(const_iterator pos) { return erase(pos, pos + 1); } + reverse_iterator erase(const_reverse_iterator pos) { return reverse_iterator(erase((pos + 1).base(), (pos).base())); } + reverse_iterator erase(const_reverse_iterator first, const_reverse_iterator last) { return reverse_iterator(erase((last).base(), (first).base())); } + reverse_iterator erase_unsorted(const_reverse_iterator pos) { return reverse_iterator(erase_unsorted((pos + 1).base())); } + + void resize(size_type n, const_reference_tuple tup) { resize(n, eastl::get(tup)...); } + + bool empty() const EA_NOEXCEPT { return mNumElements == 0; } + size_type size() const EA_NOEXCEPT { return mNumElements; } + size_type capacity() const EA_NOEXCEPT { return mNumCapacity; } + + iterator begin() EA_NOEXCEPT { return iterator(this, 0); } + const_iterator begin() const EA_NOEXCEPT { return const_iterator((const_this_type*)(this), 0); } + const_iterator cbegin() const EA_NOEXCEPT { return const_iterator((const_this_type*)(this), 0); } + + iterator end() EA_NOEXCEPT { return iterator(this, size()); } + const_iterator end() const EA_NOEXCEPT { return const_iterator((const_this_type*)(this), size()); } + const_iterator cend() const EA_NOEXCEPT { return const_iterator((const_this_type*)(this), size()); } + + reverse_iterator rbegin() EA_NOEXCEPT { return reverse_iterator(end()); } + const_reverse_iterator rbegin() const EA_NOEXCEPT { return const_reverse_iterator(end()); } + const_reverse_iterator crbegin() const EA_NOEXCEPT { return const_reverse_iterator(end()); } + + reverse_iterator rend() EA_NOEXCEPT { return reverse_iterator(begin()); } + const_reverse_iterator rend() const EA_NOEXCEPT { return const_reverse_iterator(begin()); } + const_reverse_iterator crend() const EA_NOEXCEPT { return const_reverse_iterator(begin()); } + + ptr_tuple data() EA_NOEXCEPT { return ptr_tuple(TupleVecLeaf::mpData...); } + const_ptr_tuple data() const EA_NOEXCEPT { return const_ptr_tuple(TupleVecLeaf::mpData...); } + + reference_tuple at(size_type n) + { +#if EASTL_EXCEPTIONS_ENABLED + if (EASTL_UNLIKELY(n >= mNumElements)) + throw std::out_of_range("tuple_vector::at -- out of range"); +#elif EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(n >= mNumElements)) + EASTL_FAIL_MSG("tuple_vector::at -- out of range"); +#endif + return reference_tuple(*(TupleVecLeaf::mpData + n)...); + } + + const_reference_tuple at(size_type n) const + { +#if EASTL_EXCEPTIONS_ENABLED + if (EASTL_UNLIKELY(n >= mNumElements)) + throw std::out_of_range("tuple_vector::at -- out of range"); +#elif EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(n >= mNumElements)) + EASTL_FAIL_MSG("tuple_vector::at -- out of range"); +#endif + return const_reference_tuple(*(TupleVecLeaf::mpData + n)...); + } + + reference_tuple operator[](size_type n) { return at(n); } + const_reference_tuple operator[](size_type n) const { return at(n); } + + reference_tuple front() + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(mNumElements == 0)) // We 
don't allow the user to reference an empty container. + EASTL_FAIL_MSG("tuple_vector::front -- empty vector"); + #else + // We allow the user to reference an empty container. + #endif + + return at(0); + } + + const_reference_tuple front() const + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(mNumElements == 0)) // We don't allow the user to reference an empty container. + EASTL_FAIL_MSG("tuple_vector::front -- empty vector"); + #else + // We allow the user to reference an empty container. + #endif + + return at(0); + } + + reference_tuple back() + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(mNumElements == 0)) // We don't allow the user to reference an empty container. + EASTL_FAIL_MSG("tuple_vector::back -- empty vector"); + #else + // We allow the user to reference an empty container. + #endif + + return at(size() - 1); + } + + const_reference_tuple back() const + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(mNumElements == 0)) // We don't allow the user to reference an empty container. + EASTL_FAIL_MSG("tuple_vector::back -- empty vector"); + #else + // We allow the user to reference an empty container. + #endif + + return at(size() - 1); + } + + template + tuplevec_element_t* get() + { + typedef tuplevec_element_t Element; + return TupleVecLeaf::mpData; + } + template + const tuplevec_element_t* get() const + { + typedef tuplevec_element_t Element; + return TupleVecLeaf::mpData; + } + + template + T* get() + { + typedef tuplevec_index> Index; + return TupleVecLeaf::mpData; + } + template + const T* get() const + { + typedef tuplevec_index> Index; + return TupleVecLeaf::mpData; + } + + this_type& operator=(const this_type& other) + { + if (this != &other) + { + clear(); + assign(other.begin(), other.end()); + } + return *this; + } + + this_type& operator=(this_type&& other) + { + if (this != &other) + { + swap(other); + } + return *this; + } + + this_type& operator=(std::initializer_list iList) + { + assign(iList.begin(), iList.end()); + return *this; + } + + bool validate() const EA_NOEXCEPT + { + if (mNumElements > mNumCapacity) + return false; + if (!(variadicAnd(mpData <= TupleVecLeaf::mpData...))) + return false; + void* pDataEnd = (void*)((uintptr_t)mpData + internalDataSize()); + if (!(variadicAnd(pDataEnd >= TupleVecLeaf::mpData...))) + return false; + return true; + } + + int validate_iterator(const_iterator iter) const EA_NOEXCEPT + { + if (!(variadicAnd(iter.mpData[Indices] == TupleVecLeaf::mpData...))) + return isf_none; + if (iter.mIndex < mNumElements) + return (isf_valid | isf_current | isf_can_dereference); + if (iter.mIndex <= mNumElements) + return (isf_valid | isf_current); + return isf_none; + } + + static bool validate_iterator_pair(const_iterator first, const_iterator last) EA_NOEXCEPT + { + return (first.mIndex <= last.mIndex) && variadicAnd(first.mpData[Indices] == last.mpData[Indices]...); + } + + template ::value, bool>::type> + int validate_iterator(Iterator iter) const EA_NOEXCEPT { return validate_iterator(unwrap_iterator(iter)); } + + template ::value, bool>::type> + static bool validate_iterator_pair(Iterator first, Iterator last) EA_NOEXCEPT { return validate_iterator_pair(unwrap_iterator(first), unwrap_iterator(last)); } + + allocator_type& get_allocator() EA_NOEXCEPT { return mDataSizeAndAllocator.second(); } + const allocator_type& get_allocator() const EA_NOEXCEPT { return mDataSizeAndAllocator.second(); } + + void 
set_allocator(const allocator_type& alloc) { mDataSizeAndAllocator.second() = alloc; } + +protected: + + void* mpData = nullptr; + size_type mNumElements = 0; + size_type mNumCapacity = 0; + + compressed_pair mDataSizeAndAllocator; + + size_type& internalDataSize() EA_NOEXCEPT { return mDataSizeAndAllocator.first(); } + size_type const& internalDataSize() const EA_NOEXCEPT { return mDataSizeAndAllocator.first(); } + + friend struct TupleRecurser<>; + template + friend struct TupleRecurser; + + template + void DoInitFromIterator(move_iterator begin, move_iterator end) + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(!validate_iterator_pair(begin, end))) + EASTL_FAIL_MSG("tuple_vector::erase -- invalid iterator pair"); +#endif + size_type newNumElements = (size_type)(end - begin); + const void* ppOtherData[sizeof...(Ts)] = { begin.base().mpData[Indices]... }; + size_type beginIdx = begin.base().mIndex; + size_type endIdx = end.base().mIndex; + DoConditionalReallocate(0, mNumCapacity, newNumElements); + mNumElements = newNumElements; + swallow((eastl::uninitialized_move_ptr(eastl::move_iterator((Ts*)(ppOtherData[Indices]) + beginIdx), + eastl::move_iterator((Ts*)(ppOtherData[Indices]) + endIdx), + TupleVecLeaf::mpData), 0)...); + } + + void DoInitFromIterator(const_iterator begin, const_iterator end) + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(!validate_iterator_pair(begin, end))) + EASTL_FAIL_MSG("tuple_vector::erase -- invalid iterator pair"); +#endif + size_type newNumElements = (size_type)(end - begin); + const void* ppOtherData[sizeof...(Ts)] = { begin.mpData[Indices]... }; + size_type beginIdx = begin.mIndex; + size_type endIdx = end.mIndex; + DoConditionalReallocate(0, mNumCapacity, newNumElements); + mNumElements = newNumElements; + swallow((eastl::uninitialized_copy_ptr((Ts*)(ppOtherData[Indices]) + beginIdx, + (Ts*)(ppOtherData[Indices]) + endIdx, + TupleVecLeaf::mpData), 0)...); + } + + void DoInitFillTuple(size_type n, const_reference_tuple tup) { DoInitFillArgs(n, eastl::get(tup)...); } + + void DoInitFillArgs(size_type n, const Ts&... 
args) + { + DoConditionalReallocate(0, mNumCapacity, n); + mNumElements = n; + swallow((eastl::uninitialized_fill_ptr(TupleVecLeaf::mpData, TupleVecLeaf::mpData + n, args), 0)...); + } + + void DoInitDefaultFill(size_type n) + { + DoConditionalReallocate(0, mNumCapacity, n); + mNumElements = n; + swallow((eastl::uninitialized_default_fill_n(TupleVecLeaf::mpData, n), 0)...); + } + + void DoInitFromTupleArray(const value_tuple* first, const value_tuple* last) + { +#if EASTL_ASSERT_ENABLED + if (EASTL_UNLIKELY(first > last || first == nullptr || last == nullptr)) + EASTL_FAIL_MSG("tuple_vector::ctor from tuple array -- invalid ptrs"); +#endif + size_type newNumElements = last - first; + DoConditionalReallocate(0, mNumCapacity, newNumElements); + mNumElements = newNumElements; + DoUninitializedCopyFromTupleArray(begin(), end(), first); + } + + void DoCopyFromTupleArray(iterator destPos, iterator destEnd, const value_tuple* srcTuple) + { + // assign to constructed region + while (destPos < destEnd) + { + *destPos = *srcTuple; + ++destPos; + ++srcTuple; + } + } + + void DoUninitializedCopyFromTupleArray(iterator destPos, iterator destEnd, const value_tuple* srcTuple) + { + // placement-new/copy-ctor to unconstructed regions + while (destPos < destEnd) + { + swallow(::new(eastl::get(destPos.MakePointer())) Ts(eastl::get(*srcTuple))...); + ++destPos; + ++srcTuple; + } + } + + // Try to grow the size of the container "naturally" given the number of elements being used + void DoGrow(size_type oldNumElements, size_type oldNumCapacity, size_type requiredCapacity) + { + if (requiredCapacity > oldNumCapacity) + DoReallocate(oldNumElements, GetNewCapacity(requiredCapacity)); + } + + // Reallocate to the newCapacity (IFF it's actually larger, though) + void DoConditionalReallocate(size_type oldNumElements, size_type oldNumCapacity, size_type requiredCapacity) + { + if (requiredCapacity > oldNumCapacity) + DoReallocate(oldNumElements, requiredCapacity); + } + + void DoReallocate(size_type oldNumElements, size_type requiredCapacity) + { + void* ppNewLeaf[sizeof...(Ts)]; + pair allocation = TupleRecurser::template DoAllocate( + *this, ppNewLeaf, requiredCapacity, 0); + swallow((TupleVecLeaf::DoUninitializedMoveAndDestruct(0, oldNumElements, (Ts*)ppNewLeaf[Indices]), 0)...); + swallow(TupleVecLeaf::mpData = (Ts*)ppNewLeaf[Indices]...); + + EASTLFree(get_allocator(), mpData, internalDataSize()); + mpData = allocation.first; + mNumCapacity = requiredCapacity; + internalDataSize() = allocation.second; + } + + size_type GetNewCapacity(size_type oldNumCapacity) + { + return (oldNumCapacity > 0) ? (2 * oldNumCapacity) : 1; + } +}; + +} // namespace TupleVecInternal + +// Move_iterator specialization for TupleVecIter. +// An rvalue reference of a move_iterator would normaly be "tuple &&" whereas +// what we actually want is "tuple". This specialization gives us that. +template +class move_iterator, Ts...>> +{ +public: + typedef TupleVecInternal::TupleVecIter, Ts...> iterator_type; + typedef iterator_type wrapped_iterator_type; // This is not in the C++ Standard; it's used by use to identify it as + // a wrapping iterator type. 
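	// For a tuple_vector<int, eastl::string>, dereferencing this move_iterator
	// yields a tuple of rvalue references (tuple<int&&, eastl::string&&>), so
	// each component can be moved out of its own backing array element-wise.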
+ typedef iterator_traits traits_type; + typedef typename traits_type::iterator_category iterator_category; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + typedef typename traits_type::pointer pointer; + typedef tuple reference; + typedef move_iterator this_type; + +protected: + iterator_type mIterator; + +public: + move_iterator() : mIterator() {} + explicit move_iterator(iterator_type mi) : mIterator(mi) {} + + template + move_iterator(const move_iterator& mi) : mIterator(mi.base()) {} + + iterator_type base() const { return mIterator; } + reference operator*() const { return eastl::move(MakeReference()); } + pointer operator->() const { return mIterator; } + + this_type& operator++() { ++mIterator; return *this; } + this_type operator++(int) { + this_type tempMoveIterator = *this; + ++mIterator; + return tempMoveIterator; + } + + this_type& operator--() { --mIterator; return *this; } + this_type operator--(int) + { + this_type tempMoveIterator = *this; + --mIterator; + return tempMoveIterator; + } + + this_type operator+(difference_type n) const { return move_iterator(mIterator + n); } + this_type& operator+=(difference_type n) + { + mIterator += n; + return *this; + } + + this_type operator-(difference_type n) const { return move_iterator(mIterator - n); } + this_type& operator-=(difference_type n) + { + mIterator -= n; + return *this; + } + + difference_type operator-(const this_type& rhs) const { return mIterator - rhs.mIterator; } + bool operator<(const this_type& rhs) const { return mIterator < rhs.mIterator; } + bool operator>(const this_type& rhs) const { return mIterator > rhs.mIterator; } + bool operator>=(const this_type& rhs) const { return mIterator >= rhs.mIterator; } + bool operator<=(const this_type& rhs) const { return mIterator <= rhs.mIterator; } + + reference operator[](difference_type n) const { return *(*this + n); } + +private: + reference MakeReference() const + { + return reference(eastl::move(((Ts*)mIterator.mpData[Indices])[mIterator.mIndex])...); + } +}; + +template +inline bool operator==(const TupleVecInternal::TupleVecImpl& a, + const TupleVecInternal::TupleVecImpl& b) +{ + return ((a.size() == b.size()) && eastl::equal(a.begin(), a.end(), b.begin())); +} + +template +inline bool operator!=(const TupleVecInternal::TupleVecImpl& a, + const TupleVecInternal::TupleVecImpl& b) +{ + return ((a.size() != b.size()) || !eastl::equal(a.begin(), a.end(), b.begin())); +} + +template +inline bool operator<(const TupleVecInternal::TupleVecImpl& a, + const TupleVecInternal::TupleVecImpl& b) +{ + return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); +} + +template +inline bool operator>(const TupleVecInternal::TupleVecImpl& a, + const TupleVecInternal::TupleVecImpl& b) +{ + return b < a; +} + +template +inline bool operator<=(const TupleVecInternal::TupleVecImpl& a, + const TupleVecInternal::TupleVecImpl& b) +{ + return !(b < a); +} + +template +inline bool operator>=(const TupleVecInternal::TupleVecImpl& a, + const TupleVecInternal::TupleVecImpl& b) +{ + return !(a < b); +} + +template +inline void swap(TupleVecInternal::TupleVecImpl& a, + TupleVecInternal::TupleVecImpl& b) +{ + a.swap(b); +} + +// A customization of swap is made for r-values of tuples-of-references - +// normally, swapping rvalues doesn't make sense, but in this case, we do want to +// swap the contents of what the tuple-of-references are referring to +// +// This is required due to TupleVecIter returning 
a value-type for its dereferencing, +// as opposed to an actual real reference of some sort +template +inline +typename enable_if...>::value>::type +swap(tuple&& a, tuple&& b) +{ + a.swap(b); +} + +template +inline +typename enable_if...>::value>::type +swap(tuple&& a, tuple&& b) = delete; + + +// External interface of tuple_vector +template +class tuple_vector : public TupleVecInternal::TupleVecImpl, Ts...> +{ + typedef tuple_vector this_type; + typedef TupleVecInternal::TupleVecImpl, Ts...> base_type; + using base_type::base_type; + +public: + this_type& operator=(std::initializer_list iList) + { + base_type::operator=(iList); + return *this; + } +}; + +// Variant of tuple_vector that allows a user-defined allocator type (can't mix default template params with variadics) +template +class tuple_vector_alloc + : public TupleVecInternal::TupleVecImpl, Ts...> +{ + typedef tuple_vector_alloc this_type; + typedef TupleVecInternal::TupleVecImpl, Ts...> base_type; + using base_type::base_type; + +public: + + this_type& operator=(std::initializer_list iList) + { + base_type::operator=(iList); + return *this; + } +}; + +} // namespace eastl + +EA_RESTORE_VC_WARNING() +EA_RESTORE_VC_WARNING() +EA_RESTORE_VC_WARNING() +EA_RESTORE_VC_WARNING() + +#endif // EASTL_TUPLEVECTOR_H diff --git a/libkram/eastl/include/EASTL/chrono.h b/libkram/eastl/include/EASTL/chrono.h new file mode 100644 index 00000000..453ab0f4 --- /dev/null +++ b/libkram/eastl/include/EASTL/chrono.h @@ -0,0 +1,744 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +/////////////////////////////////////////////////////////////////////////////// +// This file implements the eastl::chrono specification which is part of the +// standard STL date and time library. eastl::chrono implements all the +// mechanisms required to capture and manipulate times retrieved from the +// provided clocks. It implements the all of the features to allow type safe +// durations to be used in code. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_CHRONO_H +#define EASTL_CHRONO_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include +#include +#include + + +// TODO: move to platform specific cpp or header file +#if defined EA_PLATFORM_MICROSOFT + EA_DISABLE_ALL_VC_WARNINGS() + + #ifndef WIN32_LEAN_AND_MEAN + #define WIN32_LEAN_AND_MEAN + #endif + + #undef NOMINMAX + #define NOMINMAX + + #include + + #ifdef min + #undef min + #endif + #ifdef max + #undef max + #endif + + EA_RESTORE_ALL_VC_WARNINGS() +#endif + +#if defined(EA_PLATFORM_MICROSOFT) && !defined(EA_PLATFORM_MINGW) + // Nothing to do +#elif defined(EA_PLATFORM_SONY) + #include + #include +#elif defined(EA_PLATFORM_APPLE) + #include +#elif defined(EA_PLATFORM_POSIX) || defined(EA_PLATFORM_MINGW) || defined(EA_PLATFORM_ANDROID) + // Posix means Linux, Unix, and Macintosh OSX, among others (including Linux-based mobile platforms). 
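As a quick orientation for the tuple_vector interface added in the hunk above: each element type is stored in its own parallel array, and components are reached either through get<I>() or through the reference tuples returned by operator[]. A minimal sketch follows; the element types, values, and header path are illustrative assumptions, not part of the patch.

    #include <EASTL/tuple_vector.h>

    void tuple_vector_sketch()
    {
        eastl::tuple_vector<int, float> tv;   // one int array and one float array (SoA layout)
        tv.push_back(1, 2.0f);
        tv.push_back(3, 4.0f);

        int*   ids     = tv.get<0>();         // contiguous array of the first component
        float* weights = tv.get<1>();         // contiguous array of the second component

        for (eastl_size_t i = 0; i < tv.size(); ++i)
            weights[i] += float(ids[i]);

        eastl::get<1>(tv[0]) = 9.0f;          // operator[] returns a tuple of references
    }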
+ #if defined(EA_PLATFORM_MINGW) + #include + #endif + #include + #if (defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC)) + #include + #else + #include + #include + #endif +#endif + + +namespace eastl +{ +namespace chrono +{ + /////////////////////////////////////////////////////////////////////////////// + // treat_as_floating_point + /////////////////////////////////////////////////////////////////////////////// + template + struct treat_as_floating_point : is_floating_point {}; + + + /////////////////////////////////////////////////////////////////////////////// + // 20.12.4, duration_values + /////////////////////////////////////////////////////////////////////////////// + template + struct duration_values + { + public: + EASTL_FORCE_INLINE static EA_CONSTEXPR Rep zero() { return Rep(0); } + EASTL_FORCE_INLINE static EA_CONSTEXPR Rep max() { return eastl::numeric_limits::max(); } + EASTL_FORCE_INLINE static EA_CONSTEXPR Rep min() { return eastl::numeric_limits::lowest(); } + }; + + + /////////////////////////////////////////////////////////////////////////////// + // duration fwd_decl + /////////////////////////////////////////////////////////////////////////////// + template > + class duration; + + + namespace Internal + { + /////////////////////////////////////////////////////////////////////////////// + // IsRatio + /////////////////////////////////////////////////////////////////////////////// + template struct IsRatio : eastl::false_type {}; + template struct IsRatio> : eastl::true_type {}; + template struct IsRatio> : eastl::true_type {}; + template struct IsRatio> : eastl::true_type {}; + template struct IsRatio> : eastl::true_type {}; + + + /////////////////////////////////////////////////////////////////////////////// + // IsDuration + /////////////////////////////////////////////////////////////////////////////// + template struct IsDuration : eastl::false_type{}; + template struct IsDuration> : eastl::true_type{}; + template struct IsDuration> : eastl::true_type{}; + template struct IsDuration> : eastl::true_type{}; + template struct IsDuration> : eastl::true_type{}; + + + /////////////////////////////////////////////////////////////////////////////// + // RatioGCD + /////////////////////////////////////////////////////////////////////////////// + template + struct RatioGCD + { + static_assert(IsRatio::value, "Period1 is not a eastl::ratio type"); + static_assert(IsRatio::value, "Period2 is not a eastl::ratio type"); + + typedef ratio::value, + eastl::Internal::lcm::value> type; + }; + }; + + + /////////////////////////////////////////////////////////////////////////////// + // 20.12.5.7, duration_cast + /////////////////////////////////////////////////////////////////////////////// + namespace Internal + { + template ::type, + typename CommonRep = typename eastl::decay::type>::type, + bool = CommonPeriod::num == 1, + bool = CommonPeriod::den == 1> + struct DurationCastImpl; + + template + struct DurationCastImpl + { + inline static ToDuration DoCast(const FromDuration& fd) + { + return ToDuration(static_cast(fd.count())); + } + }; + + template + struct DurationCastImpl + { + inline static ToDuration DoCast(const FromDuration& d) + { + return ToDuration(static_cast(static_cast(d.count()) * + static_cast(CommonPeriod::num))); + } + }; + + template + struct DurationCastImpl + { + inline static ToDuration DoCast(const FromDuration& d) + { + return ToDuration(static_cast(static_cast(d.count()) / + static_cast(CommonPeriod::den))); + } + }; + + template + struct 
DurationCastImpl + { + inline static ToDuration DoCast(const FromDuration& d) + { + return ToDuration(static_cast(static_cast(d.count()) * + static_cast(CommonPeriod::num) / + static_cast(CommonPeriod::den))); + } + }; + }; // namespace Internal + + + /////////////////////////////////////////////////////////////////////////////// + // duration_cast + /////////////////////////////////////////////////////////////////////////////// + template + inline typename eastl::enable_if::value, ToDuration>::type + duration_cast(const duration& d) + { + typedef typename duration::this_type FromDuration; + return Internal::DurationCastImpl::DoCast(d); + } + + + /////////////////////////////////////////////////////////////////////////////// + // duration + /////////////////////////////////////////////////////////////////////////////// + template + class duration + { + Rep mRep; + + public: + typedef Rep rep; + typedef Period period; + typedef duration this_type; + + #if defined(EA_COMPILER_NO_DEFAULTED_FUNCTIONS) + EA_CONSTEXPR duration() + : mRep() {} + + duration(const duration& other) + : mRep(Rep(other.mRep)) {} + + duration& operator=(const duration& other) + { mRep = other.mRep; return *this; } + #else + EA_CONSTEXPR duration() = default; + duration(const duration&) = default; + duration& operator=(const duration&) = default; + #endif + + + /////////////////////////////////////////////////////////////////////////////// + // conversion constructors + /////////////////////////////////////////////////////////////////////////////// + template + inline EA_CONSTEXPR explicit duration( + const Rep2& rep2, + typename eastl::enable_if::value && + (treat_as_floating_point::value || + !treat_as_floating_point::value)>::type** = 0) + : mRep(static_cast(rep2)) {} + + + template + EA_CONSTEXPR duration(const duration& d2, + typename eastl::enable_if::value || + (eastl::ratio_divide::type::den == 1 && + !treat_as_floating_point::value), + void>::type** = 0) + : mRep(duration_cast(d2).count()) {} + + /////////////////////////////////////////////////////////////////////////////// + // returns the count of ticks + /////////////////////////////////////////////////////////////////////////////// + EA_CONSTEXPR Rep count() const { return mRep; } + + /////////////////////////////////////////////////////////////////////////////// + // static accessors of special duration values + /////////////////////////////////////////////////////////////////////////////// + EA_CONSTEXPR inline static duration zero() { return duration(duration_values::zero()); } + EA_CONSTEXPR inline static duration min() { return duration(duration_values::min()); } + EA_CONSTEXPR inline static duration max() { return duration(duration_values::max()); } + + /////////////////////////////////////////////////////////////////////////////// + // const arithmetic operations + /////////////////////////////////////////////////////////////////////////////// + EA_CONSTEXPR inline duration operator+() const { return *this; } + EA_CONSTEXPR inline duration operator-() const { return duration(0-mRep); } + + /////////////////////////////////////////////////////////////////////////////// + // arithmetic operations + /////////////////////////////////////////////////////////////////////////////// + inline duration operator++(int) { return duration(mRep++); } + inline duration operator--(int) { return duration(mRep--); } + inline duration& operator++() { ++mRep; return *this; } + inline duration& operator--() { --mRep; return *this; } + inline duration& operator+=(const 
duration& d) { mRep += d.count(); return *this; } + inline duration& operator-=(const duration& d) { mRep -= d.count(); return *this; } + inline duration& operator*=(const Rep& rhs) { mRep *= rhs; return *this; } + inline duration& operator/=(const Rep& rhs) { mRep /= rhs; return *this; } + inline duration& operator%=(const Rep& rhs) { mRep %= rhs; return *this; } + inline duration& operator%=(const duration& d) { mRep %= d.count(); return *this; } + }; + + + /////////////////////////////////////////////////////////////////////////////// + // 20.12.5.5, arithmetic operations with durations as arguments + /////////////////////////////////////////////////////////////////////////////// + template + typename eastl::common_type, duration>::type EASTL_FORCE_INLINE + operator+(const duration& lhs, const duration& rhs) + { + typedef typename eastl::common_type, duration>::type common_duration_t; + return common_duration_t(common_duration_t(lhs).count() + common_duration_t(rhs).count()); + } + + template + typename eastl::common_type, duration>::type EASTL_FORCE_INLINE + operator-(const duration& lhs, const duration& rhs) + { + typedef typename eastl::common_type, duration>::type common_duration_t; + return common_duration_t(common_duration_t(lhs).count() - common_duration_t(rhs).count()); + } + + template + duration::type, Period1> EASTL_FORCE_INLINE + operator*(const duration& lhs, const Rep2& rhs) + { + typedef typename duration, Period1>::type common_duration_t; + return common_duration_t(common_duration_t(lhs).count() * rhs); + } + + template + duration::type, Period2> EASTL_FORCE_INLINE + operator*(const Rep1& lhs, const duration& rhs) + { + typedef duration::type, Period2> common_duration_t; + return common_duration_t(lhs * common_duration_t(rhs).count()); + } + + template + duration::type, Period1> EASTL_FORCE_INLINE + operator/(const duration& lhs, const Rep2& rhs) + { + typedef duration::type, Period1> common_duration_t; + return common_duration_t(common_duration_t(lhs).count() / rhs); + } + + template + typename eastl::common_type, duration>::type EASTL_FORCE_INLINE + operator/(const duration& lhs, const duration& rhs) + { + typedef typename eastl::common_type, duration>::type common_duration_t; + return common_duration_t(common_duration_t(lhs).count() / common_duration_t(rhs).count()); + } + + template + duration::type, Period1> EASTL_FORCE_INLINE + operator%(const duration& lhs, const Rep2& rhs) + { + typedef duration::type, Period1> common_duration_t; + return common_duration_t(common_duration_t(lhs).count() % rhs); + } + + template + typename eastl::common_type, duration>::type EASTL_FORCE_INLINE + operator%(const duration& lhs, const duration& rhs) + { + typedef typename eastl::common_type, duration>::type common_duration_t; + return common_duration_t(common_duration_t(lhs).count() % common_duration_t(rhs).count()); + } + + + /////////////////////////////////////////////////////////////////////////////// + // 20.12.5.6, compares two durations + /////////////////////////////////////////////////////////////////////////////// + template + EASTL_FORCE_INLINE bool operator==(const duration& lhs, + const duration& rhs) + { + typedef typename eastl::common_type, duration>::type common_duration_t; + return common_duration_t(lhs).count() == common_duration_t(rhs).count(); + } + + template + EASTL_FORCE_INLINE bool operator<(const duration& lhs, + const duration& rhs) + { + typedef typename eastl::common_type, duration>::type common_duration_t; + return common_duration_t(lhs).count() < 
common_duration_t(rhs).count(); + } + + template + EASTL_FORCE_INLINE bool operator!=(const duration& lhs, + const duration& rhs) + { + return !(lhs == rhs); + } + + template + EASTL_FORCE_INLINE bool operator<=(const duration& lhs, + const duration& rhs) + { + return !(rhs < lhs); + } + + template + EASTL_FORCE_INLINE bool operator>(const duration& lhs, + const duration& rhs) + { + return rhs < lhs; + } + + template + EASTL_FORCE_INLINE bool operator>=(const duration& lhs, + const duration& rhs) + { + return !(lhs < rhs); + } + + + /////////////////////////////////////////////////////////////////////////////// + // standard duration units + /////////////////////////////////////////////////////////////////////////////// + typedef duration nanoseconds; + typedef duration microseconds; + typedef duration milliseconds; + typedef duration seconds; + typedef duration> minutes; + typedef duration> hours; + + + /////////////////////////////////////////////////////////////////////////////// + // 20.12.6, time_point + /////////////////////////////////////////////////////////////////////////////// + template + class time_point + { + Duration mDuration; + + public: + typedef Clock clock; + typedef Duration duration; + typedef typename Duration::rep rep; + typedef typename Duration::period period; + + inline EA_CONSTEXPR time_point() : mDuration(Duration::zero()) {} + EA_CONSTEXPR explicit time_point(const Duration& other) : mDuration(other) {} + + template + inline EA_CONSTEXPR time_point( + const time_point& t, + typename eastl::enable_if::value>::type** = 0) + : mDuration(t.time_since_epoch()) {} + + EA_CONSTEXPR Duration time_since_epoch() const { return mDuration; } + + time_point& operator+=(const Duration& d) { mDuration += d; return *this; } + time_point& operator-=(const Duration& d) { mDuration -= d; return *this; } + + static EA_CONSTEXPR time_point min() { return time_point(Duration::min()); } + static EA_CONSTEXPR time_point max() { return time_point(Duration::max()); } + }; + + + /////////////////////////////////////////////////////////////////////////////// + // 20.12.6.5, time_point arithmetic + /////////////////////////////////////////////////////////////////////////////// + template + inline EA_CONSTEXPR time_point>::type> + operator+(const time_point& lhs, const duration& rhs) + { + typedef time_point>::type> common_timepoint_t; + return common_timepoint_t(lhs.time_since_epoch() + rhs); + } + + template + inline EA_CONSTEXPR time_point>::type> + operator+(const duration& lhs, const time_point& rhs) + { + typedef time_point>::type> common_timepoint_t; + return common_timepoint_t(lhs + rhs.time_since_epoch()); + } + + template + inline EA_CONSTEXPR time_point>::type> + operator-(const time_point& lhs, const duration& rhs) + { + typedef time_point>::type> common_timepoint_t; + return common_timepoint_t(lhs.time_since_epoch() - rhs); + } + + template + inline EA_CONSTEXPR typename eastl::common_type::type operator-( + const time_point& lhs, + const time_point& rhs) + { + return lhs.time_since_epoch() - rhs.time_since_epoch(); + } + + template + inline EA_CONSTEXPR bool operator==(const time_point& lhs, + const time_point& rhs) + { + return lhs.time_since_epoch() == rhs.time_since_epoch(); + } + + template + inline EA_CONSTEXPR bool operator!=(const time_point& lhs, + const time_point& rhs) + { + return !(lhs == rhs); + } + + template + inline EA_CONSTEXPR bool operator<(const time_point& lhs, const time_point& rhs) + { + return lhs.time_since_epoch() < rhs.time_since_epoch(); + } + + 
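The duration arithmetic and comparison operators above first convert both operands to their common_type, so mixed-unit expressions stay exact until an explicit duration_cast truncates them. A small illustrative sketch, with arbitrary values:

    #include <EASTL/chrono.h>

    void duration_sketch()
    {
        using namespace eastl::chrono;

        milliseconds a(1500);
        microseconds b(2500);

        auto sum = a + b;   // common_type is microseconds, so the sum is exact: 1502500us

        seconds      s  = duration_cast<seconds>(sum);      // 1    (truncated toward zero)
        milliseconds ms = duration_cast<milliseconds>(sum); // 1502
    }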
template + inline EA_CONSTEXPR bool operator<=(const time_point& lhs, + const time_point& rhs) + { + return !(rhs < lhs); + } + + template + inline EA_CONSTEXPR bool operator>(const time_point& lhs, const time_point& rhs) + { + return rhs < lhs; + } + + template + inline EA_CONSTEXPR bool operator>=(const time_point& lhs, + const time_point& rhs) + { + return !(lhs < rhs); + } + + + /////////////////////////////////////////////////////////////////////////////// + // 20.12.6.7, time_point_cast + /////////////////////////////////////////////////////////////////////////////// + template + EA_CONSTEXPR time_point time_point_cast( + const time_point& t, + typename eastl::enable_if::value>::type** = 0) + { + return time_point(duration_cast(t.time_since_epoch())); + } + + + /////////////////////////////////////////////////////////////////////////////// + // 20.12.7, clocks + /////////////////////////////////////////////////////////////////////////////// + + namespace Internal + { + #if defined(EA_PLATFORM_MICROSOFT) && !defined(EA_PLATFORM_MINGW) + #define EASTL_NS_PER_TICK 1 + #elif defined EA_PLATFORM_SONY + #define EASTL_NS_PER_TICK _XTIME_NSECS_PER_TICK + #elif defined EA_PLATFORM_POSIX + #define EASTL_NS_PER_TICK _XTIME_NSECS_PER_TICK + #else + #define EASTL_NS_PER_TICK 100 + #endif + + #if defined(EA_PLATFORM_POSIX) + typedef chrono::nanoseconds::period SystemClock_Period; + typedef chrono::nanoseconds::period SteadyClock_Period; + #else + typedef eastl::ratio_multiply, nano>::type SystemClock_Period; + typedef eastl::ratio_multiply, nano>::type SteadyClock_Period; + #endif + + + /////////////////////////////////////////////////////////////////////////////// + // Internal::GetTicks + /////////////////////////////////////////////////////////////////////////////// + inline uint64_t GetTicks() + { + #if defined EA_PLATFORM_MICROSOFT + auto queryFrequency = [] + { + LARGE_INTEGER frequency; + QueryPerformanceFrequency(&frequency); + return double(1000000000.0L / frequency.QuadPart); // nanoseconds per tick + }; + + auto queryCounter = [] + { + LARGE_INTEGER counter; + QueryPerformanceCounter(&counter); + return counter.QuadPart; + }; + + EA_DISABLE_VC_WARNING(4640) // warning C4640: construction of local static object is not thread-safe (VS2013) + static auto frequency = queryFrequency(); // cache cpu frequency on first call + EA_RESTORE_VC_WARNING() + return uint64_t(frequency * queryCounter()); + #elif defined EA_PLATFORM_SONY + return sceKernelGetProcessTimeCounter(); + #elif defined(EA_PLATFORM_APPLE) + return mach_absolute_time(); + #elif defined(EA_PLATFORM_POSIX) // Posix means Linux, Unix, and Macintosh OSX, among others (including Linux-based mobile platforms). 
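			// Prefer CLOCK_MONOTONIC; if that query fails (the code checks for EINVAL),
			// fall back to CLOCK_REALTIME and return a 64-bit nanosecond count.
			// The gettimeofday() fallback below only has microsecond resolution.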
+ #if (defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC)) + timespec ts; + int result = clock_gettime(CLOCK_MONOTONIC, &ts); + + if(result == EINVAL + ) + result = clock_gettime(CLOCK_REALTIME, &ts); + + const uint64_t nNanoseconds = (uint64_t)ts.tv_nsec + ((uint64_t)ts.tv_sec * UINT64_C(1000000000)); + return nNanoseconds; + #else + struct timeval tv; + gettimeofday(&tv, NULL); + const uint64_t nMicroseconds = (uint64_t)tv.tv_usec + ((uint64_t)tv.tv_sec * 1000000); + return nMicroseconds; + #endif + #else + #error "chrono not implemented for platform" + #endif + } + } // namespace Internal + + + /////////////////////////////////////////////////////////////////////////////// + // system_clock + /////////////////////////////////////////////////////////////////////////////// + class system_clock + { + public: + typedef long long rep; // signed arithmetic type representing the number of ticks in the clock's duration + typedef Internal::SystemClock_Period period; + typedef chrono::duration duration; // duration, capable of representing negative durations + typedef chrono::time_point time_point; + + // true if the time between ticks is always increases monotonically + EA_CONSTEXPR_OR_CONST static bool is_steady = false; + + // returns a time point representing the current point in time. + static time_point now() EA_NOEXCEPT + { + return time_point(duration(Internal::GetTicks())); + } + }; + + + /////////////////////////////////////////////////////////////////////////////// + // steady_clock + /////////////////////////////////////////////////////////////////////////////// + class steady_clock + { + public: + typedef long long rep; // signed arithmetic type representing the number of ticks in the clock's duration + typedef Internal::SteadyClock_Period period; + typedef chrono::duration duration; // duration, capable of representing negative durations + typedef chrono::time_point time_point; + + // true if the time between ticks is always increases monotonically + EA_CONSTEXPR_OR_CONST static bool is_steady = true; + + // returns a time point representing the current point in time. 
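		// Typical elapsed-time measurement (illustrative):
		//     auto start = eastl::chrono::steady_clock::now();
		//     // ... work ...
		//     auto elapsedMs = eastl::chrono::duration_cast<eastl::chrono::milliseconds>(
		//                          eastl::chrono::steady_clock::now() - start).count();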
+ static time_point now() EA_NOEXCEPT + { + return time_point(duration(Internal::GetTicks())); + } + }; + + + /////////////////////////////////////////////////////////////////////////////// + // high_resolution_clock + /////////////////////////////////////////////////////////////////////////////// + typedef system_clock high_resolution_clock; + + +} // namespace chrono + + + /////////////////////////////////////////////////////////////////////////////// + // duration common_type specialization + /////////////////////////////////////////////////////////////////////////////// + template + struct common_type, chrono::duration> + { + typedef chrono::duration::type>::type, + typename chrono::Internal::RatioGCD::type> type; + }; + + + /////////////////////////////////////////////////////////////////////////////// + // time_point common_type specialization + /////////////////////////////////////////////////////////////////////////////// + template + struct common_type, chrono::time_point> + { + typedef chrono::time_point::type> type; + }; + + + /////////////////////////////////////////////////////////////////////////////// + // chrono_literals + /////////////////////////////////////////////////////////////////////////////// + #if EASTL_USER_LITERALS_ENABLED && EASTL_INLINE_NAMESPACES_ENABLED + EA_DISABLE_VC_WARNING(4455) // disable warning C4455: literal suffix identifiers that do not start with an underscore are reserved + inline namespace literals + { + inline namespace chrono_literals + { + /////////////////////////////////////////////////////////////////////////////// + // integer chrono literals + /////////////////////////////////////////////////////////////////////////////// + EA_CONSTEXPR chrono::hours operator"" h(unsigned long long h) { return chrono::hours(h); } + EA_CONSTEXPR chrono::minutes operator"" min(unsigned long long m) { return chrono::minutes(m); } + EA_CONSTEXPR chrono::seconds operator"" s(unsigned long long s) { return chrono::seconds(s); } + EA_CONSTEXPR chrono::milliseconds operator"" ms(unsigned long long ms) { return chrono::milliseconds(ms); } + EA_CONSTEXPR chrono::microseconds operator"" us(unsigned long long us) { return chrono::microseconds(us); } + EA_CONSTEXPR chrono::nanoseconds operator"" ns(unsigned long long ns) { return chrono::nanoseconds(ns); } + + /////////////////////////////////////////////////////////////////////////////// + // float chrono literals + /////////////////////////////////////////////////////////////////////////////// + EA_CONSTEXPR chrono::duration> operator"" h(long double h) + { return chrono::duration>(h); } + EA_CONSTEXPR chrono::duration> operator"" min(long double m) + { return chrono::duration>(m); } + EA_CONSTEXPR chrono::duration operator"" s(long double s) + { return chrono::duration(s); } + EA_CONSTEXPR chrono::duration operator"" ms(long double ms) + { return chrono::duration(ms); } + EA_CONSTEXPR chrono::duration operator"" us(long double us) + { return chrono::duration(us); } + EA_CONSTEXPR chrono::duration operator"" ns(long double ns) + { return chrono::duration(ns); } + + } // namespace chrono_literals + }// namespace literals + EA_RESTORE_VC_WARNING() // warning: 4455 + #endif + +} // namespace eastl + + +#if EASTL_USER_LITERALS_ENABLED && EASTL_INLINE_NAMESPACES_ENABLED +namespace chrono +{ + using namespace eastl::literals::chrono_literals; +} // namespace chrono +#endif + + +#endif diff --git a/libkram/eastl/include/EASTL/core_allocator.h b/libkram/eastl/include/EASTL/core_allocator.h new file mode 100644 index 
00000000..e4374912 --- /dev/null +++ b/libkram/eastl/include/EASTL/core_allocator.h @@ -0,0 +1,70 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_CORE_ALLOCATOR_H +#define EASTL_CORE_ALLOCATOR_H + +#if EASTL_CORE_ALLOCATOR_ENABLED + +#include + +namespace EA +{ + namespace Allocator + { + /// EASTLCoreAllocatorImpl + /// + /// EASTL provides an out of the box implementation of the + /// ICoreAllocator interface. This is provided as a convenience for + /// users who wish to provide ICoreAllocator implementations for EASTL to use. + /// + /// EASTL has a dependency on coreallocator so to provide an out of + /// the box implementation for EASTLCoreAlloctor and EASTLCoreDeleter + /// that can be used and tested. Historically we could not test + /// ICoreAllocator interface because we relied on the code being linked + /// in user code. + /// + + class EASTLCoreAllocatorImpl : public ICoreAllocator + { + public: + virtual void* Alloc(size_t size, const char* name, unsigned int flags) + { + return ::operator new[](size, name, flags, 0, __FILE__, __LINE__); + } + + virtual void* Alloc(size_t size, const char* name, unsigned int flags, unsigned int alignment, unsigned int alignOffset = 0) + { + return ::operator new[](size, alignment, alignOffset, name, flags, 0, __FILE__, __LINE__); + } + + virtual void Free(void* ptr, size_t size = 0) + { + ::operator delete(static_cast(ptr)); + } + + virtual void* AllocDebug(size_t size, const DebugParams debugParams, unsigned int flags) + { + return Alloc(size, debugParams.mName, flags); + } + + virtual void* AllocDebug(size_t size, const DebugParams debugParams, unsigned int flags, unsigned int align, unsigned int alignOffset = 0) + { + return Alloc(size, debugParams.mName, flags, align, alignOffset); + } + + static EASTLCoreAllocatorImpl* GetDefaultAllocator(); + }; + + inline EASTLCoreAllocatorImpl* EASTLCoreAllocatorImpl::GetDefaultAllocator() + { + static EASTLCoreAllocatorImpl allocator; + return &allocator; + } + } +} + +#endif // EASTL_CORE_ALLOCATOR_ENABLED +#endif // EASTL_CORE_ALLOCATOR_H + diff --git a/libkram/eastl/include/EASTL/core_allocator_adapter.h b/libkram/eastl/include/EASTL/core_allocator_adapter.h new file mode 100644 index 00000000..d6f18275 --- /dev/null +++ b/libkram/eastl/include/EASTL/core_allocator_adapter.h @@ -0,0 +1,368 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// Implements an EASTL allocator that uses an ICoreAllocator. +// However, this header file is not dependent on ICoreAllocator or its package. +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_CORE_ALLOCATOR_ADAPTER_H +#define EASTL_CORE_ALLOCATOR_ADAPTER_H + +#if EASTL_CORE_ALLOCATOR_ENABLED + + +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + +/// EASTL_CORE_ALLOCATOR_ADAPTER_GET_DEFAULT_CORE_ALLOCATOR +/// +/// This allows the application to override the default name for the default global core allocator. 
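// Illustrative sketch of driving the default ICoreAllocator implementation above (not part of the
// header; assumes EASTL_CORE_ALLOCATOR_ENABLED is defined and the CoreAllocator package is present;
// the function name and allocation names are placeholders).
#include <EASTL/core_allocator.h>

inline void coreAllocatorExample()
{
    EA::Allocator::ICoreAllocator* pAllocator =
        EA::Allocator::EASTLCoreAllocatorImpl::GetDefaultAllocator();

    // The unaligned and aligned requests route to the global EASTL operator new[] overloads above.
    void* p        = pAllocator->Alloc(256, "example/block", 0);
    void* pAligned = pAllocator->Alloc(256, "example/aligned", 0, 64 /*alignment*/);

    pAllocator->Free(p);
    pAllocator->Free(pAligned);
}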
+/// However, you must be careful in your usage of this, as if this file is shared between uses then +/// you will need to be careful that your override of this doesn't conflict with others. +/// +#ifndef EASTL_CORE_ALLOCATOR_ADAPTER_GET_DEFAULT_CORE_ALLOCATOR + #define EASTL_CORE_ALLOCATOR_ADAPTER_GET_DEFAULT_CORE_ALLOCATOR AllocatorType::GetDefaultAllocator +#endif + + + +namespace EA +{ + namespace Allocator + { + /// CoreAllocatorAdapter + /// + /// Implements the EASTL allocator interface. + /// Allocates memory from an instance of ICoreAllocator or another class with an equivalent interface. + /// ICoreAllocator is a pure-virtual memory allocation interface used by a number of EA games and + /// shared libraries. It's completely unrelated to EASTL, but it's prevalent enough that it's useful + /// for EASTL to have a built-in adapter for this interface. ICoreAllocator is declared in the + /// CoreAllocator package icoreallocator_interface.h header, but CoreAllocatorAdapter can work with + /// any equivalent interface, as defined below. + /// + /// Expected interface: + /// enum AllocFlags { + /// kFlagTempMemory = 0, + /// kFlagPermMemory = 1 + /// }; + /// + /// struct CoreAllocator { + /// void* Alloc(size_t size, const char* name, unsigned int allocFlags); + /// void* Alloc(size_t size, const char* name, unsigned int allocFlags, // Not required unless you are working with types that require custom alignment. + /// unsigned int align, unsigned int alignOffset = 0); + /// void Free(void* block, size_t size = 0); + /// static CoreAllocator* GetDefaultAllocator(); + /// }; + /// + /// Example usage: + /// #include + /// typedef EA::Allocator::CoreAllocatorAdapter Adapter; + /// eastl::list widgetList(Adapter("UI/WidgetList", pSomeCoreAllocator)); + /// widgetList.push_back(Widget()); + /// + /// Example usage: + /// #include + /// eastl::list > widgetList; + /// widgetList.push_back(Widget()); + /// + /// Example usage: + /// #include + /// typedef EA::Allocator::CoreAllocatorAdapter Adapter; + /// typedef eastl::list WidgetList; + /// CoreAllocatorFixed widgetCoreAllocator(pFixedAllocatorForWidgetListValueType); // CoreAllocatorFixed is a hypothetical implementation of the ICoreAllocator interface. + /// WidgetList widgetList(Adapter("UI/WidgetList", &widgetCoreAllocator)); // Note that the widgetCoreAllocator is declared before and thus destroyed after the widget list. + /// + template + class CoreAllocatorAdapter + { + public: + typedef CoreAllocatorAdapter this_type; + + public: + // To do: Make this constructor explicit, when there is no known code dependent on it being otherwise. 
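// A compilable version of the "Example usage" comments above (a sketch: Widget and the function
// name are placeholders; pSomeCoreAllocator is whatever ICoreAllocator the application supplies).
#include <EASTL/core_allocator.h>           // brings in the ICoreAllocator interface when enabled
#include <EASTL/core_allocator_adapter.h>
#include <EASTL/list.h>

struct Widget { int id = 0; };

inline void adapterUsageExample(EA::Allocator::ICoreAllocator* pSomeCoreAllocator)
{
    typedef EA::Allocator::CoreAllocatorAdapter<EA::Allocator::ICoreAllocator> Adapter;

    eastl::list<Widget, Adapter> widgetList(Adapter("UI/WidgetList", pSomeCoreAllocator));
    widgetList.push_back(Widget());
}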
+ CoreAllocatorAdapter(const char* pName = EASTL_NAME_VAL(EASTL_ALLOCATOR_DEFAULT_NAME), AllocatorType* pAllocator = EASTL_CORE_ALLOCATOR_ADAPTER_GET_DEFAULT_CORE_ALLOCATOR()); + CoreAllocatorAdapter(const char* pName, AllocatorType* pAllocator, int flags); + CoreAllocatorAdapter(const CoreAllocatorAdapter& x); + CoreAllocatorAdapter(const CoreAllocatorAdapter& x, const char* pName); + + CoreAllocatorAdapter& operator=(const CoreAllocatorAdapter& x); + + void* allocate(size_t n, int flags = 0); + void* allocate(size_t n, size_t alignment, size_t offset, int flags = 0); + void deallocate(void* p, size_t n); + + AllocatorType* get_allocator() const; + void set_allocator(AllocatorType* pAllocator); + + int get_flags() const; + void set_flags(int flags); + + const char* get_name() const; + void set_name(const char* pName); + + public: // Public because otherwise VC++ generates (possibly invalid) warnings about inline friend template specializations. + AllocatorType* mpCoreAllocator; + int mnFlags; // Allocation flags. See ICoreAllocator/AllocFlags. + + #if EASTL_NAME_ENABLED + const char* mpName; // Debug name, used to track memory. + #endif + }; + + template + bool operator==(const CoreAllocatorAdapter& a, const CoreAllocatorAdapter& b); + + template + bool operator!=(const CoreAllocatorAdapter& a, const CoreAllocatorAdapter& b); + + + + /// EASTLICoreAllocator + /// + /// Provides a standardized typedef for ICoreAllocator; + /// + /// Example usage: + /// eastl::list widgetList("UI/WidgetList", pSomeCoreAllocator); + /// widgetList.push_back(Widget()); + /// + class ICoreAllocator; + class EASTLCoreAllocatorImpl; + + typedef CoreAllocatorAdapter EASTLICoreAllocatorAdapter; + typedef CoreAllocatorAdapter EASTLCoreAllocatorAdapter; + typedef EASTLICoreAllocatorAdapter EASTLICoreAllocator; // for backwards compatibility + + + + /// EASTLICoreDeleter + /// + /// Implements a functor which can free memory from the specified + /// ICoreAllocator interface. This is a convenience object provided for + /// users who wish to have EASTL containers deallocate memory obtained from + /// ICoreAllocator interfaces. + /// + template + class CoreDeleterAdapter + { + public: + typedef CoreDeleterAdapter this_type; + AllocatorType* mpCoreAllocator; + + public: + CoreDeleterAdapter(AllocatorType* pAllocator = EASTL_CORE_ALLOCATOR_ADAPTER_GET_DEFAULT_CORE_ALLOCATOR()) EA_NOEXCEPT + : mpCoreAllocator(pAllocator) {} + + ~CoreDeleterAdapter() EA_NOEXCEPT {} + + template + void operator()(T* p) + { + p->~T(); + mpCoreAllocator->Free(p); + } + + CoreDeleterAdapter(const CoreDeleterAdapter& in) { mpCoreAllocator = in.mpCoreAllocator; } + + CoreDeleterAdapter(CoreDeleterAdapter&& in) + { + mpCoreAllocator = in.mpCoreAllocator; + in.mpCoreAllocator = nullptr; + } + + CoreDeleterAdapter& operator=(const CoreDeleterAdapter& in) + { + mpCoreAllocator = in.mpCoreAllocator; + return *this; + } + + CoreDeleterAdapter& operator=(CoreDeleterAdapter&& in) + { + mpCoreAllocator = in.mpCoreAllocator; + in.mpCoreAllocator = nullptr; + return *this; + } + }; + + + + /// EASTLICoreDeleter + /// + /// Provides a standardized typedef for ICoreAllocator implementations. 
+ /// + /// Example usage: + /// eastl::shared_ptr foo(pA, EASTLCoreDeleter()); + /// + typedef CoreDeleterAdapter EASTLICoreDeleterAdapter; + typedef CoreDeleterAdapter EASTLCoreDeleterAdapter; + + } // namespace Allocator + +} // namespace EA + + + + + +/////////////////////////////////////////////////////////////////////////////// +// Inlines +/////////////////////////////////////////////////////////////////////////////// + +namespace EA +{ + namespace Allocator + { + template + inline CoreAllocatorAdapter::CoreAllocatorAdapter(const char* EASTL_NAME(pName), AllocatorType* pCoreAllocator) + : mpCoreAllocator(pCoreAllocator), mnFlags(0) + { + #if EASTL_NAME_ENABLED + mpName = pName ? pName : EASTL_ALLOCATOR_DEFAULT_NAME; + #endif + } + + template + inline CoreAllocatorAdapter::CoreAllocatorAdapter(const char* EASTL_NAME(pName), AllocatorType* pCoreAllocator, int flags) + : mpCoreAllocator(pCoreAllocator), mnFlags(flags) + { + #if EASTL_NAME_ENABLED + mpName = pName ? pName : EASTL_ALLOCATOR_DEFAULT_NAME; + #endif + } + + template + inline CoreAllocatorAdapter::CoreAllocatorAdapter(const CoreAllocatorAdapter& x) + : mpCoreAllocator(x.mpCoreAllocator), mnFlags(x.mnFlags) + { + #if EASTL_NAME_ENABLED + mpName = x.mpName; + #endif + } + + template + inline CoreAllocatorAdapter::CoreAllocatorAdapter(const CoreAllocatorAdapter& x, const char* EASTL_NAME(pName)) + : mpCoreAllocator(x.mpCoreAllocator), mnFlags(x.mnFlags) + { + #if EASTL_NAME_ENABLED + mpName = pName ? pName : EASTL_ALLOCATOR_DEFAULT_NAME; + #endif + } + + template + inline CoreAllocatorAdapter& CoreAllocatorAdapter::operator=(const CoreAllocatorAdapter& x) + { + mpCoreAllocator = x.mpCoreAllocator; + mnFlags = x.mnFlags; + + #if EASTL_NAME_ENABLED + mpName = x.mpName; + #endif + + return *this; + } + + template + inline void* CoreAllocatorAdapter::allocate(size_t n, int /*flags*/) + { + // It turns out that EASTL itself doesn't use the flags parameter, + // whereas the user here might well want to specify a flags + // parameter. So we use ours instead of the one passed in. + return mpCoreAllocator->Alloc(n, EASTL_NAME_VAL(mpName), (unsigned)mnFlags); + } + + template + inline void* CoreAllocatorAdapter::allocate(size_t n, size_t alignment, size_t offset, int /*flags*/) + { + // It turns out that EASTL itself doesn't use the flags parameter, + // whereas the user here might well want to specify a flags + // parameter. So we use ours instead of the one passed in. 
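// Sketch of the deleter adapter described above used with eastl::shared_ptr (not part of the
// header; assumes the application provides the operator new[] overloads EASTL requires; Foo and
// the function name are placeholders).
#include <EASTL/core_allocator.h>
#include <EASTL/core_allocator_adapter.h>
#include <EASTL/shared_ptr.h>
#include <new>

struct Foo { int value = 0; };

inline eastl::shared_ptr<Foo> makeSharedFooExample()
{
    EA::Allocator::ICoreAllocator* pAllocator =
        EA::Allocator::EASTLCoreAllocatorImpl::GetDefaultAllocator();

    // The object is allocated from the core allocator and constructed in place...
    void* pMemory = pAllocator->Alloc(sizeof(Foo), "example/Foo", 0);
    Foo*  pFoo    = new (pMemory) Foo();

    // ...and CoreDeleterAdapter destroys it and returns the memory to the same allocator.
    return eastl::shared_ptr<Foo>(pFoo, EA::Allocator::EASTLICoreDeleterAdapter(pAllocator));
}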
+ return mpCoreAllocator->Alloc(n, EASTL_NAME_VAL(mpName), (unsigned)mnFlags, (unsigned)alignment, (unsigned)offset); + } + + template + inline void CoreAllocatorAdapter::deallocate(void* p, size_t n) + { + return mpCoreAllocator->Free(p, n); + } + + template + inline AllocatorType* CoreAllocatorAdapter::get_allocator() const + { + return mpCoreAllocator; + } + + template + inline void CoreAllocatorAdapter::set_allocator(AllocatorType* pAllocator) + { + mpCoreAllocator = pAllocator; + } + + template + inline int CoreAllocatorAdapter::get_flags() const + { + return mnFlags; + } + + template + inline void CoreAllocatorAdapter::set_flags(int flags) + { + mnFlags = flags; + } + + template + inline const char* CoreAllocatorAdapter::get_name() const + { + #if EASTL_NAME_ENABLED + return mpName; + #else + return EASTL_ALLOCATOR_DEFAULT_NAME; + #endif + } + + template + inline void CoreAllocatorAdapter::set_name(const char* pName) + { + #if EASTL_NAME_ENABLED + mpName = pName; + #else + (void)pName; + #endif + } + + + + template + inline bool operator==(const CoreAllocatorAdapter& a, const CoreAllocatorAdapter& b) + { + return (a.mpCoreAllocator == b.mpCoreAllocator) && + (a.mnFlags == b.mnFlags); + } + + template + inline bool operator!=(const CoreAllocatorAdapter& a, const CoreAllocatorAdapter& b) + { + return (a.mpCoreAllocator != b.mpCoreAllocator) || + (a.mnFlags != b.mnFlags); + } + + + } // namespace Allocator + +} // namespace EA + + +#endif // EASTL_CORE_ALLOCATOR_ENABLED +#endif // Header include guard + + + + + + + + diff --git a/libkram/eastl/include/EASTL/deque.h b/libkram/eastl/include/EASTL/deque.h new file mode 100644 index 00000000..c2d55b1c --- /dev/null +++ b/libkram/eastl/include/EASTL/deque.h @@ -0,0 +1,2687 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +////////////////////////////////////////////////////////////////////////////// +// deque design +// +// A deque (pronounced "deck") is a double-ended queue, though this is partially +// of a misnomer. A deque does indeed let you add and remove values from both ends +// of the container, but it's not usually used for such a thing and instead is used +// as a more flexible version of a vector. It provides operator[] (random access) +// and can insert items anywhere and not just at the front and back. +// +// While you can implement a double-ended queue via a doubly-linked list, deque is +// instead implemented as a list of arrays. The benefit of this is that memory usage +// is lower and that random access can be had with decent efficiency. +// +// Our implementation of deque is just like every other implementation of deque, +// as the C++ standard all but dictates that you make it work this way. Below +// we have a depiction of an array (or vector) of 48 items, with each node being +// a '+' character and extra capacity being a '-' character. What we have is one +// contiguous block of memory: +// +// ++++++++++++++++++++++++++++++++++++++++++++++++----------------- +// 0 47 +// +// With a deque, the same array of 48 items would be implemented as multiple smaller +// arrays of contiguous memory, each of fixed size. We will call these "sub-arrays." +// In the case here, we have six arrays of 8 nodes: +// +// ++++++++ ++++++++ ++++++++ ++++++++ ++++++++ ++++++++ +// +// With an vector, item [0] is the first item and item [47] is the last item. 
With a +// deque, item [0] is usually not the first item and neither is item [47]. There is +// extra capacity on both the front side and the back side of the deque. So a deque +// (of 24 items) actually looks like this: +// +// -------- -----+++ ++++++++ ++++++++ +++++--- -------- +// 0 23 +// +// To insert items at the front, you move into the capacity on the left, and to insert +// items at the back, you append items on the right. As you can see, inserting an item +// at the front doesn't require allocating new memory nor does it require moving any +// items in the container. It merely involves moving the pointer to the [0] item to +// the left by one node. +// +// We keep track of these sub-arrays by having an array of pointers, with each array +// entry pointing to each of the sub-arrays. We could alternatively use a linked +// list of pointers, but it turns out we can implement our deque::operator[] more +// efficiently if we use an array of pointers instead of a list of pointers. +// +// To implement deque::iterator, we could keep a struct which is essentially this: +// struct iterator { +// int subArrayIndex; +// int subArrayOffset; +// } +// +// In practice, we implement iterators a little differently, but in reality our +// implementation isn't much different from the above. It turns out that it's most +// simple if we also manage the location of item [0] and item [end] by using these +// same iterators. +// +// To consider: Implement the deque as a circular deque instead of a linear one. +// This would use a similar subarray layout but iterators would +// wrap around when they reached the end of the subarray pointer list. +// +////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_DEQUE_H +#define EASTL_DEQUE_H + + +#include +#include +#include +#include +#include +#include +#include + +EA_DISABLE_ALL_VC_WARNINGS() +#include +#include +EA_RESTORE_ALL_VC_WARNINGS() + +#if EASTL_EXCEPTIONS_ENABLED + EA_DISABLE_ALL_VC_WARNINGS() + #include // std::out_of_range, std::length_error. + EA_RESTORE_ALL_VC_WARNINGS() +#endif + + +// 4267 - 'argument' : conversion from 'size_t' to 'const uint32_t', possible loss of data. This is a bogus warning resulting from a bug in VC++. +// 4345 - Behavior change: an object of POD type constructed with an initializer of the form () will be default-initialized +// 4480 - nonstandard extension used: specifying underlying type for enum +// 4530 - C++ exception handler used, but unwind semantics are not enabled. Specify /EHsc +// 4571 - catch(...) semantics changed since Visual C++ 7.1; structured exceptions (SEH) are no longer caught. +EA_DISABLE_VC_WARNING(4267 4345 4480 4530 4571); + +#if EASTL_EXCEPTIONS_ENABLED + // 4703 - potentially uninitialized local pointer variable used. VC++ is mistakenly analyzing the possibility of uninitialized variables, though it's not easy for it to do so. + // 4701 - potentially uninitialized local variable used. + EA_DISABLE_VC_WARNING(4703 4701) +#endif + + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + +namespace eastl +{ + + /// EASTL_DEQUE_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_DEQUE_DEFAULT_NAME + #define EASTL_DEQUE_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " deque" // Unless the user overrides something, this is "EASTL deque". 
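// Worked sketch of the layout drawn above: a logical element index decomposes into a subarray
// index and a slot within that subarray, after adding the unused capacity in front of element [0]
// (all numbers and names here are illustrative; 8-slot subarrays and a begin offset of 13 match
// the 24-element diagram above).
#include <cstdio>

inline void subarrayMappingExample()
{
    const int kSubarraySize = 8;
    const int beginOffset   = 13;   // element [0] lives 13 slots into the subarray chain

    const int indices[] = { 0, 7, 23 };
    for (int i : indices)
    {
        const int absolute      = beginOffset + i;
        const int subarrayIndex = absolute / kSubarraySize;
        const int slot          = absolute % kSubarraySize;
        std::printf("deque[%d] -> subarray %d, slot %d\n", i, subarrayIndex, slot);
    }
}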
+ #endif + + + /// EASTL_DEQUE_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_DEQUE_DEFAULT_ALLOCATOR + #define EASTL_DEQUE_DEFAULT_ALLOCATOR allocator_type(EASTL_DEQUE_DEFAULT_NAME) + #endif + + + /// DEQUE_DEFAULT_SUBARRAY_SIZE + /// + /// Defines the default number of items in a subarray. + /// Note that the user has the option of specifying the subarray size + /// in the deque template declaration. + /// + #if !defined(__GNUC__) || (__GNUC__ >= 3) // GCC 2.x can't handle the declaration below. + #define DEQUE_DEFAULT_SUBARRAY_SIZE(T) ((sizeof(T) <= 4) ? 64 : ((sizeof(T) <= 8) ? 32 : ((sizeof(T) <= 16) ? 16 : ((sizeof(T) <= 32) ? 8 : 4)))) + #else + #define DEQUE_DEFAULT_SUBARRAY_SIZE(T) 16 + #endif + + + + /// DequeIterator + /// + /// The DequeIterator provides both const and non-const iterators for deque. + /// It also is used for the tracking of the begin and end for the deque. + /// + template + struct DequeIterator + { + typedef DequeIterator this_type; + typedef DequeIterator iterator; + typedef DequeIterator const_iterator; + typedef ptrdiff_t difference_type; + typedef EASTL_ITC_NS::random_access_iterator_tag iterator_category; + typedef T value_type; + typedef T* pointer; + typedef T& reference; + + public: + DequeIterator(); + DequeIterator(const iterator& x); + + pointer operator->() const; + reference operator*() const; + + this_type& operator++(); + this_type operator++(int); + + this_type& operator--(); + this_type operator--(int); + + this_type& operator+=(difference_type n); + this_type& operator-=(difference_type n); + + this_type operator+(difference_type n) const; + this_type operator-(difference_type n) const; + + protected: + template + friend struct DequeIterator; + + template + friend struct DequeBase; + + template + friend class deque; + + template + friend bool operator==(const DequeIterator&, + const DequeIterator&); + + template + friend bool operator!=(const DequeIterator&, + const DequeIterator&); + + template + friend bool operator!=(const DequeIterator& a, + const DequeIterator& b); + + template + friend bool operator< (const DequeIterator&, + const DequeIterator&); + + template + friend bool operator> (const DequeIterator&, + const DequeIterator&); + + template + friend bool operator<=(const DequeIterator&, + const DequeIterator&); + + template + friend bool operator>=(const DequeIterator&, + const DequeIterator&); + + template + friend typename DequeIterator::difference_type + operator-(const DequeIterator& a, + const DequeIterator& b); + + protected: + T* mpCurrent; // Where we currently point. Declared first because it's used most often. + T* mpBegin; // The beginning of the current subarray. + T* mpEnd; // The end of the current subarray. To consider: remove this member, as it is always equal to 'mpBegin + kDequeSubarraySize'. Given that deque subarrays usually consist of hundreds of bytes, this isn't a massive win. Also, now that we are implementing a zero-allocation new deque policy, mpEnd may in fact not be equal to 'mpBegin + kDequeSubarraySize'. + T** mpCurrentArrayPtr; // Pointer to current subarray. We could alternatively implement this as a list node iterator if the deque used a linked list. 
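// Simplified sketch (not the real template) of how an iterator built from the four members above
// advances: when mpCurrent steps off the end of its subarray, it hops to the next entry in the
// subarray pointer table. DequeIterator::operator++() further below does exactly this.
template <typename T, int kSubarraySize>
struct SimpleDequeIteratorSketch
{
    T*  mpCurrent;          // where we currently point
    T*  mpBegin;            // start of the current subarray
    T*  mpEnd;              // end of the current subarray
    T** mpCurrentArrayPtr;  // entry in the table of subarray pointers

    void increment()
    {
        if (++mpCurrent == mpEnd)               // stepped off the current subarray...
        {
            mpBegin   = *++mpCurrentArrayPtr;   // ...advance to the next subarray
            mpEnd     = mpBegin + kSubarraySize;
            mpCurrent = mpBegin;
        }
    }
};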
+ + struct Increment {}; + struct Decrement {}; + struct FromConst {}; + + DequeIterator(T** pCurrentArrayPtr, T* pCurrent); + DequeIterator(const const_iterator& x, FromConst) : mpCurrent(x.mpCurrent), mpBegin(x.mpBegin), mpEnd(x.mpEnd), mpCurrentArrayPtr(x.mpCurrentArrayPtr){} + DequeIterator(const iterator& x, Increment); + DequeIterator(const iterator& x, Decrement); + + this_type copy(const iterator& first, const iterator& last, true_type); // true means that value_type has the type_trait has_trivial_relocate, + this_type copy(const iterator& first, const iterator& last, false_type); // false means it does not. + + void copy_backward(const iterator& first, const iterator& last, true_type); // true means that value_type has the type_trait has_trivial_relocate, + void copy_backward(const iterator& first, const iterator& last, false_type); // false means it does not. + + void SetSubarray(T** pCurrentArrayPtr); + }; + + + + + /// DequeBase + /// + /// The DequeBase implements memory allocation for deque. + /// See VectorBase (class vector) for an explanation of why we + /// create this separate base class. + /// + template + struct DequeBase + { + typedef T value_type; + typedef Allocator allocator_type; + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. + typedef ptrdiff_t difference_type; + typedef DequeIterator iterator; + typedef DequeIterator const_iterator; + + static const size_type npos = (size_type)-1; /// 'npos' means non-valid position or simply non-position. + static const size_type kMaxSize = (size_type)-2; /// -1 is reserved for 'npos'. It also happens to be slightly beneficial that kMaxSize is a value less than -1, as it helps us deal with potential integer wraparound issues. + + enum + { + kMinPtrArraySize = 8, /// A new empty deque has a ptrArraySize of 0, but any allocated ptrArrays use this min size. + kSubarraySize = kDequeSubarraySize /// + //kNodeSize = kDequeSubarraySize * sizeof(T) /// Disabled because it prevents the ability to do this: struct X{ eastl::deque mDequeOfSelf; }; + }; + + enum Side /// Defines the side of the deque: front or back. + { + kSideFront, /// Identifies the front side of the deque. + kSideBack /// Identifies the back side of the deque. + }; + + protected: + T** mpPtrArray; // Array of pointers to subarrays. + size_type mnPtrArraySize; // Possibly we should store this as T** mpArrayEnd. + iterator mItBegin; // Where within the subarrays is our beginning. + iterator mItEnd; // Where within the subarrays is our end. + allocator_type mAllocator; // To do: Use base class optimization to make this go away. + + public: + DequeBase(const allocator_type& allocator); + DequeBase(size_type n); + DequeBase(size_type n, const allocator_type& allocator); + ~DequeBase(); + + const allocator_type& get_allocator() const EA_NOEXCEPT; + allocator_type& get_allocator() EA_NOEXCEPT; + void set_allocator(const allocator_type& allocator); + + protected: + T* DoAllocateSubarray(); + void DoFreeSubarray(T* p); + void DoFreeSubarrays(T** pBegin, T** pEnd); + + T** DoAllocatePtrArray(size_type n); + void DoFreePtrArray(T** p, size_t n); + + iterator DoReallocSubarray(size_type nAdditionalCapacity, Side allocationSide); + void DoReallocPtrArray(size_type nAdditionalCapacity, Side allocationSide); + + void DoInit(size_type n); + + }; // DequeBase + + + + + /// deque + /// + /// Implements a conventional C++ double-ended queue. 
The implementation used here + /// is very much like any other deque implementations you may have seen, as it + /// follows the standard algorithm for deque design. + /// + /// Note: + /// As of this writing, deque does not support zero-allocation initial emptiness. + /// A newly created deque with zero elements will still allocate a subarray + /// pointer set. We are looking for efficient and clean ways to get around this, + /// but current efforts have resulted in less efficient and more fragile code. + /// The logic of this class doesn't lend itself to a clean implementation. + /// It turns out that deques are one of the least likely classes you'd want this + /// behaviour in, so until this functionality becomes very important to somebody, + /// we will leave it as-is. It can probably be solved by adding some extra code to + /// the Do* functions and adding good comments explaining the situation. + /// + template + class deque : public DequeBase + { + public: + typedef DequeBase base_type; + typedef deque this_type; + typedef T value_type; + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; + typedef DequeIterator iterator; + typedef DequeIterator const_iterator; + typedef eastl::reverse_iterator reverse_iterator; + typedef eastl::reverse_iterator const_reverse_iterator; + typedef typename base_type::size_type size_type; + typedef typename base_type::difference_type difference_type; + typedef typename base_type::allocator_type allocator_type; + + using base_type::kSideFront; + using base_type::kSideBack; + using base_type::mpPtrArray; + using base_type::mnPtrArraySize; + using base_type::mItBegin; + using base_type::mItEnd; + using base_type::mAllocator; + using base_type::npos; + using base_type::DoAllocateSubarray; + using base_type::DoFreeSubarray; + using base_type::DoFreeSubarrays; + using base_type::DoAllocatePtrArray; + using base_type::DoFreePtrArray; + using base_type::DoReallocSubarray; + using base_type::DoReallocPtrArray; + + public: + deque(); + explicit deque(const allocator_type& allocator); + explicit deque(size_type n, const allocator_type& allocator = EASTL_DEQUE_DEFAULT_ALLOCATOR); + deque(size_type n, const value_type& value, const allocator_type& allocator = EASTL_DEQUE_DEFAULT_ALLOCATOR); + deque(const this_type& x); + deque(this_type&& x); + deque(this_type&& x, const allocator_type& allocator); + deque(std::initializer_list ilist, const allocator_type& allocator = EASTL_DEQUE_DEFAULT_ALLOCATOR); + + template + deque(InputIterator first, InputIterator last); // allocator arg removed because VC7.1 fails on the default arg. To do: Make a second version of this function without a default arg. + + ~deque(); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + void assign(size_type n, const value_type& value); + void assign(std::initializer_list ilist); + + template // It turns out that the C++ std::deque specifies a two argument + void assign(InputIterator first, InputIterator last); // version of assign that takes (int size, int value). These are not + // iterators, so we need to do a template compiler trick to do the right thing. 
+ + iterator begin() EA_NOEXCEPT; + const_iterator begin() const EA_NOEXCEPT; + const_iterator cbegin() const EA_NOEXCEPT; + + iterator end() EA_NOEXCEPT; + const_iterator end() const EA_NOEXCEPT; + const_iterator cend() const EA_NOEXCEPT; + + reverse_iterator rbegin() EA_NOEXCEPT; + const_reverse_iterator rbegin() const EA_NOEXCEPT; + const_reverse_iterator crbegin() const EA_NOEXCEPT; + + reverse_iterator rend() EA_NOEXCEPT; + const_reverse_iterator rend() const EA_NOEXCEPT; + const_reverse_iterator crend() const EA_NOEXCEPT; + + bool empty() const EA_NOEXCEPT; + size_type size() const EA_NOEXCEPT; + + void resize(size_type n, const value_type& value); + void resize(size_type n); + + void shrink_to_fit(); + void set_capacity(size_type n = base_type::npos); + + reference operator[](size_type n); + const_reference operator[](size_type n) const; + + reference at(size_type n); + const_reference at(size_type n) const; + + reference front(); + const_reference front() const; + + reference back(); + const_reference back() const; + + void push_front(const value_type& value); + reference push_front(); + void push_front(value_type&& value); + + void push_back(const value_type& value); + reference push_back(); + void push_back(value_type&& value); + + void pop_front(); + void pop_back(); + + template + iterator emplace(const_iterator position, Args&&... args); + + template + void emplace_front(Args&&... args); + + template + void emplace_back(Args&&... args); + + iterator insert(const_iterator position, const value_type& value); + iterator insert(const_iterator position, value_type&& value); + void insert(const_iterator position, size_type n, const value_type& value); + iterator insert(const_iterator position, std::initializer_list ilist); + + template + void insert(const_iterator position, InputIterator first, InputIterator last); + + iterator erase(const_iterator position); + iterator erase(const_iterator first, const_iterator last); + reverse_iterator erase(reverse_iterator position); + reverse_iterator erase(reverse_iterator first, reverse_iterator last); + + void clear(); + //void reset_lose_memory(); // Disabled until it can be implemented efficiently and cleanly. // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. 
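// Usage sketch for the public interface declared above (illustrative; assumes the application
// provides the operator new[] overloads EASTL requires).
#include <EASTL/deque.h>

inline void dequeUsageExample()
{
    eastl::deque<int> d;

    d.push_back(2);
    d.push_back(3);
    d.push_front(1);               // no elements are shifted; see the design notes above

    d.insert(d.begin() + 1, 42);   // random-access iterators allow insertion anywhere
    d.pop_front();

    const int second = d[1];       // operator[] gives random access across subarrays
    (void)second;

    d.resize(10, 0);               // grows at the back, filling with the given value
    d.clear();
}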
+ + bool validate() const; + int validate_iterator(const_iterator i) const; + + protected: + template + void DoInit(Integer n, Integer value, true_type); + + template + void DoInit(InputIterator first, InputIterator last, false_type); + + template + void DoInitFromIterator(InputIterator first, InputIterator last, EASTL_ITC_NS::input_iterator_tag); + + template + void DoInitFromIterator(ForwardIterator first, ForwardIterator last, EASTL_ITC_NS::forward_iterator_tag); + + void DoFillInit(const value_type& value); + + template + void DoAssign(Integer n, Integer value, true_type); + + template + void DoAssign(InputIterator first, InputIterator last, false_type); + + void DoAssignValues(size_type n, const value_type& value); + + template + void DoInsert(const const_iterator& position, Integer n, Integer value, true_type); + + template + void DoInsert(const const_iterator& position, const InputIterator& first, const InputIterator& last, false_type); + + template + void DoInsertFromIterator(const_iterator position, const InputIterator& first, const InputIterator& last, EASTL_ITC_NS::forward_iterator_tag); + + void DoInsertValues(const_iterator position, size_type n, const value_type& value); + + void DoSwap(this_type& x); + }; // class deque + + + + + /////////////////////////////////////////////////////////////////////// + // DequeBase + /////////////////////////////////////////////////////////////////////// + + template + DequeBase::DequeBase(const allocator_type& allocator) + : mpPtrArray(NULL), + mnPtrArraySize(0), + mItBegin(), + mItEnd(), + mAllocator(allocator) + { + // It is assumed here that the deque subclass will init us when/as needed. + } + + + template + DequeBase::DequeBase(size_type n) + : mpPtrArray(NULL), + mnPtrArraySize(0), + mItBegin(), + mItEnd(), + mAllocator(EASTL_DEQUE_DEFAULT_NAME) + { + // It's important to note that DoInit creates space for elements and assigns + // mItBegin/mItEnd to point to them, but these elements are not constructed. + // You need to immediately follow this constructor with code that constructs the values. + DoInit(n); + } + + + template + DequeBase::DequeBase(size_type n, const allocator_type& allocator) + : mpPtrArray(NULL), + mnPtrArraySize(0), + mItBegin(), + mItEnd(), + mAllocator(allocator) + { + // It's important to note that DoInit creates space for elements and assigns + // mItBegin/mItEnd to point to them, but these elements are not constructed. + // You need to immediately follow this constructor with code that constructs the values. + DoInit(n); + } + + + template + DequeBase::~DequeBase() + { + if(mpPtrArray) + { + DoFreeSubarrays(mItBegin.mpCurrentArrayPtr, mItEnd.mpCurrentArrayPtr + 1); + DoFreePtrArray(mpPtrArray, mnPtrArraySize); + mpPtrArray = nullptr; + } + } + + + template + const typename DequeBase::allocator_type& + DequeBase::get_allocator() const EA_NOEXCEPT + { + return mAllocator; + } + + + template + typename DequeBase::allocator_type& + DequeBase::get_allocator() EA_NOEXCEPT + { + return mAllocator; + } + + + template + void DequeBase::set_allocator(const allocator_type& allocator) + { + // The only time you can set an allocator is with an empty unused container, such as right after construction. + if(EASTL_LIKELY(mAllocator != allocator)) + { + if(EASTL_LIKELY(mpPtrArray && (mItBegin.mpCurrentArrayPtr == mItEnd.mpCurrentArrayPtr))) // If we are empty and so can safely deallocate the existing memory... 
We could also test for empty(), but that's a more expensive calculation and more involved clearing, though it would be more flexible. + { + DoFreeSubarrays(mItBegin.mpCurrentArrayPtr, mItEnd.mpCurrentArrayPtr + 1); + DoFreePtrArray(mpPtrArray, mnPtrArraySize); + + mAllocator = allocator; + DoInit(0); + } + else + { + EASTL_FAIL_MSG("DequeBase::set_allocator -- atempt to change allocator after allocating elements."); + } + } + } + + + template + T* DequeBase::DoAllocateSubarray() + { + T* p = (T*)allocate_memory(mAllocator, kDequeSubarraySize * sizeof(T), EASTL_ALIGN_OF(T), 0); + EASTL_ASSERT_MSG(p != nullptr, "the behaviour of eastl::allocators that return nullptr is not defined."); + + #if EASTL_DEBUG + memset((void*)p, 0, kDequeSubarraySize * sizeof(T)); + #endif + + return (T*)p; + } + + + template + void DequeBase::DoFreeSubarray(T* p) + { + if(p) + EASTLFree(mAllocator, p, kDequeSubarraySize * sizeof(T)); + } + + template + void DequeBase::DoFreeSubarrays(T** pBegin, T** pEnd) + { + while(pBegin < pEnd) + DoFreeSubarray(*pBegin++); + } + + template + T** DequeBase::DoAllocatePtrArray(size_type n) + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(n >= 0x80000000)) + EASTL_FAIL_MSG("deque::DoAllocatePtrArray -- improbably large request."); + #endif + + T** pp = (T**)allocate_memory(mAllocator, n * sizeof(T*), EASTL_ALIGN_OF(T), 0); + EASTL_ASSERT_MSG(pp != nullptr, "the behaviour of eastl::allocators that return nullptr is not defined."); + + #if EASTL_DEBUG + memset((void*)pp, 0, n * sizeof(T*)); + #endif + + return pp; + } + + + template + void DequeBase::DoFreePtrArray(T** pp, size_t n) + { + if(pp) + EASTLFree(mAllocator, pp, n * sizeof(T*)); + } + + + template + typename DequeBase::iterator + DequeBase::DoReallocSubarray(size_type nAdditionalCapacity, Side allocationSide) + { + // nAdditionalCapacity refers to the amount of additional space we need to be + // able to store in this deque. Typically this function is called as part of + // an insert or append operation. This is the function that makes sure there + // is enough capacity for the new elements to be copied into the deque. + // The new capacity here is always at the front or back of the deque. + // This function returns an iterator to that points to the new begin or + // the new end of the deque space, depending on allocationSide. + + if(allocationSide == kSideFront) + { + // There might be some free space (nCurrentAdditionalCapacity) at the front of the existing subarray. + const size_type nCurrentAdditionalCapacity = (size_type)(mItBegin.mpCurrent - mItBegin.mpBegin); + + if(EASTL_UNLIKELY(nCurrentAdditionalCapacity < nAdditionalCapacity)) // If we need to grow downward into a new subarray... + { + const difference_type nSubarrayIncrease = (difference_type)(((nAdditionalCapacity - nCurrentAdditionalCapacity) + kDequeSubarraySize - 1) / kDequeSubarraySize); + difference_type i; + + if(nSubarrayIncrease > (mItBegin.mpCurrentArrayPtr - mpPtrArray)) // If there are not enough pointers in front of the current (first) one... + DoReallocPtrArray((size_type)(nSubarrayIncrease - (mItBegin.mpCurrentArrayPtr - mpPtrArray)), kSideFront); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for(i = 1; i <= nSubarrayIncrease; ++i) + mItBegin.mpCurrentArrayPtr[-i] = DoAllocateSubarray(); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) 
+ { + for(difference_type j = 1; j < i; ++j) + DoFreeSubarray(mItBegin.mpCurrentArrayPtr[-j]); + throw; + } + #endif + } + + return mItBegin - (difference_type)nAdditionalCapacity; + } + else // else kSideBack + { + const size_type nCurrentAdditionalCapacity = (size_type)((mItEnd.mpEnd - 1) - mItEnd.mpCurrent); + + if(EASTL_UNLIKELY(nCurrentAdditionalCapacity < nAdditionalCapacity)) // If we need to grow forward into a new subarray... + { + const difference_type nSubarrayIncrease = (difference_type)(((nAdditionalCapacity - nCurrentAdditionalCapacity) + kDequeSubarraySize - 1) / kDequeSubarraySize); + difference_type i; + + if(nSubarrayIncrease > ((mpPtrArray + mnPtrArraySize) - mItEnd.mpCurrentArrayPtr) - 1) // If there are not enough pointers after the current (last) one... + DoReallocPtrArray((size_type)(nSubarrayIncrease - (((mpPtrArray + mnPtrArraySize) - mItEnd.mpCurrentArrayPtr) - 1)), kSideBack); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for(i = 1; i <= nSubarrayIncrease; ++i) + mItEnd.mpCurrentArrayPtr[i] = DoAllocateSubarray(); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + for(difference_type j = 1; j < i; ++j) + DoFreeSubarray(mItEnd.mpCurrentArrayPtr[j]); + throw; + } + #endif + } + + return mItEnd + (difference_type)nAdditionalCapacity; + } + } + + + template + void DequeBase::DoReallocPtrArray(size_type nAdditionalCapacity, Side allocationSide) + { + // This function is not called unless the capacity is known to require a resize. + // + // We have an array of pointers (mpPtrArray), of which a segment of them are in use and + // at either end of the array are zero or more unused pointers. This function is being + // called because we need to extend the capacity on either side of this array by + // nAdditionalCapacity pointers. However, it's possible that if the user is continually + // using push_back and pop_front then the pointer array will continue to be extended + // on the back side and unused on the front side. So while we are doing this resizing + // here we also take the opportunity to recenter the pointers and thus be balanced. + // It man turn out that we don't even need to reallocate the pointer array in order + // to increase capacity on one side, as simply moving the pointers to the center may + // be enough to open up the requires space. + // + // Balanced pointer array Unbalanced pointer array (unused space at front, no free space at back) + // ----++++++++++++---- ---------+++++++++++ + + const size_type nUnusedPtrCountAtFront = (size_type)(mItBegin.mpCurrentArrayPtr - mpPtrArray); + const size_type nUsedPtrCount = (size_type)(mItEnd.mpCurrentArrayPtr - mItBegin.mpCurrentArrayPtr) + 1; + const size_type nUsedPtrSpace = nUsedPtrCount * sizeof(void*); + const size_type nUnusedPtrCountAtBack = (mnPtrArraySize - nUnusedPtrCountAtFront) - nUsedPtrCount; + value_type** pPtrArrayBegin; + + if((allocationSide == kSideBack) && (nAdditionalCapacity <= nUnusedPtrCountAtFront)) // If we can take advantage of unused pointers at the front without doing any reallocation... + { + if(nAdditionalCapacity < (nUnusedPtrCountAtFront / 2)) // Possibly use more space than required, if there's a lot of extra space. 
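// Conceptual model (not the member function itself) of the recentering described above: when one
// end of the pointer table runs out of slack but the other end has plenty, the window of live
// subarray pointers is memmoved toward the middle instead of reallocating the table.
#include <cstring>

inline void recenterPtrWindowSketch(void** table, int tableSize, int& firstUsed, int usedCount)
{
    const int slack    = tableSize - usedCount;   // unused slots at both ends combined
    const int newFirst = slack / 2;               // place the used run roughly in the middle

    std::memmove(table + newFirst, table + firstUsed,
                 (std::size_t)usedCount * sizeof(void*));
    firstUsed = newFirst;
}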
+ nAdditionalCapacity = (nUnusedPtrCountAtFront / 2); + + pPtrArrayBegin = mpPtrArray + (nUnusedPtrCountAtFront - nAdditionalCapacity); + memmove(pPtrArrayBegin, mItBegin.mpCurrentArrayPtr, nUsedPtrSpace); + + #if EASTL_DEBUG + memset(pPtrArrayBegin + nUsedPtrCount, 0, (size_t)(mpPtrArray + mnPtrArraySize) - (size_t)(pPtrArrayBegin + nUsedPtrCount)); + #endif + } + else if((allocationSide == kSideFront) && (nAdditionalCapacity <= nUnusedPtrCountAtBack)) // If we can take advantage of unused pointers at the back without doing any reallocation... + { + if(nAdditionalCapacity < (nUnusedPtrCountAtBack / 2)) // Possibly use more space than required, if there's a lot of extra space. + nAdditionalCapacity = (nUnusedPtrCountAtBack / 2); + + pPtrArrayBegin = mItBegin.mpCurrentArrayPtr + nAdditionalCapacity; + memmove(pPtrArrayBegin, mItBegin.mpCurrentArrayPtr, nUsedPtrSpace); + + #if EASTL_DEBUG + memset(mpPtrArray, 0, (size_t)((uintptr_t)pPtrArrayBegin - (uintptr_t)mpPtrArray)); + #endif + } + else + { + // In this case we will have to do a reallocation. + const size_type nNewPtrArraySize = mnPtrArraySize + eastl::max_alt(mnPtrArraySize, nAdditionalCapacity) + 2; // Allocate extra capacity. + value_type** const pNewPtrArray = DoAllocatePtrArray(nNewPtrArraySize); + + pPtrArrayBegin = pNewPtrArray + (mItBegin.mpCurrentArrayPtr - mpPtrArray) + ((allocationSide == kSideFront) ? nAdditionalCapacity : 0); + + // The following is equivalent to: eastl::copy(mItBegin.mpCurrentArrayPtr, mItEnd.mpCurrentArrayPtr + 1, pPtrArrayBegin); + // It's OK to use memcpy instead of memmove because the destination is guaranteed to non-overlap the source. + if(mpPtrArray) // Could also say: 'if(mItBegin.mpCurrentArrayPtr)' + memcpy(pPtrArrayBegin, mItBegin.mpCurrentArrayPtr, nUsedPtrSpace); + + DoFreePtrArray(mpPtrArray, mnPtrArraySize); + + mpPtrArray = pNewPtrArray; + mnPtrArraySize = nNewPtrArraySize; + } + + // We need to reset the begin and end iterators, as code that calls this expects them to *not* be invalidated. + mItBegin.SetSubarray(pPtrArrayBegin); + mItEnd.SetSubarray((pPtrArrayBegin + nUsedPtrCount) - 1); + } + + + template + void DequeBase::DoInit(size_type n) + { + // This code is disabled because it doesn't currently work properly. + // We are trying to make it so that a deque can have a zero allocation + // initial empty state, but we (OK, I) am having a hard time making + // this elegant and efficient. + //if(n) + //{ + const size_type nNewPtrArraySize = (size_type)((n / kDequeSubarraySize) + 1); // Always have at least one, even if n is zero. + const size_type kMinPtrArraySize_ = kMinPtrArraySize; + + mnPtrArraySize = eastl::max_alt(kMinPtrArraySize_, (nNewPtrArraySize + 2)); + mpPtrArray = DoAllocatePtrArray(mnPtrArraySize); + + value_type** const pPtrArrayBegin = (mpPtrArray + ((mnPtrArraySize - nNewPtrArraySize) / 2)); // Try to place it in the middle. + value_type** const pPtrArrayEnd = pPtrArrayBegin + nNewPtrArraySize; + value_type** pPtrArrayCurrent = pPtrArrayBegin; + + #if EASTL_EXCEPTIONS_ENABLED + try + { + try + { + #endif + while(pPtrArrayCurrent < pPtrArrayEnd) + *pPtrArrayCurrent++ = DoAllocateSubarray(); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeSubarrays(pPtrArrayBegin, pPtrArrayCurrent); + throw; + } + } + catch(...) 
+ { + DoFreePtrArray(mpPtrArray, mnPtrArraySize); + mpPtrArray = NULL; + mnPtrArraySize = 0; + throw; + } + #endif + + mItBegin.SetSubarray(pPtrArrayBegin); + mItBegin.mpCurrent = mItBegin.mpBegin; + + mItEnd.SetSubarray(pPtrArrayEnd - 1); + mItEnd.mpCurrent = mItEnd.mpBegin + (difference_type)(n % kDequeSubarraySize); + //} + //else // Else we do a zero-allocation initialization. + //{ + // mpPtrArray = NULL; + // mnPtrArraySize = 0; + // + // mItBegin.mpCurrentArrayPtr = NULL; + // mItBegin.mpBegin = NULL; + // mItBegin.mpEnd = NULL; // We intentionally create a situation whereby the subarray that has no capacity. + // mItBegin.mpCurrent = NULL; + // + // mItEnd = mItBegin; + //} + } + + + + /////////////////////////////////////////////////////////////////////// + // DequeIterator + /////////////////////////////////////////////////////////////////////// + + template + DequeIterator::DequeIterator() + : mpCurrent(NULL), mpBegin(NULL), mpEnd(NULL), mpCurrentArrayPtr(NULL) + { + // Empty + } + + + template + DequeIterator::DequeIterator(T** pCurrentArrayPtr, T* pCurrent) + : mpCurrent(pCurrent), mpBegin(*pCurrentArrayPtr), mpEnd(pCurrent + kDequeSubarraySize), mpCurrentArrayPtr(pCurrentArrayPtr) + { + // Empty + } + + + template + DequeIterator::DequeIterator(const iterator& x) + : mpCurrent(x.mpCurrent), mpBegin(x.mpBegin), mpEnd(x.mpEnd), mpCurrentArrayPtr(x.mpCurrentArrayPtr) + { + // Empty + } + + + template + DequeIterator::DequeIterator(const iterator& x, Increment) + : mpCurrent(x.mpCurrent), mpBegin(x.mpBegin), mpEnd(x.mpEnd), mpCurrentArrayPtr(x.mpCurrentArrayPtr) + { + operator++(); + } + + + template + DequeIterator::DequeIterator(const iterator& x, Decrement) + : mpCurrent(x.mpCurrent), mpBegin(x.mpBegin), mpEnd(x.mpEnd), mpCurrentArrayPtr(x.mpCurrentArrayPtr) + { + operator--(); + } + + + template + typename DequeIterator::pointer + DequeIterator::operator->() const + { + return mpCurrent; + } + + + template + typename DequeIterator::reference + DequeIterator::operator*() const + { + return *mpCurrent; + } + + + template + typename DequeIterator::this_type& + DequeIterator::operator++() + { + if(EASTL_UNLIKELY(++mpCurrent == mpEnd)) + { + mpBegin = *++mpCurrentArrayPtr; + mpEnd = mpBegin + kDequeSubarraySize; + mpCurrent = mpBegin; + } + return *this; + } + + + template + typename DequeIterator::this_type + DequeIterator::operator++(int) + { + const this_type temp(*this); + operator++(); + return temp; + } + + + template + typename DequeIterator::this_type& + DequeIterator::operator--() + { + if(EASTL_UNLIKELY(mpCurrent == mpBegin)) + { + mpBegin = *--mpCurrentArrayPtr; + mpEnd = mpBegin + kDequeSubarraySize; + mpCurrent = mpEnd; // fall through... + } + --mpCurrent; + return *this; + } + + + template + typename DequeIterator::this_type + DequeIterator::operator--(int) + { + const this_type temp(*this); + operator--(); + return temp; + } + + + template + typename DequeIterator::this_type& + DequeIterator::operator+=(difference_type n) + { + const difference_type subarrayPosition = (mpCurrent - mpBegin) + n; + + // Cast from signed to unsigned (size_t) in order to obviate the need to compare to < 0. + if((size_t)subarrayPosition < (size_t)kDequeSubarraySize) // If the new position is within the current subarray (i.e. >= 0 && < kSubArraySize)... + mpCurrent += n; + else + { + // This implementation is a branchless version which works by offsetting + // the math to always be in the positive range. 
Much of the values here + // reduce to constants and both the multiplication and division are of + // power of two sizes and so this calculation ends up compiling down to + // just one addition, one shift and one subtraction. This algorithm has + // a theoretical weakness in that on 32 bit systems it will fail if the + // value of n is >= (2^32 - 2^24) or 4,278,190,080 of if kDequeSubarraySize + // is >= 2^24 or 16,777,216. + EASTL_CT_ASSERT((kDequeSubarraySize & (kDequeSubarraySize - 1)) == 0); // Verify that it is a power of 2. + const difference_type subarrayIndex = (((16777216 + subarrayPosition) / (difference_type)kDequeSubarraySize)) - (16777216 / (difference_type)kDequeSubarraySize); + + SetSubarray(mpCurrentArrayPtr + subarrayIndex); + mpCurrent = mpBegin + (subarrayPosition - (subarrayIndex * (difference_type)kDequeSubarraySize)); + } + return *this; + } + + + template + typename DequeIterator::this_type& + DequeIterator::operator-=(difference_type n) + { + return (*this).operator+=(-n); + } + + + template + typename DequeIterator::this_type + DequeIterator::operator+(difference_type n) const + { + return this_type(*this).operator+=(n); + } + + + template + typename DequeIterator::this_type + DequeIterator::operator-(difference_type n) const + { + return this_type(*this).operator+=(-n); + } + + + template + typename DequeIterator::this_type + DequeIterator::copy(const iterator& first, const iterator& last, true_type) + { + // To do: Implement this as a loop which does memcpys between subarrays appropriately. + // Currently we only do memcpy if the entire operation occurs within a single subarray. + if((first.mpBegin == last.mpBegin) && (first.mpBegin == mpBegin)) // If all operations are within the same subarray, implement the operation as a memmove. + { + memmove(mpCurrent, first.mpCurrent, (size_t)((uintptr_t)last.mpCurrent - (uintptr_t)first.mpCurrent)); + return *this + (last.mpCurrent - first.mpCurrent); + } + return eastl::copy(eastl::make_move_iterator(first), eastl::make_move_iterator(last), eastl::make_move_iterator(*this)).base(); + } + + + template + typename DequeIterator::this_type + DequeIterator::copy(const iterator& first, const iterator& last, false_type) + { + return eastl::copy(eastl::make_move_iterator(first), eastl::make_move_iterator(last), eastl::make_move_iterator(*this)).base(); + } + + + template + void DequeIterator::copy_backward(const iterator& first, const iterator& last, true_type) + { + // To do: Implement this as a loop which does memmoves between subarrays appropriately. + // Currently we only do memcpy if the entire operation occurs within a single subarray. + if((first.mpBegin == last.mpBegin) && (first.mpBegin == mpBegin)) // If all operations are within the same subarray, implement the operation as a memcpy. 
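// Worked example of the branchless index math explained above: adding a large bias (16777216 is a
// multiple of any power-of-two kDequeSubarraySize up to 2^24) before the division makes truncation
// behave like floor for negative positions too (numbers and names here are illustrative).
#include <cstdio>

inline void branchlessIndexExample()
{
    const long kSubarraySize = 8;   // power of two, as the EASTL_CT_ASSERT above requires
    const long positions[]   = { -3, 5, 19 };

    for (long subarrayPosition : positions)
    {
        const long subarrayIndex = ((16777216 + subarrayPosition) / kSubarraySize) - (16777216 / kSubarraySize);
        const long slot          = subarrayPosition - (subarrayIndex * kSubarraySize);
        std::printf("position %ld -> subarray %ld, slot %ld\n", subarrayPosition, subarrayIndex, slot);
    }
    // Prints: -3 -> subarray -1, slot 5;  5 -> subarray 0, slot 5;  19 -> subarray 2, slot 3.
}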
+ memmove(mpCurrent - (last.mpCurrent - first.mpCurrent), first.mpCurrent, (size_t)((uintptr_t)last.mpCurrent - (uintptr_t)first.mpCurrent)); + else + eastl::copy_backward(eastl::make_move_iterator(first), eastl::make_move_iterator(last), eastl::make_move_iterator(*this)); + } + + + template + void DequeIterator::copy_backward(const iterator& first, const iterator& last, false_type) + { + eastl::copy_backward(eastl::make_move_iterator(first), eastl::make_move_iterator(last), eastl::make_move_iterator(*this)).base(); + } + + + template + void DequeIterator::SetSubarray(T** pCurrentArrayPtr) + { + mpCurrentArrayPtr = pCurrentArrayPtr; + mpBegin = *pCurrentArrayPtr; + mpEnd = mpBegin + kDequeSubarraySize; + } + + + // The C++ defect report #179 requires that we support comparisons between const and non-const iterators. + // Thus we provide additional template paremeters here to support this. The defect report does not + // require us to support comparisons between reverse_iterators and const_reverse_iterators. + template + inline bool operator==(const DequeIterator& a, + const DequeIterator& b) + { + return a.mpCurrent == b.mpCurrent; + } + + + template + inline bool operator!=(const DequeIterator& a, + const DequeIterator& b) + { + return a.mpCurrent != b.mpCurrent; + } + + + // We provide a version of operator!= for the case where the iterators are of the + // same type. This helps prevent ambiguity errors in the presence of rel_ops. + template + inline bool operator!=(const DequeIterator& a, + const DequeIterator& b) + { + return a.mpCurrent != b.mpCurrent; + } + + + template + inline bool operator<(const DequeIterator& a, + const DequeIterator& b) + { + return (a.mpCurrentArrayPtr == b.mpCurrentArrayPtr) ? (a.mpCurrent < b.mpCurrent) : (a.mpCurrentArrayPtr < b.mpCurrentArrayPtr); + } + + + template + inline bool operator>(const DequeIterator& a, + const DequeIterator& b) + { + return (a.mpCurrentArrayPtr == b.mpCurrentArrayPtr) ? (a.mpCurrent > b.mpCurrent) : (a.mpCurrentArrayPtr > b.mpCurrentArrayPtr); + } + + + template + inline bool operator<=(const DequeIterator& a, + const DequeIterator& b) + { + return (a.mpCurrentArrayPtr == b.mpCurrentArrayPtr) ? (a.mpCurrent <= b.mpCurrent) : (a.mpCurrentArrayPtr <= b.mpCurrentArrayPtr); + } + + + template + inline bool operator>=(const DequeIterator& a, + const DequeIterator& b) + { + return (a.mpCurrentArrayPtr == b.mpCurrentArrayPtr) ? (a.mpCurrent >= b.mpCurrent) : (a.mpCurrentArrayPtr >= b.mpCurrentArrayPtr); + } + + + // Random access iterators must support operator + and operator -. + // You can only add an integer to an iterator, and you cannot add two iterators. + template + inline DequeIterator + operator+(ptrdiff_t n, const DequeIterator& x) + { + return x + n; // Implement (n + x) in terms of (x + n). + } + + + // You can only add an integer to an iterator, but you can subtract two iterators. + // The C++ defect report #179 mentioned above specifically refers to + // operator - and states that we support the subtraction of const and non-const iterators. 
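// Worked example of the iterator subtraction implemented just below: count the whole subarrays
// strictly between the two iterators, then add the partial runs at each end (all values here are
// illustrative).
inline long dequeIteratorDistanceExample()
{
    const long kSubarraySize    = 8;
    const long arrayPtrDelta    = 3;   // a.mpCurrentArrayPtr - b.mpCurrentArrayPtr
    const long aOffsetFromBegin = 2;   // a.mpCurrent - a.mpBegin
    const long bOffsetToEnd     = 5;   // b.mpEnd - b.mpCurrent

    // (8 * (3 - 1)) + 2 + 5 == 23 elements from b up to a.
    return (kSubarraySize * (arrayPtrDelta - 1)) + aOffsetFromBegin + bOffsetToEnd;
}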
+ template + inline typename DequeIterator::difference_type + operator-(const DequeIterator& a, + const DequeIterator& b) + { + // This is a fairly clever algorithm that has been used in STL deque implementations since the original HP STL: + typedef typename DequeIterator::difference_type difference_type; + + return ((difference_type)kDequeSubarraySize * ((a.mpCurrentArrayPtr - b.mpCurrentArrayPtr) - 1)) + (a.mpCurrent - a.mpBegin) + (b.mpEnd - b.mpCurrent); + } + + + + + /////////////////////////////////////////////////////////////////////// + // deque + /////////////////////////////////////////////////////////////////////// + + template + inline deque::deque() + : base_type((size_type)0) + { + // Empty + } + + + template + inline deque::deque(const allocator_type& allocator) + : base_type((size_type)0, allocator) + { + // Empty + } + + + template + inline deque::deque(size_type n, const allocator_type& allocator) + : base_type(n, allocator) + { + DoFillInit(value_type()); + } + + + template + inline deque::deque(size_type n, const value_type& value, const allocator_type& allocator) + : base_type(n, allocator) + { + DoFillInit(value); + } + + + template + inline deque::deque(const this_type& x) + : base_type(x.size(), x.mAllocator) + { + eastl::uninitialized_copy(x.mItBegin, x.mItEnd, mItBegin); + } + + + template + inline deque::deque(this_type&& x) + : base_type((size_type)0, x.mAllocator) + { + swap(x); + } + + + template + inline deque::deque(this_type&& x, const allocator_type& allocator) + : base_type((size_type)0, allocator) + { + swap(x); // member swap handles the case that x has a different allocator than our allocator by doing a copy. + } + + + template + inline deque::deque(std::initializer_list ilist, const allocator_type& allocator) + : base_type(allocator) + { + DoInit(ilist.begin(), ilist.end(), false_type()); + } + + + template + template + inline deque::deque(InputIterator first, InputIterator last) + : base_type(EASTL_DEQUE_DEFAULT_ALLOCATOR) // Call the empty base constructor, which does nothing. We need to do all the work in our own DoInit. + { + DoInit(first, last, is_integral()); + } + + + template + inline deque::~deque() + { + // Call destructors. Parent class will free the memory. + for(iterator itCurrent(mItBegin); itCurrent != mItEnd; ++itCurrent) + itCurrent.mpCurrent->~value_type(); + } + + + template + typename deque::this_type& + deque::operator=(const this_type& x) + { + if(&x != this) // If not assigning to ourselves... + { + // If (EASTL_ALLOCATOR_COPY_ENABLED == 1) and the current contents are allocated by an + // allocator that's unequal to x's allocator, we need to reallocate our elements with + // our current allocator and reallocate it with x's allocator. If the allocators are + // equal then we can use a more optimal algorithm that doesn't reallocate our elements + // but instead can copy them in place. + + #if EASTL_ALLOCATOR_COPY_ENABLED + bool bSlowerPathwayRequired = (mAllocator != x.mAllocator); + #else + bool bSlowerPathwayRequired = false; + #endif + + if(bSlowerPathwayRequired) + { + // We can't currently use set_capacity(0) or shrink_to_fit, because they + // leave a remaining allocation with our old allocator. So we do a similar + // thing but set our allocator to x.mAllocator while doing so. + this_type temp(x.mAllocator); + DoSwap(temp); + // Now we have an empty container with an allocator equal to x.mAllocator, ready to assign from x. 
+ } + + DoAssign(x.begin(), x.end(), eastl::false_type()); + } + + return *this; + } + + + template + inline typename deque::this_type& + deque::operator=(this_type&& x) + { + if(this != &x) + { + set_capacity(0); // To consider: Are we really required to clear here? x is going away soon and will clear itself in its dtor. + swap(x); // member swap handles the case that x has a different allocator than our allocator by doing a copy. + } + return *this; + } + + + template + inline typename deque::this_type& + deque::operator=(std::initializer_list ilist) + { + DoAssign(ilist.begin(), ilist.end(), false_type()); + return *this; + } + + + template + inline void deque::assign(size_type n, const value_type& value) + { + DoAssignValues(n, value); + } + + + template + inline void deque::assign(std::initializer_list ilist) + { + DoAssign(ilist.begin(), ilist.end(), false_type()); + } + + + // It turns out that the C++ std::deque specifies a two argument + // version of assign that takes (int size, int value). These are not + // iterators, so we need to do a template compiler trick to do the right thing. + template + template + inline void deque::assign(InputIterator first, InputIterator last) + { + DoAssign(first, last, is_integral()); + } + + + template + inline typename deque::iterator + deque::begin() EA_NOEXCEPT + { + return mItBegin; + } + + + template + inline typename deque::const_iterator + deque::begin() const EA_NOEXCEPT + { + return mItBegin; + } + + + template + inline typename deque::const_iterator + deque::cbegin() const EA_NOEXCEPT + { + return mItBegin; + } + + + template + inline typename deque::iterator + deque::end() EA_NOEXCEPT + { + return mItEnd; + } + + + template + typename deque::const_iterator + deque::end() const EA_NOEXCEPT + { + return mItEnd; + } + + + template + inline typename deque::const_iterator + deque::cend() const EA_NOEXCEPT + { + return mItEnd; + } + + + template + inline typename deque::reverse_iterator + deque::rbegin() EA_NOEXCEPT + { + return reverse_iterator(mItEnd); + } + + + template + inline typename deque::const_reverse_iterator + deque::rbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(mItEnd); + } + + + template + inline typename deque::const_reverse_iterator + deque::crbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(mItEnd); + } + + + template + inline typename deque::reverse_iterator + deque::rend() EA_NOEXCEPT + { + return reverse_iterator(mItBegin); + } + + + template + inline typename deque::const_reverse_iterator + deque::rend() const EA_NOEXCEPT + { + return const_reverse_iterator(mItBegin); + } + + + template + inline typename deque::const_reverse_iterator + deque::crend() const EA_NOEXCEPT + { + return const_reverse_iterator(mItBegin); + } + + + template + inline bool deque::empty() const EA_NOEXCEPT + { + return mItBegin.mpCurrent == mItEnd.mpCurrent; + } + + + template + typename deque::size_type + inline deque::size() const EA_NOEXCEPT + { + return (size_type)(mItEnd - mItBegin); + } + + + template + inline void deque::resize(size_type n, const value_type& value) + { + const size_type nSizeCurrent = size(); + + if(n > nSizeCurrent) // We expect that more often than not, resizes will be upsizes. 
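		// For example (illustrative): growing a deque of size 5 to n == 8 appends three copies of 'value',
		// while shrinking it to n == 3 erases the elements at positions [3, 5).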
+ insert(mItEnd, n - nSizeCurrent, value); + else + erase(mItBegin + (difference_type)n, mItEnd); + } + + + template + inline void deque::resize(size_type n) + { + resize(n, value_type()); + } + + + template + inline void deque::shrink_to_fit() + { + this_type x(eastl::make_move_iterator(begin()), eastl::make_move_iterator(end())); + swap(x); + } + + + template + inline void deque::set_capacity(size_type n) + { + // Currently there isn't a way to remove all allocations from a deque, as it + // requires a single starting allocation for the subarrays. So we can't just + // free all memory without leaving it in a bad state. So the best means of + // implementing set_capacity() is to do what we do below. + + if(n == 0) + { + this_type temp(mAllocator); + DoSwap(temp); + } + else if(n < size()) + { + // We currently ignore the request to reduce capacity. To do: Implement this + // and do it in a way that doesn't result in temporarily ~doubling our memory usage. + // That might involve trimming unused subarrays from the front or back of + // the container. + resize(n); + } + } + + + template + typename deque::reference + deque::operator[](size_type n) + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(n >= (size_type)(mItEnd - mItBegin))) + EASTL_FAIL_MSG("deque::operator[] -- out of range"); + #elif EASTL_ASSERT_ENABLED + // We allow taking a reference to deque[0] + if (EASTL_UNLIKELY((n != 0) && n >= (size_type)(mItEnd - mItBegin))) + EASTL_FAIL_MSG("deque::operator[] -- out of range"); + #endif + + // See DequeIterator::operator+=() for an explanation of the code below. + iterator it(mItBegin); + + const difference_type subarrayPosition = (difference_type)((it.mpCurrent - it.mpBegin) + (difference_type)n); + const difference_type subarrayIndex = (((16777216 + subarrayPosition) / (difference_type)kDequeSubarraySize)) - (16777216 / (difference_type)kDequeSubarraySize); + + return *(*(it.mpCurrentArrayPtr + subarrayIndex) + (subarrayPosition - (subarrayIndex * (difference_type)kDequeSubarraySize))); + } + + + template + typename deque::const_reference + deque::operator[](size_type n) const + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(n >= (size_type)(mItEnd - mItBegin))) + EASTL_FAIL_MSG("deque::operator[] -- out of range"); + #elif EASTL_ASSERT_ENABLED + // We allow the user to use a reference to deque[0] of an empty container. + if (EASTL_UNLIKELY((n != 0) && n >= (size_type)(mItEnd - mItBegin))) + EASTL_FAIL_MSG("deque::operator[] -- out of range"); + #endif + + // See DequeIterator::operator+=() for an explanation of the code below. 
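		// Illustration (assumption: kDequeSubarraySize divides 16777216 == 2^24 evenly, as the power-of-two
		// defaults do): the bias keeps the dividend positive so the division floors instead of truncating
		// toward zero, which matters in DequeIterator::operator+=() where the offset may be negative.
		// E.g. with kDequeSubarraySize == 8 and a subarrayPosition of -3, the expression yields
		// subarrayIndex == -1 and an element offset of 5, i.e. the sixth slot of the previous subarray.
		// Here in operator[] the position is never negative, so the bias is harmless.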
+ iterator it(mItBegin); + + const difference_type subarrayPosition = (it.mpCurrent - it.mpBegin) + (difference_type)n; + const difference_type subarrayIndex = (((16777216 + subarrayPosition) / (difference_type)kDequeSubarraySize)) - (16777216 / (difference_type)kDequeSubarraySize); + + return *(*(it.mpCurrentArrayPtr + subarrayIndex) + (subarrayPosition - (subarrayIndex * (difference_type)kDequeSubarraySize))); + } + + + template + typename deque::reference + deque::at(size_type n) + { + #if EASTL_EXCEPTIONS_ENABLED + if(n >= (size_type)(mItEnd - mItBegin)) + throw std::out_of_range("deque::at -- out of range"); + #elif EASTL_ASSERT_ENABLED + if(n >= (size_type)(mItEnd - mItBegin)) + EASTL_FAIL_MSG("deque::at -- out of range"); + #endif + return *(mItBegin.operator+((difference_type)n)); + } + + + template + typename deque::const_reference + deque::at(size_type n) const + { + #if EASTL_EXCEPTIONS_ENABLED + if(n >= (size_type)(mItEnd - mItBegin)) + throw std::out_of_range("deque::at -- out of range"); + #elif EASTL_ASSERT_ENABLED + if(n >= (size_type)(mItEnd - mItBegin)) + EASTL_FAIL_MSG("deque::at -- out of range"); + #endif + return *(mItBegin.operator+((difference_type)n)); + } + + + template + typename deque::reference + deque::front() + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY((size_type)(mItEnd == mItBegin))) + EASTL_FAIL_MSG("deque::front -- empty deque"); + #else + // We allow the user to reference an empty container. + #endif + + return *mItBegin; + } + + + template + typename deque::const_reference + deque::front() const + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY((size_type)(mItEnd == mItBegin))) + EASTL_FAIL_MSG("deque::front -- empty deque"); + #else + // We allow the user to reference an empty container. + #endif + + return *mItBegin; + } + + + template + typename deque::reference + deque::back() + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY((size_type)(mItEnd == mItBegin))) + EASTL_FAIL_MSG("deque::back -- empty deque"); + #else + // We allow the user to reference an empty container. + #endif + + return *iterator(mItEnd, typename iterator::Decrement()); + } + + + template + typename deque::const_reference + deque::back() const + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY((size_type)(mItEnd == mItBegin))) + EASTL_FAIL_MSG("deque::back -- empty deque"); + #else + // We allow the user to reference an empty container. 
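		// Illustrative note: the Decrement-constructed iterator below is needed because mItEnd can point
		// at the first slot of a fresh subarray, in which case the last element lives at the tail of the
		// previous subarray and a plain (mpCurrent - 1) would step outside the current subarray.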
+ #endif + + return *iterator(mItEnd, typename iterator::Decrement()); + } + + + template + void deque::push_front(const value_type& value) + { + emplace_front(value); + } + + + template + void deque::push_front(value_type&& value) + { + emplace_front(eastl::move(value)); + } + + + template + typename deque::reference + deque::push_front() + { + emplace_front(value_type()); + return *mItBegin; // Same as return front(); + } + + + template + void deque::push_back(const value_type& value) + { + emplace_back(value); + } + + + template + void deque::push_back(value_type&& value) + { + emplace_back(eastl::move(value)); + } + + + template + typename deque::reference + deque::push_back() + { + emplace_back(value_type()); + return *iterator(mItEnd, typename iterator::Decrement()); // Same thing as return back(); + } + + + template + void deque::pop_front() + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY((size_type)(mItEnd == mItBegin))) + EASTL_FAIL_MSG("deque::pop_front -- empty deque"); + #endif + + if((mItBegin.mpCurrent + 1) != mItBegin.mpEnd) // If the operation is very simple... + (mItBegin.mpCurrent++)->~value_type(); + else + { + // This is executed only when we are popping the end (last) item off the front-most subarray. + // In this case we need to free the subarray and point mItBegin to the next subarray. + #ifdef EA_DEBUG + value_type** pp = mItBegin.mpCurrentArrayPtr; + #endif + + mItBegin.mpCurrent->~value_type(); // mpCurrent == mpEnd - 1 + DoFreeSubarray(mItBegin.mpBegin); + mItBegin.SetSubarray(mItBegin.mpCurrentArrayPtr + 1); + mItBegin.mpCurrent = mItBegin.mpBegin; + + #ifdef EA_DEBUG + *pp = NULL; + #endif + } + } + + + template + void deque::pop_back() + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY((size_type)(mItEnd == mItBegin))) + EASTL_FAIL_MSG("deque::pop_back -- empty deque"); + #endif + + if(mItEnd.mpCurrent != mItEnd.mpBegin) // If the operation is very simple... + (--mItEnd.mpCurrent)->~value_type(); + else + { + // This is executed only when we are popping the first item off the last subarray. + // In this case we need to free the subarray and point mItEnd to the previous subarray. + #ifdef EA_DEBUG + value_type** pp = mItEnd.mpCurrentArrayPtr; + #endif + + DoFreeSubarray(mItEnd.mpBegin); + mItEnd.SetSubarray(mItEnd.mpCurrentArrayPtr - 1); + mItEnd.mpCurrent = mItEnd.mpEnd - 1; // Recall that mItEnd points to one-past the last item in the container. + mItEnd.mpCurrent->~value_type(); // Thus we need to call the destructor on the item *before* that last item. + + #ifdef EA_DEBUG + *pp = NULL; + #endif + } + } + + + template + template + typename deque::iterator + deque::emplace(const_iterator position, Args&&... args) + { + if(EASTL_UNLIKELY(position.mpCurrent == mItEnd.mpCurrent)) // If we are doing the same thing as push_back... + { + emplace_back(eastl::forward(args)...); + return iterator(mItEnd, typename iterator::Decrement()); // Unfortunately, we need to make an iterator here, as the above push_back is an operation that can invalidate existing iterators. + } + else if(EASTL_UNLIKELY(position.mpCurrent == mItBegin.mpCurrent)) // If we are doing the same thing as push_front... + { + emplace_front(eastl::forward(args)...); + return mItBegin; + } + + iterator itPosition(position, typename iterator::FromConst()); + value_type valueSaved(eastl::forward(args)...); // We need to save this because value may come from within our container. It would be somewhat tedious to make a workaround that could avoid this. 
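		// Illustration (hypothetical call, not from the source): in d.emplace(d.begin() + 1, d[0]) the
		// argument aliases an element that the shifting below may relocate, so it is captured into
		// valueSaved first and move-assigned into its final position at the end.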
+ const difference_type i(itPosition - mItBegin); + + #if EASTL_ASSERT_ENABLED + EASTL_ASSERT(!empty()); // The push_front and push_back calls below assume that we are non-empty. It turns out this is never called unless so. + + if(EASTL_UNLIKELY(!(validate_iterator(itPosition) & isf_valid))) + EASTL_FAIL_MSG("deque::emplace -- invalid iterator"); + #endif + + if(i < (difference_type)(size() / 2)) // Should we insert at the front or at the back? We divide the range in half. + { + emplace_front(eastl::move(*mItBegin)); // This operation potentially invalidates all existing iterators and so we need to assign them anew relative to mItBegin below. + + itPosition = mItBegin + i; + + const iterator newPosition (itPosition, typename iterator::Increment()); + iterator oldBegin (mItBegin, typename iterator::Increment()); + const iterator oldBeginPlus1(oldBegin, typename iterator::Increment()); + + oldBegin.copy(oldBeginPlus1, newPosition, eastl::has_trivial_relocate()); + } + else + { + emplace_back(eastl::move(*iterator(mItEnd, typename iterator::Decrement()))); + + itPosition = mItBegin + i; + + iterator oldBack (mItEnd, typename iterator::Decrement()); + const iterator oldBackMinus1(oldBack, typename iterator::Decrement()); + + oldBack.copy_backward(itPosition, oldBackMinus1, eastl::has_trivial_relocate()); + } + + *itPosition = eastl::move(valueSaved); + + return itPosition; + } + + template + template + void deque::emplace_front(Args&&... args) + { + if(mItBegin.mpCurrent != mItBegin.mpBegin) // If we have room in the first subarray... we hope that usually this 'new' pathway gets executed, as it is slightly faster. + ::new((void*)--mItBegin.mpCurrent) value_type(eastl::forward(args)...); // Construct in place. If args is a single arg of type value_type&& then it this will be a move construction. + else + { + // To consider: Detect if value isn't coming from within this container and handle that efficiently. + value_type valueSaved(eastl::forward(args)...); // We need to make a temporary, because args may be a value_type that comes from within our container and the operations below may change the container. But we can use move instead of copy. + + if(mItBegin.mpCurrentArrayPtr == mpPtrArray) // If there are no more pointers in front of the current (first) one... + DoReallocPtrArray(1, kSideFront); + + mItBegin.mpCurrentArrayPtr[-1] = DoAllocateSubarray(); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + mItBegin.SetSubarray(mItBegin.mpCurrentArrayPtr - 1); + mItBegin.mpCurrent = mItBegin.mpEnd - 1; + ::new((void*)mItBegin.mpCurrent) value_type(eastl::move(valueSaved)); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + ++mItBegin; // The exception could only occur in the new operation above, after we have incremented mItBegin. So we need to undo it. + DoFreeSubarray(mItBegin.mpCurrentArrayPtr[-1]); + throw; + } + #endif + } + } + + template + template + void deque::emplace_back(Args&&... args) + { + if((mItEnd.mpCurrent + 1) != mItEnd.mpEnd) // If we have room in the last subarray... we hope that usually this 'new' pathway gets executed, as it is slightly faster. + ::new((void*)mItEnd.mpCurrent++) value_type(eastl::forward(args)...); // Construct in place. If args is a single arg of type value_type&& then it this will be a move construction. + else + { + // To consider: Detect if value isn't coming from within this container and handle that efficiently. 
+ value_type valueSaved(eastl::forward(args)...); // We need to make a temporary, because args may be a value_type that comes from within our container and the operations below may change the container. But we can use move instead of copy. + if(((mItEnd.mpCurrentArrayPtr - mpPtrArray) + 1) >= (difference_type)mnPtrArraySize) // If there are no more pointers after the current (last) one. + DoReallocPtrArray(1, kSideBack); + + mItEnd.mpCurrentArrayPtr[1] = DoAllocateSubarray(); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + ::new((void*)mItEnd.mpCurrent) value_type(eastl::move(valueSaved)); // We can move valueSaved into position. + mItEnd.SetSubarray(mItEnd.mpCurrentArrayPtr + 1); + mItEnd.mpCurrent = mItEnd.mpBegin; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + // No need to execute '--mItEnd', as the exception could only occur in the new operation above before we set mItEnd. + DoFreeSubarray(mItEnd.mpCurrentArrayPtr[1]); + throw; + } + #endif + } + } + + + template + typename deque::iterator + deque::insert(const_iterator position, const value_type& value) + { + return emplace(position, value); + } + + + template + typename deque::iterator + deque::insert(const_iterator position, value_type&& value) + { + return emplace(position, eastl::move(value)); + } + + + template + void deque::insert(const_iterator position, size_type n, const value_type& value) + { + DoInsertValues(position, n, value); + } + + + template + template + void deque::insert(const_iterator position, InputIterator first, InputIterator last) + { + DoInsert(position, first, last, is_integral()); // The C++ standard requires this sort of behaviour, as InputIterator might actually be Integer and 'first' is really 'count' and 'last' is really 'value'. + } + + + template + typename deque::iterator + deque::insert(const_iterator position, std::initializer_list ilist) + { + const difference_type i(position - mItBegin); + DoInsert(position, ilist.begin(), ilist.end(), false_type()); + return (mItBegin + i); + } + + + template + typename deque::iterator + deque::erase(const_iterator position) + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(!(validate_iterator(position) & isf_valid))) + EASTL_FAIL_MSG("deque::erase -- invalid iterator"); + + if(EASTL_UNLIKELY(position == end())) + EASTL_FAIL_MSG("deque::erase -- end() iterator is an invalid iterator for erase"); + #endif + + iterator itPosition(position, typename iterator::FromConst()); + iterator itNext(itPosition, typename iterator::Increment()); + const difference_type i(itPosition - mItBegin); + + if(i < (difference_type)(size() / 2)) // Should we move the front entries forward or the back entries backward? We divide the range in half. + { + itNext.copy_backward(mItBegin, itPosition, eastl::has_trivial_relocate()); + pop_front(); + } + else + { + itPosition.copy(itNext, mItEnd, eastl::has_trivial_relocate()); + pop_back(); + } + + return mItBegin + i; + } + + + template + typename deque::iterator + deque::erase(const_iterator first, const_iterator last) + { + iterator itFirst(first, typename iterator::FromConst()); + iterator itLast(last, typename iterator::FromConst()); + + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(!(validate_iterator(itFirst) & isf_valid))) + EASTL_FAIL_MSG("deque::erase -- invalid iterator"); + if(EASTL_UNLIKELY(!(validate_iterator(itLast) & isf_valid))) + EASTL_FAIL_MSG("deque::erase -- invalid iterator"); + #endif + + if((itFirst != mItBegin) || (itLast != mItEnd)) // If not erasing everything... 
(We expect that the user won't call erase(begin, end) because instead the user would just call clear.) + { + const difference_type n(itLast - itFirst); + const difference_type i(itFirst - mItBegin); + + if(i < (difference_type)((size() - n) / 2)) // Should we move the front entries forward or the back entries backward? We divide the range in half. + { + const iterator itNewBegin(mItBegin + n); + value_type** const pPtrArrayBegin = mItBegin.mpCurrentArrayPtr; + + itLast.copy_backward(mItBegin, itFirst, eastl::has_trivial_relocate()); + + for(; mItBegin != itNewBegin; ++mItBegin) // Question: If value_type is a POD type, will the compiler generate this loop at all? + mItBegin.mpCurrent->~value_type(); // If so, then we need to make a specialization for destructing PODs. + + DoFreeSubarrays(pPtrArrayBegin, itNewBegin.mpCurrentArrayPtr); + + // mItBegin = itNewBegin; <-- Not necessary, as the above loop makes it so already. + } + else // Else we will be moving back entries backward. + { + iterator itNewEnd(mItEnd - n); + value_type** const pPtrArrayEnd = itNewEnd.mpCurrentArrayPtr + 1; + + itFirst.copy(itLast, mItEnd, eastl::has_trivial_relocate()); + + for(iterator itTemp(itNewEnd); itTemp != mItEnd; ++itTemp) + itTemp.mpCurrent->~value_type(); + + DoFreeSubarrays(pPtrArrayEnd, mItEnd.mpCurrentArrayPtr + 1); + + mItEnd = itNewEnd; + } + + return mItBegin + i; + } + + clear(); + return mItEnd; + } + + + template + typename deque::reverse_iterator + deque::erase(reverse_iterator position) + { + return reverse_iterator(erase((++position).base())); + } + + + template + typename deque::reverse_iterator + deque::erase(reverse_iterator first, reverse_iterator last) + { + // Version which erases in order from first to last. + // difference_type i(first.base() - last.base()); + // while(i--) + // first = erase(first); + // return first; + + // Version which erases in order from last to first, but is slightly more efficient: + return reverse_iterator(erase(last.base(), first.base())); + } + + + template + void deque::clear() + { + // Destroy all values and all subarrays they belong to, except for the first one, + // as we need to reserve some space for a valid mItBegin/mItEnd. + if(mItBegin.mpCurrentArrayPtr != mItEnd.mpCurrentArrayPtr) // If there are multiple subarrays (more often than not, this will be so)... + { + for(value_type* p1 = mItBegin.mpCurrent; p1 < mItBegin.mpEnd; ++p1) + p1->~value_type(); + for(value_type* p2 = mItEnd.mpBegin; p2 < mItEnd.mpCurrent; ++p2) + p2->~value_type(); + DoFreeSubarray(mItEnd.mpBegin); // Leave mItBegin with a valid subarray. + } + else + { + for(value_type* p = mItBegin.mpCurrent; p < mItEnd.mpCurrent; ++p) + p->~value_type(); + // Don't free the one existing subarray, as we need it for mItBegin/mItEnd. + } + + for(value_type** pPtrArray = mItBegin.mpCurrentArrayPtr + 1; pPtrArray < mItEnd.mpCurrentArrayPtr; ++pPtrArray) + { + for(value_type* p = *pPtrArray, *pEnd = *pPtrArray + kDequeSubarraySize; p < pEnd; ++p) + p->~value_type(); + DoFreeSubarray(*pPtrArray); + } + + mItEnd = mItBegin; // mItBegin/mItEnd will not be dereferencable. + } + + + //template + //void deque::reset_lose_memory() + //{ + // // The reset_lose_memory function is a special extension function which unilaterally + // // resets the container to an empty state without freeing the memory of + // // the contained objects. This is useful for very quickly tearing down a + // // container built into scratch memory. 
+ // + // // Currently we are unable to get this reset_lose_memory operation to work correctly + // // as we haven't been able to find a good way to have a deque initialize + // // without allocating memory. We can lose the old memory, but DoInit + // // would necessarily do a ptrArray allocation. And this is not within + // // our definition of how reset_lose_memory works. + // base_type::DoInit(0); + // + //} + + + template + void deque::swap(deque& x) + { + #if defined(EASTL_DEQUE_LEGACY_SWAP_BEHAVIOUR_REQUIRES_COPY_CTOR) && EASTL_DEQUE_LEGACY_SWAP_BEHAVIOUR_REQUIRES_COPY_CTOR + if(mAllocator == x.mAllocator) // If allocators are equivalent... + DoSwap(x); + else // else swap the contents. + { + const this_type temp(*this); // Can't call eastl::swap because that would + *this = x; // itself call this member swap function. + x = temp; + } + #else + // NOTE(rparolin): The previous implementation required T to be copy-constructible in the fall-back case where + // allocators with unique instances copied elements. This was an unnecessary restriction and prevented the common + // usage of deque with non-copyable types (eg. eastl::deque or eastl::deque). + // + // The previous implementation violated the following requirements of deque::swap so the fall-back code has + // been removed. EASTL implicitly defines 'propagate_on_container_swap = false' therefore the fall-back case is + // undefined behaviour. We simply swap the contents and the allocator as that is the common expectation of + // users and does not put the container into an invalid state since it can not free its memory via its current + // allocator instance. + // + DoSwap(x); + #endif + } + + + template + template + void deque::DoInit(Integer n, Integer value, true_type) + { + base_type::DoInit(n); // Call the base uninitialized init function. + DoFillInit(value); + } + + + template + template + void deque::DoInit(InputIterator first, InputIterator last, false_type) + { + typedef typename eastl::iterator_traits::iterator_category IC; + DoInitFromIterator(first, last, IC()); + } + + + template + template + void deque::DoInitFromIterator(InputIterator first, InputIterator last, EASTL_ITC_NS::input_iterator_tag) + { + base_type::DoInit(0); // Call the base uninitialized init function, but don't actually allocate any values. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + // We have little choice but to turn through the source iterator and call + // push_back for each item. It can be slow because it will keep reallocating the + // container memory as we go. We are not allowed to use distance() on an InputIterator. + for(; first != last; ++first) // InputIterators by definition actually only allow you to iterate through them once. + { // Thus the standard *requires* that we do this (inefficient) implementation. + push_back(*first); // Luckily, InputIterators are in practice almost never used, so this code will likely never get executed. + } + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + clear(); + throw; + } + #endif + } + + + template + template + void deque::DoInitFromIterator(ForwardIterator first, ForwardIterator last, EASTL_ITC_NS::forward_iterator_tag) + { + typedef typename eastl::remove_const::type non_const_iterator_type; // If T is a const type (e.g. const int) then we need to initialize it as if it were non-const. 
+ typedef typename eastl::remove_const::type non_const_value_type; + + const size_type n = (size_type)eastl::distance(first, last); + value_type** pPtrArrayCurrent; + + base_type::DoInit(n); // Call the base uninitialized init function. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for(pPtrArrayCurrent = mItBegin.mpCurrentArrayPtr; pPtrArrayCurrent < mItEnd.mpCurrentArrayPtr; ++pPtrArrayCurrent) // Copy to the known-to-be-completely-used subarrays. + { + // We implment an algorithm here whereby we use uninitialized_copy() and advance() instead of just iterating from first to last and constructing as we go. The reason for this is that we can take advantage of POD data types and implement construction as memcpy operations. + ForwardIterator current(first); // To do: Implement a specialization of this algorithm for non-PODs which eliminates the need for 'current'. + + eastl::advance(current, kDequeSubarraySize); + eastl::uninitialized_copy((non_const_iterator_type)first, (non_const_iterator_type)current, (non_const_value_type*)*pPtrArrayCurrent); + first = current; + } + + eastl::uninitialized_copy((non_const_iterator_type)first, (non_const_iterator_type)last, (non_const_value_type*)mItEnd.mpBegin); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + for(iterator itCurrent(mItBegin), itEnd(pPtrArrayCurrent, *pPtrArrayCurrent); itCurrent != itEnd; ++itCurrent) + itCurrent.mpCurrent->~value_type(); + throw; + } + #endif + } + + + template + void deque::DoFillInit(const value_type& value) + { + value_type** pPtrArrayCurrent = mItBegin.mpCurrentArrayPtr; + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + while(pPtrArrayCurrent < mItEnd.mpCurrentArrayPtr) + { + eastl::uninitialized_fill(*pPtrArrayCurrent, *pPtrArrayCurrent + kDequeSubarraySize, value); + ++pPtrArrayCurrent; + } + eastl::uninitialized_fill(mItEnd.mpBegin, mItEnd.mpCurrent, value); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + for(iterator itCurrent(mItBegin), itEnd(pPtrArrayCurrent, *pPtrArrayCurrent); itCurrent != itEnd; ++itCurrent) + itCurrent.mpCurrent->~value_type(); + throw; + } + #endif + } + + + template + template + void deque::DoAssign(Integer n, Integer value, true_type) // false_type means this is the integer version instead of iterator version. + { + DoAssignValues(static_cast(n), static_cast(value)); + } + + + template + template + void deque::DoAssign(InputIterator first, InputIterator last, false_type) // false_type means this is the iterator version instead of integer version. + { + // Actually, the implementation below requires first/last to be a ForwardIterator and not just an InputIterator. + // But Paul Pedriana if you somehow need to work with an InputIterator and we can deal with it. + const size_type n = (size_type)eastl::distance(first, last); + const size_type nSize = size(); + + if(n > nSize) // If we are increasing the size... + { + InputIterator atEnd(first); + + eastl::advance(atEnd, (difference_type)nSize); + eastl::copy(first, atEnd, mItBegin); + insert(mItEnd, atEnd, last); + } + else // n is <= size. + { + iterator itEnd(eastl::copy(first, last, mItBegin)); + + if(n < nSize) // If we need to erase any trailing elements... + erase(itEnd, mItEnd); + } + } + + + template + void deque::DoAssignValues(size_type n, const value_type& value) + { + const size_type nSize = size(); + + if(n > nSize) // If we are increasing the size... 
+ { + eastl::fill(mItBegin, mItEnd, value); + insert(mItEnd, n - nSize, value); + } + else + { + erase(mItBegin + (difference_type)n, mItEnd); + eastl::fill(mItBegin, mItEnd, value); + } + } + + + template + template + void deque::DoInsert(const const_iterator& position, Integer n, Integer value, true_type) + { + DoInsertValues(position, (size_type)n, (value_type)value); + } + + + template + template + void deque::DoInsert(const const_iterator& position, const InputIterator& first, const InputIterator& last, false_type) + { + typedef typename eastl::iterator_traits::iterator_category IC; + DoInsertFromIterator(position, first, last, IC()); + } + + + template + template + void deque::DoInsertFromIterator(const_iterator position, const InputIterator& first, const InputIterator& last, EASTL_ITC_NS::forward_iterator_tag) + { + const size_type n = (size_type)eastl::distance(first, last); + + // This implementation is nearly identical to DoInsertValues below. + // If you make a bug fix to one, you will likely want to fix the other. + if(position.mpCurrent == mItBegin.mpCurrent) // If inserting at the beginning or into an empty container... + { + iterator itNewBegin(DoReallocSubarray(n, kSideFront)); // itNewBegin to mItBegin refers to memory that isn't initialized yet; so it's not truly a valid iterator. Or at least not a dereferencable one. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + // We would like to use move here instead of copy when possible, which would be useful for + // when inserting from a std::initializer_list, for example. + // To do: solve this by having a template or runtime parameter which specifies move vs copy. + eastl::uninitialized_copy(first, last, itNewBegin); + mItBegin = itNewBegin; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeSubarrays(itNewBegin.mpCurrentArrayPtr, mItBegin.mpCurrentArrayPtr); + throw; + } + #endif + } + else if(EASTL_UNLIKELY(position.mpCurrent == mItEnd.mpCurrent)) // If inserting at the end (i.e. appending)... + { + const iterator itNewEnd(DoReallocSubarray(n, kSideBack)); // mItEnd to itNewEnd refers to memory that isn't initialized yet; so it's not truly a valid iterator. Or at least not a dereferencable one. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + // We would like to use move here instead of copy when possible, which would be useful for + // when inserting from a std::initializer_list, for example. + // To do: solve this by having a template or runtime parameter which specifies move vs copy. + eastl::uninitialized_copy(first, last, mItEnd); + mItEnd = itNewEnd; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeSubarrays(mItEnd.mpCurrentArrayPtr + 1, itNewEnd.mpCurrentArrayPtr + 1); + throw; + } + #endif + } + else + { + const difference_type nInsertionIndex = position - mItBegin; + const size_type nSize = size(); + + if(nInsertionIndex < (difference_type)(nSize / 2)) // If the insertion index is in the front half of the deque... grow the deque at the front. + { + const iterator itNewBegin(DoReallocSubarray(n, kSideFront)); // itNewBegin to mItBegin refers to memory that isn't initialized yet; so it's not truly a valid iterator. Or at least not a dereferencable one. + const iterator itOldBegin(mItBegin); + const iterator itPosition(mItBegin + nInsertionIndex); // We need to reset this value because the reallocation above can invalidate iterators. 
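				// Worked example (illustrative): inserting n == 3 elements at nInsertionIndex == 5 copies the
				// first three old elements into the new front slots, shifts the remaining two old elements
				// before the insertion point down to the old begin, and writes the inserted range into the gap.
				// At nInsertionIndex == 2, one inserted element spills into the uninitialized front area, so
				// uninitialized_copy_copy() fills that area together with the old prefix and the remaining
				// inserted elements land at the old begin.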
+ + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + // We have a problem here: we would like to use move instead of copy, but it may be that the range to be inserted comes from + // this container and comes from the segment we need to move. So we can't use move operations unless we are careful to handle + // that situation. The newly inserted contents must be contents that were moved to and not moved from. To do: solve this. + if(nInsertionIndex >= (difference_type)n) // If the newly inserted items will be entirely within the old area... + { + iterator itUCopyEnd(mItBegin + (difference_type)n); + + eastl::uninitialized_copy(mItBegin, itUCopyEnd, itNewBegin); // This can throw. + itUCopyEnd = eastl::copy(itUCopyEnd, itPosition, itOldBegin); // Recycle 'itUCopyEnd' to mean something else. + eastl::copy(first, last, itUCopyEnd); + } + else // Else the newly inserted items are going within the newly allocated area at the front. + { + InputIterator mid(first); + + eastl::advance(mid, (difference_type)n - nInsertionIndex); + eastl::uninitialized_copy_copy(mItBegin, itPosition, first, mid, itNewBegin); // This can throw. + eastl::copy(mid, last, itOldBegin); + } + mItBegin = itNewBegin; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeSubarrays(itNewBegin.mpCurrentArrayPtr, mItBegin.mpCurrentArrayPtr); + throw; + } + #endif + } + else + { + const iterator itNewEnd(DoReallocSubarray(n, kSideBack)); + const iterator itOldEnd(mItEnd); + const difference_type nPushedCount = (difference_type)nSize - nInsertionIndex; + const iterator itPosition(mItEnd - nPushedCount); // We need to reset this value because the reallocation above can invalidate iterators. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + // We have a problem here: we would like to use move instead of copy, but it may be that the range to be inserted comes from + // this container and comes from the segment we need to move. So we can't use move operations unless we are careful to handle + // that situation. The newly inserted contents must be contents that were moved to and not moved from. To do: solve this. + if(nPushedCount > (difference_type)n) + { + const iterator itUCopyEnd(mItEnd - (difference_type)n); + + eastl::uninitialized_copy(itUCopyEnd, mItEnd, mItEnd); + eastl::copy_backward(itPosition, itUCopyEnd, itOldEnd); + eastl::copy(first, last, itPosition); + } + else + { + InputIterator mid(first); + + eastl::advance(mid, nPushedCount); + eastl::uninitialized_copy_copy(mid, last, itPosition, mItEnd, mItEnd); + eastl::copy(first, mid, itPosition); + } + mItEnd = itNewEnd; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeSubarrays(mItEnd.mpCurrentArrayPtr + 1, itNewEnd.mpCurrentArrayPtr + 1); + throw; + } + #endif + } + } + } + + + template + void deque::DoInsertValues(const_iterator position, size_type n, const value_type& value) + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(!(validate_iterator(position) & isf_valid))) + EASTL_FAIL_MSG("deque::insert -- invalid iterator"); + #endif + + // This implementation is nearly identical to DoInsertFromIterator above. + // If you make a bug fix to one, you will likely want to fix the other. + if(position.mpCurrent == mItBegin.mpCurrent) // If inserting at the beginning... + { + const iterator itNewBegin(DoReallocSubarray(n, kSideFront)); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + // Note that we don't make a temp copy of 'value' here. 
This is because in a + // deque, insertion at either the front or back doesn't cause a reallocation + // or move of data in the middle. That's a key feature of deques, in fact. + eastl::uninitialized_fill(itNewBegin, mItBegin, value); + mItBegin = itNewBegin; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeSubarrays(itNewBegin.mpCurrentArrayPtr, mItBegin.mpCurrentArrayPtr); + throw; + } + #endif + } + else if(EASTL_UNLIKELY(position.mpCurrent == mItEnd.mpCurrent)) // If inserting at the end (i.e. appending)... + { + const iterator itNewEnd(DoReallocSubarray(n, kSideBack)); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + // Note that we don't make a temp copy of 'value' here. This is because in a + // deque, insertion at either the front or back doesn't cause a reallocation + // or move of data in the middle. That's a key feature of deques, in fact. + eastl::uninitialized_fill(mItEnd, itNewEnd, value); + mItEnd = itNewEnd; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeSubarrays(mItEnd.mpCurrentArrayPtr + 1, itNewEnd.mpCurrentArrayPtr + 1); + throw; + } + #endif + } + else + { + // A key purpose of a deque is to implement insertions and removals more efficiently + // than with a vector. We are inserting into the middle of the deque here. A quick and + // dirty implementation of this would be to reallocate the subarrays and simply push + // all values in the middle upward like you would do with a vector. Instead we implement + // the minimum amount of reallocations needed but may need to do some value moving, + // as the subarray sizes need to remain constant and can have no holes in them. + const difference_type nInsertionIndex = position - mItBegin; + const size_type nSize = size(); + const value_type valueSaved(value); + + if(nInsertionIndex < (difference_type)(nSize / 2)) // If the insertion index is in the front half of the deque... grow the deque at the front. + { + const iterator itNewBegin(DoReallocSubarray(n, kSideFront)); + const iterator itOldBegin(mItBegin); + const iterator itPosition(mItBegin + nInsertionIndex); // We need to reset this value because the reallocation above can invalidate iterators. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + if(nInsertionIndex >= (difference_type)n) // If the newly inserted items will be entirely within the old area... + { + iterator itUCopyEnd(mItBegin + (difference_type)n); + + eastl::uninitialized_move_if_noexcept(mItBegin, itUCopyEnd, itNewBegin); // This can throw. + itUCopyEnd = eastl::move(itUCopyEnd, itPosition, itOldBegin); // Recycle 'itUCopyEnd' to mean something else. + eastl::fill(itUCopyEnd, itPosition, valueSaved); + } + else // Else the newly inserted items are going within the newly allocated area at the front. + { + eastl::uninitialized_move_fill(mItBegin, itPosition, itNewBegin, mItBegin, valueSaved); // This can throw. + eastl::fill(itOldBegin, itPosition, valueSaved); + } + mItBegin = itNewBegin; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeSubarrays(itNewBegin.mpCurrentArrayPtr, mItBegin.mpCurrentArrayPtr); + throw; + } + #endif + } + else // Else the insertion index is in the back half of the deque, so grow the deque at the back. + { + const iterator itNewEnd(DoReallocSubarray(n, kSideBack)); + const iterator itOldEnd(mItEnd); + const difference_type nPushedCount = (difference_type)nSize - nInsertionIndex; + const iterator itPosition(mItEnd - nPushedCount); // We need to reset this value because the reallocation above can invalidate iterators. 
+ + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + if(nPushedCount > (difference_type)n) // If the newly inserted items will be entirely within the old area... + { + iterator itUCopyEnd(mItEnd - (difference_type)n); + + eastl::uninitialized_move_if_noexcept(itUCopyEnd, mItEnd, mItEnd); // This can throw. + itUCopyEnd = eastl::move_backward(itPosition, itUCopyEnd, itOldEnd); // Recycle 'itUCopyEnd' to mean something else. + eastl::fill(itPosition, itUCopyEnd, valueSaved); + } + else // Else the newly inserted items are going within the newly allocated area at the back. + { + eastl::uninitialized_fill_move(mItEnd, itPosition + (difference_type)n, valueSaved, itPosition, mItEnd); // This can throw. + eastl::fill(itPosition, itOldEnd, valueSaved); + } + mItEnd = itNewEnd; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeSubarrays(mItEnd.mpCurrentArrayPtr + 1, itNewEnd.mpCurrentArrayPtr + 1); + throw; + } + #endif + } + } + } + + + template + inline void deque::DoSwap(this_type& x) + { + eastl::swap(mpPtrArray, x.mpPtrArray); + eastl::swap(mnPtrArraySize, x.mnPtrArraySize); + eastl::swap(mItBegin, x.mItBegin); + eastl::swap(mItEnd, x.mItEnd); + eastl::swap(mAllocator, x.mAllocator); // We do this even if EASTL_ALLOCATOR_COPY_ENABLED is 0. + + } + + + template + inline bool deque::validate() const + { + // To do: More detailed validation. + // To do: Try to make the validation resistant to crashes if the data is invalid. + if((end() - begin()) < 0) + return false; + return true; + } + + + template + inline int deque::validate_iterator(const_iterator i) const + { + // To do: We don't currently track isf_current, will need to make it do so. + // To do: Fix the validation below, as it will not catch all invalid iterators. + if((i - begin()) < 0) + return isf_none; + + if((end() - i) < 0) + return isf_none; + + if(i == end()) + return (isf_valid | isf_current); + + return (isf_valid | isf_current | isf_can_dereference); + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const deque& a, const deque& b) + { + return ((a.size() == b.size()) && eastl::equal(a.begin(), a.end(), b.begin())); + } + + template + inline bool operator!=(const deque& a, const deque& b) + { + return ((a.size() != b.size()) || !eastl::equal(a.begin(), a.end(), b.begin())); + } + + template + inline bool operator<(const deque& a, const deque& b) + { + return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); + } + + template + inline bool operator>(const deque& a, const deque& b) + { + return b < a; + } + + template + inline bool operator<=(const deque& a, const deque& b) + { + return !(b < a); + } + + template + inline bool operator>=(const deque& a, const deque& b) + { + return !(a < b); + } + + template + inline void swap(deque& a, deque& b) + { + a.swap(b); + } + + /////////////////////////////////////////////////////////////////////// + // erase / erase_if + // + // https://en.cppreference.com/w/cpp/container/deque/erase2 + /////////////////////////////////////////////////////////////////////// + template + void erase(deque& c, const U& value) + { + // Erases all elements that compare equal to value from the container. + c.erase(eastl::remove(c.begin(), c.end(), value), c.end()); + } + + template + void erase_if(deque& c, Predicate predicate) + { + // Erases all elements that satisfy the predicate pred from the container. 
+ c.erase(eastl::remove_if(c.begin(), c.end(), predicate), c.end()); + } + + +} // namespace eastl + + +EA_RESTORE_VC_WARNING(); +#if EASTL_EXCEPTIONS_ENABLED + EA_RESTORE_VC_WARNING(); +#endif + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/finally.h b/libkram/eastl/include/EASTL/finally.h new file mode 100644 index 00000000..b4ed5803 --- /dev/null +++ b/libkram/eastl/include/EASTL/finally.h @@ -0,0 +1,93 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// eastl::finally is an implementation of the popular cpp idiom RAII - Resource +// Acquisition Is Initialization. eastl::finally guarantees that the user +// provided callable will be executed upon whatever mechanism is used to leave +// the current scope. This can guard against user errors but this is a popular +// technique to write robust code in execution environments that have exceptions +// enabled. +// +// Example: +// void foo() +// { +// void* p = malloc(128); +// auto _ = eastl::make_finally([&] { free(p); }); +// +// // Code that may throw an exception... +// +// } // eastl::finally guaranteed to call 'free' at scope exit. +// +// References: +// * https://www.bfilipek.com/2017/04/finalact.html +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_FINALLY_H +#define EASTL_FINALLY_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include +#include + +namespace eastl +{ + /////////////////////////////////////////////////////////////////////////// + // finally + // + // finally is the type that calls the users callback on scope exit. + // + template + class finally + { + static_assert(!eastl::is_lvalue_reference_v, "eastl::finally requires the callable is passed as an rvalue reference."); + + Functor m_functor; + bool m_engaged = false; + + public: + finally(Functor f) : m_functor(eastl::move(f)), m_engaged(true) {} + + finally(finally&& other) : m_functor(eastl::move(other.m_functor)), m_engaged(other.m_engaged) + { + other.dismiss(); + } + + ~finally() { execute(); } + + finally(const finally&) = delete; + finally& operator=(const finally&) = delete; + finally& operator=(finally&&) = delete; + + inline void dismiss() { m_engaged = false; } + + inline void execute() + { + if (m_engaged) + m_functor(); + + dismiss(); + } + }; + + + /////////////////////////////////////////////////////////////////////////// + // make_finally + // + // this utility function is the standard mechansim to perform the required + // type deduction on the users provided callback inorder to create a + // 'finally' object. + // + template + auto make_finally(F&& f) + { + return finally(eastl::forward(f)); + } +} + +#endif // EASTL_FINALLY_H diff --git a/libkram/eastl/include/EASTL/fixed_allocator.h b/libkram/eastl/include/EASTL/fixed_allocator.h new file mode 100644 index 00000000..488eae4a --- /dev/null +++ b/libkram/eastl/include/EASTL/fixed_allocator.h @@ -0,0 +1,455 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements the following +// fixed_allocator +// fixed_allocator_with_overflow +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_FIXED_ALLOCATOR_H +#define EASTL_FIXED_ALLOCATOR_H + + +#include +#include +#include +#include +#include +#include + +EA_DISABLE_ALL_VC_WARNINGS(); + +#include + +EA_RESTORE_ALL_VC_WARNINGS(); + +EA_DISABLE_VC_WARNING(4275); // non dll-interface class used as base for DLL-interface classkey 'identifier' + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /////////////////////////////////////////////////////////////////////////// + // fixed_allocator + /////////////////////////////////////////////////////////////////////////// + + /// fixed_allocator + /// + /// Implements an allocator which allocates a single fixed size where + /// the size, alignment, and memory used for the pool is defined at + /// runtime by the user. This is different from fixed containers + /// such as fixed_list whereby the size and alignment are determined + /// at compile time and the memory is directly built into the container's + /// member data. + /// + /// If the pool's memory is exhausted or was never initialized, the + /// allocate function returns NULL. Consider the fixed_allocator_with_overflow + /// class as an alternative in order to deal with this situation. + /// + /// This class requires the user to call container.get_allocator().init() + /// after constructing the container. There currently isn't a way to + /// construct the container with the initialization parameters, though + /// with some effort such a thing could probably be made possible. + /// It's not as simple as it might first seem, due to the non-copyable + /// nature of fixed allocators. A side effect of this limitation is that + /// you cannot copy-construct a container using fixed_allocators. + /// + /// Another side-effect is that you cannot swap two containers using + /// a fixed_allocator, as a swap requires temporary memory allocated by + /// an equivalent allocator, and such a thing cannot be done implicitly. + /// A workaround for the swap limitation is that you can implement your + /// own swap whereby you provide an explicitly created temporary object. + /// + /// Note: Be careful to set the allocator's node size to the size of the + /// container node and not the size of the contained object. Note that the + /// example code below uses IntListNode. + /// + /// Example usage: + /// typedef eastl::list IntList; + /// typedef IntList::node_type IntListNode; + /// + /// IntListNode buffer[200]; + /// IntList intList; + /// intList.get_allocator().init(buffer, sizeof(buffer), sizeof(IntListNode), __alignof(IntListNode)); + /// + class EASTL_API fixed_allocator : public fixed_pool_base + { + public: + /// fixed_allocator + /// + /// Default constructor. The user usually will need to call init() after + /// constructing via this constructor. + /// + fixed_allocator(const char* /*pName*/ = EASTL_FIXED_POOL_DEFAULT_NAME) + : fixed_pool_base(NULL) + { + } + + + /// fixed_allocator + /// + /// Copy constructor. The user usually will need to call init() after + /// constructing via this constructor. 
By their nature, fixed-allocators + /// cannot be copied in any useful way, as by their nature the user + /// must manually initialize them. + /// + fixed_allocator(const fixed_allocator&) + : fixed_pool_base(NULL) + { + } + + + /// operator= + /// + /// By their nature, fixed-allocators cannot be copied in any + /// useful way, as by their nature the user must manually + /// initialize them. + /// + fixed_allocator& operator=(const fixed_allocator&) + { + return *this; + } + + + // init + // + // No init here, as the base class version is sufficient. + // + //void init(void* pMemory, size_t memorySize, size_t nodeSize, + // size_t alignment, size_t alignmentOffset = 0); + + + /// allocate + /// + /// Allocates a new object of the size specified upon class initialization. + /// Returns NULL if there is no more memory. + /// + void* allocate(size_t n, int /*flags*/ = 0) + { + // To consider: Verify that 'n' is what the user initialized us with. + + Link* pLink = mpHead; + + if(pLink) // If we have space... + { + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + if(++mnCurrentSize > mnPeakSize) + mnPeakSize = mnCurrentSize; + #endif + + mpHead = pLink->mpNext; + return pLink; + } + else + { + // If there's no free node in the free list, just + // allocate another from the reserved memory area + + if(mpNext != mpCapacity) + { + pLink = mpNext; + + mpNext = reinterpret_cast(reinterpret_cast(mpNext) + n); + + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + if(++mnCurrentSize > mnPeakSize) + mnPeakSize = mnCurrentSize; + #endif + + return pLink; + } + + // EASTL_ASSERT(false); To consider: enable this assert. However, we intentionally disable it because this isn't necessarily an assertable error. + return NULL; + } + } + + + /// allocate + /// + void* allocate(size_t n, size_t /*alignment*/, size_t /*offset*/, int flags = 0) + { + return allocate(n, flags); + } + + + /// deallocate + /// + /// Frees the given object which was allocated by allocate(). + /// If the given node was not allocated by allocate() then the behaviour + /// is undefined. + /// + void deallocate(void* p, size_t) + { + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + --mnCurrentSize; + #endif + + ((Link*)p)->mpNext = mpHead; + mpHead = ((Link*)p); + } + + + using fixed_pool_base::can_allocate; + + + const char* get_name() const + { + return EASTL_FIXED_POOL_DEFAULT_NAME; + } + + + void set_name(const char*) + { + // Nothing to do. We don't allocate memory. + } + + }; // fixed_allocator + + bool operator==(const fixed_allocator& a, const fixed_allocator& b); + bool operator!=(const fixed_allocator& a, const fixed_allocator& b); + + + + /////////////////////////////////////////////////////////////////////////// + // fixed_allocator_with_overflow + /////////////////////////////////////////////////////////////////////////// + + /// fixed_allocator_with_overflow + /// + /// Implements an allocator which allocates a single fixed size where + /// the size, alignment, and memory used for the pool is defined at + /// runtime by the user. This is different from fixed containers + /// such as fixed_list whereby the size and alignment are determined + /// at compile time and the memory is directly built into the container's + /// member data. + /// + /// Note: Be careful to set the allocator's node size to the size of the + /// container node and not the size of the contained object. Note that the + /// example code below uses IntListNode. + /// + /// This class requires the user to call container.get_allocator().init() + /// after constructing the container. 
There currently isn't a way to + /// construct the container with the initialization parameters, though + /// with some effort such a thing could probably be made possible. + /// It's not as simple as it might first seem, due to the non-copyable + /// nature of fixed allocators. A side effect of this limitation is that + /// you cannot copy-construct a container using fixed_allocators. + /// + /// Another side-effect is that you cannot swap two containers using + /// a fixed_allocator, as a swap requires temporary memory allocated by + /// an equivalent allocator, and such a thing cannot be done implicitly. + /// A workaround for the swap limitation is that you can implement your + /// own swap whereby you provide an explicitly created temporary object. + /// + /// Example usage: + /// typedef eastl::list IntList; + /// typedef IntList::node_type IntListNode; + /// + /// IntListNode buffer[200]; + /// IntList intList; + /// intList.get_allocator().init(buffer, sizeof(buffer), sizeof(IntListNode), __alignof(IntListNode)); + /// + class EASTL_API fixed_allocator_with_overflow : public fixed_pool_base + { + public: + /// fixed_allocator_with_overflow + /// + /// Default constructor. The user usually will need to call init() after + /// constructing via this constructor. + /// + fixed_allocator_with_overflow(const char* pName = EASTL_FIXED_POOL_DEFAULT_NAME) + : fixed_pool_base(NULL) + , mOverflowAllocator(pName) + , mpPoolBegin(nullptr) + , mpPoolEnd(nullptr) + , mnNodeSize(0) + { + } + + + /// fixed_allocator_with_overflow + /// + /// Copy constructor. The user usually will need to call init() after + /// constructing via this constructor. By their nature, fixed-allocators + /// cannot be copied in any useful way, as by their nature the user + /// must manually initialize them. + /// + fixed_allocator_with_overflow(const fixed_allocator_with_overflow&) + : fixed_pool_base(NULL) + , mpPoolBegin(nullptr) + , mpPoolEnd(nullptr) + , mnNodeSize(0) + { + } + + + /// operator= + /// + /// By their nature, fixed-allocators cannot be copied in any + /// useful way, as by their nature the user must manually + /// initialize them. + /// + fixed_allocator_with_overflow& operator=(const fixed_allocator_with_overflow& x) + { + #if EASTL_ALLOCATOR_COPY_ENABLED + mOverflowAllocator = x.mOverflowAllocator; + #else + (void)x; + #endif + + return *this; + } + + + /// init + /// + void init(void* pMemory, size_t memorySize, size_t nodeSize, + size_t alignment, size_t alignmentOffset = 0) + { + fixed_pool_base::init(pMemory, memorySize, nodeSize, alignment, alignmentOffset); + + mpPoolBegin = pMemory; + mpPoolEnd = (void*)((uintptr_t)pMemory + memorySize); + mnNodeSize = (eastl_size_t)nodeSize; + } + + + /// allocate + /// + /// Allocates a new object of the size specified upon class initialization. + /// Returns NULL if there is no more memory. + /// + void* allocate(size_t /*n*/, int /*flags*/ = 0) + { + // To consider: Verify that 'n' is what the user initialized us with. + + void* p; + + if(mpHead) // If we have space... 
+ { + p = mpHead; + mpHead = mpHead->mpNext; + } + else + { + // If there's no free node in the free list, just + // allocate another from the reserved memory area + + if (mpNext != mpCapacity) + { + p = mpNext; + mpNext = reinterpret_cast(reinterpret_cast(mpNext) + mnNodeSize); + } + else + p = mOverflowAllocator.allocate(mnNodeSize); + } + + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + if(p && (++mnCurrentSize > mnPeakSize)) + mnPeakSize = mnCurrentSize; + #endif + + return p; + } + + + /// allocate + /// + void* allocate(size_t n, size_t /*alignment*/, size_t /*offset*/, int flags = 0) + { + return allocate(n, flags); + } + + + /// deallocate + /// + /// Frees the given object which was allocated by allocate(). + /// If the given node was not allocated by allocate() then the behaviour + /// is undefined. + /// + void deallocate(void* p, size_t) + { + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + --mnCurrentSize; + #endif + + if((p >= mpPoolBegin) && (p < mpPoolEnd)) + { + ((Link*)p)->mpNext = mpHead; + mpHead = ((Link*)p); + } + else + mOverflowAllocator.deallocate(p, (size_t)mnNodeSize); + } + + + using fixed_pool_base::can_allocate; + + + const char* get_name() const + { + return mOverflowAllocator.get_name(); + } + + + void set_name(const char* pName) + { + mOverflowAllocator.set_name(pName); + } + + protected: + EASTLAllocatorType mOverflowAllocator; // To consider: Allow the user to define the type of this, presumably via a template parameter. + void* mpPoolBegin; // To consider: We have these member variables and ideally we shouldn't need them. The problem is that + void* mpPoolEnd; // the information about the pool buffer and object size is stored in the owning container + eastl_size_t mnNodeSize; // and we can't have access to it without increasing the amount of code we need and by templating + // more code. It may turn out that simply storing data here is smaller in the end. + }; // fixed_allocator_with_overflow // Granted, this class is usually used for debugging purposes, but perhaps there is an elegant solution. + + bool operator==(const fixed_allocator_with_overflow& a, const fixed_allocator_with_overflow& b); + bool operator!=(const fixed_allocator_with_overflow& a, const fixed_allocator_with_overflow& b); + + + + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + inline bool operator==(const fixed_allocator&, const fixed_allocator&) + { + return false; + } + + inline bool operator!=(const fixed_allocator&, const fixed_allocator&) + { + return false; + } + + inline bool operator==(const fixed_allocator_with_overflow&, const fixed_allocator_with_overflow&) + { + return false; + } + + inline bool operator!=(const fixed_allocator_with_overflow&, const fixed_allocator_with_overflow&) + { + return false; + } + + +} // namespace eastl + + +EA_RESTORE_VC_WARNING(); + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/fixed_function.h b/libkram/eastl/include/EASTL/fixed_function.h new file mode 100644 index 00000000..6aed768a --- /dev/null +++ b/libkram/eastl/include/EASTL/fixed_function.h @@ -0,0 +1,218 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_FIXED_FUNCTION_H +#define EASTL_FIXED_FUNCTION_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include + +namespace eastl +{ + template + class fixed_function; + + namespace internal + { + template + struct is_fixed_function + : public eastl::false_type {}; + + template + struct is_fixed_function> + : public eastl::true_type {}; + + template + EA_CONSTEXPR bool is_fixed_function_v = is_fixed_function::value; + } + + #define EASTL_INTERNAL_FIXED_FUNCTION_STATIC_ASSERT(TYPE) \ + static_assert(sizeof(TYPE) <= sizeof(typename Base::FunctorStorageType), \ + "fixed_function local buffer is not large enough to hold the callable object.") + + #define EASTL_INTERNAL_FIXED_FUNCTION_NEW_SIZE_STATIC_ASSERT(NEW_SIZE_IN_BYTES) \ + static_assert(SIZE_IN_BYTES >= NEW_SIZE_IN_BYTES, \ + "fixed_function local buffer is not large enough to hold the new fixed_function type.") + + template + using EASTL_DISABLE_OVERLOAD_IF_FIXED_FUNCTION = + eastl::disable_if_t>>; + + + // fixed_function + // + template + class fixed_function : public internal::function_detail + { + using Base = internal::function_detail; + + public: + using typename Base::result_type; + + fixed_function() EA_NOEXCEPT = default; + fixed_function(std::nullptr_t p) EA_NOEXCEPT + : Base(p) + { + } + + fixed_function(const fixed_function& other) + : Base(other) + { + } + + fixed_function(fixed_function&& other) + : Base(eastl::move(other)) + { + } + + template > + fixed_function(Functor functor) + : Base(eastl::move(functor)) + { + EASTL_INTERNAL_FIXED_FUNCTION_STATIC_ASSERT(Functor); + } + + template + fixed_function(const fixed_function& other) + : Base(other) + { + EASTL_INTERNAL_FIXED_FUNCTION_NEW_SIZE_STATIC_ASSERT(NEW_SIZE_IN_BYTES); + } + + template + fixed_function(fixed_function&& other) + : Base(eastl::move(other)) + { + EASTL_INTERNAL_FIXED_FUNCTION_NEW_SIZE_STATIC_ASSERT(NEW_SIZE_IN_BYTES); + } + + ~fixed_function() EA_NOEXCEPT = default; + + fixed_function& operator=(const fixed_function& other) + { + Base::operator=(other); + return *this; + } + + fixed_function& operator=(fixed_function&& other) + { + Base::operator=(eastl::move(other)); + return *this; + } + + fixed_function& operator=(std::nullptr_t p) EA_NOEXCEPT + { + Base::operator=(p); + return *this; + } + + template + fixed_function& operator=(const fixed_function& other) + { + EASTL_INTERNAL_FIXED_FUNCTION_NEW_SIZE_STATIC_ASSERT(NEW_SIZE_IN_BYTES); + + Base::operator=(other); + return *this; + } + + template + fixed_function& operator=(fixed_function&& other) + { + EASTL_INTERNAL_FIXED_FUNCTION_NEW_SIZE_STATIC_ASSERT(NEW_SIZE_IN_BYTES); + + Base::operator=(eastl::move(other)); + return *this; + } + + template > + fixed_function& operator=(Functor&& functor) + { + EASTL_INTERNAL_FIXED_FUNCTION_STATIC_ASSERT(eastl::decay_t); + Base::operator=(eastl::forward(functor)); + return *this; + } + + template + fixed_function& operator=(eastl::reference_wrapper f) EA_NOEXCEPT + { + EASTL_INTERNAL_FIXED_FUNCTION_STATIC_ASSERT(eastl::reference_wrapper); + Base::operator=(f); + return *this; + } + + void swap(fixed_function& other) EA_NOEXCEPT + { + Base::swap(other); + } + + explicit operator bool() const EA_NOEXCEPT + { + return Base::operator bool(); + } + + R operator ()(Args... 
args) const + { + return Base::operator ()(eastl::forward(args)...); + } + + #if EASTL_RTTI_ENABLED + const std::type_info& target_type() const EA_NOEXCEPT + { + return Base::target_type(); + } + + template + Functor* target() EA_NOEXCEPT + { + return Base::target(); + } + + template + const Functor* target() const EA_NOEXCEPT + { + return Base::target(); + } + #endif + }; + + template + bool operator==(const fixed_function& f, std::nullptr_t) EA_NOEXCEPT + { + return !f; + } + + template + bool operator==(std::nullptr_t, const fixed_function& f) EA_NOEXCEPT + { + return !f; + } + + template + bool operator!=(const fixed_function& f, std::nullptr_t) EA_NOEXCEPT + { + return !!f; + } + + template + bool operator!=(std::nullptr_t, const fixed_function& f) EA_NOEXCEPT + { + return !!f; + } + + template + void swap(fixed_function& lhs, fixed_function& rhs) + { + lhs.swap(rhs); + } + +} // namespace eastl + +#endif // EASTL_FIXED_FUNCTION_H diff --git a/libkram/eastl/include/EASTL/fixed_hash_map.h b/libkram/eastl/include/EASTL/fixed_hash_map.h new file mode 100644 index 00000000..af6663dd --- /dev/null +++ b/libkram/eastl/include/EASTL/fixed_hash_map.h @@ -0,0 +1,822 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements a hash_map and hash_multimap which use a fixed size +// memory pool for its buckets and nodes. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_FIXED_HASH_MAP_H +#define EASTL_FIXED_HASH_MAP_H + + +#include +#include + +EA_DISABLE_VC_WARNING(4127) // Conditional expression is constant + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + +namespace eastl +{ + /// EASTL_FIXED_HASH_MAP_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// In the case of fixed-size containers, the allocator name always refers + /// to overflow allocations. + /// + #ifndef EASTL_FIXED_HASH_MAP_DEFAULT_NAME + #define EASTL_FIXED_HASH_MAP_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_hash_map" // Unless the user overrides something, this is "EASTL fixed_hash_map". + #endif + + #ifndef EASTL_FIXED_HASH_MULTIMAP_DEFAULT_NAME + #define EASTL_FIXED_HASH_MULTIMAP_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_hash_multimap" // Unless the user overrides something, this is "EASTL fixed_hash_multimap". + #endif + + + /// EASTL_FIXED_HASH_MAP_DEFAULT_ALLOCATOR + /// EASTL_FIXED_HASH_MULTIMAP_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_FIXED_HASH_MAP_DEFAULT_ALLOCATOR + #define EASTL_FIXED_HASH_MAP_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_HASH_MAP_DEFAULT_NAME) + #endif + + #ifndef EASTL_FIXED_HASH_MULTIMAP_DEFAULT_ALLOCATOR + #define EASTL_FIXED_HASH_MULTIMAP_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_HASH_MULTIMAP_DEFAULT_NAME) + #endif + + + + /// fixed_hash_map + /// + /// Implements a hash_map with a fixed block of memory identified by the nodeCount and bucketCount + /// template parameters. + /// + /// Template parameters: + /// Key The key type for the map. This is a map of Key to T (value). + /// T The value type for the map. 
+ /// nodeCount The max number of objects to contain. This value must be >= 1. + /// bucketCount The number of buckets to use. This value must be >= 2. + /// bEnableOverflow Whether or not we should use the global heap if our object pool is exhausted. + /// Hash hash_set hash function. See hash_set. + /// Predicate hash_set equality testing function. See hash_set. + /// + template , typename Predicate = eastl::equal_to, bool bCacheHashCode = false, typename OverflowAllocator = EASTLAllocatorType> + class fixed_hash_map : public hash_map::node_type), + nodeCount, + EASTL_ALIGN_OF(eastl::pair), + 0, + bEnableOverflow, + OverflowAllocator>, + bCacheHashCode> + { + public: + typedef fixed_hashtable_allocator::node_type), nodeCount, EASTL_ALIGN_OF(eastl::pair), 0, + bEnableOverflow, OverflowAllocator> fixed_allocator_type; + typedef typename fixed_allocator_type::overflow_allocator_type overflow_allocator_type; + typedef hash_map base_type; + typedef fixed_hash_map this_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::size_type size_type; + + enum { kMaxSize = nodeCount }; + + using base_type::mAllocator; + using base_type::clear; + + protected: + node_type** mBucketBuffer[bucketCount + 1]; // '+1' because the hash table needs a null terminating bucket. + char mNodeBuffer[fixed_allocator_type::kBufferSize]; // kBufferSize will take into account alignment requirements. + + public: + explicit fixed_hash_map(const overflow_allocator_type& overflowAllocator); + + explicit fixed_hash_map(const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate()); + + fixed_hash_map(const Hash& hashFunction, + const Predicate& predicate, + const overflow_allocator_type& overflowAllocator); + + template + fixed_hash_map(InputIterator first, InputIterator last, + const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate()); + + fixed_hash_map(const this_type& x); + fixed_hash_map(this_type&& x); + fixed_hash_map(this_type&& x, const overflow_allocator_type& overflowAllocator); + fixed_hash_map(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator = EASTL_FIXED_HASH_MAP_DEFAULT_ALLOCATOR); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + + size_type max_size() const; + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + + void clear(bool clearBuckets); + }; // fixed_hash_map + + + + + + /// fixed_hash_multimap + /// + /// Implements a hash_multimap with a fixed block of memory identified by the nodeCount and bucketCount + /// template parameters. + /// + /// Template parameters: + /// Key The key type for the map. This is a map of Key to T (value). + /// T The value type for the map. + /// nodeCount The max number of objects to contain. This value must be >= 1. + /// bucketCount The number of buckets to use. This value must be >= 2. + /// bEnableOverflow Whether or not we should use the global heap if our object pool is exhausted. + /// Hash hash_set hash function. See hash_set. + /// Predicate hash_set equality testing function. See hash_set. 
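+	///
+	/// Example usage (an illustrative sketch only; the node and bucket counts below are
+	/// arbitrary, the remaining template parameters are left at their defaults, and the
+	/// arguments follow the parameter order described above):
+	///     typedef eastl::fixed_hash_multimap<int, float, 32, 37> IdToWeightMap;
+	///
+	///     IdToWeightMap map;
+	///     map.insert(eastl::make_pair(7, 1.0f));
+	///     map.insert(eastl::make_pair(7, 2.0f));   // duplicate keys are allowed in a multimap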
+ /// + template , typename Predicate = eastl::equal_to, bool bCacheHashCode = false, typename OverflowAllocator = EASTLAllocatorType> + class fixed_hash_multimap : public hash_multimap::node_type), + nodeCount, + EASTL_ALIGN_OF(eastl::pair), + 0, + bEnableOverflow, + OverflowAllocator>, + bCacheHashCode> + { + public: + typedef fixed_hashtable_allocator::node_type), nodeCount, EASTL_ALIGN_OF(eastl::pair), 0, + bEnableOverflow, OverflowAllocator> fixed_allocator_type; + typedef typename fixed_allocator_type::overflow_allocator_type overflow_allocator_type; + typedef hash_multimap base_type; + typedef fixed_hash_multimap this_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::size_type size_type; + + enum { kMaxSize = nodeCount }; + + using base_type::mAllocator; + using base_type::clear; + + protected: + node_type** mBucketBuffer[bucketCount + 1]; // '+1' because the hash table needs a null terminating bucket. + char mNodeBuffer[fixed_allocator_type::kBufferSize]; // kBufferSize will take into account alignment requirements. + + public: + explicit fixed_hash_multimap(const overflow_allocator_type& overflowAllocator); + + explicit fixed_hash_multimap(const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate()); + + fixed_hash_multimap(const Hash& hashFunction, + const Predicate& predicate, + const overflow_allocator_type& overflowAllocator); + + template + fixed_hash_multimap(InputIterator first, InputIterator last, + const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate()); + + fixed_hash_multimap(const this_type& x); + fixed_hash_multimap(this_type&& x); + fixed_hash_multimap(this_type&& x, const overflow_allocator_type& overflowAllocator); + fixed_hash_multimap(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator = EASTL_FIXED_HASH_MULTIMAP_DEFAULT_ALLOCATOR); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + + size_type max_size() const; + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + + void clear(bool clearBuckets); + }; // fixed_hash_multimap + + + + + + + /////////////////////////////////////////////////////////////////////// + // fixed_hash_map + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_hash_map:: + fixed_hash_map(const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), Hash(), + Predicate(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. 
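+		// (When overflow is disabled the bucket array can never be reallocated, so an
+		// effectively infinite max load factor guarantees that no rehash is ever attempted.)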
+ + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MAP_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + inline fixed_hash_map:: + fixed_hash_map(const Hash& hashFunction, + const Predicate& predicate) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), hashFunction, + predicate, fixed_allocator_type(NULL, mBucketBuffer)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MAP_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + inline fixed_hash_map:: + fixed_hash_map(const Hash& hashFunction, + const Predicate& predicate, + const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), hashFunction, + predicate, fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MAP_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + template + fixed_hash_map:: + fixed_hash_map(InputIterator first, InputIterator last, + const Hash& hashFunction, + const Predicate& predicate) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), hashFunction, + predicate, fixed_allocator_type(NULL, mBucketBuffer)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MAP_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + base_type::insert(first, last); + } + + + template + inline fixed_hash_map:: + fixed_hash_map(const this_type& x) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), x.hash_function(), + x.equal_function(), fixed_allocator_type(NULL, mBucketBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_map:: + fixed_hash_map(this_type&& x) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), x.hash_function(), + x.equal_function(), fixed_allocator_type(NULL, mBucketBuffer)) + { + // This implementation is the same as above. If we could rely on using C++11 delegating constructor support then we could just call that here. + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. 
+ + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_map:: + fixed_hash_map(this_type&& x, const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), x.hash_function(), + x.equal_function(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + // This implementation is the same as above. If we could rely on using C++11 delegating constructor support then we could just call that here. + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_map:: + fixed_hash_map(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), Hash(), + Predicate(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MAP_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + base_type::insert(ilist.begin(), ilist.end()); + } + + + template + inline typename fixed_hash_map::this_type& + fixed_hash_map::operator=(const this_type& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline typename fixed_hash_map::this_type& + fixed_hash_map::operator=(this_type&& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline typename fixed_hash_map::this_type& + fixed_hash_map::operator=(std::initializer_list ilist) + { + base_type::clear(); + base_type::insert(ilist.begin(), ilist.end()); + return *this; + } + + + template + inline void fixed_hash_map:: + swap(this_type& x) + { + // Fixed containers use a special swap that can deal with excessively large buffers. 
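+		// (fixed_swap is expected to fall back to a heap-allocated temporary when the
+		// container object is too large for the stack, hence "excessively large buffers".)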
+ eastl::fixed_swap(*this, x); + } + + + template + inline void fixed_hash_map:: + reset_lose_memory() + { + base_type::mnBucketCount = (size_type)base_type::mRehashPolicy.GetPrevBucketCount((uint32_t)bucketCount); + base_type::mnElementCount = 0; + base_type::mRehashPolicy.mnNextResize = 0; + base_type::get_allocator().reset(mNodeBuffer); + } + + + template + inline typename fixed_hash_map::size_type + fixed_hash_map::max_size() const + { + return kMaxSize; + } + + + template + inline const typename fixed_hash_map::overflow_allocator_type& + fixed_hash_map::get_overflow_allocator() const EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline typename fixed_hash_map::overflow_allocator_type& + fixed_hash_map::get_overflow_allocator() EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline void fixed_hash_map:: + set_overflow_allocator(const overflow_allocator_type& allocator) + { + mAllocator.set_overflow_allocator(allocator); + } + + + template + inline void fixed_hash_map:: + clear(bool clearBuckets) + { + base_type::DoFreeNodes(base_type::mpBucketArray, base_type::mnBucketCount); + if(clearBuckets) + { + base_type::DoFreeBuckets(base_type::mpBucketArray, base_type::mnBucketCount); + reset_lose_memory(); + } + base_type::mpBucketArray = (node_type**)mBucketBuffer; + base_type::mnElementCount = 0; + } + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline void swap(fixed_hash_map& a, + fixed_hash_map& b) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(a, b); + } + + + + + /////////////////////////////////////////////////////////////////////// + // fixed_hash_multimap + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_hash_multimap:: + fixed_hash_multimap(const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), Hash(), + Predicate(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MULTIMAP_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + inline fixed_hash_multimap:: + fixed_hash_multimap(const Hash& hashFunction, + const Predicate& predicate) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), hashFunction, + predicate, fixed_allocator_type(NULL, mBucketBuffer)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. 
+ + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MULTIMAP_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + inline fixed_hash_multimap:: + fixed_hash_multimap(const Hash& hashFunction, + const Predicate& predicate, + const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), hashFunction, + predicate, fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MULTIMAP_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + template + fixed_hash_multimap:: + fixed_hash_multimap(InputIterator first, InputIterator last, + const Hash& hashFunction, + const Predicate& predicate) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), hashFunction, + predicate, fixed_allocator_type(NULL, mBucketBuffer)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MULTIMAP_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + base_type::insert(first, last); + } + + + template + inline fixed_hash_multimap:: + fixed_hash_multimap(const this_type& x) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), x.hash_function(), + x.equal_function(),fixed_allocator_type(NULL, mBucketBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_multimap:: + fixed_hash_multimap(this_type&& x) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), x.hash_function(), + x.equal_function(),fixed_allocator_type(NULL, mBucketBuffer)) + { + // This implementation is the same as above. If we could rely on using C++11 delegating constructor support then we could just call that here. + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_multimap:: + fixed_hash_multimap(this_type&& x, const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), x.hash_function(), + x.equal_function(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + // This implementation is the same as above. If we could rely on using C++11 delegating constructor support then we could just call that here. 
+ mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_multimap:: + fixed_hash_multimap(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), Hash(), + Predicate(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MULTIMAP_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + base_type::insert(ilist.begin(), ilist.end()); + } + + + template + inline typename fixed_hash_multimap::this_type& + fixed_hash_multimap::operator=(const this_type& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline typename fixed_hash_multimap::this_type& + fixed_hash_multimap::operator=(this_type&& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline typename fixed_hash_multimap::this_type& + fixed_hash_multimap::operator=(std::initializer_list ilist) + { + base_type::clear(); + base_type::insert(ilist.begin(), ilist.end()); + return *this; + } + + + template + inline void fixed_hash_multimap:: + swap(this_type& x) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(*this, x); + } + + + template + inline void fixed_hash_multimap:: + reset_lose_memory() + { + base_type::mnBucketCount = (size_type)base_type::mRehashPolicy.GetPrevBucketCount((uint32_t)bucketCount); + base_type::mnElementCount = 0; + base_type::mRehashPolicy.mnNextResize = 0; + base_type::get_allocator().reset(mNodeBuffer); + } + + + template + inline typename fixed_hash_multimap::size_type + fixed_hash_multimap::max_size() const + { + return kMaxSize; + } + + + template + inline const typename fixed_hash_multimap::overflow_allocator_type& + fixed_hash_multimap::get_overflow_allocator() const EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline typename fixed_hash_multimap::overflow_allocator_type& + fixed_hash_multimap::get_overflow_allocator() EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline void fixed_hash_multimap::set_overflow_allocator(const overflow_allocator_type& allocator) + { + mAllocator.set_overflow_allocator(allocator); + } + + + template + inline void fixed_hash_multimap:: + clear(bool clearBuckets) + { + base_type::DoFreeNodes(base_type::mpBucketArray, base_type::mnBucketCount); + if(clearBuckets) + { + base_type::DoFreeBuckets(base_type::mpBucketArray, base_type::mnBucketCount); + reset_lose_memory(); + } + base_type::mpBucketArray = (node_type**)mBucketBuffer; + base_type::mnElementCount = 0; + } + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline void swap(fixed_hash_multimap& a, + fixed_hash_multimap& b) + { + // Fixed containers use a special swap that can deal with excessively large buffers. 
+ eastl::fixed_swap(a, b); + } + + + +} // namespace eastl + +EA_RESTORE_VC_WARNING() + +#endif // Header include guard + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/fixed_hash_set.h b/libkram/eastl/include/EASTL/fixed_hash_set.h new file mode 100644 index 00000000..0db9f49f --- /dev/null +++ b/libkram/eastl/include/EASTL/fixed_hash_set.h @@ -0,0 +1,782 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements a hash_set which uses a fixed size memory pool for +// its buckets and nodes. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_FIXED_HASH_SET_H +#define EASTL_FIXED_HASH_SET_H + + +#include +#include + +EA_DISABLE_VC_WARNING(4127) // Conditional expression is constant + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + /// EASTL_FIXED_HASH_SET_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// In the case of fixed-size containers, the allocator name always refers + /// to overflow allocations. + /// + #ifndef EASTL_FIXED_HASH_SET_DEFAULT_NAME + #define EASTL_FIXED_HASH_SET_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_hash_set" // Unless the user overrides something, this is "EASTL fixed_hash_set". + #endif + + #ifndef EASTL_FIXED_HASH_MULTISET_DEFAULT_NAME + #define EASTL_FIXED_HASH_MULTISET_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_hash_multiset" // Unless the user overrides something, this is "EASTL fixed_hash_multiset". + #endif + + + /// EASTL_FIXED_HASH_SET_DEFAULT_ALLOCATOR + /// EASTL_FIXED_HASH_MULTISET_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_FIXED_HASH_SET_DEFAULT_ALLOCATOR + #define EASTL_FIXED_HASH_SET_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_HASH_SET_DEFAULT_NAME) + #endif + + #ifndef EASTL_FIXED_HASH_MULTISET_DEFAULT_ALLOCATOR + #define EASTL_FIXED_HASH_MULTISET_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_HASH_MULTISET_DEFAULT_NAME) + #endif + + + + /// fixed_hash_set + /// + /// Implements a hash_set with a fixed block of memory identified by the nodeCount and bucketCount + /// template parameters. + /// + /// Template parameters: + /// Value The type of object the hash_set holds. + /// nodeCount The max number of objects to contain. This value must be >= 1. + /// bucketCount The number of buckets to use. This value must be >= 2. + /// bEnableOverflow Whether or not we should use the global heap if our object pool is exhausted. + /// Hash hash_set hash function. See hash_set. + /// Predicate hash_set equality testing function. See hash_set. 
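+	///
+	/// Example usage (an illustrative sketch only; the node and bucket counts below are
+	/// arbitrary, the remaining template parameters are left at their defaults, and the
+	/// arguments follow the parameter order described above):
+	///     typedef eastl::fixed_hash_set<int, 32, 37> IntSet;
+	///
+	///     IntSet intSet;
+	///     intSet.insert(37);
+	///     if(intSet.find(37) != intSet.end())
+	///         intSet.erase(37);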
+ /// + template , typename Predicate = eastl::equal_to, bool bCacheHashCode = false, typename OverflowAllocator = EASTLAllocatorType> + class fixed_hash_set : public hash_set::node_type), + nodeCount, + EASTL_ALIGN_OF(Value), + 0, + bEnableOverflow, + OverflowAllocator>, + bCacheHashCode> + { + public: + typedef fixed_hashtable_allocator::node_type), nodeCount, EASTL_ALIGN_OF(Value), 0, + bEnableOverflow, OverflowAllocator> fixed_allocator_type; + typedef typename fixed_allocator_type::overflow_allocator_type overflow_allocator_type; + typedef fixed_hash_set this_type; + typedef hash_set base_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::size_type size_type; + + enum { kMaxSize = nodeCount }; + + using base_type::mAllocator; + + protected: + node_type** mBucketBuffer[bucketCount + 1]; // '+1' because the hash table needs a null terminating bucket. + char mNodeBuffer[fixed_allocator_type::kBufferSize]; // kBufferSize will take into account alignment requirements. + + public: + explicit fixed_hash_set(const overflow_allocator_type& overflowAllocator); + + explicit fixed_hash_set(const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate()); + + fixed_hash_set(const Hash& hashFunction, + const Predicate& predicate, + const overflow_allocator_type& overflowAllocator); + + template + fixed_hash_set(InputIterator first, InputIterator last, + const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate()); + + fixed_hash_set(const this_type& x); + fixed_hash_set(this_type&& x); + fixed_hash_set(this_type&& x, const overflow_allocator_type& overflowAllocator); + + fixed_hash_set(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator = EASTL_FIXED_HASH_SET_DEFAULT_ALLOCATOR); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + + size_type max_size() const; + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + }; // fixed_hash_set + + + + + + + /// fixed_hash_multiset + /// + /// Implements a hash_multiset with a fixed block of memory identified by the nodeCount and bucketCount + /// template parameters. + /// + /// Value The type of object the hash_set holds. + /// nodeCount The max number of objects to contain. This value must be >= 1. + /// bucketCount The number of buckets to use. This value must be >= 2. + /// bEnableOverflow Whether or not we should use the global heap if our object pool is exhausted. + /// Hash hash_set hash function. See hash_set. + /// Predicate hash_set equality testing function. See hash_set. 
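+	///
+	/// Example usage (an illustrative sketch only; counts are arbitrary, the remaining
+	/// template parameters are left at their defaults, and the arguments follow the
+	/// parameter order described above):
+	///     typedef eastl::fixed_hash_multiset<int, 32, 37> IntMultiSet;
+	///
+	///     IntMultiSet intSet;
+	///     intSet.insert(37);
+	///     intSet.insert(37);                              // duplicates are allowed in a multiset
+	///     IntMultiSet::size_type n = intSet.count(37);    // n == 2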
+ /// + template , typename Predicate = eastl::equal_to, bool bCacheHashCode = false, typename OverflowAllocator = EASTLAllocatorType> + class fixed_hash_multiset : public hash_multiset::node_type), + nodeCount, + EASTL_ALIGN_OF(Value), + 0, + bEnableOverflow, + OverflowAllocator>, + bCacheHashCode> + { + public: + typedef fixed_hashtable_allocator::node_type), nodeCount, EASTL_ALIGN_OF(Value), 0, + bEnableOverflow, OverflowAllocator> fixed_allocator_type; + typedef typename fixed_allocator_type::overflow_allocator_type overflow_allocator_type; + typedef hash_multiset base_type; + typedef fixed_hash_multiset this_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::size_type size_type; + + enum { kMaxSize = nodeCount }; + + using base_type::mAllocator; + + protected: + node_type** mBucketBuffer[bucketCount + 1]; // '+1' because the hash table needs a null terminating bucket. + char mNodeBuffer[fixed_allocator_type::kBufferSize]; // kBufferSize will take into account alignment requirements. + + public: + explicit fixed_hash_multiset(const overflow_allocator_type& overflowAllocator); + + explicit fixed_hash_multiset(const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate()); + + fixed_hash_multiset(const Hash& hashFunction, + const Predicate& predicate, + const overflow_allocator_type& overflowAllocator); + + template + fixed_hash_multiset(InputIterator first, InputIterator last, + const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate()); + + fixed_hash_multiset(const this_type& x); + fixed_hash_multiset(this_type&& x); + fixed_hash_multiset(this_type&& x, const overflow_allocator_type& overflowAllocator); + fixed_hash_multiset(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator = EASTL_FIXED_HASH_MULTISET_DEFAULT_ALLOCATOR); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + + size_type max_size() const; + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + }; // fixed_hash_multiset + + + + + + /////////////////////////////////////////////////////////////////////// + // fixed_hash_set + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_hash_set:: + fixed_hash_set(const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), + Hash(), Predicate(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. 
+ + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_SET_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + inline fixed_hash_set:: + fixed_hash_set(const Hash& hashFunction, + const Predicate& predicate) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), + hashFunction, predicate, fixed_allocator_type(NULL, mBucketBuffer)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_SET_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + inline fixed_hash_set:: + fixed_hash_set(const Hash& hashFunction, + const Predicate& predicate, + const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), + hashFunction, predicate, fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_SET_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + template + fixed_hash_set:: + fixed_hash_set(InputIterator first, InputIterator last, + const Hash& hashFunction, + const Predicate& predicate) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), hashFunction, + predicate, fixed_allocator_type(NULL, mBucketBuffer)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_SET_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + base_type::insert(first, last); + } + + + template + inline fixed_hash_set:: + fixed_hash_set(const this_type& x) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), x.hash_function(), + x.equal_function(), fixed_allocator_type(NULL, mBucketBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_set::fixed_hash_set(this_type&& x) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), x.hash_function(), + x.equal_function(), fixed_allocator_type(NULL, mBucketBuffer)) + { + // This implementation is the same as above. If we could rely on using C++11 delegating constructor support then we could just call that here. + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. 
+ + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_set::fixed_hash_set(this_type&& x, const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), + x.hash_function(), x.equal_function(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + // This implementation is the same as above. If we could rely on using C++11 delegating constructor support then we could just call that here. + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_set:: + fixed_hash_set(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), Hash(), + Predicate(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_SET_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + base_type::insert(ilist.begin(), ilist.end()); + } + + + template + typename fixed_hash_set::this_type& + fixed_hash_set::operator=(const this_type& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline typename fixed_hash_set::this_type& + fixed_hash_set::operator=(this_type&& x) + { + operator=(x); + return *this; + } + + + template + inline typename fixed_hash_set::this_type& + fixed_hash_set::operator=(std::initializer_list ilist) + { + base_type::clear(); + base_type::insert(ilist.begin(), ilist.end()); + return *this; + } + + + template + inline void fixed_hash_set:: + swap(this_type& x) + { + // We must do a brute-force swap, because fixed containers cannot share memory allocations. + // Note that we create a temp value on the stack. This approach may fail if the size of the + // container is too large. We have a rule against allocating memory from the heap, and so + // if the user wants to swap two large objects of this class, the user will currently need + // to implement it manually. To consider: add code to allocate a temporary buffer if the + // size of the container is too large for the stack. + EASTL_ASSERT(sizeof(x) < EASTL_MAX_STACK_USAGE); // It is dangerous to try to create objects that are too big for the stack. + + const this_type temp(*this); // Can't call eastl::swap because that would + *this = x; // itself call this member swap function. 
+ x = temp; + } + + + template + void fixed_hash_set:: + reset_lose_memory() + { + base_type::reset_lose_memory(); + base_type::get_allocator().reset(mNodeBuffer); + } + + + template + inline typename fixed_hash_set::size_type + fixed_hash_set::max_size() const + { + return kMaxSize; + } + + + template + inline const typename fixed_hash_set::overflow_allocator_type& + fixed_hash_set::get_overflow_allocator() const EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline typename fixed_hash_set::overflow_allocator_type& + fixed_hash_set::get_overflow_allocator() EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline void fixed_hash_set:: + set_overflow_allocator(const overflow_allocator_type& allocator) + { + mAllocator.set_overflow_allocator(allocator); + } + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline void swap(fixed_hash_set& a, + fixed_hash_set& b) + { + a.swap(b); + } + + + + + /////////////////////////////////////////////////////////////////////// + // fixed_hash_multiset + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_hash_multiset:: + fixed_hash_multiset(const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), Hash(), + Predicate(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MULTISET_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + inline fixed_hash_multiset:: + fixed_hash_multiset(const Hash& hashFunction, + const Predicate& predicate) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), hashFunction, + predicate, fixed_allocator_type(NULL, mBucketBuffer)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MULTISET_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + inline fixed_hash_multiset:: + fixed_hash_multiset(const Hash& hashFunction, + const Predicate& predicate, + const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), hashFunction, + predicate, fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. 
+ + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MULTISET_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + } + + + template + template + inline fixed_hash_multiset:: + fixed_hash_multiset(InputIterator first, InputIterator last, + const Hash& hashFunction, + const Predicate& predicate) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), hashFunction, + predicate, fixed_allocator_type(NULL, mBucketBuffer)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MULTISET_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + base_type::insert(first, last); + } + + + template + inline fixed_hash_multiset:: + fixed_hash_multiset(const this_type& x) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), x.hash_function(), + x.equal_function(), fixed_allocator_type(NULL, mBucketBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_multiset::fixed_hash_multiset(this_type&& x) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), x.hash_function(), + x.equal_function(), fixed_allocator_type(NULL, mBucketBuffer)) + { + // This implementation is the same as above. If we could rely on using C++11 delegating constructor support then we could just call that here. + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_multiset::fixed_hash_multiset(this_type&& x, const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), + x.hash_function(), x.equal_function(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + // This implementation is the same as above. If we could rely on using C++11 delegating constructor support then we could just call that here. + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. + + mAllocator.reset(mNodeBuffer); + base_type::insert(x.begin(), x.end()); + } + + + template + inline fixed_hash_multiset:: + fixed_hash_multiset(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(prime_rehash_policy::GetPrevBucketCountOnly(bucketCount), Hash(), + Predicate(), fixed_allocator_type(NULL, mBucketBuffer, overflowAllocator)) + { + EASTL_CT_ASSERT((nodeCount >= 1) && (bucketCount >= 2)); + + if(!bEnableOverflow) + base_type::set_max_load_factor(10000.f); // Set it so that we will never resize. 
+ + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_HASH_MULTISET_DEFAULT_NAME); + #endif + + mAllocator.reset(mNodeBuffer); + base_type::insert(ilist.begin(), ilist.end()); + } + + + template + inline typename fixed_hash_multiset::this_type& + fixed_hash_multiset::operator=(const this_type& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline typename fixed_hash_multiset::this_type& + fixed_hash_multiset::operator=(this_type&& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline typename fixed_hash_multiset::this_type& + fixed_hash_multiset::operator=(std::initializer_list ilist) + { + base_type::clear(); + base_type::insert(ilist.begin(), ilist.end()); + return *this; + } + + + template + inline void fixed_hash_multiset:: + swap(this_type& x) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(*this, x); + } + + + template + inline void fixed_hash_multiset:: + reset_lose_memory() + { + base_type::reset_lose_memory(); + base_type::get_allocator().reset(mNodeBuffer); + } + + + template + inline typename fixed_hash_multiset::size_type + fixed_hash_multiset::max_size() const + { + return kMaxSize; + } + + + template + inline const typename fixed_hash_multiset::overflow_allocator_type& + fixed_hash_multiset::get_overflow_allocator() const EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline typename fixed_hash_multiset::overflow_allocator_type& + fixed_hash_multiset::get_overflow_allocator() EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline void fixed_hash_multiset:: + set_overflow_allocator(const overflow_allocator_type& allocator) + { + mAllocator.set_overflow_allocator(allocator); + } + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline void swap(fixed_hash_multiset& a, + fixed_hash_multiset& b) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(a, b); + } + + +} // namespace eastl + +EA_RESTORE_VC_WARNING() + +#endif // Header include guard + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/fixed_list.h b/libkram/eastl/include/EASTL/fixed_list.h new file mode 100644 index 00000000..9e48089c --- /dev/null +++ b/libkram/eastl/include/EASTL/fixed_list.h @@ -0,0 +1,388 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements a list which uses a fixed size memory pool for its nodes. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_FIXED_LIST_H +#define EASTL_FIXED_LIST_H + + +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + /// EASTL_FIXED_LIST_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// In the case of fixed-size containers, the allocator name always refers + /// to overflow allocations. 
+ /// + #ifndef EASTL_FIXED_LIST_DEFAULT_NAME + #define EASTL_FIXED_LIST_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_list" // Unless the user overrides something, this is "EASTL fixed_list". + #endif + + + /// EASTL_FIXED_LIST_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_FIXED_LIST_DEFAULT_ALLOCATOR + #define EASTL_FIXED_LIST_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_LIST_DEFAULT_NAME) + #endif + + + + /// fixed_list + /// + /// fixed_list is a list which uses a single block of contiguous memory + /// for its nodes. The purpose of this is to reduce memory usage relative + /// to a conventional memory allocation system (with block headers), to + /// increase allocation speed (often due to avoidance of mutex locks), + /// to increase performance (due to better memory locality), and to decrease + /// memory fragmentation due to the way that fixed block allocators work. + /// + /// The primary downside to a fixed_list is that the number of nodes it + /// can contain is fixed upon its declaration. If you want a fixed_list + /// that doesn't have this limitation, then you probably don't want a + /// fixed_list. You can always create your own memory allocator that works + /// the way you want. + /// + /// Template parameters: + /// T The type of object the list holds. + /// nodeCount The max number of objects to contain. + /// bEnableOverflow Whether or not we should use the overflow heap if our object pool is exhausted. + /// OverflowAllocator Overflow allocator, which is only used if bEnableOverflow == true. Defaults to the global heap. + /// + template + class fixed_list : public list::node_type), + nodeCount, EASTL_ALIGN_OF(T), 0, bEnableOverflow, OverflowAllocator> > + { + public: + typedef fixed_node_allocator::node_type), nodeCount, + EASTL_ALIGN_OF(T), 0, bEnableOverflow, OverflowAllocator> fixed_allocator_type; + typedef OverflowAllocator overflow_allocator_type; + typedef list base_type; + typedef fixed_list this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::iterator iterator; + + enum { kMaxSize = nodeCount }; + + using base_type::assign; + using base_type::resize; + using base_type::insert; + using base_type::size; + using base_type::get_allocator; + + protected: + char mBuffer[fixed_allocator_type::kBufferSize]; // kBufferSize will take into account alignment requirements. + + using base_type::internalAllocator; + + public: + fixed_list(); + explicit fixed_list(const overflow_allocator_type& overflowAllocator); // Only applicable if bEnableOverflow is true. + explicit fixed_list(size_type n); // Currently we don't support overflowAllocator specification for other constructors, for simplicity. + fixed_list(size_type n, const value_type& value); + fixed_list(const this_type& x); + fixed_list(this_type&& x); + fixed_list(this_type&&, const overflow_allocator_type& overflowAllocator); + fixed_list(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator = EASTL_FIXED_LIST_DEFAULT_ALLOCATOR); + + template + fixed_list(InputIterator first, InputIterator last); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. 
+ size_type max_size() const; // Returns the max fixed size, which is the user-supplied nodeCount parameter. + bool full() const; // Returns true if the fixed space has been fully allocated. Note that if overflow is enabled, the container size can be greater than nodeCount but full() could return true because the fixed space may have a recently freed slot. + bool has_overflowed() const; // Returns true if the allocations spilled over into the overflow allocator. Meaningful only if overflow is enabled. + bool can_overflow() const; // Returns the value of the bEnableOverflow template parameter. + + // OverflowAllocator + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + }; // fixed_list + + + + /////////////////////////////////////////////////////////////////////// + // fixed_list + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_list::fixed_list() + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + internalAllocator().set_name(EASTL_FIXED_LIST_DEFAULT_NAME); + #endif + } + + + template + inline fixed_list::fixed_list(const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + internalAllocator().set_name(EASTL_FIXED_LIST_DEFAULT_NAME); + #endif + } + + + template + inline fixed_list::fixed_list(size_type n) + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + internalAllocator().set_name(EASTL_FIXED_LIST_DEFAULT_NAME); + #endif + + resize(n); + } + + + template + inline fixed_list::fixed_list(size_type n, const value_type& value) + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + internalAllocator().set_name(EASTL_FIXED_LIST_DEFAULT_NAME); + #endif + + resize(n, value); + } + + + template + inline fixed_list::fixed_list(const this_type& x) + : base_type(fixed_allocator_type(mBuffer)) + { + internalAllocator().copy_overflow_allocator(x.internalAllocator()); + + #if EASTL_NAME_ENABLED + internalAllocator().set_name(x.internalAllocator().get_name()); + #endif + + assign(x.begin(), x.end()); + } + + + template + inline fixed_list::fixed_list(this_type&& x) + : base_type(fixed_allocator_type(mBuffer)) + { + // Since we are a fixed_list, we can't normally swap pointers unless both this and + // x are using using overflow and the overflow allocators are equal. To do: + //if(has_overflowed() && x.has_overflowed() && (get_overflow_allocator() == x.get_overflow_allocator())) + //{ + // We can swap contents and may need to swap the allocators as well. + //} + + // The following is currently identical to the fixed_vector(const this_type& x) code above. If it stays that + // way then we may want to make a shared implementation. + internalAllocator().copy_overflow_allocator(x.internalAllocator()); + + #if EASTL_NAME_ENABLED + internalAllocator().set_name(x.internalAllocator().get_name()); + #endif + + assign(x.begin(), x.end()); + } + + + template + inline fixed_list::fixed_list(this_type&& x, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + // See comments above. 
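// A usage sketch for fixed_list, illustrating the capacity and overflow behaviour
// described by the full()/has_overflowed()/can_overflow() comments above. The
// element type, node count and values are illustrative; spilling past the fixed
// pool assumes the application provides the EASTL operator new[] overloads used
// by the default overflow allocator.
#include <EASTL/fixed_list.h>
#include <cassert>

void fixed_list_example()
{
    // Eight nodes are embedded in the object; bEnableOverflow defaults to true,
    // so a ninth element falls back to the overflow (heap) allocator.
    eastl::fixed_list<int, 8> values;

    for (int i = 0; i < 8; ++i)
        values.push_back(i);

    assert(values.full());              // the fixed pool is exhausted
    assert(!values.has_overflowed());   // but nothing has spilled to the heap yet
    assert(values.max_size() == 8);     // the nodeCount template parameter

    values.push_back(8);                // permitted only because overflow is enabled
    assert(values.has_overflowed());

    // With bEnableOverflow == false the capacity is hard: exceeding nodeCount
    // is a usage error instead of a heap allocation.
    eastl::fixed_list<int, 8, false> hardCap;
    hardCap.push_back(42);
}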
+ internalAllocator().copy_overflow_allocator(x.internalAllocator()); + + #if EASTL_NAME_ENABLED + internalAllocator().set_name(x.internalAllocator().get_name()); + #endif + + assign(x.begin(), x.end()); + } + + + template + inline fixed_list::fixed_list(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + assign(ilist.begin(), ilist.end()); + } + + + template + template + fixed_list::fixed_list(InputIterator first, InputIterator last) + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + internalAllocator().set_name(EASTL_FIXED_LIST_DEFAULT_NAME); + #endif + + assign(first, last); + } + + + template + inline typename fixed_list::this_type& + fixed_list::operator=(const this_type& x) + { + if(this != &x) + { + base_type::clear(); + + #if EASTL_ALLOCATOR_COPY_ENABLED + internalAllocator() = x.internalAllocator(); // The primary effect of this is to copy the overflow allocator. + #endif + + base_type::assign(x.begin(), x.end()); // It would probably be better to implement this like list::operator=. + } + return *this; + } + + + template + inline typename fixed_list::this_type& + fixed_list::operator=(this_type&& x) + { + return operator=(x); + } + + + template + inline typename fixed_list::this_type& + fixed_list::operator=(std::initializer_list ilist) + { + base_type::clear(); + base_type::assign(ilist.begin(), ilist.end()); + return *this; + } + + + template + inline void fixed_list::swap(this_type& x) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(*this, x); + } + + + template + inline void fixed_list::reset_lose_memory() + { + base_type::reset_lose_memory(); + get_allocator().reset(mBuffer); + } + + + template + inline typename fixed_list::size_type + fixed_list::max_size() const + { + return kMaxSize; + } + + + template + inline bool fixed_list::full() const + { + // Note: This implementation isn't right in the case of bEnableOverflow = true because it will return + // false for the case that there are free nodes from the buffer but also nodes from the dynamic heap. + // This can happen if the container exceeds the fixed size and then frees some of the nodes from the fixed buffer. + // The only simple fix for this is to take on another member variable which tracks whether this overflow + // has occurred at some point in the past. + return !internalAllocator().can_allocate(); // This is the quickest way of detecting this. has_overflowed uses a different method because it can't use this quick method. + } + + + template + inline bool fixed_list::has_overflowed() const + { + #if EASTL_FIXED_SIZE_TRACKING_ENABLED // If we can use this faster pathway (as size() may be slow)... 
+ return (internalAllocator().mPool.mnPeakSize > kMaxSize); + #else + return (size() > kMaxSize); + #endif + } + + + template + inline bool fixed_list::can_overflow() const + { + return bEnableOverflow; + } + + + template + inline const typename fixed_list::overflow_allocator_type& + fixed_list::get_overflow_allocator() const EA_NOEXCEPT + { + return internalAllocator().get_overflow_allocator(); + } + + + template + inline typename fixed_list::overflow_allocator_type& + fixed_list::get_overflow_allocator() EA_NOEXCEPT + { + return internalAllocator().get_overflow_allocator(); + } + + + template + inline void + fixed_list::set_overflow_allocator(const overflow_allocator_type& allocator) + { + internalAllocator().set_overflow_allocator(allocator); + } + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline void swap(fixed_list& a, + fixed_list& b) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(a, b); + } + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/fixed_map.h b/libkram/eastl/include/EASTL/fixed_map.h new file mode 100644 index 00000000..c01db08f --- /dev/null +++ b/libkram/eastl/include/EASTL/fixed_map.h @@ -0,0 +1,580 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements a map and multimap which use a fixed size memory +// pool for their nodes. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_FIXED_MAP_H +#define EASTL_FIXED_MAP_H + + +#include +#include // Included because fixed_rbtree_base resides here. + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + /// EASTL_FIXED_MAP_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// In the case of fixed-size containers, the allocator name always refers + /// to overflow allocations. + /// + #ifndef EASTL_FIXED_MAP_DEFAULT_NAME + #define EASTL_FIXED_MAP_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_map" // Unless the user overrides something, this is "EASTL fixed_map". + #endif + + #ifndef EASTL_FIXED_MULTIMAP_DEFAULT_NAME + #define EASTL_FIXED_MULTIMAP_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_multimap" // Unless the user overrides something, this is "EASTL fixed_multimap". + #endif + + + /// EASTL_FIXED_MAP_DEFAULT_ALLOCATOR + /// EASTL_FIXED_MULTIMAP_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_FIXED_MAP_DEFAULT_ALLOCATOR + #define EASTL_FIXED_MAP_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_MAP_DEFAULT_NAME) + #endif + + #ifndef EASTL_FIXED_MULTIMAP_DEFAULT_ALLOCATOR + #define EASTL_FIXED_MULTIMAP_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_MULTIMAP_DEFAULT_NAME) + #endif + + + + /// fixed_map + /// + /// Implements a map with a fixed block of memory identified by the + /// nodeCount template parameter. + /// + /// Key The key object (key in the key/value pair). 
+ /// T The mapped object (value in the key/value pair). + /// nodeCount The max number of objects to contain. + /// bEnableOverflow Whether or not we should use the global heap if our object pool is exhausted. + /// Compare Compare function/object for set ordering. + /// OverflowAllocator Overflow allocator, which is only used if bEnableOverflow == true. Defaults to the global heap. + /// + template , typename OverflowAllocator = EASTLAllocatorType> + class fixed_map : public map::node_type), + nodeCount, EASTL_ALIGN_OF(eastl::pair), 0, bEnableOverflow, OverflowAllocator> > + { + public: + typedef fixed_node_allocator::node_type), nodeCount, + EASTL_ALIGN_OF(eastl::pair), 0, bEnableOverflow, OverflowAllocator> fixed_allocator_type; + typedef typename fixed_allocator_type::overflow_allocator_type overflow_allocator_type; + typedef fixed_map this_type; + typedef map base_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::size_type size_type; + + enum { kMaxSize = nodeCount }; + + using base_type::insert; + + protected: + char mBuffer[fixed_allocator_type::kBufferSize]; // kBufferSize will take into account alignment requirements. + + using base_type::mAllocator; + + public: + fixed_map(); + explicit fixed_map(const overflow_allocator_type& overflowAllocator); + explicit fixed_map(const Compare& compare); + fixed_map(const this_type& x); + fixed_map(this_type&& x); + fixed_map(this_type&& x, const overflow_allocator_type& overflowAllocator); + fixed_map(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator = EASTL_FIXED_MAP_DEFAULT_ALLOCATOR); + + template + fixed_map(InputIterator first, InputIterator last); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + + size_type max_size() const; + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + }; // fixed_map + + + + + /// fixed_multimap + /// + /// Implements a multimap with a fixed block of memory identified by the + /// nodeCount template parameter. + /// + /// Key The key object (key in the key/value pair). + /// T The mapped object (value in the key/value pair). + /// nodeCount The max number of objects to contain. + /// bEnableOverflow Whether or not we should use the global heap if our object pool is exhausted. + /// Compare Compare function/object for set ordering. + /// OverflowAllocator Overflow allocator, which is only used if bEnableOverflow == true. Defaults to the global heap. 
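// A usage sketch for fixed_map, and for the fixed_multimap documented directly
// above. The key/value types, node counts and contents are illustrative; heap
// overflow assumes the usual EASTL operator new[] overloads are available.
#include <EASTL/fixed_map.h>
#include <EASTL/string.h>
#include <EASTL/utility.h>   // eastl::make_pair
#include <cassert>

void fixed_map_example()
{
    // At most 16 nodes in the embedded pool; further insertions overflow to the heap.
    eastl::fixed_map<int, eastl::string, 16> names;
    names[1] = "one";
    names[2] = "two";
    assert(names.find(2) != names.end());
    assert(names.max_size() == 16);

    // fixed_multimap allows duplicate keys while using the same kind of fixed pool.
    eastl::fixed_multimap<int, eastl::string, 16> tags;
    tags.insert(eastl::make_pair(3, eastl::string("odd")));
    tags.insert(eastl::make_pair(3, eastl::string("prime")));
    assert(tags.count(3) == 2);
}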
+ /// + template , typename OverflowAllocator = EASTLAllocatorType> + class fixed_multimap : public multimap::node_type), + nodeCount, EASTL_ALIGN_OF(eastl::pair), 0, bEnableOverflow, OverflowAllocator> > + { + public: + typedef fixed_node_allocator::node_type), nodeCount, + EASTL_ALIGN_OF(eastl::pair), 0, bEnableOverflow, OverflowAllocator> fixed_allocator_type; + typedef typename fixed_allocator_type::overflow_allocator_type overflow_allocator_type; + typedef multimap base_type; + typedef fixed_multimap this_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::size_type size_type; + + enum { kMaxSize = nodeCount }; + + using base_type::insert; + + protected: + char mBuffer[fixed_allocator_type::kBufferSize]; // kBufferSize will take into account alignment requirements. + + using base_type::mAllocator; + using base_type::get_compare; + + public: + fixed_multimap(); + fixed_multimap(const overflow_allocator_type& overflowAllocator); + explicit fixed_multimap(const Compare& compare); + fixed_multimap(const this_type& x); + fixed_multimap(this_type&& x); + fixed_multimap(this_type&& x, const overflow_allocator_type& overflowAllocator); + fixed_multimap(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator = EASTL_FIXED_MULTIMAP_DEFAULT_ALLOCATOR); + + template + fixed_multimap(InputIterator first, InputIterator last); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + + size_type max_size() const; + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + }; // fixed_multimap + + + + + + /////////////////////////////////////////////////////////////////////// + // fixed_map + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_map::fixed_map() + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MAP_DEFAULT_NAME); + #endif + } + + + template + inline fixed_map::fixed_map(const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MAP_DEFAULT_NAME); + #endif + } + + + template + inline fixed_map::fixed_map(const Compare& compare) + : base_type(compare, fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MAP_DEFAULT_NAME); + #endif + } + + + template + inline fixed_map::fixed_map(const this_type& x) + : base_type(x.get_compare(), fixed_allocator_type(mBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + inline fixed_map::fixed_map(this_type&& x) + : base_type(x.get_compare(), fixed_allocator_type(mBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + inline fixed_map::fixed_map(this_type&& x, const overflow_allocator_type& 
overflowAllocator) + : base_type(x.get_compare(), fixed_allocator_type(mBuffer, overflowAllocator)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + fixed_map::fixed_map(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MAP_DEFAULT_NAME); + #endif + + insert(ilist.begin(), ilist.end()); + } + + + template + template + fixed_map::fixed_map(InputIterator first, InputIterator last) + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MAP_DEFAULT_NAME); + #endif + + insert(first, last); + } + + + template + inline typename fixed_map::this_type& + fixed_map::operator=(const this_type& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline typename fixed_map::this_type& + fixed_map::operator=(std::initializer_list ilist) + { + base_type::clear(); + insert(ilist.begin(), ilist.end()); + return *this; + } + + + template + inline typename fixed_map::this_type& + fixed_map::operator=(this_type&& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline void fixed_map::swap(this_type& x) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(*this, x); + } + + + template + inline void fixed_map::reset_lose_memory() + { + base_type::reset_lose_memory(); + base_type::get_allocator().reset(mBuffer); + } + + + template + inline typename fixed_map::size_type + fixed_map::max_size() const + { + return kMaxSize; + } + + + template + inline const typename fixed_map::overflow_allocator_type& + fixed_map::get_overflow_allocator() const EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline typename fixed_map::overflow_allocator_type& + fixed_map::get_overflow_allocator() EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline void + fixed_map::set_overflow_allocator(const overflow_allocator_type& allocator) + { + mAllocator.set_overflow_allocator(allocator); + } + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline void swap(fixed_map& a, + fixed_map& b) + { + // Fixed containers use a special swap that can deal with excessively large buffers. 
+ eastl::fixed_swap(a, b); + } + + + + + /////////////////////////////////////////////////////////////////////// + // fixed_multimap + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_multimap::fixed_multimap() + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MULTIMAP_DEFAULT_NAME); + #endif + } + + + template + inline fixed_multimap::fixed_multimap(const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MULTIMAP_DEFAULT_NAME); + #endif + } + + + template + inline fixed_multimap::fixed_multimap(const Compare& compare) + : base_type(compare, fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MULTIMAP_DEFAULT_NAME); + #endif + } + + + template + inline fixed_multimap::fixed_multimap(const this_type& x) + : base_type(x.get_compare(), fixed_allocator_type(mBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + inline fixed_multimap::fixed_multimap(this_type&& x) + : base_type(x.get_compare(), fixed_allocator_type(mBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + inline fixed_multimap::fixed_multimap(this_type&& x, const overflow_allocator_type& overflowAllocator) + : base_type(x.get_compare(), fixed_allocator_type(mBuffer, overflowAllocator)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + fixed_multimap::fixed_multimap(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MULTIMAP_DEFAULT_NAME); + #endif + + insert(ilist.begin(), ilist.end()); + } + + + template + template + fixed_multimap:: + fixed_multimap(InputIterator first, InputIterator last) + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MULTIMAP_DEFAULT_NAME); + #endif + + insert(first, last); + } + + + template + inline typename fixed_multimap::this_type& + fixed_multimap::operator=(const this_type& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline typename fixed_multimap::this_type& + fixed_multimap::operator=(std::initializer_list ilist) + { + base_type::clear(); + insert(ilist.begin(), ilist.end()); + return *this; + } + + + template + inline typename fixed_multimap::this_type& + fixed_multimap::operator=(this_type&& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline void fixed_multimap::swap(this_type& x) + { + // Fixed containers use a special swap that can deal with excessively large buffers. 
+ eastl::fixed_swap(*this, x); + } + + + template + inline void fixed_multimap::reset_lose_memory() + { + base_type::reset_lose_memory(); + base_type::get_allocator().reset(mBuffer); + } + + + template + inline typename fixed_multimap::size_type + fixed_multimap::max_size() const + { + return kMaxSize; + } + + + template + inline const typename fixed_multimap::overflow_allocator_type& + fixed_multimap::get_overflow_allocator() const EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline typename fixed_multimap::overflow_allocator_type& + fixed_multimap::get_overflow_allocator() EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline void + fixed_multimap::set_overflow_allocator(const overflow_allocator_type& allocator) + { + mAllocator.set_overflow_allocator(allocator); + } + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline void swap(fixed_multimap& a, + fixed_multimap& b) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(a, b); + } + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/fixed_set.h b/libkram/eastl/include/EASTL/fixed_set.h new file mode 100644 index 00000000..e5f00236 --- /dev/null +++ b/libkram/eastl/include/EASTL/fixed_set.h @@ -0,0 +1,578 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements a set and multiset which use a fixed size memory +// pool for their nodes. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_FIXED_SET_H +#define EASTL_FIXED_SET_H + + +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + /// EASTL_FIXED_SET_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// In the case of fixed-size containers, the allocator name always refers + /// to overflow allocations. + /// + #ifndef EASTL_FIXED_SET_DEFAULT_NAME + #define EASTL_FIXED_SET_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_set" // Unless the user overrides something, this is "EASTL fixed_set". + #endif + + #ifndef EASTL_FIXED_MULTISET_DEFAULT_NAME + #define EASTL_FIXED_MULTISET_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_multiset" // Unless the user overrides something, this is "EASTL fixed_multiset". + #endif + + + /// EASTL_FIXED_SET_DEFAULT_ALLOCATOR + /// EASTL_FIXED_MULTISET_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_FIXED_SET_DEFAULT_ALLOCATOR + #define EASTL_FIXED_SET_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_SET_DEFAULT_NAME) + #endif + + #ifndef EASTL_FIXED_MULTISET_DEFAULT_ALLOCATOR + #define EASTL_FIXED_MULTISET_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_MULTISET_DEFAULT_NAME) + #endif + + + + /// fixed_set + /// + /// Implements a set with a fixed block of memory identified by the + /// nodeCount template parameter. 
+ /// + /// Template parameters: + /// Key The type of object the set holds (a.k.a. value). + /// nodeCount The max number of objects to contain. + /// bEnableOverflow Whether or not we should use the global heap if our object pool is exhausted. + /// Compare Compare function/object for set ordering. + /// OverflowAllocator Overflow allocator, which is only used if bEnableOverflow == true. Defaults to the global heap. + /// + template , typename OverflowAllocator = EASTLAllocatorType> + class fixed_set : public set::node_type), + nodeCount, EASTL_ALIGN_OF(Key), 0, bEnableOverflow, OverflowAllocator> > + { + public: + typedef fixed_node_allocator::node_type), nodeCount, + EASTL_ALIGN_OF(Key), 0, bEnableOverflow, OverflowAllocator> fixed_allocator_type; + typedef typename fixed_allocator_type::overflow_allocator_type overflow_allocator_type; + typedef set base_type; + typedef fixed_set this_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::size_type size_type; + + enum { kMaxSize = nodeCount }; + + using base_type::insert; + + protected: + char mBuffer[fixed_allocator_type::kBufferSize]; // kBufferSize will take into account alignment requirements. + + using base_type::mAllocator; + using base_type::get_compare; + + public: + fixed_set(); + fixed_set(const overflow_allocator_type& overflowAllocator); + explicit fixed_set(const Compare& compare); + fixed_set(const this_type& x); + fixed_set(this_type&& x); + fixed_set(this_type&& x, const overflow_allocator_type& overflowAllocator); + fixed_set(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator = EASTL_FIXED_SET_DEFAULT_ALLOCATOR); + + template + fixed_set(InputIterator first, InputIterator last); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + + size_type max_size() const; + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + }; // fixed_set + + + + + + + /// fixed_multiset + /// + /// Implements a multiset with a fixed block of memory identified by the + /// nodeCount template parameter. + /// + /// Key The type of object the set holds (a.k.a. value). + /// nodeCount The max number of objects to contain. + /// bEnableOverflow Whether or not we should use the global heap if our object pool is exhausted. + /// Compare Compare function/object for set ordering. + /// OverflowAllocator Overflow allocator, which is only used if bEnableOverflow == true. Defaults to the global heap. 
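// A usage sketch for fixed_set, and for the fixed_multiset documented directly
// above; node counts and values are illustrative. With bEnableOverflow left at
// its default of true, exceeding the node count falls back to the overflow allocator.
#include <EASTL/fixed_set.h>
#include <cassert>

void fixed_set_example()
{
    eastl::fixed_set<int, 8> unique;        // up to 8 nodes without touching the heap
    unique.insert(5);
    unique.insert(5);                       // duplicate is rejected by a set
    assert(unique.size() == 1);

    eastl::fixed_multiset<int, 8> repeated; // a multiset keeps duplicates
    repeated.insert(5);
    repeated.insert(5);
    assert(repeated.count(5) == 2);
    assert(repeated.max_size() == 8);       // the nodeCount template parameter
}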
+ /// + template , typename OverflowAllocator = EASTLAllocatorType> + class fixed_multiset : public multiset::node_type), + nodeCount, EASTL_ALIGN_OF(Key), 0, bEnableOverflow, OverflowAllocator> > + { + public: + typedef fixed_node_allocator::node_type), nodeCount, + EASTL_ALIGN_OF(Key), 0, bEnableOverflow, OverflowAllocator> fixed_allocator_type; + typedef typename fixed_allocator_type::overflow_allocator_type overflow_allocator_type; + typedef multiset base_type; + typedef fixed_multiset this_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::size_type size_type; + + enum { kMaxSize = nodeCount }; + + using base_type::insert; + + protected: + char mBuffer[fixed_allocator_type::kBufferSize]; // kBufferSize will take into account alignment requirements. + + using base_type::mAllocator; + + public: + fixed_multiset(); + fixed_multiset(const overflow_allocator_type& overflowAllocator); + explicit fixed_multiset(const Compare& compare); + fixed_multiset(const this_type& x); + fixed_multiset(this_type&& x); + fixed_multiset(this_type&& x, const overflow_allocator_type& overflowAllocator); + fixed_multiset(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator = EASTL_FIXED_MULTISET_DEFAULT_ALLOCATOR); + + template + fixed_multiset(InputIterator first, InputIterator last); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + + size_type max_size() const; + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + }; // fixed_multiset + + + + + /////////////////////////////////////////////////////////////////////// + // fixed_set + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_set::fixed_set() + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_SET_DEFAULT_NAME); + #endif + } + + + template + inline fixed_set::fixed_set(const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_SET_DEFAULT_NAME); + #endif + } + + + template + inline fixed_set::fixed_set(const Compare& compare) + : base_type(compare, fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_SET_DEFAULT_NAME); + #endif + } + + + template + inline fixed_set::fixed_set(const this_type& x) + : base_type(x.get_compare(), fixed_allocator_type(mBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + inline fixed_set::fixed_set(this_type&& x) + : base_type(x.get_compare(), fixed_allocator_type(mBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + inline fixed_set::fixed_set(this_type&& x, const overflow_allocator_type& overflowAllocator) + : base_type(x.get_compare(), 
fixed_allocator_type(mBuffer, overflowAllocator)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + fixed_set::fixed_set(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_SET_DEFAULT_NAME); + #endif + + insert(ilist.begin(), ilist.end()); + } + + + template + template + fixed_set::fixed_set(InputIterator first, InputIterator last) + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_SET_DEFAULT_NAME); + #endif + + insert(first, last); + } + + + template + inline typename fixed_set::this_type& + fixed_set::operator=(const this_type& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline typename fixed_set::this_type& + fixed_set::operator=(std::initializer_list ilist) + { + base_type::clear(); + insert(ilist.begin(), ilist.end()); + return *this; + } + + + template + inline typename fixed_set::this_type& + fixed_set::operator=(this_type&& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline void fixed_set::swap(this_type& x) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(*this, x); + } + + + template + inline void fixed_set::reset_lose_memory() + { + base_type::reset_lose_memory(); + base_type::get_allocator().reset(mBuffer); + } + + + template + inline typename fixed_set::size_type + fixed_set::max_size() const + { + return kMaxSize; + } + + + template + inline const typename fixed_set::overflow_allocator_type& + fixed_set::get_overflow_allocator() const EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline typename fixed_set::overflow_allocator_type& + fixed_set::get_overflow_allocator() EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline void fixed_set::set_overflow_allocator(const overflow_allocator_type& allocator) + { + mAllocator.set_overflow_allocator(allocator); + } + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline void swap(fixed_set& a, + fixed_set& b) + { + // Fixed containers use a special swap that can deal with excessively large buffers. 
+ eastl::fixed_swap(a, b); + } + + + + /////////////////////////////////////////////////////////////////////// + // fixed_multiset + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_multiset::fixed_multiset() + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MULTISET_DEFAULT_NAME); + #endif + } + + + template + inline fixed_multiset::fixed_multiset(const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MULTISET_DEFAULT_NAME); + #endif + } + + + template + inline fixed_multiset::fixed_multiset(const Compare& compare) + : base_type(compare, fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MULTISET_DEFAULT_NAME); + #endif + } + + + template + inline fixed_multiset::fixed_multiset(const this_type& x) + : base_type(x.get_compare(), fixed_allocator_type(mBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + inline fixed_multiset::fixed_multiset(this_type&& x) + : base_type(x.get_compare(), fixed_allocator_type(mBuffer)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + inline fixed_multiset::fixed_multiset(this_type&& x, const overflow_allocator_type& overflowAllocator) + : base_type(x.get_compare(), fixed_allocator_type(mBuffer, overflowAllocator)) + { + mAllocator.copy_overflow_allocator(x.mAllocator); + + #if EASTL_NAME_ENABLED + mAllocator.set_name(x.mAllocator.get_name()); + #endif + + base_type::operator=(x); + } + + + template + fixed_multiset::fixed_multiset(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MULTISET_DEFAULT_NAME); + #endif + + insert(ilist.begin(), ilist.end()); + } + + + template + template + fixed_multiset::fixed_multiset(InputIterator first, InputIterator last) + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + mAllocator.set_name(EASTL_FIXED_MULTISET_DEFAULT_NAME); + #endif + + insert(first, last); + } + + + template + inline typename fixed_multiset::this_type& + fixed_multiset::operator=(const this_type& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline typename fixed_multiset::this_type& + fixed_multiset::operator=(std::initializer_list ilist) + { + base_type::clear(); + insert(ilist.begin(), ilist.end()); + return *this; + } + + + template + inline typename fixed_multiset::this_type& + fixed_multiset::operator=(this_type&& x) + { + base_type::operator=(x); + return *this; + } + + + template + inline void fixed_multiset::swap(this_type& x) + { + // Fixed containers use a special swap that can deal with excessively large buffers. 
+ eastl::fixed_swap(*this, x); + } + + + template + inline void fixed_multiset::reset_lose_memory() + { + base_type::reset_lose_memory(); + base_type::get_allocator().reset(mBuffer); + } + + + template + inline typename fixed_multiset::size_type + fixed_multiset::max_size() const + { + return kMaxSize; + } + + + template + inline const typename fixed_multiset::overflow_allocator_type& + fixed_multiset::get_overflow_allocator() const EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline typename fixed_multiset::overflow_allocator_type& + fixed_multiset::get_overflow_allocator() EA_NOEXCEPT + { + return mAllocator.get_overflow_allocator(); + } + + + template + inline void fixed_multiset::set_overflow_allocator(const overflow_allocator_type& allocator) + { + mAllocator.set_overflow_allocator(allocator); + } + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline void swap(fixed_multiset& a, + fixed_multiset& b) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(a, b); + } + + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/fixed_slist.h b/libkram/eastl/include/EASTL/fixed_slist.h new file mode 100644 index 00000000..85a7a7b3 --- /dev/null +++ b/libkram/eastl/include/EASTL/fixed_slist.h @@ -0,0 +1,389 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements an slist which uses a fixed size memory pool for its nodes. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_FIXED_SLIST_H +#define EASTL_FIXED_SLIST_H + + +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + /// EASTL_FIXED_SLIST_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// In the case of fixed-size containers, the allocator name always refers + /// to overflow allocations. + /// + #ifndef EASTL_FIXED_SLIST_DEFAULT_NAME + #define EASTL_FIXED_SLIST_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_slist" // Unless the user overrides something, this is "EASTL fixed_slist". + #endif + + + /// EASTL_FIXED_SLIST_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_FIXED_SLIST_DEFAULT_ALLOCATOR + #define EASTL_FIXED_SLIST_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_SLIST_DEFAULT_NAME) + #endif + + + + /// fixed_slist + /// + /// fixed_slist is an slist which uses a single block of contiguous memory + /// for its nodes. The purpose of this is to reduce memory usage relative + /// to a conventional memory allocation system (with block headers), to + /// increase allocation speed (often due to avoidance of mutex locks), + /// to increase performance (due to better memory locality), and to decrease + /// memory fragmentation due to the way that fixed block allocators work. 
+ /// + /// The primary downside to a fixed_slist is that the number of nodes it + /// can contain is fixed upon its declaration. If you want a fixed_slist + /// that doesn't have this limitation, then you probably don't want a + /// fixed_slist. You can always create your own memory allocator that works + /// the way you want. + /// + /// Template parameters: + /// T The type of object the slist holds. + /// nodeCount The max number of objects to contain. + /// bEnableOverflow Whether or not we should use the overflow heap if our object pool is exhausted. + /// OverflowAllocator Overflow allocator, which is only used if bEnableOverflow == true. Defaults to the global heap. + /// + template + class fixed_slist : public slist::node_type), + nodeCount, EASTL_ALIGN_OF(T), 0, bEnableOverflow, OverflowAllocator> > + { + public: + typedef fixed_node_allocator::node_type), nodeCount, + EASTL_ALIGN_OF(T), 0, bEnableOverflow, OverflowAllocator> fixed_allocator_type; + typedef OverflowAllocator overflow_allocator_type; + typedef slist base_type; + typedef fixed_slist this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + + enum { kMaxSize = nodeCount }; + + using base_type::assign; + using base_type::resize; + using base_type::size; + + protected: + char mBuffer[fixed_allocator_type::kBufferSize]; // kBufferSize will take into account alignment requirements. + + using base_type::internalAllocator; + + public: + fixed_slist(); + explicit fixed_slist(const overflow_allocator_type& overflowAllocator); // Only applicable if bEnableOverflow is true. + explicit fixed_slist(size_type n); // Currently we don't support overflowAllocator specification for other constructors, for simplicity. + fixed_slist(size_type n, const value_type& value); + fixed_slist(const this_type& x); + fixed_slist(this_type&& x); + fixed_slist(this_type&&, const overflow_allocator_type&); + fixed_slist(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator = EASTL_FIXED_SLIST_DEFAULT_ALLOCATOR); + + template + fixed_slist(InputIterator first, InputIterator last); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + size_type max_size() const; // Returns the max fixed size, which is the user-supplied nodeCount parameter. + bool full() const; // Returns true if the fixed space has been fully allocated. Note that if overflow is enabled, the container size can be greater than nodeCount but full() could return true because the fixed space may have a recently freed slot. + bool has_overflowed() const; // Returns true if the allocations spilled over into the overflow allocator. Meaningful only if overflow is enabled. + bool can_overflow() const; // Returns the value of the bEnableOverflow template parameter. 
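// A usage sketch for the fixed_slist declared above. An slist is singly linked,
// so insertion happens at the front (or after a given iterator); the node count
// and values here are illustrative.
#include <EASTL/fixed_slist.h>
#include <cassert>

void fixed_slist_example()
{
    eastl::fixed_slist<int, 4> stackLike;   // four nodes embedded in the object
    stackLike.push_front(1);
    stackLike.push_front(2);
    assert(stackLike.front() == 2);
    assert(!stackLike.full());              // only two of the four fixed nodes are used

    stackLike.reset_lose_memory();          // empty again; no destructors run, nothing freed
    assert(stackLike.empty());
}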
+ + // OverflowAllocator + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + }; // fixed_slist + + + + + /////////////////////////////////////////////////////////////////////// + // slist + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_slist::fixed_slist() + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + internalAllocator().set_name(EASTL_FIXED_SLIST_DEFAULT_NAME); + #endif + } + + + template + inline fixed_slist::fixed_slist(const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + internalAllocator().set_name(EASTL_FIXED_SLIST_DEFAULT_NAME); + #endif + } + + + template + inline fixed_slist::fixed_slist(size_type n) + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + internalAllocator().set_name(EASTL_FIXED_SLIST_DEFAULT_NAME); + #endif + + resize(n); + } + + + template + inline fixed_slist::fixed_slist(size_type n, const value_type& value) + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + internalAllocator().set_name(EASTL_FIXED_SLIST_DEFAULT_NAME); + #endif + + resize(n, value); + } + + + template + inline fixed_slist::fixed_slist(const this_type& x) + : base_type(fixed_allocator_type(mBuffer)) + { + internalAllocator().copy_overflow_allocator(x.internalAllocator()); + + #if EASTL_NAME_ENABLED + internalAllocator().set_name(x.internalAllocator().get_name()); + #endif + + assign(x.begin(), x.end()); + } + + + template + inline fixed_slist::fixed_slist(this_type&& x) + : base_type(fixed_allocator_type(mBuffer)) + { + // Since we are a fixed_list, we can't normally swap pointers unless both this and + // x are using using overflow and the overflow allocators are equal. To do: + //if(has_overflowed() && x.has_overflowed() && (get_overflow_allocator() == x.get_overflow_allocator())) + //{ + // We can swap contents and may need to swap the allocators as well. + //} + + // The following is currently identical to the fixed_vector(const this_type& x) code above. If it stays that + // way then we may want to make a shared implementation. + internalAllocator().copy_overflow_allocator(x.internalAllocator()); + + #if EASTL_NAME_ENABLED + internalAllocator().set_name(x.internalAllocator().get_name()); + #endif + + assign(x.begin(), x.end()); + } + + template + inline fixed_slist::fixed_slist(this_type&& x, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + // See comments above. 
+ internalAllocator().copy_overflow_allocator(x.internalAllocator()); + + #if EASTL_NAME_ENABLED + internalAllocator().set_name(x.internalAllocator().get_name()); + #endif + + assign(x.begin(), x.end()); + } + + + template + inline fixed_slist::fixed_slist(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + internalAllocator().set_name(EASTL_FIXED_SLIST_DEFAULT_NAME); + #endif + + assign(ilist.begin(), ilist.end()); + } + + + template + template + fixed_slist::fixed_slist(InputIterator first, InputIterator last) + : base_type(fixed_allocator_type(mBuffer)) + { + #if EASTL_NAME_ENABLED + internalAllocator().set_name(EASTL_FIXED_SLIST_DEFAULT_NAME); + #endif + + assign(first, last); + } + + + template + inline typename fixed_slist::this_type& + fixed_slist::operator=(const this_type& x) + { + if(this != &x) + { + base_type::clear(); + + #if EASTL_ALLOCATOR_COPY_ENABLED + internalAllocator() = x.internalAllocator(); // The primary effect of this is to copy the overflow allocator. + #endif + + base_type::assign(x.begin(), x.end()); // It would probably be better to implement this like slist::operator=. + } + return *this; + } + + + template + inline typename fixed_slist::this_type& + fixed_slist::operator=(this_type&& x) + { + return operator=(x); + } + + + template + inline typename fixed_slist::this_type& + fixed_slist::operator=(std::initializer_list ilist) + { + base_type::clear(); + base_type::assign(ilist.begin(), ilist.end()); + return *this; + } + + + template + inline void fixed_slist::swap(this_type& x) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(*this, x); + } + + + template + inline void fixed_slist::reset_lose_memory() + { + base_type::reset_lose_memory(); + base_type::get_allocator().reset(mBuffer); + } + + + template + inline typename fixed_slist::size_type + fixed_slist::max_size() const + { + return kMaxSize; + } + + + template + inline bool fixed_slist::full() const + { + // Note: This implementation isn't right in the case of bEnableOverflow = true because it will return + // false for the case that there are free nodes from the buffer but also nodes from the dynamic heap. + // This can happen if the container exceeds the fixed size and then frees some of the nodes from the fixed buffer. + return !internalAllocator().can_allocate(); // This is the quickest way of detecting this. has_overflowed uses a different method because it can't use this quick method. + } + + + template + inline bool fixed_slist::has_overflowed() const + { + #if EASTL_FIXED_SIZE_TRACKING_ENABLED // If we can use this faster pathway (as size() may be slow)... 
+ return (internalAllocator().mPool.mnPeakSize > kMaxSize); + #else + return (size() > kMaxSize); + #endif + } + + + template + inline bool fixed_slist::can_overflow() const + { + return bEnableOverflow; + } + + + template + inline const typename fixed_slist::overflow_allocator_type& + fixed_slist::get_overflow_allocator() const EA_NOEXCEPT + { + return internalAllocator().get_overflow_allocator(); + } + + + template + inline typename fixed_slist::overflow_allocator_type& + fixed_slist::get_overflow_allocator() EA_NOEXCEPT + { + return internalAllocator().get_overflow_allocator(); + } + + + template + inline void + fixed_slist::set_overflow_allocator(const overflow_allocator_type& allocator) + { + internalAllocator().set_overflow_allocator(allocator); + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline void swap(fixed_slist& a, + fixed_slist& b) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(a, b); + } + + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/fixed_string.h b/libkram/eastl/include/EASTL/fixed_string.h new file mode 100644 index 00000000..f646302b --- /dev/null +++ b/libkram/eastl/include/EASTL/fixed_string.h @@ -0,0 +1,805 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements a string which uses a fixed size memory pool. +// The bEnableOverflow template parameter allows the container to resort to +// heap allocations if the memory pool is exhausted. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_FIXED_STRING_H +#define EASTL_FIXED_STRING_H + +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + +namespace eastl +{ + /// EASTL_FIXED_STRING_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// In the case of fixed-size containers, the allocator name always refers + /// to overflow allocations. + /// + #ifndef EASTL_FIXED_STRING_DEFAULT_NAME + #define EASTL_FIXED_STRING_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_string" // Unless the user overrides something, this is "EASTL fixed_string". + #endif + + + + /// fixed_string + /// + /// A fixed_string with bEnableOverflow == true is identical to a regular + /// string in terms of its behavior. All the expectations of regular string + /// apply to it and no additional expectations come from it. When bEnableOverflow + /// is false, fixed_string behaves like regular string with the exception that + /// its capacity can never increase. All operations you do on such a fixed_string + /// which require a capacity increase will result in undefined behavior or an + /// C++ allocation exception, depending on the configuration of EASTL. + /// + /// Note: The nodeCount value is the amount of characters to allocate, which needs to + /// take into account a terminating zero. 
Thus if you want to store strings with a strlen + /// of 30, the nodeCount value must be at least 31. + /// + /// Template parameters: + /// T The type of object the string holds (char, wchar_t, char8_t, char16_t, char32_t). + /// nodeCount The max number of objects to contain. + /// bEnableOverflow Whether or not we should use the overflow heap if our object pool is exhausted. + /// OverflowAllocator Overflow allocator, which is only used if bEnableOverflow == true. Defaults to the global heap. + /// + /// Notes: + /// The nodeCount value must be at least 2, one for a character and one for a terminating 0. + /// + /// As of this writing, the string class necessarily reallocates when an insert of + /// self is done into self. As a result, the fixed_string class doesn't support + /// inserting self into self unless the bEnableOverflow template parameter is true. + /// + /// Example usage: + /// fixed_string fixedString("hello world"); // Can hold up to a strlen of 128. + /// + /// fixedString = "hola mundo"; + /// fixedString.clear(); + /// fixedString.resize(200); + /// fixedString.sprintf("%f", 1.5f); + /// + template + class fixed_string : public basic_string > + { + public: + typedef fixed_vector_allocator fixed_allocator_type; + typedef typename fixed_allocator_type::overflow_allocator_type overflow_allocator_type; + typedef basic_string base_type; + typedef fixed_string this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::CtorDoNotInitialize CtorDoNotInitialize; + typedef typename base_type::CtorSprintf CtorSprintf; + typedef aligned_buffer aligned_buffer_type; + + enum { kMaxSize = nodeCount - 1 }; // -1 because we need to save one element for the silent terminating null. + + using base_type::npos; + using base_type::mPair; + using base_type::append; + using base_type::resize; + using base_type::clear; + using base_type::capacity; + using base_type::size; + using base_type::sprintf_va_list; + using base_type::DoAllocate; + using base_type::DoFree; + using base_type::internalLayout; + using base_type::get_allocator; + + protected: + union // We define a union in order to avoid strict pointer aliasing issues with compilers like GCC. + { + value_type mArray[1]; + aligned_buffer_type mBuffer; // Question: Why are we doing this aligned_buffer thing? Why not just do an array of value_type, given that we are using just strings of char types. + }; + + public: + fixed_string(); + explicit fixed_string(const overflow_allocator_type& overflowAllocator); // Only applicable if bEnableOverflow is true. + fixed_string(const base_type& x, size_type position, size_type n = base_type::npos); // Currently we don't support overflowAllocator specification for other constructors, for simplicity. 
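// A usage sketch matching the fixed_string example in the comment above: room
// for a strlen of 127 plus the terminating zero. Contents are illustrative;
// overflow (enabled by default) falls back to the heap allocator.
#include <EASTL/fixed_string.h>
#include <cassert>

void fixed_string_example()
{
    eastl::fixed_string<char, 128> s("hello world");   // nodeCount includes the null terminator
    assert(s.max_size() == 127);                       // kMaxSize == nodeCount - 1

    s += ", kram";                  // appends in place while the fixed capacity allows
    s.sprintf("%d mips", 12);       // overwrites the contents, printf-style
    assert(s == "12 mips");
}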
+ fixed_string(const value_type* p, size_type n); + fixed_string(const value_type* p); + fixed_string(size_type n, const value_type& value); + fixed_string(const this_type& x); + fixed_string(const this_type& x, const overflow_allocator_type& overflowAllocator); + fixed_string(const base_type& x); + fixed_string(const value_type* pBegin, const value_type* pEnd); + fixed_string(CtorDoNotInitialize, size_type n); + fixed_string(CtorSprintf, const value_type* pFormat, ...); + fixed_string(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator); + fixed_string(this_type&& x); + fixed_string(this_type&& x, const overflow_allocator_type& overflowAllocator); + + this_type& operator=(const this_type& x); + this_type& operator=(const base_type& x); + this_type& operator=(const value_type* p); + this_type& operator=(const value_type c); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + void set_capacity(size_type n); + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + size_type max_size() const; + bool full() const; // Returns true if the fixed space has been fully allocated. Note that if overflow is enabled, the container size can be greater than nodeCount but full() could return true because the fixed space may have a recently freed slot. + bool has_overflowed() const; // Returns true if the allocations spilled over into the overflow allocator. Meaningful only if overflow is enabled. + bool can_overflow() const; // Returns the value of the bEnableOverflow template parameter. + + // The inherited versions of substr/left/right call the basic_string constructor, + // which will call the overflow allocator and fail if bEnableOverflow == false + this_type substr(size_type position, size_type n) const; + this_type left(size_type n) const; + this_type right(size_type n) const; + + // OverflowAllocator + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + }; // fixed_string + + + + + + /////////////////////////////////////////////////////////////////////// + // fixed_string + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_string::fixed_string() + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_STRING_DEFAULT_NAME); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + } + + + template + inline fixed_string::fixed_string(const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer.buffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_STRING_DEFAULT_NAME); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + } + + + template + inline fixed_string::fixed_string(const this_type& x) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + get_allocator().copy_overflow_allocator(x.get_allocator()); + + #if EASTL_NAME_ENABLED + get_allocator().set_name(x.get_allocator().get_name()); + #endif + + 
internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + + append(x); + } + + + template + inline fixed_string::fixed_string(const this_type& x, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer.buffer, overflowAllocator)) + { + get_allocator().copy_overflow_allocator(x.get_allocator()); + + #if EASTL_NAME_ENABLED + get_allocator().set_name(x.get_allocator().get_name()); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + + append(x); + } + + + template + inline fixed_string::fixed_string(const base_type& x) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(x.get_allocator().get_name()); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + + append(x); + } + + + template + inline fixed_string::fixed_string(const base_type& x, size_type position, size_type n) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(x.get_allocator().get_name()); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + + append(x, position, n); + } + + + template + inline fixed_string::fixed_string(const value_type* p, size_type n) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_STRING_DEFAULT_NAME); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + + append(p, n); + } + + + template + inline fixed_string::fixed_string(const value_type* p) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_STRING_DEFAULT_NAME); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + + append(p); // There better be enough space to hold the assigned string. + } + + + template + inline fixed_string::fixed_string(size_type n, const value_type& value) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_STRING_DEFAULT_NAME); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + + append(n, value); // There better be enough space to hold the assigned string. 
+ } + + + template + inline fixed_string::fixed_string(const value_type* pBegin, const value_type* pEnd) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_STRING_DEFAULT_NAME); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + + append(pBegin, pEnd); + } + + + template + inline fixed_string::fixed_string(CtorDoNotInitialize, size_type n) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_STRING_DEFAULT_NAME); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + + if(n < nodeCount) + { + internalLayout().SetHeapSize(n); + *internalLayout().HeapEndPtr() = 0; + } + else + { + internalLayout().SetHeapSize(0); + *internalLayout().HeapEndPtr() = 0; + + resize(n); + } + } + + + template + inline fixed_string::fixed_string(CtorSprintf, const value_type* pFormat, ...) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_STRING_DEFAULT_NAME); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + *internalLayout().HeapBeginPtr() = 0; + + va_list arguments; + va_start(arguments, pFormat); + sprintf_va_list(pFormat, arguments); + va_end(arguments); + } + + + template + inline fixed_string::fixed_string(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer.buffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_STRING_DEFAULT_NAME); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + + append(ilist.begin(), ilist.end()); + } + + + template + inline fixed_string::fixed_string(this_type&& x) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + // We copy from x instead of trade with it. We need to do so because fixed_ containers use local memory buffers. + #if EASTL_NAME_ENABLED + get_allocator().set_name(x.get_allocator().get_name()); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + + append(x); // Let x destruct its own items. + } + + template + inline fixed_string::fixed_string(this_type&& x, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer.buffer, overflowAllocator)) + { + // We copy from x instead of trade with it. We need to do so because fixed_ containers use local memory buffers. + #if EASTL_NAME_ENABLED + get_allocator().set_name(x.get_allocator().get_name()); + #endif + + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapCapacity(nodeCount - 1); + internalLayout().SetHeapSize(0); + + *internalLayout().HeapBeginPtr() = 0; + + append(x); // Let x destruct its own items. 
+ } + + + template + inline typename fixed_string::this_type& + fixed_string::operator=(const this_type& x) + { + if(this != &x) + { + clear(); + + #if EASTL_ALLOCATOR_COPY_ENABLED + get_allocator() = x.get_allocator(); + #endif + + append(x); + } + return *this; + } + + + template + inline typename fixed_string:: + this_type& fixed_string::operator=(const base_type& x) + { + if(static_cast(this) != &x) + { + clear(); + + #if EASTL_ALLOCATOR_COPY_ENABLED + get_allocator() = x.get_allocator(); + #endif + + append(x); + } + return *this; + } + + + template + inline typename fixed_string:: + this_type& fixed_string::operator=(const value_type* p) + { + if(internalLayout().HeapBeginPtr() != p) + { + clear(); + append(p); + } + return *this; + } + + + template + inline typename fixed_string:: + this_type& fixed_string::operator=(const value_type c) + { + clear(); + append((size_type)1, c); + return *this; + } + + + template + inline typename fixed_string:: + this_type& fixed_string::operator=(std::initializer_list ilist) + { + clear(); + append(ilist.begin(), ilist.end()); + return *this; + } + + + template + inline typename fixed_string:: + this_type& fixed_string::operator=(this_type&& x) + { + // We copy from x instead of trade with it. We need to do so because fixed_ containers use local memory buffers. + + // if(static_cast(this) != &x) This should be impossible, so we disable it until proven otherwise. + { + clear(); + + #if EASTL_ALLOCATOR_COPY_ENABLED + get_allocator() = x.get_allocator(); + #endif + + append(x); // Let x destruct its own items. + } + return *this; + } + + + template + inline void fixed_string::swap(this_type& x) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(*this, x); + } + + + template + inline void fixed_string::set_capacity(size_type n) + { + const size_type nPrevSize = internalLayout().GetSize(); + const size_type nPrevCapacity = capacity(); + + if(n == npos) // If the user means to set the capacity so that it equals the size (i.e. free excess capacity)... + n = nPrevSize; + + if(n != nPrevCapacity) // If the request results in a capacity change... + { + const size_type allocSize = (n + 1); // +1 because the terminating 0 isn't included in the supplied capacity value. So now n refers the amount of memory we need. + + if(can_overflow() && (((uintptr_t)internalLayout().HeapBeginPtr() != (uintptr_t)mBuffer.buffer) || (allocSize > kMaxSize))) // If we are or would be using dynamically allocated memory instead of our fixed-size member buffer... + { + T* const pNewData = (allocSize <= kMaxSize) ? (T*)&mBuffer.buffer[0] : DoAllocate(allocSize); + T* const pCopyEnd = (n < nPrevSize) ? (internalLayout().HeapBeginPtr() + n) : internalLayout().HeapEndPtr(); + CharStringUninitializedCopy(internalLayout().HeapBeginPtr(), pCopyEnd, pNewData); // Copy [internalLayout().heap.mpBegin, pCopyEnd) to pNewData. + if((uintptr_t)internalLayout().HeapBeginPtr() != (uintptr_t)mBuffer.buffer) + DoFree(internalLayout().HeapBeginPtr(), internalLayout().GetHeapCapacity() + 1); + + internalLayout().SetHeapSize((size_type)(pCopyEnd - internalLayout().HeapBeginPtr())); + internalLayout().SetHeapBeginPtr(pNewData); + internalLayout().SetHeapCapacity(allocSize - 1); + } // Else the new capacity would be within our fixed buffer. + else if(n < nPrevSize) // If the newly requested capacity is less than our size, we do what vector::set_capacity does and resize, even though we actually aren't reducing the capacity. 
+ resize(n); + } + } + + + template + inline void fixed_string::reset_lose_memory() + { + internalLayout().SetHeapBeginPtr(mArray); + internalLayout().SetHeapSize(0); + internalLayout().SetHeapCapacity(nodeCount - 1); + } + + + template + inline typename fixed_string:: + size_type fixed_string::max_size() const + { + return kMaxSize; + } + + + template + inline bool fixed_string::full() const + { + // If size >= capacity, then we are definitely full. + // Also, if our size is smaller but we've switched away from mBuffer due to a previous overflow, then we are considered full. + return ((size_t)(internalLayout().HeapEndPtr() - internalLayout().HeapBeginPtr()) >= kMaxSize) || ((void*)internalLayout().HeapBeginPtr() != (void*)mBuffer.buffer); + } + + + template + inline bool fixed_string::has_overflowed() const + { + // This will be incorrect for the case that bOverflowEnabled is true and the container was resized + // down to a small size where the fixed buffer could take over ownership of the data again. + // The only simple fix for this is to take on another member variable which tracks whether this overflow + // has occurred at some point in the past. + return ((void*)internalLayout().HeapBeginPtr() != (void*)mBuffer.buffer); + } + + + template + inline bool fixed_string::can_overflow() const + { + return bEnableOverflow; + } + + + template + inline typename fixed_string:: + this_type fixed_string::substr(size_type position, size_type n) const + { + #if EASTL_STRING_OPT_RANGE_ERRORS + if(position > internalLayout().GetSize()) + base_type::ThrowRangeException(); + #endif + + return fixed_string(internalLayout().HeapBeginPtr() + position, + internalLayout().HeapBeginPtr() + position + eastl::min_alt(n, internalLayout().GetSize() - position)); + } + + + template + inline typename fixed_string:: + this_type fixed_string::left(size_type n) const + { + const size_type nLength = size(); + if(n < nLength) + return fixed_string(internalLayout().HeapBeginPtr(), internalLayout().HeapBeginPtr() + n); + return *this; + } + + + template + inline typename fixed_string:: + this_type fixed_string::right(size_type n) const + { + const size_type nLength = size(); + if(n < nLength) + return fixed_string(internalLayout().HeapEndPtr() - n, internalLayout().HeapEndPtr()); + return *this; + } + + + template + inline const typename fixed_string:: + overflow_allocator_type& fixed_string::get_overflow_allocator() const EA_NOEXCEPT + { + return get_allocator().get_overflow_allocator(); + } + + + template + inline typename fixed_string:: + overflow_allocator_type& fixed_string::get_overflow_allocator() EA_NOEXCEPT + { + return get_allocator().get_overflow_allocator(); + } + + + template + inline void + fixed_string::set_overflow_allocator(const overflow_allocator_type& allocator) + { + get_allocator().set_overflow_allocator(allocator); + } + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + + // Operator + + template + fixed_string operator+(const fixed_string& a, + const fixed_string& b) + { + // We have a problem here because need to return an fixed_string by value. This will typically result in it + // using stack space equal to its size. That size may be too large to be workable. 
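// A short sketch of the overflow-introspection members implemented above (full,
// has_overflowed, set_capacity). Template arguments and the function name are assumptions,
// since the patch text has lost the angle-bracket parameters.
void FixedStringOverflowSketch()
{
    eastl::fixed_string<char, 8, true> s;

    s.assign("0123456789");             // longer than the 7-character fixed capacity, so it overflows
    bool spilled = s.has_overflowed();  // expected: true, storage now lives on the overflow allocator
    bool isFull  = s.full();            // also true once the fixed buffer has been abandoned

    s.resize(3);
    s.set_capacity(s.npos);             // npos means "shrink capacity to fit the current size"

    (void)spilled; (void)isFull;
}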
+ typedef fixed_string this_type; + + this_type result(const_cast(a).get_overflow_allocator()); + result.append(a); + result.append(b); + return result; + } + + + template + fixed_string operator+(const typename fixed_string::value_type* p, + const fixed_string& b) + { + typedef fixed_string this_type; + + const typename this_type::size_type n = (typename this_type::size_type)CharStrlen(p); + this_type result(const_cast(b).get_overflow_allocator()); + result.append(p, p + n); + result.append(b); + return result; + } + + + template + fixed_string operator+(typename fixed_string::value_type c, + const fixed_string& b) + { + typedef fixed_string this_type; + + this_type result(const_cast(b).get_overflow_allocator()); + result.push_back(c); + result.append(b); + return result; + } + + + template + fixed_string operator+(const fixed_string& a, + const typename fixed_string::value_type* p) + { + typedef fixed_string this_type; + + const typename this_type::size_type n = (typename this_type::size_type)CharStrlen(p); + this_type result(const_cast(a).get_overflow_allocator()); + result.append(a); + result.append(p, p + n); + return result; + } + + + template + fixed_string operator+(const fixed_string& a, + typename fixed_string::value_type c) + { + typedef fixed_string this_type; + + this_type result(const_cast(a).get_overflow_allocator()); + result.append(a); + result.push_back(c); + return result; + } + + + template + fixed_string operator+(fixed_string&& a, + fixed_string&& b) + { + a.append(b); // Using an rvalue by name results in it becoming an lvalue. + return eastl::move(a); + } + + template + fixed_string operator+(fixed_string&& a, + const fixed_string& b) + { + a.append(b); + return eastl::move(a); + } + + template + fixed_string operator+(const typename fixed_string::value_type* p, + fixed_string&& b) + { + b.insert(0, p); + return eastl::move(b); + } + + template + fixed_string operator+(fixed_string&& a, + const typename fixed_string::value_type* p) + { + a.append(p); + return eastl::move(a); + } + + template + fixed_string operator+(fixed_string&& a, + typename fixed_string::value_type c) + { + a.push_back(c); + return eastl::move(a); + } + + + // operator ==, !=, <, >, <=, >= come from the string implementations. + + template + inline void swap(fixed_string& a, + fixed_string& b) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(a, b); + } + + +} // namespace eastl + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/fixed_substring.h b/libkram/eastl/include/EASTL/fixed_substring.h new file mode 100644 index 00000000..033052f4 --- /dev/null +++ b/libkram/eastl/include/EASTL/fixed_substring.h @@ -0,0 +1,265 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_FIXED_SUBSTRING_H +#define EASTL_FIXED_SUBSTRING_H + + +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// fixed_substring + /// + /// Implements a string which is a reference to a segment of characters. 
+ /// This class is efficient because it allocates no memory and copies no + /// memory during construction and assignment, but rather refers directly + /// to the segment of chracters. A common use of this is to have a + /// fixed_substring efficiently refer to a substring within another string. + /// + /// You cannot directly resize a fixed_substring (e.g. via resize, insert, + /// append, erase), but you can assign a different substring to it. + /// You can modify the characters within a substring in place. + /// As of this writing, in the name of being lean and simple it is the + /// user's responsibility to not call unsupported resizing functions + /// such as those listed above. A detailed listing of the functions which + /// are not supported is given below in the class declaration. + /// + /// The c_str function doesn't act as one might hope, as it simply + /// returns the pointer to the beginning of the string segment and the + /// 0-terminator may be beyond the end of the segment. If you want to + /// always be able to use c_str as expected, use the fixed string solution + /// we describe below. + /// + /// Another use of fixed_substring is to provide C++ string-like functionality + /// with a C character array. This allows you to work on a C character array + /// as if it were a C++ string as opposed using the C string API. Thus you + /// can do this: + /// + /// void DoSomethingForUser(char* timeStr, size_t timeStrCapacity) + /// { + /// fixed_substring tmp(timeStr, timeStrCapacity); + /// tmp = "hello "; + /// tmp += "world"; + /// } + /// + /// Note that this class constructs and assigns from const string pointers + /// and const string objects, yet this class does not declare its member + /// data as const. This is a concession in order to allow this implementation + /// to be simple and lean. It is the user's responsibility to make sure + /// that strings that should not or can not be modified are either not + /// used by fixed_substring or are not modified by fixed_substring. + /// + /// A more flexible alternative to fixed_substring is fixed_string. + /// fixed_string has none of the functional limitations that fixed_substring + /// has and like fixed_substring it doesn't allocate memory. However, + /// fixed_string makes a *copy* of the source string and uses local + /// memory to store that copy. Also, fixed_string objects on the stack + /// are going to have a limit as to their maximum size. + /// + /// Notes: + /// As of this writing, the string class necessarily reallocates when + /// an insert of self is done into self. As a result, the fixed_substring + /// class doesn't support inserting self into self. 
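// A small sketch of the two uses described above: referring to a slice of an existing
// string, and wrapping a plain C character array. The <char> element type is an assumption
// because the template arguments are missing from this patch text.
#include <EASTL/string.h>
#include <EASTL/fixed_substring.h>

void FixedSubstringSketch(char* timeStr, size_t timeStrCapacity)
{
    eastl::string str("hello world");

    // Refers to "llo w" inside str; no memory is allocated and nothing is copied.
    eastl::fixed_substring<char> sub(str, 2, 5);

    // Wraps the caller's buffer so it can be read through the string interface.
    eastl::fixed_substring<char> wrapped(timeStr, timeStrCapacity);

    // Caveat from the comments above: sub.data() points into str and the slice is not
    // 0-terminated at sub.size(), so c_str() cannot be relied upon here.
    (void)sub; (void)wrapped;
}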
+ /// + /// Example usage: + /// basic_string str("hello world"); + /// fixed_substring sub(str, 2, 5); // sub == "llo w" + /// + template + class fixed_substring : public basic_string + { + public: + typedef basic_string base_type; + typedef fixed_substring this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::iterator iterator; + typedef typename base_type::const_iterator const_iterator; + + using base_type::npos; + using base_type::mPair; + using base_type::AllocateSelf; + using base_type::internalLayout; + using base_type::get_allocator; + + private: + + void SetInternalHeapLayout(value_type* pBeginPtr, size_type nSize, size_type nCap) + { + internalLayout().SetHeapBeginPtr(pBeginPtr); + internalLayout().SetHeapSize(nSize); + internalLayout().SetHeapCapacity(nCap); + } + + + public: + fixed_substring() + : base_type() + { + } + + fixed_substring(const base_type& x) + : base_type() + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(x.get_allocator().get_name()); + #endif + + assign(x); + } + + // We gain no benefit from having an rvalue move constructor or assignment operator, + // as this class is a const class. + + fixed_substring(const base_type& x, size_type position, size_type n = base_type::npos) + : base_type() + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(x.get_allocator().get_name()); + #endif + + assign(x, position, n); + } + + fixed_substring(const value_type* p, size_type n) + : base_type() + { + assign(p, n); + } + + fixed_substring(const value_type* p) + : base_type() + { + assign(p); + } + + fixed_substring(const value_type* pBegin, const value_type* pEnd) + : base_type() + { + assign(pBegin, pEnd); + } + + ~fixed_substring() + { + // We need to reset, as otherwise the parent destructor will + // attempt to free our memory. + AllocateSelf(); + } + + this_type& operator=(const base_type& x) + { + assign(x); + return *this; + } + + this_type& operator=(const value_type* p) + { + assign(p); + return *this; + } + + this_type& assign(const base_type& x) + { + // By design, we need to cast away const-ness here. + SetInternalHeapLayout(const_cast(x.data()), x.size(), x.size()); + return *this; + } + + this_type& assign(const base_type& x, size_type position, size_type n) + { + // By design, we need to cast away const-ness here. + SetInternalHeapLayout(const_cast(x.data()) + position, n, n); + return *this; + } + + this_type& assign(const value_type* p, size_type n) + { + // By design, we need to cast away const-ness here. + SetInternalHeapLayout(const_cast(p), n, n); + return *this; + } + + this_type& assign(const value_type* p) + { + // By design, we need to cast away const-ness here. + SetInternalHeapLayout(const_cast(p), (size_type)CharStrlen(p), (size_type)CharStrlen(p)); + return *this; + } + + this_type& assign(const value_type* pBegin, const value_type* pEnd) + { + // By design, we need to cast away const-ness here. + SetInternalHeapLayout(const_cast(pBegin), (size_type)(pEnd - pBegin), (size_type)(pEnd - pBegin)); + return *this; + } + + + // Partially supported functionality + // + // When using fixed_substring on a character sequence that is within another + // string, the following functions may do one of two things: + // 1 Attempt to reallocate + // 2 Write a 0 char at the end of the fixed_substring + // + // Item #1 will result in a crash, due to the attempt by the underlying + // string class to free the substring memory. 
Item #2 will result in a 0 + // char being written to the character array. Item #2 may or may not be + // a problem, depending on how you use fixed_substring. Thus the following + // functions cannot be used safely. + + #if 0 // !defined(EA_COMPILER_NO_DELETED_FUNCTIONS) We may want to enable these deletions after some investigation of possible user impact. + this_type& operator=(value_type c) = delete; + void resize(size_type n, value_type c) = delete; + void resize(size_type n) = delete; + void reserve(size_type = 0) = delete; + void set_capacity(size_type n) = delete; + void clear() = delete; + this_type& operator+=(const base_type& x) = delete; + this_type& operator+=(const value_type* p) = delete; + this_type& operator+=(value_type c) = delete; + this_type& append(const base_type& x) = delete; + this_type& append(const base_type& x, size_type position, size_type n) = delete; + this_type& append(const value_type* p, size_type n) = delete; + this_type& append(const value_type* p) = delete; + this_type& append(size_type n) = delete; + this_type& append(size_type n, value_type c) = delete; + this_type& append(const value_type* pBegin, const value_type* pEnd) = delete; + this_type& append_sprintf_va_list(const value_type* pFormat, va_list arguments) = delete; + this_type& append_sprintf(const value_type* pFormat, ...) = delete; + void push_back(value_type c) = delete; + void pop_back() = delete; + this_type& assign(size_type n, value_type c) = delete; + this_type& insert(size_type position, const base_type& x) = delete; + this_type& insert(size_type position, const base_type& x, size_type beg, size_type n) = delete; + this_type& insert(size_type position, const value_type* p, size_type n) = delete; + this_type& insert(size_type position, const value_type* p) = delete; + this_type& insert(size_type position, size_type n, value_type c) = delete; + iterator insert(const_iterator p, value_type c) = delete; + void insert(const_iterator p, size_type n, value_type c) = delete; + void insert(const_iterator p, const value_type* pBegin, const value_type* pEnd) = delete; + this_type& erase(size_type position = 0, size_type n = npos) = delete; + iterator erase(const_iterator p) = delete; + iterator erase(const_iterator pBegin, const_iterator pEnd) = delete; + void swap(base_type& x) = delete; + this_type& sprintf_va_list(const value_type* pFormat, va_list arguments) = delete; + this_type& sprintf(const value_type* pFormat, ...) = delete; + #endif + + }; // fixed_substring + + +} // namespace eastl + + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/fixed_vector.h b/libkram/eastl/include/EASTL/fixed_vector.h new file mode 100644 index 00000000..1dc482bd --- /dev/null +++ b/libkram/eastl/include/EASTL/fixed_vector.h @@ -0,0 +1,625 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements a vector which uses a fixed size memory pool. +// The bEnableOverflow template parameter allows the container to resort to +// heap allocations if the memory pool is exhausted. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_FIXED_VECTOR_H +#define EASTL_FIXED_VECTOR_H + + +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. 
VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + /// EASTL_FIXED_VECTOR_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// In the case of fixed-size containers, the allocator name always refers + /// to overflow allocations. + /// + #ifndef EASTL_FIXED_VECTOR_DEFAULT_NAME + #define EASTL_FIXED_VECTOR_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_vector" // Unless the user overrides something, this is "EASTL fixed_vector". + #endif + + + /// EASTL_FIXED_VECTOR_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_FIXED_VECTOR_DEFAULT_ALLOCATOR + #define EASTL_FIXED_VECTOR_DEFAULT_ALLOCATOR overflow_allocator_type(EASTL_FIXED_VECTOR_DEFAULT_NAME) + #endif + + + /// fixed_vector + /// + /// A fixed_vector with bEnableOverflow == true is identical to a regular + /// vector in terms of its behavior. All the expectations of regular vector + /// apply to it and no additional expectations come from it. When bEnableOverflow + /// is false, fixed_vector behaves like regular vector with the exception that + /// its capacity can never increase. All operations you do on such a fixed_vector + /// which require a capacity increase will result in undefined behavior or an + /// C++ allocation exception, depending on the configuration of EASTL. + /// + /// Template parameters: + /// T The type of object the vector holds. + /// nodeCount The max number of objects to contain. + /// bEnableOverflow Whether or not we should use the overflow heap if our object pool is exhausted. + /// OverflowAllocator Overflow allocator, which is only used if bEnableOverflow == true. Defaults to the global heap. + /// + /// Note: The nodeCount value must be at least 1. + /// + /// Example usage: + /// fixed_vector fixedVector); + /// + /// fixedVector.push_back(Widget()); + /// fixedVector.resize(200); + /// fixedVector.clear(); + /// + template ::type> + class fixed_vector : public vector > + { + public: + typedef fixed_vector_allocator fixed_allocator_type; + typedef OverflowAllocator overflow_allocator_type; + typedef vector base_type; + typedef fixed_vector this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::reference reference; + typedef typename base_type::iterator iterator; + typedef typename base_type::const_iterator const_iterator; + typedef aligned_buffer aligned_buffer_type; + + enum { kMaxSize = nodeCount }; + + using base_type::get_allocator; + using base_type::mpBegin; + using base_type::mpEnd; + using base_type::internalCapacityPtr; + using base_type::resize; + using base_type::clear; + using base_type::size; + using base_type::assign; + using base_type::npos; + using base_type::DoAllocate; + using base_type::DoFree; + using base_type::DoAssign; + using base_type::DoAssignFromIterator; + + protected: + aligned_buffer_type mBuffer; + + public: + fixed_vector(); + explicit fixed_vector(const overflow_allocator_type& overflowAllocator); // Only applicable if bEnableOverflow is true. + explicit fixed_vector(size_type n); // Currently we don't support overflowAllocator specification for other constructors, for simplicity. 
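// A usage sketch for the container declared above. The example in the comment block has
// lost its template arguments (and picked up a stray ')') in the patch text, so the
// <Widget, 128, true> parameters and the Widget type itself are illustrative assumptions.
#include <EASTL/fixed_vector.h>

struct Widget { int value = 0; };

void FixedVectorUsageSketch()
{
    eastl::fixed_vector<Widget, 128, true> fixedVector;

    fixedVector.push_back(Widget());
    fixedVector.resize(200);      // exceeds nodeCount, so this spills to the overflow allocator
    fixedVector.clear();

    // With overflow disabled the capacity can never exceed nodeCount.
    eastl::fixed_vector<int, 16, false> ints;
    for (int i = 0; i < 16; ++i)
        ints.push_back(i);
    // ints.push_back(16);        // would exhaust the fixed pool: assert / undefined behavior
}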
+ fixed_vector(size_type n, const value_type& value); + fixed_vector(const this_type& x); + fixed_vector(this_type&& x); + fixed_vector(this_type&& x, const overflow_allocator_type& overflowAllocator); + fixed_vector(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator = EASTL_FIXED_VECTOR_DEFAULT_ALLOCATOR); + + template + fixed_vector(InputIterator first, InputIterator last); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + void set_capacity(size_type n); + void clear(bool freeOverflow); + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + size_type max_size() const; // Returns the max fixed size, which is the user-supplied nodeCount parameter. + bool full() const; // Returns true if the fixed space has been fully allocated. Note that if overflow is enabled, the container size can be greater than nodeCount but full() could return true because the fixed space may have a recently freed slot. + bool has_overflowed() const; // Returns true if the allocations spilled over into the overflow allocator. Meaningful only if overflow is enabled. + bool can_overflow() const; // Returns the value of the bEnableOverflow template parameter. + + void* push_back_uninitialized(); + void push_back(const value_type& value); // We implement push_back here because we have a specialization that's + reference push_back(); // smaller for the case of overflow being disabled. + void push_back(value_type&& value); + + // OverflowAllocator + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT; + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT; + void set_overflow_allocator(const overflow_allocator_type& allocator); + + protected: + void* DoPushBackUninitialized(true_type); + void* DoPushBackUninitialized(false_type); + + void DoPushBack(true_type, const value_type& value); + void DoPushBack(false_type, const value_type& value); + + void DoPushBackMove(true_type, value_type&& value); + void DoPushBackMove(false_type, value_type&& value); + + reference DoPushBack(false_type); + reference DoPushBack(true_type); + + }; // fixed_vector + + + + + /////////////////////////////////////////////////////////////////////// + // fixed_vector + /////////////////////////////////////////////////////////////////////// + + template + inline fixed_vector::fixed_vector() + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_VECTOR_DEFAULT_NAME); + #endif + + mpBegin = mpEnd = (value_type*)&mBuffer.buffer[0]; + internalCapacityPtr() = mpBegin + nodeCount; + } + + template + inline fixed_vector::fixed_vector(const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer.buffer, overflowAllocator)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_VECTOR_DEFAULT_NAME); + #endif + + mpBegin = mpEnd = (value_type*)&mBuffer.buffer[0]; + internalCapacityPtr() = mpBegin + nodeCount; + } + + template + inline fixed_vector::fixed_vector(size_type n) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_VECTOR_DEFAULT_NAME); + #endif + + mpBegin = mpEnd = (value_type*)&mBuffer.buffer[0]; + internalCapacityPtr() = mpBegin + nodeCount; + resize(n); + } + + + template + inline 
fixed_vector::fixed_vector(size_type n, const value_type& value) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_VECTOR_DEFAULT_NAME); + #endif + + mpBegin = mpEnd = (value_type*)&mBuffer.buffer[0]; + internalCapacityPtr() = mpBegin + nodeCount; + resize(n, value); + } + + + template + inline fixed_vector::fixed_vector(const this_type& x) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + get_allocator().copy_overflow_allocator(x.get_allocator()); + + #if EASTL_NAME_ENABLED + get_allocator().set_name(x.get_allocator().get_name()); + #endif + + mpBegin = mpEnd = (value_type*)&mBuffer.buffer[0]; + internalCapacityPtr() = mpBegin + nodeCount; + base_type::template DoAssign(x.begin(), x.end(), false_type()); + } + + + template + inline fixed_vector::fixed_vector(this_type&& x) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + // Since we are a fixed_vector, we can't swap pointers. We can possibly so something like fixed_swap or + // we can just do an assignment from x. If we want to do the former then we need to have some complicated + // code to deal with overflow or no overflow, and whether the memory is in the fixed-size buffer or in + // the overflow allocator. 90% of the time the memory should be in the fixed buffer, in which case + // a simple assignment is no worse than the fancy pathway. + + // Since we are a fixed_list, we can't normally swap pointers unless both this and + // x are using using overflow and the overflow allocators are equal. To do: + //if(has_overflowed() && x.has_overflowed() && (get_overflow_allocator() == x.get_overflow_allocator())) + //{ + // We can swap contents and may need to swap the allocators as well. + //} + + // The following is currently identical to the fixed_vector(const this_type& x) code above. If it stays that + // way then we may want to make a shared implementation. + get_allocator().copy_overflow_allocator(x.get_allocator()); + + #if EASTL_NAME_ENABLED + get_allocator().set_name(x.get_allocator().get_name()); + #endif + + mpBegin = mpEnd = (value_type*)&mBuffer.buffer[0]; + internalCapacityPtr() = mpBegin + nodeCount; + base_type::template DoAssign, true>(eastl::make_move_iterator(x.begin()), eastl::make_move_iterator(x.end()), false_type()); + } + + + template + inline fixed_vector::fixed_vector(this_type&& x, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer.buffer, overflowAllocator)) + { + // See the discussion above. + + // The following is currently identical to the fixed_vector(const this_type& x) code above. If it stays that + // way then we may want to make a shared implementation. 
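// A sketch of the point made in the comments above: because the elements live in a local
// member buffer, "moving" a fixed_vector move-constructs the elements one by one rather
// than stealing a pointer. Names and template arguments here are assumptions.
#include <EASTL/fixed_vector.h>
#include <EASTL/string.h>
#include <EASTL/utility.h>

void FixedVectorMoveSketch()
{
    eastl::fixed_vector<eastl::string, 8, true> a;
    a.push_back("alpha");
    a.push_back("beta");

    // Elements are moved into b's own fixed buffer; a pointer swap only ever happens in
    // swap(), and only when both sides have overflowed onto equal overflow allocators.
    eastl::fixed_vector<eastl::string, 8, true> b(eastl::move(a));
    (void)b;
}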
+ get_allocator().copy_overflow_allocator(x.get_allocator()); + + #if EASTL_NAME_ENABLED + get_allocator().set_name(x.get_allocator().get_name()); + #endif + + mpBegin = mpEnd = (value_type*)&mBuffer.buffer[0]; + internalCapacityPtr() = mpBegin + nodeCount; + base_type::template DoAssign(x.begin(), x.end(), false_type()); + } + + + template + inline fixed_vector::fixed_vector(std::initializer_list ilist, const overflow_allocator_type& overflowAllocator) + : base_type(fixed_allocator_type(mBuffer.buffer, overflowAllocator)) + { + typedef typename std::initializer_list::iterator InputIterator; + typedef typename eastl::iterator_traits::iterator_category IC; + + mpBegin = mpEnd = (value_type*)&mBuffer.buffer[0]; + internalCapacityPtr() = mpBegin + nodeCount; + base_type::template DoAssignFromIterator(ilist.begin(), ilist.end(), IC()); + } + + + template + template + fixed_vector::fixed_vector(InputIterator first, InputIterator last) + : base_type(fixed_allocator_type(mBuffer.buffer)) + { + #if EASTL_NAME_ENABLED + get_allocator().set_name(EASTL_FIXED_VECTOR_DEFAULT_NAME); + #endif + + mpBegin = mpEnd = (value_type*)&mBuffer.buffer[0]; + internalCapacityPtr() = mpBegin + nodeCount; + base_type::template DoAssign(first, last, is_integral()); + } + + + template + inline typename fixed_vector::this_type& + fixed_vector::operator=(const this_type& x) + { + if(this != &x) + { + clear(); + + #if EASTL_ALLOCATOR_COPY_ENABLED + get_allocator() = x.get_allocator(); // The primary effect of this is to copy the overflow allocator. + #endif + + base_type::template DoAssign(x.begin(), x.end(), false_type()); // Shorter route. + } + return *this; + } + + + template + inline typename fixed_vector::this_type& + fixed_vector::operator=(std::initializer_list ilist) + { + typedef typename std::initializer_list::iterator InputIterator; + typedef typename eastl::iterator_traits::iterator_category IC; + + clear(); + base_type::template DoAssignFromIterator(ilist.begin(), ilist.end(), IC()); + return *this; + } + + + template + inline typename fixed_vector::this_type& + fixed_vector::operator=(this_type&& x) + { + // Since we are a fixed_vector, we can't swap pointers. We can possibly do something like fixed_swap or + // we can just do an assignment from x. If we want to do the former then we need to have some complicated + // code to deal with overflow or no overflow, and whether the memory is in the fixed-size buffer or in + // the overflow allocator. 90% of the time the memory should be in the fixed buffer, in which case + // a simple assignment is no worse than the fancy pathway. + if (this != &x) + { + clear(); + + #if EASTL_ALLOCATOR_COPY_ENABLED + get_allocator() = x.get_allocator(); // The primary effect of this is to copy the overflow allocator. + #endif + + base_type::template DoAssign, true>(eastl::make_move_iterator(x.begin()), eastl::make_move_iterator(x.end()), false_type()); // Shorter route. + } + return *this; + } + + + template + inline void fixed_vector::swap(this_type& x) + { + if((has_overflowed() && x.has_overflowed()) && (get_overflow_allocator() == x.get_overflow_allocator())) // If both containers are using the heap instead of local memory + { // then we can do a fast pointer swap instead of content swap. + eastl::swap(mpBegin, x.mpBegin); + eastl::swap(mpEnd, x.mpEnd); + eastl::swap(internalCapacityPtr(), x.internalCapacityPtr()); + } + else + { + // Fixed containers use a special swap that can deal with excessively large buffers. 
+ eastl::fixed_swap(*this, x); + } + } + + + template + inline void fixed_vector::set_capacity(size_type n) + { + const size_type nPrevSize = (size_type)(mpEnd - mpBegin); + const size_type nPrevCapacity = (size_type)(internalCapacityPtr() - mpBegin); + + if(n == npos) // If the user means to set the capacity so that it equals the size (i.e. free excess capacity)... + n = nPrevSize; + + if(n != nPrevCapacity) // If the request results in a capacity change... + { + if(can_overflow() && (((uintptr_t)mpBegin != (uintptr_t)mBuffer.buffer) || (n > kMaxSize))) // If we are or would be using dynamically allocated memory instead of our fixed-size member buffer... + { + T* const pNewData = (n <= kMaxSize) ? (T*)&mBuffer.buffer[0] : DoAllocate(n); + T* const pCopyEnd = (n < nPrevSize) ? (mpBegin + n) : mpEnd; + eastl::uninitialized_move_ptr(mpBegin, pCopyEnd, pNewData); // Move [mpBegin, pCopyEnd) to p. + eastl::destruct(mpBegin, mpEnd); + if((uintptr_t)mpBegin != (uintptr_t)mBuffer.buffer) + DoFree(mpBegin, (size_type)(internalCapacityPtr() - mpBegin)); + + mpEnd = pNewData + (pCopyEnd - mpBegin); + mpBegin = pNewData; + internalCapacityPtr() = mpBegin + n; + } // Else the new capacity would be within our fixed buffer. + else if(n < nPrevSize) // If the newly requested capacity is less than our size, we do what vector::set_capacity does and resize, even though we actually aren't reducing the capacity. + resize(n); + } + } + + + template + inline void fixed_vector::clear(bool freeOverflow) + { + base_type::clear(); + if (freeOverflow && mpBegin != (value_type*)&mBuffer.buffer[0]) + { + EASTLFree(get_allocator(), mpBegin, (internalCapacityPtr() - mpBegin) * sizeof(T)); + mpBegin = mpEnd = (value_type*)&mBuffer.buffer[0]; + internalCapacityPtr() = mpBegin + nodeCount; + } + } + + + template + inline void fixed_vector::reset_lose_memory() + { + mpBegin = mpEnd = (value_type*)&mBuffer.buffer[0]; + internalCapacityPtr() = mpBegin + nodeCount; + } + + + template + inline typename fixed_vector::size_type + fixed_vector::max_size() const + { + return kMaxSize; + } + + + template + inline bool fixed_vector::full() const + { + // If size >= capacity, then we are definitely full. + // Also, if our size is smaller but we've switched away from mBuffer due to a previous overflow, then we are considered full. + return ((size_t)(mpEnd - mpBegin) >= kMaxSize) || ((void*)mpBegin != (void*)mBuffer.buffer); + } + + + template + inline bool fixed_vector::has_overflowed() const + { + // This will be incorrect for the case that bOverflowEnabled is true and the container was resized + // down to a small size where the fixed buffer could take over ownership of the data again. + // The only simple fix for this is to take on another member variable which tracks whether this overflow + // has occurred at some point in the past. 
+ return ((void*)mpBegin != (void*)mBuffer.buffer); + } + + + template + inline bool fixed_vector::can_overflow() const + { + return bEnableOverflow; + } + + + template + inline void* fixed_vector::push_back_uninitialized() + { + return DoPushBackUninitialized(typename type_select::type()); + } + + + template + inline void* fixed_vector::DoPushBackUninitialized(true_type) + { + return base_type::push_back_uninitialized(); + } + + + template + inline void* fixed_vector::DoPushBackUninitialized(false_type) + { + EASTL_ASSERT(mpEnd < internalCapacityPtr()); + + return mpEnd++; + } + + + template + inline void fixed_vector::push_back(const value_type& value) + { + DoPushBack(typename type_select::type(), value); + } + + + template + inline void fixed_vector::DoPushBack(true_type, const value_type& value) + { + base_type::push_back(value); + } + + + // This template specializes for overflow NOT enabled. + // In this configuration, there is no need for the heavy weight push_back() which tests to see if the container should grow (it never will) + template + inline void fixed_vector::DoPushBack(false_type, const value_type& value) + { + EASTL_ASSERT(mpEnd < internalCapacityPtr()); + + ::new((void*)mpEnd++) value_type(value); + } + + + template + inline typename fixed_vector::reference fixed_vector::push_back() + { + return DoPushBack(typename type_select::type()); + } + + + template + inline typename fixed_vector::reference fixed_vector::DoPushBack(true_type) + { + return base_type::push_back(); + } + + + // This template specializes for overflow NOT enabled. + // In this configuration, there is no need for the heavy weight push_back() which tests to see if the container should grow (it never will) + template + inline typename fixed_vector::reference fixed_vector::DoPushBack(false_type) + { + EASTL_ASSERT(mpEnd < internalCapacityPtr()); + + ::new((void*)mpEnd++) value_type; // Note that this isn't value_type() as that syntax doesn't work on all compilers for POD types. + + return *(mpEnd - 1); // Same as return back(); + } + + + template + inline void fixed_vector::push_back(value_type&& value) + { + DoPushBackMove(typename type_select::type(), eastl::move(value)); + } + + + template + inline void fixed_vector::DoPushBackMove(true_type, value_type&& value) + { + base_type::push_back(eastl::move(value)); // This will call vector::push_back(value_type &&), and possibly swap value with *mpEnd. + } + + + // This template specializes for overflow NOT enabled. + // In this configuration, there is no need for the heavy weight push_back() which tests to see if the container should grow (it never will) + template + inline void fixed_vector::DoPushBackMove(false_type, value_type&& value) + { + EASTL_ASSERT(mpEnd < internalCapacityPtr()); + + ::new((void*)mpEnd++) value_type(eastl::move(value)); // This will call the value_type(value_type&&) constructor, and possibly swap value with *mpEnd. 
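// A sketch of the specialization described above: with bEnableOverflow == false the
// push_back paths skip the growth test and construct in place at mpEnd, so the caller is
// responsible for not exceeding nodeCount. The names below are illustrative.
#include <new>
#include <EASTL/fixed_vector.h>

void FixedVectorPushBackSketch()
{
    eastl::fixed_vector<int, 4, false> v;

    v.push_back(1);                        // no growth path; only an EASTL_ASSERT guards capacity
    int& back = v.push_back();             // default-constructs in place and returns a reference
    back = 2;

    void* p = v.push_back_uninitialized(); // raw storage; the caller constructs the element
    ::new(p) int(3);
}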
+ } + + + template + inline const typename fixed_vector::overflow_allocator_type& + fixed_vector::get_overflow_allocator() const EA_NOEXCEPT + { + return get_allocator().get_overflow_allocator(); + } + + + template + inline typename fixed_vector::overflow_allocator_type& + fixed_vector::get_overflow_allocator() EA_NOEXCEPT + { + return get_allocator().get_overflow_allocator(); + } + + + template + inline void + fixed_vector::set_overflow_allocator(const overflow_allocator_type& allocator) + { + get_allocator().set_overflow_allocator(allocator); + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + // operator ==, !=, <, >, <=, >= come from the vector implementations. + + template + inline void swap(fixed_vector& a, + fixed_vector& b) + { + // Fixed containers use a special swap that can deal with excessively large buffers. + eastl::fixed_swap(a, b); + } + + + +} // namespace eastl + + + +#endif // Header include guard + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/functional.h b/libkram/eastl/include/EASTL/functional.h new file mode 100644 index 00000000..556bf020 --- /dev/null +++ b/libkram/eastl/include/EASTL/functional.h @@ -0,0 +1,1266 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_FUNCTIONAL_H +#define EASTL_FUNCTIONAL_H + + +#include +#include +#include +#include +#include +#include + + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. 
+#endif + + + +namespace eastl +{ + /////////////////////////////////////////////////////////////////////// + // Primary C++ functions + /////////////////////////////////////////////////////////////////////// + + template + struct plus : public binary_function + { + EA_CPP14_CONSTEXPR T operator()(const T& a, const T& b) const + { return a + b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/plus_void + template <> + struct plus + { + typedef int is_transparent; + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) + eastl::forward(b)) + { return eastl::forward(a) + eastl::forward(b); } + }; + + template + struct minus : public binary_function + { + EA_CPP14_CONSTEXPR T operator()(const T& a, const T& b) const + { return a - b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/minus_void + template <> + struct minus + { + typedef int is_transparent; + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) - eastl::forward(b)) + { return eastl::forward(a) - eastl::forward(b); } + }; + + template + struct multiplies : public binary_function + { + EA_CPP14_CONSTEXPR T operator()(const T& a, const T& b) const + { return a * b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/multiplies_void + template <> + struct multiplies + { + typedef int is_transparent; + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) * eastl::forward(b)) + { return eastl::forward(a) * eastl::forward(b); } + }; + + template + struct divides : public binary_function + { + EA_CPP14_CONSTEXPR T operator()(const T& a, const T& b) const + { return a / b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/divides_void + template <> + struct divides + { + typedef int is_transparent; + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) / eastl::forward(b)) + { return eastl::forward(a) / eastl::forward(b); } + }; + + template + struct modulus : public binary_function + { + EA_CPP14_CONSTEXPR T operator()(const T& a, const T& b) const + { return a % b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/modulus_void + template <> + struct modulus + { + typedef int is_transparent; + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) % eastl::forward(b)) + { return eastl::forward(a) % eastl::forward(b); } + }; + + template + struct negate : public unary_function + { + EA_CPP14_CONSTEXPR T operator()(const T& a) const + { return -a; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/negate_void + template <> + struct negate + { + typedef int is_transparent; + template + EA_CPP14_CONSTEXPR auto operator()(T&& t) const + -> decltype(-eastl::forward(t)) + { return -eastl::forward(t); } + }; + + template + struct equal_to : public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a, const T& b) const + { return a == b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/equal_to_void + template <> + struct equal_to + { + typedef int is_transparent; + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) == eastl::forward(b)) + { return eastl::forward(a) == eastl::forward(b); } + }; + + template + bool validate_equal_to(const T& a, const T& b, Compare compare) + { + return compare(a, b) == compare(b, a); + } + + template + struct not_equal_to : 
public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a, const T& b) const + { return a != b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/not_equal_to_void + template <> + struct not_equal_to + { + typedef int is_transparent; + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) != eastl::forward(b)) + { return eastl::forward(a) != eastl::forward(b); } + }; + + template + bool validate_not_equal_to(const T& a, const T& b, Compare compare) + { + return compare(a, b) == compare(b, a); // We want the not equal comparison results to be equal. + } + + /// str_equal_to + /// + /// Compares two 0-terminated string types. + /// The T types are expected to be iterators or act like iterators. + /// The expected behavior of str_less is the same as (strcmp(p1, p2) == 0). + /// + /// Example usage: + /// hash_set, str_equal_to > stringHashSet; + /// + /// Note: + /// You couldn't use str_equal_to like this: + /// bool result = equal("hi", "hi" + 2, "ho", str_equal_to()); + /// This is because equal tests an array of something, with each element by + /// the comparison function. But str_equal_to tests an array of something itself. + /// + /// To consider: Update this code to use existing word-based comparison optimizations, + /// such as that used in the EAStdC Strcmp function. + /// + template + struct str_equal_to : public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(T a, T b) const + { + while(*a && (*a == *b)) + { + ++a; + ++b; + } + return (*a == *b); + } + }; + + template + struct greater : public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a, const T& b) const + { return a > b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/greater_void + template <> + struct greater + { + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) > eastl::forward(b)) + { return eastl::forward(a) > eastl::forward(b); } + }; + + template + bool validate_greater(const T& a, const T& b, Compare compare) + { + return !compare(a, b) || !compare(b, a); // If (a > b), then !(b > a) + } + + + template + bool validate_less(const T& a, const T& b, Compare compare) + { + return !compare(a, b) || !compare(b, a); // If (a < b), then !(b < a) + } + + /// str_less + /// + /// Compares two 0-terminated string types. + /// The T types are expected to be iterators or act like iterators, + /// and that includes being a pointer to a C character array. + /// The expected behavior of str_less is the same as (strcmp(p1, p2) < 0). + /// This function is not Unicode-correct and it's not guaranteed to work + /// with all Unicode strings. + /// + /// Example usage: + /// set > stringSet; + /// + /// To consider: Update this code to use existing word-based comparison optimizations, + /// such as that used in the EAStdC Strcmp function. 
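// A sketch of the comparator usage the comments above describe. The container template
// arguments were lost in this patch text, so the ones below are assumptions taken from
// the wording of those comments.
#include <EASTL/set.h>
#include <EASTL/hash_set.h>

void StringFunctorSketch()
{
    // Orders C strings by their contents rather than by pointer value.
    eastl::set<const char*, eastl::str_less<const char*> > stringSet;
    stringSet.insert("world");
    stringSet.insert("hello");

    // Hashes on the characters and compares with str_equal_to, as in the comment above.
    eastl::hash_set<const char*, eastl::hash<const char*>, eastl::str_equal_to<const char*> > stringHashSet;
    stringHashSet.insert("hello");
}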
+ /// + template + struct str_less : public binary_function + { + bool operator()(T a, T b) const + { + while(static_cast::type>::type>(*a) == + static_cast::type>::type>(*b)) + { + if(*a == 0) + return (*b != 0); + ++a; + ++b; + } + + char aValue = static_cast::type>(*a); + char bValue = static_cast::type>(*b); + + typename make_unsigned::type aValueU = static_cast::type>(aValue); + typename make_unsigned::type bValueU = static_cast::type>(bValue); + + return aValueU < bValueU; + + //return (static_cast::type>::type>(*a) < + // static_cast::type>::type>(*b)); + } + }; + + template + struct greater_equal : public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a, const T& b) const + { return a >= b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/greater_equal_void + template <> + struct greater_equal + { + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) >= eastl::forward(b)) + { return eastl::forward(a) >= eastl::forward(b); } + }; + + template + bool validate_greater_equal(const T& a, const T& b, Compare compare) + { + return !compare(a, b) || !compare(b, a); // If (a >= b), then !(b >= a) + } + + template + struct less_equal : public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a, const T& b) const + { return a <= b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/less_equal_void + template <> + struct less_equal + { + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) <= eastl::forward(b)) + { return eastl::forward(a) <= eastl::forward(b); } + }; + + template + bool validate_less_equal(const T& a, const T& b, Compare compare) + { + return !compare(a, b) || !compare(b, a); // If (a <= b), then !(b <= a) + } + + template + struct logical_and : public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a, const T& b) const + { return a && b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/logical_and_void + template <> + struct logical_and + { + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) && eastl::forward(b)) + { return eastl::forward(a) && eastl::forward(b); } + }; + + template + struct logical_or : public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a, const T& b) const + { return a || b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/logical_or_void + template <> + struct logical_or + { + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) || eastl::forward(b)) + { return eastl::forward(a) || eastl::forward(b); } + }; + + template + struct logical_not : public unary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a) const + { return !a; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/logical_not_void + template <> + struct logical_not + { + template + EA_CPP14_CONSTEXPR auto operator()(T&& t) const + -> decltype(!eastl::forward(t)) + { return !eastl::forward(t); } + }; + + + + /////////////////////////////////////////////////////////////////////// + // Dual type functions + /////////////////////////////////////////////////////////////////////// + + template + struct equal_to_2 : public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a, const U& b) const + { return a == b; } + EA_CPP14_CONSTEXPR bool operator()(const U& b, const T& a) const // If you are getting a 'operator() already defined' error 
related to on this line while compiling a + { return b == a; } // hashtable class (e.g. hash_map), it's likely that you are using hashtable::find_as when you should + }; // be using hashtable::find instead. The problem is that (const T, U) collide. To do: make this work. + + template + struct equal_to_2 : public equal_to + { + }; + + + template + struct not_equal_to_2 : public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a, const U& b) const + { return a != b; } + EA_CPP14_CONSTEXPR bool operator()(const U& b, const T& a) const + { return b != a; } + }; + + template + struct not_equal_to_2 : public not_equal_to + { + }; + + + template + struct less_2 : public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a, const U& b) const + { return a < b; } + EA_CPP14_CONSTEXPR bool operator()(const U& b, const T& a) const + { return b < a; } + }; + + template + struct less_2 : public less + { + }; + + + + + /// unary_negate + /// + template + class unary_negate : public unary_function + { + protected: + Predicate mPredicate; + public: + explicit unary_negate(const Predicate& a) + : mPredicate(a) {} + EA_CPP14_CONSTEXPR bool operator()(const typename Predicate::argument_type& a) const + { return !mPredicate(a); } + }; + + template + inline EA_CPP14_CONSTEXPR unary_negate not1(const Predicate& predicate) + { return unary_negate(predicate); } + + + + /// binary_negate + /// + template + class binary_negate : public binary_function + { + protected: + Predicate mPredicate; + public: + explicit binary_negate(const Predicate& a) + : mPredicate(a) { } + EA_CPP14_CONSTEXPR bool operator()(const typename Predicate::first_argument_type& a, const typename Predicate::second_argument_type& b) const + { return !mPredicate(a, b); } + }; + + template + inline EA_CPP14_CONSTEXPR binary_negate not2(const Predicate& predicate) + { return binary_negate(predicate); } + + + + /// unary_compose + /// + template + struct unary_compose : public unary_function + { + protected: + Operation1 op1; + Operation2 op2; + + public: + unary_compose(const Operation1& x, const Operation2& y) + : op1(x), op2(y) {} + + typename Operation1::result_type operator()(const typename Operation2::argument_type& x) const + { return op1(op2(x)); } + + typename Operation1::result_type operator()(typename Operation2::argument_type& x) const + { return op1(op2(x)); } + }; + + template + inline unary_compose + compose1(const Operation1& op1, const Operation2& op2) + { + return unary_compose(op1,op2); + } + + + /// binary_compose + /// + template + class binary_compose : public unary_function + { + protected: + Operation1 op1; + Operation2 op2; + Operation3 op3; + + public: + // Support binary functors too. 
+ typedef typename Operation2::argument_type first_argument_type; + typedef typename Operation3::argument_type second_argument_type; + + binary_compose(const Operation1& x, const Operation2& y, const Operation3& z) + : op1(x), op2(y), op3(z) { } + + typename Operation1::result_type operator()(const typename Operation2::argument_type& x) const + { return op1(op2(x),op3(x)); } + + typename Operation1::result_type operator()(typename Operation2::argument_type& x) const + { return op1(op2(x),op3(x)); } + + typename Operation1::result_type operator()(const typename Operation2::argument_type& x,const typename Operation3::argument_type& y) const + { return op1(op2(x),op3(y)); } + + typename Operation1::result_type operator()(typename Operation2::argument_type& x, typename Operation3::argument_type& y) const + { return op1(op2(x),op3(y)); } + }; + + + template + inline binary_compose + compose2(const Operation1& op1, const Operation2& op2, const Operation3& op3) + { + return binary_compose(op1, op2, op3); + } + + + + /////////////////////////////////////////////////////////////////////// + // pointer_to_unary_function + /////////////////////////////////////////////////////////////////////// + + /// pointer_to_unary_function + /// + /// This is an adapter template which converts a pointer to a standalone + /// function to a function object. This allows standalone functions to + /// work in many cases where the system requires a function object. + /// + /// Example usage: + /// ptrdiff_t Rand(ptrdiff_t n) { return rand() % n; } // Note: The C rand function is poor and slow. + /// pointer_to_unary_function randInstance(Rand); + /// random_shuffle(pArrayBegin, pArrayEnd, randInstance); + /// + template + class pointer_to_unary_function : public unary_function + { + protected: + Result (*mpFunction)(Arg); + + public: + pointer_to_unary_function() + { } + + explicit pointer_to_unary_function(Result (*pFunction)(Arg)) + : mpFunction(pFunction) { } + + Result operator()(Arg x) const + { return mpFunction(x); } + }; + + + /// ptr_fun + /// + /// This ptr_fun is simply shorthand for usage of pointer_to_unary_function. + /// + /// Example usage (actually, you don't need to use ptr_fun here, but it works anyway): + /// int factorial(int x) { return (x > 1) ? (x * factorial(x - 1)) : x; } + /// transform(pIntArrayBegin, pIntArrayEnd, pIntArrayBegin, ptr_fun(factorial)); + /// + template + inline pointer_to_unary_function + ptr_fun(Result (*pFunction)(Arg)) + { return pointer_to_unary_function(pFunction); } + + + + + + /////////////////////////////////////////////////////////////////////// + // pointer_to_binary_function + /////////////////////////////////////////////////////////////////////// + + /// pointer_to_binary_function + /// + /// This is an adapter template which converts a pointer to a standalone + /// function to a function object. This allows standalone functions to + /// work in many cases where the system requires a function object. + /// + template + class pointer_to_binary_function : public binary_function + { + protected: + Result (*mpFunction)(Arg1, Arg2); + + public: + pointer_to_binary_function() + { } + + explicit pointer_to_binary_function(Result (*pFunction)(Arg1, Arg2)) + : mpFunction(pFunction) {} + + Result operator()(Arg1 x, Arg2 y) const + { return mpFunction(x, y); } + }; + + + /// This ptr_fun is simply shorthand for usage of pointer_to_binary_function. 
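A small sketch of the adapters above (not from this header), assuming <EASTL/vector.h> and <EASTL/algorithm.h>: ptr_fun wraps a free function in a functor, and not1 negates the resulting predicate so it can drive count_if.

    #include <EASTL/functional.h>
    #include <EASTL/algorithm.h>
    #include <EASTL/vector.h>

    static bool IsNegative(int x) { return x < 0; }

    void NegatedPtrFunSketch()
    {
        eastl::vector<int> v;
        v.push_back(-2); v.push_back(0); v.push_back(3);

        // Wrap the free function, then negate the predicate it produces.
        ptrdiff_t nonNegative =
            eastl::count_if(v.begin(), v.end(),
                            eastl::not1(eastl::ptr_fun(IsNegative))); // 2
        (void)nonNegative;
    }

Note that eastl::not_fn, defined further down in this header, is the modern replacement for the not1/not2 negators used here.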
+ /// + /// Example usage (actually, you don't need to use ptr_fun here, but it works anyway): + /// int multiply(int x, int y) { return x * y; } + /// transform(pIntArray1Begin, pIntArray1End, pIntArray2Begin, pIntArray1Begin, ptr_fun(multiply)); + /// + template + inline pointer_to_binary_function + ptr_fun(Result (*pFunction)(Arg1, Arg2)) + { return pointer_to_binary_function(pFunction); } + + + + + + + /////////////////////////////////////////////////////////////////////// + // mem_fun + // mem_fun1 + // + // Note that mem_fun calls member functions via *pointers* to classes + // and not instances of classes. mem_fun_ref is for calling functions + // via instances of classes or references to classes. + // + // NOTE: + // mem_fun was deprecated in C++11 and removed in C++17, in favor + // of the more general mem_fn and bind. + // + /////////////////////////////////////////////////////////////////////// + + /// mem_fun_t + /// + /// Member function with no arguments. + /// + template + class mem_fun_t : public unary_function + { + public: + typedef Result (T::*MemberFunction)(); + + inline explicit mem_fun_t(MemberFunction pMemberFunction) + : mpMemberFunction(pMemberFunction) + { + // Empty + } + + inline Result operator()(T* pT) const + { + return (pT->*mpMemberFunction)(); + } + + protected: + MemberFunction mpMemberFunction; + }; + + + /// mem_fun1_t + /// + /// Member function with one argument. + /// + template + class mem_fun1_t : public binary_function + { + public: + typedef Result (T::*MemberFunction)(Argument); + + inline explicit mem_fun1_t(MemberFunction pMemberFunction) + : mpMemberFunction(pMemberFunction) + { + // Empty + } + + inline Result operator()(T* pT, Argument arg) const + { + return (pT->*mpMemberFunction)(arg); + } + + protected: + MemberFunction mpMemberFunction; + }; + + + /// const_mem_fun_t + /// + /// Const member function with no arguments. + /// Note that we inherit from unary_function + /// instead of what the C++ standard specifies: unary_function. + /// The C++ standard is in error and this has been recognized by the defect group. + /// + template + class const_mem_fun_t : public unary_function + { + public: + typedef Result (T::*MemberFunction)() const; + + inline explicit const_mem_fun_t(MemberFunction pMemberFunction) + : mpMemberFunction(pMemberFunction) + { + // Empty + } + + inline Result operator()(const T* pT) const + { + return (pT->*mpMemberFunction)(); + } + + protected: + MemberFunction mpMemberFunction; + }; + + + /// const_mem_fun1_t + /// + /// Const member function with one argument. + /// Note that we inherit from unary_function + /// instead of what the C++ standard specifies: unary_function. + /// The C++ standard is in error and this has been recognized by the defect group. + /// + template + class const_mem_fun1_t : public binary_function + { + public: + typedef Result (T::*MemberFunction)(Argument) const; + + inline explicit const_mem_fun1_t(MemberFunction pMemberFunction) + : mpMemberFunction(pMemberFunction) + { + // Empty + } + + inline Result operator()(const T* pT, Argument arg) const + { + return (pT->*mpMemberFunction)(arg); + } + + protected: + MemberFunction mpMemberFunction; + }; + + + /// mem_fun + /// + /// This is the high level interface to the mem_fun_t family. + /// + /// Example usage: + /// struct TestClass { void print() { puts("hello"); } } + /// TestClass* pTestClassArray[3] = { ... 
}; + /// for_each(pTestClassArray, pTestClassArray + 3, &TestClass::print); + /// + /// Note: using conventional inlining here to avoid issues on GCC/Linux + /// + template + inline mem_fun_t + mem_fun(Result (T::*MemberFunction)()) + { + return eastl::mem_fun_t(MemberFunction); + } + + template + inline mem_fun1_t + mem_fun(Result (T::*MemberFunction)(Argument)) + { + return eastl::mem_fun1_t(MemberFunction); + } + + template + inline const_mem_fun_t + mem_fun(Result (T::*MemberFunction)() const) + { + return eastl::const_mem_fun_t(MemberFunction); + } + + template + inline const_mem_fun1_t + mem_fun(Result (T::*MemberFunction)(Argument) const) + { + return eastl::const_mem_fun1_t(MemberFunction); + } + + + + + + /////////////////////////////////////////////////////////////////////// + // mem_fun_ref + // mem_fun1_ref + // + /////////////////////////////////////////////////////////////////////// + + /// mem_fun_ref_t + /// + template + class mem_fun_ref_t : public unary_function + { + public: + typedef Result (T::*MemberFunction)(); + + inline explicit mem_fun_ref_t(MemberFunction pMemberFunction) + : mpMemberFunction(pMemberFunction) + { + // Empty + } + + inline Result operator()(T& t) const + { + return (t.*mpMemberFunction)(); + } + + protected: + MemberFunction mpMemberFunction; + }; + + + /// mem_fun1_ref_t + /// + template + class mem_fun1_ref_t : public binary_function + { + public: + typedef Result (T::*MemberFunction)(Argument); + + inline explicit mem_fun1_ref_t(MemberFunction pMemberFunction) + : mpMemberFunction(pMemberFunction) + { + // Empty + } + + inline Result operator()(T& t, Argument arg) const + { + return (t.*mpMemberFunction)(arg); + } + + protected: + MemberFunction mpMemberFunction; + }; + + + /// const_mem_fun_ref_t + /// + template + class const_mem_fun_ref_t : public unary_function + { + public: + typedef Result (T::*MemberFunction)() const; + + inline explicit const_mem_fun_ref_t(MemberFunction pMemberFunction) + : mpMemberFunction(pMemberFunction) + { + // Empty + } + + inline Result operator()(const T& t) const + { + return (t.*mpMemberFunction)(); + } + + protected: + MemberFunction mpMemberFunction; + }; + + + /// const_mem_fun1_ref_t + /// + template + class const_mem_fun1_ref_t : public binary_function + { + public: + typedef Result (T::*MemberFunction)(Argument) const; + + inline explicit const_mem_fun1_ref_t(MemberFunction pMemberFunction) + : mpMemberFunction(pMemberFunction) + { + // Empty + } + + inline Result operator()(const T& t, Argument arg) const + { + return (t.*mpMemberFunction)(arg); + } + + protected: + MemberFunction mpMemberFunction; + }; + + + /// mem_fun_ref + /// Example usage: + /// struct TestClass { void print() { puts("hello"); } } + /// TestClass testClassArray[3]; + /// for_each(testClassArray, testClassArray + 3, &TestClass::print); + /// + /// Note: using conventional inlining here to avoid issues on GCC/Linux + /// + template + inline mem_fun_ref_t + mem_fun_ref(Result (T::*MemberFunction)()) + { + return eastl::mem_fun_ref_t(MemberFunction); + } + + template + inline mem_fun1_ref_t + mem_fun_ref(Result (T::*MemberFunction)(Argument)) + { + return eastl::mem_fun1_ref_t(MemberFunction); + } + + template + inline const_mem_fun_ref_t + mem_fun_ref(Result (T::*MemberFunction)() const) + { + return eastl::const_mem_fun_ref_t(MemberFunction); + } + + template + inline const_mem_fun1_ref_t + mem_fun_ref(Result (T::*MemberFunction)(Argument) const) + { + return eastl::const_mem_fun1_ref_t(MemberFunction); + } + + + // not_fn_ret + // 
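A minimal sketch of the mem_fun/mem_fun_ref adapters above (not part of the header), assuming <EASTL/vector.h> and <EASTL/algorithm.h>; as the note above says, these adapters were deprecated in C++11 in favor of mem_fn and bind, but they still illustrate the pointer-versus-reference distinction.

    #include <EASTL/functional.h>
    #include <EASTL/algorithm.h>
    #include <EASTL/vector.h>
    #include <cstdio>

    struct Door { void open() { puts("open"); } };

    void MemFunSketch()
    {
        eastl::vector<Door> doors(3);

        // mem_fun_ref calls the member through a reference to each element...
        eastl::for_each(doors.begin(), doors.end(), eastl::mem_fun_ref(&Door::open));

        // ...while mem_fun calls it through a pointer.
        eastl::vector<Door*> doorPtrs;
        doorPtrs.push_back(&doors[0]);
        doorPtrs.push_back(&doors[1]);
        eastl::for_each(doorPtrs.begin(), doorPtrs.end(), eastl::mem_fun(&Door::open));
    }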
not_fn_ret is a implementation specified return type of eastl::not_fn. + // The type name is not specified but it does have mandated functions that conforming implementations must support. + // + // http://en.cppreference.com/w/cpp/utility/functional/not_fn + // + template + struct not_fn_ret + { + explicit not_fn_ret(F&& f) : mDecayF(eastl::forward(f)) {} + not_fn_ret(not_fn_ret&& f) = default; + not_fn_ret(const not_fn_ret& f) = default; + + // overloads for lvalues + template + auto operator()(Args&&... args) & + -> decltype(!eastl::declval&, Args...>>()) + { return !eastl::invoke(mDecayF, eastl::forward(args)...); } + + template + auto operator()(Args&&... args) const & + -> decltype(!eastl::declval const&, Args...>>()) + { return !eastl::invoke(mDecayF, eastl::forward(args)...); } + + // overloads for rvalues + template + auto operator()(Args&&... args) && + -> decltype(!eastl::declval, Args...>>()) + { return !eastl::invoke(eastl::move(mDecayF), eastl::forward(args)...); } + + template + auto operator()(Args&&... args) const && + -> decltype(!eastl::declval const, Args...>>()) + { return !eastl::invoke(eastl::move(mDecayF), eastl::forward(args)...); } + + eastl::decay_t mDecayF; + }; + + /// not_fn + /// + /// Creates an implementation specified functor that returns the complement of the callable object it was passed. + /// not_fn is intended to replace the C++03-era negators eastl::not1 and eastl::not2. + /// + /// http://en.cppreference.com/w/cpp/utility/functional/not_fn + /// + /// Example usage: + /// + /// auto nf = eastl::not_fn([]{ return false; }); + /// assert(nf()); // return true + /// + template + inline not_fn_ret not_fn(F&& f) + { + return not_fn_ret(eastl::forward(f)); + } + + + /////////////////////////////////////////////////////////////////////// + // hash + /////////////////////////////////////////////////////////////////////// + namespace Internal + { + // utility to disable the generic template specialization that is + // used for enum types only. + template + struct EnableHashIf {}; + + template + struct EnableHashIf + { + size_t operator()(T p) const { return size_t(p); } + }; + } // namespace Internal + + + template struct hash; + + template + struct hash : Internal::EnableHashIf> {}; + + template struct hash // Note that we use the pointer as-is and don't divide by sizeof(T*). This is because the table is of a prime size and this division doesn't benefit distribution. + { size_t operator()(T* p) const { return size_t(uintptr_t(p)); } }; + + template <> struct hash + { size_t operator()(bool val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(char val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(signed char val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(unsigned char val) const { return static_cast(val); } }; + + #if defined(EA_CHAR8_UNIQUE) && EA_CHAR8_UNIQUE + template <> struct hash + { size_t operator()(char8_t val) const { return static_cast(val); } }; + #endif + + #if defined(EA_CHAR16_NATIVE) && EA_CHAR16_NATIVE + template <> struct hash + { size_t operator()(char16_t val) const { return static_cast(val); } }; + #endif + + #if defined(EA_CHAR32_NATIVE) && EA_CHAR32_NATIVE + template <> struct hash + { size_t operator()(char32_t val) const { return static_cast(val); } }; + #endif + + // If wchar_t is a native type instead of simply a define to an existing type... 
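The Internal::EnableHashIf machinery above is what lets the generic eastl::hash template work for enum keys without a user-supplied functor. A minimal sketch of that behavior, assuming <EASTL/hash_map.h> and a C++11 scoped enum:

    #include <EASTL/functional.h>
    #include <EASTL/hash_map.h>

    enum class Channel : uint32_t { Red, Green, Blue };

    void EnumHashSketch()
    {
        // eastl::hash<Channel> resolves to the enum-enabled generic hash,
        // which simply converts the enumerator to size_t.
        eastl::hash_map<Channel, int> sampleCount;
        sampleCount[Channel::Red]  = 1;
        sampleCount[Channel::Blue] = 2;
    }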
+ #if !defined(EA_WCHAR_T_NON_NATIVE) + template <> struct hash + { size_t operator()(wchar_t val) const { return static_cast(val); } }; + #endif + + template <> struct hash + { size_t operator()(signed short val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(unsigned short val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(signed int val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(unsigned int val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(signed long val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(unsigned long val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(signed long long val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(unsigned long long val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(float val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(double val) const { return static_cast(val); } }; + + template <> struct hash + { size_t operator()(long double val) const { return static_cast(val); } }; + + #if defined(EA_HAVE_INT128) && EA_HAVE_INT128 + template <> struct hash + { size_t operator()(uint128_t val) const { return static_cast(val); } }; + #endif + + + /////////////////////////////////////////////////////////////////////////// + // string hashes + // + // Note that our string hashes here intentionally are slow for long strings. + // The reasoning for this is so: + // - The large majority of hashed strings are only a few bytes long. + // - The hash function is significantly more efficient if it can make this assumption. + // - The user is welcome to make a custom hash for those uncommon cases where + // long strings need to be hashed. Indeed, the user can probably make a + // special hash customized for such strings that's better than what we provide. + /////////////////////////////////////////////////////////////////////////// + + template <> struct hash + { + size_t operator()(const char* p) const + { + uint32_t c, result = 2166136261U; // FNV1 hash. Perhaps the best string hash. Intentionally uint32_t instead of size_t, so the behavior is the same regardless of size. + while((c = (uint8_t)*p++) != 0) // Using '!=' disables compiler warnings. + result = (result * 16777619) ^ c; + return (size_t)result; + } + }; + + template <> struct hash + { + size_t operator()(const char* p) const + { + uint32_t c, result = 2166136261U; // Intentionally uint32_t instead of size_t, so the behavior is the same regardless of size. + while((c = (uint8_t)*p++) != 0) // cast to unsigned 8 bit. + result = (result * 16777619) ^ c; + return (size_t)result; + } + }; + +#if EA_CHAR8_UNIQUE + template <> struct hash + { + size_t operator()(const char8_t* p) const + { + uint32_t c, result = 2166136261U; // FNV1 hash. Perhaps the best string hash. Intentionally uint32_t instead of size_t, so the behavior is the same regardless of size. + while((c = (uint8_t)*p++) != 0) // Using '!=' disables compiler warnings. + result = (result * 16777619) ^ c; + return (size_t)result; + } + }; + + template <> struct hash + { + size_t operator()(const char8_t* p) const + { + uint32_t c, result = 2166136261U; // Intentionally uint32_t instead of size_t, so the behavior is the same regardless of size. 
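+			// FNV-1 step: multiply the running hash by the 32-bit FNV prime (16777619),
+			// then XOR in the next byte; 2166136261 is the 32-bit FNV-1 offset basis.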
+ while((c = (uint8_t)*p++) != 0) // cast to unsigned 8 bit. + result = (result * 16777619) ^ c; + return (size_t)result; + } + }; +#endif + + + template <> struct hash + { + size_t operator()(const char16_t* p) const + { + uint32_t c, result = 2166136261U; // Intentionally uint32_t instead of size_t, so the behavior is the same regardless of size. + while((c = (uint16_t)*p++) != 0) // cast to unsigned 16 bit. + result = (result * 16777619) ^ c; + return (size_t)result; + } + }; + + template <> struct hash + { + size_t operator()(const char16_t* p) const + { + uint32_t c, result = 2166136261U; // Intentionally uint32_t instead of size_t, so the behavior is the same regardless of size. + while((c = (uint16_t)*p++) != 0) // cast to unsigned 16 bit. + result = (result * 16777619) ^ c; + return (size_t)result; + } + }; + + template <> struct hash + { + size_t operator()(const char32_t* p) const + { + uint32_t c, result = 2166136261U; // Intentionally uint32_t instead of size_t, so the behavior is the same regardless of size. + while((c = (uint32_t)*p++) != 0) // cast to unsigned 32 bit. + result = (result * 16777619) ^ c; + return (size_t)result; + } + }; + + template <> struct hash + { + size_t operator()(const char32_t* p) const + { + uint32_t c, result = 2166136261U; // Intentionally uint32_t instead of size_t, so the behavior is the same regardless of size. + while((c = (uint32_t)*p++) != 0) // cast to unsigned 32 bit. + result = (result * 16777619) ^ c; + return (size_t)result; + } + }; + +#if defined(EA_WCHAR_UNIQUE) && EA_WCHAR_UNIQUE + template<> struct hash + { + size_t operator()(const wchar_t* p) const + { + uint32_t c, result = 2166136261U; // Intentionally uint32_t instead of size_t, so the behavior is the same regardless of size. + while ((c = (uint32_t)*p++) != 0) // cast to unsigned 32 bit. + result = (result * 16777619) ^ c; + return (size_t)result; + } + }; + + template<> struct hash + { + size_t operator()(const wchar_t* p) const + { + uint32_t c, result = 2166136261U; // Intentionally uint32_t instead of size_t, so the behavior is the same regardless of size. + while ((c = (uint32_t)*p++) != 0) // cast to unsigned 32 bit. + result = (result * 16777619) ^ c; + return (size_t)result; + } + }; +#endif + + /// string_hash + /// + /// Defines a generic string hash for an arbitrary EASTL basic_string container. + /// + /// Example usage: + /// eastl::hash_set > hashSet; + /// + template + struct string_hash + { + typedef String string_type; + typedef typename String::value_type value_type; + typedef typename eastl::add_unsigned::type unsigned_value_type; + + size_t operator()(const string_type& s) const + { + const unsigned_value_type* p = (const unsigned_value_type*)s.c_str(); + uint32_t c, result = 2166136261U; // Intentionally uint32_t instead of size_t, so the behavior is the same regardless of size. + while((c = *p++) != 0) + result = (result * 16777619) ^ c; + return (size_t)result; + } + }; + + +} // namespace eastl + +#include + +#endif // Header include guard + + + + + + + diff --git a/libkram/eastl/include/EASTL/hash_map.h b/libkram/eastl/include/EASTL/hash_map.h new file mode 100644 index 00000000..c363597f --- /dev/null +++ b/libkram/eastl/include/EASTL/hash_map.h @@ -0,0 +1,580 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
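For completeness, a sketch (not from these headers) of the string_hash adapter defined at the end of functional.h above, assuming <EASTL/string.h> and <EASTL/hash_set.h>; it applies the same FNV-1 loop to an arbitrary basic_string type.

    #include <EASTL/functional.h>
    #include <EASTL/string.h>
    #include <EASTL/hash_set.h>

    void StringHashSketch()
    {
        typedef eastl::basic_string<char> String8;

        eastl::hash_set<String8, eastl::string_hash<String8> > stringSet;
        stringSet.insert(String8("kram"));
        bool present = (stringSet.find(String8("kram")) != stringSet.end()); // true
        (void)present;
    }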
+/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file is based on the TR1 (technical report 1) reference implementation +// of the unordered_set/unordered_map C++ classes as of about 4/2005. Most likely +// many or all C++ library vendors' implementations of this classes will be +// based off of the reference version and so will look pretty similar to this +// file as well as other vendors' versions. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_HASH_MAP_H +#define EASTL_HASH_MAP_H + + +#include +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// EASTL_HASH_MAP_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_HASH_MAP_DEFAULT_NAME + #define EASTL_HASH_MAP_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " hash_map" // Unless the user overrides something, this is "EASTL hash_map". + #endif + + + /// EASTL_HASH_MULTIMAP_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_HASH_MULTIMAP_DEFAULT_NAME + #define EASTL_HASH_MULTIMAP_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " hash_multimap" // Unless the user overrides something, this is "EASTL hash_multimap". + #endif + + + /// EASTL_HASH_MAP_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_HASH_MAP_DEFAULT_ALLOCATOR + #define EASTL_HASH_MAP_DEFAULT_ALLOCATOR allocator_type(EASTL_HASH_MAP_DEFAULT_NAME) + #endif + + /// EASTL_HASH_MULTIMAP_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_HASH_MULTIMAP_DEFAULT_ALLOCATOR + #define EASTL_HASH_MULTIMAP_DEFAULT_ALLOCATOR allocator_type(EASTL_HASH_MULTIMAP_DEFAULT_NAME) + #endif + + + + /// hash_map + /// + /// Implements a hash_map, which is a hashed associative container. + /// Lookups are O(1) (that is, they are fast) but the container is + /// not sorted. Note that lookups are only O(1) if the hash table + /// is well-distributed (non-colliding). The lookup approaches + /// O(n) behavior as the table becomes increasingly poorly distributed. + /// + /// set_max_load_factor + /// If you want to make a hashtable never increase its bucket usage, + /// call set_max_load_factor with a very high value such as 100000.f. + /// + /// bCacheHashCode + /// We provide the boolean bCacheHashCode template parameter in order + /// to allow the storing of the hash code of the key within the map. + /// When this option is disabled, the rehashing of the table will + /// call the hash function on the key. Setting bCacheHashCode to true + /// is useful for cases whereby the calculation of the hash value for + /// a contained object is very expensive. + /// + /// find_as + /// In order to support the ability to have a hashtable of strings but + /// be able to do efficiently lookups via char pointers (i.e. so they + /// aren't converted to string objects), we provide the find_as + /// function. This function allows you to do a find with a key of a + /// type other than the hashtable key type. + /// + /// Example find_as usage: + /// hash_map hashMap; + /// i = hashMap.find_as("hello"); // Use default hash and compare. 
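A concrete sketch of the find_as usage described above (assuming <EASTL/string.h> is included, so eastl::hash<eastl::string> and comparison against char pointers are available): the key is hashed straight from the C string and compared across types, so no temporary eastl::string is built.

    #include <EASTL/hash_map.h>
    #include <EASTL/string.h>

    void FindAsSketch()
    {
        eastl::hash_map<eastl::string, int> wordCount;
        wordCount[eastl::string("hello")] = 3;

        // Look the key up from a plain char pointer.
        eastl::hash_map<eastl::string, int>::iterator it =
            wordCount.find_as("hello", eastl::hash<const char*>(),
                              eastl::equal_to_2<eastl::string, const char*>());
        bool found = (it != wordCount.end()); // true
        (void)found;
    }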
+ /// + /// Example find_as usage (namespaces omitted for brevity): + /// hash_map hashMap; + /// i = hashMap.find_as("hello", hash(), equal_to_2()); + /// + template , typename Predicate = eastl::equal_to, + typename Allocator = EASTLAllocatorType, bool bCacheHashCode = false> + class hash_map + : public hashtable, Allocator, eastl::use_first >, Predicate, + Hash, mod_range_hashing, default_ranged_hash, prime_rehash_policy, bCacheHashCode, true, true> + { + public: + typedef hashtable, Allocator, + eastl::use_first >, + Predicate, Hash, mod_range_hashing, default_ranged_hash, + prime_rehash_policy, bCacheHashCode, true, true> base_type; + typedef hash_map this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::key_type key_type; + typedef T mapped_type; + typedef typename base_type::value_type value_type; // NOTE: 'value_type = pair'. + typedef typename base_type::allocator_type allocator_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::insert_return_type insert_return_type; + typedef typename base_type::iterator iterator; + typedef typename base_type::const_iterator const_iterator; + + using base_type::insert; + + public: + /// hash_map + /// + /// Default constructor. + /// + explicit hash_map(const allocator_type& allocator = EASTL_HASH_MAP_DEFAULT_ALLOCATOR) + : base_type(0, Hash(), mod_range_hashing(), default_ranged_hash(), + Predicate(), eastl::use_first >(), allocator) + { + // Empty + } + + + /// hash_map + /// + /// Constructor which creates an empty container, but start with nBucketCount buckets. + /// We default to a small nBucketCount value, though the user really should manually + /// specify an appropriate value in order to prevent memory from being reallocated. + /// + explicit hash_map(size_type nBucketCount, const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate(), const allocator_type& allocator = EASTL_HASH_MAP_DEFAULT_ALLOCATOR) + : base_type(nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), + predicate, eastl::use_first >(), allocator) + { + // Empty + } + + + hash_map(const this_type& x) + : base_type(x) + { + } + + + hash_map(this_type&& x) + : base_type(eastl::move(x)) + { + } + + + hash_map(this_type&& x, const allocator_type& allocator) + : base_type(eastl::move(x), allocator) + { + } + + + /// hash_map + /// + /// initializer_list-based constructor. + /// Allows for initializing with brace values (e.g. hash_map hm = { {3,"c"}, {4,"d"}, {5,"e"} }; ) + /// + hash_map(std::initializer_list ilist, size_type nBucketCount = 0, const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate(), const allocator_type& allocator = EASTL_HASH_MAP_DEFAULT_ALLOCATOR) + : base_type(ilist.begin(), ilist.end(), nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), + predicate, eastl::use_first >(), allocator) + { + // Empty + } + + + /// hash_map + /// + /// An input bucket count of <= 1 causes the bucket count to be equal to the number of + /// elements in the input range. 
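A sketch of the tuning knobs mentioned in the class comments above (assuming <EASTL/hash_map.h> and <EASTL/string.h>): pre-size the bucket array, pin the load factor, and cache hash codes when hashing the key is expensive. The set_max_load_factor call follows the wording of the class comment; its exact spelling may vary across EASTL versions.

    #include <EASTL/hash_map.h>
    #include <EASTL/string.h>

    void HashMapTuningSketch()
    {
        // Start with enough buckets to avoid rehashing during the initial fill.
        eastl::hash_map<eastl::string, int> counts(257);

        // Effectively freeze the bucket count, as the class comments suggest.
        counts.set_max_load_factor(100000.f);

        // bCacheHashCode = true stores each key's hash in the node, so rehashing
        // does not need to re-hash expensive keys.
        eastl::hash_map<eastl::string, int,
                        eastl::hash<eastl::string>, eastl::equal_to<eastl::string>,
                        EASTLAllocatorType, true> cachedCounts;
        cachedCounts[eastl::string("kram")] = 1;
    }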
+ /// + template + hash_map(ForwardIterator first, ForwardIterator last, size_type nBucketCount = 0, const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate(), const allocator_type& allocator = EASTL_HASH_MAP_DEFAULT_ALLOCATOR) + : base_type(first, last, nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), + predicate, eastl::use_first >(), allocator) + { + // Empty + } + + + this_type& operator=(const this_type& x) + { + return static_cast(base_type::operator=(x)); + } + + + this_type& operator=(std::initializer_list ilist) + { + return static_cast(base_type::operator=(ilist)); + } + + + this_type& operator=(this_type&& x) + { + return static_cast(base_type::operator=(eastl::move(x))); + } + + + /// insert + /// + /// This is an extension to the C++ standard. We insert a default-constructed + /// element with the given key. The reason for this is that we can avoid the + /// potentially expensive operation of creating and/or copying a mapped_type + /// object on the stack. + insert_return_type insert(const key_type& key) + { + return base_type::DoInsertKey(true_type(), key); + } + + T& at(const key_type& k) + { + iterator it = base_type::find(k); + + if (it == base_type::end()) + { + #if EASTL_EXCEPTIONS_ENABLED + // throw exeption if exceptions enabled + throw std::out_of_range("invalid hash_map key"); + #else + // assert false if asserts enabled + EASTL_ASSERT_MSG(false, "invalid hash_map key"); + #endif + } + // undefined behaviour if exceptions and asserts are disabled and it == end() + return it->second; + } + + + const T& at(const key_type& k) const + { + const_iterator it = base_type::find(k); + + if (it == base_type::end()) + { + #if EASTL_EXCEPTIONS_ENABLED + // throw exeption if exceptions enabled + throw std::out_of_range("invalid hash_map key"); + #else + // assert false if asserts enabled + EASTL_ASSERT_MSG(false, "invalid hash_map key"); + #endif + } + // undefined behaviour if exceptions and asserts are disabled and it == end() + return it->second; + } + + + insert_return_type insert(key_type&& key) + { + return base_type::DoInsertKey(true_type(), eastl::move(key)); + } + + + mapped_type& operator[](const key_type& key) + { + return (*base_type::DoInsertKey(true_type(), key).first).second; + + // Slower reference version: + //const typename base_type::iterator it = base_type::find(key); + //if(it != base_type::end()) + // return (*it).second; + //return (*base_type::insert(value_type(key, mapped_type())).first).second; + } + + mapped_type& operator[](key_type&& key) + { + // The Standard states that this function "inserts the value value_type(std::move(key), mapped_type())" + return (*base_type::DoInsertKey(true_type(), eastl::move(key)).first).second; + } + + + }; // hash_map + + /// hash_map erase_if + /// + /// https://en.cppreference.com/w/cpp/container/unordered_map/erase_if + template + void erase_if(eastl::hash_map& c, UserPredicate predicate) + { + // Erases all elements that satisfy the predicate from the container. + for (auto i = c.begin(), last = c.end(); i != last;) + { + if (predicate(*i)) + { + i = c.erase(i); + } + else + { + ++i; + } + } + } + + + /// hash_multimap + /// + /// Implements a hash_multimap, which is the same thing as a hash_map + /// except that contained elements need not be unique. See the + /// documentation for hash_set for details. 
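A short sketch of the hash_map extensions shown above, assuming <EASTL/hash_map.h>: operator[] default-constructs missing values, at() asserts or throws on a missing key, and the free erase_if removes everything matching a predicate in one pass.

    #include <EASTL/hash_map.h>

    void HashMapEraseIfSketch()
    {
        eastl::hash_map<int, int> squares;
        for (int i = 0; i < 10; ++i)
            squares[i] = i * i;          // inserts a default int, then assigns

        int nine = squares.at(3);        // 9; asserts (or throws) if the key were missing
        (void)nine;

        // Drop all entries with even keys; erase_if advances past erased nodes safely.
        eastl::erase_if(squares, [](const eastl::hash_map<int, int>::value_type& kv)
                                 { return (kv.first % 2) == 0; });
        // squares.size() == 5
    }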
+ /// + template , typename Predicate = eastl::equal_to, + typename Allocator = EASTLAllocatorType, bool bCacheHashCode = false> + class hash_multimap + : public hashtable, Allocator, eastl::use_first >, Predicate, + Hash, mod_range_hashing, default_ranged_hash, prime_rehash_policy, bCacheHashCode, true, false> + { + public: + typedef hashtable, Allocator, + eastl::use_first >, + Predicate, Hash, mod_range_hashing, default_ranged_hash, + prime_rehash_policy, bCacheHashCode, true, false> base_type; + typedef hash_multimap this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::key_type key_type; + typedef T mapped_type; + typedef typename base_type::value_type value_type; // Note that this is pair. + typedef typename base_type::allocator_type allocator_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::insert_return_type insert_return_type; + typedef typename base_type::iterator iterator; + + using base_type::insert; + + private: + using base_type::try_emplace; + using base_type::insert_or_assign; + + public: + /// hash_multimap + /// + /// Default constructor. + /// + explicit hash_multimap(const allocator_type& allocator = EASTL_HASH_MULTIMAP_DEFAULT_ALLOCATOR) + : base_type(0, Hash(), mod_range_hashing(), default_ranged_hash(), + Predicate(), eastl::use_first >(), allocator) + { + // Empty + } + + + /// hash_multimap + /// + /// Constructor which creates an empty container, but start with nBucketCount buckets. + /// We default to a small nBucketCount value, though the user really should manually + /// specify an appropriate value in order to prevent memory from being reallocated. + /// + explicit hash_multimap(size_type nBucketCount, const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate(), const allocator_type& allocator = EASTL_HASH_MULTIMAP_DEFAULT_ALLOCATOR) + : base_type(nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), + predicate, eastl::use_first >(), allocator) + { + // Empty + } + + + hash_multimap(const this_type& x) + : base_type(x) + { + } + + + hash_multimap(this_type&& x) + : base_type(eastl::move(x)) + { + } + + + hash_multimap(this_type&& x, const allocator_type& allocator) + : base_type(eastl::move(x), allocator) + { + } + + + /// hash_multimap + /// + /// initializer_list-based constructor. + /// Allows for initializing with brace values (e.g. hash_multimap hm = { {3,"c"}, {3,"C"}, {4,"d"} }; ) + /// + hash_multimap(std::initializer_list ilist, size_type nBucketCount = 0, const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate(), const allocator_type& allocator = EASTL_HASH_MULTIMAP_DEFAULT_ALLOCATOR) + : base_type(ilist.begin(), ilist.end(), nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), + predicate, eastl::use_first >(), allocator) + { + // Empty + } + + + /// hash_multimap + /// + /// An input bucket count of <= 1 causes the bucket count to be equal to the number of + /// elements in the input range. 
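A brief sketch of duplicate-key behavior, assuming <EASTL/hash_map.h> and <EASTL/string.h>: unlike hash_map, hash_multimap keeps every inserted pair, and equal_range retrieves all values stored under one key.

    #include <EASTL/hash_map.h>
    #include <EASTL/string.h>
    #include <EASTL/iterator.h>

    void HashMultimapSketch()
    {
        typedef eastl::hash_multimap<eastl::string, int> PhoneBook;

        PhoneBook phones;
        phones.insert(PhoneBook::value_type(eastl::string("alice"), 5551234));
        phones.insert(PhoneBook::value_type(eastl::string("alice"), 5555678)); // duplicate key kept

        eastl::pair<PhoneBook::iterator, PhoneBook::iterator> range =
            phones.equal_range(eastl::string("alice"));
        ptrdiff_t entries = eastl::distance(range.first, range.second); // 2
        (void)entries;
    }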
+ /// + template + hash_multimap(ForwardIterator first, ForwardIterator last, size_type nBucketCount = 0, const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate(), const allocator_type& allocator = EASTL_HASH_MULTIMAP_DEFAULT_ALLOCATOR) + : base_type(first, last, nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), + predicate, eastl::use_first >(), allocator) + { + // Empty + } + + + this_type& operator=(const this_type& x) + { + return static_cast(base_type::operator=(x)); + } + + + this_type& operator=(std::initializer_list ilist) + { + return static_cast(base_type::operator=(ilist)); + } + + + this_type& operator=(this_type&& x) + { + return static_cast(base_type::operator=(eastl::move(x))); + } + + + /// insert + /// + /// This is an extension to the C++ standard. We insert a default-constructed + /// element with the given key. The reason for this is that we can avoid the + /// potentially expensive operation of creating and/or copying a mapped_type + /// object on the stack. + insert_return_type insert(const key_type& key) + { + return base_type::DoInsertKey(false_type(), key); + } + + + insert_return_type insert(key_type&& key) + { + return base_type::DoInsertKey(false_type(), eastl::move(key)); + } + + }; // hash_multimap + + /// hash_multimap erase_if + /// + /// https://en.cppreference.com/w/cpp/container/unordered_multimap/erase_if + template + void erase_if(eastl::hash_multimap& c, UserPredicate predicate) + { + // Erases all elements that satisfy the predicate from the container. + for (auto i = c.begin(), last = c.end(); i != last;) + { + if (predicate(*i)) + { + i = c.erase(i); + } + else + { + ++i; + } + } + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const hash_map& a, + const hash_map& b) + { + typedef typename hash_map::const_iterator const_iterator; + + // We implement branching with the assumption that the return value is usually false. + if(a.size() != b.size()) + return false; + + // For map (with its unique keys), we need only test that each element in a can be found in b, + // as there can be only one such pairing per element. multimap needs to do a something more elaborate. + for(const_iterator ai = a.begin(), aiEnd = a.end(), biEnd = b.end(); ai != aiEnd; ++ai) + { + const_iterator bi = b.find(ai->first); + + if((bi == biEnd) || !(*ai == *bi)) // We have to compare the values, because lookups are done by keys alone but the full value_type of a map is a key/value pair. + return false; // It's possible that two elements in the two containers have identical keys but different values. + } + + return true; + } + + template + inline bool operator!=(const hash_map& a, + const hash_map& b) + { + return !(a == b); + } + + + template + inline bool operator==(const hash_multimap& a, + const hash_multimap& b) + { + typedef typename hash_multimap::const_iterator const_iterator; + typedef typename eastl::iterator_traits::difference_type difference_type; + + // We implement branching with the assumption that the return value is usually false. + if(a.size() != b.size()) + return false; + + // We can't simply search for each element of a in b, as it may be that the bucket for + // two elements in a has those same two elements in b but in different order (which should + // still result in equality). 
Also it's possible that one bucket in a has two elements which + // both match a solitary element in the equivalent bucket in b (which shouldn't result in equality). + eastl::pair aRange; + eastl::pair bRange; + + for(const_iterator ai = a.begin(), aiEnd = a.end(); ai != aiEnd; ai = aRange.second) // For each element in a... + { + aRange = a.equal_range(ai->first); // Get the range of elements in a that are equal to ai. + bRange = b.equal_range(ai->first); // Get the range of elements in b that are equal to ai. + + // We need to verify that aRange == bRange. First make sure the range sizes are equivalent... + const difference_type aDistance = eastl::distance(aRange.first, aRange.second); + const difference_type bDistance = eastl::distance(bRange.first, bRange.second); + + if(aDistance != bDistance) + return false; + + // At this point, aDistance > 0 and aDistance == bDistance. + // Implement a fast pathway for the case that there's just a single element. + if(aDistance == 1) + { + if(!(*aRange.first == *bRange.first)) // We have to compare the values, because lookups are done by keys alone but the full value_type of a map is a key/value pair. + return false; // It's possible that two elements in the two containers have identical keys but different values. Ditto for the permutation case below. + } + else + { + // Check to see if these aRange and bRange are any permutation of each other. + // This check gets slower as there are more elements in the range. + if(!eastl::is_permutation(aRange.first, aRange.second, bRange.first)) + return false; + } + } + + return true; + } + + template + inline bool operator!=(const hash_multimap& a, + const hash_multimap& b) + { + return !(a == b); + } + + +} // namespace eastl + + +#endif // Header include guard + + + + + + diff --git a/libkram/eastl/include/EASTL/hash_set.h b/libkram/eastl/include/EASTL/hash_set.h new file mode 100644 index 00000000..c075975d --- /dev/null +++ b/libkram/eastl/include/EASTL/hash_set.h @@ -0,0 +1,468 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file is based on the TR1 (technical report 1) reference implementation +// of the unordered_set/unordered_map C++ classes as of about 4/2005. Most likely +// many or all C++ library vendors' implementations of this classes will be +// based off of the reference version and so will look pretty similar to this +// file as well as other vendors' versions. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_HASH_SET_H +#define EASTL_HASH_SET_H + + +#include +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// EASTL_HASH_SET_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_HASH_SET_DEFAULT_NAME + #define EASTL_HASH_SET_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " hash_set" // Unless the user overrides something, this is "EASTL hash_set". + #endif + + + /// EASTL_HASH_MULTISET_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. 
+ /// + #ifndef EASTL_HASH_MULTISET_DEFAULT_NAME + #define EASTL_HASH_MULTISET_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " hash_multiset" // Unless the user overrides something, this is "EASTL hash_multiset". + #endif + + + /// EASTL_HASH_SET_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_HASH_SET_DEFAULT_ALLOCATOR + #define EASTL_HASH_SET_DEFAULT_ALLOCATOR allocator_type(EASTL_HASH_SET_DEFAULT_NAME) + #endif + + /// EASTL_HASH_MULTISET_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_HASH_MULTISET_DEFAULT_ALLOCATOR + #define EASTL_HASH_MULTISET_DEFAULT_ALLOCATOR allocator_type(EASTL_HASH_MULTISET_DEFAULT_NAME) + #endif + + + + /// hash_set + /// + /// Implements a hash_set, which is a hashed unique-item container. + /// Lookups are O(1) (that is, they are fast) but the container is + /// not sorted. Note that lookups are only O(1) if the hash table + /// is well-distributed (non-colliding). The lookup approaches + /// O(n) behavior as the table becomes increasingly poorly distributed. + /// + /// set_max_load_factor + /// If you want to make a hashtable never increase its bucket usage, + /// call set_max_load_factor with a very high value such as 100000.f. + /// + /// bCacheHashCode + /// We provide the boolean bCacheHashCode template parameter in order + /// to allow the storing of the hash code of the key within the map. + /// When this option is disabled, the rehashing of the table will + /// call the hash function on the key. Setting bCacheHashCode to true + /// is useful for cases whereby the calculation of the hash value for + /// a contained object is very expensive. + /// + /// find_as + /// In order to support the ability to have a hashtable of strings but + /// be able to do efficiently lookups via char pointers (i.e. so they + /// aren't converted to string objects), we provide the find_as + /// function. This function allows you to do a find with a key of a + /// type other than the hashtable key type. + /// + /// Example find_as usage: + /// hash_set hashSet; + /// i = hashSet.find_as("hello"); // Use default hash and compare. + /// + /// Example find_as usage (namespaces omitted for brevity): + /// hash_set hashSet; + /// i = hashSet.find_as("hello", hash(), equal_to_2()); + /// + template , typename Predicate = eastl::equal_to, + typename Allocator = EASTLAllocatorType, bool bCacheHashCode = false> + class hash_set + : public hashtable, Predicate, + Hash, mod_range_hashing, default_ranged_hash, + prime_rehash_policy, bCacheHashCode, false, true> + { + public: + typedef hashtable, Predicate, + Hash, mod_range_hashing, default_ranged_hash, + prime_rehash_policy, bCacheHashCode, false, true> base_type; + typedef hash_set this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::allocator_type allocator_type; + typedef typename base_type::node_type node_type; + + public: + /// hash_set + /// + /// Default constructor. + /// + explicit hash_set(const allocator_type& allocator = EASTL_HASH_SET_DEFAULT_ALLOCATOR) + : base_type(0, Hash(), mod_range_hashing(), default_ranged_hash(), Predicate(), eastl::use_self(), allocator) + { + // Empty + } + + + /// hash_set + /// + /// Constructor which creates an empty container, but start with nBucketCount buckets. + /// We default to a small nBucketCount value, though the user really should manually + /// specify an appropriate value in order to prevent memory from being reallocated. 
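A minimal hash_set sketch mirroring the find_as description above (assuming <EASTL/hash_set.h> and <EASTL/string.h>): the single-argument find_as lets a char pointer be looked up with the default hash and cross-type compare, without building a temporary string.

    #include <EASTL/hash_set.h>
    #include <EASTL/string.h>

    void HashSetFindAsSketch()
    {
        eastl::hash_set<eastl::string> names;
        names.insert(eastl::string("alice"));

        const char* key = "alice";
        eastl::hash_set<eastl::string>::iterator it = names.find_as(key);
        bool found = (it != names.end()); // true
        (void)found;
    }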
+ /// + explicit hash_set(size_type nBucketCount, const Hash& hashFunction = Hash(), const Predicate& predicate = Predicate(), + const allocator_type& allocator = EASTL_HASH_SET_DEFAULT_ALLOCATOR) + : base_type(nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), predicate, eastl::use_self(), allocator) + { + // Empty + } + + + hash_set(const this_type& x) + : base_type(x) + { + } + + + hash_set(this_type&& x) + : base_type(eastl::move(x)) + { + } + + + hash_set(this_type&& x, const allocator_type& allocator) + : base_type(eastl::move(x), allocator) + { + } + + + /// hash_set + /// + /// initializer_list-based constructor. + /// Allows for initializing with brace values (e.g. hash_set hs = { 3, 4, 5, }; ) + /// + hash_set(std::initializer_list ilist, size_type nBucketCount = 0, const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate(), const allocator_type& allocator = EASTL_HASH_SET_DEFAULT_ALLOCATOR) + : base_type(ilist.begin(), ilist.end(), nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), predicate, eastl::use_self(), allocator) + { + // Empty + } + + + /// hash_set + /// + /// An input bucket count of <= 1 causes the bucket count to be equal to the number of + /// elements in the input range. + /// + template + hash_set(FowardIterator first, FowardIterator last, size_type nBucketCount = 0, const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate(), const allocator_type& allocator = EASTL_HASH_SET_DEFAULT_ALLOCATOR) + : base_type(first, last, nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), predicate, eastl::use_self(), allocator) + { + // Empty + } + + + this_type& operator=(const this_type& x) + { + return static_cast(base_type::operator=(x)); + } + + + this_type& operator=(std::initializer_list ilist) + { + return static_cast(base_type::operator=(ilist)); + } + + + this_type& operator=(this_type&& x) + { + return static_cast(base_type::operator=(eastl::move(x))); + } + + }; // hash_set + + /// hash_set erase_if + /// + /// https://en.cppreference.com/w/cpp/container/unordered_set/erase_if + template + void erase_if(eastl::hash_set& c, UserPredicate predicate) + { + // Erases all elements that satisfy the predicate pred from the container. + for (auto i = c.begin(), last = c.end(); i != last;) + { + if (predicate(*i)) + { + i = c.erase(i); + } + else + { + ++i; + } + } + } + + + /// hash_multiset + /// + /// Implements a hash_multiset, which is the same thing as a hash_set + /// except that contained elements need not be unique. See the documentation + /// for hash_set for details. + /// + template , typename Predicate = eastl::equal_to, + typename Allocator = EASTLAllocatorType, bool bCacheHashCode = false> + class hash_multiset + : public hashtable, Predicate, + Hash, mod_range_hashing, default_ranged_hash, + prime_rehash_policy, bCacheHashCode, false, false> + { + public: + typedef hashtable, Predicate, + Hash, mod_range_hashing, default_ranged_hash, + prime_rehash_policy, bCacheHashCode, false, false> base_type; + typedef hash_multiset this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::allocator_type allocator_type; + typedef typename base_type::node_type node_type; + + public: + /// hash_multiset + /// + /// Default constructor. 
+ /// + explicit hash_multiset(const allocator_type& allocator = EASTL_HASH_MULTISET_DEFAULT_ALLOCATOR) + : base_type(0, Hash(), mod_range_hashing(), default_ranged_hash(), Predicate(), eastl::use_self(), allocator) + { + // Empty + } + + + /// hash_multiset + /// + /// Constructor which creates an empty container, but start with nBucketCount buckets. + /// We default to a small nBucketCount value, though the user really should manually + /// specify an appropriate value in order to prevent memory from being reallocated. + /// + explicit hash_multiset(size_type nBucketCount, const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate(), const allocator_type& allocator = EASTL_HASH_MULTISET_DEFAULT_ALLOCATOR) + : base_type(nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), predicate, eastl::use_self(), allocator) + { + // Empty + } + + + hash_multiset(const this_type& x) + : base_type(x) + { + } + + + hash_multiset(this_type&& x) + : base_type(eastl::move(x)) + { + } + + + hash_multiset(this_type&& x, const allocator_type& allocator) + : base_type(eastl::move(x), allocator) + { + } + + + /// hash_multiset + /// + /// initializer_list-based constructor. + /// Allows for initializing with brace values (e.g. hash_set hs = { 3, 3, 4, }; ) + /// + hash_multiset(std::initializer_list ilist, size_type nBucketCount = 0, const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate(), const allocator_type& allocator = EASTL_HASH_MULTISET_DEFAULT_ALLOCATOR) + : base_type(ilist.begin(), ilist.end(), nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), predicate, eastl::use_self(), allocator) + { + // Empty + } + + + /// hash_multiset + /// + /// An input bucket count of <= 1 causes the bucket count to be equal to the number of + /// elements in the input range. + /// + template + hash_multiset(FowardIterator first, FowardIterator last, size_type nBucketCount = 0, const Hash& hashFunction = Hash(), + const Predicate& predicate = Predicate(), const allocator_type& allocator = EASTL_HASH_MULTISET_DEFAULT_ALLOCATOR) + : base_type(first, last, nBucketCount, hashFunction, mod_range_hashing(), default_ranged_hash(), predicate, eastl::use_self(), allocator) + { + // Empty + } + + + this_type& operator=(const this_type& x) + { + return static_cast(base_type::operator=(x)); + } + + + this_type& operator=(std::initializer_list ilist) + { + return static_cast(base_type::operator=(ilist)); + } + + + this_type& operator=(this_type&& x) + { + return static_cast(base_type::operator=(eastl::move(x))); + } + + }; // hash_multiset + + /// hash_multiset erase_if + /// + /// https://en.cppreference.com/w/cpp/container/unordered_multiset/erase_if + template + void erase_if(eastl::hash_multiset& c, UserPredicate predicate) + { + // Erases all elements that satisfy the predicate pred from the container. + for (auto i = c.begin(), last = c.end(); i != last;) + { + if (predicate(*i)) + { + i = c.erase(i); + } + else + { + ++i; + } + } + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const hash_set& a, + const hash_set& b) + { + typedef typename hash_set::const_iterator const_iterator; + + // We implement branching with the assumption that the return value is usually false. 
+ if(a.size() != b.size()) + return false; + + // For set (with its unique keys), we need only test that each element in a can be found in b, + // as there can be only one such pairing per element. multiset needs to do a something more elaborate. + for(const_iterator ai = a.begin(), aiEnd = a.end(), biEnd = b.end(); ai != aiEnd; ++ai) + { + const_iterator bi = b.find(*ai); + + if((bi == biEnd) || !(*ai == *bi)) // We have to compare values in addition to making sure the lookups succeeded. This is because the lookup is done via the user-supplised Predicate + return false; // which isn't strictly required to be identical to the Value operator==, though 99% of the time it will be so. + } + + return true; + } + + template + inline bool operator!=(const hash_set& a, + const hash_set& b) + { + return !(a == b); + } + + + template + inline bool operator==(const hash_multiset& a, + const hash_multiset& b) + { + typedef typename hash_multiset::const_iterator const_iterator; + typedef typename eastl::iterator_traits::difference_type difference_type; + + // We implement branching with the assumption that the return value is usually false. + if(a.size() != b.size()) + return false; + + // We can't simply search for each element of a in b, as it may be that the bucket for + // two elements in a has those same two elements in b but in different order (which should + // still result in equality). Also it's possible that one bucket in a has two elements which + // both match a solitary element in the equivalent bucket in b (which shouldn't result in equality). + eastl::pair aRange; + eastl::pair bRange; + + for(const_iterator ai = a.begin(), aiEnd = a.end(); ai != aiEnd; ai = aRange.second) // For each element in a... + { + aRange = a.equal_range(*ai); // Get the range of elements in a that are equal to ai. + bRange = b.equal_range(*ai); // Get the range of elements in b that are equal to ai. + + // We need to verify that aRange == bRange. First make sure the range sizes are equivalent... + const difference_type aDistance = eastl::distance(aRange.first, aRange.second); + const difference_type bDistance = eastl::distance(bRange.first, bRange.second); + + if(aDistance != bDistance) + return false; + + // At this point, aDistance > 0 and aDistance == bDistance. + // Implement a fast pathway for the case that there's just a single element. + if(aDistance == 1) + { + if(!(*aRange.first == *bRange.first)) // We have to compare values in addition to making sure the distance (element count) was equal. This is because the lookup is done via the user-supplised Predicate + return false; // which isn't strictly required to be identical to the Value operator==, though 99% of the time it will be so. Ditto for the is_permutation usage below. + } + else + { + // Check to see if these aRange and bRange are any permutation of each other. + // This check gets slower as there are more elements in the range. 
+ if(!eastl::is_permutation(aRange.first, aRange.second, bRange.first)) + return false; + } + } + + return true; + } + + template + inline bool operator!=(const hash_multiset& a, + const hash_multiset& b) + { + return !(a == b); + } + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/heap.h b/libkram/eastl/include/EASTL/heap.h new file mode 100644 index 00000000..f0e770b9 --- /dev/null +++ b/libkram/eastl/include/EASTL/heap.h @@ -0,0 +1,685 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements heap functionality much like the std C++ heap algorithms. +// Such heaps are not the same thing as memory heaps or pools, but rather are +// semi-sorted random access containers which have the primary purpose of +// supporting the implementation of priority_queue and similar data structures. +// +// The primary distinctions between this heap functionality and std::heap are: +// - This heap exposes some extra functionality such as is_heap and change_heap. +// - This heap is more efficient than versions found in typical STL +// implementations such as STLPort, Microsoft, and Metrowerks. This comes +// about due to better use of array dereferencing and branch prediction. +// You should expect of 5-30%, depending on the usage and platform. +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// The publicly usable functions we define are: +// push_heap -- Adds an entry to a heap. Same as C++ std::push_heap. +// pop_heap -- Removes the top entry from a heap. Same as C++ std::pop_heap. +// make_heap -- Converts an array to a heap. Same as C++ std::make_heap. +// sort_heap -- Sorts a heap in place. Same as C++ std::sort_heap. +// remove_heap -- Removes an arbitrary entry from a heap. +// change_heap -- Changes the priority of an entry in the heap. +// is_heap -- Returns true if an array appears is in heap format. Same as C++11 std::is_heap. +// is_heap_until -- Returns largest part of the range which is a heap. Same as C++11 std::is_heap_until. +/////////////////////////////////////////////////////////////////////////////// + + + +#ifndef EASTL_HEAP_H +#define EASTL_HEAP_H + + +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /////////////////////////////////////////////////////////////////////// + // promote_heap (internal function) + /////////////////////////////////////////////////////////////////////// + + template + inline void promote_heap_impl(RandomAccessIterator first, Distance topPosition, Distance position, T value) + { + for(Distance parentPosition = (position - 1) >> 1; // This formula assumes that (position > 0). // We use '>> 1' instead of '/ 2' because we have seen VC++ generate better code with >>. + (position > topPosition) && (*(first + parentPosition) < value); + parentPosition = (position - 1) >> 1) + { + *(first + position) = eastl::forward(*(first + parentPosition)); // Swap the node with its parent. 
+ position = parentPosition; + } + + *(first + position) = eastl::forward(value); + } + + /// promote_heap + /// + /// Moves a value in the heap from a given position upward until + /// it is sorted correctly. It's kind of like bubble-sort, except that + /// instead of moving linearly from the back of a list to the front, + /// it moves from the bottom of the tree up the branches towards the + /// top. But otherwise is just like bubble-sort. + /// + /// This function requires that the value argument refer to a value + /// that is currently not within the heap. + /// + template + inline void promote_heap(RandomAccessIterator first, Distance topPosition, Distance position, const T& value) + { + typedef typename iterator_traits::value_type value_type; + promote_heap_impl(first, topPosition, position, value); + } + + + /// promote_heap + /// + /// Moves a value in the heap from a given position upward until + /// it is sorted correctly. It's kind of like bubble-sort, except that + /// instead of moving linearly from the back of a list to the front, + /// it moves from the bottom of the tree up the branches towards the + /// top. But otherwise is just like bubble-sort. + /// + /// This function requires that the value argument refer to a value + /// that is currently not within the heap. + /// + template + inline void promote_heap(RandomAccessIterator first, Distance topPosition, Distance position, T&& value) + { + typedef typename iterator_traits::value_type value_type; + promote_heap_impl(first, topPosition, position, eastl::forward(value)); + } + + + template + inline void promote_heap_impl(RandomAccessIterator first, Distance topPosition, Distance position, T value, Compare compare) + { + for(Distance parentPosition = (position - 1) >> 1; // This formula assumes that (position > 0). // We use '>> 1' instead of '/ 2' because we have seen VC++ generate better code with >>. + (position > topPosition) && compare(*(first + parentPosition), value); + parentPosition = (position - 1) >> 1) + { + *(first + position) = eastl::forward(*(first + parentPosition)); // Swap the node with its parent. + position = parentPosition; + } + + *(first + position) = eastl::forward(value); + } + + + /// promote_heap + /// + /// Takes a Compare(a, b) function (or function object) which returns true if a < b. + /// For example, you could use the standard 'less' comparison object. + /// + /// The Compare function must work equivalently to the compare function used + /// to make and maintain the heap. + /// + /// This function requires that the value argument refer to a value + /// that is currently not within the heap. + /// + template + inline void promote_heap(RandomAccessIterator first, Distance topPosition, Distance position, const T& value, Compare compare) + { + typedef typename iterator_traits::value_type value_type; + promote_heap_impl(first, topPosition, position, value, compare); + } + + + /// promote_heap + /// + /// Takes a Compare(a, b) function (or function object) which returns true if a < b. + /// For example, you could use the standard 'less' comparison object. + /// + /// The Compare function must work equivalently to the compare function used + /// to make and maintain the heap. + /// + /// This function requires that the value argument refer to a value + /// that is currently not within the heap. 
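+	/// Note: this only walks the parent chain from 'position' up toward 'topPosition',
+	/// so it performs at most O(log n) element moves for a heap of n elements.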
+ /// + template + inline void promote_heap(RandomAccessIterator first, Distance topPosition, Distance position, T&& value, Compare compare) + { + typedef typename iterator_traits::value_type value_type; + promote_heap_impl(first, topPosition, position, eastl::forward(value), compare); + } + + + + /////////////////////////////////////////////////////////////////////// + // adjust_heap (internal function) + /////////////////////////////////////////////////////////////////////// + + template + void adjust_heap_impl(RandomAccessIterator first, Distance topPosition, Distance heapSize, Distance position, T value) + { + // We do the conventional approach of moving the position down to the + // bottom then inserting the value at the back and moving it up. + Distance childPosition = (2 * position) + 2; + + for(; childPosition < heapSize; childPosition = (2 * childPosition) + 2) + { + if(*(first + childPosition) < *(first + (childPosition - 1))) // Choose the larger of the two children. + --childPosition; + *(first + position) = eastl::forward(*(first + childPosition)); // Swap positions with this child. + position = childPosition; + } + + if(childPosition == heapSize) // If we are at the very last index of the bottom... + { + *(first + position) = eastl::forward(*(first + (childPosition - 1))); + position = childPosition - 1; + } + + eastl::promote_heap(first, topPosition, position, eastl::forward(value)); + } + + /// adjust_heap + /// + /// Given a position that has just been vacated, this function moves + /// new values into that vacated position appropriately. The value + /// argument is an entry which will be inserted into the heap after + /// we move nodes into the positions that were vacated. + /// + /// This function requires that the value argument refer to a value + /// that is currently not within the heap. + /// + template + void adjust_heap(RandomAccessIterator first, Distance topPosition, Distance heapSize, Distance position, const T& value) + { + typedef typename iterator_traits::value_type value_type; + adjust_heap_impl(first, topPosition, heapSize, position, eastl::forward(value)); + } + + + /// adjust_heap + /// + /// Given a position that has just been vacated, this function moves + /// new values into that vacated position appropriately. The value + /// argument is an entry which will be inserted into the heap after + /// we move nodes into the positions that were vacated. + /// + /// This function requires that the value argument refer to a value + /// that is currently not within the heap. + /// + template + void adjust_heap(RandomAccessIterator first, Distance topPosition, Distance heapSize, Distance position, T&& value) + { + typedef typename iterator_traits::value_type value_type; + adjust_heap_impl(first, topPosition, heapSize, position, eastl::forward(value)); + } + + + template + void adjust_heap_impl(RandomAccessIterator first, Distance topPosition, Distance heapSize, Distance position, T value, Compare compare) + { + // We do the conventional approach of moving the position down to the + // bottom then inserting the value at the back and moving it up. + Distance childPosition = (2 * position) + 2; + + for(; childPosition < heapSize; childPosition = (2 * childPosition) + 2) + { + if(compare(*(first + childPosition), *(first + (childPosition - 1)))) // Choose the larger of the two children. + --childPosition; + *(first + position) = eastl::forward(*(first + childPosition)); // Swap positions with this child. 
+ position = childPosition; + } + + if(childPosition == heapSize) // If we are at the bottom... + { + *(first + position) = eastl::forward(*(first + (childPosition - 1))); + position = childPosition - 1; + } + + eastl::promote_heap(first, topPosition, position, eastl::forward(value), compare); + } + + /// adjust_heap + /// + /// The Compare function must work equivalently to the compare function used + /// to make and maintain the heap. + /// + /// This function requires that the value argument refer to a value + /// that is currently not within the heap. + /// + template + void adjust_heap(RandomAccessIterator first, Distance topPosition, Distance heapSize, Distance position, const T& value, Compare compare) + { + typedef typename iterator_traits::value_type value_type; + adjust_heap_impl(first, topPosition, heapSize, position, eastl::forward(value), compare); + } + + + /// adjust_heap + /// + /// The Compare function must work equivalently to the compare function used + /// to make and maintain the heap. + /// + /// This function requires that the value argument refer to a value + /// that is currently not within the heap. + /// + template + void adjust_heap(RandomAccessIterator first, Distance topPosition, Distance heapSize, Distance position, T&& value, Compare compare) + { + typedef typename iterator_traits::value_type value_type; + adjust_heap_impl(first, topPosition, heapSize, position, eastl::forward(value), compare); + } + + + /////////////////////////////////////////////////////////////////////// + // push_heap + /////////////////////////////////////////////////////////////////////// + + /// push_heap + /// + /// Adds an item to a heap (which is an array). The item necessarily + /// comes from the back of the heap (array). Thus, the insertion of a + /// new item in a heap is a two step process: push_back and push_heap. + /// + /// Example usage: + /// vector heap; + /// + /// heap.push_back(3); + /// push_heap(heap.begin(), heap.end()); // Places '3' appropriately. + /// + template + inline void push_heap(RandomAccessIterator first, RandomAccessIterator last) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + typedef typename eastl::iterator_traits::value_type value_type; + + const value_type tempBottom(eastl::forward(*(last - 1))); + + eastl::promote_heap + (first, (difference_type)0, (difference_type)(last - first - 1), eastl::forward(tempBottom)); + } + + + /// push_heap + /// + /// This version is useful for cases where your object comparison is unusual + /// or where you want to have the heap store pointers to objects instead of + /// storing the objects themselves (often in order to improve cache coherency + /// while doing sorting). + /// + /// The Compare function must work equivalently to the compare function used + /// to make and maintain the heap. 
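+	/// Example usage (an illustrative sketch, assuming an eastl::vector<int> named minHeap and
+	/// the eastl::greater<int> functor):
+	///     minHeap.push_back(3);
+	///     eastl::push_heap(minHeap.begin(), minHeap.end(), eastl::greater<int>()); // maintains a min-heap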
+ /// + template + inline void push_heap(RandomAccessIterator first, RandomAccessIterator last, Compare compare) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + typedef typename eastl::iterator_traits::value_type value_type; + + const value_type tempBottom(*(last - 1)); + + eastl::promote_heap + (first, (difference_type)0, (difference_type)(last - first - 1), tempBottom, compare); + } + + + + + /////////////////////////////////////////////////////////////////////// + // pop_heap + /////////////////////////////////////////////////////////////////////// + + /// pop_heap + /// + /// Removes the first item from the heap (which is an array), and adjusts + /// the heap so that the highest priority item becomes the new first item. + /// + /// Example usage: + /// vector heap; + /// + /// heap.push_back(2); + /// heap.push_back(3); + /// heap.push_back(1); + /// + /// pop_heap(heap.begin(), heap.end()); // Moves heap[0] to the back of the heap and adjusts the heap. + /// heap.pop_back(); // Remove value that was just at the top of the heap + /// + template + inline void pop_heap(RandomAccessIterator first, RandomAccessIterator last) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + typedef typename eastl::iterator_traits::value_type value_type; + + value_type tempBottom(eastl::forward(*(last - 1))); + *(last - 1) = eastl::forward(*first); + eastl::adjust_heap + (first, (difference_type)0, (difference_type)(last - first - 1), 0, eastl::forward(tempBottom)); + } + + + + /// pop_heap + /// + /// This version is useful for cases where your object comparison is unusual + /// or where you want to have the heap store pointers to objects instead of + /// storing the objects themselves (often in order to improve cache coherency + /// while doing sorting). + /// + /// The Compare function must work equivalently to the compare function used + /// to make and maintain the heap. + /// + template + inline void pop_heap(RandomAccessIterator first, RandomAccessIterator last, Compare compare) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + typedef typename eastl::iterator_traits::value_type value_type; + + value_type tempBottom(eastl::forward(*(last - 1))); + *(last - 1) = eastl::forward(*first); + eastl::adjust_heap + (first, (difference_type)0, (difference_type)(last - first - 1), 0, eastl::forward(tempBottom), compare); + } + + + /////////////////////////////////////////////////////////////////////// + // make_heap + /////////////////////////////////////////////////////////////////////// + + + /// make_heap + /// + /// Given an array, this function converts it into heap format. + /// The complexity is O(n), where n is count of the range. + /// The input range is not required to be in any order. + /// + template + void make_heap(RandomAccessIterator first, RandomAccessIterator last) + { + // We do bottom-up heap construction as per Sedgewick. Such construction is O(n). + typedef typename eastl::iterator_traits::difference_type difference_type; + typedef typename eastl::iterator_traits::value_type value_type; + + const difference_type heapSize = last - first; + + if(heapSize >= 2) // If there is anything to do... (we need this check because otherwise the math fails below). + { + difference_type parentPosition = ((heapSize - 2) >> 1) + 1; // We use '>> 1' instead of '/ 2' because we have seen VC++ generate better code with >>. 
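+			// Note: parentPosition starts one past the last internal node ((heapSize - 2) / 2),
+			// so the do-while below pre-decrements and sifts each internal node down exactly once,
+			// which is what gives the O(n) bottom-up construction described above.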
+
+			do{
+				--parentPosition;
+				value_type temp(eastl::forward(*(first + parentPosition)));
+				eastl::adjust_heap
+							  (first, parentPosition, heapSize, parentPosition, eastl::forward(temp));
+			} while(parentPosition != 0);
+		}
+	}
+
+
+	template
+	void make_heap(RandomAccessIterator first, RandomAccessIterator last, Compare compare)
+	{
+		typedef typename eastl::iterator_traits::difference_type difference_type;
+		typedef typename eastl::iterator_traits::value_type value_type;
+
+		const difference_type heapSize = last - first;
+
+		if(heapSize >= 2) // If there is anything to do... (we need this check because otherwise the math fails below).
+		{
+			difference_type parentPosition = ((heapSize - 2) >> 1) + 1; // We use '>> 1' instead of '/ 2' because we have seen VC++ generate better code with >>.
+
+			do{
+				--parentPosition;
+				value_type temp(eastl::forward(*(first + parentPosition)));
+				eastl::adjust_heap
+							  (first, parentPosition, heapSize, parentPosition, eastl::forward(temp), compare);
+			} while(parentPosition != 0);
+		}
+	}
+
+
+	///////////////////////////////////////////////////////////////////////
+	// sort_heap
+	///////////////////////////////////////////////////////////////////////
+
+	/// sort_heap
+	///
+	/// After the application of this algorithm, the range it was applied to
+	/// is no longer a heap, though it will be a reverse heap (smallest first).
+	/// The item with the lowest priority will be first, and the highest last.
+	/// This is not a stable sort because the relative order of equivalent
+	/// elements is not necessarily preserved.
+	/// The range referenced must be valid; all pointers must be dereferenceable
+	/// and within the sequence the last position is reachable from the first
+	/// by incrementation.
+	/// The complexity is at most O(n * log(n)), where n is count of the range.
+	///
+	template
+	inline void sort_heap(RandomAccessIterator first, RandomAccessIterator last)
+	{
+		for(; (last - first) > 1; --last) // We simply use the heap to sort itself.
+			eastl::pop_heap(first, last);
+	}
+
+
+	/// sort_heap
+	///
+	/// The Compare function must work equivalently to the compare function used
+	/// to make and maintain the heap.
+	///
+	template
+	inline void sort_heap(RandomAccessIterator first, RandomAccessIterator last, Compare compare)
+	{
+		for(; (last - first) > 1; --last) // We simply use the heap to sort itself.
+			eastl::pop_heap(first, last, compare);
+	}
+
+
+
+	///////////////////////////////////////////////////////////////////////
+	// remove_heap
+	///////////////////////////////////////////////////////////////////////
+
+	/// remove_heap
+	///
+	/// Removes an arbitrary entry from the heap and adjusts the heap appropriately.
+	/// This function is unlike pop_heap in that pop_heap moves the top item
+	/// to the back of the heap, whereas remove_heap moves an arbitrary item to
+	/// the back of the heap.
+	///
+	/// Note: Since this function moves the element to the back of the heap and
+	/// doesn't actually remove it from the given container, the user must call
+	/// the container erase function if the user wants to erase the element
+	/// from the container.
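+	/// Example usage (an illustrative sketch, assuming an eastl::vector<int> named heap that is
+	/// already in heap order, and an index 'position' of the same integral type as heap.size()):
+	///     eastl::remove_heap(heap.begin(), heap.size(), position); // moves heap[position] to the back
+	///     heap.pop_back();                                         // erase it from the container itself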
+ /// + template + inline void remove_heap(RandomAccessIterator first, Distance heapSize, Distance position) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + typedef typename eastl::iterator_traits::value_type value_type; + + const value_type tempBottom(*(first + heapSize - 1)); + *(first + heapSize - 1) = *(first + position); + eastl::adjust_heap + (first, (difference_type)0, (difference_type)(heapSize - 1), (difference_type)position, tempBottom); + } + + + /// remove_heap + /// + /// The Compare function must work equivalently to the compare function used + /// to make and maintain the heap. + /// + /// Note: Since this function moves the element to the back of the heap and + /// doesn't actually remove it from the given container, the user must call + /// the container erase function if the user wants to erase the element + /// from the container. + /// + template + inline void remove_heap(RandomAccessIterator first, Distance heapSize, Distance position, Compare compare) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + typedef typename eastl::iterator_traits::value_type value_type; + + const value_type tempBottom(*(first + heapSize - 1)); + *(first + heapSize - 1) = *(first + position); + eastl::adjust_heap + (first, (difference_type)0, (difference_type)(heapSize - 1), (difference_type)position, tempBottom, compare); + } + + + + /////////////////////////////////////////////////////////////////////// + // change_heap + /////////////////////////////////////////////////////////////////////// + + /// change_heap + /// + /// Given a value in the heap that has changed in priority, this function + /// adjusts the heap appropriately. The heap size remains unchanged after + /// this operation. + /// + template + inline void change_heap(RandomAccessIterator first, Distance heapSize, Distance position) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + typedef typename eastl::iterator_traits::value_type value_type; + + eastl::remove_heap(first, heapSize, position); + + value_type tempBottom(*(first + heapSize - 1)); + + eastl::promote_heap + (first, (difference_type)0, (difference_type)(heapSize - 1), tempBottom); + } + + + /// change_heap + /// + /// The Compare function must work equivalently to the compare function used + /// to make and maintain the heap. + /// + template + inline void change_heap(RandomAccessIterator first, Distance heapSize, Distance position, Compare compare) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + typedef typename eastl::iterator_traits::value_type value_type; + + eastl::remove_heap(first, heapSize, position, compare); + + value_type tempBottom(*(first + heapSize - 1)); + + eastl::promote_heap + (first, (difference_type)0, (difference_type)(heapSize - 1), tempBottom, compare); + } + + + + /////////////////////////////////////////////////////////////////////// + // is_heap_until + /////////////////////////////////////////////////////////////////////// + + /// is_heap_until + /// + template + inline RandomAccessIterator is_heap_until(RandomAccessIterator first, RandomAccessIterator last) + { + int counter = 0; + + for(RandomAccessIterator child = first + 1; child < last; ++child, counter ^= 1) + { + if(*first < *child) // We must use operator <, and are not allowed to use > or >= here. + return child; + first += counter; // counter switches between 0 and 1 every time through. 
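+			// In other words, 'first' (the parent) advances only after both of its
+			// children have been compared against it.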
+ } + + return last; + } + + + /// is_heap_until + /// + /// The Compare function must work equivalently to the compare function used + /// to make and maintain the heap. + /// + template + inline RandomAccessIterator is_heap_until(RandomAccessIterator first, RandomAccessIterator last, Compare compare) + { + int counter = 0; + + for(RandomAccessIterator child = first + 1; child < last; ++child, counter ^= 1) + { + if(compare(*first, *child)) + return child; + first += counter; // counter switches between 0 and 1 every time through. + } + + return last; + } + + + + /////////////////////////////////////////////////////////////////////// + // is_heap + /////////////////////////////////////////////////////////////////////// + + /// is_heap + /// + /// This is a useful debugging algorithm for verifying that a random + /// access container is in heap format. + /// + template + inline bool is_heap(RandomAccessIterator first, RandomAccessIterator last) + { + return (eastl::is_heap_until(first, last) == last); + } + + + /// is_heap + /// + /// The Compare function must work equivalently to the compare function used + /// to make and maintain the heap. + /// + template + inline bool is_heap(RandomAccessIterator first, RandomAccessIterator last, Compare compare) + { + return (eastl::is_heap_until(first, last, compare) == last); + } + + + // To consider: The following may be a faster implementation for most cases. + // + // template + // inline bool is_heap(RandomAccessIterator first, RandomAccessIterator last) + // { + // if(((uintptr_t)(last - first) & 1) == 0) // If the range has an even number of elements... + // --last; + // + // RandomAccessIterator parent = first, child = (first + 1); + // + // for(; child < last; child += 2, ++parent) + // { + // if((*parent < *child) || (*parent < *(child + 1))) + // return false; + // } + // + // if((((uintptr_t)(last - first) & 1) == 0) && (*parent < *child)) + // return false; + // + // return true; + // } + + +} // namespace eastl + + +#endif // Header include guard + + + + diff --git a/libkram/eastl/include/EASTL/initializer_list.h b/libkram/eastl/include/EASTL/initializer_list.h new file mode 100644 index 00000000..028fb4f8 --- /dev/null +++ b/libkram/eastl/include/EASTL/initializer_list.h @@ -0,0 +1,96 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +// +// This file #includes if it's available, else it defines +// its own version of std::initializer_list. It does not define eastl::initializer_list +// because that would not provide any use, due to how the C++11 Standard works. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INITIALIZER_LIST_H +#define EASTL_INITIALIZER_LIST_H + + +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + +#if defined(EA_HAVE_CPP11_INITIALIZER_LIST) // If the compiler can generate calls to std::initializer_list... + + // The initializer_list type must be declared in the std namespace, as that's the + // namespace the compiler uses when generating code to use it. + EA_DISABLE_ALL_VC_WARNINGS() + #include + EA_RESTORE_ALL_VC_WARNINGS() + +#else + + // If you get an error here about initializer_list being already defined, then the EA_HAVE_CPP11_INITIALIZER_LIST define from needs to be updated. 
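+	// Note: the fallback below provides the minimal shape the compiler expects of
+	// std::initializer_list: a pointer plus an element count, populated via the
+	// private two-argument constructor when a braced-init-list is materialized.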
+ namespace std + { + // See the C++11 Standard, section 18.9. + template + class initializer_list + { + public: + typedef E value_type; + typedef const E& reference; + typedef const E& const_reference; + typedef size_t size_type; + typedef const E* iterator; // Must be const, as initializer_list (and its mpArray) is an immutable temp object. + typedef const E* const_iterator; + + private: + iterator mpArray; + size_type mArraySize; + + // This constructor is private, but the C++ compiler has the ability to call it, as per the C++11 Standard. + initializer_list(const_iterator pArray, size_type arraySize) + : mpArray(pArray), mArraySize(arraySize) { } + + public: + initializer_list() EA_NOEXCEPT // EA_NOEXCEPT requires a recent version of EABase. + : mpArray(NULL), mArraySize(0) { } + + size_type size() const EA_NOEXCEPT { return mArraySize; } + const_iterator begin() const EA_NOEXCEPT { return mpArray; } // Must be const_iterator, as initializer_list (and its mpArray) is an immutable temp object. + const_iterator end() const EA_NOEXCEPT { return mpArray + mArraySize; } + }; + + + template + const T* begin(std::initializer_list ilist) EA_NOEXCEPT + { + return ilist.begin(); + } + + template + const T* end(std::initializer_list ilist) EA_NOEXCEPT + { + return ilist.end(); + } + } + +#endif + + +#endif // Header include guard + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch.h new file mode 100644 index 00000000..4924a591 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch.h @@ -0,0 +1,65 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_H +#define EASTL_ATOMIC_INTERNAL_ARCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// Include the architecture specific implementations +// +#if defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64) + + #include "x86/arch_x86.h" + +#elif defined(EA_PROCESSOR_ARM32) || defined(EA_PROCESSOR_ARM64) + + #include "arm/arch_arm.h" + +#endif + + +///////////////////////////////////////////////////////////////////////////////// + + +#include "arch_fetch_add.h" +#include "arch_fetch_sub.h" + +#include "arch_fetch_and.h" +#include "arch_fetch_xor.h" +#include "arch_fetch_or.h" + +#include "arch_add_fetch.h" +#include "arch_sub_fetch.h" + +#include "arch_and_fetch.h" +#include "arch_xor_fetch.h" +#include "arch_or_fetch.h" + +#include "arch_exchange.h" + +#include "arch_cmpxchg_weak.h" +#include "arch_cmpxchg_strong.h" + +#include "arch_load.h" +#include "arch_store.h" + +#include "arch_compiler_barrier.h" + +#include "arch_cpu_pause.h" + +#include "arch_memory_barrier.h" + +#include "arch_signal_fence.h" + +#include "arch_thread_fence.h" + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_add_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_add_fetch.h new file mode 100644 index 00000000..65771f89 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_add_fetch.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_ADD_FETCH_H +#define EASTL_ATOMIC_INTERNAL_ARCH_ADD_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_ADD_FETCH_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_8) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_8) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_8) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_16) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_16) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_16) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_32) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_32) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_32) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_64) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_64) + #define 
EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_64) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_64) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_128) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_128) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_128) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_ADD_FETCH_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_ADD_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_and_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_and_fetch.h new file mode 100644 index 00000000..df7ba35d --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_and_fetch.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_AND_FETCH_H +#define EASTL_ATOMIC_INTERNAL_ARCH_AND_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_AND_FETCH_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_8) + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_8) + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_8) + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_16) + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_16) + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_16) + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_32) + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_32) + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_32) + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_64) + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_64) + #define 
EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_64) + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_64) + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_128) + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_128) + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_128) + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_AND_FETCH_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_AND_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_cmpxchg_strong.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_cmpxchg_strong.h new file mode 100644 index 00000000..1005dc33 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_cmpxchg_strong.h @@ -0,0 +1,430 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_CMPXCHG_STRONG_H +#define EASTL_ATOMIC_INTERNAL_ARCH_CMPXCHG_STRONG_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_*_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_16) + #define 
EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_64_AVAILABLE 1 +#else + #define 
EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_128_AVAILABLE 0 
+#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128_AVAILABLE 0 +#endif + + +///////////////////////////////////////////////////////////////////////////////// + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_8_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_8_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_8_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_8(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_8_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_8(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_8_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_8(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8(type, ret, ptr, expected, desired) + + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_16_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_16_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_16_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_16(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_16_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_16(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_16_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_16(type, ret, ptr, expected, desired) \ + 
EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16(type, ret, ptr, expected, desired) + + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_32_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_32_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_32_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_32(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_32_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_32(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_32_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_32(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32(type, ret, ptr, expected, desired) + + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_64_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_64_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_64_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_64(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_64_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_64(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_64_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_64(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64(type, ret, ptr, expected, desired) + + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_128_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128(type, ret, ptr, expected, desired) + +#define 
EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_128_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_128_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_128(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_128_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_128(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_128_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_128(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128(type, ret, ptr, expected, desired) + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_CMPXCHG_STRONG_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_cmpxchg_weak.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_cmpxchg_weak.h new file mode 100644 index 00000000..5ce26386 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_cmpxchg_weak.h @@ -0,0 +1,430 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_CMPXCHG_WEAK_H +#define EASTL_ATOMIC_INTERNAL_ARCH_CMPXCHG_WEAK_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_*_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8_AVAILABLE 0 +#endif + +#if 
defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32_AVAILABLE 1 +#else + #define 
EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_128) + #define 
EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128_AVAILABLE 0 +#endif + + +///////////////////////////////////////////////////////////////////////////////// + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_8_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_8_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_8_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_8(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_8_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_8(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_8_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_8(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8(type, ret, ptr, expected, desired) + + +#define 
EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_16_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_16_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_16_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_16(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_16_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_16(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_16_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_16(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16(type, ret, ptr, expected, desired) + + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_32_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_32_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_32_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_32(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_32_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_32(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_32_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_32(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32(type, ret, ptr, expected, desired) + + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_64_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_64_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_64(type, ret, ptr, expected, desired) \ + 
EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_64_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_64(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_64_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_64(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_64_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_64(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64(type, ret, ptr, expected, desired) + + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_128_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_128_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_128_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_128(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_128_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_128(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128(type, ret, ptr, expected, desired) + +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_128_AVAILABLE \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128_AVAILABLE +#define EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_128(type, ret, ptr, expected, desired) \ + EASTL_ARCH_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128(type, ret, ptr, expected, desired) + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_CMPXCHG_WEAK_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_compiler_barrier.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_compiler_barrier.h new file mode 100644 index 00000000..0652469b --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_compiler_barrier.h @@ -0,0 +1,19 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_COMPILER_BARRIER_H +#define EASTL_ATOMIC_INTERNAL_ARCH_COMPILER_BARRIER_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_ARCH_ATOMIC_COMPILER_BARRIER_AVAILABLE 0 + +#define EASTL_ARCH_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY_AVAILABLE 0 + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_COMPILER_BARRIER_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_cpu_pause.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_cpu_pause.h new file mode 100644 index 00000000..e8c2d1d7 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_cpu_pause.h @@ -0,0 +1,25 @@ +///////////////////////////////////////////////////////////////////////////////// +// copyright (c) electronic arts inc. all rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_CPU_PAUSE_H +#define EASTL_ATOMIC_INTERNAL_ARCH_CPU_PAUSE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_CPU_PAUSE() +// +#if defined(EASTL_ARCH_ATOMIC_CPU_PAUSE) + #define EASTL_ARCH_ATOMIC_CPU_PAUSE_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CPU_PAUSE_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_CPU_PAUSE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_exchange.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_exchange.h new file mode 100644 index 00000000..76003188 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_exchange.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_EXCHANGE_H +#define EASTL_ATOMIC_INTERNAL_ARCH_EXCHANGE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_EXCHANGE_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_8) + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_8) + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_8) + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_16) + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_16) + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_16) + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_32) + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_32) + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_32) + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_64) + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_64) + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_64_AVAILABLE 1 +#else + 
#define EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_64) + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_64) + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_128) + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_128) + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_128) + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_EXCHANGE_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_EXCHANGE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_add.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_add.h new file mode 100644 index 00000000..71907f70 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_add.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_FETCH_ADD_H +#define EASTL_ATOMIC_INTERNAL_ARCH_FETCH_ADD_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_FETCH_ADD_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_8) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_8) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_8) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_16) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_16) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_16) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_32) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_32) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_32) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_64) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_64) + #define 
EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_64) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_64) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_128) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_128) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_128) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_ADD_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_FETCH_ADD_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_and.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_and.h new file mode 100644 index 00000000..f2b39a4c --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_and.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_FETCH_AND_H +#define EASTL_ATOMIC_INTERNAL_ARCH_FETCH_AND_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_FETCH_AND_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_8) + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_8) + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_8) + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_16) + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_16) + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_16) + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_32) + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_32) + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_32) + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_64) + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_64) + #define 
EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_64) + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_64) + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_128) + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_128) + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_128) + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_AND_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_FETCH_AND_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_or.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_or.h new file mode 100644 index 00000000..dd6dd0db --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_or.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_FETCH_OR_H +#define EASTL_ATOMIC_INTERNAL_ARCH_FETCH_OR_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_FETCH_OR_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_8) + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_8) + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_8) + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_16) + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_16) + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_16) + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_32) + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_32) + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_32) + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_64) + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_64) + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_64_AVAILABLE 1 +#else + 
#define EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_64) + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_64) + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_128) + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_128) + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_128) + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_OR_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_FETCH_OR_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_sub.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_sub.h new file mode 100644 index 00000000..ea63db73 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_sub.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_FETCH_SUB_H +#define EASTL_ATOMIC_INTERNAL_ARCH_FETCH_SUB_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_FETCH_SUB_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_8) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_8) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_8) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_16) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_16) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_16) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_32) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_32) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_32) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_64) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_64) + #define 
EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_64) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_64) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_128) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_128) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_128) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_SUB_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_FETCH_SUB_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_xor.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_xor.h new file mode 100644 index 00000000..b41ad2d4 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_fetch_xor.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_FETCH_XOR_H +#define EASTL_ATOMIC_INTERNAL_ARCH_FETCH_XOR_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_FETCH_XOR_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_8) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_8) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_8) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_16) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_16) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_16) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_32) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_32) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_32) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_64) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_64) + #define 
EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_64) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_64) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_128) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_128) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_128) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_FETCH_XOR_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_FETCH_XOR_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_load.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_load.h new file mode 100644 index 00000000..eea7cf49 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_load.h @@ -0,0 +1,125 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_LOAD_H +#define EASTL_ATOMIC_INTERNAL_ARCH_LOAD_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_LOAD_*_N(type, type ret, type * ptr) +// +#if defined(EASTL_ARCH_ATOMIC_LOAD_RELAXED_8) + #define EASTL_ARCH_ATOMIC_LOAD_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_LOAD_RELAXED_16) + #define EASTL_ARCH_ATOMIC_LOAD_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_LOAD_RELAXED_32) + #define EASTL_ARCH_ATOMIC_LOAD_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_READ_DEPENDS_32) + #define EASTL_ARCH_ATOMIC_LOAD_READ_DEPENDS_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_READ_DEPENDS_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_LOAD_RELAXED_64) + #define EASTL_ARCH_ATOMIC_LOAD_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_64) + #define EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_READ_DEPENDS_64) + #define EASTL_ARCH_ATOMIC_LOAD_READ_DEPENDS_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_READ_DEPENDS_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_LOAD_RELAXED_128) + #define EASTL_ARCH_ATOMIC_LOAD_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_LOAD_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_LOAD_H */ diff --git 
a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_memory_barrier.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_memory_barrier.h new file mode 100644 index 00000000..c6cc6bfc --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_memory_barrier.h @@ -0,0 +1,47 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_MEMORY_BARRIER_H +#define EASTL_ATOMIC_INTERNAL_ARCH_MEMORY_BARRIER_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_CPU_MB() +// +#if defined(EASTL_ARCH_ATOMIC_CPU_MB) + #define EASTL_ARCH_ATOMIC_CPU_MB_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CPU_MB_AVAILABLE 0 +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_CPU_WMB() +// +#if defined(EASTL_ARCH_ATOMIC_CPU_WMB) + #define EASTL_ARCH_ATOMIC_CPU_WMB_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CPU_WMB_AVAILABLE 0 +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_CPU_RMB() +// +#if defined(EASTL_ARCH_ATOMIC_CPU_RMB) + #define EASTL_ARCH_ATOMIC_CPU_RMB_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_CPU_RMB_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_MEMORY_BARRIER_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_or_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_or_fetch.h new file mode 100644 index 00000000..110326b4 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_or_fetch.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_OR_FETCH_H +#define EASTL_ATOMIC_INTERNAL_ARCH_OR_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_OR_FETCH_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_8) + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_8) + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_8) + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_16) + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_16) + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_16) + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_32) + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_32) + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_32) + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_64) + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_64) + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_64_AVAILABLE 1 +#else + 
#define EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_64) + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_64) + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_128) + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_128) + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_128) + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_OR_FETCH_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_OR_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_signal_fence.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_signal_fence.h new file mode 100644 index 00000000..65b64fc2 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_signal_fence.h @@ -0,0 +1,21 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_SIGNAL_FENCE_H +#define EASTL_ATOMIC_INTERNAL_ARCH_SIGNAL_FENCE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_ARCH_ATOMIC_SIGNAL_FENCE_RELAXED_AVAILABLE 0 +#define EASTL_ARCH_ATOMIC_SIGNAL_FENCE_ACQUIRE_AVAILABLE 0 +#define EASTL_ARCH_ATOMIC_SIGNAL_FENCE_RELEASE_AVAILABLE 0 +#define EASTL_ARCH_ATOMIC_SIGNAL_FENCE_ACQ_REL_AVAILABLE 0 +#define EASTL_ARCH_ATOMIC_SIGNAL_FENCE_SEQ_CST_AVAILABLE 0 + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_SIGNAL_FENCE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_store.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_store.h new file mode 100644 index 00000000..9a4112cb --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_store.h @@ -0,0 +1,113 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
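// --- Editorial note: illustrative, not part of the original headers. ---
// arch_signal_fence.h above hard-codes every *_AVAILABLE flag to 0: a signal fence
// only has to order the current thread against a signal handler running on that
// same thread, so no architecture-specific CPU instruction is required. A
// compiler-level barrier is enough, and it is left to the compiler layer. A rough
// sketch of what that amounts to on GCC/Clang (an assumption for illustration; the
// macro name is hypothetical and the real mapping is in the compiler headers, not
// shown here):
#if 0 // illustration only
	#define EASTL_ATOMIC_SIGNAL_FENCE_ACQ_REL() \
		__atomic_signal_fence(__ATOMIC_ACQ_REL)
#endif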
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_STORE_H +#define EASTL_ATOMIC_INTERNAL_ARCH_STORE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_STORE_*_N(type, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_STORE_RELAXED_8) + #define EASTL_ARCH_ATOMIC_STORE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_STORE_RELEASE_8) + #define EASTL_ARCH_ATOMIC_STORE_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_STORE_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_STORE_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_STORE_RELAXED_16) + #define EASTL_ARCH_ATOMIC_STORE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_STORE_RELEASE_16) + #define EASTL_ARCH_ATOMIC_STORE_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_STORE_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_STORE_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_STORE_RELAXED_32) + #define EASTL_ARCH_ATOMIC_STORE_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_STORE_RELEASE_32) + #define EASTL_ARCH_ATOMIC_STORE_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_STORE_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_STORE_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_STORE_RELAXED_64) + #define EASTL_ARCH_ATOMIC_STORE_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_STORE_RELEASE_64) + #define EASTL_ARCH_ATOMIC_STORE_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_STORE_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_STORE_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_STORE_RELAXED_128) + #define EASTL_ARCH_ATOMIC_STORE_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_STORE_RELEASE_128) + #define EASTL_ARCH_ATOMIC_STORE_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_STORE_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_STORE_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_STORE_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_STORE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_sub_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_sub_fetch.h new file mode 100644 index 00000000..20241b14 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_sub_fetch.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright 
(c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_SUB_FETCH_H +#define EASTL_ATOMIC_INTERNAL_ARCH_SUB_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_SUB_FETCH_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_8) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_8) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_8) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_16) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_16) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_16) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_32) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_32) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_32) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_64) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_64_AVAILABLE 0 +#endif + +#if 
defined(EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_64) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_64) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_64) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_128) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_128) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_128) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_SUB_FETCH_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_SUB_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_thread_fence.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_thread_fence.h new file mode 100644 index 00000000..676fbf19 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_thread_fence.h @@ -0,0 +1,49 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_THREAD_FENCE_H +#define EASTL_ATOMIC_INTERNAL_ARCH_THREAD_FENCE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_THREAD_FENCE_*() +// +#if defined(EASTL_ARCH_ATOMIC_THREAD_FENCE_RELAXED) + #define EASTL_ARCH_ATOMIC_THREAD_FENCE_RELAXED_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_THREAD_FENCE_RELAXED_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_THREAD_FENCE_ACQUIRE) + #define EASTL_ARCH_ATOMIC_THREAD_FENCE_ACQUIRE_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_THREAD_FENCE_ACQUIRE_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_THREAD_FENCE_RELEASE) + #define EASTL_ARCH_ATOMIC_THREAD_FENCE_RELEASE_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_THREAD_FENCE_RELEASE_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_THREAD_FENCE_ACQ_REL) + #define EASTL_ARCH_ATOMIC_THREAD_FENCE_ACQ_REL_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_THREAD_FENCE_ACQ_REL_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_THREAD_FENCE_SEQ_CST) + #define EASTL_ARCH_ATOMIC_THREAD_FENCE_SEQ_CST_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_THREAD_FENCE_SEQ_CST_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_THREAD_FENCE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/arch/arch_xor_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_xor_fetch.h new file mode 100644 index 00000000..63548c22 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/arch/arch_xor_fetch.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ARCH_XOR_FETCH_H +#define EASTL_ATOMIC_INTERNAL_ARCH_XOR_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ARCH_ATOMIC_XOR_FETCH_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_8) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_8) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_8) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_8) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_8) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_16) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_16) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_16) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_16) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_16) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_32) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_32) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_32) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_32) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_32) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_64) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_64) + #define 
EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_64) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_64) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_64) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_128) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_128) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_128) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_128) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_128) + #define EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_ARCH_ATOMIC_XOR_FETCH_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ARCH_XOR_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic.h b/libkram/eastl/include/EASTL/internal/atomic/atomic.h new file mode 100644 index 00000000..e1c5286e --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic.h @@ -0,0 +1,252 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_H +#define EASTL_ATOMIC_INTERNAL_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#include +#include +#include +#include + +#include "atomic_macros.h" +#include "atomic_casts.h" + +#include "atomic_memory_order.h" +#include "atomic_asserts.h" + +#include "atomic_size_aligned.h" +#include "atomic_base_width.h" + +#include "atomic_integral.h" + +#include "atomic_pointer.h" + + +///////////////////////////////////////////////////////////////////////////////// + + +/** + * NOTE: + * + * All of the actual implementation is done via the ATOMIC_MACROS in the compiler or arch sub folders. + * The C++ code is merely boilerplate around these macros that actually implement the atomic operations. + * The C++ boilerplate is also hidden behind macros. + * This may seem more complicated but this is all meant to reduce copy-pasting and to ensure all operations + * all end up going down to one macro that does the actual implementation. + * The reduced code duplication makes it easier to verify the implementation and reason about it. + * Ensures we do not have to re-implement the same code for compilers that do not support generic builtins such as MSVC. 
+ * Ensures for compilers that have separate intrinsics for different widths, that C++ boilerplate isn't copy-pasted leading to programmer errors. + * Ensures if we ever have to implement a new platform, only the low-level leaf macros have to be implemented, everything else will be generated for you. + */ + + +#include "atomic_push_compiler_options.h" + + +namespace eastl +{ + + +namespace internal +{ + + + template + struct is_atomic_lockfree_size + { + static EASTL_CPP17_INLINE_VARIABLE constexpr bool value = false || + #if defined(EASTL_ATOMIC_HAS_8BIT) + sizeof(T) == 1 || + #endif + #if defined(EASTL_ATOMIC_HAS_16BIT) + sizeof(T) == 2 || + #endif + #if defined(EASTL_ATOMIC_HAS_32BIT) + sizeof(T) == 4 || + #endif + #if defined(EASTL_ATOMIC_HAS_64BIT) + sizeof(T) == 8 || + #endif + #if defined(EASTL_ATOMIC_HAS_128BIT) + sizeof(T) == 16 || + #endif + false; + }; + + + template + struct is_user_type_suitable_for_primary_template + { + static EASTL_CPP17_INLINE_VARIABLE constexpr bool value = eastl::internal::is_atomic_lockfree_size::value; + }; + + + template + using select_atomic_inherit_0 = typename eastl::conditional || eastl::internal::is_user_type_suitable_for_primary_template::value, + eastl::internal::atomic_base_width, /* True */ + eastl::internal::atomic_invalid_type /* False */ + >::type; + + template + using select_atomic_inherit = select_atomic_inherit_0; + + +} // namespace internal + + +#define EASTL_ATOMIC_CLASS_IMPL(type, base, valueType, differenceType) \ + private: \ + \ + EASTL_ATOMIC_STATIC_ASSERT_TYPE(type); \ + \ + using Base = base; \ + \ + public: \ + \ + typedef valueType value_type; \ + typedef differenceType difference_type; \ + \ + public: \ + \ + static EASTL_CPP17_INLINE_VARIABLE constexpr bool is_always_lock_free = eastl::internal::is_atomic_lockfree_size::value; \ + \ + public: /* deleted ctors && assignment operators */ \ + \ + atomic(const atomic&) EA_NOEXCEPT = delete; \ + \ + atomic& operator=(const atomic&) EA_NOEXCEPT = delete; \ + atomic& operator=(const atomic&) volatile EA_NOEXCEPT = delete; \ + \ + public: /* ctors */ \ + \ + EA_CONSTEXPR atomic(type desired) EA_NOEXCEPT \ + : Base{ desired } \ + { \ + } \ + \ + EA_CONSTEXPR atomic() EA_NOEXCEPT_IF(eastl::is_nothrow_default_constructible_v) = default; \ + \ + public: \ + \ + bool is_lock_free() const EA_NOEXCEPT \ + { \ + return eastl::internal::is_atomic_lockfree_size::value; \ + } \ + \ + bool is_lock_free() const volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(type); \ + return false; \ + } + + +#define EASTL_ATOMIC_USING_ATOMIC_BASE(type) \ + public: \ + \ + using Base::operator=; \ + using Base::store; \ + using Base::load; \ + using Base::exchange; \ + using Base::compare_exchange_weak; \ + using Base::compare_exchange_strong; \ + \ + public: \ + \ + operator type() const volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + } \ + \ + operator type() const EA_NOEXCEPT \ + { \ + return load(eastl::memory_order_seq_cst); \ + } + + +#define EASTL_ATOMIC_USING_ATOMIC_INTEGRAL() \ + public: \ + \ + using Base::fetch_add; \ + using Base::add_fetch; \ + \ + using Base::fetch_sub; \ + using Base::sub_fetch; \ + \ + using Base::fetch_and; \ + using Base::and_fetch; \ + \ + using Base::fetch_or; \ + using Base::or_fetch; \ + \ + using Base::fetch_xor; \ + using Base::xor_fetch; \ + \ + using Base::operator++; \ + using Base::operator--; \ + using Base::operator+=; \ + using Base::operator-=; \ + using Base::operator&=; \ + using Base::operator|=; \ 
+ using Base::operator^=; + + +#define EASTL_ATOMIC_USING_ATOMIC_POINTER() \ + public: \ + \ + using Base::fetch_add; \ + using Base::add_fetch; \ + using Base::fetch_sub; \ + using Base::sub_fetch; \ + \ + using Base::operator++; \ + using Base::operator--; \ + using Base::operator+=; \ + using Base::operator-=; + + +template +struct atomic : protected eastl::internal::select_atomic_inherit +{ + EASTL_ATOMIC_CLASS_IMPL(T, eastl::internal::select_atomic_inherit, T, T) + + EASTL_ATOMIC_USING_ATOMIC_BASE(T) +}; + + +template +struct atomic && !eastl::is_same_v>> : protected eastl::internal::atomic_integral_width +{ + EASTL_ATOMIC_CLASS_IMPL(T, eastl::internal::atomic_integral_width, T, T) + + EASTL_ATOMIC_USING_ATOMIC_BASE(T) + + EASTL_ATOMIC_USING_ATOMIC_INTEGRAL() +}; + + +template +struct atomic : protected eastl::internal::atomic_pointer_width +{ + EASTL_ATOMIC_CLASS_IMPL(T*, eastl::internal::atomic_pointer_width, T*, ptrdiff_t) + + EASTL_ATOMIC_USING_ATOMIC_BASE(T*) + + EASTL_ATOMIC_USING_ATOMIC_POINTER() +}; + + +} // namespace eastl + + +#include "atomic_pop_compiler_options.h" + + +#endif /* EASTL_ATOMIC_INTERNAL_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_asserts.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_asserts.h new file mode 100644 index 00000000..9324a479 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_asserts.h @@ -0,0 +1,75 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_STATIC_ASSERTS_H +#define EASTL_ATOMIC_INTERNAL_STATIC_ASSERTS_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(type) \ + static_assert(!eastl::is_same::value, "eastl::atomic : volatile eastl::atomic is not what you expect! Read the docs in EASTL/atomic.h! Use the memory orders to access the atomic object!"); + +#define EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(type) \ + static_assert(!eastl::is_same::value, "eastl::atomic : invalid memory order for the given operation!"); + +#define EASTL_ATOMIC_STATIC_ASSERT_TYPE(type) \ + /* User Provided T must not be cv qualified */ \ + static_assert(!eastl::is_const::value, "eastl::atomic : Template Typename T cannot be const!"); \ + static_assert(!eastl::is_volatile::value, "eastl::atomic : Template Typename T cannot be volatile! 
Use the memory orders to access the underlying type for the guarantees you need."); \ + /* T must satisfy StandardLayoutType */ \ + static_assert(eastl::is_standard_layout::value, "eastl::atomic : Must have standard layout!"); \ + /* T must be TriviallyCopyable but it does not have to be TriviallyConstructible */ \ + static_assert(eastl::is_trivially_copyable::value, "eastl::atomci : Template Typename T must be trivially copyable!"); \ + static_assert(eastl::is_copy_constructible::value, "eastl::atomic : Template Typename T must be copy constructible!"); \ + static_assert(eastl::is_move_constructible::value, "eastl::atomic : Template Typename T must be move constructible!"); \ + static_assert(eastl::is_copy_assignable::value, "eastl::atomic : Template Typename T must be copy assignable!"); \ + static_assert(eastl::is_move_assignable::value, "eastl::atomic : Template Typename T must be move assignable!"); \ + static_assert(eastl::is_trivially_destructible::value, "eastl::atomic : Must be trivially destructible!"); \ + static_assert(eastl::internal::is_atomic_lockfree_size::value, "eastl::atomic : Template Typename T must be a lockfree size!"); + +#define EASTL_ATOMIC_STATIC_ASSERT_TYPE_IS_OBJECT(type) \ + static_assert(eastl::is_object::value, "eastl::atomic : Template Typename T must be an object type!"); + +#define EASTL_ATOMIC_ASSERT_ALIGNED(alignment) \ + EASTL_ASSERT((alignment & (alignment - 1)) == 0); \ + EASTL_ASSERT((reinterpret_cast(this) & (alignment - 1)) == 0) + + +namespace eastl +{ + + +namespace internal +{ + + + template + struct atomic_invalid_type + { + /** + * class Test { int i; int j; int k; }; sizeof(Test) == 96 bits + * + * std::atomic allows non-primitive types to be used for the template type. + * This causes the api to degrade to locking for types that cannot fit into the lockfree size + * of the target platform such as std::atomic leading to performance traps. + * + * If this static_assert() fired, this means your template type T is larger than any atomic instruction + * supported on the given platform. + */ + + static_assert(!eastl::is_same::value, "eastl::atomic : invalid template type T!"); + }; + + +} // namespace internal + + +} // namespace eastl + + +#endif /* EASTL_ATOMIC_INTERNAL_STATIC_ASSERTS_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_base_width.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_base_width.h new file mode 100644 index 00000000..ca476182 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_base_width.h @@ -0,0 +1,346 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_BASE_WIDTH_H +#define EASTL_ATOMIC_INTERNAL_BASE_WIDTH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#include "atomic_push_compiler_options.h" + + +namespace eastl +{ + + +namespace internal +{ + + + template + struct atomic_base_width; + + /** + * NOTE: + * + * T does not have to be trivially default constructible but it still + * has to be a trivially copyable type for the primary atomic template. + * Thus we must type pun into whatever storage type of the given fixed width + * the platform designates. This ensures T does not have to be trivially constructible. 
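 *
 * (Editorial illustration added here, not part of the original comment.) For
 * example, assuming a platform with 32-bit atomics, a 4-byte trivially copyable
 * user type is punned through the platform's 32-bit fixed-width storage type
 * (typically uint32_t) by the store/load macros below:
 *
 *     struct Vec2s { int16_t x; int16_t y; };   // 4 bytes, trivially copyable
 *
 *     eastl::atomic<Vec2s> v{ Vec2s{ 1, 2 } };
 *     Vec2s desired{ 3, 4 };
 *
 *     // store: pun Vec2s -> fixed-width 32-bit type, then one 32-bit atomic store
 *     v.store(desired, eastl::memory_order_release);
 *
 *     // load: one 32-bit atomic load, then pun the 32-bit value back to Vec2s
 *     Vec2s observed = v.load(eastl::memory_order_acquire);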
+ */ + +#define EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits) \ + EA_PREPROCESSOR_JOIN(EASTL_ATOMIC_FIXED_WIDTH_TYPE_, bits) + + +#define EASTL_ATOMIC_STORE_FUNC_IMPL(op, bits) \ + EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits) fixedWidthDesired = EASTL_ATOMIC_TYPE_PUN_CAST(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), desired); \ + EA_PREPROCESSOR_JOIN(op, bits)(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), \ + EASTL_ATOMIC_TYPE_CAST(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), this->GetAtomicAddress()), \ + fixedWidthDesired) + + +#define EASTL_ATOMIC_LOAD_FUNC_IMPL(op, bits) \ + EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits) retVal; \ + EA_PREPROCESSOR_JOIN(op, bits)(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), \ + retVal, \ + EASTL_ATOMIC_TYPE_CAST(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), this->GetAtomicAddress())); \ + return EASTL_ATOMIC_TYPE_PUN_CAST(T, retVal); + + +#define EASTL_ATOMIC_EXCHANGE_FUNC_IMPL(op, bits) \ + EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits) retVal; \ + EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits) fixedWidthDesired = EASTL_ATOMIC_TYPE_PUN_CAST(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), desired); \ + EA_PREPROCESSOR_JOIN(op, bits)(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), \ + retVal, \ + EASTL_ATOMIC_TYPE_CAST(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), this->GetAtomicAddress()), \ + fixedWidthDesired); \ + return EASTL_ATOMIC_TYPE_PUN_CAST(T, retVal); + + +#define EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(op, bits) \ + bool retVal; \ + EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits) fixedWidthDesired = EASTL_ATOMIC_TYPE_PUN_CAST(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), desired); \ + EA_PREPROCESSOR_JOIN(op, bits)(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), \ + retVal, \ + EASTL_ATOMIC_TYPE_CAST(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), this->GetAtomicAddress()), \ + EASTL_ATOMIC_TYPE_CAST(EASTL_ATOMIC_BASE_FIXED_WIDTH_TYPE(bits), &expected), \ + fixedWidthDesired); \ + return retVal; + + +#define EASTL_ATOMIC_BASE_OP_JOIN(op, Order) \ + EA_PREPROCESSOR_JOIN(EA_PREPROCESSOR_JOIN(EASTL_ATOMIC_, op), Order) + + +#define EASTL_ATOMIC_BASE_CMPXCHG_FUNCS_IMPL(funcName, cmpxchgOp, bits) \ + using Base::funcName; \ + \ + bool funcName(T& expected, T desired) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _SEQ_CST_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _RELAXED_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_acquire_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _ACQUIRE_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_release_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _RELEASE_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_acq_rel_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _ACQ_REL_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_seq_cst_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _SEQ_CST_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_relaxed_s, \ + eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _RELAXED_RELAXED_), 
bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_acquire_s, \ + eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _ACQUIRE_RELAXED_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_acquire_s, \ + eastl::internal::memory_order_acquire_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _ACQUIRE_ACQUIRE_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_release_s, \ + eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _RELEASE_RELAXED_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_acq_rel_s, \ + eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _ACQ_REL_RELAXED_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_acq_rel_s, \ + eastl::internal::memory_order_acquire_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _ACQ_REL_ACQUIRE_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_seq_cst_s, \ + eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _SEQ_CST_RELAXED_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_seq_cst_s, \ + eastl::internal::memory_order_acquire_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _SEQ_CST_ACQUIRE_), bits); \ + } \ + \ + bool funcName(T& expected, T desired, \ + eastl::internal::memory_order_seq_cst_s, \ + eastl::internal::memory_order_seq_cst_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_CMPXCHG_FUNC_IMPL(EASTL_ATOMIC_BASE_OP_JOIN(cmpxchgOp, _SEQ_CST_SEQ_CST_), bits); \ + } + +#define EASTL_ATOMIC_BASE_CMPXCHG_WEAK_FUNCS_IMPL(bits) \ + EASTL_ATOMIC_BASE_CMPXCHG_FUNCS_IMPL(compare_exchange_weak, CMPXCHG_WEAK, bits) + +#define EASTL_ATOMIC_BASE_CMPXCHG_STRONG_FUNCS_IMPL(bits) \ + EASTL_ATOMIC_BASE_CMPXCHG_FUNCS_IMPL(compare_exchange_strong, CMPXCHG_STRONG, bits) + + +#define EASTL_ATOMIC_BASE_WIDTH_SPECIALIZE(bytes, bits) \ + template \ + struct atomic_base_width : public atomic_size_aligned \ + { \ + private: \ + \ + static_assert(EA_ALIGN_OF(atomic_size_aligned) == bytes, "eastl::atomic must be sizeof(T) aligned!"); \ + static_assert(EA_ALIGN_OF(atomic_size_aligned) == sizeof(T), "eastl::atomic must be sizeof(T) aligned!"); \ + using Base = atomic_size_aligned; \ + \ + public: /* ctors */ \ + \ + EA_CONSTEXPR atomic_base_width(T desired) EA_NOEXCEPT \ + : Base{ desired } \ + { \ + } \ + \ + EA_CONSTEXPR atomic_base_width() EA_NOEXCEPT_IF(eastl::is_nothrow_default_constructible_v) = default; \ + \ + atomic_base_width(const atomic_base_width&) EA_NOEXCEPT = delete; \ + \ + public: /* store */ \ + \ + using Base::store; \ + \ + void store(T desired) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STORE_FUNC_IMPL(EASTL_ATOMIC_STORE_SEQ_CST_, bits); \ + } \ + \ + void store(T desired, eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STORE_FUNC_IMPL(EASTL_ATOMIC_STORE_RELAXED_, bits); \ + } \ + \ + void store(T desired, eastl::internal::memory_order_release_s) EA_NOEXCEPT \ + { \ + 
EASTL_ATOMIC_STORE_FUNC_IMPL(EASTL_ATOMIC_STORE_RELEASE_, bits); \ + } \ + \ + void store(T desired, eastl::internal::memory_order_seq_cst_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STORE_FUNC_IMPL(EASTL_ATOMIC_STORE_SEQ_CST_, bits); \ + } \ + \ + public: /* load */ \ + \ + using Base::load; \ + \ + T load() const EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_LOAD_FUNC_IMPL(EASTL_ATOMIC_LOAD_SEQ_CST_, bits); \ + } \ + \ + T load(eastl::internal::memory_order_relaxed_s) const EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_LOAD_FUNC_IMPL(EASTL_ATOMIC_LOAD_RELAXED_, bits); \ + } \ + \ + T load(eastl::internal::memory_order_acquire_s) const EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_LOAD_FUNC_IMPL(EASTL_ATOMIC_LOAD_ACQUIRE_, bits); \ + } \ + \ + T load(eastl::internal::memory_order_seq_cst_s) const EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_LOAD_FUNC_IMPL(EASTL_ATOMIC_LOAD_SEQ_CST_, bits); \ + } \ + \ + public: /* exchange */ \ + \ + using Base::exchange; \ + \ + T exchange(T desired) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_EXCHANGE_FUNC_IMPL(EASTL_ATOMIC_EXCHANGE_SEQ_CST_, bits); \ + } \ + \ + T exchange(T desired, eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_EXCHANGE_FUNC_IMPL(EASTL_ATOMIC_EXCHANGE_RELAXED_, bits); \ + } \ + \ + T exchange(T desired, eastl::internal::memory_order_acquire_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_EXCHANGE_FUNC_IMPL(EASTL_ATOMIC_EXCHANGE_ACQUIRE_, bits); \ + } \ + \ + T exchange(T desired, eastl::internal::memory_order_release_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_EXCHANGE_FUNC_IMPL(EASTL_ATOMIC_EXCHANGE_RELEASE_, bits); \ + } \ + \ + T exchange(T desired, eastl::internal::memory_order_acq_rel_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_EXCHANGE_FUNC_IMPL(EASTL_ATOMIC_EXCHANGE_ACQ_REL_, bits); \ + } \ + \ + T exchange(T desired, eastl::internal::memory_order_seq_cst_s) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_EXCHANGE_FUNC_IMPL(EASTL_ATOMIC_EXCHANGE_SEQ_CST_, bits); \ + } \ + \ + public: /* compare_exchange_weak */ \ + \ + EASTL_ATOMIC_BASE_CMPXCHG_WEAK_FUNCS_IMPL(bits) \ + \ + public: /* compare_exchange_strong */ \ + \ + EASTL_ATOMIC_BASE_CMPXCHG_STRONG_FUNCS_IMPL(bits) \ + \ + public: /* assignment operator */ \ + \ + using Base::operator=; \ + \ + T operator=(T desired) EA_NOEXCEPT \ + { \ + store(desired, eastl::memory_order_seq_cst); \ + return desired; \ + } \ + \ + atomic_base_width& operator=(const atomic_base_width&) EA_NOEXCEPT = delete; \ + atomic_base_width& operator=(const atomic_base_width&) volatile EA_NOEXCEPT = delete; \ + \ + }; + + +#if defined(EASTL_ATOMIC_HAS_8BIT) + EASTL_ATOMIC_BASE_WIDTH_SPECIALIZE(1, 8) +#endif + +#if defined(EASTL_ATOMIC_HAS_16BIT) + EASTL_ATOMIC_BASE_WIDTH_SPECIALIZE(2, 16) +#endif + +#if defined(EASTL_ATOMIC_HAS_32BIT) + EASTL_ATOMIC_BASE_WIDTH_SPECIALIZE(4, 32) +#endif + +#if defined(EASTL_ATOMIC_HAS_64BIT) + EASTL_ATOMIC_BASE_WIDTH_SPECIALIZE(8, 64) +#endif + +#if defined(EASTL_ATOMIC_HAS_128BIT) + EASTL_ATOMIC_BASE_WIDTH_SPECIALIZE(16, 128) +#endif + + +} // namespace internal + + +} // namespace eastl + + +#include "atomic_pop_compiler_options.h" + + +#endif /* EASTL_ATOMIC_INTERNAL_BASE_WIDTH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_casts.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_casts.h new file mode 100644 index 00000000..54b9ed27 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_casts.h @@ -0,0 +1,190 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
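// --- Editorial note: illustrative usage sketch, not part of the original headers. ---
// The atomic_base_width specialization macros above generate the store / load /
// exchange / compare_exchange member functions, overloaded on the memory-order tag
// types (including the two-order compare_exchange forms). Assuming the 32-bit
// integral case, typical call sites look like this:
#if 0 // illustration only
#include <EASTL/atomic.h>

void example(eastl::atomic<uint32_t>& counter)
{
	counter.store(0u, eastl::memory_order_relaxed);

	uint32_t seen = counter.load(eastl::memory_order_acquire);

	uint32_t prev = counter.exchange(42u, eastl::memory_order_acq_rel);

	// separate success/failure orders map onto the _ACQ_REL_RELAXED_ style variants above
	uint32_t expected = 42u;
	bool swapped = counter.compare_exchange_strong(expected, 43u,
	                                               eastl::memory_order_acq_rel,
	                                               eastl::memory_order_relaxed);

	(void)seen; (void)prev; (void)swapped;
}
#endif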
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_CASTS_H +#define EASTL_ATOMIC_INTERNAL_CASTS_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#include + + +#include + + +namespace eastl +{ + + +namespace internal +{ + + +template +EASTL_FORCE_INLINE volatile T* AtomicVolatileCast(T* ptr) EA_NOEXCEPT +{ + static_assert(!eastl::is_volatile::value, "eastl::atomic : pointer must not be volatile, the pointed to type must be volatile!"); + static_assert(eastl::is_volatile::value, "eastl::atomic : the pointed to type must be volatile!"); + + return reinterpret_cast(ptr); +} + + +/** + * NOTE: + * + * Some compiler intrinsics do not operate on pointer types thus + * doing atomic operations on pointers must be casted to the suitable + * sized unsigned integral type. + * + * Some compiler intrinsics aren't generics and thus structs must also + * be casted to the appropriate sized unsigned integral type. + * + * Atomic operations on an int* might have to be casted to a uint64_t on + * a platform with 8-byte pointers as an example. + * + * Also doing an atomic operation on a struct, we must ensure that we observe + * the whole struct as one atomic unit with no shearing between the members. + * A load of a struct with two uint32_t members must be one uint64_t load, + * not two separate uint32_t loads, thus casted to the suitable sized + * unsigned integral type. + */ +template +EASTL_FORCE_INLINE volatile Integral* AtomicVolatileIntegralCast(T* ptr) EA_NOEXCEPT +{ + static_assert(!eastl::is_volatile::value, "eastl::atomic : pointer must not be volatile, the pointed to type must be volatile!"); + static_assert(eastl::is_volatile::value, "eastl::atomic : the pointed to type must be volatile!"); + static_assert(eastl::is_integral::value, "eastl::atomic : Integral cast must cast to an Integral type!"); + static_assert(sizeof(Integral) == sizeof(T), "eastl::atomic : Integral and T must be same size for casting!"); + + return reinterpret_cast(ptr); +} + +template +EASTL_FORCE_INLINE Integral* AtomicIntegralCast(T* ptr) EA_NOEXCEPT +{ + static_assert(eastl::is_integral::value, "eastl::atomic : Integral cast must cast to an Integral type!"); + static_assert(sizeof(Integral) == sizeof(T), "eastl::atomic : Integral and T must be same size for casting!"); + + return reinterpret_cast(ptr); +} + + +/** + * NOTE: + * + * These casts are meant to be used with unions or structs of larger types that must be casted + * down to the smaller integral types. Like with 128-bit atomics and msvc intrinsics. + * + * struct Foo128 { __int64 array[2]; }; can be casted to a __int64* + * since a poiter to Foo128 is a pointer to the first member. + */ +template +EASTL_FORCE_INLINE volatile ToType* AtomicVolatileTypeCast(FromType* ptr) EA_NOEXCEPT +{ + static_assert(!eastl::is_volatile::value, "eastl::atomic : pointer must not be volatile, the pointed to type must be volatile!"); + static_assert(eastl::is_volatile::value, "eastl::atomic : the pointed to type must be volatile!"); + + return reinterpret_cast(ptr); +} + +template +EASTL_FORCE_INLINE ToType* AtomicTypeCast(FromType* ptr) EA_NOEXCEPT +{ + return reinterpret_cast(ptr); +} + + +/** + * NOTE: + * + * This is a compiler guaranteed safe type punning. + * This is useful when dealing with user defined structs. 
+ * struct Test { uint32_t; unint32_t; }; + * + * Example: + * uint64_t atomicLoad = *((volatile uint64_t*)&Test); + * Test load = AtomicTypePunCast(atomicLoad); + * + * uint64_t comparand = AtomicTypePunCast(Test); + * cmpxchg(&Test, comparand, desired); + * + * This can be implemented in many different ways depending on the compiler such + * as thru a union, memcpy, reinterpret_cast(atomicLoad), etc. + */ +template , int> = 0> +EASTL_FORCE_INLINE Pun AtomicTypePunCast(const T& fromType) EA_NOEXCEPT +{ + static_assert(sizeof(Pun) == sizeof(T), "eastl::atomic : Pun and T must be the same size for type punning!"); + + /** + * aligned_storage ensures we can TypePun objects that aren't trivially default constructible + * but still trivially copyable. + */ + typename eastl::aligned_storage::type ret; + memcpy(eastl::addressof(ret), eastl::addressof(fromType), sizeof(Pun)); + return reinterpret_cast(ret); +} + +template , int> = 0> +EASTL_FORCE_INLINE Pun AtomicTypePunCast(const T& fromType) EA_NOEXCEPT +{ + return fromType; +} + + +template +EASTL_FORCE_INLINE T AtomicNegateOperand(T val) EA_NOEXCEPT +{ + static_assert(eastl::is_integral::value, "eastl::atomic : Integral Negation must be an Integral type!"); + static_assert(!eastl::is_volatile::value, "eastl::atomic : T must not be volatile!"); + + return static_cast(0U - static_cast>(val)); +} + +EASTL_FORCE_INLINE ptrdiff_t AtomicNegateOperand(ptrdiff_t val) EA_NOEXCEPT +{ + return -val; +} + + +} // namespace internal + + +} // namespace eastl + + +/** + * NOTE: + * + * These macros are meant to prevent inclusion hell. + * Also so that it fits with the style of the rest of the atomic macro implementation. + */ +#define EASTL_ATOMIC_VOLATILE_CAST(ptr) \ + eastl::internal::AtomicVolatileCast((ptr)) + +#define EASTL_ATOMIC_VOLATILE_INTEGRAL_CAST(IntegralType, ptr) \ + eastl::internal::AtomicVolatileIntegralCast((ptr)) + +#define EASTL_ATOMIC_INTEGRAL_CAST(IntegralType, ptr) \ + eastl::internal::AtomicIntegralCast((ptr)) + +#define EASTL_ATOMIC_VOLATILE_TYPE_CAST(ToType, ptr) \ + eastl::internal::AtomicVolatileTypeCast((ptr)) + +#define EASTL_ATOMIC_TYPE_CAST(ToType, ptr) \ + eastl::internal::AtomicTypeCast((ptr)) + +#define EASTL_ATOMIC_TYPE_PUN_CAST(PunType, fromType) \ + eastl::internal::AtomicTypePunCast((fromType)) + +#define EASTL_ATOMIC_NEGATE_OPERAND(val) \ + eastl::internal::AtomicNegateOperand((val)) + + +#endif /* EASTL_ATOMIC_INTERNAL_CASTS_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_flag.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_flag.h new file mode 100644 index 00000000..e135d612 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_flag.h @@ -0,0 +1,170 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNA_ATOMIC_FLAG_H +#define EASTL_ATOMIC_INTERNA_ATOMIC_FLAG_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#include "atomic_push_compiler_options.h" + + +namespace eastl +{ + + +class atomic_flag +{ +public: /* ctors */ + + EA_CONSTEXPR atomic_flag(bool desired) EA_NOEXCEPT + : mFlag{ desired } + { + } + + EA_CONSTEXPR atomic_flag() EA_NOEXCEPT + : mFlag{ false } + { + } + +public: /* deleted ctors && assignment operators */ + + atomic_flag(const atomic_flag&) EA_NOEXCEPT = delete; + + atomic_flag& operator=(const atomic_flag&) EA_NOEXCEPT = delete; + atomic_flag& operator=(const atomic_flag&) volatile EA_NOEXCEPT = delete; + +public: /* clear */ + + template + void clear(Order order) volatile EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(Order); + } + + template + void clear(Order order) EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(Order); + } + + void clear(eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT + { + mFlag.store(false, eastl::memory_order_relaxed); + } + + void clear(eastl::internal::memory_order_release_s) EA_NOEXCEPT + { + mFlag.store(false, eastl::memory_order_release); + } + + void clear(eastl::internal::memory_order_seq_cst_s) EA_NOEXCEPT + { + mFlag.store(false, eastl::memory_order_seq_cst); + } + + void clear() EA_NOEXCEPT + { + mFlag.store(false, eastl::memory_order_seq_cst); + } + +public: /* test_and_set */ + + template + bool test_and_set(Order order) volatile EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(Order); + return false; + } + + template + bool test_and_set(Order order) EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(Order); + return false; + } + + bool test_and_set(eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT + { + return mFlag.exchange(true, eastl::memory_order_relaxed); + } + + bool test_and_set(eastl::internal::memory_order_acquire_s) EA_NOEXCEPT + { + return mFlag.exchange(true, eastl::memory_order_acquire); + } + + bool test_and_set(eastl::internal::memory_order_release_s) EA_NOEXCEPT + { + return mFlag.exchange(true, eastl::memory_order_release); + } + + bool test_and_set(eastl::internal::memory_order_acq_rel_s) EA_NOEXCEPT + { + return mFlag.exchange(true, eastl::memory_order_acq_rel); + } + + bool test_and_set(eastl::internal::memory_order_seq_cst_s) EA_NOEXCEPT + { + return mFlag.exchange(true, eastl::memory_order_seq_cst); + } + + bool test_and_set() EA_NOEXCEPT + { + return mFlag.exchange(true, eastl::memory_order_seq_cst); + } + +public: /* test */ + + template + bool test(Order order) const volatile EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(Order); + return false; + } + + template + bool test(Order order) const EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(Order); + return false; + } + + bool test(eastl::internal::memory_order_relaxed_s) const EA_NOEXCEPT + { + return mFlag.load(eastl::memory_order_relaxed); + } + + bool test(eastl::internal::memory_order_acquire_s) const EA_NOEXCEPT + { + return mFlag.load(eastl::memory_order_acquire); + } + + bool test(eastl::internal::memory_order_seq_cst_s) const EA_NOEXCEPT + { + return mFlag.load(eastl::memory_order_seq_cst); + } + + bool test() const EA_NOEXCEPT + { + return mFlag.load(eastl::memory_order_seq_cst); + } + +private: + + eastl::atomic mFlag; +}; + + +} // namespace eastl + + +#include "atomic_pop_compiler_options.h" + + +#endif /* 
EASTL_ATOMIC_INTERNA_ATOMIC_FLAG_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_flag_standalone.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_flag_standalone.h new file mode 100644 index 00000000..b5284bed --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_flag_standalone.h @@ -0,0 +1,69 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_FLAG_STANDALONE_H +#define EASTL_ATOMIC_INTERNAL_FLAG_STANDALONE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +namespace eastl +{ + + +//////////////////////////////////////////////////////////////////////////////// +// +// bool atomic_flag_test_and_set(eastl::atomic*) +// +EASTL_FORCE_INLINE bool atomic_flag_test_and_set(eastl::atomic_flag* atomicObj) EA_NOEXCEPT +{ + return atomicObj->test_and_set(); +} + +template +EASTL_FORCE_INLINE bool atomic_flag_test_and_set_explicit(eastl::atomic_flag* atomicObj, Order order) +{ + return atomicObj->test_and_set(order); +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// bool atomic_flag_clear(eastl::atomic*) +// +EASTL_FORCE_INLINE void atomic_flag_clear(eastl::atomic_flag* atomicObj) +{ + atomicObj->clear(); +} + +template +EASTL_FORCE_INLINE void atomic_flag_clear_explicit(eastl::atomic_flag* atomicObj, Order order) +{ + atomicObj->clear(order); +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// bool atomic_flag_test(eastl::atomic*) +// +EASTL_FORCE_INLINE bool atomic_flag_test(eastl::atomic_flag* atomicObj) +{ + return atomicObj->test(); +} + +template +EASTL_FORCE_INLINE bool atomic_flag_test_explicit(eastl::atomic_flag* atomicObj, Order order) +{ + return atomicObj->test(order); +} + + +} // namespace eastl + + +#endif /* EASTL_ATOMIC_INTERNAL_FLAG_STANDALONE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_integral.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_integral.h new file mode 100644 index 00000000..7c94db32 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_integral.h @@ -0,0 +1,343 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
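// --- Editorial note: illustrative usage sketch, not part of the original headers. ---
// eastl::atomic_flag above is a thin wrapper over an atomic bool, and the
// atomic_flag_* free functions are just the standalone spellings of its member
// calls. The classic use is a spin lock built from test_and_set(acquire) and
// clear(release):
#if 0 // illustration only
#include <EASTL/atomic.h>

class SpinLock
{
public:
	void lock() EA_NOEXCEPT
	{
		// spin until the previous value was 'false', i.e. we took the lock
		while (mFlag.test_and_set(eastl::memory_order_acquire)) {}
	}

	void unlock() EA_NOEXCEPT
	{
		mFlag.clear(eastl::memory_order_release);
	}

private:
	eastl::atomic_flag mFlag; // default-constructed to false
};
#endif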
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_INTEGRAL_H +#define EASTL_ATOMIC_INTERNAL_INTEGRAL_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#include "atomic_push_compiler_options.h" + + +namespace eastl +{ + + +namespace internal +{ + + +#define EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_FUNCS_IMPL(funcName) \ + template \ + T funcName(T arg, Order order) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(T); \ + } \ + \ + template \ + T funcName(T arg, Order order) volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + } \ + \ + T funcName(T arg) volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + } + + +#define EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_INC_DEC_OPERATOR_IMPL(operatorOp) \ + T operator operatorOp() volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + } \ + \ + T operator operatorOp(int) volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + } + + +#define EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_ASSIGNMENT_OPERATOR_IMPL(operatorOp) \ + T operator operatorOp(T arg) volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + } + + + template + struct atomic_integral_base : public atomic_base_width + { + private: + + using Base = atomic_base_width; + + public: /* ctors */ + + EA_CONSTEXPR atomic_integral_base(T desired) EA_NOEXCEPT + : Base{ desired } + { + } + + EA_CONSTEXPR atomic_integral_base() EA_NOEXCEPT = default; + + atomic_integral_base(const atomic_integral_base&) EA_NOEXCEPT = delete; + + public: /* assignment operator */ + + using Base::operator=; + + atomic_integral_base& operator=(const atomic_integral_base&) EA_NOEXCEPT = delete; + atomic_integral_base& operator=(const atomic_integral_base&) volatile EA_NOEXCEPT = delete; + + public: /* fetch_add */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_FUNCS_IMPL(fetch_add) + + public: /* add_fetch */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_FUNCS_IMPL(add_fetch) + + public: /* fetch_sub */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_FUNCS_IMPL(fetch_sub) + + public: /* sub_fetch */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_FUNCS_IMPL(sub_fetch) + + public: /* fetch_and */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_FUNCS_IMPL(fetch_and) + + public: /* and_fetch */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_FUNCS_IMPL(and_fetch) + + public: /* fetch_or */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_FUNCS_IMPL(fetch_or) + + public: /* or_fetch */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_FUNCS_IMPL(or_fetch) + + public: /* fetch_xor */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_FUNCS_IMPL(fetch_xor) + + public: /* xor_fetch */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_FUNCS_IMPL(xor_fetch) + + public: /* operator++ && operator-- */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_INC_DEC_OPERATOR_IMPL(++) + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_INC_DEC_OPERATOR_IMPL(--) + + public: /* operator+= && operator-= */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_ASSIGNMENT_OPERATOR_IMPL(+=) + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_ASSIGNMENT_OPERATOR_IMPL(-=) + + public: /* operator&= */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_ASSIGNMENT_OPERATOR_IMPL(&=) + + public: /* operator|= */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_ASSIGNMENT_OPERATOR_IMPL(|=) + + public: /* operator^= */ + + EASTL_ATOMIC_INTEGRAL_STATIC_ASSERT_ASSIGNMENT_OPERATOR_IMPL(^=) + + }; + + + template + struct atomic_integral_width; + +#define 
EASTL_ATOMIC_INTEGRAL_FUNC_IMPL(op, bits) \ + T retVal; \ + EA_PREPROCESSOR_JOIN(op, bits)(T, retVal, this->GetAtomicAddress(), arg); \ + return retVal; + +#define EASTL_ATOMIC_INTEGRAL_FETCH_IMPL(funcName, op, bits) \ + T funcName(T arg) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_INTEGRAL_FUNC_IMPL(op, bits); \ + } + +#define EASTL_ATOMIC_INTEGRAL_FETCH_ORDER_IMPL(funcName, orderType, op, bits) \ + T funcName(T arg, orderType) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_INTEGRAL_FUNC_IMPL(op, bits); \ + } + +#define EASTL_ATOMIC_INTEGRAL_FETCH_OP_JOIN(fetchOp, Order) \ + EA_PREPROCESSOR_JOIN(EA_PREPROCESSOR_JOIN(EASTL_ATOMIC_, fetchOp), Order) + +#define EASTL_ATOMIC_INTEGRAL_FETCH_FUNCS_IMPL(funcName, fetchOp, bits) \ + using Base::funcName; \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_IMPL(funcName, EASTL_ATOMIC_INTEGRAL_FETCH_OP_JOIN(fetchOp, _SEQ_CST_), bits) \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_ORDER_IMPL(funcName, eastl::internal::memory_order_relaxed_s, \ + EASTL_ATOMIC_INTEGRAL_FETCH_OP_JOIN(fetchOp, _RELAXED_), bits) \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_ORDER_IMPL(funcName, eastl::internal::memory_order_acquire_s, \ + EASTL_ATOMIC_INTEGRAL_FETCH_OP_JOIN(fetchOp, _ACQUIRE_), bits) \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_ORDER_IMPL(funcName, eastl::internal::memory_order_release_s, \ + EASTL_ATOMIC_INTEGRAL_FETCH_OP_JOIN(fetchOp, _RELEASE_), bits) \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_ORDER_IMPL(funcName, eastl::internal::memory_order_acq_rel_s, \ + EASTL_ATOMIC_INTEGRAL_FETCH_OP_JOIN(fetchOp, _ACQ_REL_), bits) \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_ORDER_IMPL(funcName, eastl::internal::memory_order_seq_cst_s, \ + EASTL_ATOMIC_INTEGRAL_FETCH_OP_JOIN(fetchOp, _SEQ_CST_), bits) + +#define EASTL_ATOMIC_INTEGRAL_FETCH_INC_DEC_OPERATOR_IMPL(operatorOp, preFuncName, postFuncName) \ + using Base::operator operatorOp; \ + \ + T operator operatorOp() EA_NOEXCEPT \ + { \ + return preFuncName(1, eastl::memory_order_seq_cst); \ + } \ + \ + T operator operatorOp(int) EA_NOEXCEPT \ + { \ + return postFuncName(1, eastl::memory_order_seq_cst); \ + } + +#define EASTL_ATOMIC_INTEGRAL_FETCH_ASSIGNMENT_OPERATOR_IMPL(operatorOp, funcName) \ + using Base::operator operatorOp; \ + \ + T operator operatorOp(T arg) EA_NOEXCEPT \ + { \ + return funcName(arg, eastl::memory_order_seq_cst); \ + } + + +#define EASTL_ATOMIC_INTEGRAL_WIDTH_SPECIALIZE(bytes, bits) \ + template \ + struct atomic_integral_width : public atomic_integral_base \ + { \ + private: \ + \ + using Base = atomic_integral_base; \ + \ + public: /* ctors */ \ + \ + EA_CONSTEXPR atomic_integral_width(T desired) EA_NOEXCEPT \ + : Base{ desired } \ + { \ + } \ + \ + EA_CONSTEXPR atomic_integral_width() EA_NOEXCEPT = default; \ + \ + atomic_integral_width(const atomic_integral_width&) EA_NOEXCEPT = delete; \ + \ + public: /* assignment operator */ \ + \ + using Base::operator=; \ + \ + atomic_integral_width& operator=(const atomic_integral_width&) EA_NOEXCEPT = delete; \ + atomic_integral_width& operator=(const atomic_integral_width&) volatile EA_NOEXCEPT = delete; \ + \ + public: /* fetch_add */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_FUNCS_IMPL(fetch_add, FETCH_ADD, bits) \ + \ + public: /* add_fetch */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_FUNCS_IMPL(add_fetch, ADD_FETCH, bits) \ + \ + public: /* fetch_sub */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_FUNCS_IMPL(fetch_sub, FETCH_SUB, bits) \ + \ + public: /* sub_fetch */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_FUNCS_IMPL(sub_fetch, SUB_FETCH, bits) \ + \ + public: /* fetch_and */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_FUNCS_IMPL(fetch_and, FETCH_AND, 
bits) \ + \ + public: /* and_fetch */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_FUNCS_IMPL(and_fetch, AND_FETCH, bits) \ + \ + public: /* fetch_or */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_FUNCS_IMPL(fetch_or, FETCH_OR, bits) \ + \ + public: /* or_fetch */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_FUNCS_IMPL(or_fetch, OR_FETCH, bits) \ + \ + public: /* fetch_xor */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_FUNCS_IMPL(fetch_xor, FETCH_XOR, bits) \ + \ + public: /* xor_fetch */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_FUNCS_IMPL(xor_fetch, XOR_FETCH, bits) \ + \ + public: /* operator++ && operator-- */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_INC_DEC_OPERATOR_IMPL(++, add_fetch, fetch_add) \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_INC_DEC_OPERATOR_IMPL(--, sub_fetch, fetch_sub) \ + \ + public: /* operator+= && operator-= */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_ASSIGNMENT_OPERATOR_IMPL(+=, add_fetch) \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_ASSIGNMENT_OPERATOR_IMPL(-=, sub_fetch) \ + \ + public: /* operator&= */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_ASSIGNMENT_OPERATOR_IMPL(&=, and_fetch) \ + \ + public: /* operator|= */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_ASSIGNMENT_OPERATOR_IMPL(|=, or_fetch) \ + \ + public: /* operator^= */ \ + \ + EASTL_ATOMIC_INTEGRAL_FETCH_ASSIGNMENT_OPERATOR_IMPL(^=, xor_fetch) \ + \ + }; + + +#if defined(EASTL_ATOMIC_HAS_8BIT) + EASTL_ATOMIC_INTEGRAL_WIDTH_SPECIALIZE(1, 8) +#endif + +#if defined(EASTL_ATOMIC_HAS_16BIT) + EASTL_ATOMIC_INTEGRAL_WIDTH_SPECIALIZE(2, 16) +#endif + +#if defined(EASTL_ATOMIC_HAS_32BIT) + EASTL_ATOMIC_INTEGRAL_WIDTH_SPECIALIZE(4, 32) +#endif + +#if defined(EASTL_ATOMIC_HAS_64BIT) + EASTL_ATOMIC_INTEGRAL_WIDTH_SPECIALIZE(8, 64) +#endif + +#if defined(EASTL_ATOMIC_HAS_128BIT) + EASTL_ATOMIC_INTEGRAL_WIDTH_SPECIALIZE(16, 128) +#endif + + +} // namespace internal + + +} // namespace eastl + + +#include "atomic_pop_compiler_options.h" + + +#endif /* EASTL_ATOMIC_INTERNAL_INTEGRAL_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros.h new file mode 100644 index 00000000..756a4b4d --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros.h @@ -0,0 +1,67 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_H +#define EASTL_ATOMIC_INTERNAL_MACROS_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// The reason for the implementation separating out into a compiler and architecture +// folder is as follows. +// +// The compiler directory is meant to implement atomics using the compiler provided +// intrinsics. This also implies that usually the same compiler instrinsic implementation +// can be used for any architecture the compiler supports. If a compiler provides intrinsics +// to support barriers or atomic operations, then that implementation should be in the +// compiler directory. +// +// The arch directory is meant to manually implement atomics for a specific architecture +// such as power or x86. There may be some compiler specific code in this directory because +// GCC inline assembly syntax may be different than another compiler as an example. 
+// +// The arch directory can also be used to implement some atomic operations ourselves +// if we deem the compiler provided implementation to be inefficient for the given +// architecture or we need to do some things manually for a given compiler. +// +// The atomic_macros directory implements the macros that the rest of the atomic +// library uses. These macros will expand to either the compiler or arch implemented +// macro. The arch implemented macro is given priority over the compiler implemented +// macro if both are implemented otherwise whichever is implemented is chosen or +// an error is emitted if none are implemented. +// +// The implementation being all macros has a couple nice side effects as well. +// +// 1. All the implementation ends up funneling into one low level macro implementation +// which makes it easy to verify correctness, reduce copy-paste errors and differences +// in various platform implementations. +// +// 2. Allows for the implementation to be implemented efficiently on compilers that do not +// directly implement the C++ memory model in their intrinsics such as msvc. +// +// 3. Allows for the implementation of atomics that may not be supported on the given platform, +// such as 128-bit atomics on 32-bit platforms since the macros will only ever be expanded +// on platforms that support said features. This makes implementing said features pretty easy +// since we do not have to worry about complicated feature detection in the low level implementations. +// +// The macro implementation may asume that all passed in types are trivially constructible thus it is +// free to create local variables of the passed in types as it may please. +// It may also assume that all passed in types are trivially copyable as well. +// It cannot assume any passed in type is any given type thus is a specific type if needed, it must do an +// EASTL_ATOMIC_TYPE_PUN_CAST() to the required type. +// + + +#include "compiler/compiler.h" +#include "arch/arch.h" + +#include "atomic_macros/atomic_macros.h" + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros.h new file mode 100644 index 00000000..941ac51c --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros.h @@ -0,0 +1,145 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
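To make the layering described in the comment above concrete, the following hand-written trace sketches how one user-level operation could funnel down through the macro layers on a platform where only the compiler backend implements the op. The availability assumptions and the __atomic_fetch_add mention are illustrative, not taken from this patch.

// Hand-written expansion sketch (assumed: arch macro absent, compiler macro present).
//
//   eastl::atomic<uint32_t>::fetch_add(1, eastl::memory_order_relaxed)
//     -> EASTL_ATOMIC_FETCH_ADD_RELAXED_32(uint32_t, retVal, this->GetAtomicAddress(), 1)
//     -> EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELAXED_32)(uint32_t, retVal, ptr, 1)
//     -> EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_32(uint32_t, retVal, ptr, 1)
//        // e.g. a single __atomic_fetch_add() intrinsic on GCC/Clang-style backends
//
// Every public entry point funnels into one such low-level macro, which is the
// "easy to verify correctness" property the comment above describes.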
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_ATOMIC_MACROS_H +#define EASTL_ATOMIC_INTERNAL_ATOMIC_MACROS_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#include "atomic_macros_base.h" + +#include "atomic_macros_fetch_add.h" +#include "atomic_macros_fetch_sub.h" + +#include "atomic_macros_fetch_and.h" +#include "atomic_macros_fetch_xor.h" +#include "atomic_macros_fetch_or.h" + +#include "atomic_macros_add_fetch.h" +#include "atomic_macros_sub_fetch.h" + +#include "atomic_macros_and_fetch.h" +#include "atomic_macros_xor_fetch.h" +#include "atomic_macros_or_fetch.h" + +#include "atomic_macros_exchange.h" + +#include "atomic_macros_cmpxchg_weak.h" +#include "atomic_macros_cmpxchg_strong.h" + +#include "atomic_macros_load.h" +#include "atomic_macros_store.h" + +#include "atomic_macros_compiler_barrier.h" + +#include "atomic_macros_cpu_pause.h" + +#include "atomic_macros_memory_barrier.h" + +#include "atomic_macros_signal_fence.h" + +#include "atomic_macros_thread_fence.h" + + +///////////////////////////////////////////////////////////////////////////////// + + +#if defined(EASTL_COMPILER_ATOMIC_HAS_8BIT) || defined(EASTL_ARCH_ATOMIC_HAS_8BIT) + + #define EASTL_ATOMIC_HAS_8BIT + +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_HAS_16BIT) || defined(EASTL_ARCH_ATOMIC_HAS_16BIT) + + #define EASTL_ATOMIC_HAS_16BIT + +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_HAS_32BIT) || defined(EASTL_ARCH_ATOMIC_HAS_32BIT) + + #define EASTL_ATOMIC_HAS_32BIT + +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_HAS_64BIT) || defined(EASTL_ARCH_ATOMIC_HAS_64BIT) + + #define EASTL_ATOMIC_HAS_64BIT + +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_HAS_128BIT) || defined(EASTL_ARCH_ATOMIC_HAS_128BIT) + + #define EASTL_ATOMIC_HAS_128BIT + +#endif + + +///////////////////////////////////////////////////////////////////////////////// + + +#if defined(EASTL_ARCH_ATOMIC_FIXED_WIDTH_TYPE_8) + + #define EASTL_ATOMIC_FIXED_WIDTH_TYPE_8 EASTL_ARCH_ATOMIC_FIXED_WIDTH_TYPE_8 + +#elif defined(EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_8) + + #define EASTL_ATOMIC_FIXED_WIDTH_TYPE_8 EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_8 + +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FIXED_WIDTH_TYPE_16) + + #define EASTL_ATOMIC_FIXED_WIDTH_TYPE_16 EASTL_ARCH_ATOMIC_FIXED_WIDTH_TYPE_16 + +#elif defined(EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_16) + + #define EASTL_ATOMIC_FIXED_WIDTH_TYPE_16 EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_16 + +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FIXED_WIDTH_TYPE_32) + + #define EASTL_ATOMIC_FIXED_WIDTH_TYPE_32 EASTL_ARCH_ATOMIC_FIXED_WIDTH_TYPE_32 + +#elif defined(EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_32) + + #define EASTL_ATOMIC_FIXED_WIDTH_TYPE_32 EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_32 + +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FIXED_WIDTH_TYPE_64) + + #define EASTL_ATOMIC_FIXED_WIDTH_TYPE_64 EASTL_ARCH_ATOMIC_FIXED_WIDTH_TYPE_64 + +#elif defined(EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_64) + + #define EASTL_ATOMIC_FIXED_WIDTH_TYPE_64 EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_64 + +#endif + + +#if defined(EASTL_ARCH_ATOMIC_FIXED_WIDTH_TYPE_128) + + #define EASTL_ATOMIC_FIXED_WIDTH_TYPE_128 EASTL_ARCH_ATOMIC_FIXED_WIDTH_TYPE_128 + +#elif defined(EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_128) + + #define EASTL_ATOMIC_FIXED_WIDTH_TYPE_128 EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_128 + +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_ATOMIC_MACROS_H */ diff --git 
a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_add_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_add_fetch.h new file mode 100644 index 00000000..f551a07c --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_add_fetch.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_ADD_FETCH_H +#define EASTL_ATOMIC_INTERNAL_MACROS_ADD_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_ADD_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_ATOMIC_ADD_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_RELAXED_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_ACQUIRE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_RELEASE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_ACQ_REL_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_SEQ_CST_8)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_ADD_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_RELAXED_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_ACQUIRE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_RELEASE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_ACQ_REL_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_SEQ_CST_16)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_ADD_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_RELAXED_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_ACQUIRE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_RELEASE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_ACQ_REL_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_SEQ_CST_32)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_ADD_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_RELAXED_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_ACQUIRE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_RELEASE_64)(type, ret, ptr, 
val) + +#define EASTL_ATOMIC_ADD_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_ACQ_REL_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_SEQ_CST_64)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_ADD_FETCH_RELAXED_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_RELAXED_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_ACQUIRE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_RELEASE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_RELEASE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_ACQ_REL_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_ADD_FETCH_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_ADD_FETCH_SEQ_CST_128)(type, ret, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_ADD_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_and_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_and_fetch.h new file mode 100644 index 00000000..69127223 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_and_fetch.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_AND_FETCH_H +#define EASTL_ATOMIC_INTERNAL_MACROS_AND_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_AND_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_ATOMIC_AND_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_RELAXED_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_ACQUIRE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_RELEASE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_ACQ_REL_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_SEQ_CST_8)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_AND_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_RELAXED_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_ACQUIRE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_RELEASE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_ACQ_REL_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_SEQ_CST_16)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_AND_FETCH_RELAXED_32(type, ret, 
ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_RELAXED_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_ACQUIRE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_RELEASE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_ACQ_REL_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_SEQ_CST_32)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_AND_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_RELAXED_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_ACQUIRE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_RELEASE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_ACQ_REL_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_SEQ_CST_64)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_AND_FETCH_RELAXED_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_RELAXED_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_ACQUIRE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_RELEASE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_RELEASE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_ACQ_REL_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_AND_FETCH_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_AND_FETCH_SEQ_CST_128)(type, ret, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_AND_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_base.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_base.h new file mode 100644 index 00000000..f03720d9 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_base.h @@ -0,0 +1,65 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_BASE_H +#define EASTL_ATOMIC_INTERNAL_MACROS_BASE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_ATOMIC_INTERNAL_COMPILER_AVAILABLE(op) \ + EA_PREPROCESSOR_JOIN(EA_PREPROCESSOR_JOIN(EASTL_COMPILER_, op), _AVAILABLE) + +#define EASTL_ATOMIC_INTERNAL_ARCH_AVAILABLE(op) \ + EA_PREPROCESSOR_JOIN(EA_PREPROCESSOR_JOIN(EASTL_ARCH_, op), _AVAILABLE) + +#define EASTL_ATOMIC_INTERNAL_NOT_IMPLEMENTED_ERROR(...) 
\ + static_assert(false, "eastl::atomic atomic macro not implemented!") + + +/* Compiler && Arch Not Implemented */ +#define EASTL_ATOMIC_INTERNAL_OP_PATTERN_00(op) \ + EASTL_ATOMIC_INTERNAL_NOT_IMPLEMENTED_ERROR + +/* Arch Implemented */ +#define EASTL_ATOMIC_INTERNAL_OP_PATTERN_01(op) \ + EA_PREPROCESSOR_JOIN(EASTL_ARCH_, op) + +/* Compiler Implmented */ +#define EASTL_ATOMIC_INTERNAL_OP_PATTERN_10(op) \ + EA_PREPROCESSOR_JOIN(EASTL_COMPILER_, op) + +/* Compiler && Arch Implemented */ +#define EASTL_ATOMIC_INTERNAL_OP_PATTERN_11(op) \ + EA_PREPROCESSOR_JOIN(EASTL_ARCH_, op) + + +/* This macro creates the pattern macros above for the 2x2 True-False truth table */ +#define EASTL_ATOMIC_INTERNAL_OP_HELPER1(compiler, arch, op) \ + EA_PREPROCESSOR_JOIN(EASTL_ATOMIC_INTERNAL_OP_PATTERN_, EA_PREPROCESSOR_JOIN(compiler, arch))(op) + + +///////////////////////////////////////////////////////////////////////////////// +// +// EASTL_ATOMIC_CHOOSE_OP_IMPL +// +// This macro chooses between the compiler or architecture implementation for a +// given atomic operation. +// +// USAGE: +// +// EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELAXED_8)(ret, ptr, val) +// +#define EASTL_ATOMIC_CHOOSE_OP_IMPL(op) \ + EASTL_ATOMIC_INTERNAL_OP_HELPER1( \ + EASTL_ATOMIC_INTERNAL_COMPILER_AVAILABLE(op), \ + EASTL_ATOMIC_INTERNAL_ARCH_AVAILABLE(op), \ + op \ + ) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_BASE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cmpxchg_strong.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cmpxchg_strong.h new file mode 100644 index 00000000..3cff4935 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cmpxchg_strong.h @@ -0,0 +1,245 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
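As a worked example of the 2x2 selection in atomic_macros_base.h above, assume the compiler backend defines its availability flag for ATOMIC_FETCH_ADD_RELAXED_8 as 1 while the arch backend defines it as 0; the concrete op and flag values are assumptions chosen to match the usage comment in that header.

// Worked expansion (assumed availability flags, chosen for illustration):
//   EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_8_AVAILABLE == 1
//   EASTL_ARCH_ATOMIC_FETCH_ADD_RELAXED_8_AVAILABLE     == 0
//
// EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELAXED_8)
//   -> EASTL_ATOMIC_INTERNAL_OP_HELPER1(1, 0, ATOMIC_FETCH_ADD_RELAXED_8)
//   -> EASTL_ATOMIC_INTERNAL_OP_PATTERN_10(ATOMIC_FETCH_ADD_RELAXED_8)   // "compiler implemented" cell
//   -> EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_8
//
// With both flags 1, PATTERN_11 still joins EASTL_ARCH_, i.e. the arch
// implementation wins; with both flags 0, PATTERN_00 expands to the
// static_assert error macro.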
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_CMPXCHG_STRONG_H +#define EASTL_ATOMIC_INTERNAL_MACROS_CMPXCHG_STRONG_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_CMPXCHG_STRONG_*_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_16(type, ret, ptr, expected, 
desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64)(type, ret, ptr, expected, 
desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128)(type, ret, ptr, expected, desired) + + +///////////////////////////////////////////////////////////////////////////////// + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_CMPXCHG_STRONG_*(bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELAXED_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELEASE_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELEASE_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_8(type, ret, ptr, expected, desired) \ + 
EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_8)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELEASE_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELEASE_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_16)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELEASE_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELEASE_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_32)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELEASE_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELEASE_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_64)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQUIRE_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_RELEASE_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_RELEASE_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_ACQ_REL_128(type, ret, ptr, expected, desired) \ + 
EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_ACQ_REL_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_STRONG_SEQ_CST_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_STRONG_SEQ_CST_128)(type, ret, ptr, expected, desired) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_CMPXCHG_STRONG_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cmpxchg_weak.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cmpxchg_weak.h new file mode 100644 index 00000000..60ea8b0b --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cmpxchg_weak.h @@ -0,0 +1,245 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_CMPXCHG_WEAK_H +#define EASTL_ATOMIC_INTERNAL_MACROS_CMPXCHG_WEAK_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_CMPXCHG_WEAK_*_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16(type, ret, ptr, expected, 
desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64(type, ret, 
ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128)(type, ret, ptr, expected, desired) + + +///////////////////////////////////////////////////////////////////////////////// + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_CMPXCHG_WEAK_*(bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELAXED_8)(type, ret, ptr, 
expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELEASE_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELEASE_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_8)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_8(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_8)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELAXED_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELEASE_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELEASE_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_16)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_16(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_16)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELAXED_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELEASE_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELEASE_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_32)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_32(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_32)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELAXED_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELEASE_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELEASE_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_64)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_64(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_64)(type, ret, ptr, expected, desired) + + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELAXED_128(type, ret, ptr, expected, 
desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELAXED_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQUIRE_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_RELEASE_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_RELEASE_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_ACQ_REL_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_ACQ_REL_128)(type, ret, ptr, expected, desired) + +#define EASTL_ATOMIC_CMPXCHG_WEAK_SEQ_CST_128(type, ret, ptr, expected, desired) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CMPXCHG_WEAK_SEQ_CST_128)(type, ret, ptr, expected, desired) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_CMPXCHG_WEAK_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_compiler_barrier.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_compiler_barrier.h new file mode 100644 index 00000000..96ea6d0b --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_compiler_barrier.h @@ -0,0 +1,30 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_COMPILER_BARRIER_H +#define EASTL_ATOMIC_INTERNAL_MACROS_COMPILER_BARRIER_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_COMPILER_BARRIER() +// +#define EASTL_ATOMIC_COMPILER_BARRIER() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_COMPILER_BARRIER)() + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY(const T&, type) +// +#define EASTL_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY(val, type) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY)(val, type) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_COMPILER_BARRIER_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cpu_pause.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cpu_pause.h new file mode 100644 index 00000000..e027b576 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_cpu_pause.h @@ -0,0 +1,22 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
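Aside, not part of the patch itself: the CMPXCHG_WEAK macros above ultimately back eastl::atomic<T>::compare_exchange_weak, which, like its std counterpart, may fail spuriously and is therefore meant to be retried in a loop. A minimal sketch of that usage, assuming the public eastl::atomic API mirrors std::atomic and is reached through <EASTL/atomic.h>:

#include <EASTL/atomic.h>

struct Node { Node* next; };

// Lock-free stack push: compare_exchange_weak may fail spuriously, so the
// swap of the head pointer is retried until it succeeds. On failure,
// 'expected' is refreshed with the current head value.
void push(eastl::atomic<Node*>& head, Node* n)
{
    Node* expected = head.load(eastl::memory_order_relaxed);
    do {
        n->next = expected;
    } while (!head.compare_exchange_weak(expected, n,
                                         eastl::memory_order_release,
                                         eastl::memory_order_relaxed));
}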
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_CPU_PAUSE_H +#define EASTL_ATOMIC_INTERNAL_MACROS_CPU_PAUSE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_CPU_PAUSE() +// +#define EASTL_ATOMIC_CPU_PAUSE() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CPU_PAUSE)() + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_CPU_PAUSE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_exchange.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_exchange.h new file mode 100644 index 00000000..0681318f --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_exchange.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_EXCHANGE_H +#define EASTL_ATOMIC_INTERNAL_MACROS_EXCHANGE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_EXCHANGE_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_ATOMIC_EXCHANGE_RELAXED_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_RELAXED_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_ACQUIRE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_RELEASE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_RELEASE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_ACQ_REL_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_SEQ_CST_8)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_EXCHANGE_RELAXED_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_RELAXED_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_ACQUIRE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_RELEASE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_RELEASE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_ACQ_REL_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_SEQ_CST_16)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_EXCHANGE_RELAXED_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_RELAXED_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_ACQUIRE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_RELEASE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_RELEASE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_ACQ_REL_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_SEQ_CST_32(type, ret, ptr, val) \ + 
EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_SEQ_CST_32)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_EXCHANGE_RELAXED_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_RELAXED_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_ACQUIRE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_RELEASE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_RELEASE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_ACQ_REL_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_SEQ_CST_64)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_EXCHANGE_RELAXED_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_RELAXED_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_ACQUIRE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_RELEASE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_RELEASE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_ACQ_REL_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_EXCHANGE_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_EXCHANGE_SEQ_CST_128)(type, ret, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_EXCHANGE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_add.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_add.h new file mode 100644 index 00000000..701fdf37 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_add.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
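These dispatch macros use an out-parameter convention rather than returning a value: the result of the operation comes back through the ret argument, and EASTL_ATOMIC_CHOOSE_OP_IMPL forwards the whole call to whichever compiler or architecture backend was selected at preprocessing time. An illustrative sketch of the calling convention at this level (internal machinery, shown only to make the macro shape concrete; real call sites are the eastl::atomic member functions):

#include <stdint.h>
#include <EASTL/atomic.h>  // pulls in the internal macro layer

// Illustrative only: poking the internal macro layer directly. Application
// code would call eastl::atomic<uint32_t>::exchange() instead. The macro
// places the previous value into 'previous' rather than returning it.
static uint32_t exchangeSeqCst(uint32_t* p, uint32_t desired)
{
    uint32_t previous;
    EASTL_ATOMIC_EXCHANGE_SEQ_CST_32(uint32_t, previous, p, desired);
    return previous;  // value *p held before the exchange
}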
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_FETCH_ADD_H +#define EASTL_ATOMIC_INTERNAL_MACROS_FETCH_ADD_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_FETCH_ADD_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_ATOMIC_FETCH_ADD_RELAXED_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELAXED_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_ACQUIRE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_RELEASE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELEASE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_ACQ_REL_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_SEQ_CST_8)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_ADD_RELAXED_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELAXED_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_ACQUIRE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_RELEASE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELEASE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_ACQ_REL_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_SEQ_CST_16)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_ADD_RELAXED_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELAXED_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_ACQUIRE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_RELEASE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELEASE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_ACQ_REL_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_SEQ_CST_32)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_ADD_RELAXED_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELAXED_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_ACQUIRE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_RELEASE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELEASE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_ACQ_REL_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_SEQ_CST_64)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_ADD_RELAXED_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELAXED_128)(type, ret, ptr, val) + +#define 
EASTL_ATOMIC_FETCH_ADD_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_ACQUIRE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_RELEASE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_RELEASE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_ACQ_REL_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_ADD_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_ADD_SEQ_CST_128)(type, ret, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_FETCH_ADD_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_and.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_and.h new file mode 100644 index 00000000..831f1bfe --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_and.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_FETCH_AND_H +#define EASTL_ATOMIC_INTERNAL_MACROS_FETCH_AND_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_FETCH_AND_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_ATOMIC_FETCH_AND_RELAXED_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_RELAXED_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_ACQUIRE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_RELEASE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_RELEASE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_ACQ_REL_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_SEQ_CST_8)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_AND_RELAXED_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_RELAXED_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_ACQUIRE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_RELEASE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_RELEASE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_ACQ_REL_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_SEQ_CST_16)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_AND_RELAXED_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_RELAXED_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_ACQUIRE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_RELEASE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_RELEASE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_ACQ_REL_32(type, ret, ptr, val) \ + 
EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_ACQ_REL_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_SEQ_CST_32)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_AND_RELAXED_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_RELAXED_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_ACQUIRE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_RELEASE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_RELEASE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_ACQ_REL_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_SEQ_CST_64)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_AND_RELAXED_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_RELAXED_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_ACQUIRE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_RELEASE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_RELEASE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_ACQ_REL_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_AND_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_AND_SEQ_CST_128)(type, ret, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_FETCH_AND_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_or.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_or.h new file mode 100644 index 00000000..b1322970 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_or.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
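The FETCH_AND macros just above and the FETCH_OR family that follows are the usual read-modify-write pair for flag words: fetch_or sets bits, fetch_and clears them, and both hand back the previous value so the caller can tell whether a bit actually changed. A small sketch against the public member functions, assuming they mirror std::atomic:

#include <stdint.h>
#include <EASTL/atomic.h>

enum : uint32_t { kDirty = 1u << 0 };

// Returns true only for the call that actually set the dirty bit.
bool markDirty(eastl::atomic<uint32_t>& flags)
{
    uint32_t prev = flags.fetch_or(kDirty, eastl::memory_order_acq_rel);
    return (prev & kDirty) == 0;
}

void clearDirty(eastl::atomic<uint32_t>& flags)
{
    flags.fetch_and(~kDirty, eastl::memory_order_release);
}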
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_FETCH_OR_H +#define EASTL_ATOMIC_INTERNAL_MACROS_FETCH_OR_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_FETCH_OR_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_ATOMIC_FETCH_OR_RELAXED_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_RELAXED_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_ACQUIRE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_RELEASE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_RELEASE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_ACQ_REL_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_SEQ_CST_8)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_OR_RELAXED_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_RELAXED_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_ACQUIRE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_RELEASE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_RELEASE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_ACQ_REL_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_SEQ_CST_16)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_OR_RELAXED_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_RELAXED_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_ACQUIRE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_RELEASE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_RELEASE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_ACQ_REL_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_SEQ_CST_32)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_OR_RELAXED_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_RELAXED_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_ACQUIRE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_RELEASE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_RELEASE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_ACQ_REL_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_SEQ_CST_64)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_OR_RELAXED_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_RELAXED_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_ACQUIRE_128(type, ret, ptr, val) \ + 
EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_ACQUIRE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_RELEASE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_RELEASE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_ACQ_REL_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_OR_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_OR_SEQ_CST_128)(type, ret, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_FETCH_OR_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_sub.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_sub.h new file mode 100644 index 00000000..00980643 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_sub.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_FETCH_SUB_H +#define EASTL_ATOMIC_INTERNAL_MACROS_FETCH_SUB_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_FETCH_SUB_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_ATOMIC_FETCH_SUB_RELAXED_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_RELAXED_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_ACQUIRE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_RELEASE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_RELEASE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_ACQ_REL_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_SEQ_CST_8)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_SUB_RELAXED_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_RELAXED_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_ACQUIRE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_RELEASE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_RELEASE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_ACQ_REL_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_SEQ_CST_16)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_SUB_RELAXED_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_RELAXED_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_ACQUIRE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_RELEASE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_RELEASE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_ACQ_REL_32)(type, ret, ptr, 
val) + +#define EASTL_ATOMIC_FETCH_SUB_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_SEQ_CST_32)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_SUB_RELAXED_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_RELAXED_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_ACQUIRE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_RELEASE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_RELEASE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_ACQ_REL_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_SEQ_CST_64)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_SUB_RELAXED_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_RELAXED_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_ACQUIRE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_RELEASE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_RELEASE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_ACQ_REL_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_SUB_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_SUB_SEQ_CST_128)(type, ret, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_FETCH_SUB_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_xor.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_xor.h new file mode 100644 index 00000000..2887ea56 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_fetch_xor.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
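The most common client of FETCH_SUB is reference counting: the decrement uses acquire-release ordering so that the thread dropping the last reference observes every write made by earlier owners before it destroys the object. A hedged sketch using the public member function:

#include <stdint.h>
#include <EASTL/atomic.h>

struct Shared
{
    eastl::atomic<int32_t> refCount{1};
    // ... payload ...
};

void release(Shared* s)
{
    // fetch_sub returns the previous count; 1 means this was the last owner.
    if (s->refCount.fetch_sub(1, eastl::memory_order_acq_rel) == 1)
        delete s;
}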
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_FETCH_XOR_H +#define EASTL_ATOMIC_INTERNAL_MACROS_FETCH_XOR_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_FETCH_XOR_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_ATOMIC_FETCH_XOR_RELAXED_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_RELAXED_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_ACQUIRE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_RELEASE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_RELEASE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_ACQ_REL_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_SEQ_CST_8)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_XOR_RELAXED_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_RELAXED_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_ACQUIRE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_RELEASE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_RELEASE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_ACQ_REL_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_SEQ_CST_16)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_XOR_RELAXED_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_RELAXED_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_ACQUIRE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_RELEASE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_RELEASE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_ACQ_REL_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_SEQ_CST_32)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_XOR_RELAXED_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_RELAXED_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_ACQUIRE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_RELEASE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_RELEASE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_ACQ_REL_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_SEQ_CST_64)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_FETCH_XOR_RELAXED_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_RELAXED_128)(type, ret, ptr, val) + +#define 
EASTL_ATOMIC_FETCH_XOR_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_ACQUIRE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_RELEASE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_RELEASE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_ACQ_REL_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_FETCH_XOR_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_FETCH_XOR_SEQ_CST_128)(type, ret, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_FETCH_XOR_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_load.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_load.h new file mode 100644 index 00000000..76580593 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_load.h @@ -0,0 +1,75 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_LOAD_H +#define EASTL_ATOMIC_INTERNAL_MACROS_LOAD_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_LOAD_*_N(type, type ret, type * ptr) +// +#define EASTL_ATOMIC_LOAD_RELAXED_8(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_RELAXED_8)(type, ret, ptr) + +#define EASTL_ATOMIC_LOAD_ACQUIRE_8(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_ACQUIRE_8)(type, ret, ptr) + +#define EASTL_ATOMIC_LOAD_SEQ_CST_8(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_SEQ_CST_8)(type, ret, ptr) + + +#define EASTL_ATOMIC_LOAD_RELAXED_16(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_RELAXED_16)(type, ret, ptr) + +#define EASTL_ATOMIC_LOAD_ACQUIRE_16(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_ACQUIRE_16)(type, ret, ptr) + +#define EASTL_ATOMIC_LOAD_SEQ_CST_16(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_SEQ_CST_16)(type, ret, ptr) + + +#define EASTL_ATOMIC_LOAD_RELAXED_32(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_RELAXED_32)(type, ret, ptr) + +#define EASTL_ATOMIC_LOAD_ACQUIRE_32(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_ACQUIRE_32)(type, ret, ptr) + +#define EASTL_ATOMIC_LOAD_SEQ_CST_32(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_SEQ_CST_32)(type, ret, ptr) + + +#define EASTL_ATOMIC_LOAD_RELAXED_64(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_RELAXED_64)(type, ret, ptr) + +#define EASTL_ATOMIC_LOAD_ACQUIRE_64(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_ACQUIRE_64)(type, ret, ptr) + +#define EASTL_ATOMIC_LOAD_SEQ_CST_64(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_SEQ_CST_64)(type, ret, ptr) + + +#define EASTL_ATOMIC_LOAD_RELAXED_128(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_RELAXED_128)(type, ret, ptr) + +#define EASTL_ATOMIC_LOAD_ACQUIRE_128(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_ACQUIRE_128)(type, ret, ptr) + +#define EASTL_ATOMIC_LOAD_SEQ_CST_128(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_SEQ_CST_128)(type, ret, ptr) + + +#define EASTL_ATOMIC_LOAD_READ_DEPENDS_32(type, ret, ptr) \ + 
EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_READ_DEPENDS_32)(type, ret, ptr) + +#define EASTL_ATOMIC_LOAD_READ_DEPENDS_64(type, ret, ptr) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_LOAD_READ_DEPENDS_64)(type, ret, ptr) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_LOAD_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_memory_barrier.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_memory_barrier.h new file mode 100644 index 00000000..14f7be92 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_memory_barrier.h @@ -0,0 +1,38 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_MEMORY_BARRIER_H +#define EASTL_ATOMIC_INTERNAL_MACROS_MEMORY_BARRIER_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_CPU_MB() +// +#define EASTL_ATOMIC_CPU_MB() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CPU_MB)() + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_CPU_WMB() +// +#define EASTL_ATOMIC_CPU_WMB() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CPU_WMB)() + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_CPU_RMB() +// +#define EASTL_ATOMIC_CPU_RMB() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_CPU_RMB)() + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_MEMORY_BARRIER_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_or_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_or_fetch.h new file mode 100644 index 00000000..c9ebd6e3 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_or_fetch.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_OR_FETCH_H +#define EASTL_ATOMIC_INTERNAL_MACROS_OR_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_OR_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_ATOMIC_OR_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_RELAXED_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_ACQUIRE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_RELEASE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_ACQ_REL_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_SEQ_CST_8)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_OR_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_RELAXED_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_ACQUIRE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_RELEASE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_ACQ_REL_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_SEQ_CST_16)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_OR_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_RELAXED_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_ACQUIRE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_RELEASE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_ACQ_REL_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_SEQ_CST_32)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_OR_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_RELAXED_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_ACQUIRE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_RELEASE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_ACQ_REL_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_SEQ_CST_64)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_OR_FETCH_RELAXED_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_RELAXED_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_ACQUIRE_128(type, ret, ptr, val) \ + 
EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_ACQUIRE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_RELEASE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_RELEASE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_ACQ_REL_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_OR_FETCH_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_OR_FETCH_SEQ_CST_128)(type, ret, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_OR_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_signal_fence.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_signal_fence.h new file mode 100644 index 00000000..dd16b106 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_signal_fence.h @@ -0,0 +1,34 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_SIGNAL_FENCE_H +#define EASTL_ATOMIC_INTERNAL_MACROS_SIGNAL_FENCE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_SIGNAL_FENCE_*() +// +#define EASTL_ATOMIC_SIGNAL_FENCE_RELAXED() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SIGNAL_FENCE_RELAXED)() + +#define EASTL_ATOMIC_SIGNAL_FENCE_ACQUIRE() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SIGNAL_FENCE_ACQUIRE)() + +#define EASTL_ATOMIC_SIGNAL_FENCE_RELEASE() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SIGNAL_FENCE_RELEASE)() + +#define EASTL_ATOMIC_SIGNAL_FENCE_ACQ_REL() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SIGNAL_FENCE_ACQ_REL)() + +#define EASTL_ATOMIC_SIGNAL_FENCE_SEQ_CST() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SIGNAL_FENCE_SEQ_CST)() + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_SIGNAL_FENCE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_store.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_store.h new file mode 100644 index 00000000..64b662e1 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_store.h @@ -0,0 +1,68 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_STORE_H +#define EASTL_ATOMIC_INTERNAL_MACROS_STORE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_STORE_*_N(type, type * ptr, type val) +// +#define EASTL_ATOMIC_STORE_RELAXED_8(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_RELAXED_8)(type, ptr, val) + +#define EASTL_ATOMIC_STORE_RELEASE_8(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_RELEASE_8)(type, ptr, val) + +#define EASTL_ATOMIC_STORE_SEQ_CST_8(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_SEQ_CST_8)(type, ptr, val) + + +#define EASTL_ATOMIC_STORE_RELAXED_16(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_RELAXED_16)(type, ptr, val) + +#define EASTL_ATOMIC_STORE_RELEASE_16(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_RELEASE_16)(type, ptr, val) + +#define EASTL_ATOMIC_STORE_SEQ_CST_16(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_SEQ_CST_16)(type, ptr, val) + + +#define EASTL_ATOMIC_STORE_RELAXED_32(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_RELAXED_32)(type, ptr, val) + +#define EASTL_ATOMIC_STORE_RELEASE_32(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_RELEASE_32)(type, ptr, val) + +#define EASTL_ATOMIC_STORE_SEQ_CST_32(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_SEQ_CST_32)(type, ptr, val) + + +#define EASTL_ATOMIC_STORE_RELAXED_64(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_RELAXED_64)(type, ptr, val) + +#define EASTL_ATOMIC_STORE_RELEASE_64(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_RELEASE_64)(type, ptr, val) + +#define EASTL_ATOMIC_STORE_SEQ_CST_64(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_SEQ_CST_64)(type, ptr, val) + + +#define EASTL_ATOMIC_STORE_RELAXED_128(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_RELAXED_128)(type, ptr, val) + +#define EASTL_ATOMIC_STORE_RELEASE_128(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_RELEASE_128)(type, ptr, val) + +#define EASTL_ATOMIC_STORE_SEQ_CST_128(type, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_STORE_SEQ_CST_128)(type, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_STORE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_sub_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_sub_fetch.h new file mode 100644 index 00000000..330f38e9 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_sub_fetch.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
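The STORE family above and the LOAD family earlier come in release and acquire flavours because their main job is publication: a release store makes all prior writes visible to any thread that later performs an acquire load of the same variable. A small message-passing sketch, again assuming the public API mirrors std::atomic:

#include <EASTL/atomic.h>

static int payload = 0;
static eastl::atomic<bool> ready{false};

void producer()
{
    payload = 42;                                    // ordinary write
    ready.store(true, eastl::memory_order_release);  // publish it
}

void consumer()
{
    while (!ready.load(eastl::memory_order_acquire)) { }  // spin until published
    // The acquire load synchronizes with the release store, so payload == 42 here.
}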
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_SUB_FETCH_H +#define EASTL_ATOMIC_INTERNAL_MACROS_SUB_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_SUB_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_ATOMIC_SUB_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_RELAXED_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_ACQUIRE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_RELEASE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_ACQ_REL_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_SEQ_CST_8)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_SUB_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_RELAXED_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_ACQUIRE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_RELEASE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_ACQ_REL_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_SEQ_CST_16)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_SUB_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_RELAXED_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_ACQUIRE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_RELEASE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_ACQ_REL_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_SEQ_CST_32)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_SUB_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_RELAXED_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_ACQUIRE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_RELEASE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_ACQ_REL_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_SEQ_CST_64)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_SUB_FETCH_RELAXED_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_RELAXED_128)(type, ret, ptr, val) + +#define 
EASTL_ATOMIC_SUB_FETCH_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_ACQUIRE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_RELEASE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_RELEASE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_ACQ_REL_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_SUB_FETCH_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_SUB_FETCH_SEQ_CST_128)(type, ret, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_SUB_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_thread_fence.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_thread_fence.h new file mode 100644 index 00000000..26492c59 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_thread_fence.h @@ -0,0 +1,34 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_THREAD_FENCE_H +#define EASTL_ATOMIC_INTERNAL_MACROS_THREAD_FENCE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_THREAD_FENCE_*() +// +#define EASTL_ATOMIC_THREAD_FENCE_RELAXED() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_THREAD_FENCE_RELAXED)() + +#define EASTL_ATOMIC_THREAD_FENCE_ACQUIRE() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_THREAD_FENCE_ACQUIRE)() + +#define EASTL_ATOMIC_THREAD_FENCE_RELEASE() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_THREAD_FENCE_RELEASE)() + +#define EASTL_ATOMIC_THREAD_FENCE_ACQ_REL() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_THREAD_FENCE_ACQ_REL)() + +#define EASTL_ATOMIC_THREAD_FENCE_SEQ_CST() \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_THREAD_FENCE_SEQ_CST)() + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_THREAD_FENCE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_xor_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_xor_fetch.h new file mode 100644 index 00000000..42276470 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_macros/atomic_macros_xor_fetch.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
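THREAD_FENCE covers the cases where ordering is attached to a standalone fence instead of an individual load or store, for example when a relaxed flag store has to publish an earlier plain write. A sketch assuming eastl::atomic_thread_fence mirrors std::atomic_thread_fence:

#include <EASTL/atomic.h>

static int payload = 0;
static eastl::atomic<bool> ready{false};

void publish()
{
    payload = 123;                                            // plain write
    eastl::atomic_thread_fence(eastl::memory_order_release);  // ordered before...
    ready.store(true, eastl::memory_order_relaxed);           // ...this flag store
}

void consume()
{
    while (!ready.load(eastl::memory_order_relaxed)) { }
    eastl::atomic_thread_fence(eastl::memory_order_acquire);  // pairs with the release fence
    // payload now reads 123.
}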
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MACROS_XOR_FETCH_H +#define EASTL_ATOMIC_INTERNAL_MACROS_XOR_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_ATOMIC_XOR_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_ATOMIC_XOR_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_RELAXED_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_ACQUIRE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_RELEASE_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_ACQ_REL_8)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_SEQ_CST_8)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_XOR_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_RELAXED_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_ACQUIRE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_RELEASE_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_ACQ_REL_16)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_SEQ_CST_16)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_XOR_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_RELAXED_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_ACQUIRE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_RELEASE_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_ACQ_REL_32)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_SEQ_CST_32)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_XOR_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_RELAXED_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_ACQUIRE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_RELEASE_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_ACQ_REL_64)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_SEQ_CST_64)(type, ret, ptr, val) + + +#define EASTL_ATOMIC_XOR_FETCH_RELAXED_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_RELAXED_128)(type, ret, ptr, val) + +#define 
EASTL_ATOMIC_XOR_FETCH_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_ACQUIRE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_RELEASE_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_RELEASE_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_ACQ_REL_128)(type, ret, ptr, val) + +#define EASTL_ATOMIC_XOR_FETCH_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_ATOMIC_CHOOSE_OP_IMPL(ATOMIC_XOR_FETCH_SEQ_CST_128)(type, ret, ptr, val) + + +#endif /* EASTL_ATOMIC_INTERNAL_MACROS_XOR_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_memory_order.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_memory_order.h new file mode 100644 index 00000000..b1c14035 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_memory_order.h @@ -0,0 +1,44 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_MEMORY_ORDER_H +#define EASTL_ATOMIC_INTERNAL_MEMORY_ORDER_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +namespace eastl +{ + + +namespace internal +{ + + +struct memory_order_relaxed_s {}; +struct memory_order_read_depends_s {}; +struct memory_order_acquire_s {}; +struct memory_order_release_s {}; +struct memory_order_acq_rel_s {}; +struct memory_order_seq_cst_s {}; + + +} // namespace internal + + +EASTL_CPP17_INLINE_VARIABLE constexpr auto memory_order_relaxed = internal::memory_order_relaxed_s{}; +EASTL_CPP17_INLINE_VARIABLE constexpr auto memory_order_read_depends = internal::memory_order_read_depends_s{}; +EASTL_CPP17_INLINE_VARIABLE constexpr auto memory_order_acquire = internal::memory_order_acquire_s{}; +EASTL_CPP17_INLINE_VARIABLE constexpr auto memory_order_release = internal::memory_order_release_s{}; +EASTL_CPP17_INLINE_VARIABLE constexpr auto memory_order_acq_rel = internal::memory_order_acq_rel_s{}; +EASTL_CPP17_INLINE_VARIABLE constexpr auto memory_order_seq_cst = internal::memory_order_seq_cst_s{}; + + +} // namespace eastl + + +#endif /* EASTL_ATOMIC_INTERNAL_MEMORY_ORDER_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_pointer.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_pointer.h new file mode 100644 index 00000000..18f6691c --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_pointer.h @@ -0,0 +1,281 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
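Note that the memory orders defined in atomic_memory_order.h just above are distinct empty tag types chosen by overload resolution, not values of a single enum as in std::atomic; that is what allows an order that is invalid for a given operation (say, a release load) to be rejected at compile time. Call sites therefore look like the std versions but pass a tag object, as in this sketch:

#include <stdint.h>
#include <EASTL/atomic.h>

static eastl::atomic<uint64_t> counter{0};

uint64_t bump()
{
    // eastl::memory_order_relaxed is a constexpr tag object
    // (eastl::internal::memory_order_relaxed_s), not an enum value.
    return counter.fetch_add(1, eastl::memory_order_relaxed);
}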
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_POINTER_H +#define EASTL_ATOMIC_INTERNAL_POINTER_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#include "atomic_push_compiler_options.h" + + +namespace eastl +{ + + +namespace internal +{ + + + template + struct atomic_pointer_base; + +#define EASTL_ATOMIC_POINTER_STATIC_ASSERT_FUNCS_IMPL(funcName) \ + template \ + T* funcName(ptrdiff_t arg, Order order) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(T); \ + } \ + \ + template \ + T* funcName(ptrdiff_t arg, Order order) volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + } \ + \ + T* funcName(ptrdiff_t arg) volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + } + +#define EASTL_ATOMIC_POINTER_STATIC_ASSERT_INC_DEC_OPERATOR_IMPL(operatorOp) \ + T* operator operatorOp() volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + } \ + \ + T* operator operatorOp(int) volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + } + +#define EASTL_ATOMIC_POINTER_STATIC_ASSERT_ASSIGNMENT_OPERATOR_IMPL(operatorOp) \ + T* operator operatorOp(ptrdiff_t arg) volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + } + + + template + struct atomic_pointer_base : public atomic_base_width + { + private: + + using Base = atomic_base_width; + + public: /* ctors */ + + EA_CONSTEXPR atomic_pointer_base(T* desired) EA_NOEXCEPT + : Base{ desired } + { + } + + EA_CONSTEXPR atomic_pointer_base() EA_NOEXCEPT = default; + + atomic_pointer_base(const atomic_pointer_base&) EA_NOEXCEPT = delete; + + public: /* assignment operators */ + + using Base::operator=; + + atomic_pointer_base& operator=(const atomic_pointer_base&) EA_NOEXCEPT = delete; + atomic_pointer_base& operator=(const atomic_pointer_base&) volatile EA_NOEXCEPT = delete; + + public: /* fetch_add */ + + EASTL_ATOMIC_POINTER_STATIC_ASSERT_FUNCS_IMPL(fetch_add) + + public: /* add_fetch */ + + EASTL_ATOMIC_POINTER_STATIC_ASSERT_FUNCS_IMPL(add_fetch) + + public: /* fetch_sub */ + + EASTL_ATOMIC_POINTER_STATIC_ASSERT_FUNCS_IMPL(fetch_sub) + + public: /* sub_fetch */ + + EASTL_ATOMIC_POINTER_STATIC_ASSERT_FUNCS_IMPL(sub_fetch) + + public: /* operator++ && operator-- */ + + EASTL_ATOMIC_POINTER_STATIC_ASSERT_INC_DEC_OPERATOR_IMPL(++) + + EASTL_ATOMIC_POINTER_STATIC_ASSERT_INC_DEC_OPERATOR_IMPL(--) + + public: /* operator+= && operator-= */ + + EASTL_ATOMIC_POINTER_STATIC_ASSERT_ASSIGNMENT_OPERATOR_IMPL(+=) + + EASTL_ATOMIC_POINTER_STATIC_ASSERT_ASSIGNMENT_OPERATOR_IMPL(-=) + + }; + + + template + struct atomic_pointer_width; + +#define EASTL_ATOMIC_POINTER_FUNC_IMPL(op, bits) \ + T* retVal; \ + { \ + ptr_integral_type retType; \ + ptr_integral_type addend = static_cast(arg) * static_cast(sizeof(T)); \ + \ + EA_PREPROCESSOR_JOIN(op, bits)(ptr_integral_type, retType, EASTL_ATOMIC_INTEGRAL_CAST(ptr_integral_type, this->GetAtomicAddress()), addend); \ + \ + retVal = reinterpret_cast(retType); \ + } \ + return retVal; + +#define EASTL_ATOMIC_POINTER_FETCH_IMPL(funcName, op, bits) \ + T* funcName(ptrdiff_t arg) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_TYPE_IS_OBJECT(T); \ + EASTL_ATOMIC_POINTER_FUNC_IMPL(op, bits); \ + } + +#define EASTL_ATOMIC_POINTER_FETCH_ORDER_IMPL(funcName, orderType, op, bits) \ + T* funcName(ptrdiff_t arg, orderType) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_TYPE_IS_OBJECT(T); \ + 
EASTL_ATOMIC_POINTER_FUNC_IMPL(op, bits); \ + } + +#define EASTL_ATOMIC_POINTER_FETCH_OP_JOIN(fetchOp, Order) \ + EA_PREPROCESSOR_JOIN(EA_PREPROCESSOR_JOIN(EASTL_ATOMIC_, fetchOp), Order) + +#define EASTL_ATOMIC_POINTER_FETCH_FUNCS_IMPL(funcName, fetchOp, bits) \ + using Base::funcName; \ + \ + EASTL_ATOMIC_POINTER_FETCH_IMPL(funcName, EASTL_ATOMIC_POINTER_FETCH_OP_JOIN(fetchOp, _SEQ_CST_), bits) \ + \ + EASTL_ATOMIC_POINTER_FETCH_ORDER_IMPL(funcName, eastl::internal::memory_order_relaxed_s, \ + EASTL_ATOMIC_POINTER_FETCH_OP_JOIN(fetchOp, _RELAXED_), bits) \ + \ + EASTL_ATOMIC_POINTER_FETCH_ORDER_IMPL(funcName, eastl::internal::memory_order_acquire_s, \ + EASTL_ATOMIC_POINTER_FETCH_OP_JOIN(fetchOp, _ACQUIRE_), bits) \ + \ + EASTL_ATOMIC_POINTER_FETCH_ORDER_IMPL(funcName, eastl::internal::memory_order_release_s, \ + EASTL_ATOMIC_POINTER_FETCH_OP_JOIN(fetchOp, _RELEASE_), bits) \ + \ + EASTL_ATOMIC_POINTER_FETCH_ORDER_IMPL(funcName, eastl::internal::memory_order_acq_rel_s, \ + EASTL_ATOMIC_POINTER_FETCH_OP_JOIN(fetchOp, _ACQ_REL_), bits) \ + \ + EASTL_ATOMIC_POINTER_FETCH_ORDER_IMPL(funcName, eastl::internal::memory_order_seq_cst_s, \ + EASTL_ATOMIC_POINTER_FETCH_OP_JOIN(fetchOp, _SEQ_CST_), bits) + +#define EASTL_ATOMIC_POINTER_FETCH_INC_DEC_OPERATOR_IMPL(operatorOp, preFuncName, postFuncName) \ + using Base::operator operatorOp; \ + \ + T* operator operatorOp() EA_NOEXCEPT \ + { \ + return preFuncName(1, eastl::memory_order_seq_cst); \ + } \ + \ + T* operator operatorOp(int) EA_NOEXCEPT \ + { \ + return postFuncName(1, eastl::memory_order_seq_cst); \ + } + +#define EASTL_ATOMIC_POINTER_FETCH_ASSIGNMENT_OPERATOR_IMPL(operatorOp, funcName) \ + using Base::operator operatorOp; \ + \ + T* operator operatorOp(ptrdiff_t arg) EA_NOEXCEPT \ + { \ + return funcName(arg, eastl::memory_order_seq_cst); \ + } + + +#define EASTL_ATOMIC_POINTER_WIDTH_SPECIALIZE(bytes, bits) \ + template \ + struct atomic_pointer_width : public atomic_pointer_base \ + { \ + private: \ + \ + using Base = atomic_pointer_base; \ + using u_ptr_integral_type = EA_PREPROCESSOR_JOIN(EA_PREPROCESSOR_JOIN(uint, bits), _t); \ + using ptr_integral_type = EA_PREPROCESSOR_JOIN(EA_PREPROCESSOR_JOIN(int, bits), _t); \ + \ + public: /* ctors */ \ + \ + EA_CONSTEXPR atomic_pointer_width(T* desired) EA_NOEXCEPT \ + : Base{ desired } \ + { \ + } \ + \ + EA_CONSTEXPR atomic_pointer_width() EA_NOEXCEPT = default; \ + \ + atomic_pointer_width(const atomic_pointer_width&) EA_NOEXCEPT = delete; \ + \ + public: /* assignment operators */ \ + \ + using Base::operator=; \ + \ + atomic_pointer_width& operator=(const atomic_pointer_width&) EA_NOEXCEPT = delete; \ + atomic_pointer_width& operator=(const atomic_pointer_width&) volatile EA_NOEXCEPT = delete; \ + \ + public: /* fetch_add */ \ + \ + EASTL_ATOMIC_POINTER_FETCH_FUNCS_IMPL(fetch_add, FETCH_ADD, bits) \ + \ + public: /* add_fetch */ \ + \ + EASTL_ATOMIC_POINTER_FETCH_FUNCS_IMPL(add_fetch, ADD_FETCH, bits) \ + \ + public: /* fetch_sub */ \ + \ + EASTL_ATOMIC_POINTER_FETCH_FUNCS_IMPL(fetch_sub, FETCH_SUB, bits) \ + \ + public: /* sub_fetch */ \ + \ + EASTL_ATOMIC_POINTER_FETCH_FUNCS_IMPL(sub_fetch, SUB_FETCH, bits) \ + \ + public: /* operator++ && operator-- */ \ + \ + EASTL_ATOMIC_POINTER_FETCH_INC_DEC_OPERATOR_IMPL(++, add_fetch, fetch_add) \ + \ + EASTL_ATOMIC_POINTER_FETCH_INC_DEC_OPERATOR_IMPL(--, sub_fetch, fetch_sub) \ + \ + public: /* operator+= && operator-= */ \ + \ + EASTL_ATOMIC_POINTER_FETCH_ASSIGNMENT_OPERATOR_IMPL(+=, add_fetch) \ + \ + 
EASTL_ATOMIC_POINTER_FETCH_ASSIGNMENT_OPERATOR_IMPL(-=, sub_fetch) \ + \ + public: \ + \ + using Base::load; \ + \ + T* load(eastl::internal::memory_order_read_depends_s) EA_NOEXCEPT \ + { \ + T* retPointer; \ + EA_PREPROCESSOR_JOIN(EASTL_ATOMIC_LOAD_READ_DEPENDS_, bits)(T*, retPointer, this->GetAtomicAddress()); \ + return retPointer; \ + } \ + }; + + +#if defined(EASTL_ATOMIC_HAS_32BIT) && EA_PLATFORM_PTR_SIZE == 4 + EASTL_ATOMIC_POINTER_WIDTH_SPECIALIZE(4, 32) +#endif + +#if defined(EASTL_ATOMIC_HAS_64BIT) && EA_PLATFORM_PTR_SIZE == 8 + EASTL_ATOMIC_POINTER_WIDTH_SPECIALIZE(8, 64) +#endif + + +} // namespace internal + + +} // namespace eastl + + +#include "atomic_pop_compiler_options.h" + + +#endif /* EASTL_ATOMIC_INTERNAL_POINTER_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_pop_compiler_options.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_pop_compiler_options.h new file mode 100644 index 00000000..92f241a1 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_pop_compiler_options.h @@ -0,0 +1,11 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +/* NOTE: No Header Guard */ + + +EA_RESTORE_VC_WARNING(); + +EA_RESTORE_CLANG_WARNING(); diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_push_compiler_options.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_push_compiler_options.h new file mode 100644 index 00000000..c5a54715 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_push_compiler_options.h @@ -0,0 +1,17 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +/* NOTE: No Header Guard */ + + +// 'class' : multiple assignment operators specified +EA_DISABLE_VC_WARNING(4522); + +// misaligned atomic operation may incur significant performance penalty +// The above warning is emitted in earlier versions of clang incorrectly. +// All eastl::atomic objects are size aligned. +// This is static and runtime asserted. +// Thus we disable this warning. +EA_DISABLE_CLANG_WARNING(-Watomic-alignment); diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_size_aligned.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_size_aligned.h new file mode 100644 index 00000000..db23e478 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_size_aligned.h @@ -0,0 +1,197 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
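+//
+// atomic_size_aligned<T> below stores its value with EA_ALIGN(sizeof(T)), so a
+// 64-bit atomic keeps 8-byte alignment even on 32-bit targets where the compiler
+// might otherwise use a smaller alignment (see the comment near mAtomic below).
+// A rough, illustrative sketch of the idea; 'SizeAligned64' is a made-up name:
+//
+//   struct SizeAligned64 { EA_ALIGN(8) uint64_t value; };
+//   static_assert(EA_ALIGN_OF(SizeAligned64) == 8, "storage is size aligned");
+//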
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_SIZE_ALIGNED_H +#define EASTL_ATOMIC_INTERNAL_SIZE_ALIGNED_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#include "atomic_push_compiler_options.h" + + +namespace eastl +{ + + +namespace internal +{ + + +#define EASTL_ATOMIC_SIZE_ALIGNED_STATIC_ASSERT_CMPXCHG_IMPL(funcName) \ + template \ + bool funcName(T& expected, T desired, \ + OrderSuccess orderSuccess, \ + OrderFailure orderFailure) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(T); \ + return false; \ + } \ + \ + template \ + bool funcName(T& expected, T desired, \ + OrderSuccess orderSuccess, \ + OrderFailure orderFailure) volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + return false; \ + } \ + \ + template \ + bool funcName(T& expected, T desired, \ + Order order) EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(T); \ + return false; \ + } \ + \ + template \ + bool funcName(T& expected, T desired, \ + Order order) volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + return false; \ + } \ + \ + bool funcName(T& expected, T desired) volatile EA_NOEXCEPT \ + { \ + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); \ + return false; \ + } + +#define EASTL_ATOMIC_SIZE_ALIGNED_STATIC_ASSERT_CMPXCHG_WEAK_IMPL() \ + EASTL_ATOMIC_SIZE_ALIGNED_STATIC_ASSERT_CMPXCHG_IMPL(compare_exchange_weak) + +#define EASTL_ATOMIC_SIZE_ALIGNED_STATIC_ASSERT_CMPXCHG_STRONG_IMPL() \ + EASTL_ATOMIC_SIZE_ALIGNED_STATIC_ASSERT_CMPXCHG_IMPL(compare_exchange_strong) + + + template + struct atomic_size_aligned + { + public: /* ctors */ + + EA_CONSTEXPR atomic_size_aligned(T desired) EA_NOEXCEPT + : mAtomic{ desired } + { + } + + EA_CONSTEXPR atomic_size_aligned() EA_NOEXCEPT_IF(eastl::is_nothrow_default_constructible_v) + : mAtomic{} /* Value-Initialize which will Zero-Initialize Trivial Constructible types */ + { + } + + atomic_size_aligned(const atomic_size_aligned&) EA_NOEXCEPT = delete; + + public: /* store */ + + template + void store(T desired, Order order) EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(T); + } + + template + void store(T desired, Order order) volatile EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); + } + + void store(T desired) volatile EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); + } + + public: /* load */ + + template + T load(Order order) const EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(T); + } + + template + T load(Order order) const volatile EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); + } + + T load() const volatile EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); + } + + public: /* exchange */ + + template + T exchange(T desired, Order order) EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(T); + } + + template + T exchange(T desired, Order order) volatile EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); + } + + T exchange(T desired) volatile EA_NOEXCEPT + { + EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); + } + + public: /* compare_exchange_weak */ + + EASTL_ATOMIC_SIZE_ALIGNED_STATIC_ASSERT_CMPXCHG_WEAK_IMPL() + + public: /* compare_exchange_strong */ + + EASTL_ATOMIC_SIZE_ALIGNED_STATIC_ASSERT_CMPXCHG_STRONG_IMPL() + + public: /* assignment operator */ + + T operator=(T desired) volatile EA_NOEXCEPT + { + 
EASTL_ATOMIC_STATIC_ASSERT_VOLATILE_MEM_FN(T); + } + + atomic_size_aligned& operator=(const atomic_size_aligned&) EA_NOEXCEPT = delete; + atomic_size_aligned& operator=(const atomic_size_aligned&) volatile EA_NOEXCEPT = delete; + + protected: /* Accessors */ + + T* GetAtomicAddress() const EA_NOEXCEPT + { + return eastl::addressof(mAtomic); + } + + private: + + /** + * Some compilers such as MSVC will align 64-bit values on 32-bit machines on + * 4-byte boundaries which can ruin the atomicity guarantees. + * + * Ensure everything is size aligned. + * + * mutable is needed in cases such as when loads are only guaranteed to be atomic + * using a compare exchange, such as for 128-bit atomics, so we need to be able + * to have write access to the variable as one example. + */ + EA_ALIGN(sizeof(T)) mutable T mAtomic; + }; + + +} // namespace internal + + +} // namespace eastl + + +#include "atomic_pop_compiler_options.h" + + +#endif /* EASTL_ATOMIC_INTERNAL_SIZE_ALIGNED_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/atomic_standalone.h b/libkram/eastl/include/EASTL/internal/atomic/atomic_standalone.h new file mode 100644 index 00000000..011d5fb3 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/atomic_standalone.h @@ -0,0 +1,470 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_STANDALONE_H +#define EASTL_ATOMIC_INTERNAL_STANDALONE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +namespace eastl +{ + + +//////////////////////////////////////////////////////////////////////////////// +// +// bool atomic_compare_exchange_strong(eastl::atomic*, T* expected, T desired) +// +template +EASTL_FORCE_INLINE bool atomic_compare_exchange_strong(eastl::atomic* atomicObj, + typename eastl::atomic::value_type* expected, + typename eastl::atomic::value_type desired) EA_NOEXCEPT +{ + return atomicObj->compare_exchange_strong(*expected, desired); +} + +template +EASTL_FORCE_INLINE bool atomic_compare_exchange_strong_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::value_type* expected, + typename eastl::atomic::value_type desired, + OrderSuccess orderSuccess, OrderFailure orderFailure) EA_NOEXCEPT +{ + return atomicObj->compare_exchange_strong(*expected, desired, orderSuccess, orderFailure); +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// bool atomic_compare_exchange_weak(eastl::atomic*, T* expected, T desired) +// +template +EASTL_FORCE_INLINE bool atomic_compare_exchange_weak(eastl::atomic* atomicObj, + typename eastl::atomic::value_type* expected, + typename eastl::atomic::value_type desired) EA_NOEXCEPT +{ + return atomicObj->compare_exchange_weak(*expected, desired); +} + +template +EASTL_FORCE_INLINE bool atomic_compare_exchange_weak_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::value_type* expected, + typename eastl::atomic::value_type desired, + OrderSuccess orderSuccess, OrderFailure orderFailure) EA_NOEXCEPT +{ + return atomicObj->compare_exchange_weak(*expected, desired, orderSuccess, orderFailure); +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// T atomic_fetch_xor(eastl::atomic*, T arg) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_fetch_xor(eastl::atomic* atomicObj, + typename 
eastl::atomic::value_type arg) EA_NOEXCEPT +{ + return atomicObj->fetch_xor(arg); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_fetch_xor_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::value_type arg, + Order order) EA_NOEXCEPT +{ + return atomicObj->fetch_xor(arg, order); +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// T atomic_xor_fetch(eastl::atomic*, T arg) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_xor_fetch(eastl::atomic* atomicObj, + typename eastl::atomic::value_type arg) EA_NOEXCEPT +{ + return atomicObj->xor_fetch(arg); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_xor_fetch_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::value_type arg, + Order order) EA_NOEXCEPT +{ + return atomicObj->xor_fetch(arg, order); +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// T atomic_fetch_or(eastl::atomic*, T arg) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_fetch_or(eastl::atomic* atomicObj, + typename eastl::atomic::value_type arg) EA_NOEXCEPT +{ + return atomicObj->fetch_or(arg); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_fetch_or_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::value_type arg, + Order order) EA_NOEXCEPT +{ + return atomicObj->fetch_or(arg, order); +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// T atomic_or_fetch(eastl::atomic*, T arg) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_or_fetch(eastl::atomic* atomicObj, + typename eastl::atomic::value_type arg) EA_NOEXCEPT +{ + return atomicObj->or_fetch(arg); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_or_fetch_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::value_type arg, + Order order) EA_NOEXCEPT +{ + return atomicObj->or_fetch(arg, order); +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// T atomic_fetch_and(eastl::atomic*, T arg) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_fetch_and(eastl::atomic* atomicObj, + typename eastl::atomic::value_type arg) EA_NOEXCEPT +{ + return atomicObj->fetch_and(arg); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_fetch_and_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::value_type arg, + Order order) EA_NOEXCEPT +{ + return atomicObj->fetch_and(arg, order); +} + + +//////////////////////////////////////////////////////////////////////////////// +// +// T atomic_and_fetch(eastl::atomic*, T arg) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_and_fetch(eastl::atomic* atomicObj, + typename eastl::atomic::value_type arg) EA_NOEXCEPT +{ + return atomicObj->and_fetch(arg); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_and_fetch_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::value_type arg, + Order order) EA_NOEXCEPT +{ + return atomicObj->and_fetch(arg, order); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// T atomic_fetch_sub(eastl::atomic*, T arg) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_fetch_sub(eastl::atomic* atomicObj, + typename eastl::atomic::difference_type arg) EA_NOEXCEPT +{ + return 
atomicObj->fetch_sub(arg); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_fetch_sub_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::difference_type arg, + Order order) EA_NOEXCEPT +{ + return atomicObj->fetch_sub(arg, order); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// T atomic_sub_fetch(eastl::atomic*, T arg) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_sub_fetch(eastl::atomic* atomicObj, + typename eastl::atomic::difference_type arg) EA_NOEXCEPT +{ + return atomicObj->sub_fetch(arg); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_sub_fetch_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::difference_type arg, + Order order) EA_NOEXCEPT +{ + return atomicObj->sub_fetch(arg, order); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// T atomic_fetch_add(eastl::atomic*, T arg) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_fetch_add(eastl::atomic* atomicObj, + typename eastl::atomic::difference_type arg) EA_NOEXCEPT +{ + return atomicObj->fetch_add(arg); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_fetch_add_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::difference_type arg, + Order order) EA_NOEXCEPT +{ + return atomicObj->fetch_add(arg, order); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// T atomic_add_fetch(eastl::atomic*, T arg) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_add_fetch(eastl::atomic* atomicObj, + typename eastl::atomic::difference_type arg) EA_NOEXCEPT +{ + return atomicObj->add_fetch(arg); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_add_fetch_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::difference_type arg, + Order order) EA_NOEXCEPT +{ + return atomicObj->add_fetch(arg, order); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// T atomic_exchange(eastl::atomic*, T desired) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_exchange(eastl::atomic* atomicObj, + typename eastl::atomic::value_type desired) EA_NOEXCEPT +{ + return atomicObj->exchange(desired); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_exchange_explicit(eastl::atomic* atomicObj, + typename eastl::atomic::value_type desired, + Order order) EA_NOEXCEPT +{ + return atomicObj->exchange(desired, order); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// T atomic_load(const eastl::atomic*) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_load(const eastl::atomic* atomicObj) EA_NOEXCEPT +{ + return atomicObj->load(); +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_load_explicit(const eastl::atomic* atomicObj, Order order) EA_NOEXCEPT +{ + return atomicObj->load(order); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// T atomic_load_cond(const eastl::atomic*) +// +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_load_cond(const eastl::atomic* atomicObj, Predicate pred) EA_NOEXCEPT +{ + for (;;) + { + typename eastl::atomic::value_type ret = atomicObj->load(); + + if (pred(ret)) + { + return ret; + } + + EASTL_ATOMIC_CPU_PAUSE(); + 
} +} + +template +EASTL_FORCE_INLINE typename eastl::atomic::value_type atomic_load_cond_explicit(const eastl::atomic* atomicObj, Predicate pred, Order order) EA_NOEXCEPT +{ + for (;;) + { + typename eastl::atomic::value_type ret = atomicObj->load(order); + + if (pred(ret)) + { + return ret; + } + + EASTL_ATOMIC_CPU_PAUSE(); + } +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// void atomic_store(eastl::atomic*, T) +// +template +EASTL_FORCE_INLINE void atomic_store(eastl::atomic* atomicObj, typename eastl::atomic::value_type desired) EA_NOEXCEPT +{ + atomicObj->store(desired); +} + +template +EASTL_FORCE_INLINE void atomic_store_explicit(eastl::atomic* atomicObj, typename eastl::atomic::value_type desired, Order order) EA_NOEXCEPT +{ + atomicObj->store(desired, order); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// void eastl::atomic_thread_fence(Order) +// +template +EASTL_FORCE_INLINE void atomic_thread_fence(Order) EA_NOEXCEPT +{ + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(Order); +} + +EASTL_FORCE_INLINE void atomic_thread_fence(eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT +{ + EASTL_ATOMIC_THREAD_FENCE_RELAXED(); +} + +EASTL_FORCE_INLINE void atomic_thread_fence(eastl::internal::memory_order_acquire_s) EA_NOEXCEPT +{ + EASTL_ATOMIC_THREAD_FENCE_ACQUIRE(); +} + +EASTL_FORCE_INLINE void atomic_thread_fence(eastl::internal::memory_order_release_s) EA_NOEXCEPT +{ + EASTL_ATOMIC_THREAD_FENCE_RELEASE(); +} + +EASTL_FORCE_INLINE void atomic_thread_fence(eastl::internal::memory_order_acq_rel_s) EA_NOEXCEPT +{ + EASTL_ATOMIC_THREAD_FENCE_ACQ_REL(); +} + +EASTL_FORCE_INLINE void atomic_thread_fence(eastl::internal::memory_order_seq_cst_s) EA_NOEXCEPT +{ + EASTL_ATOMIC_THREAD_FENCE_SEQ_CST(); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// void eastl::atomic_signal_fence(Order) +// +template +EASTL_FORCE_INLINE void atomic_signal_fence(Order) EA_NOEXCEPT +{ + EASTL_ATOMIC_STATIC_ASSERT_INVALID_MEMORY_ORDER(Order); +} + +EASTL_FORCE_INLINE void atomic_signal_fence(eastl::internal::memory_order_relaxed_s) EA_NOEXCEPT +{ + EASTL_ATOMIC_SIGNAL_FENCE_RELAXED(); +} + +EASTL_FORCE_INLINE void atomic_signal_fence(eastl::internal::memory_order_acquire_s) EA_NOEXCEPT +{ + EASTL_ATOMIC_SIGNAL_FENCE_ACQUIRE(); +} + +EASTL_FORCE_INLINE void atomic_signal_fence(eastl::internal::memory_order_release_s) EA_NOEXCEPT +{ + EASTL_ATOMIC_SIGNAL_FENCE_RELEASE(); +} + +EASTL_FORCE_INLINE void atomic_signal_fence(eastl::internal::memory_order_acq_rel_s) EA_NOEXCEPT +{ + EASTL_ATOMIC_SIGNAL_FENCE_ACQ_REL(); +} + +EASTL_FORCE_INLINE void atomic_signal_fence(eastl::internal::memory_order_seq_cst_s) EA_NOEXCEPT +{ + EASTL_ATOMIC_SIGNAL_FENCE_SEQ_CST(); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// void eastl::compiler_barrier() +// +EASTL_FORCE_INLINE void compiler_barrier() EA_NOEXCEPT +{ + EASTL_ATOMIC_COMPILER_BARRIER(); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// void eastl::compiler_barrier_data_dependency(const T&) +// +template +EASTL_FORCE_INLINE void compiler_barrier_data_dependency(const T& val) EA_NOEXCEPT +{ + EASTL_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY(val, T); +} + + +///////////////////////////////////////////////////////////////////////////////// +// +// void eastl::cpu_pause() +// +EASTL_FORCE_INLINE void cpu_pause() EA_NOEXCEPT +{ + 
EASTL_ATOMIC_CPU_PAUSE();
+}
+
+
+/////////////////////////////////////////////////////////////////////////////////
+//
+// bool eastl::atomic_is_lock_free(eastl::atomic<T>*)
+//
+template <typename T>
+EASTL_FORCE_INLINE bool atomic_is_lock_free(const eastl::atomic<T>* atomicObj) EA_NOEXCEPT
+{
+	return atomicObj->is_lock_free();
+}
+
+
+} // namespace eastl
+
+
+#endif /* EASTL_ATOMIC_INTERNAL_STANDALONE_H */
diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler.h
new file mode 100644
index 00000000..65a4cd00
--- /dev/null
+++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler.h
@@ -0,0 +1,120 @@
+/////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+/////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_H
+#define EASTL_ATOMIC_INTERNAL_COMPILER_H
+
+#if defined(EA_PRAGMA_ONCE_SUPPORTED)
+	#pragma once
+#endif
+
+
+/////////////////////////////////////////////////////////////////////////////////
+//
+// Include the compiler specific implementations
+//
+#if defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)
+
+	#include "gcc/compiler_gcc.h"
+
+#elif defined(EA_COMPILER_MSVC)
+
+	#include "msvc/compiler_msvc.h"
+
+#endif
+
+
+/////////////////////////////////////////////////////////////////////////////////
+
+
+namespace eastl
+{
+
+
+namespace internal
+{
+
+
+/**
+ * NOTE:
+ *
+ * This can be used by specific compiler implementations to implement a data dependency compiler barrier.
+ * Some compiler barriers do not take in input dependencies as is possible with the gcc asm syntax.
+ * Thus we need a way to create a false dependency on the input variable so the compiler does not dead-store
+ * remove it.
+ * A volatile function pointer ensures the compiler must always load the function pointer and call thru it
+ * since the compiler cannot reason about any side effects. Thus the compiler must always assume the
+ * input variable may be accessed and thus cannot be dead-stored. This technique works even in the presence
+ * of Link-Time Optimization. A compiler barrier with a data dependency is useful in these situations.
+ *
+ * void foo()
+ * {
+ *   eastl::vector<int> v;
+ *   while (Benchmark.ContinueRunning())
+ *   {
+ *     v.push_back(0);
+ *     eastl::compiler_barrier(); OR eastl::compiler_barrier_data_dependency(v);
+ *   }
+ * }
+ *
+ * We are trying to benchmark the push_back function of a vector. The vector v has only local scope.
+ * The compiler is well within its rights to remove all accesses to v even with the compiler barrier
+ * because there are no observable uses of the vector v.
+ * The compiler barrier data dependency ensures there is an input dependency on the variable so that
+ * it isn't removed. This is also useful when writing test code that the compiler may remove.
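+ *
+ * As a rough sketch only (the actual hook used here is the
+ * gCompilerBarrierDataDependencyFunc pointer declared just below), the volatile
+ * function-pointer trick looks roughly like this; 'SinkFunc' and 'gSink' are
+ * hypothetical names:
+ *
+ *   static void SinkFunc(void*) {}
+ *   static void (* volatile gSink)(void*) = &SinkFunc;
+ *
+ *   // The compiler must reload gSink and assume the call may read *addr,
+ *   // so stores to the pointed-to object cannot be dead-store eliminated.
+ *   void DataDependencyBarrier(void* addr) { gSink(addr); }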
+ */ + +typedef void (*CompilerBarrierDataDependencyFuncPtr)(void*); + +extern EASTL_API volatile CompilerBarrierDataDependencyFuncPtr gCompilerBarrierDataDependencyFunc; + + +#define EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY_FUNC(ptr) \ + eastl::internal::gCompilerBarrierDataDependencyFunc(ptr) + + +} // namespace internal + + +} // namespace eastl + + +///////////////////////////////////////////////////////////////////////////////// + + +#include "compiler_fetch_add.h" +#include "compiler_fetch_sub.h" + +#include "compiler_fetch_and.h" +#include "compiler_fetch_xor.h" +#include "compiler_fetch_or.h" + +#include "compiler_add_fetch.h" +#include "compiler_sub_fetch.h" + +#include "compiler_and_fetch.h" +#include "compiler_xor_fetch.h" +#include "compiler_or_fetch.h" + +#include "compiler_exchange.h" + +#include "compiler_cmpxchg_weak.h" +#include "compiler_cmpxchg_strong.h" + +#include "compiler_load.h" +#include "compiler_store.h" + +#include "compiler_barrier.h" + +#include "compiler_cpu_pause.h" + +#include "compiler_memory_barrier.h" + +#include "compiler_signal_fence.h" + +#include "compiler_thread_fence.h" + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_add_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_add_fetch.h new file mode 100644 index 00000000..763921c4 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_add_fetch.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_ADD_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_ADD_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_ADD_FETCH_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_8) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_16_AVAILABLE 0 +#endif + +#if 
defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_16) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_32) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_64) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_128) + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_128) + #define 
EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_ADD_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_and_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_and_fetch.h new file mode 100644 index 00000000..7b1e0a42 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_and_fetch.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_AND_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_AND_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_AND_FETCH_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_8) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_16) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_32_AVAILABLE 0 +#endif + +#if 
defined(EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_32) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_64) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_128) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_AND_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_barrier.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_barrier.h new file mode 100644 index 00000000..550070e3 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_barrier.h @@ -0,0 +1,36 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
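+//
+// Like the other compiler_*.h headers in this directory, this file only maps
+// "is the compiler-level macro defined?" onto an *_AVAILABLE flag; the layers
+// above are expected to test that flag before expanding the operation. A rough,
+// hedged sketch of the consuming pattern (illustrative only, not code from this
+// patch):
+//
+//   #if EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_AVAILABLE
+//       EASTL_COMPILER_ATOMIC_COMPILER_BARRIER();
+//   #else
+//       // fall back, e.g. to a full memory barrier
+//   #endif
+//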
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_BARRIER_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_BARRIER_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_COMPILER_BARRIER() +// +#if defined(EASTL_COMPILER_ATOMIC_COMPILER_BARRIER) + #define EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_AVAILABLE 0 +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY(const T&, type) +// +#if defined(EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY) + #define EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_BARRIER_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cmpxchg_strong.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cmpxchg_strong.h new file mode 100644 index 00000000..2ee29711 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cmpxchg_strong.h @@ -0,0 +1,430 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_CMPXCHG_STRONG_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_CMPXCHG_STRONG_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_*_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_8) + #define 
EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32_AVAILABLE 0 
+#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64) + #define 
EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128_AVAILABLE 0 +#endif + + +///////////////////////////////////////////////////////////////////////////////// + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_8_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_8_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_8_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_8(type, ret, ptr, expected, desired) \ + 
EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_8_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_8_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_16_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_16_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_16_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_16_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_16_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_32_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_32_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_32_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_32_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_32(type, ret, 
ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_32_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_64_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_64_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_64_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_64_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_64_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_128_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_128_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_128_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_128_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_128_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128_AVAILABLE +#define 
EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128(type, ret, ptr, expected, desired) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_CMPXCHG_STRONG_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cmpxchg_weak.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cmpxchg_weak.h new file mode 100644 index 00000000..9bc1a621 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cmpxchg_weak.h @@ -0,0 +1,430 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_CMPXCHG_WEAK_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_CMPXCHG_WEAK_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_*_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16_AVAILABLE 0 +#endif + +#if 
defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_32_AVAILABLE 0 +#endif + 
+#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128_AVAILABLE 1 +#else + #define 
EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128_AVAILABLE 0 +#endif + + +///////////////////////////////////////////////////////////////////////////////// + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_8_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_8_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_8_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_8_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_8_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_16_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_16_AVAILABLE \ + 
EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_16_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_16_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_16_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_32_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_32_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_32_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_32_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_32_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_64_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_64_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_64_AVAILABLE \ + 
EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_64_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_64_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_128_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_128_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_128_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_128_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_128_AVAILABLE \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128_AVAILABLE +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128(type, ret, ptr, expected, desired) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_CMPXCHG_WEAK_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cpu_pause.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cpu_pause.h new file mode 100644 index 00000000..073b3fbb --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_cpu_pause.h @@ -0,0 +1,32 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_CPU_PAUSE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_CPU_PAUSE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CPU_PAUSE() +// +#if defined(EASTL_COMPILER_ATOMIC_CPU_PAUSE) + + #define EASTL_COMPILER_ATOMIC_CPU_PAUSE_AVAILABLE 1 + +#else + + #define EASTL_COMPILER_ATOMIC_CPU_PAUSE() \ + ((void)0) + + #define EASTL_COMPILER_ATOMIC_CPU_PAUSE_AVAILABLE 1 + +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_CPU_PAUSE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_exchange.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_exchange.h new file mode 100644 index 00000000..d82b199d --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_exchange.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_EXCHANGE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_EXCHANGE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_EXCHANGE_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_8) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_16) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_16_AVAILABLE 1 +#else + #define 
EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_32) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_64) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_128) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_EXCHANGE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_add.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_add.h new file mode 100644 index 00000000..e6c4238f --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_add.h @@ -0,0 +1,173 @@ 
+///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_ADD_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_ADD_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_ADD_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_8) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_16) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_32) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_32_AVAILABLE 1 +#else + 
#define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_64) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_128) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_ADD_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_and.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_and.h new file mode 100644 index 00000000..b0976fc7 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_and.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_AND_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_AND_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_AND_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_8) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_16) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_32) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_64) + #define 
EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_64) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_128) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_AND_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_or.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_or.h new file mode 100644 index 00000000..2e6cfdac --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_or.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_OR_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_OR_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_OR_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_8) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_16) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_32) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_64_AVAILABLE 
1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_64) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_128) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_OR_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_sub.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_sub.h new file mode 100644 index 00000000..d7ed86cc --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_sub.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_SUB_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_SUB_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_SUB_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_8) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_16) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_32) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_64) + #define 
EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_64) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_128) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_SUB_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_xor.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_xor.h new file mode 100644 index 00000000..10cf7d90 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_fetch_xor.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_XOR_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_XOR_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_XOR_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_8) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_16) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_32) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_64) + #define 
EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_64) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_128) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_FETCH_XOR_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_load.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_load.h new file mode 100644 index 00000000..734dbb80 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_load.h @@ -0,0 +1,139 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_LOAD_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_LOAD_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_LOAD_*_N(type, type ret, type * ptr) +// +#if defined(EASTL_COMPILER_ATOMIC_LOAD_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_128_AVAILABLE 0 +#endif + + +/** + * NOTE: + * + * These are used for data-dependent reads thru a pointer. It is safe + * to assume that pointer-sized reads are atomic on any given platform. + * This implementation assumes the hardware doesn't reorder dependent + * loads unlike the DEC Alpha. 
+ */ +#define EASTL_COMPILER_ATOMIC_LOAD_READ_DEPENDS_N(type, ret, ptr) \ + { \ + static_assert(eastl::is_pointer_v, "eastl::atomic : Read Depends Type must be a Pointer Type!"); \ + static_assert(eastl::is_pointer_v>, "eastl::atomic : Read Depends Ptr must be a Pointer to a Pointer!"); \ + \ + ret = (*EASTL_ATOMIC_VOLATILE_CAST(ptr)); \ + } + +#define EASTL_COMPILER_ATOMIC_LOAD_READ_DEPENDS_32(type, ret, ptr) \ + EASTL_COMPILER_ATOMIC_LOAD_READ_DEPENDS_N(type, ret, ptr) + +#define EASTL_COMPILER_ATOMIC_LOAD_READ_DEPENDS_64(type, ret, ptr) \ + EASTL_COMPILER_ATOMIC_LOAD_READ_DEPENDS_N(type, ret, ptr) + +#define EASTL_COMPILER_ATOMIC_LOAD_READ_DEPENDS_32_AVAILABLE 1 +#define EASTL_COMPILER_ATOMIC_LOAD_READ_DEPENDS_64_AVAILABLE 1 + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_LOAD_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_memory_barrier.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_memory_barrier.h new file mode 100644 index 00000000..ac3923c6 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_memory_barrier.h @@ -0,0 +1,47 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MEMORY_BARRIER_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MEMORY_BARRIER_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CPU_MB() +// +#if defined(EASTL_COMPILER_ATOMIC_CPU_MB) + #define EASTL_COMPILER_ATOMIC_CPU_MB_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CPU_MB_AVAILABLE 0 +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CPU_WMB() +// +#if defined(EASTL_COMPILER_ATOMIC_CPU_WMB) + #define EASTL_COMPILER_ATOMIC_CPU_WMB_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CPU_WMB_AVAILABLE 0 +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CPU_RMB() +// +#if defined(EASTL_COMPILER_ATOMIC_CPU_RMB) + #define EASTL_COMPILER_ATOMIC_CPU_RMB_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_CPU_RMB_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MEMORY_BARRIER_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_or_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_or_fetch.h new file mode 100644 index 00000000..a26a72c7 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_or_fetch.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
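Aside on the READ_DEPENDS load defined in compiler_load.h above: a minimal standard-C++ sketch of the same "data-dependent read through a pointer" idea, illustrative only and not part of the patch. It uses memory_order_consume, which current compilers promote to acquire, whereas the EASTL macro issues a plain volatile pointer read and relies on the hardware preserving dependency ordering. The Node type and function name are invented for the example.

    #include <atomic>

    struct Node { int payload; };

    std::atomic<Node*> gHead{nullptr};

    // Load the pointer with consume ordering, then dereference it; the read of
    // n->payload is ordered after the pointer load by the data dependency.
    int ReadPayloadThroughDependentLoad()
    {
        Node* n = gHead.load(std::memory_order_consume);
        return n ? n->payload : 0;
    }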
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_OR_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_OR_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_OR_FETCH_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_8) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_16) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_32) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_64_AVAILABLE 
1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_64) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_128) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_OR_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_signal_fence.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_signal_fence.h new file mode 100644 index 00000000..25b0b741 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_signal_fence.h @@ -0,0 +1,49 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_SIGNAL_FENCE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_SIGNAL_FENCE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_*() +// +#if defined(EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_RELAXED) + #define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_RELAXED_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_RELAXED_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_ACQUIRE) + #define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_ACQUIRE_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_ACQUIRE_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_RELEASE) + #define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_RELEASE_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_RELEASE_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_ACQ_REL) + #define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_ACQ_REL_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_ACQ_REL_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_SEQ_CST) + #define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_SEQ_CST_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_SEQ_CST_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_SIGNAL_FENCE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_store.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_store.h new file mode 100644 index 00000000..1a553e2a --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_store.h @@ -0,0 +1,113 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_STORE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_STORE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_STORE_*_N(type, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_STORE_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_STORE_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_STORE_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_STORE_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_STORE_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_STORE_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_STORE_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_STORE_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_STORE_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_STORE_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_STORE_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_STORE_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_STORE_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_STORE_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_STORE_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_STORE_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_STORE_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_STORE_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_STORE_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_STORE_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_STORE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_sub_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_sub_fetch.h new file mode 100644 index 
00000000..4b7eea92 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_sub_fetch.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_SUB_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_SUB_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_SUB_FETCH_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_8) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_16) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_32) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_32_AVAILABLE 0 +#endif + 
+#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_64) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_64) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_128) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_SUB_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_thread_fence.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_thread_fence.h new file mode 100644 index 00000000..01d8f0f9 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_thread_fence.h @@ -0,0 +1,49 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_THREAD_FENCE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_THREAD_FENCE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_THREAD_FENCE_*() +// +#if defined(EASTL_COMPILER_ATOMIC_THREAD_FENCE_RELAXED) + #define EASTL_COMPILER_ATOMIC_THREAD_FENCE_RELAXED_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_THREAD_FENCE_RELAXED_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_THREAD_FENCE_ACQUIRE) + #define EASTL_COMPILER_ATOMIC_THREAD_FENCE_ACQUIRE_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_THREAD_FENCE_ACQUIRE_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_THREAD_FENCE_RELEASE) + #define EASTL_COMPILER_ATOMIC_THREAD_FENCE_RELEASE_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_THREAD_FENCE_RELEASE_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_THREAD_FENCE_ACQ_REL) + #define EASTL_COMPILER_ATOMIC_THREAD_FENCE_ACQ_REL_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_THREAD_FENCE_ACQ_REL_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_THREAD_FENCE_SEQ_CST) + #define EASTL_COMPILER_ATOMIC_THREAD_FENCE_SEQ_CST_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_THREAD_FENCE_SEQ_CST_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_THREAD_FENCE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_xor_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_xor_fetch.h new file mode 100644 index 00000000..05680bd1 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/compiler_xor_fetch.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_XOR_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_XOR_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_XOR_FETCH_*_N(type, type ret, type * ptr, type val) +// +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_8) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_8) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_8) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_8) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_8_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_8) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_8_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_8_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_16) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_16) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_16) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_16) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_16_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_16) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_16_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_16_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_32) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_32) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_32) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_32) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_32_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_32) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_32_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_32_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_64) + #define 
EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_64) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_64) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_64) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_64_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_64) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_64_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_64_AVAILABLE 0 +#endif + + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_128) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_128) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_128) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_128) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_128_AVAILABLE 0 +#endif + +#if defined(EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_128) + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_128_AVAILABLE 1 +#else + #define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_128_AVAILABLE 0 +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_XOR_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc.h new file mode 100644 index 00000000..26a99c20 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc.h @@ -0,0 +1,154 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +/** + * NOTE: + * + * gcc __atomic builtins may defer to function calls in libatomic.so for architectures that do not + * support atomic instructions of a given size. These functions will be implemented with pthread_mutex_t. + * It also requires the explicit linking against the compiler runtime libatomic.so. + * On architectures that do not support atomics, like armv6 the builtins may defer to kernel helpers + * or on classic uniprocessor systems just disable interrupts. + * + * We do not want to have to link against libatomic.so or fall into the trap of our atomics degrading + * into locks. We would rather have user-code explicitly use locking primitives if their code cannot + * be satisfied with atomic instructions on the given platform. 
+ */
+static_assert(__atomic_always_lock_free(1, 0), "eastl::atomic<T> where sizeof(T) == 1 must be lock-free!");
+static_assert(__atomic_always_lock_free(2, 0), "eastl::atomic<T> where sizeof(T) == 2 must be lock-free!");
+static_assert(__atomic_always_lock_free(4, 0), "eastl::atomic<T> where sizeof(T) == 4 must be lock-free!");
+#if EA_PLATFORM_PTR_SIZE == 8
+	static_assert(__atomic_always_lock_free(8, 0), "eastl::atomic<T> where sizeof(T) == 8 must be lock-free!");
+#endif
+
+/**
+ * NOTE:
+ *
+ * The following can fail on gcc/clang on 64-bit systems.
+ * First, whether clang calls out to libatomic for 128-bit operations depends on the -march setting.
+ * Second, gcc always calls out to libatomic for 128-bit atomics. It is unclear whether it uses locks
+ * or checks cpuid and uses cmpxchg16b if it is available.
+ * gcc mailing lists argue that since a 128-bit load must be implemented with cmpxchg16b, the __atomic builtin
+ * cannot be used on read-only memory, which is why they always call out to libatomic.
+ * There is no way to tell gcc not to do that, unfortunately.
+ * We don't care about the read-only restriction because our eastl::atomic<T> object is mutable,
+ * and msvc doesn't enforce this restriction either, so to be fully platform agnostic we cannot enforce it.
+ *
+ * Therefore, the following static_assert is commented out for the time being, as it always fails on these compilers.
+ * We still guarantee 128-bit atomics are lock-free by handrolling the inline assembly ourselves.
+ *
+ * static_assert(__atomic_always_lock_free(16, 0), "eastl::atomic<T> where sizeof(T) == 16 must be lock-free!");
+ */
+
+/**
+ * NOTE:
+ *
+ * Why do we cast to unsigned fixed-width types for every operation even though the gcc/clang builtins are generic?
+ * gcc/clang (correctly, but undesirably for us) call out to libatomic and do locking on user types that may be misaligned.
+ * struct UserType { uint8_t a,b; }; is 2 bytes in size but has only 1-byte alignment.
+ * gcc/clang cannot know that we always guarantee every type T is size-aligned within eastl::atomic<T>,
+ * so they emit calls into libatomic and do locking for structs like these, which we do not want.
+ * Therefore we always cast each atomic pointer type to the equivalent unsigned fixed-width type when doing the atomic operations.
+ * This ensures all user types are size-aligned and thus are lock-free.
+ */ + + +///////////////////////////////////////////////////////////////////////////////// + + +#define EASTL_COMPILER_ATOMIC_HAS_8BIT +#define EASTL_COMPILER_ATOMIC_HAS_16BIT +#define EASTL_COMPILER_ATOMIC_HAS_32BIT +#define EASTL_COMPILER_ATOMIC_HAS_64BIT + +#if EA_PLATFORM_PTR_SIZE == 8 + #define EASTL_COMPILER_ATOMIC_HAS_128BIT +#endif + + +///////////////////////////////////////////////////////////////////////////////// + + +#define EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_8 uint8_t +#define EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_16 uint16_t +#define EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_32 uint32_t +#define EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_64 uint64_t +#define EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_128 __uint128_t + + +///////////////////////////////////////////////////////////////////////////////// + + +#define EASTL_GCC_ATOMIC_FETCH_INTRIN_N(integralType, fetchIntrinsic, type, ret, ptr, val, gccMemoryOrder) \ + { \ + integralType retIntegral; \ + integralType valIntegral = EASTL_ATOMIC_TYPE_PUN_CAST(integralType, (val)); \ + \ + retIntegral = fetchIntrinsic(EASTL_ATOMIC_VOLATILE_INTEGRAL_CAST(integralType, (ptr)), valIntegral, gccMemoryOrder); \ + \ + ret = EASTL_ATOMIC_TYPE_PUN_CAST(type, retIntegral); \ + } + +#define EASTL_GCC_ATOMIC_CMPXCHG_INTRIN_N(integralType, type, ret, ptr, expected, desired, weak, successOrder, failOrder) \ + ret = __atomic_compare_exchange(EASTL_ATOMIC_VOLATILE_INTEGRAL_CAST(integralType, (ptr)), \ + EASTL_ATOMIC_INTEGRAL_CAST(integralType, (expected)), \ + EASTL_ATOMIC_INTEGRAL_CAST(integralType, &(desired)), \ + weak, successOrder, failOrder) + +#define EASTL_GCC_ATOMIC_EXCHANGE_INTRIN_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + { \ + integralType retIntegral; \ + integralType valIntegral = EASTL_ATOMIC_TYPE_PUN_CAST(integralType, (val)); \ + \ + __atomic_exchange(EASTL_ATOMIC_VOLATILE_INTEGRAL_CAST(integralType, (ptr)), \ + &valIntegral, &retIntegral, gccMemoryOrder); \ + \ + ret = EASTL_ATOMIC_TYPE_PUN_CAST(type, retIntegral); \ + } + + +///////////////////////////////////////////////////////////////////////////////// + + +#include "compiler_gcc_fetch_add.h" +#include "compiler_gcc_fetch_sub.h" + +#include "compiler_gcc_fetch_and.h" +#include "compiler_gcc_fetch_xor.h" +#include "compiler_gcc_fetch_or.h" + +#include "compiler_gcc_add_fetch.h" +#include "compiler_gcc_sub_fetch.h" + +#include "compiler_gcc_and_fetch.h" +#include "compiler_gcc_xor_fetch.h" +#include "compiler_gcc_or_fetch.h" + +#include "compiler_gcc_exchange.h" + +#include "compiler_gcc_cmpxchg_weak.h" +#include "compiler_gcc_cmpxchg_strong.h" + +#include "compiler_gcc_load.h" +#include "compiler_gcc_store.h" + +#include "compiler_gcc_barrier.h" + +#include "compiler_gcc_cpu_pause.h" + +#include "compiler_gcc_signal_fence.h" + +#include "compiler_gcc_thread_fence.h" + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_add_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_add_fetch.h new file mode 100644 index 00000000..1d19196b --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_add_fetch.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
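A hedged illustration of the fixed-width punning performed by EASTL_GCC_ATOMIC_FETCH_INTRIN_N and EASTL_GCC_ATOMIC_EXCHANGE_INTRIN_N above, not part of the patch: a 2-byte struct with 1-byte alignment is exchanged through a size-aligned uint16_t slot, so the generic __atomic builtin is never handed a potentially misaligned user type and stays lock-free. The function name and the memcpy-based punning are illustrative assumptions; EASTL uses its own EASTL_ATOMIC_TYPE_PUN_CAST machinery.

    #include <cstdint>
    #include <cstring>

    struct UserType { uint8_t a, b; };  // sizeof == 2, alignof == 1

    // Exchange the struct's bits through a size-aligned uint16_t so the builtin
    // operates on an integral type it can always handle without locking.
    UserType ExchangeUserType(uint16_t* storage, UserType desired)
    {
        uint16_t desiredBits, previousBits;
        std::memcpy(&desiredBits, &desired, sizeof desiredBits);
        __atomic_exchange(storage, &desiredBits, &previousBits, __ATOMIC_SEQ_CST);
        UserType previous;
        std::memcpy(&previous, &previousBits, sizeof previous);
        return previous;
    }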
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_ADD_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_ADD_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_ADD_FETCH_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_INTRIN_N(integralType, __atomic_add_fetch, type, ret, ptr, val, gccMemoryOrder) + + +#define EASTL_GCC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_ADD_FETCH_N(uint8_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_ADD_FETCH_N(uint16_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_ADD_FETCH_N(uint32_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_ADD_FETCH_N(uint64_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_ADD_FETCH_128(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_ADD_FETCH_N(__uint128_t, type, ret, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_ADD_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_128(type, ret, ptr, val, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_128(type, ret, ptr, val, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define 
EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_128(type, ret, ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_128(type, ret, ptr, val, __ATOMIC_ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_ADD_FETCH_128(type, ret, ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_ADD_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_and_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_and_fetch.h new file mode 100644 index 00000000..a35307f0 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_and_fetch.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
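Roughly what one of the ADD_FETCH mappings above amounts to once the wrapper macros are expanded; a sketch assuming a plain int32_t operand, with an illustrative function name that is not part of the patch:

    #include <cstdint>

    // EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_32 reduces to a __atomic_add_fetch
    // on the operand viewed as a uint32_t, returning the post-add value.
    int32_t AddFetchSeqCst32(int32_t* ptr, int32_t val)
    {
        uint32_t result = __atomic_add_fetch(reinterpret_cast<volatile uint32_t*>(ptr),
                                             static_cast<uint32_t>(val), __ATOMIC_SEQ_CST);
        return static_cast<int32_t>(result);
    }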
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_AND_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_AND_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_AND_FETCH_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_INTRIN_N(integralType, __atomic_and_fetch, type, ret, ptr, val, gccMemoryOrder) + + +#define EASTL_GCC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_AND_FETCH_N(uint8_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_AND_FETCH_16(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_AND_FETCH_N(uint16_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_AND_FETCH_N(uint32_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_AND_FETCH_N(uint64_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_AND_FETCH_128(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_AND_FETCH_N(__uint128_t, type, ret, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_AND_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_16(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_128(type, ret, ptr, val, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_16(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_128(type, ret, ptr, val, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_16(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define 
EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_128(type, ret, ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_16(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_128(type, ret, ptr, val, __ATOMIC_ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_16(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_AND_FETCH_128(type, ret, ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_AND_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_barrier.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_barrier.h new file mode 100644 index 00000000..64e8e541 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_barrier.h @@ -0,0 +1,30 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_BARRIER_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_BARRIER_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_COMPILER_BARRIER() +// +#define EASTL_COMPILER_ATOMIC_COMPILER_BARRIER() \ + __asm__ __volatile__ ("" ::: "memory") + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY(const T&, type) +// +#define EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY(val, type) \ + __asm__ __volatile__ ("" : /* Output Operands */ : "r"(&(val)) : "memory") + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_BARRIER_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cmpxchg_strong.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cmpxchg_strong.h new file mode 100644 index 00000000..3e47cf2e --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cmpxchg_strong.h @@ -0,0 +1,182 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. 
All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_CMPXCHG_STRONG_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_CMPXCHG_STRONG_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_CMPXCHG_STRONG_N(integralType, type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_INTRIN_N(integralType, type, ret, ptr, expected, desired, false, successOrder, failOrder) + + +#define EASTL_GCC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_N(uint8_t, type, ret, ptr, expected, desired, successOrder, failOrder) + +#define EASTL_GCC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_N(uint16_t, type, ret, ptr, expected, desired, successOrder, failOrder) + +#define EASTL_GCC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_N(uint32_t, type, ret, ptr, expected, desired, successOrder, failOrder) + +#define EASTL_GCC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_N(uint64_t, type, ret, ptr, expected, desired, successOrder, failOrder) + +#define EASTL_GCC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_N(__uint128_t, type, ret, ptr, expected, desired, successOrder, failOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_*_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) + +#define 
EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, __ATOMIC_RELEASE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, __ATOMIC_RELEASE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, __ATOMIC_RELEASE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, __ATOMIC_RELEASE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, __ATOMIC_RELEASE, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, 
expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16(type, ret, ptr, expected, desired) 
\ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_CMPXCHG_STRONG_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cmpxchg_weak.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cmpxchg_weak.h new file mode 100644 index 00000000..f55fe3a3 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cmpxchg_weak.h @@ -0,0 +1,182 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_CMPXCHG_WEAK_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_CMPXCHG_WEAK_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_CMPXCHG_WEAK_N(integralType, type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_INTRIN_N(integralType, type, ret, ptr, expected, desired, true, successOrder, failOrder) + + +#define EASTL_GCC_ATOMIC_CMPXCHG_WEAK_8(type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_N(uint8_t, type, ret, ptr, expected, desired, successOrder, failOrder) + +#define EASTL_GCC_ATOMIC_CMPXCHG_WEAK_16(type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_N(uint16_t, type, ret, ptr, expected, desired, successOrder, failOrder) + +#define EASTL_GCC_ATOMIC_CMPXCHG_WEAK_32(type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_N(uint32_t, type, ret, ptr, expected, desired, successOrder, failOrder) + +#define EASTL_GCC_ATOMIC_CMPXCHG_WEAK_64(type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_N(uint64_t, type, ret, ptr, expected, desired, successOrder, failOrder) + +#define EASTL_GCC_ATOMIC_CMPXCHG_WEAK_128(type, ret, ptr, expected, desired, successOrder, failOrder) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_N(__uint128_t, type, ret, ptr, expected, desired, successOrder, failOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_*_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_8(type, ret, ptr, expected, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_16(type, ret, ptr, expected, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED) + +#define 
EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_32(type, ret, ptr, expected, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_64(type, ret, ptr, expected, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_128(type, ret, ptr, expected, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_8(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_16(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_32(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_64(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_128(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_8(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_16(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_32(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_64(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_128(type, ret, ptr, expected, desired, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_8(type, ret, ptr, expected, desired, __ATOMIC_RELEASE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_16(type, ret, ptr, expected, desired, __ATOMIC_RELEASE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_32(type, ret, ptr, expected, desired, __ATOMIC_RELEASE, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_64(type, ret, ptr, expected, desired, __ATOMIC_RELEASE, __ATOMIC_RELAXED) + +#define 
EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_128(type, ret, ptr, expected, desired, __ATOMIC_RELEASE, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_8(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_16(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_32(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_64(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_128(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_8(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_16(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_32(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_64(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_128(type, ret, ptr, expected, desired, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_8(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_16(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_32(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_64(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_128(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_GCC_ATOMIC_CMPXCHG_WEAK_8(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE) + +#define 
EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_16(type, ret, ptr, expected, desired) \
+	EASTL_GCC_ATOMIC_CMPXCHG_WEAK_16(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE)
+
+#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_32(type, ret, ptr, expected, desired) \
+	EASTL_GCC_ATOMIC_CMPXCHG_WEAK_32(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE)
+
+#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_64(type, ret, ptr, expected, desired) \
+	EASTL_GCC_ATOMIC_CMPXCHG_WEAK_64(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE)
+
+#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_128(type, ret, ptr, expected, desired) \
+	EASTL_GCC_ATOMIC_CMPXCHG_WEAK_128(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_ACQUIRE)
+
+
+#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8(type, ret, ptr, expected, desired) \
+	EASTL_GCC_ATOMIC_CMPXCHG_WEAK_8(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+
+#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16(type, ret, ptr, expected, desired) \
+	EASTL_GCC_ATOMIC_CMPXCHG_WEAK_16(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+
+#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32(type, ret, ptr, expected, desired) \
+	EASTL_GCC_ATOMIC_CMPXCHG_WEAK_32(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+
+#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64(type, ret, ptr, expected, desired) \
+	EASTL_GCC_ATOMIC_CMPXCHG_WEAK_64(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+
+#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128(type, ret, ptr, expected, desired) \
+	EASTL_GCC_ATOMIC_CMPXCHG_WEAK_128(type, ret, ptr, expected, desired, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
+
+
+#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_CMPXCHG_WEAK_H */
diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cpu_pause.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cpu_pause.h
new file mode 100644
index 00000000..9d4ac35e
--- /dev/null
+++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_cpu_pause.h
@@ -0,0 +1,31 @@
+/////////////////////////////////////////////////////////////////////////////////
+// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_CPU_PAUSE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_CPU_PAUSE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CPU_PAUSE() +// +#if defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64) + + #define EASTL_COMPILER_ATOMIC_CPU_PAUSE() \ + __asm__ __volatile__ ("pause") + +#elif defined(EA_PROCESSOR_ARM32) || defined(EA_PROCESSOR_ARM64) + + #define EASTL_COMPILER_ATOMIC_CPU_PAUSE() \ + __asm__ __volatile__ ("yield") + +#endif + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_CPU_PAUSE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_exchange.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_exchange.h new file mode 100644 index 00000000..a3325547 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_exchange.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_EXCHANGE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_EXCHANGE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_EXCHANGE_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_EXCHANGE_INTRIN_N(integralType, type, ret, ptr, val, gccMemoryOrder) + + +#define EASTL_GCC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_EXCHANGE_N(uint8_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_EXCHANGE_N(uint16_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_EXCHANGE_N(uint32_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_EXCHANGE_N(uint64_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_EXCHANGE_N(__uint128_t, type, ret, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_EXCHANGE_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define 
EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, __ATOMIC_ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_EXCHANGE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_add.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_add.h new file mode 100644 index 00000000..98abbb83 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_add.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_ADD_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_ADD_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_FETCH_ADD_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_INTRIN_N(integralType, __atomic_fetch_add, type, ret, ptr, val, gccMemoryOrder) + + +#define EASTL_GCC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_ADD_N(uint8_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_ADD_N(uint16_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_ADD_N(uint32_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_ADD_N(uint64_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_ADD_128(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_ADD_N(__uint128_t, type, ret, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_ADD_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_128(type, ret, ptr, val, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_128(type, ret, ptr, val, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define 
EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_128(type, ret, ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_128(type, ret, ptr, val, __ATOMIC_ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_ADD_128(type, ret, ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_ADD_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_and.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_and.h new file mode 100644 index 00000000..0dfb81db --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_and.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_AND_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_AND_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_FETCH_AND_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_INTRIN_N(integralType, __atomic_fetch_and, type, ret, ptr, val, gccMemoryOrder) + + +#define EASTL_GCC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_AND_N(uint8_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_AND_N(uint16_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_AND_32(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_AND_N(uint32_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_AND_N(uint64_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_AND_128(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_AND_N(__uint128_t, type, ret, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_AND_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_32(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_128(type, ret, ptr, val, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_32(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_128(type, ret, ptr, val, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_32(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define 
EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_128(type, ret, ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_32(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_128(type, ret, ptr, val, __ATOMIC_ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_32(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_AND_128(type, ret, ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_AND_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_or.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_or.h new file mode 100644 index 00000000..ba259b74 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_or.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_OR_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_OR_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_FETCH_OR_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_INTRIN_N(integralType, __atomic_fetch_or, type, ret, ptr, val, gccMemoryOrder) + + +#define EASTL_GCC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_OR_N(uint8_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_OR_N(uint16_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_OR_N(uint32_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_OR_N(uint64_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_OR_128(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_OR_N(__uint128_t, type, ret, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_OR_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_128(type, ret, ptr, val, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_128(type, ret, ptr, val, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_128(type, ret, ptr, val) \ + 
EASTL_GCC_ATOMIC_FETCH_OR_128(type, ret, ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_128(type, ret, ptr, val, __ATOMIC_ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_OR_128(type, ret, ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_OR_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_sub.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_sub.h new file mode 100644 index 00000000..c8be225e --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_sub.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_SUB_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_SUB_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_FETCH_SUB_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_INTRIN_N(integralType, __atomic_fetch_sub, type, ret, ptr, val, gccMemoryOrder) + + +#define EASTL_GCC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_SUB_N(uint8_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_SUB_N(uint16_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_SUB_N(uint32_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_SUB_N(uint64_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_SUB_128(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_SUB_N(__uint128_t, type, ret, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_SUB_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_128(type, ret, ptr, val, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_128(type, ret, ptr, val, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define 
EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_128(type, ret, ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_128(type, ret, ptr, val, __ATOMIC_ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_SUB_128(type, ret, ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_SUB_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_xor.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_xor.h new file mode 100644 index 00000000..4ec6d676 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_fetch_xor.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_XOR_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_XOR_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_FETCH_XOR_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_INTRIN_N(integralType, __atomic_fetch_xor, type, ret, ptr, val, gccMemoryOrder) + + +#define EASTL_GCC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_XOR_N(uint8_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_XOR_N(uint16_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_XOR_N(uint32_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_XOR_N(uint64_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_FETCH_XOR_128(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_XOR_N(__uint128_t, type, ret, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_XOR_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_128(type, ret, ptr, val, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_128(type, ret, ptr, val, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define 
EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_128(type, ret, ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_128(type, ret, ptr, val, __ATOMIC_ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_FETCH_XOR_128(type, ret, ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_FETCH_XOR_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_load.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_load.h new file mode 100644 index 00000000..a4a3ebf1 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_load.h @@ -0,0 +1,90 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_LOAD_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_LOAD_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_LOAD_N(integralType, type, ret, ptr, gccMemoryOrder) \ + { \ + integralType retIntegral; \ + __atomic_load(EASTL_ATOMIC_VOLATILE_INTEGRAL_CAST(integralType, (ptr)), &retIntegral, gccMemoryOrder); \ + \ + ret = EASTL_ATOMIC_TYPE_PUN_CAST(type, retIntegral); \ + } + +#define EASTL_GCC_ATOMIC_LOAD_8(type, ret, ptr, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_LOAD_N(uint8_t, type, ret, ptr, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_LOAD_16(type, ret, ptr, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_LOAD_N(uint16_t, type, ret, ptr, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_LOAD_32(type, ret, ptr, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_LOAD_N(uint32_t, type, ret, ptr, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_LOAD_64(type, ret, ptr, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_LOAD_N(uint64_t, type, ret, ptr, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_LOAD_128(type, ret, ptr, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_LOAD_N(__uint128_t, type, ret, ptr, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_LOAD_*_N(type, type ret, type * ptr) +// +#define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_8(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_8(type, ret, ptr, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_16(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_16(type, ret, ptr, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_32(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_32(type, ret, ptr, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_64(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_64(type, ret, ptr, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_LOAD_RELAXED_128(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_128(type, ret, ptr, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_8(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_8(type, ret, ptr, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_16(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_16(type, ret, ptr, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_32(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_32(type, ret, ptr, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_64(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_64(type, ret, ptr, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_LOAD_ACQUIRE_128(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_128(type, ret, ptr, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_8(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_8(type, ret, ptr, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_16(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_16(type, ret, ptr, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_32(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_32(type, ret, ptr, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_64(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_64(type, ret, ptr, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_LOAD_SEQ_CST_128(type, ret, ptr) \ + EASTL_GCC_ATOMIC_LOAD_128(type, ret, ptr, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_LOAD_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_or_fetch.h 
b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_or_fetch.h new file mode 100644 index 00000000..9e4db3e1 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_or_fetch.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_OR_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_OR_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_OR_FETCH_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_INTRIN_N(integralType, __atomic_or_fetch, type, ret, ptr, val, gccMemoryOrder) + + +#define EASTL_GCC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_OR_FETCH_N(uint8_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_OR_FETCH_N(uint16_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_OR_FETCH_N(uint32_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_OR_FETCH_N(uint64_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_OR_FETCH_128(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_OR_FETCH_N(__uint128_t, type, ret, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_OR_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_128(type, ret, ptr, val, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_128(type, ret, ptr, val, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, 
__ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_128(type, ret, ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_128(type, ret, ptr, val, __ATOMIC_ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_OR_FETCH_128(type, ret, ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_OR_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_signal_fence.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_signal_fence.h new file mode 100644 index 00000000..16dff14f --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_signal_fence.h @@ -0,0 +1,38 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_SIGNAL_FENCE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_SIGNAL_FENCE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_SIGNAL_FENCE(gccMemoryOrder) \ + __atomic_signal_fence(gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_*() +// +#define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_RELAXED() \ + EASTL_GCC_ATOMIC_SIGNAL_FENCE(__ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_ACQUIRE() \ + EASTL_GCC_ATOMIC_SIGNAL_FENCE(__ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_RELEASE() \ + EASTL_GCC_ATOMIC_SIGNAL_FENCE(__ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_ACQ_REL() \ + EASTL_GCC_ATOMIC_SIGNAL_FENCE(__ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_SEQ_CST() \ + EASTL_GCC_ATOMIC_SIGNAL_FENCE(__ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_SIGNAL_FENCE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_store.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_store.h new file mode 100644 index 00000000..04a28ac4 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_store.h @@ -0,0 +1,89 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_STORE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_STORE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_STORE_N(integralType, ptr, val, gccMemoryOrder) \ + { \ + integralType valIntegral = EASTL_ATOMIC_TYPE_PUN_CAST(integralType, (val)); \ + __atomic_store(EASTL_ATOMIC_VOLATILE_INTEGRAL_CAST(integralType, (ptr)), &valIntegral, gccMemoryOrder); \ + } + + +#define EASTL_GCC_ATOMIC_STORE_8(ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_STORE_N(uint8_t, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_STORE_16(ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_STORE_N(uint16_t, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_STORE_32(ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_STORE_N(uint32_t, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_STORE_64(ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_STORE_N(uint64_t, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_STORE_128(ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_STORE_N(__uint128_t, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_STORE_*_N(type, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_STORE_RELAXED_8(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_8(ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_STORE_RELAXED_16(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_16(ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_STORE_RELAXED_32(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_32(ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_STORE_RELAXED_64(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_64(ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_STORE_RELAXED_128(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_128(ptr, val, __ATOMIC_RELAXED) + 
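For readers less familiar with the GCC/Clang __atomic builtins these headers wrap, the following standalone sketch (illustration only, not part of the patch; the Vec2f type and the function names are made up, and a GCC or Clang compiler is assumed) shows roughly what the 64-bit store and load macros boil down to: the value is type-punned to a same-width unsigned integer and handed to __atomic_store, and the load path is the mirror image via __atomic_load. It also shows why the store groups only come in RELAXED, RELEASE and SEQ_CST flavors: acquire ordering has no meaning for a plain store.

    // Standalone illustration of the pattern behind EASTL_GCC_ATOMIC_STORE_64 /
    // EASTL_GCC_ATOMIC_LOAD_64 for an 8-byte, trivially copyable type.
    // Requires GCC or Clang for the __atomic builtins.
    #include <cstdint>
    #include <cstring>

    struct Vec2f { float x, y; }; // 8 bytes wide, same width as the uint64_t it is punned to

    void storeRelease(Vec2f* dst, Vec2f value)
    {
        uint64_t bits;                            // integral stand-in for the real type
        std::memcpy(&bits, &value, sizeof(bits)); // the "type pun" step
        // the real macros do this cast via EASTL_ATOMIC_VOLATILE_INTEGRAL_CAST
        __atomic_store(reinterpret_cast<volatile uint64_t*>(dst), &bits, __ATOMIC_RELEASE);
    }

    Vec2f loadAcquire(Vec2f* src)
    {
        uint64_t bits;
        __atomic_load(reinterpret_cast<volatile uint64_t*>(src), &bits, __ATOMIC_ACQUIRE);
        Vec2f result;
        std::memcpy(&result, &bits, sizeof(result)); // pun back to the caller's type
        return result;
    }
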
+ +#define EASTL_COMPILER_ATOMIC_STORE_RELEASE_8(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_8(ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_STORE_RELEASE_16(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_16(ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_STORE_RELEASE_32(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_32(ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_STORE_RELEASE_64(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_64(ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_STORE_RELEASE_128(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_128(ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_8(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_8(ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_16(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_16(ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_32(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_32(ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_64(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_64(ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_STORE_SEQ_CST_128(type, ptr, val) \ + EASTL_GCC_ATOMIC_STORE_128(ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_STORE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_sub_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_sub_fetch.h new file mode 100644 index 00000000..62f8cd91 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_sub_fetch.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_SUB_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_SUB_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_SUB_FETCH_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_INTRIN_N(integralType, __atomic_sub_fetch, type, ret, ptr, val, gccMemoryOrder) + + +#define EASTL_GCC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_SUB_FETCH_N(uint8_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_SUB_FETCH_N(uint16_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_SUB_FETCH_N(uint32_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_SUB_FETCH_N(uint64_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_SUB_FETCH_128(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_SUB_FETCH_N(__uint128_t, type, ret, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_SUB_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_128(type, ret, ptr, val, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_128(type, ret, ptr, val, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define 
EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_128(type, ret, ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_128(type, ret, ptr, val, __ATOMIC_ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_SUB_FETCH_128(type, ret, ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_SUB_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_thread_fence.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_thread_fence.h new file mode 100644 index 00000000..0dd005e4 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_thread_fence.h @@ -0,0 +1,38 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_THREAD_FENCE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_THREAD_FENCE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_THREAD_FENCE(gccMemoryOrder) \ + __atomic_thread_fence(gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_THREAD_FENCE_*() +// +#define EASTL_COMPILER_ATOMIC_THREAD_FENCE_RELAXED() \ + EASTL_GCC_ATOMIC_THREAD_FENCE(__ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_THREAD_FENCE_ACQUIRE() \ + EASTL_GCC_ATOMIC_THREAD_FENCE(__ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_THREAD_FENCE_RELEASE() \ + EASTL_GCC_ATOMIC_THREAD_FENCE(__ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_THREAD_FENCE_ACQ_REL() \ + EASTL_GCC_ATOMIC_THREAD_FENCE(__ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_THREAD_FENCE_SEQ_CST() \ + EASTL_GCC_ATOMIC_THREAD_FENCE(__ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_THREAD_FENCE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_xor_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_xor_fetch.h new file mode 100644 index 00000000..4827d79f --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/gcc/compiler_gcc_xor_fetch.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_GCC_XOR_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_GCC_XOR_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_GCC_ATOMIC_XOR_FETCH_N(integralType, type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_FETCH_INTRIN_N(integralType, __atomic_xor_fetch, type, ret, ptr, val, gccMemoryOrder) + + +#define EASTL_GCC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_XOR_FETCH_N(uint8_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_XOR_FETCH_N(uint16_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_XOR_FETCH_N(uint32_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_XOR_FETCH_N(uint64_t, type, ret, ptr, val, gccMemoryOrder) + +#define EASTL_GCC_ATOMIC_XOR_FETCH_128(type, ret, ptr, val, gccMemoryOrder) \ + EASTL_GCC_ATOMIC_XOR_FETCH_N(__uint128_t, type, ret, ptr, val, gccMemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_XOR_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_64(type, ret, ptr, 
val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, __ATOMIC_RELAXED) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_128(type, ret, ptr, val, __ATOMIC_RELAXED) + + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, __ATOMIC_ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_128(type, ret, ptr, val, __ATOMIC_ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, __ATOMIC_RELEASE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_128(type, ret, ptr, val, __ATOMIC_RELEASE) + + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, __ATOMIC_ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_128(type, ret, ptr, val, __ATOMIC_ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, __ATOMIC_SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_GCC_ATOMIC_XOR_FETCH_128(type, ret, ptr, val, __ATOMIC_SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_GCC_XOR_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc.h new file mode 100644 index 00000000..6df8c05f --- /dev/null +++ 
b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc.h @@ -0,0 +1,260 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +EA_DISABLE_ALL_VC_WARNINGS(); +#include <Windows.h> +#include <intrin.h> +EA_RESTORE_ALL_VC_WARNINGS(); + + +///////////////////////////////////////////////////////////////////////////////// + + +#define EASTL_COMPILER_ATOMIC_HAS_8BIT +#define EASTL_COMPILER_ATOMIC_HAS_16BIT +#define EASTL_COMPILER_ATOMIC_HAS_32BIT +#define EASTL_COMPILER_ATOMIC_HAS_64BIT + +#if EA_PLATFORM_PTR_SIZE == 8 + #define EASTL_COMPILER_ATOMIC_HAS_128BIT +#endif + + +///////////////////////////////////////////////////////////////////////////////// + + +#define EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_8 char +#define EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_16 short +#define EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_32 long +#define EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_64 __int64 + +namespace eastl +{ + +namespace internal +{ + +struct FixedWidth128 +{ + __int64 value[2]; +}; + +} // namespace internal + +} // namespace eastl + +#define EASTL_COMPILER_ATOMIC_FIXED_WIDTH_TYPE_128 eastl::internal::FixedWidth128 + + +///////////////////////////////////////////////////////////////////////////////// + + +/** + * NOTE: + * + * Unfortunately MSVC Intrinsics depend on the architecture + * that we are compiling for. + * These are some indirection macros to make our lives easier and + * ensure the least possible amount of copy-paste to reduce programmer errors. + * + * All compiler implementations end up deferring to the below macros.
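+ *
+ * As a rough illustration of how the macros below compose: on ARM,
+ *     EASTL_MSVC_ATOMIC_FETCH_OP(ret, ptr, val, ACQUIRE, _InterlockedExchangeAdd)
+ * is intended to expand to roughly
+ *     ret = _InterlockedExchangeAdd_acq(ptr, val);
+ * by pasting the _nf/_acq/_rel suffix onto the intrinsic name for the relaxed,
+ * acquire and release orders, while on x86/x64 the memory order parameter is
+ * simply ignored and the plain
+ *     ret = _InterlockedExchangeAdd(ptr, val);
+ * form is used, since the lock-prefixed interlocked operations are already full
+ * barriers there.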
+ */ +#if defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64) + + + #define EASTL_MSVC_ATOMIC_FETCH_OP(ret, ptr, val, MemoryOrder, Intrinsic) \ + ret = Intrinsic(ptr, val) + + #define EASTL_MSVC_ATOMIC_EXCHANGE_OP(ret, ptr, val, MemoryOrder, Intrinsic) \ + ret = Intrinsic(ptr, val) + + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_OP(ret, ptr, comparand, exchange, MemoryOrder, Intrinsic) \ + ret = Intrinsic(ptr, exchange, comparand) + + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128_OP(ret, ptr, comparandResult, exchangeHigh, exchangeLow, MemoryOrder) \ + ret = _InterlockedCompareExchange128_np(ptr, exchangeHigh, exchangeLow, comparandResult) + + +#elif defined(EA_PROCESSOR_ARM32) || defined(EA_PROCESSOR_ARM64) + + + #define EASTL_MSVC_INTRINSIC_RELAXED(Intrinsic) \ + EA_PREPROCESSOR_JOIN(Intrinsic, _nf) + + #define EASTL_MSVC_INTRINSIC_ACQUIRE(Intrinsic) \ + EA_PREPROCESSOR_JOIN(Intrinsic, _acq) + + #define EASTL_MSVC_INTRINSIC_RELEASE(Intrinsic) \ + EA_PREPROCESSOR_JOIN(Intrinsic, _rel) + + #define EASTL_MSVC_INTRINSIC_ACQ_REL(Intrinsic) \ + Intrinsic + + #define EASTL_MSVC_INTRINSIC_SEQ_CST(Intrinsic) \ + Intrinsic + + + #define EASTL_MSVC_ATOMIC_FETCH_OP(ret, ptr, val, MemoryOrder, Intrinsic) \ + ret = EA_PREPROCESSOR_JOIN(EASTL_MSVC_INTRINSIC_, MemoryOrder)(Intrinsic)(ptr, val) + + #define EASTL_MSVC_ATOMIC_EXCHANGE_OP(ret, ptr, val, MemoryOrder, Intrinsic) \ + ret = EA_PREPROCESSOR_JOIN(EASTL_MSVC_INTRINSIC_, MemoryOrder)(Intrinsic)(ptr, val) + + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_OP(ret, ptr, comparand, exchange, MemoryOrder, Intrinsic) \ + ret = EA_PREPROCESSOR_JOIN(EASTL_MSVC_INTRINSIC_, MemoryOrder)(Intrinsic)(ptr, exchange, comparand) + + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128_OP(ret, ptr, comparandResult, exchangeHigh, exchangeLow, MemoryOrder) \ + ret = EA_PREPROCESSOR_JOIN(EASTL_MSVC_INTRINSIC_, MemoryOrder)(_InterlockedCompareExchange128)(ptr, exchangeHigh, exchangeLow, comparandResult) + + +#endif + + +///////////////////////////////////////////////////////////////////////////////// + + +#define EASTL_MSVC_NOP_POST_INTRIN_COMPUTE(ret, lhs, rhs) + +#define EASTL_MSVC_NOP_PRE_INTRIN_COMPUTE(ret, val) \ + ret = (val) + + +#define EASTL_MSVC_ATOMIC_FETCH_INTRIN_N(integralType, fetchIntrinsic, type, ret, ptr, val, MemoryOrder, PRE_INTRIN_COMPUTE, POST_INTRIN_COMPUTE) \ + { \ + integralType retIntegral; \ + type valCompute; \ + \ + PRE_INTRIN_COMPUTE(valCompute, (val)); \ + const integralType valIntegral = EASTL_ATOMIC_TYPE_PUN_CAST(integralType, valCompute); \ + \ + EASTL_MSVC_ATOMIC_FETCH_OP(retIntegral, EASTL_ATOMIC_VOLATILE_INTEGRAL_CAST(integralType, (ptr)), \ + valIntegral, MemoryOrder, fetchIntrinsic); \ + \ + ret = EASTL_ATOMIC_TYPE_PUN_CAST(type, retIntegral); \ + POST_INTRIN_COMPUTE(ret, ret, (val)); \ + } + +#define EASTL_MSVC_ATOMIC_EXCHANGE_INTRIN_N(integralType, exchangeIntrinsic, type, ret, ptr, val, MemoryOrder) \ + { \ + integralType retIntegral; \ + EASTL_MSVC_ATOMIC_EXCHANGE_OP(retIntegral, EASTL_ATOMIC_VOLATILE_INTEGRAL_CAST(integralType, (ptr)), \ + EASTL_ATOMIC_TYPE_PUN_CAST(integralType, (val)), MemoryOrder, \ + exchangeIntrinsic); \ + \ + ret = EASTL_ATOMIC_TYPE_PUN_CAST(type, retIntegral); \ + } + +#define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_N(integralType, cmpxchgStrongIntrinsic, type, ret, ptr, expected, desired, MemoryOrder) \ + { \ + integralType comparandIntegral = EASTL_ATOMIC_TYPE_PUN_CAST(integralType, *(expected)); \ + integralType oldIntegral; \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_OP(oldIntegral, 
EASTL_ATOMIC_VOLATILE_INTEGRAL_CAST(integralType, (ptr)), \ + comparandIntegral, EASTL_ATOMIC_TYPE_PUN_CAST(integralType, (desired)), \ + MemoryOrder, cmpxchgStrongIntrinsic); \ + \ + if (oldIntegral == comparandIntegral) \ + { \ + ret = true; \ + } \ + else \ + { \ + *(expected) = EASTL_ATOMIC_TYPE_PUN_CAST(type, oldIntegral); \ + ret = false; \ + } \ + } + +/** + * In my own opinion, I found the wording on the Microsoft docs a little confusing. + * ExchangeHigh means the top 8 bytes so (ptr + 8). + * ExchangeLow means the low 8 bytes so (ptr). + * Endianness does not matter since we are just loading data and comparing data. + * Think of it as memcpy() and memcmp() function calls whereby the layout of the + * data itself is irrelevant. + * Only after we type pun back to the original type, and load from memory does + * the layout of the data matter again. + */ +#define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_128(type, ret, ptr, expected, desired, MemoryOrder) \ + { \ + union TypePun \ + { \ + type templateType; \ + \ + struct exchange128 \ + { \ + __int64 value[2]; \ + }; \ + \ + struct exchange128 exchangePun; \ + }; \ + \ + union TypePun typePun = { (desired) }; \ + \ + unsigned char cmpxchgRetChar; \ + cmpxchgRetChar = EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128_OP(cmpxchgRetChar, EASTL_ATOMIC_VOLATILE_TYPE_CAST(__int64, (ptr)), \ + EASTL_ATOMIC_TYPE_CAST(__int64, (expected)), \ + typePun.exchangePun.value[1], typePun.exchangePun.value[0], \ + MemoryOrder); \ + \ + ret = static_cast<bool>(cmpxchgRetChar); \ + } + + +///////////////////////////////////////////////////////////////////////////////// + + +#define EASTL_MSVC_ATOMIC_FETCH_OP_N(integralType, fetchIntrinsic, type, ret, ptr, val, MemoryOrder, PRE_INTRIN_COMPUTE) \ + EASTL_MSVC_ATOMIC_FETCH_INTRIN_N(integralType, fetchIntrinsic, type, ret, ptr, val, MemoryOrder, PRE_INTRIN_COMPUTE, EASTL_MSVC_NOP_POST_INTRIN_COMPUTE) + +#define EASTL_MSVC_ATOMIC_OP_FETCH_N(integralType, fetchIntrinsic, type, ret, ptr, val, MemoryOrder, PRE_INTRIN_COMPUTE, POST_INTRIN_COMPUTE) \ + EASTL_MSVC_ATOMIC_FETCH_INTRIN_N(integralType, fetchIntrinsic, type, ret, ptr, val, MemoryOrder, PRE_INTRIN_COMPUTE, POST_INTRIN_COMPUTE) + +#define EASTL_MSVC_ATOMIC_EXCHANGE_OP_N(integralType, exchangeIntrinsic, type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_EXCHANGE_INTRIN_N(integralType, exchangeIntrinsic, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_OP_N(integralType, cmpxchgStrongIntrinsic, type, ret, ptr, expected, desired, MemoryOrder) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_N(integralType, cmpxchgStrongIntrinsic, type, ret, ptr, expected, desired, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_OP_128(type, ret, ptr, expected, desired, MemoryOrder) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_128(type, ret, ptr, expected, desired, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// + + +#include "compiler_msvc_fetch_add.h" +#include "compiler_msvc_fetch_sub.h" + +#include "compiler_msvc_fetch_and.h" +#include "compiler_msvc_fetch_xor.h" +#include "compiler_msvc_fetch_or.h" + +#include "compiler_msvc_add_fetch.h" +#include "compiler_msvc_sub_fetch.h" + +#include "compiler_msvc_and_fetch.h" +#include "compiler_msvc_xor_fetch.h" +#include "compiler_msvc_or_fetch.h" + +#include "compiler_msvc_exchange.h" + +#include "compiler_msvc_cmpxchg_weak.h" +#include "compiler_msvc_cmpxchg_strong.h" + +#include "compiler_msvc_barrier.h" + +#include "compiler_msvc_cpu_pause.h" + +#include 
"compiler_msvc_signal_fence.h" + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_add_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_add_fetch.h new file mode 100644 index 00000000..12fc4b04 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_add_fetch.h @@ -0,0 +1,104 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_ADD_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_ADD_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_MSVC_ADD_FETCH_POST_INTRIN_COMPUTE(ret, val, addend) \ + ret = (val) + (addend) + +#define EASTL_MSVC_ATOMIC_ADD_FETCH_N(integralType, addIntrinsic, type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_OP_FETCH_N(integralType, addIntrinsic, type, ret, ptr, val, MemoryOrder, \ + EASTL_MSVC_NOP_PRE_INTRIN_COMPUTE, EASTL_MSVC_ADD_FETCH_POST_INTRIN_COMPUTE) + + +#define EASTL_MSVC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_N(char, _InterlockedExchangeAdd8, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_N(short, _InterlockedExchangeAdd16, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_N(long, _InterlockedExchangeAdd, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_N(__int64, _InterlockedExchangeAdd64, type, ret, ptr, val, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_ADD_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, RELEASE) + +#define 
EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_8(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_16(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_32(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_ADD_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_ADD_FETCH_64(type, ret, ptr, val, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_ADD_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_and_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_and_fetch.h new file mode 100644 index 00000000..70ec577f --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_and_fetch.h @@ -0,0 +1,121 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_AND_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_AND_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#if defined(EA_PROCESSOR_X86_64) + + #define EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_8 _InterlockedAnd8_np + #define EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_16 _InterlockedAnd16_np + #define EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_32 _InterlockedAnd_np + #define EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_64 _InterlockedAnd64_np + +#else + + #define EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_8 _InterlockedAnd8 + #define EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_16 _InterlockedAnd16 + #define EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_32 _InterlockedAnd + #define EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_64 _InterlockedAnd64 + +#endif + + +#define EASTL_MSVC_AND_FETCH_POST_INTRIN_COMPUTE(ret, val, andend) \ + ret = (val) & (andend) + +#define EASTL_MSVC_ATOMIC_AND_FETCH_N(integralType, andIntrinsic, type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_OP_FETCH_N(integralType, andIntrinsic, type, ret, ptr, val, MemoryOrder, \ + EASTL_MSVC_NOP_PRE_INTRIN_COMPUTE, EASTL_MSVC_AND_FETCH_POST_INTRIN_COMPUTE) + + +#define EASTL_MSVC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_AND_FETCH_N(char, EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_8, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_AND_FETCH_16(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_AND_FETCH_N(short, EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_16, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_AND_FETCH_N(long, EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_32, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_AND_FETCH_N(__int64, EASTL_MSVC_ATOMIC_AND_FETCH_INTRIN_64, type, ret, ptr, val, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_AND_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_16(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_16(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_16(type, ret, 
ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_16(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_8(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_16(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_32(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_AND_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_AND_FETCH_64(type, ret, ptr, val, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_AND_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_barrier.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_barrier.h new file mode 100644 index 00000000..02e2d03a --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_barrier.h @@ -0,0 +1,31 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_BARRIER_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_BARRIER_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_COMPILER_BARRIER() +// +#define EASTL_COMPILER_ATOMIC_COMPILER_BARRIER() \ + _ReadWriteBarrier() + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY(const T&, type) +// +#define EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY(val, type) \ + EASTL_COMPILER_ATOMIC_COMPILER_BARRIER_DATA_DEPENDENCY_FUNC(const_cast<type*>(eastl::addressof((val)))); \ + EASTL_ATOMIC_COMPILER_BARRIER() + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_BARRIER_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cmpxchg_strong.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cmpxchg_strong.h new file mode 100644 index 00000000..42117a1a --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cmpxchg_strong.h @@ -0,0 +1,195 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved.
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_CMPXCHG_STRONG_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_CMPXCHG_STRONG_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#if defined(EA_PROCESSOR_X86_64) + + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_8 _InterlockedCompareExchange8 + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_16 _InterlockedCompareExchange16_np + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_32 _InterlockedCompareExchange_np + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_64 _InterlockedCompareExchange64_np + +#else + + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_8 _InterlockedCompareExchange8 + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_16 _InterlockedCompareExchange16 + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_32 _InterlockedCompareExchange + #define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_64 _InterlockedCompareExchange64 + +#endif + + +#define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, MemoryOrder) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_OP_N(char, EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_8, type, ret, ptr, expected, desired, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, MemoryOrder) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_OP_N(short, EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_16, type, ret, ptr, expected, desired, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, MemoryOrder) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_OP_N(long, EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_32, type, ret, ptr, expected, desired, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, MemoryOrder) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_OP_N(__int64, EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_INTRIN_64, type, ret, ptr, expected, desired, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, MemoryOrder) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_OP_128(type, ret, ptr, expected, desired, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_*_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, RELAXED) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_16(type, ret, ptr, expected, desired) 
\ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, RELEASE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, RELEASE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, RELEASE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, RELEASE) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8(type, ret, ptr, expected, desired) \ + 
EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, SEQ_CST) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, SEQ_CST) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_8(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_16(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_32(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64(type, ret, ptr, expected, desired) \ + EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_64(type, ret, ptr, expected, desired, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128(type, ret, ptr, expected, desired) \ + 
EASTL_MSVC_ATOMIC_CMPXCHG_STRONG_128(type, ret, ptr, expected, desired, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_CMPXCHG_STRONG_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cmpxchg_weak.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cmpxchg_weak.h new file mode 100644 index 00000000..8f4147ac --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cmpxchg_weak.h @@ -0,0 +1,162 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_CMPXCHG_WEAK_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_CMPXCHG_WEAK_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_*_*_N(type, bool ret, type * ptr, type * expected, type desired) +// +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELAXED_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELAXED_RELAXED_128(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_RELAXED_128(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_32(type, ret, ptr, 
expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQUIRE_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQUIRE_ACQUIRE_128(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_RELEASE_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_RELEASE_RELAXED_128(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_RELAXED_128(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_ACQ_REL_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_ACQ_REL_ACQUIRE_128(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_8(type, ret, ptr, expected, desired) \ + 
EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_RELAXED_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_RELAXED_128(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_ACQUIRE_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_ACQUIRE_128(type, ret, ptr, expected, desired) + + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_8(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_8(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_16(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_16(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_32(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_32(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_64(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_64(type, ret, ptr, expected, desired) + +#define EASTL_COMPILER_ATOMIC_CMPXCHG_WEAK_SEQ_CST_SEQ_CST_128(type, ret, ptr, expected, desired) \ + EASTL_COMPILER_ATOMIC_CMPXCHG_STRONG_SEQ_CST_SEQ_CST_128(type, ret, ptr, expected, desired) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_CMPXCHG_WEAK_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cpu_pause.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cpu_pause.h new file mode 100644 index 00000000..720701ab --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_cpu_pause.h @@ -0,0 +1,27 @@ +///////////////////////////////////////////////////////////////////////////////// +// copyright (c) electronic arts inc. all rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_CPU_PAUSE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_CPU_PAUSE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_CPU_PAUSE() +// +// NOTE: +// Rather obscure macro in Windows.h that expands to pause or rep; nop on +// compatible x86 cpus or the arm yield on compatible arm processors. +// This is nicer than switching on platform specific intrinsics. +// +#define EASTL_COMPILER_ATOMIC_CPU_PAUSE() \ + YieldProcessor() + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_CPU_PAUSE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_exchange.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_exchange.h new file mode 100644 index 00000000..323f1fae --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_exchange.h @@ -0,0 +1,125 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_EXCHANGE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_EXCHANGE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_MSVC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_EXCHANGE_OP_N(char, _InterlockedExchange8, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_EXCHANGE_OP_N(short, _InterlockedExchange16, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_EXCHANGE_OP_N(long, _InterlockedExchange, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_EXCHANGE_OP_N(__int64, _InterlockedExchange64, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, MemoryOrder) \ + { \ + bool cmpxchgRet; \ + /* This is intentionally a non-atomic 128-bit load which may observe shearing. */ \ + /* Either we do not observe *(ptr) but then the cmpxchg will fail and the observed */ \ + /* atomic load will be returned. Or the non-atomic load got lucky and the cmpxchg succeeds */ \ + /* because the observed value equals the value in *(ptr) thus we optimistically do a non-atomic load. 
*/ \ + ret = *(ptr); \ + do \ + { \ + EA_PREPROCESSOR_JOIN(EA_PREPROCESSOR_JOIN(EASTL_ATOMIC_CMPXCHG_STRONG_, MemoryOrder), _128)(type, cmpxchgRet, ptr, &(ret), val); \ + } while (!cmpxchgRet); \ + } + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_EXCHANGE_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELAXED_128(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQUIRE_128(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_RELEASE_128(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_ACQ_REL_128(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_8(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_16(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_32(type, ret, ptr, val, SEQ_CST) 
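The EASTL_MSVC_ATOMIC_EXCHANGE_128 comments above describe the key idea: MSVC has no 128-bit exchange intrinsic, so the exchange is emulated by an optimistic (possibly torn) plain read of *(ptr) followed by a 128-bit compare-exchange retry loop. A torn read is harmless because it only makes the first compare-exchange fail, and the failure path hands back the value actually stored, which seeds the next attempt. The following is a minimal standalone C++ sketch of that retry pattern using std::atomic; the U128 type and exchange128 helper are illustrative names and are not part of EASTL or the MSVC intrinsics, and the sketch performs an atomic relaxed load where the macro performs a plain read.

    #include <atomic>
    #include <cstdint>
    #include <cstdio>

    // 16-byte value used to stand in for the 128-bit payload.
    struct alignas(16) U128 {
        uint64_t lo;
        uint64_t hi;
    };

    // Exchange *obj for 'desired' and return the previous contents,
    // built from a compare-exchange retry loop (the same shape as the
    // EASTL_MSVC_ATOMIC_EXCHANGE_128 macro body).
    static U128 exchange128(std::atomic<U128>& obj, U128 desired)
    {
        // Optimistic read of the current value. If this guess is stale,
        // the first compare_exchange_weak below fails and rewrites
        // 'expected' with the value actually observed, so the loop
        // still converges on the correct previous value.
        U128 expected = obj.load(std::memory_order_relaxed);

        while (!obj.compare_exchange_weak(expected, desired,
                                          std::memory_order_seq_cst,
                                          std::memory_order_relaxed))
        {
            // 'expected' now holds the current contents of *obj; retry.
        }
        return expected;  // the value that was replaced
    }

    int main()
    {
        std::atomic<U128> value{U128{1, 2}};
        U128 previous = exchange128(value, U128{3, 4});
        std::printf("previous = {%llu, %llu}\n",
                    (unsigned long long)previous.lo,
                    (unsigned long long)previous.hi);
        return 0;
    }

On x86-64 this typically lowers to a cmpxchg16b loop when the toolchain supports 16-byte compare-exchange; otherwise std::atomic may fall back to a lock, which still preserves the exchange semantics.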
+ +#define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_64(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_EXCHANGE_SEQ_CST_128(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_EXCHANGE_128(type, ret, ptr, val, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_EXCHANGE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_add.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_add.h new file mode 100644 index 00000000..a951740e --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_add.h @@ -0,0 +1,101 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_ADD_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_ADD_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_MSVC_ATOMIC_FETCH_ADD_N(integralType, addIntrinsic, type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_OP_N(integralType, addIntrinsic, type, ret, ptr, val, MemoryOrder, \ + EASTL_MSVC_NOP_PRE_INTRIN_COMPUTE) + + +#define EASTL_MSVC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_N(char, _InterlockedExchangeAdd8, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_N(short, _InterlockedExchangeAdd16, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_N(long, _InterlockedExchangeAdd, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_N(__int64, _InterlockedExchangeAdd64, type, ret, ptr, val, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_ADD_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELAXED_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_16(type, ret, ptr, 
val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_RELEASE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_8(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_16(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_32(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_ADD_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_ADD_64(type, ret, ptr, val, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_ADD_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_and.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_and.h new file mode 100644 index 00000000..96f78942 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_and.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_AND_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_AND_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#if defined(EA_PROCESSOR_X86_64) + + #define EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_8 _InterlockedAnd8_np + #define EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_16 _InterlockedAnd16_np + #define EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_32 _InterlockedAnd_np + #define EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_64 _InterlockedAnd64_np + +#else + + #define EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_8 _InterlockedAnd8 + #define EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_16 _InterlockedAnd16 + #define EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_32 _InterlockedAnd + #define EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_64 _InterlockedAnd64 + +#endif + + +#define EASTL_MSVC_ATOMIC_FETCH_AND_N(integralType, andIntrinsic, type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_OP_N(integralType, andIntrinsic, type, ret, ptr, val, MemoryOrder, \ + EASTL_MSVC_NOP_PRE_INTRIN_COMPUTE) + + +#define EASTL_MSVC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_AND_N(char, EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_8, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_AND_N(short, EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_16, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_AND_32(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_AND_N(long, EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_32, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_AND_N(__int64, EASTL_MSVC_ATOMIC_FETCH_AND_INTRIN_64, type, ret, ptr, val, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_AND_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_32(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELAXED_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_32(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_32(type, ret, 
ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_RELEASE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_32(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_8(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_16(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_32(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_AND_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_AND_64(type, ret, ptr, val, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_AND_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_or.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_or.h new file mode 100644 index 00000000..2792fc3d --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_or.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_OR_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_OR_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#if defined(EA_PROCESSOR_X86_64) + + #define EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_8 _InterlockedOr8_np + #define EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_16 _InterlockedOr16_np + #define EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_32 _InterlockedOr_np + #define EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_64 _InterlockedOr64_np + +#else + + #define EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_8 _InterlockedOr8 + #define EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_16 _InterlockedOr16 + #define EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_32 _InterlockedOr + #define EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_64 _InterlockedOr64 + +#endif + + +#define EASTL_MSVC_ATOMIC_FETCH_OR_N(integralType, orIntrinsic, type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_OP_N(integralType, orIntrinsic, type, ret, ptr, val, MemoryOrder, \ + EASTL_MSVC_NOP_PRE_INTRIN_COMPUTE) + + +#define EASTL_MSVC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_OR_N(char, EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_8, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_OR_N(short, EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_16, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_OR_N(long, EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_32, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_OR_N(long long, EASTL_MSVC_ATOMIC_FETCH_OR_INTRIN_64, type, ret, ptr, val, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_OR_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELAXED_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, RELEASE) + +#define 
EASTL_COMPILER_ATOMIC_FETCH_OR_RELEASE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_8(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_16(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_32(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_OR_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_OR_64(type, ret, ptr, val, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_OR_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_sub.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_sub.h new file mode 100644 index 00000000..6d5d9e3a --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_sub.h @@ -0,0 +1,104 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_SUB_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_SUB_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_MSVC_FETCH_SUB_PRE_INTRIN_COMPUTE(ret, val) \ + ret = EASTL_ATOMIC_NEGATE_OPERAND((val)) + +#define EASTL_MSVC_ATOMIC_FETCH_SUB_N(integralType, subIntrinsic, type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_OP_N(integralType, subIntrinsic, type, ret, ptr, val, MemoryOrder, \ + EASTL_MSVC_FETCH_SUB_PRE_INTRIN_COMPUTE) + + +#define EASTL_MSVC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_N(char, _InterlockedExchangeAdd8, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_N(short, _InterlockedExchangeAdd16, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_N(long, _InterlockedExchangeAdd, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_N(__int64, _InterlockedExchangeAdd64, type, ret, ptr, val, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_SUB_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELAXED_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_RELEASE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_32(type, ret, ptr, val) \ + 
EASTL_MSVC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_8(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_16(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_32(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_SUB_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_SUB_64(type, ret, ptr, val, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_SUB_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_xor.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_xor.h new file mode 100644 index 00000000..371153e9 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_fetch_xor.h @@ -0,0 +1,118 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_XOR_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_XOR_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#if defined(EA_PROCESSOR_X86_64) + + #define EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_8 _InterlockedXor8_np + #define EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_16 _InterlockedXor16_np + #define EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_32 _InterlockedXor_np + #define EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_64 _InterlockedXor64_np + +#else + + #define EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_8 _InterlockedXor8 + #define EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_16 _InterlockedXor16 + #define EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_32 _InterlockedXor + #define EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_64 _InterlockedXor64 + +#endif + + +#define EASTL_MSVC_ATOMIC_FETCH_XOR_N(integralType, xorIntrinsic, type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_OP_N(integralType, xorIntrinsic, type, ret, ptr, val, MemoryOrder, \ + EASTL_MSVC_NOP_PRE_INTRIN_COMPUTE) + + +#define EASTL_MSVC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_N(char, EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_8, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_N(short, EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_16, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_N(long, EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_32, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_N(__int64, EASTL_MSVC_ATOMIC_FETCH_XOR_INTRIN_64, type, ret, ptr, val, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_FETCH_XOR_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_16(type, 
ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELAXED_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_RELEASE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_8(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_16(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_32(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_FETCH_XOR_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_FETCH_XOR_64(type, ret, ptr, val, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_FETCH_XOR_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_or_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_or_fetch.h new file mode 100644 index 00000000..c5b5fac3 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_or_fetch.h @@ -0,0 +1,121 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_OR_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_OR_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#if defined(EA_PROCESSOR_X86_64) + + #define EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_8 _InterlockedOr8_np + #define EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_16 _InterlockedOr16_np + #define EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_32 _InterlockedOr_np + #define EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_64 _InterlockedOr64_np + +#else + + #define EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_8 _InterlockedOr8 + #define EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_16 _InterlockedOr16 + #define EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_32 _InterlockedOr + #define EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_64 _InterlockedOr64 + +#endif + + +#define EASTL_MSVC_OR_FETCH_POST_INTRIN_COMPUTE(ret, val, orend) \ + ret = (val) | (orend) + +#define EASTL_MSVC_ATOMIC_OR_FETCH_N(integralType, orIntrinsic, type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_OP_FETCH_N(integralType, orIntrinsic, type, ret, ptr, val, MemoryOrder, \ + EASTL_MSVC_NOP_PRE_INTRIN_COMPUTE, EASTL_MSVC_OR_FETCH_POST_INTRIN_COMPUTE) + + +#define EASTL_MSVC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_OR_FETCH_N(char, EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_8, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_OR_FETCH_N(short, EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_16, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_OR_FETCH_N(long, EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_32, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_OR_FETCH_N(__int64, EASTL_MSVC_ATOMIC_OR_FETCH_INTRIN_64, type, ret, ptr, val, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_OR_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, RELEASE) + +#define 
EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_8(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_16(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_32(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_OR_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_OR_FETCH_64(type, ret, ptr, val, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_OR_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_signal_fence.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_signal_fence.h new file mode 100644 index 00000000..f35f5772 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_signal_fence.h @@ -0,0 +1,34 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_SIGNAL_FENCE_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_SIGNAL_FENCE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_*() +// +#define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_RELAXED() \ + EASTL_ATOMIC_COMPILER_BARRIER() + +#define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_ACQUIRE() \ + EASTL_ATOMIC_COMPILER_BARRIER() + +#define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_RELEASE() \ + EASTL_ATOMIC_COMPILER_BARRIER() + +#define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_ACQ_REL() \ + EASTL_ATOMIC_COMPILER_BARRIER() + +#define EASTL_COMPILER_ATOMIC_SIGNAL_FENCE_SEQ_CST() \ + EASTL_ATOMIC_COMPILER_BARRIER() + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_SIGNAL_FENCE_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_sub_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_sub_fetch.h new file mode 100644 index 00000000..6fb61e29 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_sub_fetch.h @@ -0,0 +1,107 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_SUB_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_SUB_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#define EASTL_MSVC_SUB_FETCH_PRE_INTRIN_COMPUTE(ret, val) \ + ret = EASTL_ATOMIC_NEGATE_OPERAND((val)) + +#define EASTL_MSVC_SUB_FETCH_POST_INTRIN_COMPUTE(ret, val, subend) \ + ret = (val) - (subend) + +#define EASTL_MSVC_ATOMIC_SUB_FETCH_N(integralType, subIntrinsic, type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_OP_FETCH_N(integralType, subIntrinsic, type, ret, ptr, val, MemoryOrder, \ + EASTL_MSVC_SUB_FETCH_PRE_INTRIN_COMPUTE, EASTL_MSVC_SUB_FETCH_POST_INTRIN_COMPUTE) + + +#define EASTL_MSVC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_N(char, _InterlockedExchangeAdd8, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_N(short, _InterlockedExchangeAdd16, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_N(long, _InterlockedExchangeAdd, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_N(__int64, _InterlockedExchangeAdd64, type, ret, ptr, val, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// void EASTL_COMPILER_ATOMIC_SUB_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + 
EASTL_MSVC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_8(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_16(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_32(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_SUB_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_SUB_FETCH_64(type, ret, ptr, val, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_SUB_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_xor_fetch.h b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_xor_fetch.h new file mode 100644 index 00000000..44ffff90 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/atomic/compiler/msvc/compiler_msvc_xor_fetch.h @@ -0,0 +1,121 @@ +///////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_XOR_FETCH_H +#define EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_XOR_FETCH_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +#if defined(EA_PROCESSOR_X86_64) + + #define EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_8 _InterlockedXor8_np + #define EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_16 _InterlockedXor16_np + #define EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_32 _InterlockedXor_np + #define EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_64 _InterlockedXor64_np + +#else + + #define EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_8 _InterlockedXor8 + #define EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_16 _InterlockedXor16 + #define EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_32 _InterlockedXor + #define EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_64 _InterlockedXor64 + +#endif + + +#define EASTL_MSVC_XOR_FETCH_POST_INTRIN_COMPUTE(ret, val, xorend) \ + ret = (val) ^ (xorend) + +#define EASTL_MSVC_ATOMIC_XOR_FETCH_N(integralType, xorIntrinsic, type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_OP_FETCH_N(integralType, xorIntrinsic, type, ret, ptr, val, MemoryOrder, \ + EASTL_MSVC_NOP_PRE_INTRIN_COMPUTE, EASTL_MSVC_XOR_FETCH_POST_INTRIN_COMPUTE) + + +#define EASTL_MSVC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_N(char, EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_8, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_N(short, EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_16, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_N(long, EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_32, type, ret, ptr, val, MemoryOrder) + +#define EASTL_MSVC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, MemoryOrder) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_N(__int64, EASTL_MSVC_ATOMIC_XOR_FETCH_INTRIN_64, type, ret, ptr, val, MemoryOrder) + + +///////////////////////////////////////////////////////////////////////////////// +// +// 
void EASTL_COMPILER_ATOMIC_XOR_FETCH_*_N(type, type ret, type * ptr, type val) +// +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, RELAXED) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELAXED_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, RELAXED) + + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, ACQUIRE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQUIRE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, ACQUIRE) + + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, RELEASE) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_RELEASE_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, RELEASE) + + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, ACQ_REL) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_ACQ_REL_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, ACQ_REL) + + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_8(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_8(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_16(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_16(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_32(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_32(type, ret, ptr, val, SEQ_CST) + +#define EASTL_COMPILER_ATOMIC_XOR_FETCH_SEQ_CST_64(type, ret, ptr, val) \ + EASTL_MSVC_ATOMIC_XOR_FETCH_64(type, ret, ptr, val, SEQ_CST) + + +#endif /* EASTL_ATOMIC_INTERNAL_COMPILER_MSVC_XOR_FETCH_H */ diff --git a/libkram/eastl/include/EASTL/internal/char_traits.h b/libkram/eastl/include/EASTL/internal/char_traits.h new file mode 100644 index 00000000..62fe79b9 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/char_traits.h @@ -0,0 +1,464 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
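The two MSVC headers above build "op_fetch" (return-the-new-value) operations out of fetch-style interlocked intrinsics, which return the previous value. As a minimal sketch of what the 32-bit SUB_FETCH and XOR_FETCH macros reduce to once the PRE/POST_INTRIN_COMPUTE hooks are folded in -- illustrative only, not part of the patch; the function names are mine, the code is MSVC-specific, and plain -val stands in for whatever EASTL_ATOMIC_NEGATE_OPERAND does in the real macros:

#include <intrin.h>

// sub_fetch built on _InterlockedExchangeAdd: the PRE hook negates the
// operand, the intrinsic returns the previous value, and the POST hook
// computes previous - subend, i.e. the new value.
inline long sub_fetch_32(long volatile* ptr, long val)
{
    long previous = _InterlockedExchangeAdd(ptr, -val);
    return previous - val;
}

// xor_fetch: _InterlockedXor likewise returns the previous value, so the
// POST hook re-applies the XOR to recover the new value.
inline long xor_fetch_32(long volatile* ptr, long val)
{
    long previous = _InterlockedXor(ptr, val);
    return previous ^ val;
}

The 8/16/64-bit widths follow the same pattern with the correspondingly sized intrinsics selected above.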
+///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements similar functionality to char_traits which is part of +// the C++ standard STL library specification. This is intended for internal +// EASTL use only. Functionality can be accessed through the eastl::string or +// eastl::string_view types. +// +// http://en.cppreference.com/w/cpp/string/char_traits +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_CHAR_TRAITS_H +#define EASTL_CHAR_TRAITS_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include + +EA_DISABLE_ALL_VC_WARNINGS() +#include // toupper, etc. +#include // memset, etc. +EA_RESTORE_ALL_VC_WARNINGS() + +namespace eastl +{ + /////////////////////////////////////////////////////////////////////////////// + /// DecodePart + /// + /// These implement UTF8/UCS2/UCS4 encoding/decoding. + /// + EASTL_API bool DecodePart(const char*& pSrc, const char* pSrcEnd, char*& pDest, char* pDestEnd); + EASTL_API bool DecodePart(const char*& pSrc, const char* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd); + EASTL_API bool DecodePart(const char*& pSrc, const char* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd); + + EASTL_API bool DecodePart(const char16_t*& pSrc, const char16_t* pSrcEnd, char*& pDest, char* pDestEnd); + EASTL_API bool DecodePart(const char16_t*& pSrc, const char16_t* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd); + EASTL_API bool DecodePart(const char16_t*& pSrc, const char16_t* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd); + + EASTL_API bool DecodePart(const char32_t*& pSrc, const char32_t* pSrcEnd, char*& pDest, char* pDestEnd); + EASTL_API bool DecodePart(const char32_t*& pSrc, const char32_t* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd); + EASTL_API bool DecodePart(const char32_t*& pSrc, const char32_t* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd); + + EASTL_API bool DecodePart(const int*& pSrc, const int* pSrcEnd, char*& pDest, char* pDestEnd); + EASTL_API bool DecodePart(const int*& pSrc, const int* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd); + EASTL_API bool DecodePart(const int*& pSrc, const int* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd); + + #if EA_CHAR8_UNIQUE + bool DecodePart(const char8_t*& pSrc, const char8_t* pSrcEnd, char8_t*& pDest, char8_t* pDestEnd); + + bool DecodePart(const char8_t*& pSrc, const char8_t* pSrcEnd, char*& pDest, char* pDestEnd); + bool DecodePart(const char8_t*& pSrc, const char8_t* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd); + bool DecodePart(const char8_t*& pSrc, const char8_t* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd); + + bool DecodePart(const char*& pSrc, const char* pSrcEnd, char8_t*& pDest, char8_t* pDestEnd); + bool DecodePart(const char16_t*& pSrc, const char16_t* pSrcEnd, char8_t*& pDest, char8_t* pDestEnd); + bool DecodePart(const char32_t*& pSrc, const char32_t* pSrcEnd, char8_t*& pDest, char8_t* pDestEnd); + #endif + + #if EA_WCHAR_UNIQUE + bool DecodePart(const wchar_t*& pSrc, const wchar_t* pSrcEnd, wchar_t*& pDest, wchar_t* pDestEnd); + + bool DecodePart(const wchar_t*& pSrc, const wchar_t* pSrcEnd, char*& pDest, char* pDestEnd); + bool DecodePart(const wchar_t*& pSrc, const wchar_t* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd); + bool DecodePart(const wchar_t*& pSrc, const wchar_t* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd); + + bool DecodePart(const char*& pSrc, const char* pSrcEnd, 
wchar_t*& pDest, wchar_t* pDestEnd); + bool DecodePart(const char16_t*& pSrc, const char16_t* pSrcEnd, wchar_t*& pDest, wchar_t* pDestEnd); + bool DecodePart(const char32_t*& pSrc, const char32_t* pSrcEnd, wchar_t*& pDest, wchar_t* pDestEnd); + #endif + + #if EA_CHAR8_UNIQUE && EA_WCHAR_UNIQUE + bool DecodePart(const char8_t*& pSrc, const char8_t* pSrcEnd, wchar_t*& pDest, wchar_t* pDestEnd); + bool DecodePart(const wchar_t*& pSrc, const wchar_t* pSrcEnd, char8_t*& pDest, char8_t* pDestEnd); + #endif + + + #if EA_WCHAR_UNIQUE + inline bool DecodePart(const wchar_t*& pSrc, const wchar_t* pSrcEnd, wchar_t*& pDest, wchar_t* pDestEnd) + { + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + } + + inline bool DecodePart(const wchar_t*& pSrc, const wchar_t* pSrcEnd, char*& pDest, char* pDestEnd) + { + #if (EA_WCHAR_SIZE == 2) + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), pDest, pDestEnd); + #elif (EA_WCHAR_SIZE == 4) + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), pDest, pDestEnd); + #endif + } + + inline bool DecodePart(const wchar_t*& pSrc, const wchar_t* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd) + { + #if (EA_WCHAR_SIZE == 2) + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), pDest, pDestEnd); + #elif (EA_WCHAR_SIZE == 4) + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), pDest, pDestEnd); + #endif + } + + inline bool DecodePart(const wchar_t*& pSrc, const wchar_t* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd) + { + #if (EA_WCHAR_SIZE == 2) + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), pDest, pDestEnd); + #elif (EA_WCHAR_SIZE == 4) + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), pDest, pDestEnd); + #endif + } + + inline bool DecodePart(const char*& pSrc, const char* pSrcEnd, wchar_t*& pDest, wchar_t* pDestEnd) + { + #if (EA_WCHAR_SIZE == 2) + return DecodePart(pSrc, pSrcEnd, reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + #elif (EA_WCHAR_SIZE == 4) + return DecodePart(pSrc, pSrcEnd, reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + #endif + } + + inline bool DecodePart(const char16_t*& pSrc, const char16_t* pSrcEnd, wchar_t*& pDest, wchar_t* pDestEnd) + { + #if (EA_WCHAR_SIZE == 2) + return DecodePart(pSrc, pSrcEnd, reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + #elif (EA_WCHAR_SIZE == 4) + return DecodePart(pSrc, pSrcEnd, reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + #endif + } + + inline bool DecodePart(const char32_t*& pSrc, const char32_t* pSrcEnd, wchar_t*& pDest, wchar_t* pDestEnd) + { + #if (EA_WCHAR_SIZE == 2) + return DecodePart(pSrc, pSrcEnd, reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + #elif (EA_WCHAR_SIZE == 4) + return DecodePart(pSrc, pSrcEnd, reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + #endif + } + #endif + + #if EA_CHAR8_UNIQUE + inline bool DecodePart(const char8_t*& pSrc, const char8_t* pSrcEnd, char8_t*& pDest, char8_t* pDestEnd) + { + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + } + + inline bool DecodePart(const char8_t*& pSrc, const char8_t* pSrcEnd, char*& pDest, char* pDestEnd) + { + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), pDest, pDestEnd); + } + + inline bool DecodePart(const char8_t*& pSrc, const char8_t* pSrcEnd, char16_t*& pDest, char16_t* pDestEnd) + { + return 
DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), pDest, pDestEnd); + } + + inline bool DecodePart(const char8_t*& pSrc, const char8_t* pSrcEnd, char32_t*& pDest, char32_t* pDestEnd) + { + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), pDest, pDestEnd); + } + + inline bool DecodePart(const char*& pSrc, const char* pSrcEnd, char8_t*& pDest, char8_t* pDestEnd) + { + return DecodePart(pSrc, pSrcEnd, reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + } + + inline bool DecodePart(const char16_t*& pSrc, const char16_t* pSrcEnd, char8_t*& pDest, char8_t* pDestEnd) + { + return DecodePart(pSrc, pSrcEnd, reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + } + + inline bool DecodePart(const char32_t*& pSrc, const char32_t* pSrcEnd, char8_t*& pDest, char8_t* pDestEnd) + { + return DecodePart(pSrc, pSrcEnd, reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + } + #endif + + #if EA_CHAR8_UNIQUE && EA_WCHAR_UNIQUE + inline bool DecodePart(const char8_t*& pSrc, const char8_t* pSrcEnd, wchar_t*& pDest, wchar_t* pDestEnd) + { + #if (EA_WCHAR_SIZE == 2) + return DecodePart(pSrc, pSrcEnd, reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + #elif (EA_WCHAR_SIZE == 4) + return DecodePart(pSrc, pSrcEnd, reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + #endif + } + + inline bool DecodePart(const wchar_t*& pSrc, const wchar_t* pSrcEnd, char8_t*& pDest, char8_t* pDestEnd) + { + #if (EA_WCHAR_SIZE == 2) + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + #elif (EA_WCHAR_SIZE == 4) + return DecodePart(reinterpret_cast(pSrc), reinterpret_cast(pSrcEnd), reinterpret_cast(pDest), reinterpret_cast(pDestEnd)); + #endif + } + #endif + + /////////////////////////////////////////////////////////////////////////////// + // 'char traits' functionality + // + inline char CharToLower(char c) + { return (char)tolower((uint8_t)c); } + + template + inline T CharToLower(T c) + { if((unsigned)c <= 0xff) return (T)tolower((uint8_t)c); return c; } + + + inline char CharToUpper(char c) + { return (char)toupper((uint8_t)c); } + + template + inline T CharToUpper(T c) + { if((unsigned)c <= 0xff) return (T)toupper((uint8_t)c); return c; } + + + template + int Compare(const T* p1, const T* p2, size_t n) + { + for(; n > 0; ++p1, ++p2, --n) + { + if(*p1 != *p2) + return (static_cast::type>(*p1) < + static_cast::type>(*p2)) ? -1 : 1; + } + return 0; + } + + inline int Compare(const char* p1, const char* p2, size_t n) + { + return memcmp(p1, p2, n); + } + + + template + inline int CompareI(const T* p1, const T* p2, size_t n) + { + for(; n > 0; ++p1, ++p2, --n) + { + const T c1 = CharToLower(*p1); + const T c2 = CharToLower(*p2); + + if(c1 != c2) + return (static_cast::type>(c1) < + static_cast::type>(c2)) ? 
-1 : 1; + } + return 0; + } + + + template + inline const T* Find(const T* p, T c, size_t n) + { + for(; n > 0; --n, ++p) + { + if(*p == c) + return p; + } + + return NULL; + } + + inline const char* Find(const char* p, char c, size_t n) + { + return (const char*)memchr(p, c, n); + } + + + template + inline EA_CPP14_CONSTEXPR size_t CharStrlen(const T* p) + { + const auto* pCurrent = p; + while(*pCurrent) + ++pCurrent; + return (size_t)(pCurrent - p); + } + + + template + inline T* CharStringUninitializedCopy(const T* pSource, const T* pSourceEnd, T* pDestination) + { + memmove(pDestination, pSource, (size_t)(pSourceEnd - pSource) * sizeof(T)); + return pDestination + (pSourceEnd - pSource); + } + + + template + const T* CharTypeStringFindEnd(const T* pBegin, const T* pEnd, T c) + { + const T* pTemp = pEnd; + while(--pTemp >= pBegin) + { + if(*pTemp == c) + return pTemp; + } + + return pEnd; + } + + + template + const T* CharTypeStringRSearch(const T* p1Begin, const T* p1End, + const T* p2Begin, const T* p2End) + { + // Test for zero length strings, in which case we have a match or a failure, + // but the return value is the same either way. + if((p1Begin == p1End) || (p2Begin == p2End)) + return p1Begin; + + // Test for a pattern of length 1. + if((p2Begin + 1) == p2End) + return CharTypeStringFindEnd(p1Begin, p1End, *p2Begin); + + // Test for search string length being longer than string length. + if((p2End - p2Begin) > (p1End - p1Begin)) + return p1End; + + // General case. + const T* pSearchEnd = (p1End - (p2End - p2Begin) + 1); + const T* pCurrent1; + const T* pCurrent2; + + while(pSearchEnd != p1Begin) + { + // Search for the last occurrence of *p2Begin. + pCurrent1 = CharTypeStringFindEnd(p1Begin, pSearchEnd, *p2Begin); + if(pCurrent1 == pSearchEnd) // If the first char of p2 wasn't found, + return p1End; // then we immediately have failure. + + // In this case, *pTemp == *p2Begin. So compare the rest. + pCurrent2 = p2Begin; + while(*pCurrent1++ == *pCurrent2++) + { + if(pCurrent2 == p2End) + return (pCurrent1 - (p2End - p2Begin)); + } + + // A smarter algorithm might know to subtract more than just one, + // but in most cases it won't make much difference anyway. 
+ --pSearchEnd; + } + + return p1End; + } + + + template + inline const T* CharTypeStringFindFirstOf(const T* p1Begin, const T* p1End, const T* p2Begin, const T* p2End) + { + for (; p1Begin != p1End; ++p1Begin) + { + for (const T* pTemp = p2Begin; pTemp != p2End; ++pTemp) + { + if (*p1Begin == *pTemp) + return p1Begin; + } + } + return p1End; + } + + + template + inline const T* CharTypeStringRFindFirstNotOf(const T* p1RBegin, const T* p1REnd, const T* p2Begin, const T* p2End) + { + for (; p1RBegin != p1REnd; --p1RBegin) + { + const T* pTemp; + for (pTemp = p2Begin; pTemp != p2End; ++pTemp) + { + if (*(p1RBegin - 1) == *pTemp) + break; + } + if (pTemp == p2End) + return p1RBegin; + } + return p1REnd; + } + + + template + inline const T* CharTypeStringFindFirstNotOf(const T* p1Begin, const T* p1End, const T* p2Begin, const T* p2End) + { + for (; p1Begin != p1End; ++p1Begin) + { + const T* pTemp; + for (pTemp = p2Begin; pTemp != p2End; ++pTemp) + { + if (*p1Begin == *pTemp) + break; + } + if (pTemp == p2End) + return p1Begin; + } + return p1End; + } + + + template + inline const T* CharTypeStringRFindFirstOf(const T* p1RBegin, const T* p1REnd, const T* p2Begin, const T* p2End) + { + for (; p1RBegin != p1REnd; --p1RBegin) + { + for (const T* pTemp = p2Begin; pTemp != p2End; ++pTemp) + { + if (*(p1RBegin - 1) == *pTemp) + return p1RBegin; + } + } + return p1REnd; + } + + + template + inline const T* CharTypeStringRFind(const T* pRBegin, const T* pREnd, const T c) + { + while (pRBegin > pREnd) + { + if (*(pRBegin - 1) == c) + return pRBegin; + --pRBegin; + } + return pREnd; + } + + + inline char* CharStringUninitializedFillN(char* pDestination, size_t n, const char c) + { + if(n) // Some compilers (e.g. GCC 4.3+) generate a warning (which can't be disabled) if you call memset with a size of 0. + memset(pDestination, (uint8_t)c, (size_t)n); + return pDestination + n; + } + + template + inline T* CharStringUninitializedFillN(T* pDestination, size_t n, const T c) + { + T * pDest = pDestination; + const T* const pEnd = pDestination + n; + while(pDest < pEnd) + *pDest++ = c; + return pDestination + n; + } + + + inline char* CharTypeAssignN(char* pDestination, size_t n, char c) + { + if(n) // Some compilers (e.g. GCC 4.3+) generate a warning (which can't be disabled) if you call memset with a size of 0. + return (char*)memset(pDestination, c, (size_t)n); + return pDestination; + } + + template + inline T* CharTypeAssignN(T* pDestination, size_t n, T c) + { + T* pDest = pDestination; + const T* const pEnd = pDestination + n; + while(pDest < pEnd) + *pDest++ = c; + return pDestination; + } +} // namespace eastl + +#endif // EASTL_CHAR_TRAITS_H diff --git a/libkram/eastl/include/EASTL/internal/config.h b/libkram/eastl/include/EASTL/internal/config.h new file mode 100644 index 00000000..530bbc87 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/config.h @@ -0,0 +1,1877 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_CONFIG_H +#define EASTL_INTERNAL_CONFIG_H + + +/////////////////////////////////////////////////////////////////////////////// +// ReadMe +// +// This is the EASTL configuration file. All configurable parameters of EASTL +// are controlled through this file. However, all the settings here can be +// manually overridden by the user. 
There are three ways for a user to override +// the settings in this file: +// +// - Simply edit this file. +// - Define EASTL_USER_CONFIG_HEADER. +// - Predefine individual defines (e.g. EASTL_ASSERT). +// +/////////////////////////////////////////////////////////////////////////////// + + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_USER_CONFIG_HEADER +// +// This allows the user to define a header file to be #included before the +// EASTL config.h contents are compiled. A primary use of this is to override +// the contents of this config.h file. Note that all the settings below in +// this file are user-overridable. +// +// Example usage: +// #define EASTL_USER_CONFIG_HEADER "MyConfigOverrides.h" +// #include +// +/////////////////////////////////////////////////////////////////////////////// + +#ifdef EASTL_USER_CONFIG_HEADER + #include EASTL_USER_CONFIG_HEADER +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_EABASE_DISABLED +// +// The user can disable EABase usage and manually supply the configuration +// via defining EASTL_EABASE_DISABLED and defining the appropriate entities +// globally or via the above EASTL_USER_CONFIG_HEADER. +// +// Example usage: +// #define EASTL_EABASE_DISABLED +// #include +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_EABASE_DISABLED + #include +#endif +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_VERSION +// +// We more or less follow the conventional EA packaging approach to versioning +// here. A primary distinction here is that minor versions are defined as two +// digit entities (e.g. .03") instead of minimal digit entities ".3"). The logic +// here is that the value is a counter and not a floating point fraction. +// Note that the major version doesn't have leading zeros. +// +// Example version strings: +// "0.91.00" // Major version 0, minor version 91, patch version 0. +// "1.00.00" // Major version 1, minor and patch version 0. +// "3.10.02" // Major version 3, minor version 10, patch version 02. +// "12.03.01" // Major version 12, minor version 03, patch version +// +// Example usage: +// printf("EASTL version: %s", EASTL_VERSION); +// printf("EASTL version: %d.%d.%d", EASTL_VERSION_N / 10000 % 100, EASTL_VERSION_N / 100 % 100, EASTL_VERSION_N % 100); +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_VERSION + #define EASTL_VERSION "3.17.06" + #define EASTL_VERSION_N 31706 +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EA_COMPILER_NO_STANDARD_CPP_LIBRARY +// +// Defined as 1 or undefined. +// Implements support for the definition of EA_COMPILER_NO_STANDARD_CPP_LIBRARY for the case +// of using EABase versions prior to the addition of its EA_COMPILER_NO_STANDARD_CPP_LIBRARY support. +// +#if !defined(EA_COMPILER_NO_STANDARD_CPP_LIBRARY) + #if defined(EA_PLATFORM_ANDROID) + // Disabled because EA's eaconfig/android_config/android_sdk packages currently + // don't support linking STL libraries. Perhaps we can figure out what linker arguments + // are needed for an app so we can manually specify them and then re-enable this code. + // + //#include + // + //#if (__ANDROID_API__ < 9) // Earlier versions of Android provide no std C++ STL implementation. 
+ #define EA_COMPILER_NO_STANDARD_CPP_LIBRARY 1 + //#endif + #endif +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EA_NOEXCEPT +// +// Defined as a macro. Provided here for backward compatibility with older +// EABase versions prior to 2.00.40 that don't yet define it themselves. +// +#if !defined(EA_NOEXCEPT) + #define EA_NOEXCEPT + #define EA_NOEXCEPT_IF(predicate) + #define EA_NOEXCEPT_EXPR(expression) false +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EA_CPP14_CONSTEXPR +// +// Defined as constexpr when a C++14 compiler is present. Defines it as nothing +// when using a C++11 compiler. +// C++14 relaxes the specification for constexpr such that it allows more +// kinds of expressions. Since a C++11 compiler doesn't allow this, we need +// to make a unique define for C++14 constexpr. This macro should be used only +// when you are using it with code that specfically requires C++14 constexpr +// functionality beyond the regular C++11 constexpr functionality. +// http://en.wikipedia.org/wiki/C%2B%2B14#Relaxed_constexpr_restrictions +// +#if !defined(EA_CPP14_CONSTEXPR) + #if defined(EA_COMPILER_CPP14_ENABLED) + #define EA_CPP14_CONSTEXPR constexpr + #else + #define EA_CPP14_CONSTEXPR // not supported + #define EA_NO_CPP14_CONSTEXPR + #endif +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL namespace +// +// We define this so that users that #include this config file can reference +// these namespaces without seeing any other files that happen to use them. +/////////////////////////////////////////////////////////////////////////////// + +/// EA Standard Template Library +namespace eastl +{ + // Intentionally empty. +} + + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_DEBUG +// +// Defined as an integer >= 0. Default is 1 for debug builds and 0 for +// release builds. This define is also a master switch for the default value +// of some other settings. +// +// Example usage: +// #if EASTL_DEBUG +// ... +// #endif +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_DEBUG + #if defined(EA_DEBUG) || defined(_DEBUG) + #define EASTL_DEBUG 1 + #else + #define EASTL_DEBUG 0 + #endif +#endif + +// Developer debug. Helps EASTL developers assert EASTL is coded correctly. +// Normally disabled for users since it validates internal things and not user things. +#ifndef EASTL_DEV_DEBUG + #define EASTL_DEV_DEBUG 0 +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_DEBUGPARAMS_LEVEL +// +// EASTL_DEBUGPARAMS_LEVEL controls what debug information is passed through to +// the allocator by default. +// This value may be defined by the user ... if not it will default to 1 for +// EA_DEBUG builds, otherwise 0. +// +// 0 - no debug information is passed through to allocator calls. +// 1 - 'name' is passed through to allocator calls. +// 2 - 'name', __FILE__, and __LINE__ are passed through to allocator calls. +// +// This parameter mirrors the equivalent parameter in the CoreAllocator package. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_DEBUGPARAMS_LEVEL + #if EASTL_DEBUG + #define EASTL_DEBUGPARAMS_LEVEL 2 + #else + #define EASTL_DEBUGPARAMS_LEVEL 0 + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_DLL +// +// Defined as 0 or 1. The default is dependent on the definition of EA_DLL. +// If EA_DLL is defined, then EASTL_DLL is 1, else EASTL_DLL is 0. +// EA_DLL is a define that controls DLL builds within the EAConfig build system. +// EASTL_DLL controls whether EASTL is built and used as a DLL. +// Normally you wouldn't do such a thing, but there are use cases for such +// a thing, particularly in the case of embedding C++ into C# applications. +// +#ifndef EASTL_DLL + #if defined(EA_DLL) + #define EASTL_DLL 1 + #else + #define EASTL_DLL 0 + #endif +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_IF_NOT_DLL +// +// Utility to include expressions only for static builds. +// +#ifndef EASTL_IF_NOT_DLL + #if EASTL_DLL + #define EASTL_IF_NOT_DLL(x) + #else + #define EASTL_IF_NOT_DLL(x) x + #endif +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_API +// +// This is used to label functions as DLL exports under Microsoft platforms. +// If EA_DLL is defined, then the user is building EASTL as a DLL and EASTL's +// non-templated functions will be exported. EASTL template functions are not +// labelled as EASTL_API (and are thus not exported in a DLL build). This is +// because it's not possible (or at least unsafe) to implement inline templated +// functions in a DLL. +// +// Example usage of EASTL_API: +// EASTL_API int someVariable = 10; // Export someVariable in a DLL build. +// +// struct EASTL_API SomeClass{ // Export SomeClass and its member functions in a DLL build. +// EASTL_LOCAL void PrivateMethod(); // Not exported. +// }; +// +// EASTL_API void SomeFunction(); // Export SomeFunction in a DLL build. +// +// +#if defined(EA_DLL) && !defined(EASTL_DLL) + #define EASTL_DLL 1 +#endif + +#ifndef EASTL_API // If the build file hasn't already defined this to be dllexport... + #if EASTL_DLL + #if defined(_MSC_VER) + #define EASTL_API __declspec(dllimport) + #define EASTL_LOCAL + #elif defined(__CYGWIN__) + #define EASTL_API __attribute__((dllimport)) + #define EASTL_LOCAL + #elif (defined(__GNUC__) && (__GNUC__ >= 4)) + #define EASTL_API __attribute__ ((visibility("default"))) + #define EASTL_LOCAL __attribute__ ((visibility("hidden"))) + #else + #define EASTL_API + #define EASTL_LOCAL + #endif + #else + #define EASTL_API + #define EASTL_LOCAL + #endif +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_EASTDC_API +// +// This is used for importing EAStdC functions into EASTL, possibly via a DLL import. 
+// +#ifndef EASTL_EASTDC_API + #if EASTL_DLL + #if defined(_MSC_VER) + #define EASTL_EASTDC_API __declspec(dllimport) + #define EASTL_EASTDC_LOCAL + #elif defined(__CYGWIN__) + #define EASTL_EASTDC_API __attribute__((dllimport)) + #define EASTL_EASTDC_LOCAL + #elif (defined(__GNUC__) && (__GNUC__ >= 4)) + #define EASTL_EASTDC_API __attribute__ ((visibility("default"))) + #define EASTL_EASTDC_LOCAL __attribute__ ((visibility("hidden"))) + #else + #define EASTL_EASTDC_API + #define EASTL_EASTDC_LOCAL + #endif + #else + #define EASTL_EASTDC_API + #define EASTL_EASTDC_LOCAL + #endif +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_EASTDC_VSNPRINTF +// +// Defined as 0 or 1. By default it is 1. +// +// When enabled EASTL uses EAStdC's Vsnprintf function directly instead of +// having the user provide a global Vsnprintf8/16/32 function. The benefit +// of this is that it will allow EASTL to just link to EAStdC's Vsnprintf +// without the user doing anything. The downside is that any users who aren't +// already using EAStdC will either need to now depend on EAStdC or globally +// define this property to be 0 and simply provide functions that have the same +// names. See the usage of EASTL_EASTDC_VSNPRINTF in string.h for more info. +// +#if !defined(EASTL_EASTDC_VSNPRINTF) + #define EASTL_EASTDC_VSNPRINTF 1 +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_NAME_ENABLED / EASTL_NAME / EASTL_NAME_VAL +// +// Used to wrap debug string names. In a release build, the definition +// goes away. These are present to avoid release build compiler warnings +// and to make code simpler. +// +// Example usage of EASTL_NAME: +// // pName will defined away in a release build and thus prevent compiler warnings. +// void allocator::set_name(const char* EASTL_NAME(pName)) +// { +// #if EASTL_NAME_ENABLED +// mpName = pName; +// #endif +// } +// +// Example usage of EASTL_NAME_VAL: +// // "xxx" is defined to NULL in a release build. +// vector::vector(const allocator_type& allocator = allocator_type(EASTL_NAME_VAL("xxx"))); +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_NAME_ENABLED + #define EASTL_NAME_ENABLED EASTL_DEBUG +#endif + +#ifndef EASTL_NAME + #if EASTL_NAME_ENABLED + #define EASTL_NAME(x) x + #define EASTL_NAME_VAL(x) x + #else + #define EASTL_NAME(x) + #define EASTL_NAME_VAL(x) ((const char*)NULL) + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_DEFAULT_NAME_PREFIX +// +// Defined as a string literal. Defaults to "EASTL". +// This define is used as the default name for EASTL where such a thing is +// referenced in EASTL. For example, if the user doesn't specify an allocator +// name for their deque, it is named "EASTL deque". However, you can override +// this to say "SuperBaseball deque" by changing EASTL_DEFAULT_NAME_PREFIX. +// +// Example usage (which is simply taken from how deque.h uses this define): +// #ifndef EASTL_DEQUE_DEFAULT_NAME +// #define EASTL_DEQUE_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " deque" +// #endif +// +#ifndef EASTL_DEFAULT_NAME_PREFIX + #define EASTL_DEFAULT_NAME_PREFIX "EASTL" +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_ASSERT_ENABLED +// +// Defined as 0 or non-zero. Default is same as EASTL_DEBUG. 
+// If EASTL_ASSERT_ENABLED is non-zero, then asserts will be executed via +// the assertion mechanism. +// +// Example usage: +// #if EASTL_ASSERT_ENABLED +// EASTL_ASSERT(v.size() > 17); +// #endif +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_ASSERT_ENABLED + #define EASTL_ASSERT_ENABLED EASTL_DEBUG +#endif + +// Developer assert. Helps EASTL developers assert EASTL is coded correctly. +// Normally disabled for users since it validates internal things and not user things. +#ifndef EASTL_DEV_ASSERT_ENABLED + #define EASTL_DEV_ASSERT_ENABLED EASTL_DEV_DEBUG +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_EMPTY_REFERENCE_ASSERT_ENABLED +// +// Defined as 0 or non-zero. Default is same as EASTL_ASSERT_ENABLED. +// This is like EASTL_ASSERT_ENABLED, except it is for empty container +// references. Sometime people like to be able to take a reference to +// the front of the container, but not use it if the container is empty. +// In practice it's often easier and more efficient to do this than to write +// extra code to check if the container is empty. +// +// NOTE: If this is enabled, EASTL_ASSERT_ENABLED must also be enabled +// +// Example usage: +// template +// inline typename vector::reference +// vector::front() +// { +// #if EASTL_ASSERT_ENABLED +// EASTL_ASSERT(mpEnd > mpBegin); +// #endif +// +// return *mpBegin; +// } +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + #define EASTL_EMPTY_REFERENCE_ASSERT_ENABLED EASTL_ASSERT_ENABLED +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// SetAssertionFailureFunction +// +// Allows the user to set a custom assertion failure mechanism. +// +// Example usage: +// void Assert(const char* pExpression, void* pContext); +// SetAssertionFailureFunction(Assert, this); +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_ASSERTION_FAILURE_DEFINED + #define EASTL_ASSERTION_FAILURE_DEFINED + + namespace eastl + { + typedef void (*EASTL_AssertionFailureFunction)(const char* pExpression, void* pContext); + EASTL_API void SetAssertionFailureFunction(EASTL_AssertionFailureFunction pFunction, void* pContext); + + // These are the internal default functions that implement asserts. + EASTL_API void AssertionFailure(const char* pExpression); + EASTL_API void AssertionFailureFunctionDefault(const char* pExpression, void* pContext); + } +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_ASSERT +// +// Assertion macro. Can be overridden by user with a different value. +// +// Example usage: +// EASTL_ASSERT(intVector.size() < 100); +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_ASSERT + #if EASTL_ASSERT_ENABLED + #define EASTL_ASSERT(expression) \ + EA_DISABLE_VC_WARNING(4127) \ + do { \ + EA_ANALYSIS_ASSUME(expression); \ + (void)((expression) || (eastl::AssertionFailure(#expression), 0)); \ + } while (0) \ + EA_RESTORE_VC_WARNING() + #else + #define EASTL_ASSERT(expression) + #endif +#endif + +// Developer assert. Helps EASTL developers assert EASTL is coded correctly. +// Normally disabled for users since it validates internal things and not user things. 
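The EASTL_ASSERT definition just above relies on a do/while(0) wrapper plus a short-circuiting || so the failure handler runs only when the expression is false. A minimal self-contained sketch of that same pattern follows; MY_ASSERT and my_failure are invented names, not EASTL's (EASTL routes through eastl::AssertionFailure), and the EA_DISABLE_VC_WARNING / EA_ANALYSIS_ASSUME wrappers are omitted:

#include <cstdio>

// Hypothetical failure handler standing in for eastl::AssertionFailure.
static void my_failure(const char* expr)
{
    std::fprintf(stderr, "assertion failed: %s\n", expr);
}

// Same shape as the expansion above: the || short-circuits past my_failure
// when the expression is true, and do/while(0) keeps the macro usable as a
// single statement (e.g. inside an un-braced if/else).
#define MY_ASSERT(expression) \
    do { (void)((expression) || (my_failure(#expression), 0)); } while (0)

int main()
{
    int count = 3;
    MY_ASSERT(count == 3); // passes, no output
    MY_ASSERT(count == 4); // prints: assertion failed: count == 4
    return 0;
}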
+#ifndef EASTL_DEV_ASSERT + #if EASTL_DEV_ASSERT_ENABLED + #define EASTL_DEV_ASSERT(expression) \ + EA_DISABLE_VC_WARNING(4127) \ + do { \ + EA_ANALYSIS_ASSUME(expression); \ + (void)((expression) || (eastl::AssertionFailure(#expression), 0)); \ + } while(0) \ + EA_RESTORE_VC_WARNING() + #else + #define EASTL_DEV_ASSERT(expression) + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_ASSERT_MSG +// +// Example usage: +// EASTL_ASSERT_MSG(false, "detected error condition!"); +// +/////////////////////////////////////////////////////////////////////////////// +#ifndef EASTL_ASSERT_MSG + #if EASTL_ASSERT_ENABLED + #define EASTL_ASSERT_MSG(expression, message) \ + EA_DISABLE_VC_WARNING(4127) \ + do { \ + EA_ANALYSIS_ASSUME(expression); \ + (void)((expression) || (eastl::AssertionFailure(message), 0)); \ + } while (0) \ + EA_RESTORE_VC_WARNING() + #else + #define EASTL_ASSERT_MSG(expression, message) + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_FAIL_MSG +// +// Failure macro. Can be overridden by user with a different value. +// +// Example usage: +// EASTL_FAIL("detected error condition!"); +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_FAIL_MSG + #if EASTL_ASSERT_ENABLED + #define EASTL_FAIL_MSG(message) (eastl::AssertionFailure(message)) + #else + #define EASTL_FAIL_MSG(message) + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_CT_ASSERT / EASTL_CT_ASSERT_NAMED +// +// EASTL_CT_ASSERT is a macro for compile time assertion checks, useful for +// validating *constant* expressions. The advantage over using EASTL_ASSERT +// is that errors are caught at compile time instead of runtime. +// +// Example usage: +// EASTL_CT_ASSERT(sizeof(uint32_t) == 4); +// +/////////////////////////////////////////////////////////////////////////////// + +#define EASTL_CT_ASSERT(expression) static_assert(expression, #expression) + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_CT_ASSERT_MSG +// +// EASTL_CT_ASSERT_MSG is a macro for compile time assertion checks, useful for +// validating *constant* expressions. The advantage over using EASTL_ASSERT +// is that errors are caught at compile time instead of runtime. +// The message must be a string literal. +// +// Example usage: +// EASTL_CT_ASSERT_MSG(sizeof(uint32_t) == 4, "The size of uint32_t must be 4."); +// +/////////////////////////////////////////////////////////////////////////////// + +#define EASTL_CT_ASSERT_MSG(expression, message) static_assert(expression, message) + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_DEBUG_BREAK / EASTL_DEBUG_BREAK_OVERRIDE +// +// This function causes an app to immediately stop under the debugger. +// It is implemented as a macro in order to allow stopping at the site +// of the call. +// +// EASTL_DEBUG_BREAK_OVERRIDE allows one to define EASTL_DEBUG_BREAK directly. +// This is useful in cases where you desire to disable EASTL_DEBUG_BREAK +// but do not wish to (or cannot) define a custom void function() to replace +// EASTL_DEBUG_BREAK callsites. 
+// +// Example usage: +// EASTL_DEBUG_BREAK(); +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_DEBUG_BREAK_OVERRIDE + #ifndef EASTL_DEBUG_BREAK + #if defined(_MSC_VER) && (_MSC_VER >= 1300) + #define EASTL_DEBUG_BREAK() __debugbreak() // This is a compiler intrinsic which will map to appropriate inlined asm for the platform. + #elif (defined(EA_PROCESSOR_ARM) && !defined(EA_PROCESSOR_ARM64)) && defined(__APPLE__) + #define EASTL_DEBUG_BREAK() asm("trap") + #elif defined(EA_PROCESSOR_ARM64) && defined(__APPLE__) + #include + #include + #define EASTL_DEBUG_BREAK() kill( getpid(), SIGINT ) + #elif defined(EA_PROCESSOR_ARM64) && defined(__GNUC__) + #define EASTL_DEBUG_BREAK() asm("brk 10") + #elif defined(EA_PROCESSOR_ARM) && defined(__GNUC__) + #define EASTL_DEBUG_BREAK() asm("BKPT 10") // The 10 is arbitrary. It's just a unique id. + #elif defined(EA_PROCESSOR_ARM) && defined(__ARMCC_VERSION) + #define EASTL_DEBUG_BREAK() __breakpoint(10) + #elif defined(EA_PROCESSOR_POWERPC) // Generic PowerPC. + #define EASTL_DEBUG_BREAK() asm(".long 0") // This triggers an exception by executing opcode 0x00000000. + #elif (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)) && defined(EA_ASM_STYLE_INTEL) + #define EASTL_DEBUG_BREAK() { __asm int 3 } + #elif (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)) && (defined(EA_ASM_STYLE_ATT) || defined(__GNUC__)) + #define EASTL_DEBUG_BREAK() asm("int3") + #else + void EASTL_DEBUG_BREAK(); // User must define this externally. + #endif + #else + void EASTL_DEBUG_BREAK(); // User must define this externally. + #endif +#else + #ifndef EASTL_DEBUG_BREAK + #if EASTL_DEBUG_BREAK_OVERRIDE == 1 + // define an empty callable to satisfy the call site. + #define EASTL_DEBUG_BREAK ([]{}) + #else + #define EASTL_DEBUG_BREAK EASTL_DEBUG_BREAK_OVERRIDE + #endif + #else + #error EASTL_DEBUG_BREAK is already defined yet you would like to override it. Please ensure no other headers are already defining EASTL_DEBUG_BREAK before this header (config.h) is included + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_ALLOCATOR_COPY_ENABLED +// +// Defined as 0 or 1. Default is 0 (disabled) until some future date. +// If enabled (1) then container operator= copies the allocator from the +// source container. It ideally should be set to enabled but for backwards +// compatibility with older versions of EASTL it is currently set to 0. +// Regardless of whether this value is 0 or 1, this container copy constructs +// or copy assigns allocators. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_ALLOCATOR_COPY_ENABLED + #define EASTL_ALLOCATOR_COPY_ENABLED 0 +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_FIXED_SIZE_TRACKING_ENABLED +// +// Defined as an integer >= 0. Default is same as EASTL_DEBUG. +// If EASTL_FIXED_SIZE_TRACKING_ENABLED is enabled, then fixed +// containers in debug builds track the max count of objects +// that have been in the container. This allows for the tuning +// of fixed container sizes to their minimum required size. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_FIXED_SIZE_TRACKING_ENABLED + #define EASTL_FIXED_SIZE_TRACKING_ENABLED EASTL_DEBUG +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_RTTI_ENABLED +// +// Defined as 0 or 1. Default is 1 if RTTI is supported by the compiler. +// This define exists so that we can use some dynamic_cast operations in the +// code without warning. dynamic_cast is only used if the specifically refers +// to it; EASTL won't do dynamic_cast behind your back. +// +// Example usage: +// #if EASTL_RTTI_ENABLED +// pChildClass = dynamic_cast(pParentClass); +// #endif +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_RTTI_ENABLED + // The VC++ default Standard Library (Dinkumware) disables major parts of RTTI + // (e.g. type_info) if exceptions are disabled, even if RTTI itself is enabled. + // _HAS_EXCEPTIONS is defined by Dinkumware to 0 or 1 (disabled or enabled). + #if defined(EA_COMPILER_NO_RTTI) || (defined(_MSC_VER) && defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && !(defined(_HAS_EXCEPTIONS) && _HAS_EXCEPTIONS)) + #define EASTL_RTTI_ENABLED 0 + #else + #define EASTL_RTTI_ENABLED 1 + #endif +#endif + + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_EXCEPTIONS_ENABLED +// +// Defined as 0 or 1. Default is to follow what the compiler settings are. +// The user can predefine EASTL_EXCEPTIONS_ENABLED to 0 or 1; however, if the +// compiler is set to disable exceptions then EASTL_EXCEPTIONS_ENABLED is +// forced to a value of 0 regardless of the user predefine. +// +// Note that we do not enable EASTL exceptions by default if the compiler +// has exceptions enabled. To enable EASTL_EXCEPTIONS_ENABLED you need to +// manually set it to 1. +// +/////////////////////////////////////////////////////////////////////////////// + +#if !defined(EASTL_EXCEPTIONS_ENABLED) || ((EASTL_EXCEPTIONS_ENABLED == 1) && defined(EA_COMPILER_NO_EXCEPTIONS)) + #define EASTL_EXCEPTIONS_ENABLED 0 +#endif + + + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_STRING_OPT_XXXX +// +// Enables some options / optimizations options that cause the string class +// to behave slightly different from the C++ standard basic_string. These are +// options whereby you can improve performance by avoiding operations that +// in practice may never occur for you. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_STRING_OPT_EXPLICIT_CTORS + // Defined as 0 or 1. Default is 0. + // Defines if we should implement explicity in constructors where the C++ + // standard string does not. The advantage of enabling explicit constructors + // is that you can do this: string s = "hello"; in addition to string s("hello"); + // The disadvantage of enabling explicity constructors is that there can be + // silent conversions done which impede performance if the user isn't paying + // attention. + // C++ standard string ctors are not explicit. + #define EASTL_STRING_OPT_EXPLICIT_CTORS 0 +#endif + +#ifndef EASTL_STRING_OPT_LENGTH_ERRORS + // Defined as 0 or 1. Default is equal to EASTL_EXCEPTIONS_ENABLED. + // Defines if we check for string values going beyond kMaxSize + // (a very large value) and throw exections if so. + // C++ standard strings are expected to do such checks. 
+ #define EASTL_STRING_OPT_LENGTH_ERRORS EASTL_EXCEPTIONS_ENABLED +#endif + +#ifndef EASTL_STRING_OPT_RANGE_ERRORS + // Defined as 0 or 1. Default is equal to EASTL_EXCEPTIONS_ENABLED. + // Defines if we check for out-of-bounds references to string + // positions and throw exceptions if so. Well-behaved code shouldn't + // refence out-of-bounds positions and so shouldn't need these checks. + // C++ standard strings are expected to do such range checks. + #define EASTL_STRING_OPT_RANGE_ERRORS EASTL_EXCEPTIONS_ENABLED +#endif + +#ifndef EASTL_STRING_OPT_ARGUMENT_ERRORS + // Defined as 0 or 1. Default is 0. + // Defines if we check for NULL ptr arguments passed to string + // functions by the user and throw exceptions if so. Well-behaved code + // shouldn't pass bad arguments and so shouldn't need these checks. + // Also, some users believe that strings should check for NULL pointers + // in all their arguments and do no-ops if so. This is very debatable. + // C++ standard strings are not required to check for such argument errors. + #define EASTL_STRING_OPT_ARGUMENT_ERRORS 0 +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_BITSET_SIZE_T +// +// Defined as 0 or 1. Default is 1. +// Controls whether bitset uses size_t or eastl_size_t. +// +#ifndef EASTL_BITSET_SIZE_T + #define EASTL_BITSET_SIZE_T 1 +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_INT128_SUPPORTED +// +// Defined as 0 or 1. +// +#ifndef EASTL_INT128_SUPPORTED + #if defined(__SIZEOF_INT128__) || (defined(EA_COMPILER_INTMAX_SIZE) && (EA_COMPILER_INTMAX_SIZE >= 16)) + #define EASTL_INT128_SUPPORTED 1 + #else + #define EASTL_INT128_SUPPORTED 0 + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_DEFAULT_ALLOCATOR_ALIGNED_ALLOCATIONS_SUPPORTED +// +// Defined as 0 or 1. +// Tells if you can use the default EASTL allocator to do aligned allocations, +// which for most uses tells if you can store aligned objects in containers +// that use default allocators. It turns out that when built as a DLL for +// some platforms, EASTL doesn't have a way to do aligned allocations, as it +// doesn't have a heap that supports it. There is a way to work around this +// with dynamically defined allocators, but that's currently a to-do. +// +#ifndef EASTL_DEFAULT_ALLOCATOR_ALIGNED_ALLOCATIONS_SUPPORTED + #if EASTL_DLL + #define EASTL_DEFAULT_ALLOCATOR_ALIGNED_ALLOCATIONS_SUPPORTED 0 + #else + #define EASTL_DEFAULT_ALLOCATOR_ALIGNED_ALLOCATIONS_SUPPORTED 1 + #endif +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_INT128_DEFINED +// +// Defined as 0 or 1. +// Specifies whether eastl_int128_t/eastl_uint128_t have been typedef'd yet. +// +#ifndef EASTL_INT128_DEFINED + #if EASTL_INT128_SUPPORTED + #define EASTL_INT128_DEFINED 1 + + #if defined(__SIZEOF_INT128__) || defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG) + typedef __int128_t eastl_int128_t; + typedef __uint128_t eastl_uint128_t; + #else + typedef int128_t eastl_int128_t; // The EAStdC package defines an EA::StdC::int128_t and uint128_t type, + typedef uint128_t eastl_uint128_t; // though they are currently within the EA::StdC namespace. 
+ #endif + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_BITSET_WORD_TYPE_DEFAULT / EASTL_BITSET_WORD_SIZE_DEFAULT +// +// Defined as an integral power of two type, usually uint32_t or uint64_t. +// Specifies the word type that bitset should use internally to implement +// storage. By default this is the platform register word size, but there +// may be reasons to use a different value. +// +// Defines the integral data type used by bitset by default. +// You can override this default on a bitset-by-bitset case by supplying a +// custom bitset WordType template parameter. +// +// The C++ standard specifies that the std::bitset word type be unsigned long, +// but that isn't necessarily the most efficient data type for the given platform. +// We can follow the standard and be potentially less efficient or we can do what +// is more efficient but less like the C++ std::bitset. +// +#if !defined(EASTL_BITSET_WORD_TYPE_DEFAULT) + #if defined(EASTL_BITSET_WORD_SIZE) // EASTL_BITSET_WORD_SIZE is deprecated, but we temporarily support the ability for the user to specify it. Use EASTL_BITSET_WORD_TYPE_DEFAULT instead. + #if (EASTL_BITSET_WORD_SIZE == 4) + #define EASTL_BITSET_WORD_TYPE_DEFAULT uint32_t + #define EASTL_BITSET_WORD_SIZE_DEFAULT 4 + #else + #define EASTL_BITSET_WORD_TYPE_DEFAULT uint64_t + #define EASTL_BITSET_WORD_SIZE_DEFAULT 8 + #endif + #elif (EA_PLATFORM_WORD_SIZE == 16) // EA_PLATFORM_WORD_SIZE is defined in EABase. + #define EASTL_BITSET_WORD_TYPE_DEFAULT uint128_t + #define EASTL_BITSET_WORD_SIZE_DEFAULT 16 + #elif (EA_PLATFORM_WORD_SIZE == 8) + #define EASTL_BITSET_WORD_TYPE_DEFAULT uint64_t + #define EASTL_BITSET_WORD_SIZE_DEFAULT 8 + #elif (EA_PLATFORM_WORD_SIZE == 4) + #define EASTL_BITSET_WORD_TYPE_DEFAULT uint32_t + #define EASTL_BITSET_WORD_SIZE_DEFAULT 4 + #else + #define EASTL_BITSET_WORD_TYPE_DEFAULT uint16_t + #define EASTL_BITSET_WORD_SIZE_DEFAULT 2 + #endif +#endif + + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_LIST_SIZE_CACHE +// +// Defined as 0 or 1. Default is 1. Changed from 0 in version 1.16.01. +// If defined as 1, the list and slist containers (and possibly any additional +// containers as well) keep a member mSize (or similar) variable which allows +// the size() member function to execute in constant time (a.k.a. O(1)). +// There are debates on both sides as to whether it is better to have this +// cached value or not, as having it entails some cost (memory and code). +// To consider: Make list size caching an optional template parameter. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_LIST_SIZE_CACHE + #define EASTL_LIST_SIZE_CACHE 1 +#endif + +#ifndef EASTL_SLIST_SIZE_CACHE + #define EASTL_SLIST_SIZE_CACHE 1 +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_MAX_STACK_USAGE +// +// Defined as an integer greater than zero. Default is 4000. +// There are some places in EASTL where temporary objects are put on the +// stack. A common example of this is in the implementation of container +// swap functions whereby a temporary copy of the container is made. +// There is a problem, however, if the size of the item created on the stack +// is very large. This can happen with fixed-size containers, for example. 
+// The EASTL_MAX_STACK_USAGE define specifies the maximum amount of memory +// (in bytes) that the given platform/compiler will safely allow on the stack. +// Platforms such as Windows will generally allow larger values than embedded +// systems or console machines, but it is usually a good idea to stick with +// a max usage value that is portable across all platforms, lest the user be +// surprised when something breaks as it is ported to another platform. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_MAX_STACK_USAGE + #define EASTL_MAX_STACK_USAGE 4000 +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_VA_COPY_ENABLED +// +// Defined as 0 or 1. Default is 1 for compilers that need it, 0 for others. +// Some compilers on some platforms implement va_list whereby its contents +// are destroyed upon usage, even if passed by value to another function. +// With these compilers you can use va_copy to save and restore a va_list. +// Known compiler/platforms that destroy va_list contents upon usage include: +// CodeWarrior on PowerPC +// GCC on x86-64 +// However, va_copy is part of the C99 standard and not part of earlier C and +// C++ standards. So not all compilers support it. VC++ doesn't support va_copy, +// but it turns out that VC++ doesn't usually need it on the platforms it supports, +// and va_copy can usually be implemented via memcpy(va_list, va_list) with VC++. +// +// Example usage: +// void Function(va_list arguments) +// { +// #if EASTL_VA_COPY_ENABLED +// va_list argumentsCopy; +// va_copy(argumentsCopy, arguments); +// #endif +// +// #if EASTL_VA_COPY_ENABLED +// va_end(argumentsCopy); +// #endif +// } +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_VA_COPY_ENABLED + #if ((defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__)) && (!defined(__i386__) || defined(__x86_64__)) && !defined(__ppc__) && !defined(__PPC__) && !defined(__PPC64__) + #define EASTL_VA_COPY_ENABLED 1 + #else + #define EASTL_VA_COPY_ENABLED 0 + #endif +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_OPERATOR_EQUALS_OTHER_ENABLED +// +// Defined as 0 or 1. Default is 0 until such day that it's deemed safe. +// When enabled, enables operator= for other char types, e.g. for code +// like this: +// eastl::string8 s8; +// eastl::string16 s16; +// s8 = s16; +// This option is considered experimental, and may exist as such for an +// indefinite amount of time. +// +#if !defined(EASTL_OPERATOR_EQUALS_OTHER_ENABLED) + #define EASTL_OPERATOR_EQUALS_OTHER_ENABLED 0 +#endif +/////////////////////////////////////////////////////////////////////////////// + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_LIST_PROXY_ENABLED +// +#if !defined(EASTL_LIST_PROXY_ENABLED) + // GCC with -fstrict-aliasing has bugs (or undocumented functionality in their + // __may_alias__ implementation. The compiler gets confused about function signatures. + // VC8 (1400) doesn't need the proxy because it has built-in smart debugging capabilities. 
+ #if defined(EASTL_DEBUG) && !defined(__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER < 1400)) + #define EASTL_LIST_PROXY_ENABLED 1 + #define EASTL_LIST_PROXY_MAY_ALIAS EASTL_MAY_ALIAS + #else + #define EASTL_LIST_PROXY_ENABLED 0 + #define EASTL_LIST_PROXY_MAY_ALIAS + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_STD_ITERATOR_CATEGORY_ENABLED +// +// Defined as 0 or 1. Default is 0. +// If defined as non-zero, EASTL iterator categories (iterator.h's input_iterator_tag, +// forward_iterator_tag, etc.) are defined to be those from std C++ in the std +// namespace. The reason for wanting to enable such a feature is that it allows +// EASTL containers and algorithms to work with std STL containes and algorithms. +// The default value was changed from 1 to 0 in EASL 1.13.03, January 11, 2012. +// The reason for the change was that almost nobody was taking advantage of it and +// it was slowing down compile times for some compilers quite a bit due to them +// having a lot of headers behind . +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_STD_ITERATOR_CATEGORY_ENABLED + #define EASTL_STD_ITERATOR_CATEGORY_ENABLED 0 +#endif + +#if EASTL_STD_ITERATOR_CATEGORY_ENABLED + #define EASTL_ITC_NS std +#else + #define EASTL_ITC_NS eastl +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_VALIDATION_ENABLED +// +// Defined as an integer >= 0. Default is to be equal to EASTL_DEBUG. +// If nonzero, then a certain amount of automatic runtime validation is done. +// Runtime validation is not considered the same thing as asserting that user +// input values are valid. Validation refers to internal consistency checking +// of the validity of containers and their iterators. Validation checking is +// something that often involves significantly more than basic assertion +// checking, and it may sometimes be desirable to disable it. +// This macro would generally be used internally by EASTL. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_VALIDATION_ENABLED + #define EASTL_VALIDATION_ENABLED EASTL_DEBUG +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_VALIDATE_COMPARE +// +// Defined as EASTL_ASSERT or defined away. Default is EASTL_ASSERT if EASTL_VALIDATION_ENABLED is enabled. +// This is used to validate user-supplied comparison functions, particularly for sorting purposes. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_VALIDATE_COMPARE_ENABLED + #define EASTL_VALIDATE_COMPARE_ENABLED EASTL_VALIDATION_ENABLED +#endif + +#if EASTL_VALIDATE_COMPARE_ENABLED + #define EASTL_VALIDATE_COMPARE EASTL_ASSERT +#else + #define EASTL_VALIDATE_COMPARE(expression) +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_VALIDATE_INTRUSIVE_LIST +// +// Defined as an integral value >= 0. Controls the amount of automatic validation +// done by intrusive_list. A value of 0 means no automatic validation is done. +// As of this writing, EASTL_VALIDATE_INTRUSIVE_LIST defaults to 0, as it makes +// the intrusive_list_node become a non-POD, which may be an issue for some code. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_VALIDATE_INTRUSIVE_LIST + #define EASTL_VALIDATE_INTRUSIVE_LIST 0 +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_FORCE_INLINE +// +// Defined as a "force inline" expression or defined away. +// You generally don't need to use forced inlining with the Microsoft and +// Metrowerks compilers, but you may need it with the GCC compiler (any version). +// +// Example usage: +// template +// EASTL_FORCE_INLINE typename vector::size_type +// vector::size() const +// { return mpEnd - mpBegin; } +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_FORCE_INLINE + #define EASTL_FORCE_INLINE EA_FORCE_INLINE +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_MAY_ALIAS +// +// Defined as a macro that wraps the GCC may_alias attribute. This attribute +// has no significance for VC++ because VC++ doesn't support the concept of +// strict aliasing. Users should avoid writing code that breaks strict +// aliasing rules; EASTL_MAY_ALIAS is for cases with no alternative. +// +// Example usage: +// uint32_t value EASTL_MAY_ALIAS; +// +// Example usage: +// typedef uint32_t EASTL_MAY_ALIAS value_type; +// value_type value; +// +#if defined(__GNUC__) && (((__GNUC__ * 100) + __GNUC_MINOR__) >= 303) && !defined(EA_COMPILER_RVCT) + #define EASTL_MAY_ALIAS __attribute__((__may_alias__)) +#else + #define EASTL_MAY_ALIAS +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_LIKELY / EASTL_UNLIKELY +// +// Defined as a macro which gives a hint to the compiler for branch +// prediction. GCC gives you the ability to manually give a hint to +// the compiler about the result of a comparison, though it's often +// best to compile shipping code with profiling feedback under both +// GCC (-fprofile-arcs) and VC++ (/LTCG:PGO, etc.). However, there +// are times when you feel very sure that a boolean expression will +// usually evaluate to either true or false and can help the compiler +// by using an explicity directive... +// +// Example usage: +// if(EASTL_LIKELY(a == 0)) // Tell the compiler that a will usually equal 0. +// { ... } +// +// Example usage: +// if(EASTL_UNLIKELY(a == 0)) // Tell the compiler that a will usually not equal 0. +// { ... } +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_LIKELY + #if defined(__GNUC__) && (__GNUC__ >= 3) + #define EASTL_LIKELY(x) __builtin_expect(!!(x), true) + #define EASTL_UNLIKELY(x) __builtin_expect(!!(x), false) + #else + #define EASTL_LIKELY(x) (x) + #define EASTL_UNLIKELY(x) (x) + #endif +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_STD_TYPE_TRAITS_AVAILABLE +// +// Defined as 0 or 1; default is based on auto-detection. +// Specifies whether Standard C++11 support exists. +// Sometimes the auto-detection below fails to work properly and the +// user needs to override it. Does not define whether the compiler provides +// built-in compiler type trait support (e.g. __is_abstract()), as some +// compilers will EASTL_STD_TYPE_TRAITS_AVAILABLE = 0, but have built +// in type trait support. +// +#ifndef EASTL_STD_TYPE_TRAITS_AVAILABLE + /* Disabled because we don't currently need it. 
+ #if defined(_MSC_VER) && (_MSC_VER >= 1500) // VS2008 or later + #pragma warning(push, 0) + #include + #pragma warning(pop) + #if ((defined(_HAS_TR1) && _HAS_TR1) || _MSC_VER >= 1700) // VS2012 (1700) and later has built-in type traits support. + #define EASTL_STD_TYPE_TRAITS_AVAILABLE 1 + #include + #else + #define EASTL_STD_TYPE_TRAITS_AVAILABLE 0 + #endif + + #elif defined(EA_COMPILER_CLANG) || (defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4003) && !defined(__GCCXML__)) && !defined(EA_COMPILER_NO_STANDARD_CPP_LIBRARY) + #include // This will define __GLIBCXX__ if using GNU's libstdc++ and _LIBCPP_VERSION if using clang's libc++. + + #if defined(EA_COMPILER_CLANG) && !defined(EA_PLATFORM_APPLE) // As of v3.0.0, Apple's clang doesn't support type traits. + // http://clang.llvm.org/docs/LanguageExtensions.html#checking_type_traits + // Clang has some built-in compiler trait support. This support doesn't currently + // directly cover all our type_traits, though the C++ Standard Library that's used + // with clang could fill that in. + #define EASTL_STD_TYPE_TRAITS_AVAILABLE 1 + #endif + + #if !defined(EASTL_STD_TYPE_TRAITS_AVAILABLE) + #if defined(_LIBCPP_VERSION) // This is defined by clang's libc++. + #include + + #elif defined(__GLIBCXX__) && (__GLIBCXX__ >= 20090124) // It's not clear if this is the oldest version that has type traits; probably it isn't. + #define EASTL_STD_TYPE_TRAITS_AVAILABLE 1 + + #if defined(__GXX_EXPERIMENTAL_CXX0X__) // To do: Update this test to include conforming C++11 implementations. + #include + #else + #include + #endif + #else + #define EASTL_STD_TYPE_TRAITS_AVAILABLE 0 + #endif + #endif + + #elif defined(__MSL_CPP__) && (__MSL_CPP__ >= 0x8000) // CodeWarrior compiler. + #define EASTL_STD_TYPE_TRAITS_AVAILABLE 0 + // To do: Implement support for this (via modifying the EASTL type + // traits headers, as CodeWarrior provides this. + #else + #define EASTL_STD_TYPE_TRAITS_AVAILABLE 0 + #endif + */ +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE +// +// Defined as 0 or 1; default is based on auto-detection. +// Specifies whether the compiler provides built-in compiler type trait support +// (e.g. __is_abstract()). Does not specify any details about which traits +// are available or what their standards-compliance is. Nevertheless this is a +// useful macro identifier for our type traits implementation. +// +#ifndef EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE + #if defined(_MSC_VER) && (_MSC_VER >= 1500) // VS2008 or later + #pragma warning(push, 0) + #include + #pragma warning(pop) + #if ((defined(_HAS_TR1) && _HAS_TR1) || _MSC_VER >= 1700) // VS2012 (1700) and later has built-in type traits support. + #define EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE 1 + #else + #define EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE 0 + #endif + #elif defined(EA_COMPILER_CLANG) && defined(__APPLE__) && defined(_CXXCONFIG) // Apple clang but with GCC's libstdc++. + #define EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE 0 + #elif defined(EA_COMPILER_CLANG) + #define EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE 1 + #elif defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4003) && !defined(__GCCXML__) + #define EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE 1 + #elif defined(__MSL_CPP__) && (__MSL_CPP__ >= 0x8000) // CodeWarrior compiler. 
+ #define EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE 1 + #else + #define EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE 0 + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_RESET_ENABLED +// +// Defined as 0 or 1; default is 1 for the time being. +// The reset_lose_memory function works the same as reset, as described below. +// +// Specifies whether the container reset functionality is enabled. If enabled +// then ::reset forgets its memory, otherwise it acts as the clear +// function. The reset function is potentially dangerous, as it (by design) +// causes containers to not free their memory. +// This option has no applicability to the bitset::reset function, as bitset +// isn't really a container. Also it has no applicability to the smart pointer +// wrappers (e.g. intrusive_ptr). +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_RESET_ENABLED + #define EASTL_RESET_ENABLED 0 +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_MINMAX_ENABLED +// +// Defined as 0 or 1; default is 1. +// Specifies whether the min and max algorithms are available. +// It may be useful to disable the min and max algorithms because sometimes +// #defines for min and max exist which would collide with EASTL min and max. +// Note that there are already alternative versions of min and max in EASTL +// with the min_alt and max_alt functions. You can use these without colliding +// with min/max macros that may exist. +// +/////////////////////////////////////////////////////////////////////////////// +#ifndef EASTL_MINMAX_ENABLED + #define EASTL_MINMAX_ENABLED 1 +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_NOMINMAX +// +// Defined as 0 or 1; default is 1. +// MSVC++ has #defines for min/max which collide with the min/max algorithm +// declarations. If EASTL_NOMINMAX is defined as 1, then we undefine min and +// max if they are #defined by an external library. This allows our min and +// max definitions in algorithm.h to work as expected. An alternative to +// the enabling of EASTL_NOMINMAX is to #define NOMINMAX in your project +// settings if you are compiling for Windows. +// Note that this does not control the availability of the EASTL min and max +// algorithms; the EASTL_MINMAX_ENABLED configuration parameter does that. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_NOMINMAX + #define EASTL_NOMINMAX 1 +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_STD_CPP_ONLY +// +// Defined as 0 or 1; default is 0. +// Disables the use of compiler language extensions. We use compiler language +// extensions only in the case that they provide some benefit that can't be +// had any other practical way. But sometimes the compiler is set to disable +// language extensions or sometimes one compiler's preprocesor is used to generate +// code for another compiler, and so it's necessary to disable language extension usage. +// +// Example usage: +// #if defined(_MSC_VER) && !EASTL_STD_CPP_ONLY +// enum : size_type { npos = container_type::npos }; // Microsoft extension which results in significantly smaller debug symbols. 
+// #else +// static const size_type npos = container_type::npos; +// #endif +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_STD_CPP_ONLY + #define EASTL_STD_CPP_ONLY 0 +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_NO_RVALUE_REFERENCES +// +// Defined as 0 or 1. +// This is the same as EABase EA_COMPILER_NO_RVALUE_REFERENCES except that it +// follows the convention of being always defined, as 0 or 1. +/////////////////////////////////////////////////////////////////////////////// +#if !defined(EASTL_NO_RVALUE_REFERENCES) + #if defined(EA_COMPILER_NO_RVALUE_REFERENCES) + #define EASTL_NO_RVALUE_REFERENCES 1 + #else + #define EASTL_NO_RVALUE_REFERENCES 0 + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_MOVE_SEMANTICS_ENABLED +// +// Defined as 0 or 1. +// If enabled then C++11-like functionality with rvalue references and move +// operations is enabled. +/////////////////////////////////////////////////////////////////////////////// +#if !defined(EASTL_MOVE_SEMANTICS_ENABLED) + #if EASTL_NO_RVALUE_REFERENCES // If the compiler doesn't support rvalue references or EASTL is configured to disable them... + #define EASTL_MOVE_SEMANTICS_ENABLED 0 + #else + #define EASTL_MOVE_SEMANTICS_ENABLED 1 + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_VARIADIC_TEMPLATES_ENABLED +// +// Defined as 0 or 1. +// If enabled then C++11-like functionality with variadic templates is enabled. +/////////////////////////////////////////////////////////////////////////////// +#if !defined(EASTL_VARIADIC_TEMPLATES_ENABLED) + #if defined(EA_COMPILER_NO_VARIADIC_TEMPLATES) // If the compiler doesn't support variadic templates + #define EASTL_VARIADIC_TEMPLATES_ENABLED 0 + #else + #define EASTL_VARIADIC_TEMPLATES_ENABLED 1 + #endif +#endif + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_VARIABLE_TEMPLATES_ENABLED +// +// Defined as 0 or 1. +// If enabled then C++11-like functionality with variable templates is enabled. +/////////////////////////////////////////////////////////////////////////////// +#if !defined(EASTL_VARIABLE_TEMPLATES_ENABLED) + #if((EABASE_VERSION_N < 20605) || defined(EA_COMPILER_NO_VARIABLE_TEMPLATES)) + #define EASTL_VARIABLE_TEMPLATES_ENABLED 0 + #else + #define EASTL_VARIABLE_TEMPLATES_ENABLED 1 + #endif +#endif + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_INLINE_VARIABLE_ENABLED +// +// Defined as 0 or 1. +// If enabled then C++17-like functionality with inline variable is enabled. 
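Referring back to the EASTL_MOVE_SEMANTICS_ENABLED and EASTL_VARIADIC_TEMPLATES_ENABLED switches above, a small sketch of the pattern they enable in container-style code; the single-slot Holder class is purely illustrative:

    #include <EASTL/utility.h>   // eastl::move, eastl::forward

    template <typename T>
    class Holder
    {
    public:
        void set(const T& value) { mValue = value; }            // always compiled

    #if EASTL_MOVE_SEMANTICS_ENABLED
        void set(T&& value) { mValue = eastl::move(value); }    // rvalue overload only when moves are enabled
    #endif

    #if EASTL_MOVE_SEMANTICS_ENABLED && EASTL_VARIADIC_TEMPLATES_ENABLED
        template <typename... Args>
        void emplace(Args&&... args)                            // construct the stored value in place
            { mValue = T(eastl::forward<Args>(args)...); }
    #endif

    private:
        T mValue;
    };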
+/////////////////////////////////////////////////////////////////////////////// +#if !defined(EASTL_INLINE_VARIABLE_ENABLED) + #if((EABASE_VERSION_N < 20707) || defined(EA_COMPILER_NO_INLINE_VARIABLES)) + #define EASTL_INLINE_VARIABLE_ENABLED 0 + #else + #define EASTL_INLINE_VARIABLE_ENABLED 1 + #endif +#endif + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_CPP17_INLINE_VARIABLE +// +// Used to prefix a variable as inline when C++17 inline variables are available +// Usage: EASTL_CPP17_INLINE_VARIABLE constexpr bool type_trait_v = type_trait::value +/////////////////////////////////////////////////////////////////////////////// +#if !defined(EASTL_CPP17_INLINE_VARIABLE) + #if EASTL_INLINE_VARIABLE_ENABLED + #define EASTL_CPP17_INLINE_VARIABLE inline + #else + #define EASTL_CPP17_INLINE_VARIABLE + #endif +#endif + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_HAVE_CPP11_TYPE_TRAITS +// +// Defined as 0 or 1. +// This is the same as EABase EA_HAVE_CPP11_TYPE_TRAITS except that it +// follows the convention of being always defined, as 0 or 1. Note that this +// identifies if the Standard Library has C++11 type traits and not if EASTL +// has its equivalents to C++11 type traits. +/////////////////////////////////////////////////////////////////////////////// +#if !defined(EASTL_HAVE_CPP11_TYPE_TRAITS) + // To do: Change this to use the EABase implementation once we have a few months of testing + // of this and we are sure it works right. Do this at some point after ~January 2014. + #if defined(EA_HAVE_DINKUMWARE_CPP_LIBRARY) && (_CPPLIB_VER >= 540) // Dinkumware. VS2012+ + #define EASTL_HAVE_CPP11_TYPE_TRAITS 1 + #elif defined(EA_COMPILER_CPP11_ENABLED) && defined(EA_HAVE_LIBSTDCPP_LIBRARY) && defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4007) // Prior versions of libstdc++ have incomplete support for C++11 type traits. + #define EASTL_HAVE_CPP11_TYPE_TRAITS 1 + #elif defined(EA_HAVE_LIBCPP_LIBRARY) && (_LIBCPP_VERSION >= 1) + #define EASTL_HAVE_CPP11_TYPE_TRAITS 1 + #else + #define EASTL_HAVE_CPP11_TYPE_TRAITS 0 + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EA_COMPILER_NO_FUNCTION_TEMPLATE_DEFAULT_ARGS undef +// +// We need revise this macro to be undefined in some cases, in case the user +// isn't using an updated EABase. +/////////////////////////////////////////////////////////////////////////////// +#if defined(__EDG_VERSION__) && (__EDG_VERSION__ >= 403) // It may in fact be supported by 4.01 or 4.02 but we don't have compilers to test with. + #if defined(EA_COMPILER_NO_FUNCTION_TEMPLATE_DEFAULT_ARGS) + #undef EA_COMPILER_NO_FUNCTION_TEMPLATE_DEFAULT_ARGS + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_NO_RANGE_BASED_FOR_LOOP +// +// Defined as 0 or 1. +// This is the same as EABase EA_COMPILER_NO_RANGE_BASED_FOR_LOOP except that it +// follows the convention of being always defined, as 0 or 1. +/////////////////////////////////////////////////////////////////////////////// +#if !defined(EASTL_NO_RANGE_BASED_FOR_LOOP) + #if defined(EA_COMPILER_NO_RANGE_BASED_FOR_LOOP) + #define EASTL_NO_RANGE_BASED_FOR_LOOP 1 + #else + #define EASTL_NO_RANGE_BASED_FOR_LOOP 0 + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_ALIGN_OF +// +// Determines the alignment of a type. 
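Referring back to EASTL_CPP17_INLINE_VARIABLE above, a minimal sketch of the _v variable-template pattern from its usage note; is_small is a hypothetical trait:

    template <typename T>
    struct is_small
        { static const bool value = (sizeof(T) <= sizeof(void*)); };

    #if EASTL_VARIABLE_TEMPLATES_ENABLED
        // With C++17 inline variables this definition is 'inline' and can live in a
        // header without ODR trouble; on older compilers the keyword simply drops out.
        template <typename T>
        EASTL_CPP17_INLINE_VARIABLE constexpr bool is_small_v = is_small<T>::value;
    #endif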
+// +// Example usage: +// size_t alignment = EASTL_ALIGN_OF(int); +// +/////////////////////////////////////////////////////////////////////////////// +#ifndef EASTL_ALIGN_OF + #define EASTL_ALIGN_OF alignof +#endif + + + + +/////////////////////////////////////////////////////////////////////////////// +// eastl_size_t +// +// Defined as an unsigned integer type, usually either size_t or uint32_t. +// Defaults to size_t to match std STL unless the user specifies to use +// uint32_t explicitly via the EASTL_SIZE_T_32BIT define +// +// Example usage: +// eastl_size_t n = intVector.size(); +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_SIZE_T_32BIT // Defines whether EASTL_SIZE_T uses uint32_t/int32_t as opposed to size_t/ssize_t. + #define EASTL_SIZE_T_32BIT 0 // This makes a difference on 64 bit platforms because they use a 64 bit size_t. +#endif // By default we do the same thing as std STL and use size_t. + +#ifndef EASTL_SIZE_T + #if (EASTL_SIZE_T_32BIT == 0) || (EA_PLATFORM_WORD_SIZE == 4) + #include + #define EASTL_SIZE_T size_t + #define EASTL_SSIZE_T intptr_t + #else + #define EASTL_SIZE_T uint32_t + #define EASTL_SSIZE_T int32_t + #endif +#endif + +typedef EASTL_SIZE_T eastl_size_t; // Same concept as std::size_t. +typedef EASTL_SSIZE_T eastl_ssize_t; // Signed version of eastl_size_t. Concept is similar to Posix's ssize_t. + + + + +/////////////////////////////////////////////////////////////////////////////// +// AddRef / Release +// +// AddRef and Release are used for "intrusive" reference counting. By the term +// "intrusive", we mean that the reference count is maintained by the object +// and not by the user of the object. Given that an object implements referencing +// counting, the user of the object needs to be able to increment and decrement +// that reference count. We do that via the venerable AddRef and Release functions +// which the object must supply. These defines here allow us to specify the name +// of the functions. They could just as well be defined to addref and delref or +// IncRef and DecRef. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTLAddRef + #define EASTLAddRef AddRef +#endif + +#ifndef EASTLRelease + #define EASTLRelease Release +#endif + + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_ALLOCATOR_EXPLICIT_ENABLED +// +// Defined as 0 or 1. Default is 0 for now but ideally would be changed to +// 1 some day. It's 0 because setting it to 1 breaks some existing code. +// This option enables the allocator ctor to be explicit, which avoids +// some undesirable silent conversions, especially with the string class. +// +// Example usage: +// class allocator +// { +// public: +// EASTL_ALLOCATOR_EXPLICIT allocator(const char* pName); +// }; +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_ALLOCATOR_EXPLICIT_ENABLED + #define EASTL_ALLOCATOR_EXPLICIT_ENABLED 0 +#endif + +#if EASTL_ALLOCATOR_EXPLICIT_ENABLED + #define EASTL_ALLOCATOR_EXPLICIT explicit +#else + #define EASTL_ALLOCATOR_EXPLICIT +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_ALLOCATOR_MIN_ALIGNMENT +// +// Defined as an integral power-of-2 that's >= 1. +// Identifies the minimum alignment that EASTL should assume its allocators +// use. 
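Referring back to the AddRef / Release section above, a small sketch of the intrusive-counting contract those configurable names describe; RefCounted and the acquire/release helpers are hypothetical:

    // The object owns its reference count and exposes whatever functions the
    // EASTLAddRef / EASTLRelease macros expand to ("AddRef" / "Release" by default).
    class RefCounted
    {
    public:
        RefCounted() : mRefCount(0) {}
        virtual ~RefCounted() {}

        int AddRef()  { return ++mRefCount; }
        int Release() { const int count = --mRefCount; if(count == 0) delete this; return count; }

    private:
        int mRefCount;
    };

    // Wrapper code (in the spirit of an intrusive smart pointer) calls through the
    // macros, so a codebase that prefers IncRef/DecRef only has to redefine the two
    // defines rather than touch every call site.
    template <typename T> void acquire(T* p) { if(p) p->EASTLAddRef();  }
    template <typename T> void release(T* p) { if(p) p->EASTLRelease(); }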
There is code within EASTL that decides whether to do a Malloc or +// MallocAligned call and it's typically better if it can use the Malloc call. +// But this requires knowing what the minimum possible alignment is. +#if !defined(EASTL_ALLOCATOR_MIN_ALIGNMENT) + #define EASTL_ALLOCATOR_MIN_ALIGNMENT EA_PLATFORM_MIN_MALLOC_ALIGNMENT +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_SYSTEM_ALLOCATOR_MIN_ALIGNMENT +// +// Identifies the minimum alignment that EASTL should assume system allocations +// from malloc and new will have. +#if !defined(EASTL_SYSTEM_ALLOCATOR_MIN_ALIGNMENT) + #if defined(EA_PLATFORM_MICROSOFT) || defined(EA_PLATFORM_APPLE) + #define EASTL_SYSTEM_ALLOCATOR_MIN_ALIGNMENT 16 + #else + #define EASTL_SYSTEM_ALLOCATOR_MIN_ALIGNMENT (EA_PLATFORM_PTR_SIZE * 2) + #endif +#endif + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL allocator +// +// The EASTL allocator system allows you to redefine how memory is allocated +// via some defines that are set up here. In the container code, memory is +// allocated via macros which expand to whatever the user has them set to +// expand to. Given that there are multiple allocator systems available, +// this system allows you to configure it to use whatever system you want, +// provided your system meets the requirements of this library. +// The requirements are: +// +// - Must be constructable via a const char* (name) parameter. +// Some uses of allocators won't require this, however. +// - Allocate a block of memory of size n and debug name string. +// - Allocate a block of memory of size n, debug name string, +// alignment a, and offset o. +// - Free memory allocated via either of the allocation functions above. +// - Provide a default allocator instance which can be used if the user +// doesn't provide a specific one. +// +/////////////////////////////////////////////////////////////////////////////// + +// namespace eastl +// { +// class allocator +// { +// allocator(const char* pName = NULL); +// +// void* allocate(size_t n, int flags = 0); +// void* allocate(size_t n, size_t alignment, size_t offset, int flags = 0); +// void deallocate(void* p, size_t n); +// +// const char* get_name() const; +// void set_name(const char* pName); +// }; +// +// allocator* GetDefaultAllocator(); // This is used for anonymous allocations. +// } + +#ifndef EASTLAlloc // To consider: Instead of calling through pAllocator, just go directly to operator new, since that's what allocator does. + #define EASTLAlloc(allocator, n) (allocator).allocate(n); +#endif + +#ifndef EASTLAllocFlags // To consider: Instead of calling through pAllocator, just go directly to operator new, since that's what allocator does. + #define EASTLAllocFlags(allocator, n, flags) (allocator).allocate(n, flags); +#endif + +#ifndef EASTLAllocAligned + #define EASTLAllocAligned(allocator, n, alignment, offset) (allocator).allocate((n), (alignment), (offset)) +#endif + +#ifndef EASTLAllocAlignedFlags + #define EASTLAllocAlignedFlags(allocator, n, alignment, offset, flags) (allocator).allocate((n), (alignment), (offset), (flags)) +#endif + +#ifndef EASTLFree + #define EASTLFree(allocator, p, size) (allocator).deallocate((void*)(p), (size)) // Important to cast to void* as p may be non-const. 
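As a concrete companion to the allocator requirements listed above, a minimal sketch of a conforming allocator and of how container-style code reaches it through the EASTLAlloc / EASTLFree macros; MyAllocator is hypothetical and its aligned overload is deliberately simplified:

    #include <stddef.h>
    #include <stdlib.h>

    class MyAllocator
    {
    public:
        MyAllocator(const char* pName = "MyAllocator") : mpName(pName) {}

        void* allocate(size_t n, int /*flags*/ = 0)
            { return malloc(n); }

        // Alignment-aware overload; a real implementation would honor alignment and
        // offset instead of relying on the default malloc alignment.
        void* allocate(size_t n, size_t /*alignment*/, size_t /*offset*/, int /*flags*/ = 0)
            { return malloc(n); }

        void deallocate(void* p, size_t /*n*/)
            { free(p); }

        const char* get_name() const            { return mpName; }
        void        set_name(const char* pName) { mpName = pName; }

    private:
        const char* mpName;
    };

    // Call sites go through the macros so the whole allocation system can be swapped
    // by redefining them. Note that EASTLAlloc already supplies the trailing ';'.
    inline void* AllocateNode(MyAllocator& a, size_t n)
    {
        void* p = EASTLAlloc(a, n)
        return p;
    }

    inline void FreeNode(MyAllocator& a, void* p, size_t n)
    {
        EASTLFree(a, p, n);
    }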
+#endif + +#ifndef EASTLAllocatorType + #define EASTLAllocatorType eastl::allocator +#endif + +#ifndef EASTLDummyAllocatorType + #define EASTLDummyAllocatorType eastl::dummy_allocator +#endif + +#ifndef EASTLAllocatorDefault + // EASTLAllocatorDefault returns the default allocator instance. This is not a global + // allocator which implements all container allocations but is the allocator that is + // used when EASTL needs to allocate memory internally. There are very few cases where + // EASTL allocates memory internally, and in each of these it is for a sensible reason + // that is documented to behave as such. + #define EASTLAllocatorDefault eastl::GetDefaultAllocator +#endif + + +/// EASTL_ALLOCATOR_DEFAULT_NAME +/// +/// Defines a default allocator name in the absence of a user-provided name. +/// +#ifndef EASTL_ALLOCATOR_DEFAULT_NAME + #define EASTL_ALLOCATOR_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX // Unless the user overrides something, this is "EASTL". +#endif + +/// EASTL_USE_FORWARD_WORKAROUND +/// +/// This is to workaround a compiler bug that we found in VS2013. Update 1 did not fix it. +/// This should be fixed in a future release of VS2013 http://accentuable4.rssing.com/browser.php?indx=3511740&item=15696 +/// +#ifndef EASTL_USE_FORWARD_WORKAROUND + #if defined(_MSC_FULL_VER) && _MSC_FULL_VER == 180021005 || (defined(__EDG_VERSION__) && (__EDG_VERSION__ < 405))// VS2013 initial release + #define EASTL_USE_FORWARD_WORKAROUND 1 + #else + #define EASTL_USE_FORWARD_WORKAROUND 0 + #endif +#endif + + +/// EASTL_TUPLE_ENABLED +/// EASTL tuple implementation depends on variadic template support +#if EASTL_VARIADIC_TEMPLATES_ENABLED && !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + #define EASTL_TUPLE_ENABLED 1 +#else + #define EASTL_TUPLE_ENABLED 0 +#endif + + +/// EASTL_FUNCTION_ENABLED +/// +#ifndef EASTL_FUNCTION_ENABLED + #define EASTL_FUNCTION_ENABLED 1 +#endif + + +/// EASTL_USER_LITERALS_ENABLED +#ifndef EASTL_USER_LITERALS_ENABLED + #if defined(EA_COMPILER_CPP14_ENABLED) + #define EASTL_USER_LITERALS_ENABLED 1 + + // Disabling the Clang/GCC/MSVC warning about using user defined literals without a leading '_' as they are + // reserved for standard libary usage. + EA_DISABLE_CLANG_WARNING(-Wuser-defined-literals) + EA_DISABLE_CLANG_WARNING(-Wreserved-user-defined-literal) + EA_DISABLE_GCC_WARNING(-Wliteral-suffix) + #ifdef _MSC_VER + #pragma warning(disable: 4455) // disable warning C4455: literal suffix identifiers that do not start with an underscore are reserved + #endif + + #else + #define EASTL_USER_LITERALS_ENABLED 0 + #endif +#endif + + +/// EASTL_INLINE_NAMESPACES_ENABLED +#ifndef EASTL_INLINE_NAMESPACES_ENABLED + #if defined(EA_COMPILER_CPP14_ENABLED) + #define EASTL_INLINE_NAMESPACES_ENABLED 1 + #else + #define EASTL_INLINE_NAMESPACES_ENABLED 0 + #endif +#endif + + +/// EASTL_CORE_ALLOCATOR_ENABLED +#ifndef EASTL_CORE_ALLOCATOR_ENABLED + #define EASTL_CORE_ALLOCATOR_ENABLED 0 +#endif + +/// EASTL_OPENSOURCE +/// This is enabled when EASTL is building built in an "open source" mode. Which is a mode that eliminates code +/// dependencies on other technologies that have not been released publically. +/// EASTL_OPENSOURCE = 0, is the default. +/// EASTL_OPENSOURCE = 1, utilizes technologies that not publically available. 
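Referring back to EASTLAllocatorDefault above, a small sketch of the internal-allocation pattern it exists for, assuming the default eastl::allocator interface (allocate/deallocate) shown earlier in this file:

    #include <stddef.h>

    // EASTL code that must allocate on its own behalf (rather than through a
    // container's allocator) goes through the configured default allocator.
    inline void* AllocateScratch(size_t n)
    {
        return EASTLAllocatorDefault()->allocate(n);
    }

    inline void FreeScratch(void* p, size_t n)
    {
        EASTLAllocatorDefault()->deallocate(p, n);
    }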
+/// +#ifndef EASTL_OPENSOURCE + #define EASTL_OPENSOURCE 0 +#endif + + +/// EASTL_OPTIONAL_ENABLED +#if defined(EA_COMPILER_MSVC_2012) + #define EASTL_OPTIONAL_ENABLED 0 +#elif defined(EA_COMPILER_MSVC_2013) + #define EASTL_OPTIONAL_ENABLED 0 +#elif defined(EA_COMPILER_MSVC_2015) + #define EASTL_OPTIONAL_ENABLED 1 +#elif EASTL_VARIADIC_TEMPLATES_ENABLED && !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) && !defined(EA_COMPILER_NO_DEFAULTED_FUNCTIONS) && defined(EA_COMPILER_CPP11_ENABLED) + #define EASTL_OPTIONAL_ENABLED 1 +#else + #define EASTL_OPTIONAL_ENABLED 0 +#endif + + +/// EASTL_HAS_UNIQUE_OBJECT_REPRESENTATIONS_AVAILABLE +#if defined(_MSC_VER) && (_MSC_VER >= 1913) // VS2017+ + #define EASTL_HAS_UNIQUE_OBJECT_REPRESENTATIONS_AVAILABLE 1 +#elif defined(EA_COMPILER_CLANG) + #if !__is_identifier(__has_unique_object_representations) + #define EASTL_HAS_UNIQUE_OBJECT_REPRESENTATIONS_AVAILABLE 1 + #else + #define EASTL_HAS_UNIQUE_OBJECT_REPRESENTATIONS_AVAILABLE 0 + #endif +#else + #define EASTL_HAS_UNIQUE_OBJECT_REPRESENTATIONS_AVAILABLE 0 +#endif + + +/// EASTL_ENABLE_PAIR_FIRST_ELEMENT_CONSTRUCTOR +/// This feature define allows users to toggle the problematic eastl::pair implicit +/// single element constructor. +#ifndef EASTL_ENABLE_PAIR_FIRST_ELEMENT_CONSTRUCTOR + #define EASTL_ENABLE_PAIR_FIRST_ELEMENT_CONSTRUCTOR 0 +#endif + +/// EASTL_SYSTEM_BIG_ENDIAN_STATEMENT +/// EASTL_SYSTEM_LITTLE_ENDIAN_STATEMENT +/// These macros allow you to write endian specific macros as statements. +/// This allows endian specific code to be macro expanded from within other macros +/// +#if defined(EA_SYSTEM_BIG_ENDIAN) + #define EASTL_SYSTEM_BIG_ENDIAN_STATEMENT(...) __VA_ARGS__ +#else + #define EASTL_SYSTEM_BIG_ENDIAN_STATEMENT(...) +#endif + +#if defined(EA_SYSTEM_LITTLE_ENDIAN) + #define EASTL_SYSTEM_LITTLE_ENDIAN_STATEMENT(...) __VA_ARGS__ +#else + #define EASTL_SYSTEM_LITTLE_ENDIAN_STATEMENT(...) +#endif + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/internal/copy_help.h b/libkram/eastl/include/EASTL/internal/copy_help.h new file mode 100644 index 00000000..e5fb2abd --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/copy_help.h @@ -0,0 +1,215 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_COPY_HELP_H +#define EASTL_INTERNAL_COPY_HELP_H + + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include +#include +#include // memcpy, memcmp, memmove + + +namespace eastl +{ + /// move / move_n / move_backward + /// copy / copy_n / copy_backward + /// + /// We want to optimize move, move_n, move_backward, copy, copy_backward, copy_n to do memmove operations + /// when possible. + /// + /// We could possibly use memcpy, though it has stricter overlap requirements than the move and copy + /// algorithms and would require a runtime if/else to choose it over memmove. In particular, memcpy + /// allows no range overlap at all, whereas move/copy allow output end overlap and move_backward/copy_backward + /// allow output begin overlap. Despite this it might be useful to use memcpy for any platforms where + /// memcpy is significantly faster than memmove, and since in most cases the copy/move operation in fact + /// doesn't target overlapping memory and so memcpy would be usable. 
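Referring back to the EASTL_SYSTEM_*_ENDIAN_STATEMENT macros at the end of config.h above, a small sketch of the pattern they enable: endian-specific statements that expand in place, with no #if block at the call site. SwapBytes32 and ReadLittleEndian32 are hypothetical:

    #include <stdint.h>

    inline uint32_t SwapBytes32(uint32_t v)
    {
        return (v >> 24) | ((v >> 8) & 0x0000FF00u) |
               ((v << 8) & 0x00FF0000u) | (v << 24);
    }

    // Reads a value stored as little-endian; the byte swap is emitted only when the
    // target is big-endian, and vanishes entirely on little-endian targets.
    inline uint32_t ReadLittleEndian32(const uint32_t* p)
    {
        uint32_t value = *p;
        EASTL_SYSTEM_BIG_ENDIAN_STATEMENT(value = SwapBytes32(value);)
        return value;
    }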
+ /// + /// We can use memmove/memcpy if the following hold true: + /// InputIterator and OutputIterator are of the same type. + /// InputIterator and OutputIterator are of type contiguous_iterator_tag or simply are pointers (the two are virtually synonymous). + /// is_trivially_copyable::value is true. i.e. the constructor T(const T& t) (or T(T&& t) if present) can be replaced by memmove(this, &t, sizeof(T)) + /// + /// copy normally differs from move, but there is a case where copy is the same as move: when copy is + /// used with a move_iterator. We handle that case here by detecting that copy is being done with a + /// move_iterator and redirect it to move (which can take advantage of memmove/memcpy). + /// + /// The generic_iterator class is typically used for wrapping raw memory pointers so they can act like + /// formal iterators. Since pointers provide an opportunity for memmove/memcpy operations, we can + /// detect a generic iterator and use it's wrapped type as a pointer if it happens to be one. + + // Implementation moving copying both trivial and non-trivial data via a lesser iterator than random-access. + template + struct move_and_copy_helper + { + template + static OutputIterator move_or_copy(InputIterator first, InputIterator last, OutputIterator result) + { + for(; first != last; ++result, ++first) + *result = *first; + return result; + } + }; + + // Specialization for copying non-trivial data via a random-access iterator. It's theoretically faster because the compiler can see the count when its a compile-time const. + // This specialization converts the random access InputIterator last-first to an integral type. There's simple way for us to take advantage of a random access output iterator, + // as the range is specified by the input instead of the output, and distance(first, last) for a non-random-access iterator is potentially slow. + template <> + struct move_and_copy_helper + { + template + static OutputIterator move_or_copy(InputIterator first, InputIterator last, OutputIterator result) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + + for(difference_type n = (last - first); n > 0; --n, ++first, ++result) + *result = *first; + + return result; + } + }; + + // Specialization for moving non-trivial data via a lesser iterator than random-access. + template + struct move_and_copy_helper + { + template + static OutputIterator move_or_copy(InputIterator first, InputIterator last, OutputIterator result) + { + for(; first != last; ++result, ++first) + *result = eastl::move(*first); + return result; + } + }; + + // Specialization for moving non-trivial data via a random-access iterator. It's theoretically faster because the compiler can see the count when its a compile-time const. + template <> + struct move_and_copy_helper + { + template + static OutputIterator move_or_copy(InputIterator first, InputIterator last, OutputIterator result) + { + typedef typename eastl::iterator_traits::difference_type difference_type; + + for(difference_type n = (last - first); n > 0; --n, ++first, ++result) + *result = eastl::move(*first); + + return result; + } + }; + + // Specialization for when we can use memmove/memcpy. See the notes above for what conditions allow this. + template + struct move_and_copy_helper + { + template + static T* move_or_copy(const T* first, const T* last, T* result) + { + if (EASTL_UNLIKELY(first == last)) + return result; + + // We could use memcpy here if there's no range overlap, but memcpy is rarely much faster than memmove. 
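As a standalone illustration of the dispatch conditions described above (not the EASTL entry points themselves): trivially copyable elements addressed through raw pointers collapse to a single memmove, while everything else falls back to an element-by-element loop. The example uses std type traits purely to stay self-contained:

    #include <string.h>
    #include <type_traits>

    // Generic path: per-element assignment, works for any copy-assignable type.
    template <typename T>
    T* copy_range(const T* first, const T* last, T* result, std::false_type /*trivial*/)
    {
        for(; first != last; ++first, ++result)
            *result = *first;
        return result;
    }

    // Trivial path: one memmove covers the whole range, including the overlap cases
    // that copy/move are required to support.
    template <typename T>
    T* copy_range(const T* first, const T* last, T* result, std::true_type /*trivial*/)
    {
        const size_t byteCount = (size_t)((const char*)last - (const char*)first);
        if(byteCount)
            memmove(result, first, byteCount);
        return result + (last - first);
    }

    template <typename T>
    T* copy_range(const T* first, const T* last, T* result)
    {
        typedef typename std::is_trivially_copyable<T>::type trivial_tag;
        return copy_range(first, last, result, trivial_tag());
    }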
+ return (T*)memmove(result, first, (size_t)((uintptr_t)last - (uintptr_t)first)) + (last - first); + } + }; + + + + template + inline OutputIterator move_and_copy_chooser(InputIterator first, InputIterator last, OutputIterator result) + { + typedef typename eastl::iterator_traits::iterator_category IIC; + typedef typename eastl::iterator_traits::iterator_category OIC; + typedef typename eastl::iterator_traits::value_type value_type_input; + typedef typename eastl::iterator_traits::value_type value_type_output; + + const bool canBeMemmoved = eastl::is_trivially_copyable::value && + eastl::is_same::value && + (eastl::is_pointer::value || eastl::is_same::value) && + (eastl::is_pointer::value || eastl::is_same::value); + + return eastl::move_and_copy_helper::move_or_copy(first, last, result); // Need to chose based on the input iterator tag and not the output iterator tag, because containers accept input ranges of iterator types different than self. + } + + + // We have a second layer of unwrap_iterator calls because the original iterator might be something like move_iterator > (i.e. doubly-wrapped). + template + inline OutputIterator move_and_copy_unwrapper(InputIterator first, InputIterator last, OutputIterator result) + { + return OutputIterator(eastl::move_and_copy_chooser(eastl::unwrap_iterator(first), eastl::unwrap_iterator(last), eastl::unwrap_iterator(result))); // Have to convert to OutputIterator because result.base() could be a T* + } + + + /// move + /// + /// After this operation the elements in the moved-from range will still contain valid values of the + /// appropriate type, but not necessarily the same values as before the move. + /// Returns the end of the result range. + /// Note: When moving between containers, the dest range must be valid; this function doesn't resize containers. + /// Note: if result is within [first, last), move_backward must be used instead of move. + /// + /// Example usage: + /// eastl::move(myArray.begin(), myArray.end(), myDestArray.begin()); + /// + /// Reference implementation: + /// template + /// OutputIterator move(InputIterator first, InputIterator last, OutputIterator result) + /// { + /// while(first != last) + /// *result++ = eastl::move(*first++); + /// return result; + /// } + + template + inline OutputIterator move(InputIterator first, InputIterator last, OutputIterator result) + { + return eastl::move_and_copy_unwrapper(eastl::unwrap_iterator(first), eastl::unwrap_iterator(last), result); + } + + + /// copy + /// + /// Effects: Copies elements in the range [first, last) into the range [result, result + (last - first)) + /// starting from first and proceeding to last. For each nonnegative integer n < (last - first), + /// performs *(result + n) = *(first + n). + /// + /// Returns: result + (last - first). That is, returns the end of the result. Note that this + /// is different from how memmove/memcpy work, as they return the beginning of the result. + /// + /// Requires: result shall not be in the range [first, last). But the end of the result range + /// may in fact be within the input rante. + /// + /// Complexity: Exactly 'last - first' assignments. 
+ /// + template + inline OutputIterator copy(InputIterator first, InputIterator last, OutputIterator result) + { + const bool isMove = eastl::is_move_iterator::value; EA_UNUSED(isMove); + + return eastl::move_and_copy_unwrapper(eastl::unwrap_iterator(first), eastl::unwrap_iterator(last), result); + } +} // namespace eastl + +#endif // Header include guard + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/internal/enable_shared.h b/libkram/eastl/include/EASTL/internal/enable_shared.h new file mode 100644 index 00000000..ac5f0729 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/enable_shared.h @@ -0,0 +1,83 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_ENABLE_SHARED_H +#define EASTL_INTERNAL_ENABLE_SHARED_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +namespace eastl +{ + + /// enable_shared_from_this + /// + /// This is a helper mixin class that allows you to make any class + /// export a shared_ptr instance that is associated with the class + /// instance. Any class that inherits from this class gets two functions: + /// shared_ptr shared_from_this(); + /// shared_ptr shared_from_this() const; + /// If you call shared_from_this, you get back a shared_ptr that + /// refers to the class. A second call to shared_from_this returns + /// another shared_ptr that is shared with the first one. + /// + /// The trick that happens which is not so obvious here (and which is + /// not mentioned at all in the Boost documentation of their version + /// of this) is that the shared_ptr constructor detects that the + /// class has an enable_shared_from_this mixin and sets up this system + /// automatically for the user. This is done with template tricks. + /// + /// For some additional explanation, see the Boost documentation for + /// their description of their version of enable_shared_from_this. + /// + template + class enable_shared_from_this + { + public: + shared_ptr shared_from_this() + { return shared_ptr(mWeakPtr); } + + shared_ptr shared_from_this() const + { return shared_ptr(mWeakPtr); } + + weak_ptr weak_from_this() + { return mWeakPtr; } + + weak_ptr weak_from_this() const + { return mWeakPtr; } + + public: // This is public because the alternative fails on some compilers that we need to support. + mutable weak_ptr mWeakPtr; + + protected: + template friend class shared_ptr; + + EA_CONSTEXPR enable_shared_from_this() EA_NOEXCEPT + { } + + enable_shared_from_this(const enable_shared_from_this&) EA_NOEXCEPT + { } + + enable_shared_from_this& operator=(const enable_shared_from_this&) EA_NOEXCEPT + { return *this; } + + ~enable_shared_from_this() + { } + + }; // enable_shared_from_this + +} // namespace eastl + + +#endif // Header include guard + + + + + + diff --git a/libkram/eastl/include/EASTL/internal/fill_help.h b/libkram/eastl/include/EASTL/internal/fill_help.h new file mode 100644 index 00000000..235a24ee --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/fill_help.h @@ -0,0 +1,484 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
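A minimal usage sketch of enable_shared_from_this as described above; the Widget class and the free function are illustrative:

    #include <EASTL/shared_ptr.h>

    class Widget : public eastl::enable_shared_from_this<Widget>
    {
    public:
        eastl::shared_ptr<Widget> self()
        {
            // Only valid once the object is already owned by a shared_ptr; that
            // owning shared_ptr's constructor is what populates mWeakPtr.
            return shared_from_this();
        }
    };

    void example()
    {
        eastl::shared_ptr<Widget> a(new Widget);
        eastl::shared_ptr<Widget> b = a->self();   // shares ownership with 'a'
        // a and b now both keep the same Widget alive.
    }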
+///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_FILL_HELP_H +#define EASTL_INTERNAL_FILL_HELP_H + + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include + +#if defined(EA_COMPILER_MICROSOFT) && (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)) +#include +#endif + +namespace eastl +{ + // fill + // + // We implement some fill helper functions in order to allow us to optimize it + // where possible. + // + template + struct fill_imp + { + template + static void do_fill(ForwardIterator first, ForwardIterator last, const T& value) + { + // The C++ standard doesn't specify whether we need to create a temporary + // or not, but all std STL implementations are written like what we have here. + for(; first != last; ++first) + *first = value; + } + }; + + template <> + struct fill_imp + { + template + static void do_fill(ForwardIterator first, ForwardIterator last, const T& value) + { + typedef typename eastl::iterator_traits::value_type value_type; + // We create a temp and fill from that because value might alias to the + // destination range and so the compiler would be forced into generating + // less efficient code. + for(const T temp = value; first != last; ++first) + { + EA_UNUSED(temp); + *first = static_cast(temp); + } + } + }; + + /// fill + /// + /// fill is like memset in that it assigns a single value repeatedly to a + /// destination range. It allows for any type of iterator (not just an array) + /// and the source value can be any type, not just a byte. + /// Note that the source value (which is a reference) can come from within + /// the destination range. + /// + /// Effects: Assigns value through all the iterators in the range [first, last). + /// + /// Complexity: Exactly 'last - first' assignments. + /// + /// Note: The C++ standard doesn't specify anything about the value parameter + /// coming from within the first-last range. All std STL implementations act + /// as if the standard specifies that value must not come from within this range. + /// + template + inline void fill(ForwardIterator first, ForwardIterator last, const T& value) + { + eastl::fill_imp< is_scalar::value >::do_fill(first, last, value); + + // Possibly better implementation, as it will deal with small PODs as well as scalars: + // bEasyCopy is true if the type has a trivial constructor (e.g. is a POD) and if + // it is small. Thus any built-in type or any small user-defined struct will qualify. 
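A short illustration of the case the scalar fill_imp specialization above is written around: the fill value is a reference into the destination range itself, so the implementation snapshots it into a temporary first:

    #include <EASTL/algorithm.h>
    #include <EASTL/vector.h>

    void example()
    {
        eastl::vector<int> v;
        for(int i = 0; i < 8; ++i)
            v.push_back(i);

        // v[3] lives inside [begin, end). The temporary made by fill_imp lets the
        // compiler keep the source value in a register instead of assuming it can
        // change every time an element of v is written.
        eastl::fill(v.begin(), v.end(), v[3]);
        // Every element is now 3.
    }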
+ //const bool bEasyCopy = eastl::type_and::value, + // eastl::integral_constant::value; + //eastl::fill_imp::do_fill(first, last, value); + + } + + #if(defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) && (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)) + #if defined(EA_PROCESSOR_X86_64) + template + inline void fill(uint64_t* first, uint64_t* last, Value c) + { + uintptr_t count = (uintptr_t)(last - first); + uint64_t value = (uint64_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosq\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + } + + + template + inline void fill(int64_t* first, int64_t* last, Value c) + { + uintptr_t count = (uintptr_t)(last - first); + int64_t value = (int64_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosq\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + } + #endif + + template + inline void fill(uint32_t* first, uint32_t* last, Value c) + { + uintptr_t count = (uintptr_t)(last - first); + uint32_t value = (uint32_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosl\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + } + + + template + inline void fill(int32_t* first, int32_t* last, Value c) + { + uintptr_t count = (uintptr_t)(last - first); + int32_t value = (int32_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosl\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + } + + + template + inline void fill(uint16_t* first, uint16_t* last, Value c) + { + uintptr_t count = (uintptr_t)(last - first); + uint16_t value = (uint16_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosw\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + } + + + template + inline void fill(int16_t* first, int16_t* last, Value c) + { + uintptr_t count = (uintptr_t)(last - first); + int16_t value = (int16_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosw\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + } + + #elif defined(EA_COMPILER_MICROSOFT) && (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)) + #if defined(EA_PROCESSOR_X86_64) + template + inline void fill(uint64_t* first, uint64_t* last, Value c) + { + __stosq(first, (uint64_t)c, (size_t)(last - first)); + } + + template + inline void fill(int64_t* first, int64_t* last, Value c) + { + __stosq((uint64_t*)first, (uint64_t)c, (size_t)(last - first)); + } + #endif + + template + inline void fill(uint32_t* first, uint32_t* last, Value c) + { + __stosd((unsigned long*)first, (unsigned long)c, (size_t)(last - first)); + } + + template + inline void fill(int32_t* first, int32_t* last, Value c) + { + __stosd((unsigned long*)first, (unsigned long)c, (size_t)(last - first)); + } + + template + inline void fill(uint16_t* first, uint16_t* last, Value c) + { + __stosw(first, (uint16_t)c, (size_t)(last - first)); + } + + template + inline void fill(int16_t* first, int16_t* last, Value c) + { + __stosw((uint16_t*)first, (uint16_t)c, (size_t)(last - first)); + } + #endif + + + inline void fill(char* first, char* last, const char& c) // It's debateable whether we should use 'char& c' or 'char c' here. + { + memset(first, (unsigned char)c, (size_t)(last - first)); + } + + inline void fill(char* first, char* last, const int c) // This is used for cases like 'fill(first, last, 0)'. 
+ { + memset(first, (unsigned char)c, (size_t)(last - first)); + } + + inline void fill(unsigned char* first, unsigned char* last, const unsigned char& c) + { + memset(first, (unsigned char)c, (size_t)(last - first)); + } + + inline void fill(unsigned char* first, unsigned char* last, const int c) + { + memset(first, (unsigned char)c, (size_t)(last - first)); + } + + inline void fill(signed char* first, signed char* last, const signed char& c) + { + memset(first, (unsigned char)c, (size_t)(last - first)); + } + + inline void fill(signed char* first, signed char* last, const int c) + { + memset(first, (unsigned char)c, (size_t)(last - first)); + } + + #if defined(_MSC_VER) || defined(__BORLANDC__) || defined(__ICL) // ICL = Intel compiler + inline void fill(bool* first, bool* last, const bool& b) + { + memset(first, (char)b, (size_t)(last - first)); + } + #endif + + + + + // fill_n + // + // We implement some fill helper functions in order to allow us to optimize it + // where possible. + // + template + struct fill_n_imp + { + template + static OutputIterator do_fill(OutputIterator first, Size n, const T& value) + { + for(; n-- > 0; ++first) + *first = value; + return first; + } + }; + + template <> + struct fill_n_imp + { + template + static OutputIterator do_fill(OutputIterator first, Size n, const T& value) + { + typedef typename eastl::iterator_traits::value_type value_type; + + // We create a temp and fill from that because value might alias to + // the destination range and so the compiler would be forced into + // generating less efficient code. + for(const T temp = value; n-- > 0; ++first) + *first = static_cast(temp); + return first; + } + }; + + /// fill_n + /// + /// The fill_n function is very much like memset in that a copies a source value + /// n times into a destination range. The source value may come from within + /// the destination range. + /// + /// Effects: Assigns value through all the iterators in the range [first, first + n). + /// + /// Complexity: Exactly n assignments. + /// + template + OutputIterator fill_n(OutputIterator first, Size n, const T& value) + { + return eastl::fill_n_imp::value>::do_fill(first, n, value); + } + + template + inline char* fill_n(char* first, Size n, const char& c) + { + return (char*)memset(first, (char)c, (size_t)n) + n; + } + + template + inline unsigned char* fill_n(unsigned char* first, Size n, const unsigned char& c) + { + return (unsigned char*)memset(first, (unsigned char)c, (size_t)n) + n; + } + + template + inline signed char* fill_n(signed char* first, Size n, const signed char& c) + { + return (signed char*)memset(first, (signed char)c, n) + (size_t)n; + } + + #if defined(_MSC_VER) || defined(__BORLANDC__) || defined(__ICL) // ICL = Intel compiler + template + inline bool* fill_n(bool* first, Size n, const bool& b) + { + return (bool*)memset(first, (char)b, n) + (size_t)n; + } + #endif + + #if(defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) && (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)) + #if defined(EA_PROCESSOR_X86_64) + template + inline uint64_t* fill_n(uint64_t* first, Size n, Value c) + { + uintptr_t count = (uintptr_t)(n); + uint64_t value = (uint64_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosq\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + return first; // first is updated by the code above. 
+ } + + + template + inline int64_t* fill_n(int64_t* first, Size n, Value c) + { + uintptr_t count = (uintptr_t)(n); + int64_t value = (int64_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosq\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + return first; // first is updated by the code above. + } + #endif + + template + inline uint32_t* fill_n(uint32_t* first, Size n, Value c) + { + uintptr_t count = (uintptr_t)(n); + uint32_t value = (uint32_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosl\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + return first; // first is updated by the code above. + } + + + template + inline int32_t* fill_n(int32_t* first, Size n, Value c) + { + uintptr_t count = (uintptr_t)(n); + int32_t value = (int32_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosl\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + return first; // first is updated by the code above. + } + + + template + inline uint16_t* fill_n(uint16_t* first, Size n, Value c) + { + uintptr_t count = (uintptr_t)(n); + uint16_t value = (uint16_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosw\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + return first; // first is updated by the code above. + } + + + template + inline int16_t* fill_n(int16_t* first, Size n, Value c) + { + uintptr_t count = (uintptr_t)(n); + int16_t value = (int16_t)(c); + + __asm__ __volatile__ ("cld\n\t" + "rep stosw\n\t" + : "+c" (count), "+D" (first), "=m" (first) + : "a" (value) + : "cc" ); + return first; // first is updated by the code above. + } + + #elif defined(EA_COMPILER_MICROSOFT) && (defined(EA_PROCESSOR_X86) || defined(EA_PROCESSOR_X86_64)) + #if defined(EA_PROCESSOR_X86_64) + template + inline uint64_t* fill_n(uint64_t* first, Size n, Value c) + { + __stosq(first, (uint64_t)c, (size_t)n); + return first + n; + } + + template + inline int64_t* fill_n(int64_t* first, Size n, Value c) + { + __stosq((uint64_t*)first, (uint64_t)c, (size_t)n); + return first + n; + } + #endif + + template + inline uint32_t* fill_n(uint32_t* first, Size n, Value c) + { + __stosd((unsigned long*)first, (unsigned long)c, (size_t)n); + return first + n; + } + + template + inline int32_t* fill_n(int32_t* first, Size n, Value c) + { + __stosd((unsigned long*)first, (unsigned long)c, (size_t)n); + return first + n; + } + + template + inline uint16_t* fill_n(uint16_t* first, Size n, Value c) + { + __stosw(first, (uint16_t)c, (size_t)n); + return first + n; + } + + template + inline int16_t* fill_n(int16_t* first, Size n, Value c) + { + __stosw((uint16_t*)first, (uint16_t)c, (size_t)n); + return first + n; + } + #endif + +} // namespace eastl + +#endif // Header include guard + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/internal/fixed_pool.h b/libkram/eastl/include/EASTL/internal/fixed_pool.h new file mode 100644 index 00000000..5a380046 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/fixed_pool.h @@ -0,0 +1,1631 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements the following +// aligned_buffer +// fixed_pool_base +// fixed_pool +// fixed_pool_with_overflow +// fixed_hashtable_allocator +// fixed_vector_allocator +// fixed_swap +// +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_FIXED_POOL_H +#define EASTL_INTERNAL_FIXED_POOL_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include +#include +#include +#include + + +EA_DISABLE_ALL_VC_WARNINGS(); +#include +EA_RESTORE_ALL_VC_WARNINGS(); + +// 4275 - non dll-interface class used as base for DLL-interface classkey 'identifier' +EA_DISABLE_VC_WARNING(4275); + + +namespace eastl +{ + + /// EASTL_FIXED_POOL_DEFAULT_NAME + /// + /// Defines a default allocator name in the absence of a user-provided name. + /// + #ifndef EASTL_FIXED_POOL_DEFAULT_NAME + #define EASTL_FIXED_POOL_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " fixed_pool" // Unless the user overrides something, this is "EASTL fixed_pool". + #endif + + + + /////////////////////////////////////////////////////////////////////////// + // aligned_buffer + /////////////////////////////////////////////////////////////////////////// + + /// aligned_buffer + /// + /// This is useful for creating a buffer of the same size and alignment + /// of a given struct or class. This is useful for creating memory pools + /// that support both size and alignment requirements of stored objects + /// but without wasting space in over-allocating. + /// + /// Note that we implement this via struct specializations, as some + /// compilers such as VC++ do not support specification of alignments + /// in any way other than via an integral constant. + /// + /// Example usage: + /// struct Widget{ }; // This class has a given size and alignment. + /// + /// Declare a char buffer of equal size and alignment to Widget. + /// aligned_buffer mWidgetBuffer; + /// + /// Declare an array this time. 
+ /// aligned_buffer mWidgetArray[15]; + /// + typedef char EASTL_MAY_ALIAS aligned_buffer_char; + + template + struct aligned_buffer { aligned_buffer_char buffer[size]; }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(2) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(2); }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(4) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(4); }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(8) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(8); }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(16) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(16); }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(32) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(32); }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(64) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(64); }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(128) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(128); }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(256) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(256); }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(512) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(512); }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(1024) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(1024); }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(2048) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(2048); }; + + template + struct aligned_buffer { EA_PREFIX_ALIGN(4096) aligned_buffer_char buffer[size] EA_POSTFIX_ALIGN(4096); }; + + + + + /////////////////////////////////////////////////////////////////////////// + // fixed_pool_base + /////////////////////////////////////////////////////////////////////////// + + /// fixed_pool_base + /// + /// This is a base class for the implementation of fixed-size pools. + /// In particular, the fixed_pool and fixed_pool_with_overflow classes + /// are based on fixed_pool_base. + /// + struct fixed_pool_base + { + public: + /// fixed_pool_base + /// + fixed_pool_base(void* pMemory = NULL) + : mpHead((Link*)pMemory) + , mpNext((Link*)pMemory) + , mpCapacity((Link*)pMemory) + , mnNodeSize(0) // This is normally set in the init function. + { + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + mnCurrentSize = 0; + mnPeakSize = 0; + #endif + } + + + /// fixed_pool_base + /// + // Disabled because the default is sufficient. While it normally makes no sense to deep copy + // this data, our usage of this class is such that this is OK and wanted. + // + // fixed_pool_base(const fixed_pool_base& x) + // { + // } + + + /// operator= + /// + fixed_pool_base& operator=(const fixed_pool_base&) + { + // By design we do nothing. We don't attempt to deep-copy member data. + return *this; + } + + + /// init + /// + /// Initializes a fixed_pool with a given set of parameters. + /// You cannot call this function twice else the resulting + /// behaviour will be undefined. You can only call this function + /// after constructing the fixed_pool with the default constructor. + /// + EASTL_API void init(void* pMemory, size_t memorySize, size_t nodeSize, + size_t alignment, size_t alignmentOffset = 0); + + + /// peak_size + /// + /// Returns the maximum number of outstanding allocations there have been + /// at any one time. This represents a high water mark for the allocation count. 
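Expanding slightly on the aligned_buffer usage notes above, a small sketch with a hypothetical Widget type; the buffer member provides raw storage of the right size and alignment, and no constructor runs until placement new is used:

    #include <new>

    struct Widget { float x, y, z; int id; };   // hypothetical stored type

    // Raw storage matching Widget's size and alignment, with no Widget constructed yet.
    eastl::aligned_buffer<sizeof(Widget), EASTL_ALIGN_OF(Widget)> gWidgetStorage;

    Widget* CreateBufferedWidget()
    {
        return new(gWidgetStorage.buffer) Widget();   // construct in place when needed
    }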
+ /// + size_t peak_size() const + { + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + return mnPeakSize; + #else + return 0; + #endif + } + + + /// can_allocate + /// + /// Returns true if there are any free links. + /// + bool can_allocate() const + { + return (mpHead != NULL) || (mpNext != mpCapacity); + } + + public: + /// Link + /// Implements a singly-linked list. + struct Link + { + Link* mpNext; + }; + + Link* mpHead; + Link* mpNext; + Link* mpCapacity; + size_t mnNodeSize; + + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + uint32_t mnCurrentSize; /// Current number of allocated nodes. + uint32_t mnPeakSize; /// Max number of allocated nodes at any one time. + #endif + + }; // fixed_pool_base + + + + + + /////////////////////////////////////////////////////////////////////////// + // fixed_pool + /////////////////////////////////////////////////////////////////////////// + + /// fixed_pool + /// + /// Implements a simple fixed pool allocator for use by fixed-size containers. + /// This is not a generic eastl allocator which can be plugged into an arbitrary + /// eastl container, as it simplifies some functions are arguments for the + /// purpose of efficiency. + /// + class EASTL_API fixed_pool : public fixed_pool_base + { + public: + /// fixed_pool + /// + /// Default constructor. User usually will want to call init() after + /// constructing via this constructor. The pMemory argument is for the + /// purposes of temporarily storing a pointer to the buffer to be used. + /// Even though init may have a pMemory argument, this arg is useful + /// for temporary storage, as per copy construction. + /// + fixed_pool(void* pMemory = NULL) + : fixed_pool_base(pMemory) + { + } + + + /// fixed_pool + /// + /// Constructs a fixed_pool with a given set of parameters. + /// + fixed_pool(void* pMemory, size_t memorySize, size_t nodeSize, + size_t alignment, size_t alignmentOffset = 0) + { + init(pMemory, memorySize, nodeSize, alignment, alignmentOffset); + } + + + /// fixed_pool + /// + // Disabled because the default is sufficient. While it normally makes no sense to deep copy + // this data, our usage of this class is such that this is OK and wanted. + // + // fixed_pool(const fixed_pool& x) + // { + // } + + + /// operator= + /// + fixed_pool& operator=(const fixed_pool&) + { + // By design we do nothing. We don't attempt to deep-copy member data. + return *this; + } + + + /// allocate + /// + /// Allocates a new object of the size specified upon class initialization. + /// Returns NULL if there is no more memory. + /// + void* allocate() + { + Link* pLink = mpHead; + + if(pLink) // If we have space... + { + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + if(++mnCurrentSize > mnPeakSize) + mnPeakSize = mnCurrentSize; + #endif + + mpHead = pLink->mpNext; + return pLink; + } + else + { + // If there's no free node in the free list, just + // allocate another from the reserved memory area + + if(mpNext != mpCapacity) + { + pLink = mpNext; + + mpNext = reinterpret_cast(reinterpret_cast(mpNext) + mnNodeSize); + + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + if(++mnCurrentSize > mnPeakSize) + mnPeakSize = mnCurrentSize; + #endif + + return pLink; + } + + return NULL; + } + } + + void* allocate(size_t /*alignment*/, size_t /*offset*/) + { + return allocate(); + } + + /// deallocate + /// + /// Frees the given object which was allocated by allocate(). + /// If the given node was not allocated by allocate() then the behaviour + /// is undefined. 
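// [Editor's illustrative sketch -- not part of this patch] A full round trip against
// the pool sketched above; fixed_pool hands out raw node-sized blocks, so object
// lifetime is handled with placement new and an explicit destructor call. Widget is
// the same hypothetical type as before.
//
//     void* mem = pool.allocate();        // returns NULL once all 16 nodes are taken
//     if (mem) {
//         Widget* w = new (mem) Widget(); // construct in place
//         // ... use w ...
//         w->~Widget();
//         pool.deallocate(w);             // node goes back on the free list
//     }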
+ /// + void deallocate(void* p) + { + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + --mnCurrentSize; + #endif + + ((Link*)p)->mpNext = mpHead; + mpHead = ((Link*)p); + } + + + using fixed_pool_base::can_allocate; + + + const char* get_name() const + { + return EASTL_FIXED_POOL_DEFAULT_NAME; + } + + + void set_name(const char*) + { + // Nothing to do. We don't allocate memory. + } + + }; // fixed_pool + + + + + + /////////////////////////////////////////////////////////////////////////// + // fixed_pool_with_overflow + /////////////////////////////////////////////////////////////////////////// + + /// fixed_pool_with_overflow + /// + template + class fixed_pool_with_overflow : public fixed_pool_base + { + public: + typedef OverflowAllocator overflow_allocator_type; + + + fixed_pool_with_overflow(void* pMemory = NULL) + : fixed_pool_base(pMemory), + mOverflowAllocator(EASTL_FIXED_POOL_DEFAULT_NAME) + { + // Leave mpPoolBegin, mpPoolEnd uninitialized. + } + + + fixed_pool_with_overflow(void* pMemory, const overflow_allocator_type& allocator) + : fixed_pool_base(pMemory), + mOverflowAllocator(allocator) + { + // Leave mpPoolBegin, mpPoolEnd uninitialized. + } + + + fixed_pool_with_overflow(void* pMemory, size_t memorySize, size_t nodeSize, + size_t alignment, size_t alignmentOffset = 0) + : mOverflowAllocator(EASTL_FIXED_POOL_DEFAULT_NAME) + { + fixed_pool_base::init(pMemory, memorySize, nodeSize, alignment, alignmentOffset); + + mpPoolBegin = pMemory; + } + + + fixed_pool_with_overflow(void* pMemory, size_t memorySize, size_t nodeSize, + size_t alignment, size_t alignmentOffset, + const overflow_allocator_type& allocator) + : mOverflowAllocator(allocator) + { + fixed_pool_base::init(pMemory, memorySize, nodeSize, alignment, alignmentOffset); + + mpPoolBegin = pMemory; + } + + + // Disabled because the default is sufficient. While it normally makes no sense to deep copy + // this data, our usage of this class is such that this is OK and wanted. + // + //fixed_pool_with_overflow(const fixed_pool_with_overflow& x) + //{ + // ... 
+ //} + + + fixed_pool_with_overflow& operator=(const fixed_pool_with_overflow& x) + { + #if EASTL_ALLOCATOR_COPY_ENABLED + mOverflowAllocator = x.mOverflowAllocator; + #else + (void)x; + #endif + + return *this; + } + + + void init(void* pMemory, size_t memorySize, size_t nodeSize, + size_t alignment, size_t alignmentOffset = 0) + { + fixed_pool_base::init(pMemory, memorySize, nodeSize, alignment, alignmentOffset); + + mpPoolBegin = pMemory; + } + + + void* allocate() + { + void* p = NULL; + Link* pLink = mpHead; + + if(pLink) + { + // Unlink from chain + p = pLink; + mpHead = pLink->mpNext; + } + else + { + // If there's no free node in the free list, just + // allocate another from the reserved memory area + + if(mpNext != mpCapacity) + { + p = pLink = mpNext; + mpNext = reinterpret_cast(reinterpret_cast(mpNext) + mnNodeSize); + } + else + p = mOverflowAllocator.allocate(mnNodeSize); + } + + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + if(p && (++mnCurrentSize > mnPeakSize)) + mnPeakSize = mnCurrentSize; + #endif + + return p; + } + + + void* allocate(size_t alignment, size_t alignmentOffset) + { + void* p = NULL; + Link* pLink = mpHead; + + if (pLink) + { + // Unlink from chain + p = pLink; + mpHead = pLink->mpNext; + } + else + { + // If there's no free node in the free list, just + // allocate another from the reserved memory area + + if (mpNext != mpCapacity) + { + p = pLink = mpNext; + mpNext = reinterpret_cast(reinterpret_cast(mpNext)+mnNodeSize); + } + else + { + p = allocate_memory(mOverflowAllocator, mnNodeSize, alignment, alignmentOffset); + EASTL_ASSERT_MSG(p != nullptr, "the behaviour of eastl::allocators that return nullptr is not defined."); + } + + } + + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + if (p && (++mnCurrentSize > mnPeakSize)) + mnPeakSize = mnCurrentSize; + #endif + + return p; + } + + void deallocate(void* p) + { + #if EASTL_FIXED_SIZE_TRACKING_ENABLED + --mnCurrentSize; + #endif + + if((p >= mpPoolBegin) && (p < mpCapacity)) + { + ((Link*)p)->mpNext = mpHead; + mpHead = ((Link*)p); + } + else + mOverflowAllocator.deallocate(p, (size_t)mnNodeSize); + } + + + using fixed_pool_base::can_allocate; + + + const char* get_name() const + { + return mOverflowAllocator.get_name(); + } + + + void set_name(const char* pName) + { + mOverflowAllocator.set_name(pName); + } + + + const overflow_allocator_type& get_overflow_allocator() const + { + return mOverflowAllocator; + } + + + overflow_allocator_type& get_overflow_allocator() + { + return mOverflowAllocator; + } + + + void set_overflow_allocator(const overflow_allocator_type& overflowAllocator) + { + mOverflowAllocator = overflowAllocator; + } + public: + OverflowAllocator mOverflowAllocator; + void* mpPoolBegin; // Ideally we wouldn't need this member variable. he problem is that the information about the pool buffer and object size is stored in the owning container and we can't have access to it without increasing the amount of code we need and by templating more code. It may turn out that simply storing data here is smaller in the end. + + }; // fixed_pool_with_overflow + + + + + + /////////////////////////////////////////////////////////////////////////// + // fixed_node_allocator + /////////////////////////////////////////////////////////////////////////// + + /// fixed_node_allocator + /// + /// Note: This class was previously named fixed_node_pool, but was changed because this name + /// was inconsistent with the other allocators here which ended with _allocator. 
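// [Editor's illustrative sketch -- not part of this patch] fixed_pool_with_overflow
// behaves like fixed_pool until the local buffer is exhausted, then falls back to its
// overflow allocator; deallocate() routes each pointer to the right owner by checking
// whether it lies inside the local buffer. The eastl::allocator template argument and
// the sizes below are assumptions for illustration only.
//
//     eastl::aligned_buffer<64, 16> nodes[4];
//     eastl::fixed_pool_with_overflow<eastl::allocator> pool(nodes, sizeof(nodes), 64, 16);
//
//     void* a = pool.allocate();   // served from the local buffer
//     void* b = pool.allocate();
//     void* c = pool.allocate();
//     void* d = pool.allocate();
//     void* e = pool.allocate();   // local nodes exhausted -> overflow allocator
//     pool.deallocate(e);          // freed through the overflow allocator
//     pool.deallocate(a);          // returned to the local free list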
+ /// + /// Implements a fixed_pool with a given node count, alignment, and alignment offset. + /// fixed_node_allocator is like fixed_pool except it is templated on the node type instead + /// of being a generic allocator. All it does is pass allocations through to + /// the fixed_pool base. This functionality is separate from fixed_pool because there + /// are other uses for fixed_pool. + /// + /// We template on kNodeSize instead of node_type because the former allows for the + /// two different node_types of the same size to use the same template implementation. + /// + /// Template parameters: + /// nodeSize The size of the object to allocate. + /// nodeCount The number of objects the pool contains. + /// nodeAlignment The alignment of the objects to allocate. + /// nodeAlignmentOffset The alignment offset of the objects to allocate. + /// bEnableOverflow Whether or not we should use the overflow heap if our object pool is exhausted. + /// OverflowAllocator Overflow allocator, which is only used if bEnableOverflow == true. Defaults to the global heap. + /// + template + class fixed_node_allocator + { + public: + typedef typename type_select, fixed_pool>::type pool_type; + typedef fixed_node_allocator this_type; + typedef OverflowAllocator overflow_allocator_type; + + enum + { + kNodeSize = nodeSize, + kNodeCount = nodeCount, + kNodesSize = nodeCount * nodeSize, // Note that the kBufferSize calculation assumes that the compiler sets sizeof(T) to be a multiple alignof(T), and so sizeof(T) is always >= alignof(T). + kBufferSize = kNodesSize + ((nodeAlignment > 1) ? nodeSize-1 : 0) + nodeAlignmentOffset, + kNodeAlignment = nodeAlignment, + kNodeAlignmentOffset = nodeAlignmentOffset + }; + + public: + pool_type mPool; + + public: + //fixed_node_allocator(const char* pName) + //{ + // mPool.set_name(pName); + //} + + + fixed_node_allocator(void* pNodeBuffer) + : mPool(pNodeBuffer, kNodesSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset) + { + } + + + fixed_node_allocator(void* pNodeBuffer, const overflow_allocator_type& allocator) + : mPool(pNodeBuffer, kNodesSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset, allocator) + { + } + + + /// fixed_node_allocator + /// + /// Note that we are copying x.mpHead to our own fixed_pool. This at first may seem + /// broken, as fixed pools cannot take over ownership of other fixed pools' memory. + /// However, we declare that this copy ctor can only ever be safely called when + /// the user has intentionally pre-seeded the source with the destination pointer. + /// This is somewhat playing with fire, but it allows us to get around chicken-and-egg + /// problems with containers being their own allocators, without incurring any memory + /// costs or extra code costs. There's another reason for this: we very strongly want + /// to avoid full copying of instances of fixed_pool around, especially via the stack. + /// Larger pools won't even be able to fit on many machine's stacks. So this solution + /// is also a mechanism to prevent that situation from existing and being used. + /// Perhaps some day we'll find a more elegant yet costless way around this. 
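// [Editor's worked example -- not part of this patch] How the enum above sizes the
// backing buffer. For nodeSize = 16, nodeCount = 8, nodeAlignment = 8,
// nodeAlignmentOffset = 0:
//
//     kNodesSize  = 8 * 16             = 128
//     kBufferSize = 128 + (16 - 1) + 0 = 143
//
// Because nodeAlignment > 1, an extra nodeSize-1 bytes of slack are reserved so the
// first node can be placed at a correctly aligned address inside the raw buffer
// (the code relies on sizeof(T) >= alignof(T), as noted in the enum comment).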
+ /// + fixed_node_allocator(const this_type& x) + : mPool(x.mPool.mpNext, kNodesSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset, x.mPool.mOverflowAllocator) + { + } + + + this_type& operator=(const this_type& x) + { + mPool = x.mPool; + return *this; + } + + + void* allocate(size_t n, int /*flags*/ = 0) + { + (void)n; + EASTL_ASSERT(n == kNodeSize); + return mPool.allocate(); + } + + + void* allocate(size_t n, size_t alignment, size_t offset, int /*flags*/ = 0) + { + (void)n; + EASTL_ASSERT(n == kNodeSize); + return mPool.allocate(alignment, offset); + } + + + void deallocate(void* p, size_t) + { + mPool.deallocate(p); + } + + + /// can_allocate + /// + /// Returns true if there are any free links. + /// + bool can_allocate() const + { + return mPool.can_allocate(); + } + + + /// reset + /// + /// This function unilaterally resets the fixed pool back to a newly initialized + /// state. This is useful for using in tandem with container reset functionality. + /// + void reset(void* pNodeBuffer) + { + mPool.init(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset); + } + + + const char* get_name() const + { + return mPool.get_name(); + } + + + void set_name(const char* pName) + { + mPool.set_name(pName); + } + + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT + { + return mPool.mOverflowAllocator; + } + + + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT + { + return mPool.mOverflowAllocator; + } + + + void set_overflow_allocator(const overflow_allocator_type& allocator) + { + mPool.mOverflowAllocator = allocator; + } + + + void copy_overflow_allocator(const this_type& x) // This function exists so we can write generic code that works for allocators that do and don't have overflow allocators. + { + mPool.mOverflowAllocator = x.mPool.mOverflowAllocator; + } + + }; // fixed_node_allocator + + + // This is a near copy of the code above, with the only difference being + // the 'false' bEnableOverflow template parameter, the pool_type and this_type typedefs, + // and the get_overflow_allocator / set_overflow_allocator functions. + template + class fixed_node_allocator + { + public: + typedef fixed_pool pool_type; + typedef fixed_node_allocator this_type; + typedef OverflowAllocator overflow_allocator_type; + + enum + { + kNodeSize = nodeSize, + kNodeCount = nodeCount, + kNodesSize = nodeCount * nodeSize, // Note that the kBufferSize calculation assumes that the compiler sets sizeof(T) to be a multiple alignof(T), and so sizeof(T) is always >= alignof(T). + kBufferSize = kNodesSize + ((nodeAlignment > 1) ? nodeSize-1 : 0) + nodeAlignmentOffset, + kNodeAlignment = nodeAlignment, + kNodeAlignmentOffset = nodeAlignmentOffset + }; + + public: + pool_type mPool; + + public: + fixed_node_allocator(void* pNodeBuffer) + : mPool(pNodeBuffer, kNodesSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset) + { + } + + + fixed_node_allocator(void* pNodeBuffer, const overflow_allocator_type& /*allocator*/) // allocator is unused because bEnableOverflow is false in this specialization. + : mPool(pNodeBuffer, kNodesSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset) + { + } + + + /// fixed_node_allocator + /// + /// Note that we are copying x.mpHead to our own fixed_pool. This at first may seem + /// broken, as fixed pools cannot take over ownership of other fixed pools' memory. + /// However, we declare that this copy ctor can only ever be safely called when + /// the user has intentionally pre-seeded the source with the destination pointer. 
+ /// This is somewhat playing with fire, but it allows us to get around chicken-and-egg + /// problems with containers being their own allocators, without incurring any memory + /// costs or extra code costs. There's another reason for this: we very strongly want + /// to avoid full copying of instances of fixed_pool around, especially via the stack. + /// Larger pools won't even be able to fit on many machine's stacks. So this solution + /// is also a mechanism to prevent that situation from existing and being used. + /// Perhaps some day we'll find a more elegant yet costless way around this. + /// + fixed_node_allocator(const this_type& x) // No need to copy the overflow allocator, because bEnableOverflow is false in this specialization. + : mPool(x.mPool.mpNext, kNodesSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset) + { + } + + + this_type& operator=(const this_type& x) + { + mPool = x.mPool; + return *this; + } + + + void* allocate(size_t n, int /*flags*/ = 0) + { + (void)n; + EASTL_ASSERT(n == kNodeSize); + return mPool.allocate(); + } + + + void* allocate(size_t n, size_t alignment, size_t offset, int /*flags*/ = 0) + { + (void)n; + EASTL_ASSERT(n == kNodeSize); + return mPool.allocate(alignment, offset); + } + + + void deallocate(void* p, size_t) + { + mPool.deallocate(p); + } + + + bool can_allocate() const + { + return mPool.can_allocate(); + } + + + void reset(void* pNodeBuffer) + { + mPool.init(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset); + } + + + const char* get_name() const + { + return mPool.get_name(); + } + + + void set_name(const char* pName) + { + mPool.set_name(pName); + } + + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT + { + EASTL_ASSERT(false); + overflow_allocator_type* pNULL = NULL; + return *pNULL; // This is not pretty, but it should never execute. This is here only to allow this to compile. + } + + + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT + { + EASTL_ASSERT(false); + overflow_allocator_type* pNULL = NULL; + return *pNULL; // This is not pretty, but it should never execute. This is here only to allow this to compile. + } + + + void set_overflow_allocator(const overflow_allocator_type& /*allocator*/) + { + // We don't have an overflow allocator. + EASTL_ASSERT(false); + } + + + void copy_overflow_allocator(const this_type&) // This function exists so we can write generic code that works for allocators that do and don't have overflow allocators. + { + // We don't have an overflow allocator. + } + + }; // fixed_node_allocator + + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const fixed_node_allocator& a, + const fixed_node_allocator& b) + { + return (&a == &b); // They are only equal if they are the same object. + } + + + template + inline bool operator!=(const fixed_node_allocator& a, + const fixed_node_allocator& b) + { + return (&a != &b); // They are only equal if they are the same object. + } + + + + + + + /////////////////////////////////////////////////////////////////////////// + // fixed_hashtable_allocator + /////////////////////////////////////////////////////////////////////////// + + /// fixed_hashtable_allocator + /// + /// Provides a base class for fixed hashtable allocations. + /// To consider: Have this inherit from fixed_node_allocator. 
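// [Editor's illustrative sketch -- not part of this patch] fixed_node_allocator is
// normally instantiated by the fixed_* containers, but it can be exercised directly;
// the sizes below are hypothetical and assume the template parameter order
// <nodeSize, nodeCount, nodeAlignment, nodeAlignmentOffset, bEnableOverflow>. Note
// that two distinct instances never compare equal (see operator== above), since each
// one owns its own node buffer.
//
//     constexpr size_t kSize = 32, kCount = 8, kAlign = 8, kOffset = 0;
//     using FixedAlloc = eastl::fixed_node_allocator<kSize, kCount, kAlign, kOffset, false>;
//
//     alignas(kAlign) char buffer[FixedAlloc::kBufferSize];
//     FixedAlloc alloc(buffer);
//
//     void* node = alloc.allocate(kSize);  // asserts that the request matches kNodeSize
//     alloc.deallocate(node, kSize);
//     alloc.reset(buffer);                 // rewind to a freshly initialized pool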
+ /// + /// Template parameters: + /// bucketCount The fixed number of hashtable buckets to provide. + /// nodeCount The number of objects the pool contains. + /// nodeAlignment The alignment of the objects to allocate. + /// nodeAlignmentOffset The alignment offset of the objects to allocate. + /// bEnableOverflow Whether or not we should use the overflow heap if our object pool is exhausted. + /// OverflowAllocator Overflow allocator, which is only used if bEnableOverflow == true. Defaults to the global heap. + /// + template + class fixed_hashtable_allocator + { + public: + typedef typename type_select, fixed_pool>::type pool_type; + typedef fixed_hashtable_allocator this_type; + typedef OverflowAllocator overflow_allocator_type; + + enum + { + kBucketCount = bucketCount + 1, // '+1' because the hash table needs a null terminating bucket. + kBucketsSize = bucketCount * sizeof(void*), + kNodeSize = nodeSize, + kNodeCount = nodeCount, + kNodesSize = nodeCount * nodeSize, // Note that the kBufferSize calculation assumes that the compiler sets sizeof(T) to be a multiple alignof(T), and so sizeof(T) is always >= alignof(T). + kBufferSize = kNodesSize + ((nodeAlignment > 1) ? nodeSize-1 : 0) + nodeAlignmentOffset, // Don't need to include kBucketsSize in this calculation, as fixed_hash_xxx containers have a separate buffer for buckets. + kNodeAlignment = nodeAlignment, + kNodeAlignmentOffset = nodeAlignmentOffset, + kAllocFlagBuckets = 0x00400000 // Flag to allocator which indicates that we are allocating buckets and not nodes. + }; + + protected: + pool_type mPool; + void* mpBucketBuffer; + + public: + // Disabled because it causes compile conflicts. + //fixed_hashtable_allocator(const char* pName) + //{ + // mPool.set_name(pName); + //} + + fixed_hashtable_allocator(void* pNodeBuffer) + : mPool(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset), + mpBucketBuffer(NULL) + { + // EASTL_ASSERT(false); // As it stands now, this is not supposed to be called. + } + + + fixed_hashtable_allocator(void* pNodeBuffer, const overflow_allocator_type& allocator) + : mPool(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset, allocator), + mpBucketBuffer(NULL) + { + // EASTL_ASSERT(false); // As it stands now, this is not supposed to be called. + } + + + fixed_hashtable_allocator(void* pNodeBuffer, void* pBucketBuffer) + : mPool(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset), + mpBucketBuffer(pBucketBuffer) + { + } + + + fixed_hashtable_allocator(void* pNodeBuffer, void* pBucketBuffer, const overflow_allocator_type& allocator) + : mPool(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset, allocator), + mpBucketBuffer(pBucketBuffer) + { + } + + + /// fixed_hashtable_allocator + /// + /// Note that we are copying x.mpHead and mpBucketBuffer to our own fixed_pool. + /// See the discussion above in fixed_node_allocator for important information about this. + /// + fixed_hashtable_allocator(const this_type& x) + : mPool(x.mPool.mpHead, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset, x.mPool.mOverflowAllocator), + mpBucketBuffer(x.mpBucketBuffer) + { + } + + + fixed_hashtable_allocator& operator=(const fixed_hashtable_allocator& x) + { + mPool = x.mPool; + return *this; + } + + + void* allocate(size_t n, int flags = 0) + { + // We expect that the caller uses kAllocFlagBuckets when it wants us to allocate buckets instead of nodes. 
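// [Editor's note -- not part of this patch] Two call patterns reach this function,
// distinguished only by the flags argument; bucketBytes below stands for whatever
// bucket-array size the hashtable requests:
//     alloc.allocate(kNodeSize);                        // node path
//     alloc.allocate(bucketBytes, kAllocFlagBuckets);   // bucket path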
+ EASTL_CT_ASSERT(kAllocFlagBuckets == 0x00400000); // Currently we expect this to be so, because the hashtable has a copy of this enum. + + if((flags & kAllocFlagBuckets) == 0) // If we are allocating nodes and (probably) not buckets... + { + EASTL_ASSERT(n == kNodeSize); EA_UNUSED(n); + return mPool.allocate(); + } + + // If bucket size no longer fits within local buffer... + if ((flags & kAllocFlagBuckets) == kAllocFlagBuckets && (n > kBucketsSize)) + return get_overflow_allocator().allocate(n); + + EASTL_ASSERT(n <= kBucketsSize); + return mpBucketBuffer; + } + + + void* allocate(size_t n, size_t alignment, size_t offset, int flags = 0) + { + // We expect that the caller uses kAllocFlagBuckets when it wants us to allocate buckets instead of nodes. + if ((flags & kAllocFlagBuckets) == 0) // If we are allocating nodes and (probably) not buckets... + { + EASTL_ASSERT(n == kNodeSize); EA_UNUSED(n); + return mPool.allocate(alignment, offset); + } + + // If bucket size no longer fits within local buffer... + if ((flags & kAllocFlagBuckets) == kAllocFlagBuckets && (n > kBucketsSize)) + return get_overflow_allocator().allocate(n, alignment, offset); + + EASTL_ASSERT(n <= kBucketsSize); + return mpBucketBuffer; + } + + + void deallocate(void* p, size_t) + { + if(p != mpBucketBuffer) // If we are freeing a node and not buckets... + mPool.deallocate(p); + } + + + bool can_allocate() const + { + return mPool.can_allocate(); + } + + + void reset(void* pNodeBuffer) + { + // No need to modify mpBucketBuffer, as that is constant. + mPool.init(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset); + } + + + const char* get_name() const + { + return mPool.get_name(); + } + + + void set_name(const char* pName) + { + mPool.set_name(pName); + } + + + const overflow_allocator_type& get_overflow_allocator() const + { + return mPool.mOverflowAllocator; + } + + + overflow_allocator_type& get_overflow_allocator() + { + return mPool.mOverflowAllocator; + } + + + void set_overflow_allocator(const overflow_allocator_type& allocator) + { + mPool.mOverflowAllocator = allocator; + } + + + void copy_overflow_allocator(const this_type& x) // This function exists so we can write generic code that works for allocators that do and don't have overflow allocators. + { + mPool.mOverflowAllocator = x.mPool.mOverflowAllocator; + } + + }; // fixed_hashtable_allocator + + + // This is a near copy of the code above, with the only difference being + // the 'false' bEnableOverflow template parameter, the pool_type and this_type typedefs, + // and the get_overflow_allocator / set_overflow_allocator functions. + template + class fixed_hashtable_allocator + { + public: + typedef fixed_pool pool_type; + typedef fixed_hashtable_allocator this_type; + typedef OverflowAllocator overflow_allocator_type; + + enum + { + kBucketCount = bucketCount + 1, // '+1' because the hash table needs a null terminating bucket. + kBucketsSize = bucketCount * sizeof(void*), + kNodeSize = nodeSize, + kNodeCount = nodeCount, + kNodesSize = nodeCount * nodeSize, // Note that the kBufferSize calculation assumes that the compiler sets sizeof(T) to be a multiple alignof(T), and so sizeof(T) is always >= alignof(T). + kBufferSize = kNodesSize + ((nodeAlignment > 1) ? nodeSize-1 : 0) + nodeAlignmentOffset, // Don't need to include kBucketsSize in this calculation, as fixed_hash_xxx containers have a separate buffer for buckets. 
+ kNodeAlignment = nodeAlignment, + kNodeAlignmentOffset = nodeAlignmentOffset, + kAllocFlagBuckets = 0x00400000 // Flag to allocator which indicates that we are allocating buckets and not nodes. + }; + + protected: + pool_type mPool; + void* mpBucketBuffer; + + public: + // Disabled because it causes compile conflicts. + //fixed_hashtable_allocator(const char* pName) + //{ + // mPool.set_name(pName); + //} + + fixed_hashtable_allocator(void* pNodeBuffer) + : mPool(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset), + mpBucketBuffer(NULL) + { + // EASTL_ASSERT(false); // As it stands now, this is not supposed to be called. + } + + fixed_hashtable_allocator(void* pNodeBuffer, const overflow_allocator_type& /*allocator*/) // allocator is unused because bEnableOverflow is false in this specialization. + : mPool(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset), + mpBucketBuffer(NULL) + { + // EASTL_ASSERT(false); // As it stands now, this is not supposed to be called. + } + + + fixed_hashtable_allocator(void* pNodeBuffer, void* pBucketBuffer) + : mPool(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset), + mpBucketBuffer(pBucketBuffer) + { + } + + + fixed_hashtable_allocator(void* pNodeBuffer, void* pBucketBuffer, const overflow_allocator_type& /*allocator*/) // allocator is unused because bEnableOverflow is false in this specialization. + : mPool(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset), + mpBucketBuffer(pBucketBuffer) + { + } + + + /// fixed_hashtable_allocator + /// + /// Note that we are copying x.mpHead and mpBucketBuffer to our own fixed_pool. + /// See the discussion above in fixed_node_allocator for important information about this. + /// + fixed_hashtable_allocator(const this_type& x) // No need to copy the overflow allocator, because bEnableOverflow is false in this specialization. + : mPool(x.mPool.mpHead, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset), + mpBucketBuffer(x.mpBucketBuffer) + { + } + + + fixed_hashtable_allocator& operator=(const fixed_hashtable_allocator& x) + { + mPool = x.mPool; + return *this; + } + + + void* allocate(size_t n, int flags = 0) + { + // We expect that the caller uses kAllocFlagBuckets when it wants us to allocate buckets instead of nodes. + EASTL_CT_ASSERT(kAllocFlagBuckets == 0x00400000); // Currently we expect this to be so, because the hashtable has a copy of this enum. + if((flags & kAllocFlagBuckets) == 0) // If we are allocating nodes and (probably) not buckets... + { + EASTL_ASSERT(n == kNodeSize); (void)n; // Make unused var warning go away. + return mPool.allocate(); + } + + // Don't allow hashtable buckets to overflow in this case. + EASTL_ASSERT(n <= kBucketsSize); + return mpBucketBuffer; + } + + + void* allocate(size_t n, size_t alignment, size_t offset, int flags = 0) + { + // We expect that the caller uses kAllocFlagBuckets when it wants us to allocate buckets instead of nodes. + if((flags & kAllocFlagBuckets) == 0) // If we are allocating nodes and (probably) not buckets... + { + EASTL_ASSERT(n == kNodeSize); (void)n; // Make unused var warning go away. + return mPool.allocate(alignment, offset); + } + + // Don't allow hashtable buckets to overflow in this case. + EASTL_ASSERT(n <= kBucketsSize); + return mpBucketBuffer; + } + + + void deallocate(void* p, size_t) + { + if(p != mpBucketBuffer) // If we are freeing a node and not buckets... 
+ mPool.deallocate(p); + } + + + bool can_allocate() const + { + return mPool.can_allocate(); + } + + + void reset(void* pNodeBuffer) + { + // No need to modify mpBucketBuffer, as that is constant. + mPool.init(pNodeBuffer, kBufferSize, kNodeSize, kNodeAlignment, kNodeAlignmentOffset); + } + + + const char* get_name() const + { + return mPool.get_name(); + } + + + void set_name(const char* pName) + { + mPool.set_name(pName); + } + + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT + { + EASTL_ASSERT(false); + overflow_allocator_type* pNULL = NULL; + return *pNULL; // This is not pretty, but it should never execute. This is here only to allow this to compile. + } + + + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT + { + EASTL_ASSERT(false); + overflow_allocator_type* pNULL = NULL; + return *pNULL; // This is not pretty, but it should never execute. This is here only to allow this to compile. + } + + void set_overflow_allocator(const overflow_allocator_type& /*allocator*/) + { + // We don't have an overflow allocator. + EASTL_ASSERT(false); + } + + void copy_overflow_allocator(const this_type&) // This function exists so we can write generic code that works for allocators that do and don't have overflow allocators. + { + // We don't have an overflow allocator. + } + + }; // fixed_hashtable_allocator + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const fixed_hashtable_allocator& a, + const fixed_hashtable_allocator& b) + { + return (&a == &b); // They are only equal if they are the same object. + } + + + template + inline bool operator!=(const fixed_hashtable_allocator& a, + const fixed_hashtable_allocator& b) + { + return (&a != &b); // They are only equal if they are the same object. + } + + + + + + + /////////////////////////////////////////////////////////////////////////// + // fixed_vector_allocator + /////////////////////////////////////////////////////////////////////////// + + /// fixed_vector_allocator + /// + /// Template parameters: + /// nodeSize The size of individual objects. + /// nodeCount The number of objects the pool contains. + /// nodeAlignment The alignment of the objects to allocate. + /// nodeAlignmentOffset The alignment offset of the objects to allocate. + /// bEnableOverflow Whether or not we should use the overflow heap if our object pool is exhausted. + /// OverflowAllocator Overflow allocator, which is only used if bEnableOverflow == true. Defaults to the global heap. + /// + template + class fixed_vector_allocator + { + public: + typedef fixed_vector_allocator this_type; + typedef OverflowAllocator overflow_allocator_type; + + enum + { + kNodeSize = nodeSize, + kNodeCount = nodeCount, + kNodesSize = nodeCount * nodeSize, // Note that the kBufferSize calculation assumes that the compiler sets sizeof(T) to be a multiple alignof(T), and so sizeof(T) is always >= alignof(T). + kBufferSize = kNodesSize + ((nodeAlignment > 1) ? nodeSize-1 : 0) + nodeAlignmentOffset, + kNodeAlignment = nodeAlignment, + kNodeAlignmentOffset = nodeAlignmentOffset + }; + + public: + overflow_allocator_type mOverflowAllocator; + void* mpPoolBegin; // To consider: Find some way to make this data unnecessary, without increasing template proliferation. + + public: + // Disabled because it causes compile conflicts. 
+ //fixed_vector_allocator(const char* pName = NULL) + //{ + // mOverflowAllocator.set_name(pName); + //} + + fixed_vector_allocator(void* pNodeBuffer = nullptr) + : mpPoolBegin(pNodeBuffer) + { + } + + fixed_vector_allocator(void* pNodeBuffer, const overflow_allocator_type& allocator) + : mOverflowAllocator(allocator), mpPoolBegin(pNodeBuffer) + { + } + + // Disabled because the default is sufficient. + //fixed_vector_allocator(const fixed_vector_allocator& x) + //{ + // mpPoolBegin = x.mpPoolBegin; + // mOverflowAllocator = x.mOverflowAllocator; + //} + + fixed_vector_allocator& operator=(const fixed_vector_allocator& x) + { + // We leave our mpPoolBegin variable alone. + + #if EASTL_ALLOCATOR_COPY_ENABLED + mOverflowAllocator = x.mOverflowAllocator; + #else + (void)x; + #endif + + return *this; + } + + void* allocate(size_t n, int flags = 0) + { + return mOverflowAllocator.allocate(n, flags); + } + + void* allocate(size_t n, size_t alignment, size_t offset, int flags = 0) + { + return mOverflowAllocator.allocate(n, alignment, offset, flags); + } + + void deallocate(void* p, size_t n) + { + if(p != mpPoolBegin) + mOverflowAllocator.deallocate(p, n); // Can't do this to our own allocation. + } + + const char* get_name() const + { + return mOverflowAllocator.get_name(); + } + + void set_name(const char* pName) + { + mOverflowAllocator.set_name(pName); + } + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT + { + return mOverflowAllocator; + } + + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT + { + return mOverflowAllocator; + } + + void set_overflow_allocator(const overflow_allocator_type& allocator) + { + mOverflowAllocator = allocator; + } + + void copy_overflow_allocator(const this_type& x) // This function exists so we can write generic code that works for allocators that do and don't have overflow allocators. + { + mOverflowAllocator = x.mOverflowAllocator; + } + + }; // fixed_vector_allocator + + + template + class fixed_vector_allocator + { + public: + typedef fixed_vector_allocator this_type; + typedef OverflowAllocator overflow_allocator_type; + + enum + { + kNodeSize = nodeSize, + kNodeCount = nodeCount, + kNodesSize = nodeCount * nodeSize, // Note that the kBufferSize calculation assumes that the compiler sets sizeof(T) to be a multiple alignof(T), and so sizeof(T) is always >= alignof(T). + kBufferSize = kNodesSize + ((nodeAlignment > 1) ? nodeSize-1 : 0) + nodeAlignmentOffset, + kNodeAlignment = nodeAlignment, + kNodeAlignmentOffset = nodeAlignmentOffset + }; + + // Disabled because it causes compile conflicts. + //fixed_vector_allocator(const char* = NULL) // This char* parameter is present so that this class can be like the other version. + //{ + //} + + fixed_vector_allocator() + { + } + + fixed_vector_allocator(void* /*pNodeBuffer*/) + { + } + + fixed_vector_allocator(void* /*pNodeBuffer*/, const overflow_allocator_type& /*allocator*/) // allocator is unused because bEnableOverflow is false in this specialization. + { + } + + /// fixed_vector_allocator + /// + // Disabled because there is nothing to do. No member data. And the default for this is sufficient. + // fixed_vector_allocator(const fixed_vector_allocator&) + // { + // } + + // Disabled because there is nothing to do. No member data. 
+ //fixed_vector_allocator& operator=(const fixed_vector_allocator& x) + //{ + // return *this; + //} + + void* allocate(size_t /*n*/, int /*flags*/ = 0) + { + EASTL_ASSERT(false); // A fixed_vector should not reallocate, else the user has exhausted its space. + return NULL; + } + + void* allocate(size_t /*n*/, size_t /*alignment*/, size_t /*offset*/, int /*flags*/ = 0) + { + EASTL_ASSERT(false); + return NULL; + } + + void deallocate(void* /*p*/, size_t /*n*/) + { + } + + const char* get_name() const + { + return EASTL_FIXED_POOL_DEFAULT_NAME; + } + + void set_name(const char* /*pName*/) + { + } + + const overflow_allocator_type& get_overflow_allocator() const EA_NOEXCEPT + { + EASTL_ASSERT(false); + overflow_allocator_type* pNULL = NULL; + return *pNULL; // This is not pretty, but it should never execute. This is here only to allow this to compile. + } + + overflow_allocator_type& get_overflow_allocator() EA_NOEXCEPT + { + EASTL_ASSERT(false); + overflow_allocator_type* pNULL = NULL; + return *pNULL; // This is not pretty, but it should never execute. This is here only to allow this to compile. + } + + void set_overflow_allocator(const overflow_allocator_type& /*allocator*/) + { + // We don't have an overflow allocator. + EASTL_ASSERT(false); + } + + void copy_overflow_allocator(const this_type&) // This function exists so we can write generic code that works for allocators that do and don't have overflow allocators. + { + // We don't have an overflow allocator. + } + + }; // fixed_vector_allocator + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const fixed_vector_allocator& a, + const fixed_vector_allocator& b) + { + return (&a == &b); // They are only equal if they are the same object. + } + + + template + inline bool operator!=(const fixed_vector_allocator& a, + const fixed_vector_allocator& b) + { + return (&a != &b); // They are only equal if they are the same object. + } + + + + + + /////////////////////////////////////////////////////////////////////////// + // fixed_swap + /////////////////////////////////////////////////////////////////////////// + + /// fixed_swap + /// + /// This function implements a swap suitable for fixed containers. + /// This is an issue because the size of fixed containers can be very + /// large, due to their having the container buffer within themselves. + /// Note that we are referring to sizeof(container) and not the total + /// sum of memory allocated by the container from the heap. + /// + /// + /// This implementation switches at compile time whether or not the + /// temporary is allocated on the stack or the heap as some compilers + /// will allocate the (large) stack frame regardless of which code + /// path is picked. + template + class fixed_swap_impl + { + public: + static void swap(Container& a, Container& b); + }; + + + template + class fixed_swap_impl + { + public: + static void swap(Container& a, Container& b) + { + Container temp(EASTL_MOVE(a)); // Can't use global swap because that could + a = EASTL_MOVE(b); // itself call this swap function in return. 
+ b = EASTL_MOVE(temp); + } + }; + + + template + class fixed_swap_impl + { + public: + static void swap(Container& a, Container& b) + { + EASTLAllocatorType allocator(*EASTLAllocatorDefault(), EASTL_TEMP_DEFAULT_NAME); + void* const pMemory = allocator.allocate(sizeof(a)); + + if(pMemory) + { + Container* pTemp = ::new(pMemory) Container(EASTL_MOVE(a)); + a = EASTL_MOVE(b); + b = EASTL_MOVE(*pTemp); + + pTemp->~Container(); + allocator.deallocate(pMemory, sizeof(a)); + } + } + }; + + + template + void fixed_swap(Container& a, Container& b) + { + return fixed_swap_impl= EASTL_MAX_STACK_USAGE>::swap(a, b); + } + + + +} // namespace eastl + + +EA_RESTORE_VC_WARNING(); + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/internal/function.h b/libkram/eastl/include/EASTL/internal/function.h new file mode 100644 index 00000000..6e857f0b --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/function.h @@ -0,0 +1,161 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_FUNCTION_H +#define EASTL_FUNCTION_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include + +namespace eastl +{ + + /// EASTL_FUNCTION_DEFAULT_CAPTURE_SSO_SIZE + /// + /// Defines the size of the SSO buffer which is used to hold the specified capture state of the callable. + /// + #ifndef EASTL_FUNCTION_DEFAULT_CAPTURE_SSO_SIZE + #define EASTL_FUNCTION_DEFAULT_CAPTURE_SSO_SIZE (2 * sizeof(void*)) + #endif + + static_assert(EASTL_FUNCTION_DEFAULT_CAPTURE_SSO_SIZE >= sizeof(void*), "functor storage must be able to hold at least a pointer!"); + + template + class function; + + template + class function : public internal::function_detail + { + private: + using Base = internal::function_detail; + public: + using typename Base::result_type; + + function() EA_NOEXCEPT = default; + function(std::nullptr_t p) EA_NOEXCEPT + : Base(p) + { + } + + function(const function& other) + : Base(other) + { + } + + function(function&& other) + : Base(eastl::move(other)) + { + } + + template + function(Functor functor) + : Base(eastl::move(functor)) + { + } + + ~function() EA_NOEXCEPT = default; + + function& operator=(const function& other) + { + Base::operator=(other); + return *this; + } + + function& operator=(function&& other) + { + Base::operator=(eastl::move(other)); + return *this; + } + + function& operator=(std::nullptr_t p) EA_NOEXCEPT + { + Base::operator=(p); + return *this; + } + + template + function& operator=(Functor&& functor) + { + Base::operator=(eastl::forward(functor)); + return *this; + } + + template + function& operator=(eastl::reference_wrapper f) EA_NOEXCEPT + { + Base::operator=(f); + return *this; + } + + void swap(function& other) EA_NOEXCEPT + { + Base::swap(other); + } + + explicit operator bool() const EA_NOEXCEPT + { + return Base::operator bool(); + } + + R operator ()(Args... 
args) const + { + return Base::operator ()(eastl::forward(args)...); + } + + #if EASTL_RTTI_ENABLED + const std::type_info& target_type() const EA_NOEXCEPT + { + return Base::target_type(); + } + + template + Functor* target() EA_NOEXCEPT + { + return Base::target(); + } + + template + const Functor* target() const EA_NOEXCEPT + { + return Base::target(); + } + #endif // EASTL_RTTI_ENABLED + }; + + template + bool operator==(const function& f, std::nullptr_t) EA_NOEXCEPT + { + return !f; + } + + template + bool operator==(std::nullptr_t, const function& f) EA_NOEXCEPT + { + return !f; + } + + template + bool operator!=(const function& f, std::nullptr_t) EA_NOEXCEPT + { + return !!f; + } + + template + bool operator!=(std::nullptr_t, const function& f) EA_NOEXCEPT + { + return !!f; + } + + template + void swap(function& lhs, function& rhs) + { + lhs.swap(rhs); + } + +} // namespace eastl + +#endif // EASTL_FUNCTION_H diff --git a/libkram/eastl/include/EASTL/internal/function_detail.h b/libkram/eastl/include/EASTL/internal/function_detail.h new file mode 100644 index 00000000..dc18b631 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/function_detail.h @@ -0,0 +1,673 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_FUNCTION_DETAIL_H +#define EASTL_FUNCTION_DETAIL_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#if EASTL_RTTI_ENABLED + #include +#endif + +#if EASTL_EXCEPTIONS_ENABLED + EA_DISABLE_ALL_VC_WARNINGS() + #include + #include + EA_RESTORE_ALL_VC_WARNINGS() +#endif + +namespace eastl +{ + #if EASTL_EXCEPTIONS_ENABLED + class bad_function_call : public std::exception + { + public: + bad_function_call() EA_NOEXCEPT = default; + + const char* what() const EA_NOEXCEPT EA_OVERRIDE + { + return "bad function_detail call"; + } + }; + #endif + + namespace internal + { + class unused_class {}; + + union functor_storage_alignment + { + void (*unused_func_ptr)(void); + void (unused_class::*unused_func_mem_ptr)(void); + void* unused_ptr; + }; + + template + struct functor_storage + { + static_assert(SIZE_IN_BYTES >= 0, "local buffer storage cannot have a negative size!"); + template + Ret& GetStorageTypeRef() const + { + return *reinterpret_cast(const_cast(&storage[0])); + } + + union + { + functor_storage_alignment align; + char storage[SIZE_IN_BYTES]; + }; + }; + + template <> + struct functor_storage<0> + { + template + Ret& GetStorageTypeRef() const + { + return *reinterpret_cast(const_cast(&storage[0])); + } + + union + { + functor_storage_alignment align; + char storage[sizeof(functor_storage_alignment)]; + }; + }; + + template + struct is_functor_inplace_allocatable + { + static constexpr bool value = + sizeof(Functor) <= sizeof(functor_storage) && + (eastl::alignment_of_v> % eastl::alignment_of_v) == 0; + }; + + + /// function_base_detail + /// + template + class function_base_detail + { + public: + using FunctorStorageType = functor_storage; + FunctorStorageType mStorage; + + enum ManagerOperations : int + { + MGROPS_DESTRUCT_FUNCTOR = 0, + MGROPS_COPY_FUNCTOR = 1, + MGROPS_MOVE_FUNCTOR = 2, + #if EASTL_RTTI_ENABLED + MGROPS_GET_TYPE_INFO = 3, + MGROPS_GET_FUNC_PTR = 4, + #endif + }; + + // Functor can be allocated inplace + template + class function_manager_base + 
{ + public: + + static Functor* GetFunctorPtr(const FunctorStorageType& storage) EA_NOEXCEPT + { + return &(storage.template GetStorageTypeRef()); + } + + template + static void CreateFunctor(FunctorStorageType& storage, T&& functor) + { + ::new (GetFunctorPtr(storage)) Functor(eastl::forward(functor)); + } + + static void DestructFunctor(FunctorStorageType& storage) + { + GetFunctorPtr(storage)->~Functor(); + } + + static void CopyFunctor(FunctorStorageType& to, const FunctorStorageType& from) + { + ::new (GetFunctorPtr(to)) Functor(*GetFunctorPtr(from)); + } + + static void MoveFunctor(FunctorStorageType& to, FunctorStorageType& from) EA_NOEXCEPT + { + ::new (GetFunctorPtr(to)) Functor(eastl::move(*GetFunctorPtr(from))); + } + + static void* Manager(void* to, void* from, typename function_base_detail::ManagerOperations ops) EA_NOEXCEPT + { + switch (ops) + { + case MGROPS_DESTRUCT_FUNCTOR: + { + DestructFunctor(*static_cast(to)); + } + break; + case MGROPS_COPY_FUNCTOR: + { + CopyFunctor(*static_cast(to), + *static_cast(from)); + } + break; + case MGROPS_MOVE_FUNCTOR: + { + MoveFunctor(*static_cast(to), *static_cast(from)); + DestructFunctor(*static_cast(from)); + } + break; + default: + break; + } + return nullptr; + } + }; + + // Functor is allocated on the heap + template + class function_manager_base::value>::type> + { + public: + static Functor* GetFunctorPtr(const FunctorStorageType& storage) EA_NOEXCEPT + { + return storage.template GetStorageTypeRef(); + } + + static Functor*& GetFunctorPtrRef(const FunctorStorageType& storage) EA_NOEXCEPT + { + return storage.template GetStorageTypeRef(); + } + + template + static void CreateFunctor(FunctorStorageType& storage, T&& functor) + { + auto& allocator = *EASTLAllocatorDefault(); + Functor* func = static_cast(allocator.allocate(sizeof(Functor), alignof(Functor), 0)); + + #if EASTL_EXCEPTIONS_ENABLED + if (!func) + { + throw std::bad_alloc(); + } + #else + EASTL_ASSERT_MSG(func != nullptr, "Allocation failed!"); + #endif + + ::new (static_cast(func)) Functor(eastl::forward(functor)); + GetFunctorPtrRef(storage) = func; + } + + static void DestructFunctor(FunctorStorageType& storage) + { + Functor* func = GetFunctorPtr(storage); + if (func) + { + auto& allocator = *EASTLAllocatorDefault(); + func->~Functor(); + allocator.deallocate(static_cast(func), sizeof(Functor)); + } + } + + static void CopyFunctor(FunctorStorageType& to, const FunctorStorageType& from) + { + auto& allocator = *EASTLAllocatorDefault(); + Functor* func = static_cast(allocator.allocate(sizeof(Functor), alignof(Functor), 0)); + #if EASTL_EXCEPTIONS_ENABLED + if (!func) + { + throw std::bad_alloc(); + } + #else + EASTL_ASSERT_MSG(func != nullptr, "Allocation failed!"); + #endif + ::new (static_cast(func)) Functor(*GetFunctorPtr(from)); + GetFunctorPtrRef(to) = func; + } + + static void MoveFunctor(FunctorStorageType& to, FunctorStorageType& from) EA_NOEXCEPT + { + Functor* func = GetFunctorPtr(from); + GetFunctorPtrRef(to) = func; + GetFunctorPtrRef(from) = nullptr; + } + + static void* Manager(void* to, void* from, typename function_base_detail::ManagerOperations ops) EA_NOEXCEPT + { + switch (ops) + { + case MGROPS_DESTRUCT_FUNCTOR: + { + DestructFunctor(*static_cast(to)); + } + break; + case MGROPS_COPY_FUNCTOR: + { + CopyFunctor(*static_cast(to), + *static_cast(from)); + } + break; + case MGROPS_MOVE_FUNCTOR: + { + MoveFunctor(*static_cast(to), *static_cast(from)); + // Moved ptr, no need to destruct ourselves + } + break; + default: + break; + } + return nullptr; + 
} + }; + + template + class function_manager final : public function_manager_base + { + public: + using Base = function_manager_base; + + #if EASTL_RTTI_ENABLED + static void* GetTypeInfo() EA_NOEXCEPT + { + return reinterpret_cast(const_cast(&typeid(Functor))); + } + + static void* Manager(void* to, void* from, typename function_base_detail::ManagerOperations ops) EA_NOEXCEPT + { + switch (ops) + { + case MGROPS_GET_TYPE_INFO: + { + return GetTypeInfo(); + } + break; + case MGROPS_GET_FUNC_PTR: + { + return static_cast(Base::GetFunctorPtr(*static_cast(to))); + } + break; + default: + { + return Base::Manager(to, from, ops); + } + break; + } + } + #endif // EASTL_RTTI_ENABLED + + /** + * NOTE: + * + * The order of arguments here is vital to the call optimization. Let's dig into why and look at some asm. + * We have two invoker signatures to consider: + * R Invoker(const FunctorStorageType& functor, Args... args) + * R Invoker(Args... args, const FunctorStorageType& functor) + * + * Assume we are using the Windows x64 Calling Convention where the first 4 arguments are passed into + * RCX, RDX, R8, R9. This optimization works for any Calling Convention, we are just using Windows x64 for + * this example. + * + * Given the following member function: void TestMemberFunc(int a, int b) + * RCX == this + * RDX == a + * R8 == b + * + * All three arguments to the function including the hidden this pointer, which in C++ is always the first argument + * are passed into the first three registers. + * The function call chain for eastl::function<>() is as follows: + * operator ()(this, Args... args) -> Invoker(Args... args, this->mStorage) -> StoredFunction(Args... arg) + * + * Let's look at what is happening at the asm level with the different Invoker function signatures and why. + * + * You will notice that operator ()() and Invoker() have the arguments reversed. operator ()() just directly calls + * to Invoker(), it is a tail call, so we force inline the call operator to ensure we directly call to the Invoker(). + * Most compilers always inline it anyways by default; have been instances where it doesn't even though the asm ends + * up being cheaper. + * call -> call -> call versus call -> call + * + * eastl::function = FunctionPointer + * + * Assume we have the above eastl::function object that holds a pointer to a function as the internal callable. + * + * Invoker(this->mStorage, Args... args) is called with the follow arguments in registers: + * RCX = this | RDX = a | R8 = b + * + * Inside Invoker() we use RCX to deference into the eastl::function object and get the function pointer to call. + * This function to call has signature Func(int, int) and thus requires its arguments in registers RCX and RDX. + * The compiler must shift all the arguments towards the left. The full asm looks something as follows. + * + * Calling Invoker: Inside Invoker: + * + * mov rcx, this mov rax, [rcx] + * mov rdx, a mov rcx, rdx + * mov r8, b mov rdx, r8 + * call [rcx + offset to Invoker] jmp [rax] + * + * Notice how the compiler shifts all the arguments before calling the callable and also we only use the this pointer + * to access the internal storage inside the eastl::function object. + * + * Invoker(Args... args, this->mStorage) is called with the following arguments in registers: + * RCX = a | RDX = b | R8 = this + * + * You can see we no longer have to shift the arguments down when going to call the internal stored callable. 
+ * + * Calling Invoker: Inside Invoker: + * + * mov rcx, a mov rax, [r8] + * mov rdx, b jmp [rax] + * mov r8, this + * call [r8 + offset to Invoker] + * + * The generated asm does a straight tail jmp to the loaded function pointer. The arguments are already in the correct + * registers. + * + * For Functors or Lambdas with no captures, this gives us another free register to use to pass arguments since the this + * is at the end, it can be passed onto the stack if we run out of registers. Since the callable has no captures; inside + * the Invoker(), we won't ever need to touch this thus we can just call the operator ()() or let the compiler inline it. + * + * For a callable with captures there is no perf hit since the callable in the common case is inlined and the pointer to the callable + * buffer is passed in a register which the compiler can use to access the captures. + * + * For eastl::function that a holds a pointer to member function. The this pointers is implicitly + * the first argument in the argument list, const T&, and the member function pointer will be called on that object. + * This prevents any argument shifting since the this for the member function pointer is already in RCX. + * + * This is why having this at the end of the argument list is important for generating efficient Invoker() thunks. + */ + static R Invoker(Args... args, const FunctorStorageType& functor) + { + return eastl::invoke(*Base::GetFunctorPtr(functor), eastl::forward(args)...); + } + }; + + function_base_detail() EA_NOEXCEPT = default; + ~function_base_detail() EA_NOEXCEPT = default; + }; + + #define EASTL_INTERNAL_FUNCTION_VALID_FUNCTION_ARGS(FUNCTOR, RET, ARGS, BASE, MYSELF) \ + typename eastl::enable_if_t && \ + !eastl::is_base_of_v> && \ + !eastl::is_same_v, MYSELF>> + + #define EASTL_INTERNAL_FUNCTION_DETAIL_VALID_FUNCTION_ARGS(FUNCTOR, RET, ARGS, MYSELF) \ + EASTL_INTERNAL_FUNCTION_VALID_FUNCTION_ARGS(FUNCTOR, RET, ARGS, MYSELF, MYSELF) + + + /// function_detail + /// + template + class function_detail; + + template + class function_detail : public function_base_detail + { + public: + using result_type = R; + + protected: + using Base = function_base_detail; + using FunctorStorageType = typename function_base_detail::FunctorStorageType; + using Base::mStorage; + + public: + function_detail() EA_NOEXCEPT = default; + function_detail(std::nullptr_t) EA_NOEXCEPT {} + + function_detail(const function_detail& other) + { + if (this != &other) + { + Copy(other); + } + } + + function_detail(function_detail&& other) + { + if (this != &other) + { + Move(eastl::move(other)); + } + } + + template + function_detail(Functor functor) + { + CreateForwardFunctor(eastl::move(functor)); + } + + ~function_detail() EA_NOEXCEPT + { + Destroy(); + } + + function_detail& operator=(const function_detail& other) + { + if (this != &other) + { + Destroy(); + Copy(other); + } + + return *this; + } + + function_detail& operator=(function_detail&& other) + { + if(this != &other) + { + Destroy(); + Move(eastl::move(other)); + } + + return *this; + } + + function_detail& operator=(std::nullptr_t) EA_NOEXCEPT + { + Destroy(); + mMgrFuncPtr = nullptr; + mInvokeFuncPtr = &DefaultInvoker; + + return *this; + } + + template + function_detail& operator=(Functor&& functor) + { + Destroy(); + CreateForwardFunctor(eastl::forward(functor)); + return *this; + } + + template + function_detail& operator=(eastl::reference_wrapper f) EA_NOEXCEPT + { + Destroy(); + CreateForwardFunctor(f); + return *this; + } + + void swap(function_detail& 
other) EA_NOEXCEPT + { + if(this == &other) + return; + + FunctorStorageType tempStorage; + if (other.HaveManager()) + { + (void)(*other.mMgrFuncPtr)(static_cast(&tempStorage), static_cast(&other.mStorage), + Base::ManagerOperations::MGROPS_MOVE_FUNCTOR); + } + + if (HaveManager()) + { + (void)(*mMgrFuncPtr)(static_cast(&other.mStorage), static_cast(&mStorage), + Base::ManagerOperations::MGROPS_MOVE_FUNCTOR); + } + + if (other.HaveManager()) + { + (void)(*other.mMgrFuncPtr)(static_cast(&mStorage), static_cast(&tempStorage), + Base::ManagerOperations::MGROPS_MOVE_FUNCTOR); + } + + eastl::swap(mMgrFuncPtr, other.mMgrFuncPtr); + eastl::swap(mInvokeFuncPtr, other.mInvokeFuncPtr); + } + + explicit operator bool() const EA_NOEXCEPT + { + return HaveManager(); + } + + EASTL_FORCE_INLINE R operator ()(Args... args) const + { + return (*mInvokeFuncPtr)(eastl::forward(args)..., this->mStorage); + } + + #if EASTL_RTTI_ENABLED + const std::type_info& target_type() const EA_NOEXCEPT + { + if (HaveManager()) + { + void* ret = (*mMgrFuncPtr)(nullptr, nullptr, Base::ManagerOperations::MGROPS_GET_TYPE_INFO); + return *(static_cast(ret)); + } + return typeid(void); + } + + template + Functor* target() EA_NOEXCEPT + { + if (HaveManager() && target_type() == typeid(Functor)) + { + void* ret = (*mMgrFuncPtr)(static_cast(&mStorage), nullptr, + Base::ManagerOperations::MGROPS_GET_FUNC_PTR); + return ret ? static_cast(ret) : nullptr; + } + return nullptr; + } + + template + const Functor* target() const EA_NOEXCEPT + { + if (HaveManager() && target_type() == typeid(Functor)) + { + void* ret = (*mMgrFuncPtr)(static_cast(&mStorage), nullptr, + Base::ManagerOperations::MGROPS_GET_FUNC_PTR); + return ret ? static_cast(ret) : nullptr; + } + return nullptr; + } + #endif // EASTL_RTTI_ENABLED + + private: + bool HaveManager() const EA_NOEXCEPT + { + return (mMgrFuncPtr != nullptr); + } + + void Destroy() EA_NOEXCEPT + { + if (HaveManager()) + { + (void)(*mMgrFuncPtr)(static_cast(&mStorage), nullptr, + Base::ManagerOperations::MGROPS_DESTRUCT_FUNCTOR); + } + } + + void Copy(const function_detail& other) + { + if (other.HaveManager()) + { + (void)(*other.mMgrFuncPtr)(static_cast(&mStorage), + const_cast(static_cast(&other.mStorage)), + Base::ManagerOperations::MGROPS_COPY_FUNCTOR); + } + + mMgrFuncPtr = other.mMgrFuncPtr; + mInvokeFuncPtr = other.mInvokeFuncPtr; + } + + void Move(function_detail&& other) + { + if (other.HaveManager()) + { + (void)(*other.mMgrFuncPtr)(static_cast(&mStorage), static_cast(&other.mStorage), + Base::ManagerOperations::MGROPS_MOVE_FUNCTOR); + } + + mMgrFuncPtr = other.mMgrFuncPtr; + mInvokeFuncPtr = other.mInvokeFuncPtr; + other.mMgrFuncPtr = nullptr; + other.mInvokeFuncPtr = &DefaultInvoker; + } + + template + void CreateForwardFunctor(Functor&& functor) + { + using DecayedFunctorType = typename eastl::decay::type; + using FunctionManagerType = typename Base::template function_manager; + + if (internal::is_null(functor)) + { + mMgrFuncPtr = nullptr; + mInvokeFuncPtr = &DefaultInvoker; + } + else + { + mMgrFuncPtr = &FunctionManagerType::Manager; + mInvokeFuncPtr = &FunctionManagerType::Invoker; + FunctionManagerType::CreateFunctor(mStorage, eastl::forward(functor)); + } + } + + private: + typedef void* (*ManagerFuncPtr)(void*, void*, typename Base::ManagerOperations); + typedef R (*InvokeFuncPtr)(Args..., const FunctorStorageType&); + + EA_DISABLE_GCC_WARNING(-Wreturn-type); + EA_DISABLE_CLANG_WARNING(-Wreturn-type); + EA_DISABLE_VC_WARNING(4716); // 'function' must return a value + // We 
cannot assume that R is default constructible. + // This function is called only when the function object CANNOT be called because it is empty, + // it will always throw or assert so we never use the return value anyways and neither should the caller. + static R DefaultInvoker(Args... /*args*/, const FunctorStorageType& /*functor*/) + { + #if EASTL_EXCEPTIONS_ENABLED + throw eastl::bad_function_call(); + #else + EASTL_ASSERT_MSG(false, "function_detail call on an empty function_detail"); + #endif + }; + EA_RESTORE_VC_WARNING(); + EA_RESTORE_CLANG_WARNING(); + EA_RESTORE_GCC_WARNING(); + + + ManagerFuncPtr mMgrFuncPtr = nullptr; + InvokeFuncPtr mInvokeFuncPtr = &DefaultInvoker; + }; + + } // namespace internal + +} // namespace eastl + +#endif // EASTL_FUNCTION_DETAIL_H diff --git a/libkram/eastl/include/EASTL/internal/function_help.h b/libkram/eastl/include/EASTL/internal/function_help.h new file mode 100644 index 00000000..04481d37 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/function_help.h @@ -0,0 +1,51 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_INTERNAL_FUNCTION_HELP_H +#define EASTL_INTERNAL_FUNCTION_HELP_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include + +namespace eastl +{ + namespace internal + { + + ////////////////////////////////////////////////////////////////////// + // is_null + // + template + bool is_null(const T&) + { + return false; + } + + template + bool is_null(Result (*const& function_pointer)(Arguments...)) + { + return function_pointer == nullptr; + } + + template + bool is_null(Result (Class::*const& function_pointer)(Arguments...)) + { + return function_pointer == nullptr; + } + + template + bool is_null(Result (Class::*const& function_pointer)(Arguments...) const) + { + return function_pointer == nullptr; + } + + } // namespace internal +} // namespace eastl + +#endif // Header include guard + diff --git a/libkram/eastl/include/EASTL/internal/functional_base.h b/libkram/eastl/include/EASTL/internal/functional_base.h new file mode 100644 index 00000000..a7d2dc91 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/functional_base.h @@ -0,0 +1,389 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_FUNCTIONAL_BASE_H +#define EASTL_INTERNAL_FUNCTIONAL_BASE_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include +#include +#include + +namespace eastl +{ + // foward declaration for swap + template + inline void swap(T& a, T& b) + EA_NOEXCEPT_IF(eastl::is_nothrow_move_constructible::value&& eastl::is_nothrow_move_assignable::value); + + + /// invoke + /// + /// invoke is a generalized function-call operator which works on function pointers, member function + /// pointers, callable objects and member pointers. + /// + /// For (member/non-member) function pointers and callable objects, it returns the result of calling + /// the function/object with the specified arguments. For member data pointers, it simply returns + /// the member. 
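For reference, a minimal sketch of the invoke semantics described above — member function pointers are called on the object passed as the first argument, member data pointers simply return the member, and anything else is a plain call. This is illustrative only (not part of the vendored header); it assumes the EASTL headers added by this patch are on the include path and a C++14 toolchain, since eastl::invoke uses decltype(auto).

    #include <EASTL/functional.h>   // pulls in internal/functional_base.h added above
    #include <cassert>

    struct Point
    {
        int x;
        int DoubleX() const { return x * 2; }
    };

    int Sum(int a, int b) { return a + b; }

    int main()
    {
        Point p{21};

        assert(eastl::invoke(&Point::DoubleX, p) == 42);               // member function pointer: p.DoubleX()
        assert(eastl::invoke(&Point::x, p) == 21);                     // member data pointer: returns p.x
        assert(eastl::invoke(Sum, 20, 22) == 42);                      // free function: Sum(20, 22)
        assert(eastl::invoke([](int a) { return a + 1; }, 41) == 42);  // callable object / lambda
        return 0;
    }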
+ /// + /// Note that there are also reference_wrapper specializations of invoke, which need to be defined + /// later since reference_wrapper uses invoke in its implementation. Those are defined immediately + /// after the definition of reference_wrapper. + /// + /// http://en.cppreference.com/w/cpp/utility/functional/invoke + /// + template + auto invoke_impl(R C::*func, T&& obj, Args&&... args) -> + typename enable_if>::value, + decltype((eastl::forward(obj).*func)(eastl::forward(args)...))>::type + { + return (eastl::forward(obj).*func)(eastl::forward(args)...); + } + + template + auto invoke_impl(F&& func, Args&&... args) -> decltype(eastl::forward(func)(eastl::forward(args)...)) + { + return eastl::forward(func)(eastl::forward(args)...); + } + + + template + auto invoke_impl(R C::*func, T&& obj, Args&&... args) -> decltype(((*eastl::forward(obj)).*func)(eastl::forward(args)...)) + { + return ((*eastl::forward(obj)).*func)(eastl::forward(args)...); + } + + template + auto invoke_impl(M C::*member, T&& obj) -> + typename enable_if< + is_base_of>::value, + decltype(obj.*member) + >::type + { + return obj.*member; + } + + template + auto invoke_impl(M C::*member, T&& obj) -> decltype((*eastl::forward(obj)).*member) + { + return (*eastl::forward(obj)).*member; + } + + template + inline decltype(auto) invoke(F&& func, Args&&... args) + { + return invoke_impl(eastl::forward(func), eastl::forward(args)...); + } + + template + struct invoke_result_impl { + }; + + template + struct invoke_result_impl>(), eastl::declval()...))>, Args...> + { + typedef decltype(invoke_impl(eastl::declval>(), eastl::declval()...)) type; + }; + + template + struct invoke_result : public invoke_result_impl {}; + + #if !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + template + using invoke_result_t = typename invoke_result::type; + #endif + + template + struct is_invocable_impl : public eastl::false_type {}; + + template + struct is_invocable_impl::type>, Args...> : public eastl::true_type {}; + + template + struct is_invocable : public is_invocable_impl {}; + + template + struct is_invocable_r_impl : public eastl::false_type {}; + + template + struct is_invocable_r_impl::type>, Args...> + : public is_convertible::type, R> {}; + + template + struct is_invocable_r : public is_invocable_r_impl {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EASTL_CPP17_INLINE_VARIABLE EA_CONSTEXPR bool is_invocable_v = is_invocable::value; + + template + EASTL_CPP17_INLINE_VARIABLE EA_CONSTEXPR bool is_invocable_r_v = is_invocable_r::value; + #endif + + /// allocator_arg_t + /// + /// allocator_arg_t is an empty class type used to disambiguate the overloads of + /// constructors and member functions of allocator-aware objects, including tuple, + /// function, promise, and packaged_task. + /// http://en.cppreference.com/w/cpp/memory/allocator_arg_t + /// + struct allocator_arg_t + {}; + + + /// allocator_arg + /// + /// allocator_arg is a constant of type allocator_arg_t used to disambiguate, at call site, + /// the overloads of the constructors and member functions of allocator-aware objects, + /// such as tuple, function, promise, and packaged_task. 
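Looking back at the invoke_result and is_invocable traits defined just above, a compile-time sketch of how they answer "what type does invoking this produce?" and "can this be invoked with these arguments?". The declarations here are illustrative placeholders, not part of the original header.

    #include <EASTL/functional.h>
    #include <EASTL/type_traits.h>

    int  Parse(const char*);   // declarations only; used purely for type computations
    void Log(int);

    // invoke_result_t names the type produced by eastl::invoke(F, Args...)
    static_assert(eastl::is_same<eastl::invoke_result_t<decltype(&Parse), const char*>, int>::value,
                  "invoking Parse with a const char* yields int");

    // is_invocable / is_invocable_r check callability (and result convertibility)
    static_assert( eastl::is_invocable<decltype(&Log), int>::value,          "Log is callable with an int");
    static_assert(!eastl::is_invocable<decltype(&Log), const char*>::value,  "no conversion from const char* to int");
    static_assert( eastl::is_invocable_r<long, decltype(&Parse), const char*>::value, "int converts to long");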
+ /// http://en.cppreference.com/w/cpp/memory/allocator_arg + /// + #if !defined(EA_COMPILER_NO_CONSTEXPR) + EA_CONSTEXPR allocator_arg_t allocator_arg = allocator_arg_t(); + #endif + + + template + struct unary_function + { + typedef Argument argument_type; + typedef Result result_type; + }; + + + template + struct binary_function + { + typedef Argument1 first_argument_type; + typedef Argument2 second_argument_type; + typedef Result result_type; + }; + + + /// less + template + struct less : public binary_function + { + EA_CPP14_CONSTEXPR bool operator()(const T& a, const T& b) const + { return a < b; } + }; + + // http://en.cppreference.com/w/cpp/utility/functional/less_void + template <> + struct less + { + template + EA_CPP14_CONSTEXPR auto operator()(A&& a, B&& b) const + -> decltype(eastl::forward(a) < eastl::forward(b)) + { return eastl::forward(a) < eastl::forward(b); } + }; + + + /// reference_wrapper + template + class reference_wrapper + { + public: + typedef T type; + + reference_wrapper(T&) EA_NOEXCEPT; + reference_wrapper(T&&) = delete; + reference_wrapper(const reference_wrapper& x) EA_NOEXCEPT; + + reference_wrapper& operator=(const reference_wrapper& x) EA_NOEXCEPT; + + operator T& () const EA_NOEXCEPT; + T& get() const EA_NOEXCEPT; + + template + typename eastl::result_of::type operator() (ArgTypes&&...) const; + + private: + T* val; + }; + + template + reference_wrapper::reference_wrapper(T &v) EA_NOEXCEPT + : val(eastl::addressof(v)) + {} + + template + reference_wrapper::reference_wrapper(const reference_wrapper& other) EA_NOEXCEPT + : val(other.val) + {} + + template + reference_wrapper& reference_wrapper::operator=(const reference_wrapper& other) EA_NOEXCEPT + { + val = other.val; + return *this; + } + + template + reference_wrapper::operator T&() const EA_NOEXCEPT + { + return *val; + } + + template + T& reference_wrapper::get() const EA_NOEXCEPT + { + return *val; + } + + template + template + typename eastl::result_of::type reference_wrapper::operator() (ArgTypes&&... args) const + { + return eastl::invoke(*val, eastl::forward(args)...); + } + + // reference_wrapper-specific utilties + template + reference_wrapper ref(T& t) EA_NOEXCEPT + { + return eastl::reference_wrapper(t); + } + + template + void ref(const T&&) = delete; + + template + reference_wrapper ref(reference_wrappert) EA_NOEXCEPT + { + return eastl::ref(t.get()); + } + + template + reference_wrapper cref(const T& t) EA_NOEXCEPT + { + return eastl::reference_wrapper(t); + } + + template + void cref(const T&&) = delete; + + template + reference_wrapper cref(reference_wrapper t) EA_NOEXCEPT + { + return eastl::cref(t.get()); + } + + + // reference_wrapper-specific type traits + template + struct is_reference_wrapper_helper + : public eastl::false_type {}; + + template + struct is_reference_wrapper_helper > + : public eastl::true_type {}; + + template + struct is_reference_wrapper + : public eastl::is_reference_wrapper_helper::type> {}; + + + // Helper which adds a reference to a type when given a reference_wrapper of that type. 
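A short sketch of the reference_wrapper, ref and cref utilities defined above: assigning to a wrapper rebinds it rather than writing through it, get() exposes the referenced object, and invoking a wrapper of a callable forwards to the wrapped callable via eastl::invoke. Illustrative only.

    #include <EASTL/functional.h>
    #include <cassert>

    int main()
    {
        int a = 1, b = 2;

        eastl::reference_wrapper<int> r = eastl::ref(a);
        r.get() = 10;                  // writes through to a
        assert(a == 10);

        r = eastl::ref(b);             // rebinds the wrapper; a is untouched
        assert(a == 10 && r.get() == 2);

        auto addOne  = [](int x) { return x + 1; };
        auto wrapped = eastl::ref(addOne);
        assert(wrapped(41) == 42);     // operator() forwards to the wrapped callable

        eastl::reference_wrapper<const int> cr = eastl::cref(a);
        assert(cr.get() == 10);
        return 0;
    }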
+ template + struct remove_reference_wrapper + { typedef T type; }; + + template + struct remove_reference_wrapper< eastl::reference_wrapper > + { typedef T& type; }; + + template + struct remove_reference_wrapper< const eastl::reference_wrapper > + { typedef T& type; }; + + // reference_wrapper specializations of invoke + // These have to come after reference_wrapper is defined, but reference_wrapper needs to have a + // definition of invoke, so these specializations need to come after everything else has been defined. + template + auto invoke_impl(R (C::*func)(Args...), T&& obj, Args&&... args) -> + typename enable_if::type>::value, + decltype((obj.get().*func)(eastl::forward(args)...))>::type + { + return (obj.get().*func)(eastl::forward(args)...); + } + + template + auto invoke_impl(M(C::*member), T&& obj) -> + typename enable_if::type>::value, + decltype(obj.get().*member)>::type + { + return obj.get().*member; + } + + + /////////////////////////////////////////////////////////////////////// + // bind + /////////////////////////////////////////////////////////////////////// + + /// bind1st + /// + template + class binder1st : public unary_function + { + protected: + typename Operation::first_argument_type value; + Operation op; + + public: + binder1st(const Operation& x, const typename Operation::first_argument_type& y) + : value(y), op(x) { } + + typename Operation::result_type operator()(const typename Operation::second_argument_type& x) const + { return op(value, x); } + + typename Operation::result_type operator()(typename Operation::second_argument_type& x) const + { return op(value, x); } + }; + + + template + inline binder1st bind1st(const Operation& op, const T& x) + { + typedef typename Operation::first_argument_type value; + return binder1st(op, value(x)); + } + + + /// bind2nd + /// + template + class binder2nd : public unary_function + { + protected: + Operation op; + typename Operation::second_argument_type value; + + public: + binder2nd(const Operation& x, const typename Operation::second_argument_type& y) + : op(x), value(y) { } + + typename Operation::result_type operator()(const typename Operation::first_argument_type& x) const + { return op(x, value); } + + typename Operation::result_type operator()(typename Operation::first_argument_type& x) const + { return op(x, value); } + }; + + + template + inline binder2nd bind2nd(const Operation& op, const T& x) + { + typedef typename Operation::second_argument_type value; + return binder2nd(op, value(x)); + } + +} // namespace eastl + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/internal/generic_iterator.h b/libkram/eastl/include/EASTL/internal/generic_iterator.h new file mode 100644 index 00000000..b32998a8 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/generic_iterator.h @@ -0,0 +1,208 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// Implements a generic iterator from a given iteratable type, such as a pointer. +// We cannot put this file into our own iterator.h file because we need to +// still be able to use this file when we have our iterator.h disabled. 
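Before the generic_iterator header continues: the legacy bind1st/bind2nd adaptors defined at the end of functional_base.h above bind one argument of a binary functor, as in this sketch (lambdas or eastl::bind are the modern replacement; this example is not part of the patched sources).

    #include <EASTL/functional.h>
    #include <EASTL/algorithm.h>
    #include <cassert>

    int main()
    {
        const int values[] = { 1, 5, 10, 20 };

        // bind2nd fixes the second argument: pred(x) == less<int>()(x, 10)
        assert(eastl::count_if(values, values + 4, eastl::bind2nd(eastl::less<int>(), 10)) == 2);

        // bind1st fixes the first argument: pred(x) == less<int>()(10, x)
        assert(eastl::count_if(values, values + 4, eastl::bind1st(eastl::less<int>(), 10)) == 1);
        return 0;
    }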
+// +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_GENERIC_ITERATOR_H +#define EASTL_INTERNAL_GENERIC_ITERATOR_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include +#include + +// There is no warning number 'number'. +// Member template functions cannot be used for copy-assignment or copy-construction. +EA_DISABLE_VC_WARNING(4619 4217); + + +namespace eastl +{ + + /// generic_iterator + /// + /// Converts something which can be iterated into a formal iterator. + /// While this class' primary purpose is to allow the conversion of + /// a pointer to an iterator, you can convert anything else to an + /// iterator by defining an iterator_traits<> specialization for that + /// object type. See EASTL iterator.h for this. + /// + /// Example usage: + /// typedef generic_iterator IntArrayIterator; + /// typedef generic_iterator IntArrayIteratorOther; + /// + template + class generic_iterator + { + protected: + Iterator mIterator; + + public: + typedef typename eastl::iterator_traits::iterator_category iterator_category; + typedef typename eastl::iterator_traits::value_type value_type; + typedef typename eastl::iterator_traits::difference_type difference_type; + typedef typename eastl::iterator_traits::reference reference; + typedef typename eastl::iterator_traits::pointer pointer; + typedef Iterator iterator_type; + typedef iterator_type wrapped_iterator_type; // This is not in the C++ Standard; it's used by use to identify it as a wrapping iterator type. + typedef Container container_type; + typedef generic_iterator this_type; + + generic_iterator() + : mIterator(iterator_type()) { } + + explicit generic_iterator(const iterator_type& x) + : mIterator(x) { } + + this_type& operator=(const iterator_type& x) + { mIterator = x; return *this; } + + template + generic_iterator(const generic_iterator& x) + : mIterator(x.base()) { } + + reference operator*() const + { return *mIterator; } + + pointer operator->() const + { return mIterator; } + + this_type& operator++() + { ++mIterator; return *this; } + + this_type operator++(int) + { return this_type(mIterator++); } + + this_type& operator--() + { --mIterator; return *this; } + + this_type operator--(int) + { return this_type(mIterator--); } + + reference operator[](const difference_type& n) const + { return mIterator[n]; } + + this_type& operator+=(const difference_type& n) + { mIterator += n; return *this; } + + this_type operator+(const difference_type& n) const + { return this_type(mIterator + n); } + + this_type& operator-=(const difference_type& n) + { mIterator -= n; return *this; } + + this_type operator-(const difference_type& n) const + { return this_type(mIterator - n); } + + const iterator_type& base() const + { return mIterator; } + + }; // class generic_iterator + + + template + inline bool operator==(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() == rhs.base(); } + + template + inline bool operator==(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() == rhs.base(); } + + template + inline bool operator!=(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() != rhs.base(); } + + template + inline bool operator!=(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() != rhs.base(); } + + template + inline bool operator<(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() < rhs.base(); } + + 
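A usage sketch for generic_iterator: wrapping a raw pointer so it behaves as a class-type random-access iterator. Purely illustrative; the single-argument form relies on the defaulted Container parameter.

    #include <EASTL/internal/generic_iterator.h>
    #include <cassert>

    int main()
    {
        int data[] = { 3, 1, 4, 1, 5 };

        typedef eastl::generic_iterator<int*> IntPtrIterator;
        IntPtrIterator first(data);
        IntPtrIterator last(data + 5);

        int sum = 0;
        for (IntPtrIterator it = first; it != last; ++it)
            sum += *it;                      // operator* forwards to the wrapped pointer

        assert(sum == 14);
        assert(last - first == 5);           // random-access arithmetic also forwards
        assert(first.base() == data);        // base() exposes the wrapped iterator
        return 0;
    }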
template + inline bool operator<(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() < rhs.base(); } + + template + inline bool operator>(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() > rhs.base(); } + + template + inline bool operator>(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() > rhs.base(); } + + template + inline bool operator<=(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() <= rhs.base(); } + + template + inline bool operator<=(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() <= rhs.base(); } + + template + inline bool operator>=(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() >= rhs.base(); } + + template + inline bool operator>=(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() >= rhs.base(); } + + template + inline typename generic_iterator::difference_type + operator-(const generic_iterator& lhs, const generic_iterator& rhs) + { return lhs.base() - rhs.base(); } + + template + inline generic_iterator + operator+(typename generic_iterator::difference_type n, const generic_iterator& x) + { return generic_iterator(x.base() + n); } + + + + /// is_generic_iterator + /// + /// Tells if an iterator is one of these generic_iterators. This is useful if you want to + /// write code that uses miscellaneous iterators but wants to tell if they are generic_iterators. + /// A primary reason to do so is that you can get at the pointer within the generic_iterator. + /// + template + struct is_generic_iterator : public false_type { }; + + template + struct is_generic_iterator > : public true_type { }; + + + /// unwrap_generic_iterator + /// + /// Returns Iterator::get_base() if it's a generic_iterator, else returns Iterator as-is. + /// + /// Example usage: + /// vector intVector; + /// eastl::generic_iterator::iterator> genericIterator(intVector.begin()); + /// vector::iterator it = unwrap_generic_iterator(genericIterator); + /// + template + inline typename eastl::is_iterator_wrapper_helper::value>::iterator_type unwrap_generic_iterator(Iterator it) + { return eastl::is_iterator_wrapper_helper::value>::get_base(it); } + + +} // namespace eastl + + +EA_RESTORE_VC_WARNING(); + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/internal/hashtable.h b/libkram/eastl/include/EASTL/internal/hashtable.h new file mode 100644 index 00000000..bb6d27eb --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/hashtable.h @@ -0,0 +1,3222 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements a hashtable, much like the C++11 unordered_set/unordered_map. +// proposed classes. +// The primary distinctions between this hashtable and C++11 unordered containers are: +// - hashtable is savvy to an environment that doesn't have exception handling, +// as is sometimes the case with console or embedded environments. +// - hashtable is slightly more space-efficient than a conventional std hashtable +// implementation on platforms with 64 bit size_t. This is +// because std STL uses size_t (64 bits) in data structures whereby 32 bits +// of data would be fine. 
+// - hashtable can contain objects with alignment requirements. TR1 hash tables +// cannot do so without a bit of tedious non-portable effort. +// - hashtable supports debug memory naming natively. +// - hashtable provides a find function that lets you specify a type that is +// different from the hash table key type. This is particularly useful for +// the storing of string objects but finding them by char pointers. +// - hashtable provides a lower level insert function which lets the caller +// specify the hash code and optionally the node instance. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_HASHTABLE_H +#define EASTL_INTERNAL_HASHTABLE_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +EA_DISABLE_ALL_VC_WARNINGS() + #include + #include +EA_RESTORE_ALL_VC_WARNINGS() + +// 4512 - 'class' : assignment operator could not be generated. +// 4530 - C++ exception handler used, but unwind semantics are not enabled. Specify /EHsc +// 4571 - catch(...) semantics changed since Visual C++ 7.1; structured exceptions (SEH) are no longer caught. +EA_DISABLE_VC_WARNING(4512 4530 4571); + + +namespace eastl +{ + + /// EASTL_HASHTABLE_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_HASHTABLE_DEFAULT_NAME + #define EASTL_HASHTABLE_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " hashtable" // Unless the user overrides something, this is "EASTL hashtable". + #endif + + + /// EASTL_HASHTABLE_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_HASHTABLE_DEFAULT_ALLOCATOR + #define EASTL_HASHTABLE_DEFAULT_ALLOCATOR allocator_type(EASTL_HASHTABLE_DEFAULT_NAME) + #endif + + + /// kHashtableAllocFlagBuckets + /// Flag to allocator which indicates that we are allocating buckets and not nodes. + enum { kHashtableAllocFlagBuckets = 0x00400000 }; + + + /// gpEmptyBucketArray + /// + /// A shared representation of an empty hash table. This is present so that + /// a new empty hashtable allocates no memory. It has two entries, one for + /// the first lone empty (NULL) bucket, and one for the non-NULL trailing sentinel. + /// + extern EASTL_API void* gpEmptyBucketArray[2]; + + + /// EASTL_MACRO_SWAP + /// + /// Use EASTL_MACRO_SWAP because GCC (at least v4.6-4.8) has a bug where it fails to compile eastl::swap(mpBucketArray, x.mpBucketArray). + /// + #define EASTL_MACRO_SWAP(Type, a, b) \ + { Type temp = a; a = b; b = temp; } + + + /// hash_node + /// + /// A hash_node stores an element in a hash table, much like a + /// linked list node stores an element in a linked list. + /// A hash_node additionally can, via template parameter, + /// store a hash code in the node to speed up hash calculations + /// and comparisons in some cases. + /// + template + struct hash_node; + + EA_DISABLE_VC_WARNING(4625 4626) // "copy constructor / assignment operator could not be generated because a base class copy constructor is inaccessible or deleted" + #ifdef EA_COMPILER_MSVC_2015 + EA_DISABLE_VC_WARNING(5026) // disable warning: "move constructor was implicitly defined as deleted" + #endif + template + struct hash_node + { + hash_node() = default; + hash_node(const hash_node&) = default; + hash_node(hash_node&&) = default; + + Value mValue; + hash_node* mpNext; + eastl_size_t mnHashCode; // See config.h for the definition of eastl_size_t, which defaults to size_t. 
+ } EASTL_MAY_ALIAS; + + template + struct hash_node + { + hash_node() = default; + hash_node(const hash_node&) = default; + hash_node(hash_node&&) = default; + + Value mValue; + hash_node* mpNext; + } EASTL_MAY_ALIAS; + + #ifdef EA_COMPILER_MSVC_2015 + EA_RESTORE_VC_WARNING() + #endif + EA_RESTORE_VC_WARNING() + + + // has_hashcode_member + // + // Custom type-trait that checks for the existence of a class data member 'mnHashCode'. + // + // In order to explicitly instantiate the hashtable without error we need to SFINAE away the functions that will + // fail to compile based on if the 'hash_node' contains a 'mnHashCode' member dictated by the hashtable template + // parameters. The hashtable support this level of configuration to allow users to choose which between the space vs. + // time optimization. + // + namespace Internal + { + template + struct has_hashcode_member + { + private: + template static eastl::no_type test(...); + template static eastl::yes_type test(decltype(U::mnHashCode)* = 0); + public: + static const bool value = sizeof(test(0)) == sizeof(eastl::yes_type); + }; + } + + static_assert(Internal::has_hashcode_member>::value, "contains a mnHashCode member"); + static_assert(!Internal::has_hashcode_member>::value, "doesn't contain a mnHashCode member"); + + // convenience macros to increase the readability of the code paths that must SFINAE on if the 'hash_node' + // contains the cached hashed value or not. + #define ENABLE_IF_HAS_HASHCODE(T, RT) typename eastl::enable_if::value, RT>::type* + #define ENABLE_IF_HASHCODE_EASTLSIZET(T, RT) typename eastl::enable_if::value, RT>::type + #define ENABLE_IF_TRUETYPE(T) typename eastl::enable_if::type* + #define DISABLE_IF_TRUETYPE(T) typename eastl::enable_if::type* + + + /// node_iterator_base + /// + /// Node iterators iterate nodes within a given bucket. + /// + /// We define a base class here because it is shared by both const and + /// non-const iterators. + /// + template + struct node_iterator_base + { + typedef hash_node node_type; + + node_type* mpNode; + + node_iterator_base(node_type* pNode) + : mpNode(pNode) { } + + void increment() + { mpNode = mpNode->mpNext; } + }; + + + + /// node_iterator + /// + /// Node iterators iterate nodes within a given bucket. + /// + /// The bConst parameter defines if the iterator is a const_iterator + /// or an iterator. + /// + template + struct node_iterator : public node_iterator_base + { + public: + typedef node_iterator_base base_type; + typedef node_iterator this_type; + typedef typename base_type::node_type node_type; + typedef Value value_type; + typedef typename type_select::type pointer; + typedef typename type_select::type reference; + typedef ptrdiff_t difference_type; + typedef EASTL_ITC_NS::forward_iterator_tag iterator_category; + + public: + explicit node_iterator(node_type* pNode = NULL) + : base_type(pNode) { } + + node_iterator(const node_iterator& x) + : base_type(x.mpNode) { } + + reference operator*() const + { return base_type::mpNode->mValue; } + + pointer operator->() const + { return &(base_type::mpNode->mValue); } + + node_iterator& operator++() + { base_type::increment(); return *this; } + + node_iterator operator++(int) + { node_iterator temp(*this); base_type::increment(); return temp; } + + }; // node_iterator + + + + /// hashtable_iterator_base + /// + /// A hashtable_iterator iterates the entire hash table and not just + /// nodes within a single bucket. 
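The mnHashCode caching discussed above is surfaced to users through the bCacheHashCode template parameter of the containers built on this hashtable. A sketch with eastl::hash_map, assuming the application supplies the operator new[] overloads EASTL's default allocator expects:

    #include <EASTL/hash_map.h>
    #include <EASTL/string.h>

    // bCacheHashCode == true: each hash_node also stores mnHashCode, so rehashing never
    // re-hashes stored keys, and lookups can reject non-matching nodes by comparing the
    // cached code before calling the (relatively expensive) string equality predicate.
    typedef eastl::hash_map<eastl::string, int,
                            eastl::hash<eastl::string>,
                            eastl::equal_to<eastl::string>,
                            EASTLAllocatorType,
                            /*bCacheHashCode*/ true> CachedStringMap;

    // Default bCacheHashCode == false: nodes are one eastl_size_t smaller, but stored keys
    // are re-hashed whenever the table rehashes.
    typedef eastl::hash_map<eastl::string, int> LeanStringMap;

    void Sketch()
    {
        CachedStringMap cached;
        cached["answer"] = 42;

        LeanStringMap lean;
        lean["answer"] = 42;
    }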
Users in general will use a hash + /// table iterator much more often, as it is much like other container + /// iterators (e.g. vector::iterator). + /// + /// We define a base class here because it is shared by both const and + /// non-const iterators. + /// + template + struct hashtable_iterator_base + { + public: + typedef hashtable_iterator_base this_type; + typedef hash_node node_type; + + protected: + template + friend class hashtable; + + template + friend struct hashtable_iterator; + + template + friend bool operator==(const hashtable_iterator_base&, const hashtable_iterator_base&); + + template + friend bool operator!=(const hashtable_iterator_base&, const hashtable_iterator_base&); + + node_type* mpNode; // Current node within current bucket. + node_type** mpBucket; // Current bucket. + + public: + hashtable_iterator_base(node_type* pNode, node_type** pBucket) + : mpNode(pNode), mpBucket(pBucket) { } + + void increment_bucket() + { + ++mpBucket; + while(*mpBucket == NULL) // We store an extra bucket with some non-NULL value at the end + ++mpBucket; // of the bucket array so that finding the end of the bucket + mpNode = *mpBucket; // array is quick and simple. + } + + void increment() + { + mpNode = mpNode->mpNext; + + while(mpNode == NULL) + mpNode = *++mpBucket; + } + + }; // hashtable_iterator_base + + + + + /// hashtable_iterator + /// + /// A hashtable_iterator iterates the entire hash table and not just + /// nodes within a single bucket. Users in general will use a hash + /// table iterator much more often, as it is much like other container + /// iterators (e.g. vector::iterator). + /// + /// The bConst parameter defines if the iterator is a const_iterator + /// or an iterator. + /// + template + struct hashtable_iterator : public hashtable_iterator_base + { + public: + typedef hashtable_iterator_base base_type; + typedef hashtable_iterator this_type; + typedef hashtable_iterator this_type_non_const; + typedef typename base_type::node_type node_type; + typedef Value value_type; + typedef typename type_select::type pointer; + typedef typename type_select::type reference; + typedef ptrdiff_t difference_type; + typedef EASTL_ITC_NS::forward_iterator_tag iterator_category; + + public: + hashtable_iterator(node_type* pNode = NULL, node_type** pBucket = NULL) + : base_type(pNode, pBucket) { } + + hashtable_iterator(node_type** pBucket) + : base_type(*pBucket, pBucket) { } + + hashtable_iterator(const this_type_non_const& x) + : base_type(x.mpNode, x.mpBucket) { } + + reference operator*() const + { return base_type::mpNode->mValue; } + + pointer operator->() const + { return &(base_type::mpNode->mValue); } + + hashtable_iterator& operator++() + { base_type::increment(); return *this; } + + hashtable_iterator operator++(int) + { hashtable_iterator temp(*this); base_type::increment(); return temp; } + + const node_type* get_node() const + { return base_type::mpNode; } + + }; // hashtable_iterator + + + + + /// ht_distance + /// + /// This function returns the same thing as distance() for + /// forward iterators but returns zero for input iterators. + /// The reason why is that input iterators can only be read + /// once, and calling distance() on an input iterator destroys + /// the ability to read it. This ht_distance is used only for + /// optimization and so the code will merely work better with + /// forward iterators that input iterators. 
+ /// + template + inline typename eastl::iterator_traits::difference_type + distance_fw_impl(Iterator /*first*/, Iterator /*last*/, EASTL_ITC_NS::input_iterator_tag) + { + return 0; + } + + template + inline typename eastl::iterator_traits::difference_type + distance_fw_impl(Iterator first, Iterator last, EASTL_ITC_NS::forward_iterator_tag) + { return eastl::distance(first, last); } + + template + inline typename eastl::iterator_traits::difference_type + ht_distance(Iterator first, Iterator last) + { + typedef typename eastl::iterator_traits::iterator_category IC; + return distance_fw_impl(first, last, IC()); + } + + + + + /// mod_range_hashing + /// + /// Implements the algorithm for conversion of a number in the range of + /// [0, SIZE_T_MAX] to the range of [0, BucketCount). + /// + struct mod_range_hashing + { + uint32_t operator()(size_t r, uint32_t n) const + { return r % n; } + }; + + + /// default_ranged_hash + /// + /// Default ranged hash function H. In principle it should be a + /// function object composed from objects of type H1 and H2 such that + /// h(k, n) = h2(h1(k), n), but that would mean making extra copies of + /// h1 and h2. So instead we'll just use a tag to tell class template + /// hashtable to do that composition. + /// + struct default_ranged_hash{ }; + + + /// prime_rehash_policy + /// + /// Default value for rehash policy. Bucket size is (usually) the + /// smallest prime that keeps the load factor small enough. + /// + struct EASTL_API prime_rehash_policy + { + public: + float mfMaxLoadFactor; + float mfGrowthFactor; + mutable uint32_t mnNextResize; + + public: + prime_rehash_policy(float fMaxLoadFactor = 1.f) + : mfMaxLoadFactor(fMaxLoadFactor), mfGrowthFactor(2.f), mnNextResize(0) { } + + float GetMaxLoadFactor() const + { return mfMaxLoadFactor; } + + /// Return a bucket count no greater than nBucketCountHint, + /// Don't update member variables while at it. + static uint32_t GetPrevBucketCountOnly(uint32_t nBucketCountHint); + + /// Return a bucket count no greater than nBucketCountHint. + /// This function has a side effect of updating mnNextResize. + uint32_t GetPrevBucketCount(uint32_t nBucketCountHint) const; + + /// Return a bucket count no smaller than nBucketCountHint. + /// This function has a side effect of updating mnNextResize. + uint32_t GetNextBucketCount(uint32_t nBucketCountHint) const; + + /// Return a bucket count appropriate for nElementCount elements. + /// This function has a side effect of updating mnNextResize. + uint32_t GetBucketCount(uint32_t nElementCount) const; + + /// nBucketCount is current bucket count, nElementCount is current element count, + /// and nElementAdd is number of elements to be inserted. Do we need + /// to increase bucket count? If so, return pair(true, n), where + /// n is the new bucket count. If not, return pair(false, 0). + eastl::pair + GetRehashRequired(uint32_t nBucketCount, uint32_t nElementCount, uint32_t nElementAdd) const; + }; + + + + + + /////////////////////////////////////////////////////////////////////// + // Base classes for hashtable. We define these base classes because + // in some cases we want to do different things depending on the + // value of a policy class. In some cases the policy class affects + // which member functions and nested typedefs are defined; we handle that + // by specializing base class templates. 
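A behavioural sketch of prime_rehash_policy as seen through the public container interface: the load factor is size() / bucket_count(), and once an insert would push it past the configured maximum the table grows to the next prime bucket count. The exact bucket counts are implementation details; this example only shows the knobs involved.

    #include <EASTL/hash_set.h>
    #include <cstdio>

    void RehashSketch()
    {
        eastl::hash_set<int> intSet;

        // get/set_max_load_factor come from rehash_base, backed by prime_rehash_policy.
        intSet.set_max_load_factor(1.0f);

        for (int i = 0; i < 1000; ++i)
            intSet.insert(i);  // bucket_count() grows as needed to respect the max load factor

        printf("size=%u buckets=%u load=%.2f max=%.2f\n",
               (unsigned)intSet.size(), (unsigned)intSet.bucket_count(),
               intSet.load_factor(), intSet.get_max_load_factor());
    }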
Several of the base class templates + // need to access other members of class template hashtable, so we use + // the "curiously recurring template pattern" (parent class is templated + // on type of child class) for them. + /////////////////////////////////////////////////////////////////////// + + + /// rehash_base + /// + /// Give hashtable the get_max_load_factor functions if the rehash + /// policy is prime_rehash_policy. + /// + template + struct rehash_base { }; + + template + struct rehash_base + { + // Returns the max load factor, which is the load factor beyond + // which we rebuild the container with a new bucket count. + float get_max_load_factor() const + { + const Hashtable* const pThis = static_cast(this); + return pThis->rehash_policy().GetMaxLoadFactor(); + } + + // If you want to make the hashtable never rehash (resize), + // set the max load factor to be a very high number (e.g. 100000.f). + void set_max_load_factor(float fMaxLoadFactor) + { + Hashtable* const pThis = static_cast(this); + pThis->rehash_policy(prime_rehash_policy(fMaxLoadFactor)); + } + }; + + + + + /// hash_code_base + /// + /// Encapsulates two policy issues that aren't quite orthogonal. + /// (1) The difference between using a ranged hash function and using + /// the combination of a hash function and a range-hashing function. + /// In the former case we don't have such things as hash codes, so + /// we have a dummy type as placeholder. + /// (2) Whether or not we cache hash codes. Caching hash codes is + /// meaningless if we have a ranged hash function. This is because + /// a ranged hash function converts an object directly to its + /// bucket index without ostensibly using a hash code. + /// We also put the key extraction and equality comparison function + /// objects here, for convenience. + /// + template + struct hash_code_base; + + + /// hash_code_base + /// + /// Specialization: ranged hash function, no caching hash codes. + /// H1 and H2 are provided but ignored. We define a dummy hash code type. + /// + template + struct hash_code_base + { + protected: + ExtractKey mExtractKey; // To do: Make this member go away entirely, as it never has any data. + Equal mEqual; // To do: Make this instance use zero space when it is zero size. + H mRangedHash; // To do: Make this instance use zero space when it is zero size + + public: + H1 hash_function() const + { return H1(); } + + Equal equal_function() const // Deprecated. Use key_eq() instead, as key_eq is what the new C++ standard + { return mEqual; } // has specified in its hashtable (unordered_*) proposal. 
+ + const Equal& key_eq() const + { return mEqual; } + + Equal& key_eq() + { return mEqual; } + + protected: + typedef void* hash_code_t; + typedef uint32_t bucket_index_t; + + hash_code_base(const ExtractKey& extractKey, const Equal& eq, const H1&, const H2&, const H& h) + : mExtractKey(extractKey), mEqual(eq), mRangedHash(h) { } + + hash_code_t get_hash_code(const Key& key) const + { + EA_UNUSED(key); + return NULL; + } + + bucket_index_t bucket_index(hash_code_t, uint32_t) const + { return (bucket_index_t)0; } + + bucket_index_t bucket_index(const Key& key, hash_code_t, uint32_t nBucketCount) const + { return (bucket_index_t)mRangedHash(key, nBucketCount); } + + bucket_index_t bucket_index(const hash_node* pNode, uint32_t nBucketCount) const + { return (bucket_index_t)mRangedHash(mExtractKey(pNode->mValue), nBucketCount); } + + bool compare(const Key& key, hash_code_t, hash_node* pNode) const + { return mEqual(key, mExtractKey(pNode->mValue)); } + + void copy_code(hash_node*, const hash_node*) const + { } // Nothing to do. + + void set_code(hash_node* pDest, hash_code_t c) const + { + EA_UNUSED(pDest); + EA_UNUSED(c); + } + + void base_swap(hash_code_base& x) + { + eastl::swap(mExtractKey, x.mExtractKey); + eastl::swap(mEqual, x.mEqual); + eastl::swap(mRangedHash, x.mRangedHash); + } + + }; // hash_code_base + + + + // No specialization for ranged hash function while caching hash codes. + // That combination is meaningless, and trying to do it is an error. + + + /// hash_code_base + /// + /// Specialization: ranged hash function, cache hash codes. + /// This combination is meaningless, so we provide only a declaration + /// and no definition. + /// + template + struct hash_code_base; + + + + /// hash_code_base + /// + /// Specialization: hash function and range-hashing function, + /// no caching of hash codes. H is provided but ignored. + /// Provides typedef and accessor required by TR1. + /// + template + struct hash_code_base + { + protected: + ExtractKey mExtractKey; + Equal mEqual; + H1 m_h1; + H2 m_h2; + + public: + typedef H1 hasher; + + H1 hash_function() const + { return m_h1; } + + Equal equal_function() const // Deprecated. Use key_eq() instead, as key_eq is what the new C++ standard + { return mEqual; } // has specified in its hashtable (unordered_*) proposal. + + const Equal& key_eq() const + { return mEqual; } + + Equal& key_eq() + { return mEqual; } + + protected: + typedef size_t hash_code_t; + typedef uint32_t bucket_index_t; + typedef hash_node node_type; + + hash_code_base(const ExtractKey& ex, const Equal& eq, const H1& h1, const H2& h2, const default_ranged_hash&) + : mExtractKey(ex), mEqual(eq), m_h1(h1), m_h2(h2) { } + + hash_code_t get_hash_code(const Key& key) const + { return (hash_code_t)m_h1(key); } + + bucket_index_t bucket_index(hash_code_t c, uint32_t nBucketCount) const + { return (bucket_index_t)m_h2(c, nBucketCount); } + + bucket_index_t bucket_index(const Key&, hash_code_t c, uint32_t nBucketCount) const + { return (bucket_index_t)m_h2(c, nBucketCount); } + + bucket_index_t bucket_index(const node_type* pNode, uint32_t nBucketCount) const + { return (bucket_index_t)m_h2((hash_code_t)m_h1(mExtractKey(pNode->mValue)), nBucketCount); } + + bool compare(const Key& key, hash_code_t, node_type* pNode) const + { return mEqual(key, mExtractKey(pNode->mValue)); } + + void copy_code(node_type*, const node_type*) const + { } // Nothing to do. + + void set_code(node_type*, hash_code_t) const + { } // Nothing to do. 
+ + void base_swap(hash_code_base& x) + { + eastl::swap(mExtractKey, x.mExtractKey); + eastl::swap(mEqual, x.mEqual); + eastl::swap(m_h1, x.m_h1); + eastl::swap(m_h2, x.m_h2); + } + + }; // hash_code_base + + + + /// hash_code_base + /// + /// Specialization: hash function and range-hashing function, + /// caching hash codes. H is provided but ignored. + /// Provides typedef and accessor required by TR1. + /// + template + struct hash_code_base + { + protected: + ExtractKey mExtractKey; + Equal mEqual; + H1 m_h1; + H2 m_h2; + + public: + typedef H1 hasher; + + H1 hash_function() const + { return m_h1; } + + Equal equal_function() const // Deprecated. Use key_eq() instead, as key_eq is what the new C++ standard + { return mEqual; } // has specified in its hashtable (unordered_*) proposal. + + const Equal& key_eq() const + { return mEqual; } + + Equal& key_eq() + { return mEqual; } + + protected: + typedef uint32_t hash_code_t; + typedef uint32_t bucket_index_t; + typedef hash_node node_type; + + hash_code_base(const ExtractKey& ex, const Equal& eq, const H1& h1, const H2& h2, const default_ranged_hash&) + : mExtractKey(ex), mEqual(eq), m_h1(h1), m_h2(h2) { } + + hash_code_t get_hash_code(const Key& key) const + { return (hash_code_t)m_h1(key); } + + bucket_index_t bucket_index(hash_code_t c, uint32_t nBucketCount) const + { return (bucket_index_t)m_h2(c, nBucketCount); } + + bucket_index_t bucket_index(const Key&, hash_code_t c, uint32_t nBucketCount) const + { return (bucket_index_t)m_h2(c, nBucketCount); } + + bucket_index_t bucket_index(const node_type* pNode, uint32_t nBucketCount) const + { return (bucket_index_t)m_h2((uint32_t)pNode->mnHashCode, nBucketCount); } + + bool compare(const Key& key, hash_code_t c, node_type* pNode) const + { return (pNode->mnHashCode == c) && mEqual(key, mExtractKey(pNode->mValue)); } + + void copy_code(node_type* pDest, const node_type* pSource) const + { pDest->mnHashCode = pSource->mnHashCode; } + + void set_code(node_type* pDest, hash_code_t c) const + { pDest->mnHashCode = c; } + + void base_swap(hash_code_base& x) + { + eastl::swap(mExtractKey, x.mExtractKey); + eastl::swap(mEqual, x.mEqual); + eastl::swap(m_h1, x.m_h1); + eastl::swap(m_h2, x.m_h2); + } + + }; // hash_code_base + + + + + + /////////////////////////////////////////////////////////////////////////// + /// hashtable + /// + /// Key and Value: arbitrary CopyConstructible types. + /// + /// ExtractKey: function object that takes a object of type Value + /// and returns a value of type Key. + /// + /// Equal: function object that takes two objects of type k and returns + /// a bool-like value that is true if the two objects are considered equal. + /// + /// H1: a hash function. A unary function object with argument type + /// Key and result type size_t. Return values should be distributed + /// over the entire range [0, numeric_limits::max()]. + /// + /// H2: a range-hashing function (in the terminology of Tavori and + /// Dreizin). This is a function which takes the output of H1 and + /// converts it to the range of [0, n]. Usually it merely takes the + /// output of H1 and mods it to n. + /// + /// H: a ranged hash function (Tavori and Dreizin). This is merely + /// a class that combines the functionality of H1 and H2 together, + /// possibly in some way that is somehow improved over H1 and H2 + /// It is a binary function whose argument types are Key and size_t + /// and whose result type is uint32_t. Given arguments k and n, the + /// return value is in the range [0, n). 
Default: h(k, n) = h2(h1(k), n). + /// If H is anything other than the default, H1 and H2 are ignored, + /// as H is thus overriding H1 and H2. + /// + /// RehashPolicy: Policy class with three members, all of which govern + /// the bucket count. nBucket(n) returns a bucket count no smaller + /// than n. GetBucketCount(n) returns a bucket count appropriate + /// for an element count of n. GetRehashRequired(nBucketCount, nElementCount, nElementAdd) + /// determines whether, if the current bucket count is nBucket and the + /// current element count is nElementCount, we need to increase the bucket + /// count. If so, returns pair(true, n), where n is the new + /// bucket count. If not, returns pair(false, ). + /// + /// Currently it is hard-wired that the number of buckets never + /// shrinks. Should we allow RehashPolicy to change that? + /// + /// bCacheHashCode: true if we store the value of the hash + /// function along with the value. This is a time-space tradeoff. + /// Storing it may improve lookup speed by reducing the number of + /// times we need to call the Equal function. + /// + /// bMutableIterators: true if hashtable::iterator is a mutable + /// iterator, false if iterator and const_iterator are both const + /// iterators. This is true for hash_map and hash_multimap, + /// false for hash_set and hash_multiset. + /// + /// bUniqueKeys: true if the return value of hashtable::count(k) + /// is always at most one, false if it may be an arbitrary number. + /// This is true for hash_set and hash_map and is false for + /// hash_multiset and hash_multimap. + /// + /////////////////////////////////////////////////////////////////////// + /// Note: + /// If you want to make a hashtable never increase its bucket usage, + /// call set_max_load_factor with a very high value such as 100000.f. + /// + /// find_as + /// In order to support the ability to have a hashtable of strings but + /// be able to do efficiently lookups via char pointers (i.e. so they + /// aren't converted to string objects), we provide the find_as + /// function. This function allows you to do a find with a key of a + /// type other than the hashtable key type. See the find_as function + /// for more documentation on this. + /// + /// find_by_hash + /// In the interest of supporting fast operations wherever possible, + /// we provide a find_by_hash function which finds a node using its + /// hash code. This is useful for cases where the node's hash is + /// already known, allowing us to avoid a redundant hash operation + /// in the normal find path. + /// + template + class hashtable + : public rehash_base >, + public hash_code_base + { + public: + typedef Key key_type; + typedef Value value_type; + typedef typename ExtractKey::result_type mapped_type; + typedef hash_code_base hash_code_base_type; + typedef typename hash_code_base_type::hash_code_t hash_code_t; + typedef Allocator allocator_type; + typedef Equal key_equal; + typedef ptrdiff_t difference_type; + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. 
+ typedef value_type& reference; + typedef const value_type& const_reference; + typedef node_iterator local_iterator; + typedef node_iterator const_local_iterator; + typedef hashtable_iterator iterator; + typedef hashtable_iterator const_iterator; + typedef hash_node node_type; + typedef typename type_select, iterator>::type insert_return_type; + typedef hashtable this_type; + typedef RehashPolicy rehash_policy_type; + typedef ExtractKey extract_key_type; + typedef H1 h1_type; + typedef H2 h2_type; + typedef H h_type; + typedef integral_constant has_unique_keys_type; + + using hash_code_base_type::key_eq; + using hash_code_base_type::hash_function; + using hash_code_base_type::mExtractKey; + using hash_code_base_type::get_hash_code; + using hash_code_base_type::bucket_index; + using hash_code_base_type::compare; + using hash_code_base_type::set_code; + using hash_code_base_type::copy_code; + + static const bool kCacheHashCode = bCacheHashCode; + + enum + { + // This enumeration is deprecated in favor of eastl::kHashtableAllocFlagBuckets. + kAllocFlagBuckets = eastl::kHashtableAllocFlagBuckets // Flag to allocator which indicates that we are allocating buckets and not nodes. + }; + + protected: + node_type** mpBucketArray; + size_type mnBucketCount; + size_type mnElementCount; + RehashPolicy mRehashPolicy; // To do: Use base class optimization to make this go away. + allocator_type mAllocator; // To do: Use base class optimization to make this go away. + + public: + hashtable(size_type nBucketCount, const H1&, const H2&, const H&, const Equal&, const ExtractKey&, + const allocator_type& allocator = EASTL_HASHTABLE_DEFAULT_ALLOCATOR); + + template + hashtable(FowardIterator first, FowardIterator last, size_type nBucketCount, + const H1&, const H2&, const H&, const Equal&, const ExtractKey&, + const allocator_type& allocator = EASTL_HASHTABLE_DEFAULT_ALLOCATOR); + + hashtable(const hashtable& x); + + // initializer_list ctor support is implemented in subclasses (e.g. hash_set). + // hashtable(initializer_list, size_type nBucketCount, const H1&, const H2&, const H&, + // const Equal&, const ExtractKey&, const allocator_type& allocator = EASTL_HASHTABLE_DEFAULT_ALLOCATOR); + + hashtable(this_type&& x); + hashtable(this_type&& x, const allocator_type& allocator); + ~hashtable(); + + const allocator_type& get_allocator() const EA_NOEXCEPT; + allocator_type& get_allocator() EA_NOEXCEPT; + void set_allocator(const allocator_type& allocator); + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + iterator begin() EA_NOEXCEPT + { + iterator i(mpBucketArray); + if(!i.mpNode) + i.increment_bucket(); + return i; + } + + const_iterator begin() const EA_NOEXCEPT + { + const_iterator i(mpBucketArray); + if(!i.mpNode) + i.increment_bucket(); + return i; + } + + const_iterator cbegin() const EA_NOEXCEPT + { return begin(); } + + iterator end() EA_NOEXCEPT + { return iterator(mpBucketArray + mnBucketCount); } + + const_iterator end() const EA_NOEXCEPT + { return const_iterator(mpBucketArray + mnBucketCount); } + + const_iterator cend() const EA_NOEXCEPT + { return const_iterator(mpBucketArray + mnBucketCount); } + + // Returns an iterator to the first item in bucket n. 
+ local_iterator begin(size_type n) EA_NOEXCEPT + { return local_iterator(mpBucketArray[n]); } + + const_local_iterator begin(size_type n) const EA_NOEXCEPT + { return const_local_iterator(mpBucketArray[n]); } + + const_local_iterator cbegin(size_type n) const EA_NOEXCEPT + { return const_local_iterator(mpBucketArray[n]); } + + // Returns an iterator to the last item in a bucket returned by begin(n). + local_iterator end(size_type) EA_NOEXCEPT + { return local_iterator(NULL); } + + const_local_iterator end(size_type) const EA_NOEXCEPT + { return const_local_iterator(NULL); } + + const_local_iterator cend(size_type) const EA_NOEXCEPT + { return const_local_iterator(NULL); } + + bool empty() const EA_NOEXCEPT + { return mnElementCount == 0; } + + size_type size() const EA_NOEXCEPT + { return mnElementCount; } + + size_type bucket_count() const EA_NOEXCEPT + { return mnBucketCount; } + + size_type bucket_size(size_type n) const EA_NOEXCEPT + { return (size_type)eastl::distance(begin(n), end(n)); } + + //size_type bucket(const key_type& k) const EA_NOEXCEPT + // { return bucket_index(k, (hash code here), (uint32_t)mnBucketCount); } + + // Returns the ratio of element count to bucket count. A return value of 1 means + // there's an optimal 1 bucket for each element. + float load_factor() const EA_NOEXCEPT + { return (float)mnElementCount / (float)mnBucketCount; } + + // Inherited from the base class. + // Returns the max load factor, which is the load factor beyond + // which we rebuild the container with a new bucket count. + // get_max_load_factor comes from rehash_base. + // float get_max_load_factor() const; + + // Inherited from the base class. + // If you want to make the hashtable never rehash (resize), + // set the max load factor to be a very high number (e.g. 100000.f). + // set_max_load_factor comes from rehash_base. + // void set_max_load_factor(float fMaxLoadFactor); + + /// Generalization of get_max_load_factor. This is an extension that's + /// not present in C++ hash tables (unordered containers). + const rehash_policy_type& rehash_policy() const EA_NOEXCEPT + { return mRehashPolicy; } + + /// Generalization of set_max_load_factor. This is an extension that's + /// not present in C++ hash tables (unordered containers). + void rehash_policy(const rehash_policy_type& rehashPolicy); + + template + insert_return_type emplace(Args&&... args); + + template + iterator emplace_hint(const_iterator position, Args&&... args); + + template insert_return_type try_emplace(const key_type& k, Args&&... args); + template insert_return_type try_emplace(key_type&& k, Args&&... args); + template iterator try_emplace(const_iterator position, const key_type& k, Args&&... args); + template iterator try_emplace(const_iterator position, key_type&& k, Args&&... args); + + insert_return_type insert(const value_type& value); + insert_return_type insert(value_type&& otherValue); + iterator insert(const_iterator hint, const value_type& value); + iterator insert(const_iterator hint, value_type&& value); + void insert(std::initializer_list ilist); + template void insert(InputIterator first, InputIterator last); + //insert_return_type insert(node_type&& nh); + //iterator insert(const_iterator hint, node_type&& nh); + + // This overload attempts to mitigate the overhead associated with mismatched cv-quality elements of + // the hashtable pair. It can avoid copy overhead because it will perfect forward the user provided pair types + // until it can constructed in-place in the allocated hashtable node. 
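A usage sketch for the insert family declared above, through eastl::hash_map (which derives from this hashtable); the comment block describing the perfect-forwarding insert overload continues below. Illustrative only, and again assumes the application provides EASTL's allocation hooks.

    #include <EASTL/hash_map.h>
    #include <EASTL/string.h>
    #include <cassert>

    void InsertFamilySketch()
    {
        eastl::hash_map<eastl::string, int> map;

        // insert: no effect if the key already exists; success is reported in .second
        auto result = map.insert(eastl::make_pair(eastl::string("a"), 1));
        assert(result.second);

        // try_emplace: constructs the mapped value only if the key is absent
        map.try_emplace("b", 2);
        map.try_emplace("b", 99);           // "b" already present, value stays 2
        assert(map["b"] == 2);

        // insert_or_assign: inserts, or overwrites the mapped value of an existing key
        map.insert_or_assign("a", 10);
        assert(map["a"] == 10);

        // emplace: forwards its arguments to value_type's constructor
        map.emplace(eastl::string("c"), 3);
        assert(map.size() == 3);
    }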
+ // + // Ideally we would remove this overload as it is deprecated and removed in C++17 but it currently causes + // performance regressions for hashtables with complex keys (keys that allocate resources). + template , key_type> && + #endif + !eastl::is_literal_type_v

&& + eastl::is_constructible_v>> + insert_return_type insert(P&& otherValue); + + // Non-standard extension + template // See comments below for the const value_type& equivalent to this function. + insert_return_type insert(hash_code_t c, node_type* pNodeNew, P&& otherValue); + + // We provide a version of insert which lets the caller directly specify the hash value and + // a potential node to insert if needed. This allows for less thread contention in the case + // of a thread-shared hash table that's accessed during a mutex lock, because the hash calculation + // and node creation is done outside of the lock. If pNodeNew is supplied by the user (i.e. non-NULL) + // then it must be freeable via the hash table's allocator. If the return value is true then this function + // took over ownership of pNodeNew, else pNodeNew is still owned by the caller to free or to pass + // to another call to insert. pNodeNew need not be assigned the value by the caller, as the insert + // function will assign value to pNodeNew upon insertion into the hash table. pNodeNew may be + // created by the user with the allocate_uninitialized_node function, and freed by the free_uninitialized_node function. + insert_return_type insert(hash_code_t c, node_type* pNodeNew, const value_type& value); + + template eastl::pair insert_or_assign(const key_type& k, M&& obj); + template eastl::pair insert_or_assign(key_type&& k, M&& obj); + template iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj); + template iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj); + + // Used to allocate and free memory used by insert(const value_type& value, hash_code_t c, node_type* pNodeNew). + node_type* allocate_uninitialized_node(); + void free_uninitialized_node(node_type* pNode); + + iterator erase(const_iterator position); + iterator erase(const_iterator first, const_iterator last); + size_type erase(const key_type& k); + + void clear(); + void clear(bool clearBuckets); // If clearBuckets is true, we free the bucket memory and set the bucket count back to the newly constructed count. + void reset_lose_memory() EA_NOEXCEPT; // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + void rehash(size_type nBucketCount); + void reserve(size_type nElementCount); + + iterator find(const key_type& key); + const_iterator find(const key_type& key) const; + + /// Implements a find whereby the user supplies a comparison of a different type + /// than the hashtable value_type. A useful case of this is one whereby you have + /// a container of string objects but want to do searches via passing in char pointers. + /// The problem is that without this kind of find, you need to do the expensive operation + /// of converting the char pointer to a string so it can be used as the argument to the + /// find function. + /// + /// Example usage (namespaces omitted for brevity): + /// hash_set hashSet; + /// hashSet.find_as("hello"); // Use default hash and compare. 
+ /// + /// Example usage (note that the predicate uses string as first type and char* as second): + /// hash_set hashSet; + /// hashSet.find_as("hello", hash(), equal_to_2()); + /// + template + iterator find_as(const U& u, UHash uhash, BinaryPredicate predicate); + + template + const_iterator find_as(const U& u, UHash uhash, BinaryPredicate predicate) const; + + template + iterator find_as(const U& u); + + template + const_iterator find_as(const U& u) const; + + // Note: find_by_hash and find_range_by_hash both perform a search based on a hash value. + // It is important to note that multiple hash values may map to the same hash bucket, so + // it would be incorrect to assume all items returned match the hash value that + // was searched for. + + /// Implements a find whereby the user supplies the node's hash code. + /// It returns an iterator to the first element that matches the given hash. However, there may be multiple elements that match the given hash. + + template + ENABLE_IF_HASHCODE_EASTLSIZET(HashCodeT, iterator) find_by_hash(HashCodeT c) + { + EASTL_CT_ASSERT_MSG(bCacheHashCode, + "find_by_hash(hash_code_t c) is designed to avoid recomputing hashes, " + "so it requires cached hash codes. Consider setting template parameter " + "bCacheHashCode to true or using find_by_hash(const key_type& k, hash_code_t c) instead."); + + const size_type n = (size_type)bucket_index(c, (uint32_t)mnBucketCount); + + node_type* const pNode = DoFindNode(mpBucketArray[n], c); + + return pNode ? iterator(pNode, mpBucketArray + n) : + iterator(mpBucketArray + mnBucketCount); // iterator(mpBucketArray + mnBucketCount) == end() + } + + template + ENABLE_IF_HASHCODE_EASTLSIZET(HashCodeT, const_iterator) find_by_hash(HashCodeT c) const + { + EASTL_CT_ASSERT_MSG(bCacheHashCode, + "find_by_hash(hash_code_t c) is designed to avoid recomputing hashes, " + "so it requires cached hash codes. Consider setting template parameter " + "bCacheHashCode to true or using find_by_hash(const key_type& k, hash_code_t c) instead."); + + const size_type n = (size_type)bucket_index(c, (uint32_t)mnBucketCount); + + node_type* const pNode = DoFindNode(mpBucketArray[n], c); + + return pNode ? + const_iterator(pNode, mpBucketArray + n) : + const_iterator(mpBucketArray + mnBucketCount); // iterator(mpBucketArray + mnBucketCount) == end() + } + + iterator find_by_hash(const key_type& k, hash_code_t c) + { + const size_type n = (size_type)bucket_index(c, (uint32_t)mnBucketCount); + + node_type* const pNode = DoFindNode(mpBucketArray[n], k, c); + return pNode ? iterator(pNode, mpBucketArray + n) : iterator(mpBucketArray + mnBucketCount); // iterator(mpBucketArray + mnBucketCount) == end() + } + + const_iterator find_by_hash(const key_type& k, hash_code_t c) const + { + const size_type n = (size_type)bucket_index(c, (uint32_t)mnBucketCount); + + node_type* const pNode = DoFindNode(mpBucketArray[n], k, c); + return pNode ? const_iterator(pNode, mpBucketArray + n) : const_iterator(mpBucketArray + mnBucketCount); // iterator(mpBucketArray + mnBucketCount) == end() + } + + // Returns a pair that allows iterating over all nodes in a hash bucket + // first in the pair returned holds the iterator for the beginning of the bucket, + // second in the pair returned holds the iterator for the end of the bucket, + // If no bucket is found, both values in the pair are set to end(). + // + // See also the note above. 
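A sketch of the find_by_hash extension documented above: hash the key once (for example outside a lock, or when probing several tables with the same key) and pass both the key and its hash code so the table does not recompute it. Illustrative only.

    #include <EASTL/hash_set.h>
    #include <EASTL/string.h>
    #include <cassert>

    void FindByHashSketch()
    {
        eastl::hash_set<eastl::string> names;
        names.insert("alice");
        names.insert("bob");

        const eastl::string key("alice");

        // Hash once with the container's own hash functor...
        const size_t hashCode = names.hash_function()(key);

        // ...then search without re-hashing. The key is still required, because
        // several distinct keys can share a hash code (and a bucket).
        auto it = names.find_by_hash(key, hashCode);
        assert(it != names.end() && *it == "alice");
    }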
+ eastl::pair find_range_by_hash(hash_code_t c); + eastl::pair find_range_by_hash(hash_code_t c) const; + + size_type count(const key_type& k) const EA_NOEXCEPT; + + eastl::pair equal_range(const key_type& k); + eastl::pair equal_range(const key_type& k) const; + + bool validate() const; + int validate_iterator(const_iterator i) const; + + protected: + // We must remove one of the 'DoGetResultIterator' overloads from the overload-set (via SFINAE) because both can + // not compile successfully at the same time. The 'bUniqueKeys' template parameter chooses at compile-time the + // type of 'insert_return_type' between a pair and a raw iterator. We must pick between the two + // overloads that unpacks the iterator from the pair or simply passes the provided iterator to the caller based + // on the class template parameter. + template + iterator DoGetResultIterator(BoolConstantT, + const insert_return_type& irt, + ENABLE_IF_TRUETYPE(BoolConstantT) = nullptr) const EA_NOEXCEPT + { + return irt.first; + } + + template + iterator DoGetResultIterator(BoolConstantT, + const insert_return_type& irt, + DISABLE_IF_TRUETYPE(BoolConstantT) = nullptr) const EA_NOEXCEPT + { + return irt; + } + + node_type* DoAllocateNodeFromKey(const key_type& key); + node_type* DoAllocateNodeFromKey(key_type&& key); + void DoFreeNode(node_type* pNode); + void DoFreeNodes(node_type** pBucketArray, size_type); + + node_type** DoAllocateBuckets(size_type n); + void DoFreeBuckets(node_type** pBucketArray, size_type n); + + template + eastl::pair DoInsertValue(BoolConstantT, Args&&... args); + + template + iterator DoInsertValue(BoolConstantT, Args&&... args); + + + template + eastl::pair DoInsertValueExtra(BoolConstantT, + const key_type& k, + hash_code_t c, + node_type* pNodeNew, + value_type&& value, + ENABLE_IF_TRUETYPE(BoolConstantT) = nullptr); + + template + eastl::pair DoInsertValue(BoolConstantT, + value_type&& value, + ENABLE_IF_TRUETYPE(BoolConstantT) = nullptr); + + template + iterator DoInsertValueExtra(BoolConstantT, + const key_type& k, + hash_code_t c, + node_type* pNodeNew, + value_type&& value, + DISABLE_IF_TRUETYPE(BoolConstantT) = nullptr); + + template + iterator DoInsertValue(BoolConstantT, value_type&& value, DISABLE_IF_TRUETYPE(BoolConstantT) = nullptr); + + + template + eastl::pair DoInsertValueExtra(BoolConstantT, + const key_type& k, + hash_code_t c, + node_type* pNodeNew, + const value_type& value, + ENABLE_IF_TRUETYPE(BoolConstantT) = nullptr); + + template + eastl::pair DoInsertValue(BoolConstantT, + const value_type& value, + ENABLE_IF_TRUETYPE(BoolConstantT) = nullptr); + + template + iterator DoInsertValueExtra(BoolConstantT, + const key_type& k, + hash_code_t c, + node_type* pNodeNew, + const value_type& value, + DISABLE_IF_TRUETYPE(BoolConstantT) = nullptr); + + template + iterator DoInsertValue(BoolConstantT, const value_type& value, DISABLE_IF_TRUETYPE(BoolConstantT) = nullptr); + + template + node_type* DoAllocateNode(Args&&... args); + node_type* DoAllocateNode(value_type&& value); + node_type* DoAllocateNode(const value_type& value); + + // DoInsertKey is supposed to get hash_code_t c = get_hash_code(key). 
+ // it is done in case application has it's own hashset/hashmap-like containter, where hash code is for some reason known prior the insert + // this allows to save some performance, especially with heavy hash functions + eastl::pair DoInsertKey(true_type, const key_type& key, hash_code_t c); + iterator DoInsertKey(false_type, const key_type& key, hash_code_t c); + eastl::pair DoInsertKey(true_type, key_type&& key, hash_code_t c); + iterator DoInsertKey(false_type, key_type&& key, hash_code_t c); + + // We keep DoInsertKey overload without third parameter, for compatibility with older revisions of EASTL (3.12.07 and earlier) + // It used to call get_hash_code as a first call inside the DoInsertKey. + eastl::pair DoInsertKey(true_type, const key_type& key) { return DoInsertKey(true_type(), key, get_hash_code(key)); } + iterator DoInsertKey(false_type, const key_type& key) { return DoInsertKey(false_type(), key, get_hash_code(key)); } + eastl::pair DoInsertKey(true_type, key_type&& key) { return DoInsertKey(true_type(), eastl::move(key), get_hash_code(key)); } + iterator DoInsertKey(false_type, key_type&& key) { return DoInsertKey(false_type(), eastl::move(key), get_hash_code(key)); } + + void DoRehash(size_type nBucketCount); + node_type* DoFindNode(node_type* pNode, const key_type& k, hash_code_t c) const; + + template + ENABLE_IF_HAS_HASHCODE(T, node_type) DoFindNode(T* pNode, hash_code_t c) const + { + for (; pNode; pNode = pNode->mpNext) + { + if (pNode->mnHashCode == c) + return pNode; + } + return NULL; + } + + template + node_type* DoFindNodeT(node_type* pNode, const U& u, BinaryPredicate predicate) const; + + }; // class hashtable + + + + + + /////////////////////////////////////////////////////////////////////// + // node_iterator_base + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const node_iterator_base& a, const node_iterator_base& b) + { return a.mpNode == b.mpNode; } + + template + inline bool operator!=(const node_iterator_base& a, const node_iterator_base& b) + { return a.mpNode != b.mpNode; } + + + + + /////////////////////////////////////////////////////////////////////// + // hashtable_iterator_base + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const hashtable_iterator_base& a, const hashtable_iterator_base& b) + { return a.mpNode == b.mpNode; } + + template + inline bool operator!=(const hashtable_iterator_base& a, const hashtable_iterator_base& b) + { return a.mpNode != b.mpNode; } + + + + + /////////////////////////////////////////////////////////////////////// + // hashtable + /////////////////////////////////////////////////////////////////////// + + template + hashtable + ::hashtable(size_type nBucketCount, const H1& h1, const H2& h2, const H& h, + const Eq& eq, const EK& ek, const allocator_type& allocator) + : rehash_base(), + hash_code_base(ek, eq, h1, h2, h), + mnBucketCount(0), + mnElementCount(0), + mRehashPolicy(), + mAllocator(allocator) + { + if(nBucketCount < 2) // If we are starting in an initially empty state, with no memory allocation done. + reset_lose_memory(); + else // Else we are creating a potentially non-empty hashtable... + { + EASTL_ASSERT(nBucketCount < 10000000); + mnBucketCount = (size_type)mRehashPolicy.GetNextBucketCount((uint32_t)nBucketCount); + mpBucketArray = DoAllocateBuckets(mnBucketCount); // mnBucketCount will always be at least 2. 
+ } + } + + + + template + template + hashtable::hashtable(FowardIterator first, FowardIterator last, size_type nBucketCount, + const H1& h1, const H2& h2, const H& h, + const Eq& eq, const EK& ek, const allocator_type& allocator) + : rehash_base(), + hash_code_base(ek, eq, h1, h2, h), + //mnBucketCount(0), // This gets re-assigned below. + mnElementCount(0), + mRehashPolicy(), + mAllocator(allocator) + { + if(nBucketCount < 2) + { + const size_type nElementCount = (size_type)eastl::ht_distance(first, last); + mnBucketCount = (size_type)mRehashPolicy.GetBucketCount((uint32_t)nElementCount); + } + else + { + EASTL_ASSERT(nBucketCount < 10000000); + mnBucketCount = nBucketCount; + } + + mpBucketArray = DoAllocateBuckets(mnBucketCount); // mnBucketCount will always be at least 2. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for(; first != last; ++first) + insert(*first); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + clear(); + DoFreeBuckets(mpBucketArray, mnBucketCount); + throw; + } + #endif + } + + + + template + hashtable::hashtable(const this_type& x) + : rehash_base(x), + hash_code_base(x), + mnBucketCount(x.mnBucketCount), + mnElementCount(x.mnElementCount), + mRehashPolicy(x.mRehashPolicy), + mAllocator(x.mAllocator) + { + if(mnElementCount) // If there is anything to copy... + { + mpBucketArray = DoAllocateBuckets(mnBucketCount); // mnBucketCount will be at least 2. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for(size_type i = 0; i < x.mnBucketCount; ++i) + { + node_type* pNodeSource = x.mpBucketArray[i]; + node_type** ppNodeDest = mpBucketArray + i; + + while(pNodeSource) + { + *ppNodeDest = DoAllocateNode(pNodeSource->mValue); + copy_code(*ppNodeDest, pNodeSource); + ppNodeDest = &(*ppNodeDest)->mpNext; + pNodeSource = pNodeSource->mpNext; + } + } + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + clear(); + DoFreeBuckets(mpBucketArray, mnBucketCount); + throw; + } + #endif + } + else + { + // In this case, instead of allocate memory and copy nothing from x, + // we reset ourselves to a zero allocation state. + reset_lose_memory(); + } + } + + + template + hashtable::hashtable(this_type&& x) + : rehash_base(x), + hash_code_base(x), + mnBucketCount(0), + mnElementCount(0), + mRehashPolicy(x.mRehashPolicy), + mAllocator(x.mAllocator) + { + reset_lose_memory(); // We do this here the same as we do it in the default ctor because it puts the container in a proper initial empty state. This code would be cleaner if we could rely on being able to use C++11 delegating constructors and just call the default ctor here. + swap(x); + } + + + template + hashtable::hashtable(this_type&& x, const allocator_type& allocator) + : rehash_base(x), + hash_code_base(x), + mnBucketCount(0), + mnElementCount(0), + mRehashPolicy(x.mRehashPolicy), + mAllocator(allocator) + { + reset_lose_memory(); // We do this here the same as we do it in the default ctor because it puts the container in a proper initial empty state. This code would be cleaner if we could rely on being able to use C++11 delegating constructors and just call the default ctor here. + swap(x); // swap will directly or indirectly handle the possibility that mAllocator != x.mAllocator. 
+ } + + + template + inline const typename hashtable::allocator_type& + hashtable::get_allocator() const EA_NOEXCEPT + { + return mAllocator; + } + + + + template + inline typename hashtable::allocator_type& + hashtable::get_allocator() EA_NOEXCEPT + { + return mAllocator; + } + + + + template + inline void hashtable::set_allocator(const allocator_type& allocator) + { + mAllocator = allocator; + } + + + + template + inline typename hashtable::this_type& + hashtable::operator=(const this_type& x) + { + if(this != &x) + { + clear(); + + #if EASTL_ALLOCATOR_COPY_ENABLED + mAllocator = x.mAllocator; + #endif + + insert(x.begin(), x.end()); + } + return *this; + } + + + template + inline typename hashtable::this_type& + hashtable::operator=(this_type&& x) + { + if(this != &x) + { + clear(); // To consider: Are we really required to clear here? x is going away soon and will clear itself in its dtor. + swap(x); // member swap handles the case that x has a different allocator than our allocator by doing a copy. + } + return *this; + } + + + template + inline typename hashtable::this_type& + hashtable::operator=(std::initializer_list ilist) + { + // The simplest means of doing this is to clear and insert. There probably isn't a generic + // solution that's any more efficient without having prior knowledge of the ilist contents. + clear(); + insert(ilist.begin(), ilist.end()); + return *this; + } + + + + template + inline hashtable::~hashtable() + { + clear(); + DoFreeBuckets(mpBucketArray, mnBucketCount); + } + + + template + typename hashtable::node_type* + hashtable::DoAllocateNodeFromKey(const key_type& key) + { + node_type* const pNode = (node_type*)allocate_memory(mAllocator, sizeof(node_type), EASTL_ALIGN_OF(value_type), 0); + EASTL_ASSERT_MSG(pNode != nullptr, "the behaviour of eastl::allocators that return nullptr is not defined."); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + ::new(eastl::addressof(pNode->mValue)) value_type(pair_first_construct, key); + pNode->mpNext = NULL; + return pNode; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + EASTLFree(mAllocator, pNode, sizeof(node_type)); + throw; + } + #endif + } + + + template + typename hashtable::node_type* + hashtable::DoAllocateNodeFromKey(key_type&& key) + { + node_type* const pNode = (node_type*)allocate_memory(mAllocator, sizeof(node_type), EASTL_ALIGN_OF(value_type), 0); + EASTL_ASSERT_MSG(pNode != nullptr, "the behaviour of eastl::allocators that return nullptr is not defined."); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + ::new(eastl::addressof(pNode->mValue)) value_type(pair_first_construct, eastl::move(key)); + pNode->mpNext = NULL; + return pNode; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + EASTLFree(mAllocator, pNode, sizeof(node_type)); + throw; + } + #endif + } + + + template + inline void hashtable::DoFreeNode(node_type* pNode) + { + pNode->~node_type(); + EASTLFree(mAllocator, pNode, sizeof(node_type)); + } + + + + template + void hashtable::DoFreeNodes(node_type** pNodeArray, size_type n) + { + for(size_type i = 0; i < n; ++i) + { + node_type* pNode = pNodeArray[i]; + while(pNode) + { + node_type* const pTempNode = pNode; + pNode = pNode->mpNext; + DoFreeNode(pTempNode); + } + pNodeArray[i] = NULL; + } + } + + + + template + typename hashtable::node_type** + hashtable::DoAllocateBuckets(size_type n) + { + // We allocate one extra bucket to hold a sentinel, an arbitrary + // non-null pointer. Iterator increment relies on this. 
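+ // Put differently, the returned allocation holds n + 1 pointers:
+ //     [bucket 0][bucket 1] ... [bucket n-1][non-null sentinel]
+ // The sentinel lets iterator increment scan forward for a non-empty bucket and stop at the
+ // end of the array without consulting the bucket count.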
+ EASTL_ASSERT(n > 1); // We reserve an mnBucketCount of 1 for the shared gpEmptyBucketArray. + EASTL_CT_ASSERT(kHashtableAllocFlagBuckets == 0x00400000); // Currently we expect this to be so, because the allocator has a copy of this enum. + node_type** const pBucketArray = (node_type**)EASTLAllocAlignedFlags(mAllocator, (n + 1) * sizeof(node_type*), EASTL_ALIGN_OF(node_type*), 0, kHashtableAllocFlagBuckets); + //eastl::fill(pBucketArray, pBucketArray + n, (node_type*)NULL); + memset(pBucketArray, 0, n * sizeof(node_type*)); + pBucketArray[n] = reinterpret_cast((uintptr_t)~0); + return pBucketArray; + } + + + + template + inline void hashtable::DoFreeBuckets(node_type** pBucketArray, size_type n) + { + // If n <= 1, then pBucketArray is from the shared gpEmptyBucketArray. We don't test + // for pBucketArray == &gpEmptyBucketArray because one library have a different gpEmptyBucketArray + // than another but pass a hashtable to another. So we go by the size. + if(n > 1) + EASTLFree(mAllocator, pBucketArray, (n + 1) * sizeof(node_type*)); // '+1' because DoAllocateBuckets allocates nBucketCount + 1 buckets in order to have a NULL sentinel at the end. + } + + + template + void hashtable::swap(this_type& x) + { + hash_code_base::base_swap(x); // hash_code_base has multiple implementations, so we let them handle the swap. + eastl::swap(mRehashPolicy, x.mRehashPolicy); + EASTL_MACRO_SWAP(node_type**, mpBucketArray, x.mpBucketArray); + eastl::swap(mnBucketCount, x.mnBucketCount); + eastl::swap(mnElementCount, x.mnElementCount); + + if (mAllocator != x.mAllocator) // If allocators are not equivalent... + { + eastl::swap(mAllocator, x.mAllocator); + } + } + + + template + inline void hashtable::rehash_policy(const rehash_policy_type& rehashPolicy) + { + mRehashPolicy = rehashPolicy; + + const size_type nBuckets = rehashPolicy.GetBucketCount((uint32_t)mnElementCount); + + if(nBuckets > mnBucketCount) + DoRehash(nBuckets); + } + + + + template + inline typename hashtable::iterator + hashtable::find(const key_type& k) + { + const hash_code_t c = get_hash_code(k); + const size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + + node_type* const pNode = DoFindNode(mpBucketArray[n], k, c); + return pNode ? iterator(pNode, mpBucketArray + n) : iterator(mpBucketArray + mnBucketCount); // iterator(mpBucketArray + mnBucketCount) == end() + } + + + + template + inline typename hashtable::const_iterator + hashtable::find(const key_type& k) const + { + const hash_code_t c = get_hash_code(k); + const size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + + node_type* const pNode = DoFindNode(mpBucketArray[n], k, c); + return pNode ? const_iterator(pNode, mpBucketArray + n) : const_iterator(mpBucketArray + mnBucketCount); // iterator(mpBucketArray + mnBucketCount) == end() + } + + + + template + template + inline typename hashtable::iterator + hashtable::find_as(const U& other, UHash uhash, BinaryPredicate predicate) + { + const hash_code_t c = (hash_code_t)uhash(other); + const size_type n = (size_type)(c % mnBucketCount); // This assumes we are using the mod range policy. + + node_type* const pNode = DoFindNodeT(mpBucketArray[n], other, predicate); + return pNode ? 
iterator(pNode, mpBucketArray + n) : iterator(mpBucketArray + mnBucketCount); // iterator(mpBucketArray + mnBucketCount) == end() + } + + + + template + template + inline typename hashtable::const_iterator + hashtable::find_as(const U& other, UHash uhash, BinaryPredicate predicate) const + { + const hash_code_t c = (hash_code_t)uhash(other); + const size_type n = (size_type)(c % mnBucketCount); // This assumes we are using the mod range policy. + + node_type* const pNode = DoFindNodeT(mpBucketArray[n], other, predicate); + return pNode ? const_iterator(pNode, mpBucketArray + n) : const_iterator(mpBucketArray + mnBucketCount); // iterator(mpBucketArray + mnBucketCount) == end() + } + + + /// hashtable_find + /// + /// Helper function that defaults to using hash and equal_to_2. + /// This makes it so that by default you don't need to provide these. + /// Note that the default hash functions may not be what you want, though. + /// + /// Example usage. Instead of this: + /// hash_set hashSet; + /// hashSet.find("hello", hash(), equal_to_2()); + /// + /// You can use this: + /// hash_set hashSet; + /// hashtable_find(hashSet, "hello"); + /// + template + inline typename H::iterator hashtable_find(H& hashTable, U u) + { return hashTable.find_as(u, eastl::hash(), eastl::equal_to_2()); } + + template + inline typename H::const_iterator hashtable_find(const H& hashTable, U u) + { return hashTable.find_as(u, eastl::hash(), eastl::equal_to_2()); } + + + + template + template + inline typename hashtable::iterator + hashtable::find_as(const U& other) + { return eastl::hashtable_find(*this, other); } + // VC++ doesn't appear to like the following, though it seems correct to me. + // So we implement the workaround above until we can straighten this out. + //{ return find_as(other, eastl::hash(), eastl::equal_to_2()); } + + + template + template + inline typename hashtable::const_iterator + hashtable::find_as(const U& other) const + { return eastl::hashtable_find(*this, other); } + // VC++ doesn't appear to like the following, though it seems correct to me. + // So we implement the workaround above until we can straighten this out. 
+ //{ return find_as(other, eastl::hash(), eastl::equal_to_2()); } + + + + template + eastl::pair::const_iterator, + typename hashtable::const_iterator> + hashtable::find_range_by_hash(hash_code_t c) const + { + const size_type start = (size_type)bucket_index(c, (uint32_t)mnBucketCount); + node_type* const pNodeStart = mpBucketArray[start]; + + if (pNodeStart) + { + eastl::pair pair(const_iterator(pNodeStart, mpBucketArray + start), + const_iterator(pNodeStart, mpBucketArray + start)); + pair.second.increment_bucket(); + return pair; + } + + return eastl::pair(const_iterator(mpBucketArray + mnBucketCount), + const_iterator(mpBucketArray + mnBucketCount)); + } + + + + template + eastl::pair::iterator, + typename hashtable::iterator> + hashtable::find_range_by_hash(hash_code_t c) + { + const size_type start = (size_type)bucket_index(c, (uint32_t)mnBucketCount); + node_type* const pNodeStart = mpBucketArray[start]; + + if (pNodeStart) + { + eastl::pair pair(iterator(pNodeStart, mpBucketArray + start), + iterator(pNodeStart, mpBucketArray + start)); + pair.second.increment_bucket(); + return pair; + + } + + return eastl::pair(iterator(mpBucketArray + mnBucketCount), + iterator(mpBucketArray + mnBucketCount)); + } + + + + template + typename hashtable::size_type + hashtable::count(const key_type& k) const EA_NOEXCEPT + { + const hash_code_t c = get_hash_code(k); + const size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + size_type result = 0; + + // To do: Make a specialization for bU (unique keys) == true and take + // advantage of the fact that the count will always be zero or one in that case. + for(node_type* pNode = mpBucketArray[n]; pNode; pNode = pNode->mpNext) + { + if(compare(k, c, pNode)) + ++result; + } + return result; + } + + + + template + eastl::pair::iterator, + typename hashtable::iterator> + hashtable::equal_range(const key_type& k) + { + const hash_code_t c = get_hash_code(k); + const size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + node_type** head = mpBucketArray + n; + node_type* pNode = DoFindNode(*head, k, c); + + if(pNode) + { + node_type* p1 = pNode->mpNext; + + for(; p1; p1 = p1->mpNext) + { + if(!compare(k, c, p1)) + break; + } + + iterator first(pNode, head); + iterator last(p1, head); + + if(!p1) + last.increment_bucket(); + + return eastl::pair(first, last); + } + + return eastl::pair(iterator(mpBucketArray + mnBucketCount), // iterator(mpBucketArray + mnBucketCount) == end() + iterator(mpBucketArray + mnBucketCount)); + } + + + + + template + eastl::pair::const_iterator, + typename hashtable::const_iterator> + hashtable::equal_range(const key_type& k) const + { + const hash_code_t c = get_hash_code(k); + const size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + node_type** head = mpBucketArray + n; + node_type* pNode = DoFindNode(*head, k, c); + + if(pNode) + { + node_type* p1 = pNode->mpNext; + + for(; p1; p1 = p1->mpNext) + { + if(!compare(k, c, p1)) + break; + } + + const_iterator first(pNode, head); + const_iterator last(p1, head); + + if(!p1) + last.increment_bucket(); + + return eastl::pair(first, last); + } + + return eastl::pair(const_iterator(mpBucketArray + mnBucketCount), // iterator(mpBucketArray + mnBucketCount) == end() + const_iterator(mpBucketArray + mnBucketCount)); + } + + + + template + inline typename hashtable::node_type* + hashtable::DoFindNode(node_type* pNode, const key_type& k, hash_code_t c) const + { + for(; pNode; pNode = pNode->mpNext) + { + if(compare(k, c, pNode)) + 
return pNode; + } + return NULL; + } + + + + template + template + inline typename hashtable::node_type* + hashtable::DoFindNodeT(node_type* pNode, const U& other, BinaryPredicate predicate) const + { + for(; pNode; pNode = pNode->mpNext) + { + if(predicate(mExtractKey(pNode->mValue), other)) // Intentionally compare with key as first arg and other as second arg. + return pNode; + } + return NULL; + } + + + + template + template + eastl::pair::iterator, bool> + hashtable::DoInsertValue(BoolConstantT, Args&&... args) // true_type means bUniqueKeys is true. + { + // Adds the value to the hash table if not already present. + // If already present then the existing value is returned via an iterator/bool pair. + + // We have a chicken-and-egg problem here. In order to know if and where to insert the value, we need to get the + // hashtable key for the value. But we don't explicitly have a value argument, we have a templated Args&&... argument. + // We need the value_type in order to proceed, but that entails getting an instance of a value_type from the args. + // And it may turn out that the value is already present in the hashtable and we need to cancel the insertion, + // despite having obtained a value_type to put into the hashtable. We have mitigated this problem somewhat by providing + // specializations of the insert function for const value_type& and value_type&&, and so the only time this function + // should get called is when args refers to arguments to construct a value_type. + + node_type* const pNodeNew = DoAllocateNode(eastl::forward(args)...); + const key_type& k = mExtractKey(pNodeNew->mValue); + const hash_code_t c = get_hash_code(k); + size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + node_type* const pNode = DoFindNode(mpBucketArray[n], k, c); + + if(pNode == NULL) // If value is not present... add it. + { + const eastl::pair bRehash = mRehashPolicy.GetRehashRequired((uint32_t)mnBucketCount, (uint32_t)mnElementCount, (uint32_t)1); + + set_code(pNodeNew, c); // This is a no-op for most hashtables. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + if(bRehash.first) + { + n = (size_type)bucket_index(k, c, (uint32_t)bRehash.second); + DoRehash(bRehash.second); + } + + EASTL_ASSERT((uintptr_t)mpBucketArray != (uintptr_t)&gpEmptyBucketArray[0]); + pNodeNew->mpNext = mpBucketArray[n]; + mpBucketArray[n] = pNodeNew; + ++mnElementCount; + + return eastl::pair(iterator(pNodeNew, mpBucketArray + n), true); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeNode(pNodeNew); + throw; + } + #endif + } + else + { + // To do: We have an inefficiency to deal with here. We allocated a node above but we are freeing it here because + // it turned out it wasn't needed. But we needed to create the node in order to get the hashtable key for + // the node. One possible resolution is to create specializations: DoInsertValue(true_type, value_type&&) and + // DoInsertValue(true_type, const value_type&) which don't need to create a node up front in order to get the + // hashtable key. Probably most users would end up using these pathways instead of this Args... pathway. + // While we should considering handling this to-do item, a lot of the performance limitations of maps and sets + // in practice is with finding elements rather than adding (potentially redundant) new elements. 
+ DoFreeNode(pNodeNew); + } + + return eastl::pair(iterator(pNode, mpBucketArray + n), false); + } + + + template + template + typename hashtable::iterator + hashtable::DoInsertValue(BoolConstantT, Args&&... args) // false_type means bUniqueKeys is false. + { + const eastl::pair bRehash = mRehashPolicy.GetRehashRequired((uint32_t)mnBucketCount, (uint32_t)mnElementCount, (uint32_t)1); + + if(bRehash.first) + DoRehash(bRehash.second); + + node_type* pNodeNew = DoAllocateNode(eastl::forward(args)...); + const key_type& k = mExtractKey(pNodeNew->mValue); + const hash_code_t c = get_hash_code(k); + const size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + + set_code(pNodeNew, c); // This is a no-op for most hashtables. + + // To consider: Possibly make this insertion not make equal elements contiguous. + // As it stands now, we insert equal values contiguously in the hashtable. + // The benefit is that equal_range can work in a sensible manner and that + // erase(value) can more quickly find equal values. The downside is that + // this insertion operation taking some extra time. How important is it to + // us that equal_range span all equal items? + node_type* const pNodePrev = DoFindNode(mpBucketArray[n], k, c); + + if(pNodePrev == NULL) + { + EASTL_ASSERT((void**)mpBucketArray != &gpEmptyBucketArray[0]); + pNodeNew->mpNext = mpBucketArray[n]; + mpBucketArray[n] = pNodeNew; + } + else + { + pNodeNew->mpNext = pNodePrev->mpNext; + pNodePrev->mpNext = pNodeNew; + } + + ++mnElementCount; + + return iterator(pNodeNew, mpBucketArray + n); + } + + + template + template + typename hashtable::node_type* + hashtable::DoAllocateNode(Args&&... args) + { + node_type* const pNode = (node_type*)allocate_memory(mAllocator, sizeof(node_type), EASTL_ALIGN_OF(value_type), 0); + EASTL_ASSERT_MSG(pNode != nullptr, "the behaviour of eastl::allocators that return nullptr is not defined."); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + ::new(eastl::addressof(pNode->mValue)) value_type(eastl::forward(args)...); + pNode->mpNext = NULL; + return pNode; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + EASTLFree(mAllocator, pNode, sizeof(node_type)); + throw; + } + #endif + } + + + //////////////////////////////////////////////////////////////////////////////////////////////////// + // Note: The following insertion-related functions are nearly copies of the above three functions, + // but are for value_type&& and const value_type& arguments. It's useful for us to have the functions + // below, even when using a fully compliant C++11 compiler that supports the above functions. + // The reason is because the specializations below are slightly more efficient because they can delay + // the creation of a node until it's known that it will be needed. + //////////////////////////////////////////////////////////////////////////////////////////////////// + + template + template + eastl::pair::iterator, bool> + hashtable::DoInsertValueExtra(BoolConstantT, const key_type& k, + hash_code_t c, node_type* pNodeNew, value_type&& value, ENABLE_IF_TRUETYPE(BoolConstantT)) // true_type means bUniqueKeys is true. + { + // Adds the value to the hash table if not already present. + // If already present then the existing value is returned via an iterator/bool pair. + size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + node_type* const pNode = DoFindNode(mpBucketArray[n], k, c); + + if(pNode == NULL) // If value is not present... add it. 
+ { + const eastl::pair bRehash = mRehashPolicy.GetRehashRequired((uint32_t)mnBucketCount, (uint32_t)mnElementCount, (uint32_t)1); + + // Allocate the new node before doing the rehash so that we don't + // do a rehash if the allocation throws. + #if EASTL_EXCEPTIONS_ENABLED + bool nodeAllocated; // If exceptions are enabled then we we need to track if we allocated the node so we can free it in the catch block. + #endif + + if(pNodeNew) + { + ::new(eastl::addressof(pNodeNew->mValue)) value_type(eastl::move(value)); // It's expected that pNodeNew was allocated with allocate_uninitialized_node. + #if EASTL_EXCEPTIONS_ENABLED + nodeAllocated = false; + #endif + } + else + { + pNodeNew = DoAllocateNode(eastl::move(value)); + #if EASTL_EXCEPTIONS_ENABLED + nodeAllocated = true; + #endif + } + + set_code(pNodeNew, c); // This is a no-op for most hashtables. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + if(bRehash.first) + { + n = (size_type)bucket_index(k, c, (uint32_t)bRehash.second); + DoRehash(bRehash.second); + } + + EASTL_ASSERT((uintptr_t)mpBucketArray != (uintptr_t)&gpEmptyBucketArray[0]); + pNodeNew->mpNext = mpBucketArray[n]; + mpBucketArray[n] = pNodeNew; + ++mnElementCount; + + return eastl::pair(iterator(pNodeNew, mpBucketArray + n), true); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + if(nodeAllocated) // If we allocated the node within this function, free it. Else let the caller retain ownership of it. + DoFreeNode(pNodeNew); + throw; + } + #endif + } + // Else the value is already present, so don't add a new node. And don't free pNodeNew. + + return eastl::pair(iterator(pNode, mpBucketArray + n), false); + } + + + template + template + eastl::pair::iterator, bool> + hashtable::DoInsertValue(BoolConstantT, value_type&& value, ENABLE_IF_TRUETYPE(BoolConstantT)) // true_type means bUniqueKeys is true. + { + const key_type& k = mExtractKey(value); + const hash_code_t c = get_hash_code(k); + + return DoInsertValueExtra(true_type(), k, c, NULL, eastl::move(value)); + } + + + template + template + typename hashtable::iterator + hashtable::DoInsertValueExtra(BoolConstantT, const key_type& k, hash_code_t c, node_type* pNodeNew, value_type&& value, + DISABLE_IF_TRUETYPE(BoolConstantT)) // false_type means bUniqueKeys is false. + { + const eastl::pair bRehash = mRehashPolicy.GetRehashRequired((uint32_t)mnBucketCount, (uint32_t)mnElementCount, (uint32_t)1); + + if(bRehash.first) + DoRehash(bRehash.second); // Note: We don't need to wrap this call with try/catch because there's nothing we would need to do in the catch. + + const size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + + if(pNodeNew) + ::new(eastl::addressof(pNodeNew->mValue)) value_type(eastl::move(value)); // It's expected that pNodeNew was allocated with allocate_uninitialized_node. + else + pNodeNew = DoAllocateNode(eastl::move(value)); + + set_code(pNodeNew, c); // This is a no-op for most hashtables. + + // To consider: Possibly make this insertion not make equal elements contiguous. + // As it stands now, we insert equal values contiguously in the hashtable. + // The benefit is that equal_range can work in a sensible manner and that + // erase(value) can more quickly find equal values. The downside is that + // this insertion operation taking some extra time. How important is it to + // us that equal_range span all equal items? 
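+ // DoFindNode returns an existing node with an equal key, if any. When such a node exists,
+ // the new node is linked immediately after it (keeping equal keys adjacent in the bucket
+ // chain); otherwise the new node is pushed onto the front of the bucket.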
+ node_type* const pNodePrev = DoFindNode(mpBucketArray[n], k, c); + + if(pNodePrev == NULL) + { + EASTL_ASSERT((void**)mpBucketArray != &gpEmptyBucketArray[0]); + pNodeNew->mpNext = mpBucketArray[n]; + mpBucketArray[n] = pNodeNew; + } + else + { + pNodeNew->mpNext = pNodePrev->mpNext; + pNodePrev->mpNext = pNodeNew; + } + + ++mnElementCount; + + return iterator(pNodeNew, mpBucketArray + n); + } + + + template + template + typename hashtable::iterator + hashtable::DoInsertValue(BoolConstantT, value_type&& value, DISABLE_IF_TRUETYPE(BoolConstantT)) // false_type means bUniqueKeys is false. + { + const key_type& k = mExtractKey(value); + const hash_code_t c = get_hash_code(k); + + return DoInsertValueExtra(false_type(), k, c, NULL, eastl::move(value)); + } + + + template + typename hashtable::node_type* + hashtable::DoAllocateNode(value_type&& value) + { + node_type* const pNode = (node_type*)allocate_memory(mAllocator, sizeof(node_type), EASTL_ALIGN_OF(value_type), 0); + EASTL_ASSERT_MSG(pNode != nullptr, "the behaviour of eastl::allocators that return nullptr is not defined."); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + ::new(eastl::addressof(pNode->mValue)) value_type(eastl::move(value)); + pNode->mpNext = NULL; + return pNode; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + EASTLFree(mAllocator, pNode, sizeof(node_type)); + throw; + } + #endif + } + + + template + template + eastl::pair::iterator, bool> + hashtable::DoInsertValueExtra(BoolConstantT, const key_type& k, hash_code_t c, node_type* pNodeNew, const value_type& value, + ENABLE_IF_TRUETYPE(BoolConstantT)) // true_type means bUniqueKeys is true. + { + // Adds the value to the hash table if not already present. + // If already present then the existing value is returned via an iterator/bool pair. + size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + node_type* const pNode = DoFindNode(mpBucketArray[n], k, c); + + if(pNode == NULL) // If value is not present... add it. + { + const eastl::pair bRehash = mRehashPolicy.GetRehashRequired((uint32_t)mnBucketCount, (uint32_t)mnElementCount, (uint32_t)1); + + // Allocate the new node before doing the rehash so that we don't + // do a rehash if the allocation throws. + #if EASTL_EXCEPTIONS_ENABLED + bool nodeAllocated; // If exceptions are enabled then we we need to track if we allocated the node so we can free it in the catch block. + #endif + + if(pNodeNew) + { + ::new(eastl::addressof(pNodeNew->mValue)) value_type(value); // It's expected that pNodeNew was allocated with allocate_uninitialized_node. + #if EASTL_EXCEPTIONS_ENABLED + nodeAllocated = false; + #endif + } + else + { + pNodeNew = DoAllocateNode(value); + #if EASTL_EXCEPTIONS_ENABLED + nodeAllocated = true; + #endif + } + + set_code(pNodeNew, c); // This is a no-op for most hashtables. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + if(bRehash.first) + { + n = (size_type)bucket_index(k, c, (uint32_t)bRehash.second); + DoRehash(bRehash.second); + } + + EASTL_ASSERT((uintptr_t)mpBucketArray != (uintptr_t)&gpEmptyBucketArray[0]); + pNodeNew->mpNext = mpBucketArray[n]; + mpBucketArray[n] = pNodeNew; + ++mnElementCount; + + return eastl::pair(iterator(pNodeNew, mpBucketArray + n), true); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + if(nodeAllocated) // If we allocated the node within this function, free it. Else let the caller retain ownership of it. + DoFreeNode(pNodeNew); + throw; + } + #endif + } + // Else the value is already present, so don't add a new node. 
And don't free pNodeNew. + + return eastl::pair(iterator(pNode, mpBucketArray + n), false); + } + + + template + template + eastl::pair::iterator, bool> + hashtable::DoInsertValue(BoolConstantT, const value_type& value, ENABLE_IF_TRUETYPE(BoolConstantT)) // true_type means bUniqueKeys is true. + { + const key_type& k = mExtractKey(value); + const hash_code_t c = get_hash_code(k); + + return DoInsertValueExtra(true_type(), k, c, NULL, value); + } + + + template + template + typename hashtable::iterator + hashtable::DoInsertValueExtra(BoolConstantT, const key_type& k, hash_code_t c, node_type* pNodeNew, const value_type& value, + DISABLE_IF_TRUETYPE(BoolConstantT)) // false_type means bUniqueKeys is false. + { + const eastl::pair bRehash = mRehashPolicy.GetRehashRequired((uint32_t)mnBucketCount, (uint32_t)mnElementCount, (uint32_t)1); + + if(bRehash.first) + DoRehash(bRehash.second); // Note: We don't need to wrap this call with try/catch because there's nothing we would need to do in the catch. + + const size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + + if(pNodeNew) + ::new(eastl::addressof(pNodeNew->mValue)) value_type(value); // It's expected that pNodeNew was allocated with allocate_uninitialized_node. + else + pNodeNew = DoAllocateNode(value); + + set_code(pNodeNew, c); // This is a no-op for most hashtables. + + // To consider: Possibly make this insertion not make equal elements contiguous. + // As it stands now, we insert equal values contiguously in the hashtable. + // The benefit is that equal_range can work in a sensible manner and that + // erase(value) can more quickly find equal values. The downside is that + // this insertion operation taking some extra time. How important is it to + // us that equal_range span all equal items? + node_type* const pNodePrev = DoFindNode(mpBucketArray[n], k, c); + + if(pNodePrev == NULL) + { + EASTL_ASSERT((void**)mpBucketArray != &gpEmptyBucketArray[0]); + pNodeNew->mpNext = mpBucketArray[n]; + mpBucketArray[n] = pNodeNew; + } + else + { + pNodeNew->mpNext = pNodePrev->mpNext; + pNodePrev->mpNext = pNodeNew; + } + + ++mnElementCount; + + return iterator(pNodeNew, mpBucketArray + n); + } + + + template + template + typename hashtable::iterator + hashtable::DoInsertValue(BoolConstantT, const value_type& value, DISABLE_IF_TRUETYPE(BoolConstantT)) // false_type means bUniqueKeys is false. + { + const key_type& k = mExtractKey(value); + const hash_code_t c = get_hash_code(k); + + return DoInsertValueExtra(false_type(), k, c, NULL, value); + } + + + template + typename hashtable::node_type* + hashtable::DoAllocateNode(const value_type& value) + { + node_type* const pNode = (node_type*)allocate_memory(mAllocator, sizeof(node_type), EASTL_ALIGN_OF(value_type), 0); + EASTL_ASSERT_MSG(pNode != nullptr, "the behaviour of eastl::allocators that return nullptr is not defined."); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + ::new(eastl::addressof(pNode->mValue)) value_type(value); + pNode->mpNext = NULL; + return pNode; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + EASTLFree(mAllocator, pNode, sizeof(node_type)); + throw; + } + #endif + } + + + template + typename hashtable::node_type* + hashtable::allocate_uninitialized_node() + { + // We don't wrap this in try/catch because users of this function are expected to do that themselves as needed. 
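+ // Illustrative call pattern for the insert(hash_code_t, node_type*, ...) overloads documented
+ // above, assuming a unique-key container; 'table', 'value' and the locking are placeholders:
+ //     node_type* pNode = table.allocate_uninitialized_node(); // allocate outside the lock
+ //     hash_code_t c = ...;                                    // hash computed outside the lock
+ //     bool consumed;
+ //     {
+ //         // ... acquire the shared lock ...
+ //         consumed = table.insert(c, pNode, value).second;
+ //     }
+ //     if(!consumed)
+ //         table.free_uninitialized_node(pNode); // or keep it for a later insert attempt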
+ node_type* const pNode = (node_type*)allocate_memory(mAllocator, sizeof(node_type), EASTL_ALIGN_OF(value_type), 0); + EASTL_ASSERT_MSG(pNode != nullptr, "the behaviour of eastl::allocators that return nullptr is not defined."); + // Leave pNode->mValue uninitialized. + pNode->mpNext = NULL; + return pNode; + } + + + template + void hashtable::free_uninitialized_node(node_type* pNode) + { + // pNode->mValue is expected to be uninitialized. + EASTLFree(mAllocator, pNode, sizeof(node_type)); + } + + + template + eastl::pair::iterator, bool> + hashtable::DoInsertKey(true_type, const key_type& key, const hash_code_t c) // true_type means bUniqueKeys is true. + { + size_type n = (size_type)bucket_index(key, c, (uint32_t)mnBucketCount); + node_type* const pNode = DoFindNode(mpBucketArray[n], key, c); + + if(pNode == NULL) + { + const eastl::pair bRehash = mRehashPolicy.GetRehashRequired((uint32_t)mnBucketCount, (uint32_t)mnElementCount, (uint32_t)1); + + // Allocate the new node before doing the rehash so that we don't + // do a rehash if the allocation throws. + node_type* const pNodeNew = DoAllocateNodeFromKey(key); + set_code(pNodeNew, c); // This is a no-op for most hashtables. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + if(bRehash.first) + { + n = (size_type)bucket_index(key, c, (uint32_t)bRehash.second); + DoRehash(bRehash.second); + } + + EASTL_ASSERT((void**)mpBucketArray != &gpEmptyBucketArray[0]); + pNodeNew->mpNext = mpBucketArray[n]; + mpBucketArray[n] = pNodeNew; + ++mnElementCount; + + return eastl::pair(iterator(pNodeNew, mpBucketArray + n), true); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeNode(pNodeNew); + throw; + } + #endif + } + + return eastl::pair(iterator(pNode, mpBucketArray + n), false); + } + + + + template + typename hashtable::iterator + hashtable::DoInsertKey(false_type, const key_type& key, const hash_code_t c) // false_type means bUniqueKeys is false. + { + const eastl::pair bRehash = mRehashPolicy.GetRehashRequired((uint32_t)mnBucketCount, (uint32_t)mnElementCount, (uint32_t)1); + + if(bRehash.first) + DoRehash(bRehash.second); + + const size_type n = (size_type)bucket_index(key, c, (uint32_t)mnBucketCount); + + node_type* const pNodeNew = DoAllocateNodeFromKey(key); + set_code(pNodeNew, c); // This is a no-op for most hashtables. + + // To consider: Possibly make this insertion not make equal elements contiguous. + // As it stands now, we insert equal values contiguously in the hashtable. + // The benefit is that equal_range can work in a sensible manner and that + // erase(value) can more quickly find equal values. The downside is that + // this insertion operation taking some extra time. How important is it to + // us that equal_range span all equal items? + node_type* const pNodePrev = DoFindNode(mpBucketArray[n], key, c); + + if(pNodePrev == NULL) + { + EASTL_ASSERT((void**)mpBucketArray != &gpEmptyBucketArray[0]); + pNodeNew->mpNext = mpBucketArray[n]; + mpBucketArray[n] = pNodeNew; + } + else + { + pNodeNew->mpNext = pNodePrev->mpNext; + pNodePrev->mpNext = pNodeNew; + } + + ++mnElementCount; + + return iterator(pNodeNew, mpBucketArray + n); + } + + + template + eastl::pair::iterator, bool> + hashtable::DoInsertKey(true_type, key_type&& key, const hash_code_t c) // true_type means bUniqueKeys is true. 
+ { + size_type n = (size_type)bucket_index(key, c, (uint32_t)mnBucketCount); + node_type* const pNode = DoFindNode(mpBucketArray[n], key, c); + + if(pNode == NULL) + { + const eastl::pair bRehash = mRehashPolicy.GetRehashRequired((uint32_t)mnBucketCount, (uint32_t)mnElementCount, (uint32_t)1); + + // Allocate the new node before doing the rehash so that we don't + // do a rehash if the allocation throws. + node_type* const pNodeNew = DoAllocateNodeFromKey(eastl::move(key)); + set_code(pNodeNew, c); // This is a no-op for most hashtables. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + if(bRehash.first) + { + n = (size_type)bucket_index(key, c, (uint32_t)bRehash.second); + DoRehash(bRehash.second); + } + + EASTL_ASSERT((void**)mpBucketArray != &gpEmptyBucketArray[0]); + pNodeNew->mpNext = mpBucketArray[n]; + mpBucketArray[n] = pNodeNew; + ++mnElementCount; + + return eastl::pair(iterator(pNodeNew, mpBucketArray + n), true); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeNode(pNodeNew); + throw; + } + #endif + } + + return eastl::pair(iterator(pNode, mpBucketArray + n), false); + } + + + template + typename hashtable::iterator + hashtable::DoInsertKey(false_type, key_type&& key, const hash_code_t c) // false_type means bUniqueKeys is false. + { + const eastl::pair bRehash = mRehashPolicy.GetRehashRequired((uint32_t)mnBucketCount, (uint32_t)mnElementCount, (uint32_t)1); + + if(bRehash.first) + DoRehash(bRehash.second); + + const size_type n = (size_type)bucket_index(key, c, (uint32_t)mnBucketCount); + + node_type* const pNodeNew = DoAllocateNodeFromKey(eastl::move(key)); + set_code(pNodeNew, c); // This is a no-op for most hashtables. + + // To consider: Possibly make this insertion not make equal elements contiguous. + // As it stands now, we insert equal values contiguously in the hashtable. + // The benefit is that equal_range can work in a sensible manner and that + // erase(value) can more quickly find equal values. The downside is that + // this insertion operation taking some extra time. How important is it to + // us that equal_range span all equal items? + node_type* const pNodePrev = DoFindNode(mpBucketArray[n], key, c); + + if(pNodePrev == NULL) + { + EASTL_ASSERT((void**)mpBucketArray != &gpEmptyBucketArray[0]); + pNodeNew->mpNext = mpBucketArray[n]; + mpBucketArray[n] = pNodeNew; + } + else + { + pNodeNew->mpNext = pNodePrev->mpNext; + pNodePrev->mpNext = pNodeNew; + } + + ++mnElementCount; + + return iterator(pNodeNew, mpBucketArray + n); + } + + + template + template + typename hashtable::insert_return_type + hashtable::emplace(Args&&... args) + { + return DoInsertValue(has_unique_keys_type(), eastl::forward(args)...); // Need to use forward instead of move because Args&& is a "universal reference" instead of an rvalue reference. + } + + template + template + typename hashtable::iterator + hashtable::emplace_hint(const_iterator, Args&&... args) + { + // We currently ignore the iterator argument as a hint. + insert_return_type result = DoInsertValue(has_unique_keys_type(), eastl::forward(args)...); + return DoGetResultIterator(has_unique_keys_type(), result); + } + + template + template + // inline eastl::pair::iterator, bool> + inline typename hashtable::insert_return_type + hashtable::try_emplace(const key_type& key, Args&&... 
args) + { + return DoInsertValue(has_unique_keys_type(), piecewise_construct, eastl::forward_as_tuple(key), + eastl::forward_as_tuple(eastl::forward(args)...)); + } + + template + template + // inline eastl::pair::iterator, bool> + inline typename hashtable::insert_return_type + hashtable::try_emplace(key_type&& key, Args&&... args) + { + return DoInsertValue(has_unique_keys_type(), piecewise_construct, eastl::forward_as_tuple(eastl::move(key)), + eastl::forward_as_tuple(eastl::forward(args)...)); + } + + template + template + inline typename hashtable::iterator + hashtable::try_emplace(const_iterator, const key_type& key, Args&&... args) + { + insert_return_type result = DoInsertValue( + has_unique_keys_type(), + value_type(piecewise_construct, eastl::forward_as_tuple(key), eastl::forward_as_tuple(eastl::forward(args)...))); + + return DoGetResultIterator(has_unique_keys_type(), result); + } + + template + template + inline typename hashtable::iterator + hashtable::try_emplace(const_iterator, key_type&& key, Args&&... args) + { + insert_return_type result = + DoInsertValue(has_unique_keys_type(), value_type(piecewise_construct, eastl::forward_as_tuple(eastl::move(key)), + eastl::forward_as_tuple(eastl::forward(args)...))); + + return DoGetResultIterator(has_unique_keys_type(), result); + } + + template + typename hashtable::insert_return_type + hashtable::insert(value_type&& otherValue) + { + return DoInsertValue(has_unique_keys_type(), eastl::move(otherValue)); + } + + + template + template + typename hashtable::insert_return_type + hashtable::insert(hash_code_t c, node_type* pNodeNew, P&& otherValue) + { + // pNodeNew->mValue is expected to be uninitialized. + value_type value(eastl::forward
(otherValue)); // Need to use forward instead of move because P&& is a "universal reference" instead of an rvalue reference. + const key_type& k = mExtractKey(value); + return DoInsertValueExtra(has_unique_keys_type(), k, c, pNodeNew, eastl::move(value)); + } + + + template + typename hashtable::iterator + hashtable::insert(const_iterator, value_type&& value) + { + // We currently ignore the iterator argument as a hint. + insert_return_type result = DoInsertValue(has_unique_keys_type(), value_type(eastl::move(value))); + return DoGetResultIterator(has_unique_keys_type(), result); + } + + + template + typename hashtable::insert_return_type + hashtable::insert(const value_type& value) + { + return DoInsertValue(has_unique_keys_type(), value); + } + + + template + typename hashtable::insert_return_type + hashtable::insert(hash_code_t c, node_type* pNodeNew, const value_type& value) + { + // pNodeNew->mValue is expected to be uninitialized. + const key_type& k = mExtractKey(value); + return DoInsertValueExtra(has_unique_keys_type(), k, c, pNodeNew, value); + } + + + template + template + typename hashtable::insert_return_type + hashtable::insert(P&& otherValue) + { + return emplace(eastl::forward
(otherValue)); + } + + + template + typename hashtable::iterator + hashtable::insert(const_iterator, const value_type& value) + { + // We ignore the first argument (hint iterator). It's not likely to be useful for hashtable containers. + insert_return_type result = DoInsertValue(has_unique_keys_type(), value); + return DoGetResultIterator(has_unique_keys_type(), result); + } + + + template + void hashtable::insert(std::initializer_list ilist) + { + insert(ilist.begin(), ilist.end()); + } + + + template + template + void + hashtable::insert(InputIterator first, InputIterator last) + { + const uint32_t nElementAdd = (uint32_t)eastl::ht_distance(first, last); + const eastl::pair bRehash = mRehashPolicy.GetRehashRequired((uint32_t)mnBucketCount, (uint32_t)mnElementCount, nElementAdd); + + if(bRehash.first) + DoRehash(bRehash.second); + + for(; first != last; ++first) + DoInsertValue(has_unique_keys_type(), *first); + } + + + template + template + eastl::pair::iterator, bool> + hashtable::insert_or_assign(const key_type& k, M&& obj) + { + auto iter = find(k); + if(iter == end()) + { + return insert(value_type(piecewise_construct, eastl::forward_as_tuple(k), eastl::forward_as_tuple(eastl::forward(obj)))); + } + else + { + iter->second = eastl::forward(obj); + return {iter, false}; + } + } + + template + template + eastl::pair::iterator, bool> + hashtable::insert_or_assign(key_type&& k, M&& obj) + { + auto iter = find(k); + if(iter == end()) + { + return insert(value_type(piecewise_construct, eastl::forward_as_tuple(eastl::move(k)), eastl::forward_as_tuple(eastl::forward(obj)))); + } + else + { + iter->second = eastl::forward(obj); + return {iter, false}; + } + } + + template + template + typename hashtable::iterator + hashtable::insert_or_assign(const_iterator, const key_type& k, M&& obj) + { + return insert_or_assign(k, eastl::forward(obj)).first; // we ignore the iterator hint + } + + template + template + typename hashtable::iterator + hashtable::insert_or_assign(const_iterator, key_type&& k, M&& obj) + { + return insert_or_assign(eastl::move(k), eastl::forward(obj)).first; // we ignore the iterator hint + } + + + template + typename hashtable::iterator + hashtable::erase(const_iterator i) + { + iterator iNext(i.mpNode, i.mpBucket); // Convert from const_iterator to iterator while constructing. + ++iNext; + + node_type* pNode = i.mpNode; + node_type* pNodeCurrent = *i.mpBucket; + + if(pNodeCurrent == pNode) + *i.mpBucket = pNodeCurrent->mpNext; + else + { + // We have a singly-linked list, so we have no choice but to + // walk down it till we find the node before the node at 'i'. + node_type* pNodeNext = pNodeCurrent->mpNext; + + while(pNodeNext != pNode) + { + pNodeCurrent = pNodeNext; + pNodeNext = pNodeCurrent->mpNext; + } + + pNodeCurrent->mpNext = pNodeNext->mpNext; + } + + DoFreeNode(pNode); + --mnElementCount; + + return iNext; + } + + + + template + inline typename hashtable::iterator + hashtable::erase(const_iterator first, const_iterator last) + { + while(first != last) + first = erase(first); + return iterator(first.mpNode, first.mpBucket); + } + + + + template + typename hashtable::size_type + hashtable::erase(const key_type& k) + { + // To do: Reimplement this function to do a single loop and not try to be + // smart about element contiguity. The mechanism here is only a benefit if the + // buckets are heavily overloaded; otherwise this mechanism may be slightly slower. 
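+ // The loops below first skip any leading nodes in the bucket that do not match, then unlink
+ // the contiguous run of matching nodes; this relies on equal keys being inserted adjacently
+ // (see the insertion routines above). The return value is the number of elements removed.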
+ + const hash_code_t c = get_hash_code(k); + const size_type n = (size_type)bucket_index(k, c, (uint32_t)mnBucketCount); + const size_type nElementCountSaved = mnElementCount; + + node_type** pBucketArray = mpBucketArray + n; + + while(*pBucketArray && !compare(k, c, *pBucketArray)) + pBucketArray = &(*pBucketArray)->mpNext; + + while(*pBucketArray && compare(k, c, *pBucketArray)) + { + node_type* const pNode = *pBucketArray; + *pBucketArray = pNode->mpNext; + DoFreeNode(pNode); + --mnElementCount; + } + + return nElementCountSaved - mnElementCount; + } + + + + template + inline void hashtable::clear() + { + DoFreeNodes(mpBucketArray, mnBucketCount); + mnElementCount = 0; + } + + + + template + inline void hashtable::clear(bool clearBuckets) + { + DoFreeNodes(mpBucketArray, mnBucketCount); + if(clearBuckets) + { + DoFreeBuckets(mpBucketArray, mnBucketCount); + reset_lose_memory(); + } + mnElementCount = 0; + } + + + + template + inline void hashtable::reset_lose_memory() EA_NOEXCEPT + { + // The reset function is a special extension function which unilaterally + // resets the container to an empty state without freeing the memory of + // the contained objects. This is useful for very quickly tearing down a + // container built into scratch memory. + mnBucketCount = 1; + + #ifdef _MSC_VER + mpBucketArray = (node_type**)&gpEmptyBucketArray[0]; + #else + void* p = &gpEmptyBucketArray[0]; + memcpy(&mpBucketArray, &p, sizeof(mpBucketArray)); // Other compilers implement strict aliasing and casting is thus unsafe. + #endif + + mnElementCount = 0; + mRehashPolicy.mnNextResize = 0; + } + + + template + inline void hashtable::reserve(size_type nElementCount) + { + rehash(mRehashPolicy.GetBucketCount(uint32_t(nElementCount))); + } + + + + template + inline void hashtable::rehash(size_type nBucketCount) + { + // Note that we unilaterally use the passed in bucket count; we do not attempt migrate it + // up to the next prime number. We leave it at the user's discretion to do such a thing. + DoRehash(nBucketCount); + } + + + + template + void hashtable::DoRehash(size_type nNewBucketCount) + { + node_type** const pBucketArray = DoAllocateBuckets(nNewBucketCount); // nNewBucketCount should always be >= 2. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + node_type* pNode; + + for(size_type i = 0; i < mnBucketCount; ++i) + { + while((pNode = mpBucketArray[i]) != NULL) // Using '!=' disables compiler warnings. + { + const size_type nNewBucketIndex = (size_type)bucket_index(pNode, (uint32_t)nNewBucketCount); + + mpBucketArray[i] = pNode->mpNext; + pNode->mpNext = pBucketArray[nNewBucketIndex]; + pBucketArray[nNewBucketIndex] = pNode; + } + } + + DoFreeBuckets(mpBucketArray, mnBucketCount); + mnBucketCount = nNewBucketCount; + mpBucketArray = pBucketArray; + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + // A failure here means that a hash function threw an exception. + // We can't restore the previous state without calling the hash + // function again, so the only sensible recovery is to delete everything. + DoFreeNodes(pBucketArray, nNewBucketCount); + DoFreeBuckets(pBucketArray, nNewBucketCount); + DoFreeNodes(mpBucketArray, mnBucketCount); + mnElementCount = 0; + throw; + } + #endif + } + + + template + inline bool hashtable::validate() const + { + // Verify our empty bucket array is unmodified. + if(gpEmptyBucketArray[0] != NULL) + return false; + + if(gpEmptyBucketArray[1] != (void*)uintptr_t(~0)) + return false; + + // Verify that we have at least one bucket. 
Calculations can + // trigger division by zero exceptions otherwise. + if(mnBucketCount == 0) + return false; + + // Verify that gpEmptyBucketArray is used correctly. + // gpEmptyBucketArray is only used when initially empty. + if((void**)mpBucketArray == &gpEmptyBucketArray[0]) + { + if(mnElementCount) // gpEmptyBucketArray is used only for empty hash tables. + return false; + + if(mnBucketCount != 1) // gpEmptyBucketArray is used exactly an only for mnBucketCount == 1. + return false; + } + else + { + if(mnBucketCount < 2) // Small bucket counts *must* use gpEmptyBucketArray. + return false; + } + + // Verify that the element count matches mnElementCount. + size_type nElementCount = 0; + + for(const_iterator temp = begin(), tempEnd = end(); temp != tempEnd; ++temp) + ++nElementCount; + + if(nElementCount != mnElementCount) + return false; + + // To do: Verify that individual elements are in the expected buckets. + + return true; + } + + + template + int hashtable::validate_iterator(const_iterator i) const + { + // To do: Come up with a more efficient mechanism of doing this. + + for(const_iterator temp = begin(), tempEnd = end(); temp != tempEnd; ++temp) + { + if(temp == i) + return (isf_valid | isf_current | isf_can_dereference); + } + + if(i == end()) + return (isf_valid | isf_current); + + return isf_none; + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + // operator==, != have been moved to the specific container subclasses (e.g. hash_map). + + // The following comparison operators are deprecated and will likely be removed in a + // future version of this package. + // + // Comparing hash tables for less-ness is an odd thing to do. We provide it for + // completeness, though the user is advised to be wary of how they use this. + // + template + inline bool operator<(const hashtable& a, + const hashtable& b) + { + // This requires hash table elements to support operator<. Since the hash table + // doesn't compare elements via less (it does so via equals), we must use the + // globally defined operator less for the elements. + return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); + } + + + template + inline bool operator>(const hashtable& a, + const hashtable& b) + { + return b < a; + } + + + template + inline bool operator<=(const hashtable& a, + const hashtable& b) + { + return !(b < a); + } + + + template + inline bool operator>=(const hashtable& a, + const hashtable& b) + { + return !(a < b); + } + + + template + inline void swap(const hashtable& a, + const hashtable& b) + { + a.swap(b); + } + + +} // namespace eastl + + +EA_RESTORE_VC_WARNING(); + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/internal/in_place_t.h b/libkram/eastl/include/EASTL/internal/in_place_t.h new file mode 100644 index 00000000..79acd184 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/in_place_t.h @@ -0,0 +1,82 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_IN_PLACE_T_H +#define EASTL_INTERNAL_IN_PLACE_T_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +namespace eastl +{ + namespace Internal + { + struct in_place_tag {}; + template struct in_place_type_tag {}; + template struct in_place_index_tag {}; + } + + /////////////////////////////////////////////////////////////////////////////// + /// in_place_tag + /// + /// http://en.cppreference.com/w/cpp/utility/in_place_tag + /// + struct in_place_tag + { + in_place_tag() = delete; + + private: + explicit in_place_tag(Internal::in_place_tag) {} + friend inline in_place_tag Internal_ConstructInPlaceTag(); + }; + + // internal factory function for in_place_tag + inline in_place_tag Internal_ConstructInPlaceTag() { return in_place_tag(Internal::in_place_tag{}); } + + + /////////////////////////////////////////////////////////////////////////////// + /// in_place_t / in_place_type_t / in_place_index_t + /// + /// used to disambiguate overloads that take arguments (possibly a parameter + /// pack) for in-place construction of some value. + /// + /// http://en.cppreference.com/w/cpp/utility/optional/in_place_t + /// + using in_place_t = in_place_tag(&)(Internal::in_place_tag); + + template + using in_place_type_t = in_place_tag(&)(Internal::in_place_type_tag); + + template + using in_place_index_t = in_place_tag(&)(Internal::in_place_index_tag); + + + /////////////////////////////////////////////////////////////////////////////// + /// in_place / in_place / in_place + /// + /// http://en.cppreference.com/w/cpp/utility/in_place + /// + inline in_place_tag in_place(Internal::in_place_tag) { return Internal_ConstructInPlaceTag(); } + + template + inline in_place_tag in_place(Internal::in_place_type_tag) { return Internal_ConstructInPlaceTag(); } + + template + inline in_place_tag in_place(Internal::in_place_index_tag) { return Internal_ConstructInPlaceTag(); } + + +} // namespace eastl + + +#endif // Header include guard + + + + + + diff --git a/libkram/eastl/include/EASTL/internal/integer_sequence.h b/libkram/eastl/include/EASTL/internal/integer_sequence.h new file mode 100644 index 00000000..88cf1b1b --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/integer_sequence.h @@ -0,0 +1,74 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
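
// A sketch (not part of this patch) of how the in_place tag defined just above is
// typically consumed: eastl::optional forwards the trailing arguments straight to the
// payload's constructor instead of copying a prebuilt object. Point is a hypothetical
// payload type; eastl::optional is assumed to accept an in_place_t first argument.
#include <EASTL/optional.h>

struct Point
{
    int x, y;
    Point(int x_, int y_) : x(x_), y(y_) {}
};

eastl::optional<Point> make_point()
{
    // eastl::in_place disambiguates "construct from (1, 2)" from "copy an existing Point".
    return eastl::optional<Point>(eastl::in_place, 1, 2);
}
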
+///////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_INTEGER_SEQUENCE_H +#define EASTL_INTEGER_SEQUENCE_H + +#include +#include +#include + +namespace eastl +{ + +#if EASTL_VARIADIC_TEMPLATES_ENABLED && !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + +// integer_sequence +template +class integer_sequence +{ +public: + typedef T value_type; + static_assert(is_integral::value, "eastl::integer_sequence can only be instantiated with an integral type"); + static EA_CONSTEXPR size_t size() EA_NOEXCEPT { return sizeof...(Ints); } +}; + +template +struct make_index_sequence_impl; + +template +struct make_index_sequence_impl> +{ + typedef typename make_index_sequence_impl>::type type; +}; + +template +struct make_index_sequence_impl<0, integer_sequence> +{ + typedef integer_sequence type; +}; + +template +using index_sequence = integer_sequence; + +template +using make_index_sequence = typename make_index_sequence_impl>::type; + +template +struct integer_sequence_convert_impl; + +template +struct integer_sequence_convert_impl> +{ + typedef integer_sequence type; +}; + +template +struct make_integer_sequence_impl +{ + typedef typename integer_sequence_convert_impl>::type type; +}; + +template +using make_integer_sequence = typename make_integer_sequence_impl::type; + +// Helper alias template that converts any type parameter pack into an index sequence of the same length +template +using index_sequence_for = make_index_sequence; + +#endif // EASTL_VARIADIC_TEMPLATES_ENABLED + +} // namespace eastl + +#endif // EASTL_INTEGER_SEQUENCE_H diff --git a/libkram/eastl/include/EASTL/internal/intrusive_hashtable.h b/libkram/eastl/include/EASTL/internal/intrusive_hashtable.h new file mode 100644 index 00000000..dccca5b1 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/intrusive_hashtable.h @@ -0,0 +1,989 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements an intrusive hash table, which is a hash table whereby +// the container nodes are the hash table objects themselves. This has benefits +// primarily in terms of memory management. There are some minor limitations +// that result from this. +// +/////////////////////////////////////////////////////////////////////////////// + + + +#ifndef EASTL_INTERNAL_INTRUSIVE_HASHTABLE_H +#define EASTL_INTERNAL_INTRUSIVE_HASHTABLE_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include +#include +#include +#include +#include +#include + +EA_DISABLE_ALL_VC_WARNINGS(); +#include +#include +#include +EA_RESTORE_ALL_VC_WARNINGS(); + + +namespace eastl +{ + + /// intrusive_hash_node + /// + /// A hash_node stores an element in a hash table, much like a + /// linked list node stores an element in a linked list. + /// An intrusive_hash_node additionally can, via template parameter, + /// store a hash code in the node to speed up hash calculations + /// and comparisons in some cases. + /// + /// To consider: Make a version of intrusive_hash_node which is + /// templated on the container type. This would allow for the + /// mpNext pointer to be the container itself and thus allow + /// for easier debugging. + /// + /// Example usage: + /// struct Widget : public intrusive_hash_node{ ... 
}; + /// + /// struct Dagget : public intrusive_hash_node_key{ ... }; + /// + struct intrusive_hash_node + { + intrusive_hash_node* mpNext; + }; + + + template + struct intrusive_hash_node_key : public intrusive_hash_node + { + typedef Key key_type; + Key mKey; + }; + + + + /// intrusive_node_iterator + /// + /// Node iterators iterate nodes within a given bucket. + /// + /// The bConst parameter defines if the iterator is a const_iterator + /// or an iterator. + /// + template + struct intrusive_node_iterator + { + public: + typedef intrusive_node_iterator this_type; + typedef Value value_type; + typedef Value node_type; + typedef ptrdiff_t difference_type; + typedef typename type_select::type pointer; + typedef typename type_select::type reference; + typedef EASTL_ITC_NS::forward_iterator_tag iterator_category; + + public: + node_type* mpNode; + + public: + intrusive_node_iterator() + : mpNode(NULL) { } + + explicit intrusive_node_iterator(value_type* pNode) + : mpNode(pNode) { } + + intrusive_node_iterator(const intrusive_node_iterator& x) + : mpNode(x.mpNode) { } + + reference operator*() const + { return *mpNode; } + + pointer operator->() const + { return mpNode; } + + this_type& operator++() + { mpNode = static_cast(mpNode->mpNext); return *this; } + + this_type operator++(int) + { this_type temp(*this); mpNode = static_cast(mpNode->mpNext); return temp; } + + }; // intrusive_node_iterator + + + + + /// intrusive_hashtable_iterator_base + /// + /// An intrusive_hashtable_iterator_base iterates the entire hash table and + /// not just nodes within a single bucket. Users in general will use a hash + /// table iterator much more often, as it is much like other container + /// iterators (e.g. vector::iterator). + /// + /// We define a base class here because it is shared by both const and + /// non-const iterators. + /// + template + struct intrusive_hashtable_iterator_base + { + public: + typedef Value value_type; + + protected: + template + friend class intrusive_hashtable; + + template + friend struct intrusive_hashtable_iterator; + + template + friend bool operator==(const intrusive_hashtable_iterator_base&, const intrusive_hashtable_iterator_base&); + + template + friend bool operator!=(const intrusive_hashtable_iterator_base&, const intrusive_hashtable_iterator_base&); + + value_type* mpNode; // Current node within current bucket. + value_type** mpBucket; // Current bucket. + + public: + intrusive_hashtable_iterator_base(value_type* pNode, value_type** pBucket) + : mpNode(pNode), mpBucket(pBucket) { } + + void increment_bucket() + { + ++mpBucket; + while(*mpBucket == NULL) // We store an extra bucket with some non-NULL value at the end + ++mpBucket; // of the bucket array so that finding the end of the bucket + mpNode = *mpBucket; // array is quick and simple. + } + + void increment() + { + mpNode = static_cast(mpNode->mpNext); + + while(mpNode == NULL) + mpNode = *++mpBucket; + } + + }; // intrusive_hashtable_iterator_base + + + + + /// intrusive_hashtable_iterator + /// + /// An intrusive_hashtable_iterator iterates the entire hash table and not + /// just nodes within a single bucket. Users in general will use a hash + /// table iterator much more often, as it is much like other container + /// iterators (e.g. vector::iterator). + /// + /// The bConst parameter defines if the iterator is a const_iterator + /// or an iterator. 
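
// A sketch (not part of this patch) of the intrusive pattern described above: the
// element itself derives from intrusive_hash_node_key and therefore carries its own
// bucket linkage. Widget, and the <Key, T, bucketCount> shape of the companion
// eastl::intrusive_hash_map container, are assumptions for illustration only.
#include <EASTL/intrusive_hash_map.h>

struct Widget : public eastl::intrusive_hash_node_key<int>
{
    explicit Widget(int id) { mKey = id; }
};

void intrusive_node_sketch()
{
    eastl::intrusive_hash_map<int, Widget, 37> table; // 37 buckets, fixed at compile time

    Widget a(1), b(2);   // nodes are owned by the caller, never allocated by the container
    table.insert(a);
    table.insert(b);

    table.remove(a);     // O(1) removal by value, since the node holds its own linkage
}
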
+ /// + template + struct intrusive_hashtable_iterator : public intrusive_hashtable_iterator_base + { + public: + typedef intrusive_hashtable_iterator_base base_type; + typedef intrusive_hashtable_iterator this_type; + typedef intrusive_hashtable_iterator this_type_non_const; + typedef typename base_type::value_type value_type; + typedef typename type_select::type pointer; + typedef typename type_select::type reference; + typedef ptrdiff_t difference_type; + typedef EASTL_ITC_NS::forward_iterator_tag iterator_category; + + public: + intrusive_hashtable_iterator() + : base_type(NULL, NULL) { } + + explicit intrusive_hashtable_iterator(value_type* pNode, value_type** pBucket) + : base_type(pNode, pBucket) { } + + explicit intrusive_hashtable_iterator(value_type** pBucket) + : base_type(*pBucket, pBucket) { } + + intrusive_hashtable_iterator(const this_type_non_const& x) + : base_type(x.mpNode, x.mpBucket) { } + + reference operator*() const + { return *base_type::mpNode; } + + pointer operator->() const + { return base_type::mpNode; } + + this_type& operator++() + { base_type::increment(); return *this; } + + this_type operator++(int) + { this_type temp(*this); base_type::increment(); return temp; } + + }; // intrusive_hashtable_iterator + + + + /// use_intrusive_key + /// + /// operator()(x) returns x.mKey. Used in maps, as opposed to sets. + /// This is a template policy implementation; it is an alternative to + /// the use_self template implementation, which is used for sets. + /// + template + struct use_intrusive_key // : public unary_function // Perhaps we want to make it a subclass of unary_function. + { + typedef Key result_type; + + const result_type& operator()(const Node& x) const + { return x.mKey; } + }; + + + + /////////////////////////////////////////////////////////////////////////// + /// intrusive_hashtable + /// + template + class intrusive_hashtable + { + public: + typedef intrusive_hashtable this_type; + typedef Key key_type; + typedef Value value_type; + typedef Value mapped_type; + typedef Value node_type; + typedef uint32_t hash_code_t; + typedef Equal key_equal; + typedef ptrdiff_t difference_type; + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. + typedef value_type& reference; + typedef const value_type& const_reference; + typedef intrusive_node_iterator local_iterator; + typedef intrusive_node_iterator const_local_iterator; + typedef intrusive_hashtable_iterator iterator; + typedef intrusive_hashtable_iterator const_iterator; + typedef typename type_select, iterator>::type insert_return_type; + typedef typename type_select, + eastl::use_intrusive_key >::type extract_key; + + enum + { + kBucketCount = bucketCount + }; + + protected: + node_type* mBucketArray[kBucketCount + 1]; // '+1' because we have an end bucket which is non-NULL so iterators always stop on it. + size_type mnElementCount; + Hash mHash; // To do: Use base class optimization to make this go away when it is of zero size. + Equal mEqual; // To do: Use base class optimization to make this go away when it is of zero size. 
+ + public: + intrusive_hashtable(const Hash&, const Equal&); + + void swap(this_type& x); + + iterator begin() EA_NOEXCEPT + { + iterator i(mBucketArray); + if(!i.mpNode) + i.increment_bucket(); + return i; + } + + const_iterator begin() const EA_NOEXCEPT + { + const_iterator i(const_cast(mBucketArray)); + if(!i.mpNode) + i.increment_bucket(); + return i; + } + + const_iterator cbegin() const EA_NOEXCEPT + { + return begin(); + } + + iterator end() EA_NOEXCEPT + { return iterator(mBucketArray + kBucketCount); } + + const_iterator end() const EA_NOEXCEPT + { return const_iterator(const_cast(mBucketArray) + kBucketCount); } + + const_iterator cend() const EA_NOEXCEPT + { return const_iterator(const_cast(mBucketArray) + kBucketCount); } + + local_iterator begin(size_type n) EA_NOEXCEPT + { return local_iterator(mBucketArray[n]); } + + const_local_iterator begin(size_type n) const EA_NOEXCEPT + { return const_local_iterator(mBucketArray[n]); } + + const_local_iterator cbegin(size_type n) const EA_NOEXCEPT + { return const_local_iterator(mBucketArray[n]); } + + local_iterator end(size_type) EA_NOEXCEPT + { return local_iterator(NULL); } + + const_local_iterator end(size_type) const EA_NOEXCEPT + { return const_local_iterator(NULL); } + + const_local_iterator cend(size_type) const EA_NOEXCEPT + { return const_local_iterator(NULL); } + + size_type size() const EA_NOEXCEPT + { return mnElementCount; } + + bool empty() const EA_NOEXCEPT + { return mnElementCount == 0; } + + size_type bucket_count() const EA_NOEXCEPT // This function is unnecessary, as the user can directly reference + { return kBucketCount; } // intrusive_hashtable::kBucketCount as a constant. + + size_type bucket_size(size_type n) const EA_NOEXCEPT + { return (size_type)eastl::distance(begin(n), end(n)); } + + size_type bucket(const key_type& k) const EA_NOEXCEPT + { return (size_type)(mHash(k) % kBucketCount); } + + public: + float load_factor() const EA_NOEXCEPT + { return (float)mnElementCount / (float)kBucketCount; } + + public: + insert_return_type insert(value_type& value) + { return DoInsertValue(value, integral_constant()); } + + insert_return_type insert(const_iterator, value_type& value) + { return insert(value); } // To consider: We might be able to use the iterator argument to specify a specific insertion location. + + template + void insert(InputIterator first, InputIterator last); + + public: + iterator erase(const_iterator position); + iterator erase(const_iterator first, const_iterator last); + size_type erase(const key_type& k); + iterator remove(value_type& value); // Removes by value instead of by iterator. This is an O(1) operation, due to this hashtable being 'intrusive'. + + void clear(); + + public: + iterator find(const key_type& k); + const_iterator find(const key_type& k) const; + + /// Implements a find whereby the user supplies a comparison of a different type + /// than the hashtable value_type. A useful case of this is one whereby you have + /// a container of string objects but want to do searches via passing in char pointers. + /// The problem is that without this kind of find, you need to do the expensive operation + /// of converting the char pointer to a string so it can be used as the argument to the + /// find function. + /// + /// Example usage: + /// hash_set hashSet; + /// hashSet.find_as("hello"); // Use default hash and compare. 
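
// A sketch (not part of this patch) of the heterogeneous lookup that find_as enables:
// probing a set of strings with a plain char pointer, so no temporary string has to be
// constructed just to perform the search. Shown with the non-intrusive eastl::hash_set
// for brevity; the intrusive containers built on this header follow the same pattern.
#include <EASTL/hash_set.h>
#include <EASTL/string.h>

bool contains_name(const eastl::hash_set<eastl::string>& names, const char* name)
{
    auto it = names.find_as(name,
                            eastl::hash<const char*>(),
                            eastl::equal_to_2<eastl::string, const char*>());
    return it != names.end();
}
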
+ /// + /// Example usage (namespaces omitted for brevity): + /// hash_set hashSet; + /// hashSet.find_as("hello", hash(), equal_to_2()); + /// + template + iterator find_as(const U& u, UHash uhash, BinaryPredicate predicate); + + template + const_iterator find_as(const U& u, UHash uhash, BinaryPredicate predicate) const; + + template + iterator find_as(const U& u); + + template + const_iterator find_as(const U& u) const; + + size_type count(const key_type& k) const; + + // The use for equal_range in a hash_table seems somewhat questionable. + // The primary reason for its existence is to replicate the interface of set/map. + eastl::pair equal_range(const key_type& k); + eastl::pair equal_range(const key_type& k) const; + + public: + bool validate() const; + int validate_iterator(const_iterator i) const; + + public: + Hash hash_function() const + { return mHash; } + + Equal equal_function() const // Deprecated. Use key_eq() instead, as key_eq is what the new C++ standard + { return mEqual; } // has specified in its hashtable (unordered_*) proposal. + + const key_equal& key_eq() const + { return mEqual; } + + key_equal& key_eq() + { return mEqual; } + + protected: + eastl::pair DoInsertValue(value_type&, true_type); // true_type means bUniqueKeys is true. + iterator DoInsertValue(value_type&, false_type); // false_type means bUniqueKeys is false. + + node_type* DoFindNode(node_type* pNode, const key_type& k) const; + + template + node_type* DoFindNode(node_type* pNode, const U& u, BinaryPredicate predicate) const; + + }; // class intrusive_hashtable + + + + + + /////////////////////////////////////////////////////////////////////// + // node_iterator_base + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const intrusive_node_iterator& a, + const intrusive_node_iterator& b) + { return a.mpNode == b.mpNode; } + + template + inline bool operator!=(const intrusive_node_iterator& a, + const intrusive_node_iterator& b) + { return a.mpNode != b.mpNode; } + + + + + /////////////////////////////////////////////////////////////////////// + // hashtable_iterator_base + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const intrusive_hashtable_iterator_base& a, + const intrusive_hashtable_iterator_base& b) + { return a.mpNode == b.mpNode; } + + + template + inline bool operator!=(const intrusive_hashtable_iterator_base& a, + const intrusive_hashtable_iterator_base& b) + { return a.mpNode != b.mpNode; } + + + + + /////////////////////////////////////////////////////////////////////// + // intrusive_hashtable + /////////////////////////////////////////////////////////////////////// + + template + inline intrusive_hashtable::intrusive_hashtable(const H& h, const Eq& eq) + : mnElementCount(0), + mHash(h), + mEqual(eq) + { + memset(mBucketArray, 0, kBucketCount * sizeof(mBucketArray[0])); + mBucketArray[kBucketCount] = reinterpret_cast((uintptr_t)~0); + } + + + template + void intrusive_hashtable::swap(this_type& x) + { + for(size_t i = 0; i < kBucketCount; i++) + eastl::swap(mBucketArray[i], x.mBucketArray[i]); + + eastl::swap(mnElementCount, x.mnElementCount); + eastl::swap(mHash, x.mHash); + eastl::swap(mEqual, x.mEqual); + } + + + template + inline typename intrusive_hashtable::iterator + intrusive_hashtable::find(const key_type& k) + { + const size_type n = (size_type)(mHash(k) % kBucketCount); + node_type* const pNode = DoFindNode(mBucketArray[n], k); + return pNode ? 
iterator(pNode, mBucketArray + n) : iterator(mBucketArray + kBucketCount); + } + + + template + inline typename intrusive_hashtable::const_iterator + intrusive_hashtable::find(const key_type& k) const + { + const size_type n = (size_type)(mHash(k) % kBucketCount); + node_type* const pNode = DoFindNode(mBucketArray[n], k); + return pNode ? const_iterator(pNode, const_cast(mBucketArray) + n) : const_iterator(const_cast(mBucketArray) + kBucketCount); + } + + + template + template + inline typename intrusive_hashtable::iterator + intrusive_hashtable::find_as(const U& other, UHash uhash, BinaryPredicate predicate) + { + const size_type n = (size_type)(uhash(other) % kBucketCount); + node_type* const pNode = DoFindNode(mBucketArray[n], other, predicate); + return pNode ? iterator(pNode, mBucketArray + n) : iterator(mBucketArray + kBucketCount); + } + + + template + template + inline typename intrusive_hashtable::const_iterator + intrusive_hashtable::find_as(const U& other, UHash uhash, BinaryPredicate predicate) const + { + const size_type n = (size_type)(uhash(other) % kBucketCount); + node_type* const pNode = DoFindNode(mBucketArray[n], other, predicate); + return pNode ? const_iterator(pNode, const_cast(mBucketArray) + n) : const_iterator(const_cast(mBucketArray) + kBucketCount); + } + + + /// intrusive_hashtable_find + /// + /// Helper function that defaults to using hash and equal_to_2. + /// This makes it so that by default you don't need to provide these. + /// Note that the default hash functions may not be what you want, though. + /// + /// Example usage. Instead of this: + /// hash_set hashSet; + /// hashSet.find("hello", hash(), equal_to_2()); + /// + /// You can use this: + /// hash_set hashSet; + /// hashtable_find(hashSet, "hello"); + /// + template + inline typename H::iterator intrusive_hashtable_find(H& hashTable, const U& u) + { return hashTable.find_as(u, eastl::hash(), eastl::equal_to_2()); } + + template + inline typename H::const_iterator intrusive_hashtable_find(const H& hashTable, const U& u) + { return hashTable.find_as(u, eastl::hash(), eastl::equal_to_2()); } + + + + template + template + inline typename intrusive_hashtable::iterator + intrusive_hashtable::find_as(const U& other) + { return eastl::intrusive_hashtable_find(*this, other); } + // VC++ doesn't appear to like the following, though it seems correct to me. + // So we implement the workaround above until we can straighten this out. + //{ return find_as(other, eastl::hash(), eastl::equal_to_2()); } + + + template + template + inline typename intrusive_hashtable::const_iterator + intrusive_hashtable::find_as(const U& other) const + { return eastl::intrusive_hashtable_find(*this, other); } + // VC++ doesn't appear to like the following, though it seems correct to me. + // So we implement the workaround above until we can straighten this out. + //{ return find_as(other, eastl::hash(), eastl::equal_to_2()); } + + + template + typename intrusive_hashtable::size_type + intrusive_hashtable::count(const key_type& k) const + { + const size_type n = (size_type)(mHash(k) % kBucketCount); + size_type result = 0; + extract_key extractKey; // extract_key is empty and thus this ctor is a no-op. + + // To do: Make a specialization for bU (unique keys) == true and take + // advantage of the fact that the count will always be zero or one in that case. 
+ for(node_type* pNode = mBucketArray[n]; pNode; pNode = static_cast(pNode->mpNext)) + { + if(mEqual(k, extractKey(*pNode))) + ++result; + } + return result; + } + + + template + eastl::pair::iterator, + typename intrusive_hashtable::iterator> + intrusive_hashtable::equal_range(const key_type& k) + { + const size_type n = (size_type)(mHash(k) % kBucketCount); + node_type** head = mBucketArray + n; + node_type* pNode = DoFindNode(*head, k); + extract_key extractKey; // extract_key is empty and thus this ctor is a no-op. + + if(pNode) + { + node_type* p1 = static_cast(pNode->mpNext); + + for(; p1; p1 = static_cast(p1->mpNext)) + { + if(!mEqual(k, extractKey(*p1))) + break; + } + + iterator first(pNode, head); + iterator last(p1, head); + + if(!p1) + last.increment_bucket(); + + return eastl::pair(first, last); + } + + return eastl::pair(iterator(mBucketArray + kBucketCount), + iterator(mBucketArray + kBucketCount)); + } + + + + + template + eastl::pair::const_iterator, + typename intrusive_hashtable::const_iterator> + intrusive_hashtable::equal_range(const key_type& k) const + { + const size_type n = (size_type)(mHash(k) % kBucketCount); + node_type** head = const_cast(mBucketArray + n); + node_type* pNode = DoFindNode(*head, k); + extract_key extractKey; // extract_key is empty and thus this ctor is a no-op. + + if(pNode) + { + node_type* p1 = static_cast(pNode->mpNext); + + for(; p1; p1 = static_cast(p1->mpNext)) + { + if(!mEqual(k, extractKey(*p1))) + break; + } + + const_iterator first(pNode, head); + const_iterator last(p1, head); + + if(!p1) + last.increment_bucket(); + + return eastl::pair(first, last); + } + + return eastl::pair(const_iterator(const_cast(mBucketArray) + kBucketCount), + const_iterator(const_cast(mBucketArray) + kBucketCount)); + } + + + template + inline typename intrusive_hashtable::node_type* + intrusive_hashtable::DoFindNode(node_type* pNode, const key_type& k) const + { + extract_key extractKey; // extract_key is empty and thus this ctor is a no-op. + + for(; pNode; pNode = static_cast(pNode->mpNext)) + { + if(mEqual(k, extractKey(*pNode))) + return pNode; + } + return NULL; + } + + + template + template + inline typename intrusive_hashtable::node_type* + intrusive_hashtable::DoFindNode(node_type* pNode, const U& other, BinaryPredicate predicate) const + { + extract_key extractKey; // extract_key is empty and thus this ctor is a no-op. + + for(; pNode; pNode = static_cast(pNode->mpNext)) + { + if(predicate(extractKey(*pNode), other)) // Intentionally compare with key as first arg and other as second arg. + return pNode; + } + return NULL; + } + + + template + eastl::pair::iterator, bool> + intrusive_hashtable::DoInsertValue(value_type& value, true_type) // true_type means bUniqueKeys is true. + { + // For sets (as opposed to maps), one could argue that all insertions are successful, + // as all elements are unique. However, the equal function might not think so. + extract_key extractKey; // extract_key is empty and thus this ctor is a no-op. 
+ const size_type n = (size_type)(mHash(extractKey(value)) % kBucketCount); + node_type* const pNode = DoFindNode(mBucketArray[n], extractKey(value)); + + if(pNode == NULL) + { + value.mpNext = mBucketArray[n]; + mBucketArray[n] = &value; + ++mnElementCount; + + return eastl::pair(iterator(&value, mBucketArray + n), true); + } + + return eastl::pair(iterator(pNode, mBucketArray + n), false); + } + + + template + typename intrusive_hashtable::iterator + intrusive_hashtable::DoInsertValue(value_type& value, false_type) // false_type means bUniqueKeys is false. + { + extract_key extractKey; // extract_key is empty and thus this ctor is a no-op. + const size_type n = (size_type)(mHash(extractKey(value)) % kBucketCount); + node_type* const pNodePrev = DoFindNode(mBucketArray[n], extractKey(value)); + + if(pNodePrev == NULL) + { + value.mpNext = mBucketArray[n]; + mBucketArray[n] = &value; + } + else + { + value.mpNext = pNodePrev->mpNext; + pNodePrev->mpNext = &value; + } + + ++mnElementCount; + + return iterator(&value, mBucketArray + n); + } + + + + template + template + inline void intrusive_hashtable::insert(InputIterator first, InputIterator last) + { + for(; first != last; ++first) + insert(*first); + } + + + template + typename intrusive_hashtable::iterator + intrusive_hashtable::erase(const_iterator i) + { + iterator iNext(i.mpNode, i.mpBucket); + ++iNext; + + node_type* pNode = i.mpNode; + node_type* pNodeCurrent = *i.mpBucket; + + if(pNodeCurrent == pNode) + *i.mpBucket = static_cast(pNodeCurrent->mpNext); + else + { + // We have a singly-linked list, so we have no choice but to + // walk down it till we find the node before the node at 'i'. + node_type* pNodeNext = static_cast(pNodeCurrent->mpNext); + + while(pNodeNext != pNode) + { + pNodeCurrent = pNodeNext; + pNodeNext = static_cast(pNodeCurrent->mpNext); + } + + pNodeCurrent->mpNext = static_cast(pNodeNext->mpNext); + } + + // To consider: In debug builds set the node mpNext to NULL. + --mnElementCount; + + return iNext; + } + + + template + inline typename intrusive_hashtable::iterator + intrusive_hashtable::erase(const_iterator first, const_iterator last) + { + while(first != last) + first = erase(first); + return iterator(first.mpNode, first.mpBucket); + } + + + template + typename intrusive_hashtable::size_type + intrusive_hashtable::erase(const key_type& k) + { + const size_type n = (size_type)(mHash(k) % kBucketCount); + const size_type nElementCountSaved = mnElementCount; + node_type*& pNodeBase = mBucketArray[n]; + extract_key extractKey; // extract_key is empty and thus this ctor is a no-op. + + // Note by Paul Pedriana: + // We have two loops here, and I'm not finding any easy way to having just one + // loop without changing the requirements of the hashtable node definition. + // It's a problem of taking an address of a variable and converting it to the + // address of another type without knowing what that type is. Perhaps I'm a + // little overly tired, so if there is a simple solution I am probably missing it. + + while(pNodeBase && mEqual(k, extractKey(*pNodeBase))) + { + pNodeBase = static_cast(pNodeBase->mpNext); + --mnElementCount; + } + + node_type* pNodePrev = pNodeBase; + + if(pNodePrev) + { + node_type* pNodeCur; + + while((pNodeCur = static_cast(pNodePrev->mpNext)) != NULL) + { + if(mEqual(k, extractKey(*pNodeCur))) + { + pNodePrev->mpNext = static_cast(pNodeCur->mpNext); + --mnElementCount; // To consider: In debug builds set the node mpNext to NULL. 
+ } + else + pNodePrev = static_cast(pNodePrev->mpNext); + } + } + + return nElementCountSaved - mnElementCount; + } + + + template + inline typename intrusive_hashtable::iterator + intrusive_hashtable::remove(value_type& value) + { + extract_key extractKey; // extract_key is empty and thus this ctor is a no-op. + const size_type n = (size_type)(mHash(extractKey(value)) % kBucketCount); + + return erase(iterator(&value, &mBucketArray[n])); + } + + + template + inline void intrusive_hashtable::clear() + { + // To consider: In debug builds set the node mpNext to NULL. + memset(mBucketArray, 0, kBucketCount * sizeof(mBucketArray[0])); + mnElementCount = 0; + } + + + template + inline bool intrusive_hashtable::validate() const + { + // Verify that the element count matches mnElementCount. + size_type nElementCount = 0; + + for(const_iterator temp = begin(), tempEnd = end(); temp != tempEnd; ++temp) + ++nElementCount; + + if(nElementCount != mnElementCount) + return false; + + // To do: Verify that individual elements are in the expected buckets. + + return true; + } + + + template + int intrusive_hashtable::validate_iterator(const_iterator i) const + { + // To do: Come up with a more efficient mechanism of doing this. + + for(const_iterator temp = begin(), tempEnd = end(); temp != tempEnd; ++temp) + { + if(temp == i) + return (isf_valid | isf_current | isf_can_dereference); + } + + if(i == end()) + return (isf_valid | isf_current); + + return isf_none; + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const intrusive_hashtable& a, + const intrusive_hashtable& b) + { + return (a.size() == b.size()) && eastl::equal(a.begin(), a.end(), b.begin()); + } + + + template + inline bool operator!=(const intrusive_hashtable& a, + const intrusive_hashtable& b) + { + return !(a == b); + } + + + // Comparing hash tables for less-ness is an odd thing to do. We provide it for + // completeness, though the user is advised to be wary of how they use this. + template + inline bool operator<(const intrusive_hashtable& a, + const intrusive_hashtable& b) + { + // This requires hash table elements to support operator<. Since the hash table + // doesn't compare elements via less (it does so via equals), we must use the + // globally defined operator less for the elements. + return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); + } + + + template + inline bool operator>(const intrusive_hashtable& a, + const intrusive_hashtable& b) + { + return b < a; + } + + + template + inline bool operator<=(const intrusive_hashtable& a, + const intrusive_hashtable& b) + { + return !(b < a); + } + + + template + inline bool operator>=(const intrusive_hashtable& a, + const intrusive_hashtable& b) + { + return !(a < b); + } + + + template + inline void swap(const intrusive_hashtable& a, + const intrusive_hashtable& b) + { + a.swap(b); + } + + +} // namespace eastl + + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/internal/mem_fn.h b/libkram/eastl/include/EASTL/internal/mem_fn.h new file mode 100644 index 00000000..1d3e7b3f --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/mem_fn.h @@ -0,0 +1,304 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
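
// A sketch (not part of this patch): with unique keys, insert() on the intrusive
// hashtable above returns pair<iterator, bool>, and a second node carrying an equal key
// is rejected rather than linked in. Entry and the <Key, T, bucketCount> shape of
// eastl::intrusive_hash_map are illustration-only assumptions.
#include <EASTL/intrusive_hash_map.h>

struct Entry : public eastl::intrusive_hash_node_key<int>
{
    explicit Entry(int id) { mKey = id; }
};

void unique_insert_sketch()
{
    eastl::intrusive_hash_map<int, Entry, 17> table;

    Entry first(7), duplicate(7);
    bool added      = table.insert(first).second;     // true: key 7 was new
    bool addedAgain = table.insert(duplicate).second; // false: key 7 is already present
    (void)added; (void)addedAgain;
}
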
+///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_MEM_FN_H +#define EASTL_INTERNAL_MEM_FN_H + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) +#pragma once +#endif + +//////////////////////////////////////////////////////////////////////////////// +// The code in this file is a modification of the libcxx implementation. We copy +// the license information here as required. +// +// We implement only enough of mem_fn to implement eastl::function. +//////////////////////////////////////////////////////////////////////////////// + +//===------------------------ functional ----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under the MIT and the University of Illinois Open +// Source Licenses. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + +namespace eastl +{ + // + // apply_cv + // + template ::type>::value, + bool = is_volatile::type>::value> + struct apply_cv { typedef U type; }; + + template struct apply_cv { typedef const U type; }; + template struct apply_cv { typedef volatile U type; }; + template struct apply_cv { typedef const volatile U type; }; + template struct apply_cv { typedef U& type; }; + template struct apply_cv { typedef const U& type; }; + template struct apply_cv { typedef volatile U& type; }; + template struct apply_cv { typedef const volatile U& type; }; + + + + // + // has_result_type + // + template + struct has_result_type + { + private: + template + static eastl::no_type test(...); + + template + static eastl::yes_type test(typename U::result_type* = 0); + + public: + static const bool value = sizeof(test(0)) == sizeof(eastl::yes_type); + }; + + + + // + // derives_from_unary_function + // derives_from_binary_function + // + template + struct derives_from_unary_function + { + private: + static eastl::no_type test(...); + + template + static unary_function test(const volatile unary_function*); + + public: + static const bool value = !is_same::value; + typedef decltype(test((T*)0)) type; + }; + + template + struct derives_from_binary_function + { + private: + static eastl::no_type test(...); + template + static binary_function test(const volatile binary_function*); + + public: + static const bool value = !is_same::value; + typedef decltype(test((T*)0)) type; + }; + + + + // + // maybe_derives_from_unary_function + // maybe_derives_from_binary_function + // + template ::value> + struct maybe_derive_from_unary_function // bool is true + : public derives_from_unary_function::type { }; + + template + struct maybe_derive_from_unary_function { }; + + template ::value> + struct maybe_derive_from_binary_function // bool is true + : public derives_from_binary_function::type { }; + + template + struct maybe_derive_from_binary_function { }; + + + + // + // weak_result_type_imp + // + template ::value> + struct weak_result_type_imp // bool is true + : public maybe_derive_from_unary_function, + public maybe_derive_from_binary_function + { + typedef typename T::result_type result_type; + }; + + template + struct weak_result_type_imp : public maybe_derive_from_unary_function, + public maybe_derive_from_binary_function { }; + + + + // + // weak_result_type + // + template + struct weak_result_type : public weak_result_type_imp { }; + + // 0 argument case + template struct weak_result_type { typedef R result_type; }; + template struct weak_result_type { typedef R result_type; }; + template struct 
weak_result_type { typedef R result_type; }; + + // 1 argument case + template struct weak_result_type : public unary_function { }; + template struct weak_result_type : public unary_function { }; + template struct weak_result_type : public unary_function { }; + template struct weak_result_type : public unary_function { }; + template struct weak_result_type : public unary_function { }; + template struct weak_result_type : public unary_function { }; + template struct weak_result_type : public unary_function { }; + + // 2 argument case + template struct weak_result_type : public binary_function { }; + template struct weak_result_type : public binary_function { }; + template struct weak_result_type : public binary_function { }; + template struct weak_result_type : public binary_function { }; + template struct weak_result_type : public binary_function { }; + template struct weak_result_type : public binary_function { }; + template struct weak_result_type : public binary_function { }; + + // 3 or more arguments +#if EASTL_VARIADIC_TEMPLATES_ENABLED + template struct weak_result_type { typedef R result_type; }; + template struct weak_result_type { typedef R result_type; }; + template struct weak_result_type { typedef R result_type; }; + template struct weak_result_type { typedef R result_type; }; + template struct weak_result_type { typedef R result_type; }; + template struct weak_result_type { typedef R result_type; }; + template struct weak_result_type { typedef R result_type; }; +#endif + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // mem_fn_impl + // + template + class mem_fn_impl +#if defined(_MSC_VER) && (_MSC_VER >= 1900) // VS2015 or later + // Due to a (seemingly random) internal compiler error on VS2013 we disable eastl::unary_function and + // binary_function support for eastl::mem_fn as its not widely (if at all) used. If you require this support + // on VS2013 or below please contact us. + : public weak_result_type +#endif + { + public: + typedef T type; + + private: + type func; + + public: + EASTL_FORCE_INLINE mem_fn_impl(type _func) : func(_func) {} + +#if EASTL_VARIADIC_TEMPLATES_ENABLED + template + typename invoke_result::type operator()(ArgTypes&&... 
args) const + { + return invoke(func, eastl::forward(args)...); + } +#else + typename invoke_result::type operator()() const { return invoke_impl(func); } + + template + typename invoke_result0::type operator()(A0& a0) const + { + return invoke(func, a0); + } + + template + typename invoke_result1::type operator()(A0& a0, A1& a1) const + { + return invoke(func, a0, a1); + } + + template + typename invoke_result2::type operator()(A0& a0, A1& a1, A2& a2) const + { + return invoke(func, a0, a1, a2); + } +#endif + }; // mem_fn_impl + + + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // mem_fn -> mem_fn_impl adapters + // + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R T::*pm) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)()) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0)) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0, A1)) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0, A1, A2)) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)() const) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0) const) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0, A1) const) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0, A1, A2) const) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)() volatile) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0) volatile) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0, A1) volatile) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0, A1, A2) volatile) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)() const volatile) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0) const volatile) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0, A1) const volatile) + { return mem_fn_impl(pm); } + + template + EASTL_FORCE_INLINE mem_fn_impl mem_fn(R (T::*pm)(A0, A1, A2) const volatile) + { return mem_fn_impl(pm); } + +} // namespace eastl + +#endif // EASTL_INTERNAL_MEM_FN_H diff --git a/libkram/eastl/include/EASTL/internal/memory_base.h b/libkram/eastl/include/EASTL/internal/memory_base.h new file mode 100644 index 00000000..b1c3490b --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/memory_base.h @@ -0,0 +1,37 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_INTERNAL_MEMORY_BASE_H +#define EASTL_INTERNAL_MEMORY_BASE_H + +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. 
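
// A sketch (not part of this patch) of the eastl::mem_fn adapters defined in the
// mem_fn.h section above: a pointer-to-member-function is wrapped so it can be invoked
// like an ordinary callable, which is how eastl::function consumes it. Counter is a
// hypothetical type; <EASTL/functional.h> is assumed to pull in the internal mem_fn header.
#include <EASTL/functional.h>

struct Counter
{
    int value = 0;
    int add(int amount) { return value += amount; }
};

int mem_fn_sketch()
{
    auto add = eastl::mem_fn(&Counter::add);

    Counter counter;
    return add(counter, 5); // equivalent to counter.add(5)
}
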
+#endif + + +//////////////////////////////////////////////////////////////////////////////////////////// +// This file contains basic functionality found in the standard library 'memory' header that +// have limited or no dependencies. This allows us to utilize these utilize these functions +// in other EASTL code while avoid circular dependencies. +//////////////////////////////////////////////////////////////////////////////////////////// + +namespace eastl +{ + /// addressof + /// + /// From the C++11 Standard, section 20.6.12.1 + /// Returns the actual address of the object or function referenced by r, even in the presence of an overloaded operator&. + /// + template + T* addressof(T& value) EA_NOEXCEPT + { + return reinterpret_cast(&const_cast(reinterpret_cast(value))); + } + +} // namespace eastl + +#endif // EASTL_INTERNAL_MEMORY_BASE_H + diff --git a/libkram/eastl/include/EASTL/internal/move_help.h b/libkram/eastl/include/EASTL/internal/move_help.h new file mode 100644 index 00000000..97990df6 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/move_help.h @@ -0,0 +1,162 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_MOVE_HELP_H +#define EASTL_INTERNAL_MOVE_HELP_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include + + +// C++11's rvalue references aren't supported by earlier versions of C++. +// It turns out that in a number of cases under earlier C++ versions we can +// write code that uses rvalues similar to lvalues. We have macros below for +// such cases. For example, eastl::move (same as std::move) can be treated +// as a no-op under C++03, though with the consequence that move functionality +// isn't taken advantage of. + + +/// EASTL_MOVE +/// Acts like eastl::move when possible. Same as C++11 std::move. +/// +/// EASTL_MOVE_INLINE +/// Acts like eastl::move but is implemented inline instead of a function call. +/// This allows code to be faster in debug builds in particular. +/// Depends on C++ compiler decltype support or a similar extension. +/// +/// EASTL_FORWARD +/// Acts like eastl::forward when possible. Same as C++11 std::forward. +/// +/// EASTL_FORWARD_INLINE +/// Acts like eastl::forward but is implemented inline instead of a function call. +/// This allows code to be faster in debug builds in particular. +/// +#define EASTL_MOVE(x) eastl::move(x) +#if !defined(EA_COMPILER_NO_DECLTYPE) + #define EASTL_MOVE_INLINE(x) static_cast::type&&>(x) +#elif defined(__GNUC__) + #define EASTL_MOVE_INLINE(x) static_cast::type&&>(x) +#else + #define EASTL_MOVE_INLINE(x) eastl::move(x) +#endif + +#define EASTL_FORWARD(T, x) eastl::forward(x) +#define EASTL_FORWARD_INLINE(T, x) eastl::forward(x) // Need to investigate how to properly make a macro for this. (eastl::is_reference::value ? static_cast(static_cast(x)) : static_cast(x)) + + + + +/// EASTL_MOVE_RANGE +/// Acts like the eastl::move algorithm when possible. Same as C++11 std::move. 
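
// A sketch (not part of this patch) of why the eastl::addressof function in the
// memory_base.h section above exists: it recovers the true address of an object even
// when the type overloads operator&. Awkward is a hypothetical type; <EASTL/memory.h>
// is assumed to expose eastl::addressof.
#include <EASTL/memory.h>

struct Awkward
{
    int value = 0;
    Awkward* operator&() { return nullptr; } // hostile overload defeats plain '&obj'
};

void addressof_sketch()
{
    Awkward a;
    Awkward* real = eastl::addressof(a); // bypasses the overloaded operator&
    (void)real;
}
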
+/// Note to be confused with the single argument move: (typename remove_reference::type&& move(T&& x)) +/// http://en.cppreference.com/w/cpp/algorithm/move +/// http://en.cppreference.com/w/cpp/algorithm/move_backward +/// +#define EASTL_MOVE_RANGE(first, last, result) eastl::move(first, last, result) +#define EASTL_MOVE_BACKWARD_RANGE(first, last, resultEnd) eastl::move_backward(first, last, resultEnd) + + +namespace eastl +{ + // forward + // + // forwards the argument to another function exactly as it was passed to the calling function. + // Not to be confused with move, this is specifically for echoing templated argument types + // to another function. move is specifically about making a type be an rvalue reference (i.e. movable) type. + // + // Example usage: + // template + // void WrapperFunction(T&& arg) + // { foo(eastl::forward(arg)); } + // + // template + // void WrapperFunction(Args&&... args) + // { foo(eastl::forward(args)...); } + // + // See the C++ Standard, section 20.2.3 + // http://en.cppreference.com/w/cpp/utility/forward + // + template + EA_CPP14_CONSTEXPR T&& forward(typename eastl::remove_reference::type& x) EA_NOEXCEPT + { + return static_cast(x); + } + + + template + EA_CPP14_CONSTEXPR T&& forward(typename eastl::remove_reference::type&& x) EA_NOEXCEPT + { + static_assert(!is_lvalue_reference::value, "forward T isn't lvalue reference"); + return static_cast(x); + } + + + // move + // + // move obtains an rvalue reference to its argument and converts it to an xvalue. + // Returns, by definition: static_cast::type&&>(t). + // The primary use of this is to pass a move'd type to a function which takes T&&, + // and thus select that function instead of (e.g.) a function which takes T or T&. + // See the C++ Standard, section 20.2.3 + // http://en.cppreference.com/w/cpp/utility/move + // + template + EA_CPP14_CONSTEXPR typename eastl::remove_reference::type&& + move(T&& x) EA_NOEXCEPT + { + return static_cast::type&&>(x); + } + + + // move_if_noexcept + // + // Returns T&& if move-constructing T throws no exceptions. Instead returns const T& if + // move-constructing T throws exceptions or has no accessible copy constructor. + // The purpose of this is to use automatically use copy construction instead of move + // construction when the move may possible throw an exception. + // See the C++ Standard, section 20.2.3 + // http://en.cppreference.com/w/cpp/utility/move_if_noexcept + // + #if EASTL_EXCEPTIONS_ENABLED + template + EA_CPP14_CONSTEXPR typename eastl::conditional::value && + eastl::is_copy_constructible::value, const T&, T&&>::type + move_if_noexcept(T& x) EA_NOEXCEPT + { + return eastl::move(x); + } + #else + template + EA_CPP14_CONSTEXPR T&& + move_if_noexcept(T& x) EA_NOEXCEPT + { + return eastl::move(x); + } + #endif + +} // namespace eastl + +#endif // Header include guard + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/internal/pair_fwd_decls.h b/libkram/eastl/include/EASTL/internal/pair_fwd_decls.h new file mode 100644 index 00000000..a716482d --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/pair_fwd_decls.h @@ -0,0 +1,16 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
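
// A sketch (not part of this patch) of the perfect-forwarding pattern that the
// move_help.h comments above describe: eastl::forward preserves the value category of
// each argument, and eastl::move turns a named value into an rvalue. Named and
// make_named are hypothetical, illustration-only names.
#include <EASTL/string.h>
#include <EASTL/utility.h>

struct Named
{
    eastl::string name;
    explicit Named(eastl::string n) : name(eastl::move(n)) {} // take by value, then move into place
};

template <typename... Args>
Named make_named(Args&&... args)
{
    // Lvalue arguments are forwarded as lvalues, rvalues as rvalues.
    return Named(eastl::forward<Args>(args)...);
}
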
+///////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_PAIR_FWD_DECLS_H +#define EASTL_PAIR_FWD_DECLS_H + +#include + +namespace eastl +{ + template + struct pair; +} + +#endif // EASTL_PAIR_FWD_DECLS_H diff --git a/libkram/eastl/include/EASTL/internal/piecewise_construct_t.h b/libkram/eastl/include/EASTL/internal/piecewise_construct_t.h new file mode 100644 index 00000000..d853f0ea --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/piecewise_construct_t.h @@ -0,0 +1,46 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_PIECEWISE_CONSTRUCT_T_H +#define EASTL_INTERNAL_PIECEWISE_CONSTRUCT_T_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +namespace eastl +{ + /////////////////////////////////////////////////////////////////////////////// + /// piecewise_construct_t + /// + /// http://en.cppreference.com/w/cpp/utility/piecewise_construct_t + /// + struct piecewise_construct_t + { + explicit piecewise_construct_t() = default; + }; + + + /////////////////////////////////////////////////////////////////////////////// + /// piecewise_construct + /// + /// A tag type used to disambiguate between function overloads that take two tuple arguments. + /// + /// http://en.cppreference.com/w/cpp/utility/piecewise_construct + /// + EA_CONSTEXPR piecewise_construct_t piecewise_construct = eastl::piecewise_construct_t(); + +} // namespace eastl + + +#endif // Header include guard + + + + + + diff --git a/libkram/eastl/include/EASTL/internal/red_black_tree.h b/libkram/eastl/include/EASTL/internal/red_black_tree.h new file mode 100644 index 00000000..7448bd42 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/red_black_tree.h @@ -0,0 +1,2400 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_RED_BLACK_TREE_H +#define EASTL_RED_BLACK_TREE_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +EA_DISABLE_ALL_VC_WARNINGS() +#include +#include +EA_RESTORE_ALL_VC_WARNINGS() + + +// 4512 - 'class' : assignment operator could not be generated +// 4530 - C++ exception handler used, but unwind semantics are not enabled. Specify /EHsc +// 4571 - catch(...) semantics changed since Visual C++ 7.1; structured exceptions (SEH) are no longer caught. +EA_DISABLE_VC_WARNING(4512 4530 4571); + + +namespace eastl +{ + + /// EASTL_RBTREE_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_RBTREE_DEFAULT_NAME + #define EASTL_RBTREE_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " rbtree" // Unless the user overrides something, this is "EASTL rbtree". 
+ #endif + + + /// EASTL_RBTREE_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_RBTREE_DEFAULT_ALLOCATOR + #define EASTL_RBTREE_DEFAULT_ALLOCATOR allocator_type(EASTL_RBTREE_DEFAULT_NAME) + #endif + + + /// EASTL_RBTREE_LEGACY_SWAP_BEHAVIOUR_REQUIRES_COPY_CTOR + /// + #ifndef EASTL_RBTREE_LEGACY_SWAP_BEHAVIOUR_REQUIRES_COPY_CTOR + #define EASTL_RBTREE_LEGACY_SWAP_BEHAVIOUR_REQUIRES_COPY_CTOR 0 + #endif + + + /// RBTreeColor + /// + enum RBTreeColor + { + kRBTreeColorRed, + kRBTreeColorBlack + }; + + + + /// RBTreeColor + /// + enum RBTreeSide + { + kRBTreeSideLeft, + kRBTreeSideRight + }; + + + + /// rbtree_node_base + /// + /// We define a rbtree_node_base separately from rbtree_node (below), because it + /// allows us to have non-templated operations, and it makes it so that the + /// rbtree anchor node doesn't carry a T with it, which would waste space and + /// possibly lead to surprising the user due to extra Ts existing that the user + /// didn't explicitly create. The downside to all of this is that it makes debug + /// viewing of an rbtree harder, given that the node pointers are of type + /// rbtree_node_base and not rbtree_node. + /// + struct rbtree_node_base + { + typedef rbtree_node_base this_type; + + public: + this_type* mpNodeRight; // Declared first because it is used most often. + this_type* mpNodeLeft; + this_type* mpNodeParent; + char mColor; // We only need one bit here, would be nice if we could stuff that bit somewhere else. + }; + + + /// rbtree_node + /// + template + struct rbtree_node : public rbtree_node_base + { + Value mValue; // For set and multiset, this is the user's value, for map and multimap, this is a pair of key/value. + + // This type is never constructed, so to avoid a MSVC warning we "delete" the copy constructor. + // + // Potentially we could provide a constructor that would satisfy the compiler and change the code to use this constructor + // instead of constructing mValue in place within an unconstructed rbtree_node. + #if defined(_MSC_VER) + rbtree_node(const rbtree_node&) = delete; + #endif + }; + + + + + // rbtree_node_base functions + // + // These are the fundamental functions that we use to maintain the + // tree. The bulk of the work of the tree maintenance is done in + // these functions. + // + EASTL_API rbtree_node_base* RBTreeIncrement (const rbtree_node_base* pNode); + EASTL_API rbtree_node_base* RBTreeDecrement (const rbtree_node_base* pNode); + EASTL_API rbtree_node_base* RBTreeGetMinChild (const rbtree_node_base* pNode); + EASTL_API rbtree_node_base* RBTreeGetMaxChild (const rbtree_node_base* pNode); + EASTL_API size_t RBTreeGetBlackCount(const rbtree_node_base* pNodeTop, + const rbtree_node_base* pNodeBottom); + EASTL_API void RBTreeInsert ( rbtree_node_base* pNode, + rbtree_node_base* pNodeParent, + rbtree_node_base* pNodeAnchor, + RBTreeSide insertionSide); + EASTL_API void RBTreeErase ( rbtree_node_base* pNode, + rbtree_node_base* pNodeAnchor); + + + + + + + + /// rbtree_iterator + /// + template + struct rbtree_iterator + { + typedef rbtree_iterator this_type; + typedef rbtree_iterator iterator; + typedef rbtree_iterator const_iterator; + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. 
+ typedef ptrdiff_t difference_type; + typedef T value_type; + typedef rbtree_node_base base_node_type; + typedef rbtree_node node_type; + typedef Pointer pointer; + typedef Reference reference; + typedef EASTL_ITC_NS::bidirectional_iterator_tag iterator_category; + + public: + node_type* mpNode; + + public: + rbtree_iterator(); + explicit rbtree_iterator(const node_type* pNode); + rbtree_iterator(const iterator& x); + + reference operator*() const; + pointer operator->() const; + + rbtree_iterator& operator++(); + rbtree_iterator operator++(int); + + rbtree_iterator& operator--(); + rbtree_iterator operator--(int); + + }; // rbtree_iterator + + + /////////////////////////////////////////////////////////////////////////////// + // rb_base_compare_ebo + // + // Utilizes the "empty base-class optimization" to reduce the size of the rbtree + // when its Compare template argument is an empty class. + /////////////////////////////////////////////////////////////////////////////// + + template ::value> + struct rb_base_compare_ebo + { + protected: + rb_base_compare_ebo() : mCompare() {} + rb_base_compare_ebo(const Compare& compare) : mCompare(compare) {} + + Compare& get_compare() { return mCompare; } + const Compare& get_compare() const { return mCompare; } + + template + bool compare(const T& lhs, const T& rhs) + { + return mCompare(lhs, rhs); + } + + template + bool compare(const T& lhs, const T& rhs) const + { + return mCompare(lhs, rhs); + } + + private: + Compare mCompare; + }; + + template + struct rb_base_compare_ebo : private Compare + { + protected: + rb_base_compare_ebo() {} + rb_base_compare_ebo(const Compare& compare) : Compare(compare) {} + + Compare& get_compare() { return *this; } + const Compare& get_compare() const { return *this; } + + template + bool compare(const T& lhs, const T& rhs) + { + return Compare::operator()(lhs, rhs); + } + + template + bool compare(const T& lhs, const T& rhs) const + { + return Compare::operator()(lhs, rhs); + } + }; + + + + /////////////////////////////////////////////////////////////////////////////// + // rb_base + // + // This class allows us to use a generic rbtree as the basis of map, multimap, + // set, and multiset transparently. The vital template parameters for this are + // the ExtractKey and the bUniqueKeys parameters. + // + // If the rbtree has a value type of the form pair (i.e. it is a map or + // multimap and not a set or multiset) and a key extraction policy that returns + // the first part of the pair, the rbtree gets a mapped_type typedef. + // If it satisfies those criteria and also has unique keys, then it also gets an + // operator[] (which only map and set have and multimap and multiset don't have). + // + /////////////////////////////////////////////////////////////////////////////// + + + + /// rb_base + /// This specialization is used for 'set'. In this case, Key and Value + /// will be the same as each other and ExtractKey will be eastl::use_self. + /// + template + struct rb_base : public rb_base_compare_ebo + { + typedef ExtractKey extract_key; + + protected: + using rb_base_compare_ebo::compare; + using rb_base_compare_ebo::get_compare; + + public: + rb_base() {} + rb_base(const Compare& compare) : rb_base_compare_ebo(compare) {} + }; + + + /// rb_base + /// This class is used for 'multiset'. + /// In this case, Key and Value will be the same as each + /// other and ExtractKey will be eastl::use_self. 
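
// A sketch (not part of this patch) of the empty base class optimization that
// rb_base_compare_ebo above relies on: an empty comparator adds no storage when it is
// inherited from, whereas holding it as a member costs at least one byte plus padding.
// The exact sizes printed are implementation-dependent.
#include <cstdio>

struct EmptyLess
{
    template <typename T>
    bool operator()(const T& a, const T& b) const { return a < b; }
};

struct CompareAsMember { EmptyLess mCompare; void* mpRoot; }; // typically 2 * sizeof(void*)
struct CompareAsBase : private EmptyLess { void* mpRoot; };   // typically sizeof(void*)

void ebo_size_sketch()
{
    std::printf("member: %zu, base: %zu\n", sizeof(CompareAsMember), sizeof(CompareAsBase));
}
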
+ /// + template + struct rb_base : public rb_base_compare_ebo + { + typedef ExtractKey extract_key; + + protected: + using rb_base_compare_ebo::compare; + using rb_base_compare_ebo::get_compare; + + public: + rb_base() {} + rb_base(const Compare& compare) : rb_base_compare_ebo(compare) {} + }; + + + /// rb_base + /// This specialization is used for 'map'. + /// + template + struct rb_base, true, RBTree> : public rb_base_compare_ebo + { + typedef eastl::use_first extract_key; + + using rb_base_compare_ebo::compare; + using rb_base_compare_ebo::get_compare; + + public: + rb_base() {} + rb_base(const Compare& compare) : rb_base_compare_ebo(compare) {} + }; + + + /// rb_base + /// This specialization is used for 'multimap'. + /// + template + struct rb_base, false, RBTree> : public rb_base_compare_ebo + { + typedef eastl::use_first extract_key; + + using rb_base_compare_ebo::compare; + using rb_base_compare_ebo::get_compare; + + public: + rb_base() {} + rb_base(const Compare& compare) : rb_base_compare_ebo(compare) {} + }; + + + /// rbtree + /// + /// rbtree is the red-black tree basis for the map, multimap, set, and multiset + /// containers. Just about all the work of those containers is done here, and + /// they are merely a shell which sets template policies that govern the code + /// generation for this rbtree. + /// + /// This rbtree implementation is pretty much the same as all other modern + /// rbtree implementations, as the topic is well known and researched. We may + /// choose to implement a "relaxed balancing" option at some point in the + /// future if it is deemed worthwhile. Most rbtree implementations don't do this. + /// + /// The primary rbtree member variable is mAnchor, which is a node_type and + /// acts as the end node. However, like any other node, it has mpNodeLeft, + /// mpNodeRight, and mpNodeParent members. We do the conventional trick of + /// assigning begin() (left-most rbtree node) to mpNodeLeft, assigning + /// 'end() - 1' (a.k.a. rbegin()) to mpNodeRight, and assigning the tree root + /// node to mpNodeParent. + /// + /// Compare (functor): This is a comparison class which defaults to 'less'. + /// It is a common STL thing which takes two arguments and returns true if + /// the first is less than the second. + /// + /// ExtractKey (functor): This is a class which gets the key from a stored + /// node. With map and set, the node is a pair, whereas with set and multiset + /// the node is just the value. ExtractKey will be either eastl::use_first (map and multimap) + /// or eastl::use_self (set and multiset). + /// + /// bMutableIterators (bool): true if rbtree::iterator is a mutable + /// iterator, false if iterator and const_iterator are both const iterators. + /// It will be true for map and multimap and false for set and multiset. + /// + /// bUniqueKeys (bool): true if the keys are to be unique, and false if there + /// can be multiple instances of a given key. It will be true for set and map + /// and false for multiset and multimap. + /// + /// To consider: Add an option for relaxed tree balancing. This could result + /// in performance improvements but would require a more complicated implementation. + /// + /////////////////////////////////////////////////////////////////////// + /// find_as + /// In order to support the ability to have a tree of strings but + /// be able to do efficiently lookups via char pointers (i.e. so they + /// aren't converted to string objects), we provide the find_as + /// function. 
This function allows you to do a find with a key of a + /// type other than the tree's key type. See the find_as function + /// for more documentation on this. + /// + template + class rbtree + : public rb_base > + { + public: + typedef ptrdiff_t difference_type; + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. + typedef Key key_type; + typedef Value value_type; + typedef rbtree_node node_type; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef value_type* pointer; + typedef const value_type* const_pointer; + + typedef typename type_select, + rbtree_iterator >::type iterator; + typedef rbtree_iterator const_iterator; + typedef eastl::reverse_iterator reverse_iterator; + typedef eastl::reverse_iterator const_reverse_iterator; + + typedef Allocator allocator_type; + typedef Compare key_compare; + typedef typename type_select, iterator>::type insert_return_type; // map/set::insert return a pair, multimap/multiset::iterator return an iterator. + typedef rbtree this_type; + typedef rb_base base_type; + typedef integral_constant has_unique_keys_type; + typedef typename base_type::extract_key extract_key; + + protected: + using base_type::compare; + using base_type::get_compare; + + public: + rbtree_node_base mAnchor; /// This node acts as end() and its mpLeft points to begin(), and mpRight points to rbegin() (the last node on the right). + size_type mnSize; /// Stores the count of nodes in the tree (not counting the anchor node). + allocator_type mAllocator; // To do: Use base class optimization to make this go away. + + public: + // ctor/dtor + rbtree(); + rbtree(const allocator_type& allocator); + rbtree(const Compare& compare, const allocator_type& allocator = EASTL_RBTREE_DEFAULT_ALLOCATOR); + rbtree(const this_type& x); + rbtree(this_type&& x); + rbtree(this_type&& x, const allocator_type& allocator); + + template + rbtree(InputIterator first, InputIterator last, const Compare& compare, const allocator_type& allocator = EASTL_RBTREE_DEFAULT_ALLOCATOR); + + ~rbtree(); + + public: + // properties + const allocator_type& get_allocator() const EA_NOEXCEPT; + allocator_type& get_allocator() EA_NOEXCEPT; + void set_allocator(const allocator_type& allocator); + + const key_compare& key_comp() const { return get_compare(); } + key_compare& key_comp() { return get_compare(); } + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + void swap(this_type& x); + + public: + // iterators + iterator begin() EA_NOEXCEPT; + const_iterator begin() const EA_NOEXCEPT; + const_iterator cbegin() const EA_NOEXCEPT; + + iterator end() EA_NOEXCEPT; + const_iterator end() const EA_NOEXCEPT; + const_iterator cend() const EA_NOEXCEPT; + + reverse_iterator rbegin() EA_NOEXCEPT; + const_reverse_iterator rbegin() const EA_NOEXCEPT; + const_reverse_iterator crbegin() const EA_NOEXCEPT; + + reverse_iterator rend() EA_NOEXCEPT; + const_reverse_iterator rend() const EA_NOEXCEPT; + const_reverse_iterator crend() const EA_NOEXCEPT; + + public: + bool empty() const EA_NOEXCEPT; + size_type size() const EA_NOEXCEPT; + + template + insert_return_type emplace(Args&&... args); + + template + iterator emplace_hint(const_iterator position, Args&&... args); + + template eastl::pair try_emplace(const key_type& k, Args&&... args); + template eastl::pair try_emplace(key_type&& k, Args&&... 
args); + template iterator try_emplace(const_iterator position, const key_type& k, Args&&... args); + template iterator try_emplace(const_iterator position, key_type&& k, Args&&... args); + + // Standard conversion overload to avoid the overhead of mismatched 'pair' types. + template ::value>::type> + insert_return_type insert(P&& otherValue); + + // Currently limited to value_type instead of P because it collides with insert(InputIterator, InputIterator). + // To allow this to work with templated P we need to implement a compile-time specialization for the + // case that P&& is const_iterator and have that specialization handle insert(InputIterator, InputIterator) + // instead of insert(InputIterator, InputIterator). Curiously, neither libstdc++ nor libc++ + // implement this function either, which suggests they ran into the same problem I did here + // and haven't yet resolved it (at least as of March 2014, GCC 4.8.1). + iterator insert(const_iterator hint, value_type&& value); + + /// map::insert and set::insert return a pair, while multimap::insert and + /// multiset::insert return an iterator. + insert_return_type insert(const value_type& value); + + // C++ standard: inserts value if and only if there is no element with + // key equivalent to the key of t in containers with unique keys; always + // inserts value in containers with equivalent keys. Always returns the + // iterator pointing to the element with key equivalent to the key of value. + // iterator position is a hint pointing to where the insert should start + // to search. However, there is a potential defect/improvement report on this behaviour: + // LWG issue #233 (http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1780.html) + // We follow the same approach as SGI STL/STLPort and use the position as + // a forced insertion position for the value when possible. + iterator insert(const_iterator position, const value_type& value); + + void insert(std::initializer_list ilist); + + template + void insert(InputIterator first, InputIterator last); + + // TODO(rparolin): + // insert_return_type insert(node_type&& nh); + // iterator insert(const_iterator hint, node_type&& nh); + + template pair insert_or_assign(const key_type& k, M&& obj); + template pair insert_or_assign(key_type&& k, M&& obj); + template iterator insert_or_assign(const_iterator hint, const key_type& k, M&& obj); + template iterator insert_or_assign(const_iterator hint, key_type&& k, M&& obj); + + iterator erase(const_iterator position); + iterator erase(const_iterator first, const_iterator last); + reverse_iterator erase(const_reverse_iterator position); + reverse_iterator erase(const_reverse_iterator first, const_reverse_iterator last); + + // For some reason, multiple STL versions make a specialization + // for erasing an array of key_types. I'm pretty sure we don't + // need this, but just to be safe we will follow suit. + // The implementation is trivial. Returns void because the values + // could well be randomly distributed throughout the tree and thus + // a return value would be nearly meaningless. + void erase(const key_type* first, const key_type* last); + + void clear(); + void reset_lose_memory(); // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + + iterator find(const key_type& key); + const_iterator find(const key_type& key) const; + + /// Implements a find whereby the user supplies a comparison of a different type + /// than the tree's value_type. 
A useful case of this is one whereby you have + /// a container of string objects but want to do searches via passing in char pointers. + /// The problem is that without this kind of find, you need to do the expensive operation + /// of converting the char pointer to a string so it can be used as the argument to the + /// find function. + /// + /// Example usage (note that the compare uses string as first type and char* as second): + /// set strings; + /// strings.find_as("hello", less_2()); + /// + template iterator find_as(const U& u, Compare2 compare2); + template const_iterator find_as(const U& u, Compare2 compare2) const; + + iterator lower_bound(const key_type& key); + const_iterator lower_bound(const key_type& key) const; + + iterator upper_bound(const key_type& key); + const_iterator upper_bound(const key_type& key) const; + + bool validate() const; + int validate_iterator(const_iterator i) const; + + protected: + node_type* DoAllocateNode(); + void DoFreeNode(node_type* pNode); + + node_type* DoCreateNodeFromKey(const key_type& key); + + template + node_type* DoCreateNode(Args&&... args); + node_type* DoCreateNode(const value_type& value); + node_type* DoCreateNode(value_type&& value); + node_type* DoCreateNode(const node_type* pNodeSource, node_type* pNodeParent); + + node_type* DoCopySubtree(const node_type* pNodeSource, node_type* pNodeDest); + void DoNukeSubtree(node_type* pNode); + + template + eastl::pair DoInsertValue(true_type, Args&&... args); + + template + iterator DoInsertValue(false_type, Args&&... args); + + eastl::pair DoInsertValue(true_type, value_type&& value); + iterator DoInsertValue(false_type, value_type&& value); + + template + iterator DoInsertValueImpl(node_type* pNodeParent, bool bForceToLeft, const key_type& key, Args&&... args); + iterator DoInsertValueImpl(node_type* pNodeParent, bool bForceToLeft, const key_type& key, node_type* pNodeNew); + + eastl::pair DoInsertKey(true_type, const key_type& key); + iterator DoInsertKey(false_type, const key_type& key); + + template + iterator DoInsertValueHint(true_type, const_iterator position, Args&&... args); + + template + iterator DoInsertValueHint(false_type, const_iterator position, Args&&... args); + + iterator DoInsertValueHint(true_type, const_iterator position, value_type&& value); + iterator DoInsertValueHint(false_type, const_iterator position, value_type&& value); + + iterator DoInsertKey(true_type, const_iterator position, const key_type& key); // By design we return iterator and not a pair. 
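+ // Note: the true_type/false_type first parameter on these Do* overloads is a
+ // compile-time tag taken from has_unique_keys_type; a call such as
+ //     DoInsertKey(has_unique_keys_type(), key);
+ // resolves to the unique-key overload for map/set and to the non-unique overload
+ // for multimap/multiset.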
+ iterator DoInsertKey(false_type, const_iterator position, const key_type& key); + iterator DoInsertKeyImpl(node_type* pNodeParent, bool bForceToLeft, const key_type& key); + + node_type* DoGetKeyInsertionPositionUniqueKeys(bool& canInsert, const key_type& key); + node_type* DoGetKeyInsertionPositionNonuniqueKeys(const key_type& key); + + node_type* DoGetKeyInsertionPositionUniqueKeysHint(const_iterator position, bool& bForceToLeft, const key_type& key); + node_type* DoGetKeyInsertionPositionNonuniqueKeysHint(const_iterator position, bool& bForceToLeft, const key_type& key); + + }; // rbtree + + + + + + /////////////////////////////////////////////////////////////////////// + // rbtree_node_base functions + /////////////////////////////////////////////////////////////////////// + + EASTL_API inline rbtree_node_base* RBTreeGetMinChild(const rbtree_node_base* pNodeBase) + { + while(pNodeBase->mpNodeLeft) + pNodeBase = pNodeBase->mpNodeLeft; + return const_cast(pNodeBase); + } + + EASTL_API inline rbtree_node_base* RBTreeGetMaxChild(const rbtree_node_base* pNodeBase) + { + while(pNodeBase->mpNodeRight) + pNodeBase = pNodeBase->mpNodeRight; + return const_cast(pNodeBase); + } + + // The rest of the functions are non-trivial and are found in + // the corresponding .cpp file to this file. + + + + /////////////////////////////////////////////////////////////////////// + // rbtree_iterator functions + /////////////////////////////////////////////////////////////////////// + + template + rbtree_iterator::rbtree_iterator() + : mpNode(NULL) { } + + + template + rbtree_iterator::rbtree_iterator(const node_type* pNode) + : mpNode(static_cast(const_cast(pNode))) { } + + + template + rbtree_iterator::rbtree_iterator(const iterator& x) + : mpNode(x.mpNode) { } + + + template + typename rbtree_iterator::reference + rbtree_iterator::operator*() const + { return mpNode->mValue; } + + + template + typename rbtree_iterator::pointer + rbtree_iterator::operator->() const + { return &mpNode->mValue; } + + + template + typename rbtree_iterator::this_type& + rbtree_iterator::operator++() + { + mpNode = static_cast(RBTreeIncrement(mpNode)); + return *this; + } + + + template + typename rbtree_iterator::this_type + rbtree_iterator::operator++(int) + { + this_type temp(*this); + mpNode = static_cast(RBTreeIncrement(mpNode)); + return temp; + } + + + template + typename rbtree_iterator::this_type& + rbtree_iterator::operator--() + { + mpNode = static_cast(RBTreeDecrement(mpNode)); + return *this; + } + + + template + typename rbtree_iterator::this_type + rbtree_iterator::operator--(int) + { + this_type temp(*this); + mpNode = static_cast(RBTreeDecrement(mpNode)); + return temp; + } + + + // The C++ defect report #179 requires that we support comparisons between const and non-const iterators. + // Thus we provide additional template paremeters here to support this. The defect report does not + // require us to support comparisons between reverse_iterators and const_reverse_iterators. + template + inline bool operator==(const rbtree_iterator& a, + const rbtree_iterator& b) + { + return a.mpNode == b.mpNode; + } + + + template + inline bool operator!=(const rbtree_iterator& a, + const rbtree_iterator& b) + { + return a.mpNode != b.mpNode; + } + + + // We provide a version of operator!= for the case where the iterators are of the + // same type. This helps prevent ambiguity errors in the presence of rel_ops. 
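+ // For illustration (hypothetical snippet): given two non-const iterators into the
+ // same tree,
+ //     if(it1 != it2) { ... }
+ // could otherwise be ambiguous between a rel_ops-provided operator!= and the
+ // mixed-type overload above; the same-type overload below is the better match.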
+ template + inline bool operator!=(const rbtree_iterator& a, + const rbtree_iterator& b) + { + return a.mpNode != b.mpNode; + } + + + + + /////////////////////////////////////////////////////////////////////// + // rbtree functions + /////////////////////////////////////////////////////////////////////// + + template + inline rbtree::rbtree() + : mAnchor(), + mnSize(0), + mAllocator(EASTL_RBTREE_DEFAULT_NAME) + { + reset_lose_memory(); + } + + + template + inline rbtree::rbtree(const allocator_type& allocator) + : mAnchor(), + mnSize(0), + mAllocator(allocator) + { + reset_lose_memory(); + } + + + template + inline rbtree::rbtree(const C& compare, const allocator_type& allocator) + : base_type(compare), + mAnchor(), + mnSize(0), + mAllocator(allocator) + { + reset_lose_memory(); + } + + + template + inline rbtree::rbtree(const this_type& x) + : base_type(x.get_compare()), + mAnchor(), + mnSize(0), + mAllocator(x.mAllocator) + { + reset_lose_memory(); + + if(x.mAnchor.mpNodeParent) // mAnchor.mpNodeParent is the rb_tree root node. + { + mAnchor.mpNodeParent = DoCopySubtree((const node_type*)x.mAnchor.mpNodeParent, (node_type*)&mAnchor); + mAnchor.mpNodeRight = RBTreeGetMaxChild(mAnchor.mpNodeParent); + mAnchor.mpNodeLeft = RBTreeGetMinChild(mAnchor.mpNodeParent); + mnSize = x.mnSize; + } + } + + + template + inline rbtree::rbtree(this_type&& x) + : base_type(x.get_compare()), + mAnchor(), + mnSize(0), + mAllocator(x.mAllocator) + { + reset_lose_memory(); + swap(x); + } + + template + inline rbtree::rbtree(this_type&& x, const allocator_type& allocator) + : base_type(x.get_compare()), + mAnchor(), + mnSize(0), + mAllocator(allocator) + { + reset_lose_memory(); + swap(x); // swap will directly or indirectly handle the possibility that mAllocator != x.mAllocator. + } + + + template + template + inline rbtree::rbtree(InputIterator first, InputIterator last, const C& compare, const allocator_type& allocator) + : base_type(compare), + mAnchor(), + mnSize(0), + mAllocator(allocator) + { + reset_lose_memory(); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for(; first != last; ++first) + insert(*first); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + clear(); + throw; + } + #endif + } + + + template + inline rbtree::~rbtree() + { + // Erase the entire tree. DoNukeSubtree is not a + // conventional erase function, as it does no rebalancing. 
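+ // No rebalancing is needed here: every node is about to be destroyed, and the
+ // anchor itself is a plain member of the tree, so it is never freed.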
+ DoNukeSubtree((node_type*)mAnchor.mpNodeParent); + } + + + template + inline const typename rbtree::allocator_type& + rbtree::get_allocator() const EA_NOEXCEPT + { + return mAllocator; + } + + + template + inline typename rbtree::allocator_type& + rbtree::get_allocator() EA_NOEXCEPT + { + return mAllocator; + } + + + template + inline void rbtree::set_allocator(const allocator_type& allocator) + { + mAllocator = allocator; + } + + + template + inline typename rbtree::size_type + rbtree::size() const EA_NOEXCEPT + { return mnSize; } + + + template + inline bool rbtree::empty() const EA_NOEXCEPT + { return (mnSize == 0); } + + + template + inline typename rbtree::iterator + rbtree::begin() EA_NOEXCEPT + { return iterator(static_cast(mAnchor.mpNodeLeft)); } + + + template + inline typename rbtree::const_iterator + rbtree::begin() const EA_NOEXCEPT + { return const_iterator(static_cast(const_cast(mAnchor.mpNodeLeft))); } + + + template + inline typename rbtree::const_iterator + rbtree::cbegin() const EA_NOEXCEPT + { return const_iterator(static_cast(const_cast(mAnchor.mpNodeLeft))); } + + + template + inline typename rbtree::iterator + rbtree::end() EA_NOEXCEPT + { return iterator(static_cast(&mAnchor)); } + + + template + inline typename rbtree::const_iterator + rbtree::end() const EA_NOEXCEPT + { return const_iterator(static_cast(const_cast(&mAnchor))); } + + + template + inline typename rbtree::const_iterator + rbtree::cend() const EA_NOEXCEPT + { return const_iterator(static_cast(const_cast(&mAnchor))); } + + + template + inline typename rbtree::reverse_iterator + rbtree::rbegin() EA_NOEXCEPT + { return reverse_iterator(end()); } + + + template + inline typename rbtree::const_reverse_iterator + rbtree::rbegin() const EA_NOEXCEPT + { return const_reverse_iterator(end()); } + + + template + inline typename rbtree::const_reverse_iterator + rbtree::crbegin() const EA_NOEXCEPT + { return const_reverse_iterator(end()); } + + + template + inline typename rbtree::reverse_iterator + rbtree::rend() EA_NOEXCEPT + { return reverse_iterator(begin()); } + + + template + inline typename rbtree::const_reverse_iterator + rbtree::rend() const EA_NOEXCEPT + { return const_reverse_iterator(begin()); } + + + template + inline typename rbtree::const_reverse_iterator + rbtree::crend() const EA_NOEXCEPT + { return const_reverse_iterator(begin()); } + + + template + inline typename rbtree::this_type& + rbtree::operator=(const this_type& x) + { + if(this != &x) + { + clear(); + + #if EASTL_ALLOCATOR_COPY_ENABLED + mAllocator = x.mAllocator; + #endif + + get_compare() = x.get_compare(); + + if(x.mAnchor.mpNodeParent) // mAnchor.mpNodeParent is the rb_tree root node. + { + mAnchor.mpNodeParent = DoCopySubtree((const node_type*)x.mAnchor.mpNodeParent, (node_type*)&mAnchor); + mAnchor.mpNodeRight = RBTreeGetMaxChild(mAnchor.mpNodeParent); + mAnchor.mpNodeLeft = RBTreeGetMinChild(mAnchor.mpNodeParent); + mnSize = x.mnSize; + } + } + return *this; + } + + template + inline typename rbtree::this_type& + rbtree::operator=(this_type&& x) + { + if(this != &x) + { + clear(); // To consider: Are we really required to clear here? x is going away soon and will clear itself in its dtor. + swap(x); // member swap handles the case that x has a different allocator than our allocator by doing a copy. + } + return *this; + } + + template + inline typename rbtree::this_type& + rbtree::operator=(std::initializer_list ilist) + { + // The simplest means of doing this is to clear and insert. 
There probably isn't a generic + // solution that's any more efficient without having prior knowledge of the ilist contents. + clear(); + + for(typename std::initializer_list::iterator it = ilist.begin(), itEnd = ilist.end(); it != itEnd; ++it) + DoInsertValue(has_unique_keys_type(), eastl::move(*it)); + + return *this; + } + + + template + void rbtree::swap(this_type& x) + { + #if EASTL_RBTREE_LEGACY_SWAP_BEHAVIOUR_REQUIRES_COPY_CTOR + if(mAllocator == x.mAllocator) // If allocators are equivalent... + #endif + { + // Most of our members can be exchaged by a basic swap: + // We leave mAllocator as-is. + eastl::swap(mnSize, x.mnSize); + eastl::swap(get_compare(), x.get_compare()); + #if !EASTL_RBTREE_LEGACY_SWAP_BEHAVIOUR_REQUIRES_COPY_CTOR + eastl::swap(mAllocator, x.mAllocator); + #endif + + + // However, because our anchor node is a part of our class instance and not + // dynamically allocated, we can't do a swap of it but must do a more elaborate + // procedure. This is the downside to having the mAnchor be like this, but + // otherwise we consider it a good idea to avoid allocating memory for a + // nominal container instance. + + // We optimize for the expected most common case: both pointers being non-null. + if(mAnchor.mpNodeParent && x.mAnchor.mpNodeParent) // If both pointers are non-null... + { + eastl::swap(mAnchor.mpNodeRight, x.mAnchor.mpNodeRight); + eastl::swap(mAnchor.mpNodeLeft, x.mAnchor.mpNodeLeft); + eastl::swap(mAnchor.mpNodeParent, x.mAnchor.mpNodeParent); + + // We need to fix up the anchors to point to themselves (we can't just swap them). + mAnchor.mpNodeParent->mpNodeParent = &mAnchor; + x.mAnchor.mpNodeParent->mpNodeParent = &x.mAnchor; + } + else if(mAnchor.mpNodeParent) + { + x.mAnchor.mpNodeRight = mAnchor.mpNodeRight; + x.mAnchor.mpNodeLeft = mAnchor.mpNodeLeft; + x.mAnchor.mpNodeParent = mAnchor.mpNodeParent; + x.mAnchor.mpNodeParent->mpNodeParent = &x.mAnchor; + + // We need to fix up our anchor to point it itself (we can't have it swap with x). + mAnchor.mpNodeRight = &mAnchor; + mAnchor.mpNodeLeft = &mAnchor; + mAnchor.mpNodeParent = NULL; + } + else if(x.mAnchor.mpNodeParent) + { + mAnchor.mpNodeRight = x.mAnchor.mpNodeRight; + mAnchor.mpNodeLeft = x.mAnchor.mpNodeLeft; + mAnchor.mpNodeParent = x.mAnchor.mpNodeParent; + mAnchor.mpNodeParent->mpNodeParent = &mAnchor; + + // We need to fix up x's anchor to point it itself (we can't have it swap with us). + x.mAnchor.mpNodeRight = &x.mAnchor; + x.mAnchor.mpNodeLeft = &x.mAnchor; + x.mAnchor.mpNodeParent = NULL; + } // Else both are NULL and there is nothing to do. + } + #if EASTL_RBTREE_LEGACY_SWAP_BEHAVIOUR_REQUIRES_COPY_CTOR + else + { + const this_type temp(*this); // Can't call eastl::swap because that would + *this = x; // itself call this member swap function. + x = temp; + } + #endif + } + + + template + template + inline typename rbtree::insert_return_type // map/set::insert return a pair, multimap/multiset::iterator return an iterator. + rbtree::emplace(Args&&... args) + { + return DoInsertValue(has_unique_keys_type(), eastl::forward(args)...); + } + + template + template + typename rbtree::iterator + rbtree::emplace_hint(const_iterator position, Args&&... args) + { + return DoInsertValueHint(has_unique_keys_type(), position, eastl::forward(args)...); + } + + template + template + inline eastl::pair::iterator, bool> + rbtree::try_emplace(const key_type& key, Args&&... 
args) + { + return DoInsertValue(has_unique_keys_type(), piecewise_construct, eastl::forward_as_tuple(key), eastl::forward_as_tuple(eastl::forward(args)...)); + } + + template + template + inline eastl::pair::iterator, bool> + rbtree::try_emplace(key_type&& key, Args&&... args) + { + return DoInsertValue(has_unique_keys_type(), piecewise_construct, eastl::forward_as_tuple(eastl::move(key)), eastl::forward_as_tuple(eastl::forward(args)...)); + } + + template + template + inline typename rbtree::iterator + rbtree::try_emplace(const_iterator position, const key_type& key, Args&&... args) + { + return DoInsertValueHint( + has_unique_keys_type(), position, + piecewise_construct, eastl::forward_as_tuple(key), eastl::forward_as_tuple(eastl::forward(args)...)); + } + + template + template + inline typename rbtree::iterator + rbtree::try_emplace(const_iterator position, key_type&& key, Args&&... args) + { + return DoInsertValueHint( + has_unique_keys_type(), position, + piecewise_construct, eastl::forward_as_tuple(eastl::move(key)), eastl::forward_as_tuple(eastl::forward(args)...)); + } + + + template + template + inline typename rbtree::insert_return_type // map/set::insert return a pair, multimap/multiset::iterator return an iterator. + rbtree::insert(P&& otherValue) + { + // Need to use forward instead of move because P&& is a "universal reference" instead of an rvalue reference. + return emplace(eastl::forward
(otherValue)); + } + + + template + inline typename rbtree::iterator + rbtree::insert(const_iterator position, value_type&& value) + { + return DoInsertValueHint(has_unique_keys_type(), position, eastl::move(value)); + } + + + template + inline typename rbtree::insert_return_type // map/set::insert return a pair, multimap/multiset::iterator return an iterator. + rbtree::insert(const value_type& value) + { + return DoInsertValue(has_unique_keys_type(), value); + } + + + template + typename rbtree::iterator + rbtree::insert(const_iterator position, const value_type& value) + { + return DoInsertValueHint(has_unique_keys_type(), position, value); + } + + + template + template + eastl::pair::iterator, bool> + rbtree::insert_or_assign(const key_type& k, M&& obj) + { + auto iter = find(k); + + if(iter == end()) + { + return insert(value_type(piecewise_construct, eastl::forward_as_tuple(k), eastl::forward_as_tuple(eastl::forward(obj)))); + } + else + { + iter->second = eastl::forward(obj); + return {iter, false}; + } + } + + template + template + eastl::pair::iterator, bool> + rbtree::insert_or_assign(key_type&& k, M&& obj) + { + auto iter = find(k); + + if(iter == end()) + { + return insert(value_type(piecewise_construct, eastl::forward_as_tuple(eastl::move(k)), eastl::forward_as_tuple(eastl::forward(obj)))); + } + else + { + iter->second = eastl::forward(obj); + return {iter, false}; + } + } + + template + template + typename rbtree::iterator + rbtree::insert_or_assign(const_iterator hint, const key_type& k, M&& obj) + { + auto iter = find(k); + + if(iter == end()) + { + return insert(hint, value_type(piecewise_construct, eastl::forward_as_tuple(k), eastl::forward_as_tuple(eastl::forward(obj)))); + } + else + { + iter->second = eastl::forward(obj); + return iter; + } + } + + template + template + typename rbtree::iterator + rbtree::insert_or_assign(const_iterator hint, key_type&& k, M&& obj) + { + auto iter = find(k); + + if(iter == end()) + { + return insert(hint, value_type(piecewise_construct, eastl::forward_as_tuple(eastl::move(k)), eastl::forward_as_tuple(eastl::forward(obj)))); + } + else + { + iter->second = eastl::forward(obj); + return iter; + } + } + + template + typename rbtree::node_type* + rbtree::DoGetKeyInsertionPositionUniqueKeys(bool& canInsert, const key_type& key) + { + // This code is essentially a slightly modified copy of the the rbtree::insert + // function whereby this version takes a key and not a full value_type. + extract_key extractKey; + + node_type* pCurrent = (node_type*)mAnchor.mpNodeParent; // Start with the root node. + node_type* pLowerBound = (node_type*)&mAnchor; // Set it to the container end for now. + node_type* pParent; // This will be where we insert the new node. + + bool bValueLessThanNode = true; // If the tree is empty, this will result in an insertion at the front. + + // Find insertion position of the value. This will either be a position which + // already contains the value, a position which is greater than the value or + // end(), which we treat like a position which is greater than the value. + while(EASTL_LIKELY(pCurrent)) // Do a walk down the tree. + { + bValueLessThanNode = compare(key, extractKey(pCurrent->mValue)); + pLowerBound = pCurrent; + + if(bValueLessThanNode) + { + EASTL_VALIDATE_COMPARE(!compare(extractKey(pCurrent->mValue), key)); // Validate that the compare function is sane. 
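+ // key compares less than this node's key, so the insertion position must lie in the left subtree.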
+ pCurrent = (node_type*)pCurrent->mpNodeLeft; + } + else + pCurrent = (node_type*)pCurrent->mpNodeRight; + } + + pParent = pLowerBound; // pLowerBound is actually upper bound right now (i.e. it is > value instead of <=), but we will make it the lower bound below. + + if(bValueLessThanNode) // If we ended up on the left side of the last parent node... + { + if(EASTL_LIKELY(pLowerBound != (node_type*)mAnchor.mpNodeLeft)) // If the tree was empty or if we otherwise need to insert at the very front of the tree... + { + // At this point, pLowerBound points to a node which is > than value. + // Move it back by one, so that it points to a node which is <= value. + pLowerBound = (node_type*)RBTreeDecrement(pLowerBound); + } + else + { + canInsert = true; + return pLowerBound; + } + } + + // Since here we require values to be unique, we will do nothing if the value already exists. + if(compare(extractKey(pLowerBound->mValue), key)) // If the node is < the value (i.e. if value is >= the node)... + { + EASTL_VALIDATE_COMPARE(!compare(key, extractKey(pLowerBound->mValue))); // Validate that the compare function is sane. + canInsert = true; + return pParent; + } + + // The item already exists (as found by the compare directly above), so return false. + canInsert = false; + return pLowerBound; + } + + + template + typename rbtree::node_type* + rbtree::DoGetKeyInsertionPositionNonuniqueKeys(const key_type& key) + { + // This is the pathway for insertion of non-unique keys (multimap and multiset, but not map and set). + node_type* pCurrent = (node_type*)mAnchor.mpNodeParent; // Start with the root node. + node_type* pRangeEnd = (node_type*)&mAnchor; // Set it to the container end for now. + extract_key extractKey; + + while(pCurrent) + { + pRangeEnd = pCurrent; + + if(compare(key, extractKey(pCurrent->mValue))) + { + EASTL_VALIDATE_COMPARE(!compare(extractKey(pCurrent->mValue), key)); // Validate that the compare function is sane. + pCurrent = (node_type*)pCurrent->mpNodeLeft; + } + else + pCurrent = (node_type*)pCurrent->mpNodeRight; + } + + return pRangeEnd; + } + + + template + eastl::pair::iterator, bool> + rbtree::DoInsertValue(true_type, value_type&& value) + { + extract_key extractKey; + key_type key(extractKey(value)); + bool canInsert; + node_type* pPosition = DoGetKeyInsertionPositionUniqueKeys(canInsert, key); + + if(canInsert) + { + const iterator itResult(DoInsertValueImpl(pPosition, false, key, eastl::move(value))); + return pair(itResult, true); + } + + return pair(iterator(pPosition), false); + } + + + template + typename rbtree::iterator + rbtree::DoInsertValue(false_type, value_type&& value) + { + extract_key extractKey; + key_type key(extractKey(value)); + node_type* pPosition = DoGetKeyInsertionPositionNonuniqueKeys(key); + + return DoInsertValueImpl(pPosition, false, key, eastl::move(value)); + } + + + template + template + eastl::pair::iterator, bool> + rbtree::DoInsertValue(true_type, Args&&... args) // true_type means keys are unique. + { + // This is the pathway for insertion of unique keys (map and set, but not multimap and multiset). + // Note that we return a pair and not an iterator. This is because the C++ standard for map + // and set is to return a pair and not just an iterator. + + node_type* pNodeNew = DoCreateNode(eastl::forward(args)...); // Note that pNodeNew->mpLeft, mpRight, mpParent, will be uninitialized. 
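+ // The node is constructed first so its key can be read from the in-place value;
+ // if the key turns out to already exist, the node is freed again below.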
+ const key_type& key = extract_key{}(pNodeNew->mValue); + + bool canInsert; + node_type* pPosition = DoGetKeyInsertionPositionUniqueKeys(canInsert, key); + + if(canInsert) + { + iterator itResult(DoInsertValueImpl(pPosition, false, key, pNodeNew)); + return pair(itResult, true); + } + + DoFreeNode(pNodeNew); + return pair(iterator(pPosition), false); + } + + + template + template + typename rbtree::iterator + rbtree::DoInsertValue(false_type, Args&&... args) // false_type means keys are not unique. + { + // We have a problem here if sizeof(value_type) is too big for the stack. We may want to consider having a specialization for large value_types. + // To do: Change this so that we call DoCreateNode(eastl::forward(args)...) here and use the value from the resulting pNode to get the + // key, and make DoInsertValueImpl take that node as an argument. That way there is no value created on the stack. + + node_type* const pNodeNew = DoCreateNode(eastl::forward(args)...); // Note that pNodeNew->mpLeft, mpRight, mpParent, will be uninitialized. + const key_type& key = extract_key{}(pNodeNew->mValue); + + node_type* pPosition = DoGetKeyInsertionPositionNonuniqueKeys(key); + + return DoInsertValueImpl(pPosition, false, key, pNodeNew); + } + + + template + template + typename rbtree::iterator + rbtree::DoInsertValueImpl(node_type* pNodeParent, bool bForceToLeft, const key_type& key, Args&&... args) + { + node_type* const pNodeNew = DoCreateNode(eastl::forward(args)...); // Note that pNodeNew->mpLeft, mpRight, mpParent, will be uninitialized. + + return DoInsertValueImpl(pNodeParent, bForceToLeft, key, pNodeNew); + } + + + template + typename rbtree::iterator + rbtree::DoInsertValueImpl(node_type* pNodeParent, bool bForceToLeft, const key_type& key, node_type* pNodeNew) + { + EASTL_ASSERT_MSG(pNodeNew != nullptr, "node to insert to the rbtree must not be null"); + + RBTreeSide side; + extract_key extractKey; + + // The reason we may want to have bForceToLeft == true is that pNodeParent->mValue and value may be equal. + // In that case it doesn't matter what side we insert on, except that the C++ LWG #233 improvement report + // suggests that we should use the insert hint position to force an ordering. So that's what we do. + if(bForceToLeft || (pNodeParent == &mAnchor) || compare(key, extractKey(pNodeParent->mValue))) + side = kRBTreeSideLeft; + else + side = kRBTreeSideRight; + + RBTreeInsert(pNodeNew, pNodeParent, &mAnchor, side); + mnSize++; + + return iterator(pNodeNew); + } + + + template + eastl::pair::iterator, bool> + rbtree::DoInsertKey(true_type, const key_type& key) // true_type means keys are unique. + { + // This is the pathway for insertion of unique keys (map and set, but not multimap and multiset). + // Note that we return a pair and not an iterator. This is because the C++ standard for map + // and set is to return a pair and not just an iterator. + bool canInsert; + node_type* pPosition = DoGetKeyInsertionPositionUniqueKeys(canInsert, key); + + if(canInsert) + { + const iterator itResult(DoInsertKeyImpl(pPosition, false, key)); + return pair(itResult, true); + } + + return pair(iterator(pPosition), false); + } + + + template + typename rbtree::iterator + rbtree::DoInsertKey(false_type, const key_type& key) // false_type means keys are not unique. 
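+ // With non-unique keys the insertion cannot fail, so an iterator (rather than a
+ // pair of iterator and bool) is returned.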
+ { + node_type* pPosition = DoGetKeyInsertionPositionNonuniqueKeys(key); + + return DoInsertKeyImpl(pPosition, false, key); + } + + + + template + typename rbtree::node_type* + rbtree::DoGetKeyInsertionPositionUniqueKeysHint(const_iterator position, bool& bForceToLeft, const key_type& key) + { + extract_key extractKey; + + if((position.mpNode != mAnchor.mpNodeRight) && (position.mpNode != &mAnchor)) // If the user specified a specific insertion position... + { + iterator itNext(position.mpNode); + ++itNext; + + // To consider: Change this so that 'position' specifies the position after + // where the insertion goes and not the position before where the insertion goes. + // Doing so would make this more in line with user expectations and with LWG #233. + const bool bPositionLessThanValue = compare(extractKey(position.mpNode->mValue), key); + + if(bPositionLessThanValue) // If (value > *position)... + { + EASTL_VALIDATE_COMPARE(!compare(key, extractKey(position.mpNode->mValue))); // Validate that the compare function is sane. + + const bool bValueLessThanNext = compare(key, extractKey(itNext.mpNode->mValue)); + + if(bValueLessThanNext) // If value < *itNext... + { + EASTL_VALIDATE_COMPARE(!compare(extractKey(itNext.mpNode->mValue), key)); // Validate that the compare function is sane. + + if(position.mpNode->mpNodeRight) + { + bForceToLeft = true; // Specifically insert in front of (to the left of) itNext (and thus after 'position'). + return itNext.mpNode; + } + + bForceToLeft = false; + return position.mpNode; + } + } + + bForceToLeft = false; + return NULL; // The above specified hint was not useful, then we do a regular insertion. + } + + if(mnSize && compare(extractKey(((node_type*)mAnchor.mpNodeRight)->mValue), key)) + { + EASTL_VALIDATE_COMPARE(!compare(key, extractKey(((node_type*)mAnchor.mpNodeRight)->mValue))); // Validate that the compare function is sane. + bForceToLeft = false; + return (node_type*)mAnchor.mpNodeRight; + } + + bForceToLeft = false; + return NULL; // The caller can do a default insert. + } + + + template + typename rbtree::node_type* + rbtree::DoGetKeyInsertionPositionNonuniqueKeysHint(const_iterator position, bool& bForceToLeft, const key_type& key) + { + extract_key extractKey; + + if((position.mpNode != mAnchor.mpNodeRight) && (position.mpNode != &mAnchor)) // If the user specified a specific insertion position... + { + iterator itNext(position.mpNode); + ++itNext; + + // To consider: Change this so that 'position' specifies the position after + // where the insertion goes and not the position before where the insertion goes. + // Doing so would make this more in line with user expectations and with LWG #233. + if(!compare(key, extractKey(position.mpNode->mValue)) && // If value >= *position && + !compare(extractKey(itNext.mpNode->mValue), key)) // if value <= *itNext... + { + if(position.mpNode->mpNodeRight) // If there are any nodes to the right... [this expression will always be true as long as we aren't at the end()] + { + bForceToLeft = true; // Specifically insert in front of (to the left of) itNext (and thus after 'position'). + return itNext.mpNode; + } + + bForceToLeft = false; + return position.mpNode; + } + + bForceToLeft = false; + return NULL; // The above specified hint was not useful, then we do a regular insertion. + } + + // This pathway shouldn't be commonly executed, as the user shouldn't be calling + // this hinted version of insert if the user isn't providing a useful hint. 
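+ // The one case that is expected, and handled below, is a hint at or just before
+ // end() while keys arrive in non-decreasing order; the value is then appended to
+ // the right of the current maximum without a full walk from the root.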
+ if(mnSize && !compare(key, extractKey(((node_type*)mAnchor.mpNodeRight)->mValue))) // If we are non-empty and the value is >= the last node... + { + bForceToLeft =false; + return (node_type*)mAnchor.mpNodeRight; + } + + bForceToLeft = false; + return NULL; + } + + template + template + typename rbtree::iterator + rbtree::DoInsertValueHint(true_type, const_iterator position, Args&&... args) // true_type means keys are unique. + { + // This is the pathway for insertion of unique keys (map and set, but not multimap and multiset). + // + // We follow the same approach as SGI STL/STLPort and use the position as + // a forced insertion position for the value when possible. + + node_type* pNodeNew = DoCreateNode(eastl::forward(args)...); // Note that pNodeNew->mpLeft, mpRight, mpParent, will be uninitialized. + const key_type& key(extract_key{}(pNodeNew->mValue)); + + bool bForceToLeft; + node_type* pPosition = DoGetKeyInsertionPositionUniqueKeysHint(position, bForceToLeft, key); + + if (!pPosition) + { + bool canInsert; + pPosition = DoGetKeyInsertionPositionUniqueKeys(canInsert, key); + + if (!canInsert) + { + DoFreeNode(pNodeNew); + return iterator(pPosition); + } + + bForceToLeft = false; + } + + return DoInsertValueImpl(pPosition, bForceToLeft, key, pNodeNew); + } + + + template + template + typename rbtree::iterator + rbtree::DoInsertValueHint(false_type, const_iterator position, Args&&... args) // false_type means keys are not unique. + { + // This is the pathway for insertion of non-unique keys (multimap and multiset, but not map and set). + // + // We follow the same approach as SGI STL/STLPort and use the position as + // a forced insertion position for the value when possible. + + node_type* pNodeNew = DoCreateNode(eastl::forward(args)...); // Note that pNodeNew->mpLeft, mpRight, mpParent, will be uninitialized. + const key_type& key(extract_key{}(pNodeNew->mValue)); + + bool bForceToLeft; + node_type* pPosition = DoGetKeyInsertionPositionNonuniqueKeysHint(position, bForceToLeft, key); + + if (!pPosition) + { + pPosition = DoGetKeyInsertionPositionNonuniqueKeys(key); + bForceToLeft = false; + } + + return DoInsertValueImpl(pPosition, bForceToLeft, key, pNodeNew); + } + + + template + typename rbtree::iterator + rbtree::DoInsertValueHint(true_type, const_iterator position, value_type&& value) // true_type means keys are unique. + { + // This is the pathway for insertion of unique keys (map and set, but not multimap and multiset). + // + // We follow the same approach as SGI STL/STLPort and use the position as + // a forced insertion position for the value when possible. + + extract_key extractKey; + key_type key(extractKey(value)); + bool bForceToLeft; + node_type* pPosition = DoGetKeyInsertionPositionUniqueKeysHint(position, bForceToLeft, key); + + if(pPosition) + return DoInsertValueImpl(pPosition, bForceToLeft, key, eastl::move(value)); + else + return DoInsertValue(has_unique_keys_type(), eastl::move(value)).first; + } + + + template + typename rbtree::iterator + rbtree::DoInsertValueHint(false_type, const_iterator position, value_type&& value) // false_type means keys are not unique. + { + // This is the pathway for insertion of non-unique keys (multimap and multiset, but not map and set). + // + // We follow the same approach as SGI STL/STLPort and use the position as + // a forced insertion position for the value when possible. 
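+ // Illustrative sketch (hypothetical usage): this is what makes hinted, in-order
+ // filling cheap, e.g.
+ //     eastl::multiset<int> ms;
+ //     eastl::multiset<int>::iterator hint = ms.end();
+ //     for(int i = 0; i < 100; ++i)
+ //         hint = ms.insert(hint, i);    // each key >= the previous, so the hint is honored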
+ extract_key extractKey; + key_type key(extractKey(value)); + bool bForceToLeft; + node_type* pPosition = DoGetKeyInsertionPositionNonuniqueKeysHint(position, bForceToLeft, key); + + if(pPosition) + return DoInsertValueImpl(pPosition, bForceToLeft, key, eastl::move(value)); + else + return DoInsertValue(has_unique_keys_type(), eastl::move(value)); + } + + + template + typename rbtree::iterator + rbtree::DoInsertKey(true_type, const_iterator position, const key_type& key) // true_type means keys are unique. + { + bool bForceToLeft; + node_type* pPosition = DoGetKeyInsertionPositionUniqueKeysHint(position, bForceToLeft, key); + + if(pPosition) + return DoInsertKeyImpl(pPosition, bForceToLeft, key); + else + return DoInsertKey(has_unique_keys_type(), key).first; + } + + + template + typename rbtree::iterator + rbtree::DoInsertKey(false_type, const_iterator position, const key_type& key) // false_type means keys are not unique. + { + // This is the pathway for insertion of non-unique keys (multimap and multiset, but not map and set). + // + // We follow the same approach as SGI STL/STLPort and use the position as + // a forced insertion position for the value when possible. + bool bForceToLeft; + node_type* pPosition = DoGetKeyInsertionPositionNonuniqueKeysHint(position, bForceToLeft, key); + + if(pPosition) + return DoInsertKeyImpl(pPosition, bForceToLeft, key); + else + return DoInsertKey(has_unique_keys_type(), key); // We are empty or we are inserting at the end. + } + + + template + typename rbtree::iterator + rbtree::DoInsertKeyImpl(node_type* pNodeParent, bool bForceToLeft, const key_type& key) + { + RBTreeSide side; + extract_key extractKey; + + // The reason we may want to have bForceToLeft == true is that pNodeParent->mValue and value may be equal. + // In that case it doesn't matter what side we insert on, except that the C++ LWG #233 improvement report + // suggests that we should use the insert hint position to force an ordering. So that's what we do. + if(bForceToLeft || (pNodeParent == &mAnchor) || compare(key, extractKey(pNodeParent->mValue))) + side = kRBTreeSideLeft; + else + side = kRBTreeSideRight; + + node_type* const pNodeNew = DoCreateNodeFromKey(key); // Note that pNodeNew->mpLeft, mpRight, mpParent, will be uninitialized. + RBTreeInsert(pNodeNew, pNodeParent, &mAnchor, side); + mnSize++; + + return iterator(pNodeNew); + } + + + template + void rbtree::insert(std::initializer_list ilist) + { + for(typename std::initializer_list::iterator it = ilist.begin(), itEnd = ilist.end(); it != itEnd; ++it) + DoInsertValue(has_unique_keys_type(), eastl::move(*it)); + } + + + template + template + void rbtree::insert(InputIterator first, InputIterator last) + { + for( ; first != last; ++first) + DoInsertValue(has_unique_keys_type(), *first); // Or maybe we should call 'insert(end(), *first)' instead. If the first-last range was sorted then this might make some sense. + } + + + template + inline void rbtree::clear() + { + // Erase the entire tree. DoNukeSubtree is not a + // conventional erase function, as it does no rebalancing. + DoNukeSubtree((node_type*)mAnchor.mpNodeParent); + reset_lose_memory(); + } + + + template + inline void rbtree::reset_lose_memory() + { + // The reset_lose_memory function is a special extension function which unilaterally + // resets the container to an empty state without freeing the memory of + // the contained objects. This is useful for very quickly tearing down a + // container built into scratch memory. 
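+ // After this, the anchor's left and right pointers refer back to the anchor itself,
+ // so begin() == end() for the now-empty tree.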
+ mAnchor.mpNodeRight = &mAnchor; + mAnchor.mpNodeLeft = &mAnchor; + mAnchor.mpNodeParent = NULL; + mAnchor.mColor = kRBTreeColorRed; + mnSize = 0; + } + + + template + inline typename rbtree::iterator + rbtree::erase(const_iterator position) + { + const iterator iErase(position.mpNode); + --mnSize; // Interleave this between the two references to itNext. We expect no exceptions to occur during the code below. + ++position; + RBTreeErase(iErase.mpNode, &mAnchor); + DoFreeNode(iErase.mpNode); + return iterator(position.mpNode); + } + + + template + typename rbtree::iterator + rbtree::erase(const_iterator first, const_iterator last) + { + // We expect that if the user means to clear the container, they will call clear. + if(EASTL_LIKELY((first.mpNode != mAnchor.mpNodeLeft) || (last.mpNode != &mAnchor))) // If (first != begin or last != end) ... + { + // Basic implementation: + while(first != last) + first = erase(first); + return iterator(first.mpNode); + + // Inlined implementation: + //size_type n = 0; + //while(first != last) + //{ + // const iterator itErase(first); + // ++n; + // ++first; + // RBTreeErase(itErase.mpNode, &mAnchor); + // DoFreeNode(itErase.mpNode); + //} + //mnSize -= n; + //return first; + } + + clear(); + return iterator((node_type*)&mAnchor); // Same as: return end(); + } + + + template + inline typename rbtree::reverse_iterator + rbtree::erase(const_reverse_iterator position) + { + return reverse_iterator(erase((++position).base())); + } + + + template + typename rbtree::reverse_iterator + rbtree::erase(const_reverse_iterator first, const_reverse_iterator last) + { + // Version which erases in order from first to last. + // difference_type i(first.base() - last.base()); + // while(i--) + // first = erase(first); + // return first; + + // Version which erases in order from last to first, but is slightly more efficient: + return reverse_iterator(erase((++last).base(), (++first).base())); + } + + + template + inline void rbtree::erase(const key_type* first, const key_type* last) + { + // We have no choice but to run a loop like this, as the first/last range could + // have values that are discontiguously located in the tree. And some may not + // even be in the tree. + while(first != last) + erase(*first++); + } + + + template + typename rbtree::iterator + rbtree::find(const key_type& key) + { + // To consider: Implement this instead via calling lower_bound and + // inspecting the result. The following is an implementation of this: + // const iterator it(lower_bound(key)); + // return ((it.mpNode == &mAnchor) || compare(key, extractKey(it.mpNode->mValue))) ? iterator(&mAnchor) : it; + // We don't currently implement the above because in practice people tend to call + // find a lot with trees, but very uncommonly call lower_bound. + extract_key extractKey; + + node_type* pCurrent = (node_type*)mAnchor.mpNodeParent; // Start with the root node. + node_type* pRangeEnd = (node_type*)&mAnchor; // Set it to the container end for now. + + while(EASTL_LIKELY(pCurrent)) // Do a walk down the tree. + { + if(EASTL_LIKELY(!compare(extractKey(pCurrent->mValue), key))) // If pCurrent is >= key... + { + pRangeEnd = pCurrent; + pCurrent = (node_type*)pCurrent->mpNodeLeft; + } + else + { + EASTL_VALIDATE_COMPARE(!compare(key, extractKey(pCurrent->mValue))); // Validate that the compare function is sane. 
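+ // This node's key is less than the search key, so any match lies in the right subtree.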
+ pCurrent = (node_type*)pCurrent->mpNodeRight; + } + } + + if(EASTL_LIKELY((pRangeEnd != &mAnchor) && !compare(key, extractKey(pRangeEnd->mValue)))) + return iterator(pRangeEnd); + return iterator((node_type*)&mAnchor); + } + + + template + inline typename rbtree::const_iterator + rbtree::find(const key_type& key) const + { + typedef rbtree rbtree_type; + return const_iterator(const_cast(this)->find(key)); + } + + + template + template + typename rbtree::iterator + rbtree::find_as(const U& u, Compare2 compare2) + { + extract_key extractKey; + + node_type* pCurrent = (node_type*)mAnchor.mpNodeParent; // Start with the root node. + node_type* pRangeEnd = (node_type*)&mAnchor; // Set it to the container end for now. + + while(EASTL_LIKELY(pCurrent)) // Do a walk down the tree. + { + if(EASTL_LIKELY(!compare2(extractKey(pCurrent->mValue), u))) // If pCurrent is >= u... + { + pRangeEnd = pCurrent; + pCurrent = (node_type*)pCurrent->mpNodeLeft; + } + else + { + EASTL_VALIDATE_COMPARE(!compare2(u, extractKey(pCurrent->mValue))); // Validate that the compare function is sane. + pCurrent = (node_type*)pCurrent->mpNodeRight; + } + } + + if(EASTL_LIKELY((pRangeEnd != &mAnchor) && !compare2(u, extractKey(pRangeEnd->mValue)))) + return iterator(pRangeEnd); + return iterator((node_type*)&mAnchor); + } + + + template + template + inline typename rbtree::const_iterator + rbtree::find_as(const U& u, Compare2 compare2) const + { + typedef rbtree rbtree_type; + return const_iterator(const_cast(this)->find_as(u, compare2)); + } + + + template + typename rbtree::iterator + rbtree::lower_bound(const key_type& key) + { + extract_key extractKey; + + node_type* pCurrent = (node_type*)mAnchor.mpNodeParent; // Start with the root node. + node_type* pRangeEnd = (node_type*)&mAnchor; // Set it to the container end for now. + + while(EASTL_LIKELY(pCurrent)) // Do a walk down the tree. + { + if(EASTL_LIKELY(!compare(extractKey(pCurrent->mValue), key))) // If pCurrent is >= key... + { + pRangeEnd = pCurrent; + pCurrent = (node_type*)pCurrent->mpNodeLeft; + } + else + { + EASTL_VALIDATE_COMPARE(!compare(key, extractKey(pCurrent->mValue))); // Validate that the compare function is sane. + pCurrent = (node_type*)pCurrent->mpNodeRight; + } + } + + return iterator(pRangeEnd); + } + + + template + inline typename rbtree::const_iterator + rbtree::lower_bound(const key_type& key) const + { + typedef rbtree rbtree_type; + return const_iterator(const_cast(this)->lower_bound(key)); + } + + + template + typename rbtree::iterator + rbtree::upper_bound(const key_type& key) + { + extract_key extractKey; + + node_type* pCurrent = (node_type*)mAnchor.mpNodeParent; // Start with the root node. + node_type* pRangeEnd = (node_type*)&mAnchor; // Set it to the container end for now. + + while(EASTL_LIKELY(pCurrent)) // Do a walk down the tree. + { + if(EASTL_LIKELY(compare(key, extractKey(pCurrent->mValue)))) // If key is < pCurrent... + { + EASTL_VALIDATE_COMPARE(!compare(extractKey(pCurrent->mValue), key)); // Validate that the compare function is sane. + pRangeEnd = pCurrent; + pCurrent = (node_type*)pCurrent->mpNodeLeft; + } + else + pCurrent = (node_type*)pCurrent->mpNodeRight; + } + + return iterator(pRangeEnd); + } + + + template + inline typename rbtree::const_iterator + rbtree::upper_bound(const key_type& key) const + { + typedef rbtree rbtree_type; + return const_iterator(const_cast(this)->upper_bound(key)); + } + + + // To do: Move this validate function entirely to a template-less implementation. 
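+ // Illustrative sketch (hypothetical usage): validate() is intended for debug-time
+ // sanity checks, e.g.
+ //     eastl::map<int, int> m;
+ //     // ... mutate m ...
+ //     EASTL_ASSERT(m.validate());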
+ template + bool rbtree::validate() const + { + // Red-black trees have the following canonical properties which we validate here: + // 1 Every node is either red or black. + // 2 Every leaf (NULL) is black by defintion. Any number of black nodes may appear in a sequence. + // 3 If a node is red, then both its children are black. Thus, on any path from + // the root to a leaf, red nodes must not be adjacent. + // 4 Every simple path from a node to a descendant leaf contains the same number of black nodes. + // 5 The mnSize member of the tree must equal the number of nodes in the tree. + // 6 The tree is sorted as per a conventional binary tree. + // 7 The comparison function is sane; it obeys strict weak ordering. If compare(a,b) is true, then compare(b,a) must be false. Both cannot be true. + + extract_key extractKey; + + if(mnSize) + { + // Verify basic integrity. + //if(!mAnchor.mpNodeParent || (mAnchor.mpNodeLeft == mAnchor.mpNodeRight)) + // return false; // Fix this for case of empty tree. + + if(mAnchor.mpNodeLeft != RBTreeGetMinChild(mAnchor.mpNodeParent)) + return false; + + if(mAnchor.mpNodeRight != RBTreeGetMaxChild(mAnchor.mpNodeParent)) + return false; + + const size_t nBlackCount = RBTreeGetBlackCount(mAnchor.mpNodeParent, mAnchor.mpNodeLeft); + size_type nIteratedSize = 0; + + for(const_iterator it = begin(); it != end(); ++it, ++nIteratedSize) + { + const node_type* const pNode = (const node_type*)it.mpNode; + const node_type* const pNodeRight = (const node_type*)pNode->mpNodeRight; + const node_type* const pNodeLeft = (const node_type*)pNode->mpNodeLeft; + + // Verify #7 above. + if(pNodeRight && compare(extractKey(pNodeRight->mValue), extractKey(pNode->mValue)) && compare(extractKey(pNode->mValue), extractKey(pNodeRight->mValue))) // Validate that the compare function is sane. + return false; + + // Verify #7 above. + if(pNodeLeft && compare(extractKey(pNodeLeft->mValue), extractKey(pNode->mValue)) && compare(extractKey(pNode->mValue), extractKey(pNodeLeft->mValue))) // Validate that the compare function is sane. + return false; + + // Verify item #1 above. + if((pNode->mColor != kRBTreeColorRed) && (pNode->mColor != kRBTreeColorBlack)) + return false; + + // Verify item #3 above. + if(pNode->mColor == kRBTreeColorRed) + { + if((pNodeRight && (pNodeRight->mColor == kRBTreeColorRed)) || + (pNodeLeft && (pNodeLeft->mColor == kRBTreeColorRed))) + return false; + } + + // Verify item #6 above. + if(pNodeRight && compare(extractKey(pNodeRight->mValue), extractKey(pNode->mValue))) + return false; + + if(pNodeLeft && compare(extractKey(pNode->mValue), extractKey(pNodeLeft->mValue))) + return false; + + if(!pNodeRight && !pNodeLeft) // If we are at a bottom node of the tree... + { + // Verify item #4 above. + if(RBTreeGetBlackCount(mAnchor.mpNodeParent, pNode) != nBlackCount) + return false; + } + } + + // Verify item #5 above. + if(nIteratedSize != mnSize) + return false; + + return true; + } + else + { + if((mAnchor.mpNodeLeft != &mAnchor) || (mAnchor.mpNodeRight != &mAnchor)) + return false; + } + + return true; + } + + + template + inline int rbtree::validate_iterator(const_iterator i) const + { + // To do: Come up with a more efficient mechanism of doing this. 
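+ // Illustrative sketch (hypothetical usage, assuming the usual eastl isf_* iterator
+ // status flags): the result can be tested with a bitwise and, e.g.
+ //     if((tree.validate_iterator(it) & eastl::isf_can_dereference) == 0)
+ //         { /* 'it' is end() or is not an iterator of 'tree' */ }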
+ + for(const_iterator temp = begin(), tempEnd = end(); temp != tempEnd; ++temp) + { + if(temp == i) + return (isf_valid | isf_current | isf_can_dereference); + } + + if(i == end()) + return (isf_valid | isf_current); + + return isf_none; + } + + + template + inline typename rbtree::node_type* + rbtree::DoAllocateNode() + { + auto* pNode = (node_type*)allocate_memory(mAllocator, sizeof(node_type), EASTL_ALIGN_OF(node_type), 0); + EASTL_ASSERT_MSG(pNode != nullptr, "the behaviour of eastl::allocators that return nullptr is not defined."); + + return pNode; + } + + + template + inline void rbtree::DoFreeNode(node_type* pNode) + { + pNode->~node_type(); + EASTLFree(mAllocator, pNode, sizeof(node_type)); + } + + + template + typename rbtree::node_type* + rbtree::DoCreateNodeFromKey(const key_type& key) + { + // Note that this function intentionally leaves the node pointers uninitialized. + // The caller would otherwise just turn right around and modify them, so there's + // no point in us initializing them to anything (except in a debug build). + node_type* const pNode = DoAllocateNode(); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + ::new (eastl::addressof(pNode->mValue)) value_type(pair_first_construct, key); + + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeNode(pNode); + throw; + } + #endif + + #if EASTL_DEBUG + pNode->mpNodeRight = NULL; + pNode->mpNodeLeft = NULL; + pNode->mpNodeParent = NULL; + pNode->mColor = kRBTreeColorBlack; + #endif + + return pNode; + } + + + template + typename rbtree::node_type* + rbtree::DoCreateNode(const value_type& value) + { + // Note that this function intentionally leaves the node pointers uninitialized. + // The caller would otherwise just turn right around and modify them, so there's + // no point in us initializing them to anything (except in a debug build). + node_type* const pNode = DoAllocateNode(); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + ::new(eastl::addressof(pNode->mValue)) value_type(value); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeNode(pNode); + throw; + } + #endif + + #if EASTL_DEBUG + pNode->mpNodeRight = NULL; + pNode->mpNodeLeft = NULL; + pNode->mpNodeParent = NULL; + pNode->mColor = kRBTreeColorBlack; + #endif + + return pNode; + } + + + template + typename rbtree::node_type* + rbtree::DoCreateNode(value_type&& value) + { + // Note that this function intentionally leaves the node pointers uninitialized. + // The caller would otherwise just turn right around and modify them, so there's + // no point in us initializing them to anything (except in a debug build). + node_type* const pNode = DoAllocateNode(); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + ::new(eastl::addressof(pNode->mValue)) value_type(eastl::move(value)); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeNode(pNode); + throw; + } + #endif + + #if EASTL_DEBUG + pNode->mpNodeRight = NULL; + pNode->mpNodeLeft = NULL; + pNode->mpNodeParent = NULL; + pNode->mColor = kRBTreeColorBlack; + #endif + + return pNode; + } + + + template + template + typename rbtree::node_type* + rbtree::DoCreateNode(Args&&... args) + { + // Note that this function intentionally leaves the node pointers uninitialized. + // The caller would otherwise just turn right around and modify them, so there's + // no point in us initializing them to anything (except in a debug build). 
+ node_type* const pNode = DoAllocateNode(); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + ::new(eastl::addressof(pNode->mValue)) value_type(eastl::forward(args)...); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoFreeNode(pNode); + throw; + } + #endif + + #if EASTL_DEBUG + pNode->mpNodeRight = NULL; + pNode->mpNodeLeft = NULL; + pNode->mpNodeParent = NULL; + pNode->mColor = kRBTreeColorBlack; + #endif + + return pNode; + } + + + template + typename rbtree::node_type* + rbtree::DoCreateNode(const node_type* pNodeSource, node_type* pNodeParent) + { + node_type* const pNode = DoCreateNode(pNodeSource->mValue); + + pNode->mpNodeRight = NULL; + pNode->mpNodeLeft = NULL; + pNode->mpNodeParent = pNodeParent; + pNode->mColor = pNodeSource->mColor; + + return pNode; + } + + + template + typename rbtree::node_type* + rbtree::DoCopySubtree(const node_type* pNodeSource, node_type* pNodeDest) + { + node_type* const pNewNodeRoot = DoCreateNode(pNodeSource, pNodeDest); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + // Copy the right side of the tree recursively. + if(pNodeSource->mpNodeRight) + pNewNodeRoot->mpNodeRight = DoCopySubtree((const node_type*)pNodeSource->mpNodeRight, pNewNodeRoot); + + node_type* pNewNodeLeft; + + for(pNodeSource = (node_type*)pNodeSource->mpNodeLeft, pNodeDest = pNewNodeRoot; + pNodeSource; + pNodeSource = (node_type*)pNodeSource->mpNodeLeft, pNodeDest = pNewNodeLeft) + { + pNewNodeLeft = DoCreateNode(pNodeSource, pNodeDest); + + pNodeDest->mpNodeLeft = pNewNodeLeft; + + // Copy the right side of the tree recursively. + if(pNodeSource->mpNodeRight) + pNewNodeLeft->mpNodeRight = DoCopySubtree((const node_type*)pNodeSource->mpNodeRight, pNewNodeLeft); + } + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + DoNukeSubtree(pNewNodeRoot); + throw; + } + #endif + + return pNewNodeRoot; + } + + + template + void rbtree::DoNukeSubtree(node_type* pNode) + { + while(pNode) // Recursively traverse the tree and destroy items as we go. + { + DoNukeSubtree((node_type*)pNode->mpNodeRight); + + node_type* const pNodeLeft = (node_type*)pNode->mpNodeLeft; + DoFreeNode(pNode); + pNode = pNodeLeft; + } + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + inline bool operator==(const rbtree& a, const rbtree& b) + { + return (a.size() == b.size()) && eastl::equal(a.begin(), a.end(), b.begin()); + } + + + // Note that in operator< we do comparisons based on the tree value_type with operator<() of the + // value_type instead of the tree's Compare function. For set/multiset, the value_type is T, while + // for map/multimap the value_type is a pair. operator< for pair can be seen by looking + // utility.h, but it basically is uses the operator< for pair.first and pair.second. The C++ standard + // appears to require this behaviour, whether intentionally or not. If anything, a good reason to do + // this is for consistency. A map and a vector that contain the same items should compare the same. 
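+    // Editorial illustration (not from the original EASTL sources): given two map<int, char>
+    // containers a = {(1,'a'), (2,'b')} and b = {(1,'a'), (2,'c')}, the operator< below compares
+    // the stored pairs lexicographically, so (a < b) is true because 'b' < 'c' -- the maps'
+    // Compare template parameter plays no part in this comparison.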
+ template + inline bool operator<(const rbtree& a, const rbtree& b) + { + return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); + } + + + template + inline bool operator!=(const rbtree& a, const rbtree& b) + { + return !(a == b); + } + + + template + inline bool operator>(const rbtree& a, const rbtree& b) + { + return b < a; + } + + + template + inline bool operator<=(const rbtree& a, const rbtree& b) + { + return !(b < a); + } + + + template + inline bool operator>=(const rbtree& a, const rbtree& b) + { + return !(a < b); + } + + + template + inline void swap(rbtree& a, rbtree& b) + { + a.swap(b); + } + + +} // namespace eastl + + +EA_RESTORE_VC_WARNING(); + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/internal/smart_ptr.h b/libkram/eastl/include/EASTL/internal/smart_ptr.h new file mode 100644 index 00000000..f1d52e1b --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/smart_ptr.h @@ -0,0 +1,264 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_SMART_PTR_H +#define EASTL_INTERNAL_SMART_PTR_H + + +#include +#include +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +namespace eastl +{ + + namespace Internal + { + // Tells if the Deleter type has a typedef for pointer to T. If so then return it, + // else return T*. The large majority of the time the pointer type will be T*. + // The C++11 Standard requires that scoped_ptr let the deleter define the pointer type. + // + // Example usage: + // typedef typename unique_pointer_type::type pointer + // + template + class unique_pointer_type + { + template + static typename U::pointer test(typename U::pointer*); + + template + static T* test(...); + + public: + typedef decltype(test::type>(0)) type; + }; + + + /////////////////////////////////////////////////////////////////////// + // is_array_cv_convertible + // + // Tells if the array pointer P1 is cv-convertible to array pointer P2. + // The two types have two be equivalent pointer types and be convertible + // when you consider const/volatile properties of them. + // + // Example usage: + // is_array_cv_convertible::value => false + // is_array_cv_convertible::value => false + // is_array_cv_convertible::value => false + // is_array_cv_convertible::value => false + // is_array_cv_convertible::value => false + // is_array_cv_convertible::value => true + // is_array_cv_convertible::value => true + // is_array_cv_convertible::value => true + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_array_cv_convertible_CONFORMANCE 1 + + template ::element_type>, + eastl::remove_cv_t::element_type>>> + struct is_array_cv_convertible_impl + : public eastl::is_convertible {}; // Return true if P1 is convertible to P2. + + template + struct is_array_cv_convertible_impl + : public eastl::false_type {}; // P1's underlying type is not the same as P2's, so it can't be converted, even if P2 refers to a subclass of P1. Parent == Child, but Parent[] != Child[] + + template && !eastl::is_pointer_v> + struct is_array_cv_convertible + : public is_array_cv_convertible_impl {}; + + template + struct is_array_cv_convertible + : public eastl::false_type {}; // P1 is scalar not a pointer, so it can't be converted to a pointer. 
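+
+        // Editorial compile-time sketch (not part of the original EASTL sources). These checks only
+        // restate what follows from the definitions above: two pointer types are "array
+        // cv-convertible" when they point at the same underlying type modulo const/volatile,
+        // and a non-pointer scalar is always rejected.
+        static_assert( is_array_cv_convertible<int*, const int*>::value, "adding const to the pointee is allowed");
+        static_assert(!is_array_cv_convertible<int*, double*>::value,    "unrelated pointee types are rejected");
+        static_assert(!is_array_cv_convertible<int, int*>::value,        "a non-pointer scalar is rejected");
+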
+ + + /////////////////////////////////////////////////////////////////////// + // is_derived + // + // Given two (possibly identical) types Base and Derived, is_base_of::value == true + // if and only if Base is a direct or indirect base class of Derived. This is like is_base_of + // but returns false if Derived is the same as Base. So is_derived is true only if Derived is actually a subclass + // of Base and not Base itself. + // + // is_derived may only be applied to complete types. + // + // Example usage: + // is_derived::value => false + // is_derived::value => false + // is_derived::value => true + // is_derived::value => false + /////////////////////////////////////////////////////////////////////// + + #if EASTL_TYPE_TRAIT_is_base_of_CONFORMANCE + #define EASTL_TYPE_TRAIT_is_derived_CONFORMANCE 1 + + template + struct is_derived : public eastl::integral_constant::value && !eastl::is_same::type, typename eastl::remove_cv::type>::value> {}; + #else + #define EASTL_TYPE_TRAIT_is_derived_CONFORMANCE 0 + + template // This returns true if Derived is unrelated to Base. That's a wrong answer, but is better for us than returning false for compilers that don't support is_base_of. + struct is_derived : public eastl::integral_constant::type, typename eastl::remove_cv::type>::value> {}; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_safe_array_conversion + // + // Say you have two array types: T* t and U* u. You want to assign the u to t but only if + // that's a safe thing to do. As shown in the logic below, the array conversion + // is safe if U* and T* are convertible, if U is an array, and if either U or T is not + // a pointer or U is not derived from T. + // + // Note: Usage of this class could be replaced with is_array_cv_convertible usage. + // To do: Do this replacement and test it. + // + /////////////////////////////////////////////////////////////////////// + + template + struct is_safe_array_conversion : public eastl::integral_constant::value && + eastl::is_array::value && + (!eastl::is_pointer::value || !is_pointer::value || !Internal::is_derived::type>::value)> {}; + + } // namespace Internal + + + + + + + + /// default_delete + /// + /// C++11 smart pointer default delete function class. + /// + /// Provides a default way to delete an object. This default is simply to call delete on the + /// object pointer. You can provide an alternative to this class or you can override this on + /// a class-by-class basis like the following: + /// template <> + /// struct smart_ptr_deleter + /// { + /// void operator()(MyClass* p) const + /// { SomeCustomFunction(p); } + /// }; + /// + template + struct default_delete + { + #if defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION <= 4006) // GCC prior to 4.7 has a bug with noexcept here. + EA_CONSTEXPR default_delete() = default; + #else + EA_CONSTEXPR default_delete() EA_NOEXCEPT = default; + #endif + + template // Enable if T* can be constructed with U* (i.e. U* is convertible to T*). + default_delete(const default_delete&, typename eastl::enable_if::value>::type* = 0) EA_NOEXCEPT {} + + void operator()(T* p) const EA_NOEXCEPT + { delete p; } + }; + + + template + struct default_delete // Specialization for arrays. + { + #if defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION <= 4006) // GCC prior to 4.7 has a bug with noexcept here. 
+ EA_CONSTEXPR default_delete() = default; + #else + EA_CONSTEXPR default_delete() EA_NOEXCEPT = default; + #endif + + template // This ctor is enabled if T is equal to or a base of U, and if U is less or equal const/volatile-qualified than T. + default_delete(const default_delete&, typename eastl::enable_if::value>::type* = 0) EA_NOEXCEPT {} + + void operator()(T* p) const EA_NOEXCEPT + { delete[] p; } + }; + + + + + /// smart_ptr_deleter + /// + /// Deprecated in favor of the C++11 name: default_delete + /// + template + struct smart_ptr_deleter + { + typedef T value_type; + + void operator()(const value_type* p) const // We use a const argument type in order to be most flexible with what types we accept. + { delete const_cast(p); } + }; + + template <> + struct smart_ptr_deleter + { + typedef void value_type; + + void operator()(const void* p) const + { delete[] (char*)p; } // We don't seem to have much choice but to cast to a scalar type. + }; + + template <> + struct smart_ptr_deleter + { + typedef void value_type; + + void operator()(const void* p) const + { delete[] (char*)p; } // We don't seem to have much choice but to cast to a scalar type. + }; + + + + /// smart_array_deleter + /// + /// Deprecated in favor of the C++11 name: default_delete + /// + template + struct smart_array_deleter + { + typedef T value_type; + + void operator()(const value_type* p) const // We use a const argument type in order to be most flexible with what types we accept. + { delete[] const_cast(p); } + }; + + template <> + struct smart_array_deleter + { + typedef void value_type; + + void operator()(const void* p) const + { delete[] (char*)p; } // We don't seem to have much choice but to cast to a scalar type. + }; + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/internal/thread_support.h b/libkram/eastl/include/EASTL/internal/thread_support.h new file mode 100644 index 00000000..80386d20 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/thread_support.h @@ -0,0 +1,244 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_THREAD_SUPPORT_H +#define EASTL_INTERNAL_THREAD_SUPPORT_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif +#include + +///////////////////////////////////////////////////////////////////////////////////////////////////// +// NOTE(rparolin): We need a fallback mutex implementation because the Microsoft implementation +// of std::mutex can not be included in managed-cpp code. +// +// fatal error C1189: is not supported when compiling with /clr or /clr:pure +///////////////////////////////////////////////////////////////////////////////////////////////////// +#if defined(EA_HAVE_CPP11_MUTEX) && !defined(EA_COMPILER_MANAGED_CPP) + #define EASTL_CPP11_MUTEX_ENABLED 1 +#else + #define EASTL_CPP11_MUTEX_ENABLED 0 +#endif + +#if EASTL_CPP11_MUTEX_ENABLED + EA_DISABLE_ALL_VC_WARNINGS() + #include + EA_RESTORE_ALL_VC_WARNINGS() +#endif + +#if defined(EA_PLATFORM_MICROSOFT) + // Cannot include Windows headers in our headers, as they kill builds with their #defines. +#elif defined(EA_PLATFORM_POSIX) + #include +#endif + +// copy constructor could not be generated because a base class copy constructor is inaccessible or deleted. 
+// assignment operator could not be generated because a base class assignment operator is inaccessible or deleted. +// non dll-interface class used as base for DLL-interface classkey 'identifier'. +EA_DISABLE_VC_WARNING(4625 4626 4275); + + +#if defined(EA_PLATFORM_MICROSOFT) + #if defined(EA_PROCESSOR_POWERPC) + extern "C" long __stdcall _InterlockedIncrement(long volatile* Addend); + #pragma intrinsic (_InterlockedIncrement) + + extern "C" long __stdcall _InterlockedDecrement(long volatile* Addend); + #pragma intrinsic (_InterlockedDecrement) + + extern "C" long __stdcall _InterlockedCompareExchange(long volatile* Dest, long Exchange, long Comp); + #pragma intrinsic (_InterlockedCompareExchange) + #else + extern "C" long _InterlockedIncrement(long volatile* Addend); + #pragma intrinsic (_InterlockedIncrement) + + extern "C" long _InterlockedDecrement(long volatile* Addend); + #pragma intrinsic (_InterlockedDecrement) + + extern "C" long _InterlockedCompareExchange(long volatile* Dest, long Exchange, long Comp); + #pragma intrinsic (_InterlockedCompareExchange) + #endif +#endif + + + +/////////////////////////////////////////////////////////////////////////////// +// EASTL_THREAD_SUPPORT_AVAILABLE +// +// Defined as 0 or 1, based on existing support. +// Identifies if thread support (e.g. atomics, mutexes) is available for use. +// The large majority of EASTL doesn't use thread support, but a few parts +// of it (e.g. shared_ptr) do. +/////////////////////////////////////////////////////////////////////////////// + +#if !defined(EASTL_THREAD_SUPPORT_AVAILABLE) + #if defined(EA_COMPILER_CLANG) || (defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4003)) + #define EASTL_THREAD_SUPPORT_AVAILABLE 1 + #elif defined(EA_COMPILER_MSVC) + #define EASTL_THREAD_SUPPORT_AVAILABLE 1 + #else + #define EASTL_THREAD_SUPPORT_AVAILABLE 0 + #endif +#endif + + +namespace eastl +{ + namespace Internal + { + /// atomic_increment + /// Returns the new value. + inline int32_t atomic_increment(int32_t* p32) EA_NOEXCEPT + { + #if defined(EA_COMPILER_CLANG) || (defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4003)) + return __sync_add_and_fetch(p32, 1); + #elif defined(EA_COMPILER_MSVC) + static_assert(sizeof(long) == sizeof(int32_t), "unexpected size"); + return _InterlockedIncrement((volatile long*)p32); + #elif defined(EA_COMPILER_GNUC) + int32_t result; + __asm__ __volatile__ ("lock; xaddl %0, %1" + : "=r" (result), "=m" (*p32) + : "0" (1), "m" (*p32) + : "memory" + ); + return result + 1; + #else + EASTL_FAIL_MSG("EASTL thread safety is not implemented yet. See EAThread for how to do this for the given platform."); + return ++*p32; + #endif + } + + /// atomic_decrement + /// Returns the new value. + inline int32_t atomic_decrement(int32_t* p32) EA_NOEXCEPT + { + #if defined(EA_COMPILER_CLANG) || (defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4003)) + return __sync_add_and_fetch(p32, -1); + #elif defined(EA_COMPILER_MSVC) + return _InterlockedDecrement((volatile long*)p32); // volatile long cast is OK because int32_t == long on Microsoft platforms. + #elif defined(EA_COMPILER_GNUC) + int32_t result; + __asm__ __volatile__ ("lock; xaddl %0, %1" + : "=r" (result), "=m" (*p32) + : "0" (-1), "m" (*p32) + : "memory" + ); + return result - 1; + #else + EASTL_FAIL_MSG("EASTL thread safety is not implemented yet. 
See EAThread for how to do this for the given platform."); + return --*p32; + #endif + } + + + /// atomic_compare_and_swap + /// Safely sets the value to a new value if the original value is equal to + /// a condition value. Returns true if the condition was met and the + /// assignment occurred. The comparison and value setting are done as + /// an atomic operation and thus another thread cannot intervene between + /// the two as would be the case with simple C code. + inline bool atomic_compare_and_swap(int32_t* p32, int32_t newValue, int32_t condition) + { + #if defined(EA_COMPILER_CLANG) || (defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4003)) + return __sync_bool_compare_and_swap(p32, condition, newValue); + #elif defined(EA_COMPILER_MSVC) + return ((int32_t)_InterlockedCompareExchange((volatile long*)p32, (long)newValue, (long)condition) == condition); + #elif defined(EA_COMPILER_GNUC) + // GCC Inline ASM Constraints + // r <--> Any general purpose register + // a <--> The a register. + // 1 <--> The constraint '1' for operand 2 says that it must occupy the same location as operand 1. + // =a <--> output registers + // =r <--> output registers + + int32_t result; + __asm__ __volatile__( + "lock; cmpxchgl %3, (%1) \n" // Test *p32 against EAX, if same, then *p32 = newValue + : "=a" (result), "=r" (p32) // outputs + : "a" (condition), "r" (newValue), "1" (p32) // inputs + : "memory" // clobbered + ); + return result == condition; + #else + EASTL_FAIL_MSG("EASTL thread safety is not implemented yet. See EAThread for how to do this for the given platform."); + if(*p32 == condition) + { + *p32 = newValue; + return true; + } + return false; + #endif + } + + + // mutex + #if EASTL_CPP11_MUTEX_ENABLED + using std::mutex; + #else + class EASTL_API mutex + { + public: + mutex(); + ~mutex(); + + void lock(); + void unlock(); + + protected: + #if defined(EA_PLATFORM_MICROSOFT) + #if defined(_WIN64) + uint64_t mMutexBuffer[40 / sizeof(uint64_t)]; // CRITICAL_SECTION is 40 bytes on Win64. + #elif defined(_WIN32) + uint32_t mMutexBuffer[24 / sizeof(uint32_t)]; // CRITICAL_SECTION is 24 bytes on Win32. + #endif + #elif defined(EA_PLATFORM_POSIX) + pthread_mutex_t mMutex; + #endif + }; + #endif + + + // auto_mutex + class EASTL_API auto_mutex + { + public: + EA_FORCE_INLINE auto_mutex(mutex& mutex) : pMutex(&mutex) + { pMutex->lock(); } + + EA_FORCE_INLINE ~auto_mutex() + { pMutex->unlock(); } + + protected: + mutex* pMutex; + + auto_mutex(const auto_mutex&) = delete; + void operator=(const auto_mutex&) = delete; + }; + + + // shared_ptr_auto_mutex + class EASTL_API shared_ptr_auto_mutex : public auto_mutex + { + public: + shared_ptr_auto_mutex(const void* pSharedPtr); + + shared_ptr_auto_mutex(const shared_ptr_auto_mutex&) = delete; + void operator=(shared_ptr_auto_mutex&&) = delete; + }; + + + } // namespace Internal + +} // namespace eastl + + +EA_RESTORE_VC_WARNING(); + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/internal/tuple_fwd_decls.h b/libkram/eastl/include/EASTL/internal/tuple_fwd_decls.h new file mode 100644 index 00000000..a2c773cd --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/tuple_fwd_decls.h @@ -0,0 +1,56 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_TUPLE_FWD_DECLS_H +#define EASTL_TUPLE_FWD_DECLS_H + +#include + +#if EASTL_TUPLE_ENABLED + +namespace eastl +{ + template + class tuple; + + template + class tuple_size; + + template + class tuple_element; + + template + using tuple_element_t = typename tuple_element::type; + + // const typename for tuple_element_t, for when tuple or TupleImpl cannot itself be const + template + using const_tuple_element_t = typename conditional< + is_lvalue_reference>::value, + add_lvalue_reference_t>>, + const tuple_element_t + >::type; + + // get + template + tuple_element_t>& get(tuple& t); + + template + const_tuple_element_t>& get(const tuple& t); + + template + tuple_element_t>&& get(tuple&& t); + + template + T& get(tuple& t); + + template + const T& get(const tuple& t); + + template + T&& get(tuple&& t); +} + +#endif // EASTL_VARIADIC_TEMPLATES_ENABLED + +#endif // EASTL_TUPLE_FWD_DECLS_H diff --git a/libkram/eastl/include/EASTL/internal/type_compound.h b/libkram/eastl/include/EASTL/internal/type_compound.h new file mode 100644 index 00000000..178a7342 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/type_compound.h @@ -0,0 +1,800 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_TYPE_COMPOUND_H +#define EASTL_INTERNAL_TYPE_COMPOUND_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + + +// Until we revise the code below to handle EDG warnings, we don't have much choice but to disable them. +#if defined(__EDG_VERSION__) + #pragma diag_suppress=1931 // operand of sizeof is not a type, variable, or dereferenced pointer expression +#endif + + +namespace eastl +{ + + /////////////////////////////////////////////////////////////////////// + // extent + // + // extent::value is an integral type representing the number of + // elements in the Ith dimension of array type T. + // + // For a given array type T[N], extent::value == N. + // For a given multi-dimensional array type T[M][N], extent::value == N. + // For a given multi-dimensional array type T[M][N], extent::value == M. + // For a given array type T and a given dimension I where I >= rank::value, extent::value == 0. + // For a given array type of unknown extent T[], extent::value == 0. + // For a given non-array type T and an arbitrary dimension I, extent::value == 0. + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_extent_CONFORMANCE 1 // extent is conforming. + + template + struct extent_help : public eastl::integral_constant {}; + + template + struct extent_help : public eastl::integral_constant {}; + + template + struct extent_help : public eastl::extent_help { }; + + template + struct extent_help : public eastl::extent_help {}; + + template // extent uses unsigned instead of size_t. + struct extent : public eastl::extent_help { }; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR auto extent_v = extent::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_array + // + // is_array::value == true if and only if T is an array type, + // including unbounded array types. 
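+    // Example usage (editorial sketch; the values follow from the specializations below):
+    //     is_array<int[5]>::value  => true
+    //     is_array<int[]>::value   => true      (arrays of unknown bound count as arrays)
+    //     is_array<int*>::value    => false
+    //     extent<int[]>::value     => 0         (see extent above: an unknown bound reports 0)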
+ // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_array_CONFORMANCE 1 // is_array is conforming; doesn't make mistakes. + + template + struct is_array : public eastl::false_type {}; + + template + struct is_array : public eastl::true_type {}; + + template + struct is_array : public eastl::true_type {}; + + #if !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + template + EA_CONSTEXPR bool is_array_v = is_array::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_array_of_known_bounds + // + // Not part of the C++11 Standard. + // is_array_of_known_bounds::value is true if T is an array and is + // of known bounds. is_array_of_unknown_bounds::value == true, + // while is_array_of_unknown_bounds::value = false. + // + /////////////////////////////////////////////////////////////////////// + + template + struct is_array_of_known_bounds + : public eastl::integral_constant::value != 0> {}; + + + /////////////////////////////////////////////////////////////////////// + // is_array_of_unknown_bounds + // + // Not part of the C++11 Standard. + // is_array_of_unknown_bounds::value is true if T is an array but is + // of unknown bounds. is_array_of_unknown_bounds::value == false, + // while is_array_of_unknown_bounds::value = true. + // + /////////////////////////////////////////////////////////////////////// + + template + struct is_array_of_unknown_bounds + : public eastl::integral_constant::value && (eastl::extent::value == 0)> {}; + + + /////////////////////////////////////////////////////////////////////// + // is_member_function_pointer + // + // is_member_function_pointer::value == true if and only if T is a + // pointer to member function type. + // + /////////////////////////////////////////////////////////////////////// + // We detect member functions with 0 to N arguments. We can extend this + // for additional arguments if necessary. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_member_function_pointer_CONFORMANCE 1 // is_member_function_pointer is conforming; doesn't make mistakes. + + // To do: Revise this to support C++11 variadic templates when possible. + // To do: We can probably also use remove_cv to simply the multitude of types below. 
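+
+    // Example usage (editorial sketch, using a hypothetical class Widget):
+    //     is_member_function_pointer<void (Widget::*)()>::value        => true
+    //     is_member_function_pointer<void (Widget::*)() const>::value  => true
+    //     is_member_function_pointer<void (*)()>::value                => false   (non-member function pointer)
+    //     is_member_function_pointer<int Widget::*>::value             => false   (pointer to data member)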
+ + template struct is_mem_fun_pointer_value : public false_type{}; + + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + template struct is_mem_fun_pointer_value : public true_type{}; + + template + struct is_member_function_pointer : public integral_constant::value>{}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_member_function_pointer_v = is_member_function_pointer::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_member_pointer + // + // is_member_pointer::value == true if and only if: + // is_member_object_pointer::value == true, or + // is_member_function_pointer::value == true + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_member_pointer_CONFORMANCE 1 // is_member_pointer is conforming; doesn't make mistakes. 
+ + template + struct is_member_pointer + : public eastl::integral_constant::value>{}; + + template + struct is_member_pointer + : public eastl::true_type{}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_member_pointer_v = is_member_pointer::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_member_object_pointer + // + // is_member_object_pointer::value == true if and only if T is a + // pointer to data member type. + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_member_object_pointer_CONFORMANCE 1 // is_member_object_pointer is conforming; doesn't make mistakes. + + template + struct is_member_object_pointer : public eastl::integral_constant::value && + !eastl::is_member_function_pointer::value + > {}; + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_member_object_pointer_v = is_member_object_pointer::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_pointer + // + // is_pointer::value == true if and only if T is a pointer type. + // This category includes function pointer types, but not pointer to + // member types. + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_pointer_CONFORMANCE 1 // is_pointer is conforming; doesn't make mistakes. + + template struct is_pointer_helper : public false_type{}; + + template struct is_pointer_helper : public true_type{}; + template struct is_pointer_helper : public true_type{}; + template struct is_pointer_helper : public true_type{}; + template struct is_pointer_helper : public true_type{}; + + template + struct is_pointer_value : public type_and::value, type_not::value>::value> {}; + + template + struct is_pointer : public integral_constant::value>{}; + + #if !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + template + EA_CONSTEXPR bool is_pointer_v = is_pointer::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_convertible + // + // Given two (possible identical) types From and To, is_convertible::value == true + // if and only if an lvalue of type From can be implicitly converted to type To, + // or is_void::value == true + // + // An instance of the type predicate holds true if the expression To to = from;, where from is an object of type From, is well-formed. + // + // is_convertible may only be applied to complete types. + // Type To may not be an abstract type. + // If the conversion is ambiguous, the program is ill-formed. + // If either or both of From and To are class types, and the conversion would invoke + // non-public member functions of either From or To (such as a private constructor of To, + // or a private conversion operator of From), the program is ill-formed. + // + // Note that without compiler help, both is_convertible and is_base + // can produce compiler errors if the conversion is ambiguous. + // Example: + // struct A {}; + // struct B : A {}; + // struct C : A {}; + // struct D : B, C {}; + // is_convertible::value; // Generates compiler error. + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_convertible_to))) + #define EASTL_TYPE_TRAIT_is_convertible_CONFORMANCE 1 // is_convertible is conforming. 
+ + // Problem: VC++ reports that int is convertible to short, yet if you construct a short from an int then VC++ generates a warning: + // warning C4242: 'initializing' : conversion from 'int' to 'short', possible loss of data. We can deal with this by making + // is_convertible be false for conversions that could result in loss of data. Or we could make another trait called is_lossless_convertible + // and use that appropriately in our code. Or we could put the onus on the user to work around such warnings. + template + struct is_convertible : public integral_constant{}; + + #else + #define EASTL_TYPE_TRAIT_is_convertible_CONFORMANCE 1 + + template::value || eastl::is_function::value || eastl::is_array::value > + struct is_convertible_helper // Anything is convertible to void. Nothing is convertible to a function or an array. + { static const bool value = eastl::is_void::value; }; + + template + class is_convertible_helper + { + template + static void ToFunction(To1); // We try to call this function with an instance of From. It is valid if From can be converted to To. + + template + static eastl::no_type is(...); + + template + static decltype(ToFunction(eastl::declval()), eastl::yes_type()) is(int); + + public: + static const bool value = sizeof(is(0)) == 1; + }; + + template + struct is_convertible + : public integral_constant::value> {}; + + #endif + + #if !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + template + EA_CONSTEXPR bool is_convertible_v = is_convertible::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_nothrow_convertible + // + // https://en.cppreference.com/w/cpp/types/is_convertible + // + // template + // struct is_explicitly_convertible + // : public is_constructible {}; + /////////////////////////////////////////////////////////////////////// + // TODO(rparolin): implement type-trait + + + + /////////////////////////////////////////////////////////////////////// + // is_explicitly_convertible + // + // This sometime-seen extension trait is the same as is_constructible + // and so we don't define it. + // + // template + // struct is_explicitly_convertible + // : public is_constructible {}; + /////////////////////////////////////////////////////////////////////// + + + + /////////////////////////////////////////////////////////////////////// + // is_union + // + // is_union::value == true if and only if T is a union type. + // + // There is no way to tell if a type is a union without compiler help. + // As of this writing, only Metrowerks v8+ supports such functionality + // via 'msl::is_union::value'. The user can force something to be + // evaluated as a union via EASTL_DECLARE_UNION. + /////////////////////////////////////////////////////////////////////// + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_union))) + #define EASTL_TYPE_TRAIT_is_union_CONFORMANCE 1 // is_union is conforming. + + template + struct is_union : public integral_constant{}; + #else + #define EASTL_TYPE_TRAIT_is_union_CONFORMANCE 0 // is_union is not fully conforming. 
+ + template struct is_union : public false_type{}; + #endif + + #define EASTL_DECLARE_UNION(T) namespace eastl{ template <> struct is_union : public true_type{}; template <> struct is_union : public true_type{}; } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_union_v = is_union::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_class + // + // is_class::value == true if and only if T is a class or struct + // type (and not a union type). + // + // Without specific compiler help, it is not possible to + // distinguish between unions and classes. As a result, is_class + // will erroneously evaluate to true for union types. + /////////////////////////////////////////////////////////////////////// + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_class))) + #define EASTL_TYPE_TRAIT_is_class_CONFORMANCE 1 // is_class is conforming. + + template + struct is_class : public integral_constant{}; + #elif defined(__EDG__) + #define EASTL_TYPE_TRAIT_is_class_CONFORMANCE EASTL_TYPE_TRAIT_is_union_CONFORMANCE + + typedef char yes_array_type[1]; + typedef char no_array_type[2]; + template static yes_array_type& is_class_helper(void (U::*)()); + template static no_array_type& is_class_helper(...); + + template + struct is_class : public integral_constant(0)) == sizeof(yes_array_type) && !is_union::value + >{}; + #elif !defined(__GNUC__) || (((__GNUC__ * 100) + __GNUC_MINOR__) >= 304) // Not GCC or GCC 3.4+ + #define EASTL_TYPE_TRAIT_is_class_CONFORMANCE EASTL_TYPE_TRAIT_is_union_CONFORMANCE + + template static yes_type is_class_helper(void (U::*)()); + template static no_type is_class_helper(...); + + template + struct is_class : public integral_constant(0)) == sizeof(yes_type) && !is_union::value + >{}; + #else + #define EASTL_TYPE_TRAIT_is_class_CONFORMANCE 0 // is_class is not fully conforming. + + // GCC 2.x version, due to GCC being broken. + template + struct is_class : public false_type{}; + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_class_v = is_class::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_enum + // + // is_enum::value == true if and only if T is an enumeration type. + // + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_enum))) + #define EASTL_TYPE_TRAIT_is_enum_CONFORMANCE 1 // is_enum is conforming. + + template + struct is_enum : public integral_constant{}; + #else + #define EASTL_TYPE_TRAIT_is_enum_CONFORMANCE 1 // is_enum is conforming. 
+ + struct int_convertible{ int_convertible(int); }; + + template + struct is_enum_helper { template struct nest : public is_convertible{}; }; + + template <> + struct is_enum_helper { template struct nest : public false_type {}; }; + + template + struct is_enum_helper2 + { + typedef type_or::value, is_reference::value, is_class::value> selector; + typedef is_enum_helper helper_t; + typedef typename add_reference::type ref_t; + typedef typename helper_t::template nest result; + }; + + template + struct is_enum : public integral_constant::result::value>{}; + + template <> struct is_enum : public false_type {}; + template <> struct is_enum : public false_type {}; + template <> struct is_enum : public false_type {}; + template <> struct is_enum : public false_type {}; + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_enum_v = is_enum::value; + #endif + + #define EASTL_DECLARE_ENUM(T) namespace eastl{ template <> struct is_enum : public true_type{}; template <> struct is_enum : public true_type{}; } + + + + + + /////////////////////////////////////////////////////////////////////// + // is_polymorphic + // + // is_polymorphic::value == true if and only if T is a class or struct + // that declares or inherits a virtual function. is_polymorphic may only + // be applied to complete types. + // + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_polymorphic))) + #define EASTL_TYPE_TRAIT_is_polymorphic_CONFORMANCE 1 // is_polymorphic is conforming. + + template + struct is_polymorphic : public integral_constant{}; + #else + #define EASTL_TYPE_TRAIT_is_polymorphic_CONFORMANCE 1 // is_polymorphic is conforming. + + template + struct is_polymorphic_imp1 + { + typedef typename remove_cv::type t; + + struct helper_1 : public t + { + helper_1(); + ~helper_1() throw(); + char pad[64]; + }; + + struct helper_2 : public t + { + helper_2(); + virtual ~helper_2() throw(); + #ifndef _MSC_VER + virtual void foo(); + #endif + char pad[64]; + }; + + static const bool value = (sizeof(helper_1) == sizeof(helper_2)); + }; + + template + struct is_polymorphic_imp2{ static const bool value = false; }; + + template + struct is_polymorphic_selector{ template struct rebind{ typedef is_polymorphic_imp2 type; }; }; + + template <> + struct is_polymorphic_selector{ template struct rebind{ typedef is_polymorphic_imp1 type; }; }; + + template + struct is_polymorphic_value{ + typedef is_polymorphic_selector::value> selector; + typedef typename selector::template rebind binder; + typedef typename binder::type imp_type; + static const bool value = imp_type::value; + }; + + template + struct is_polymorphic : public integral_constant::value>{}; + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_polymorphic_v = is_polymorphic::value; + #endif + + + + + /////////////////////////////////////////////////////////////////////// + // is_object + // + // is_object::value == true if and only if: + // is_reference::value == false, and + // is_function::value == false, and + // is_void::value == false + // + // The C++ standard, section 3.9p9, states: "An object type is a + // (possibly cv-qualified) type that is not a function type, not a + // reference type, and not incomplete (except for an incompletely + // defined object type). 
+ /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_object_CONFORMANCE (EASTL_TYPE_TRAIT_is_reference_CONFORMANCE && EASTL_TYPE_TRAIT_is_void_CONFORMANCE && EASTL_TYPE_TRAIT_is_function_CONFORMANCE) + + template + struct is_object : public integral_constant::value && !is_void::value && !is_function::value + >{}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_object_v = is_object::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_scalar + // + // is_scalar::value == true if and only if: + // is_arithmetic::value == true, or + // is_enum::value == true, or + // is_pointer::value == true, or + // is_member_pointer::value == true, or + // is_null_pointer::value == true + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_scalar_CONFORMANCE 1 // is_scalar is conforming. + + template + struct is_scalar : public integral_constant::value || is_enum::value || is_pointer::value || + is_member_pointer::value || + is_null_pointer::value> {}; + + template struct is_scalar : public true_type {}; + template struct is_scalar : public true_type {}; + template struct is_scalar : public true_type {}; + template struct is_scalar : public true_type {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_scalar_v = is_scalar::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_compound + // + // Compound means anything but fundamental. See C++ standard, section 3.9.2. + // + // is_compound::value == true if and only if: + // is_fundamental::value == false + // + // Thus, is_compound::value == true if and only if: + // is_floating_point::value == false, and + // is_integral::value == false, and + // is_void::value == false + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_compound_CONFORMANCE EASTL_TYPE_TRAIT_is_fundamental_CONFORMANCE + + template + struct is_compound : public integral_constant::value>{}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_compound_v = is_compound::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // decay + // + // Converts the type T to its decayed equivalent. That means doing + // lvalue to rvalue, array to pointer, function to pointer conversions, + // and removal of const and volatile. + // This is the type conversion silently applied by the compiler to + // all function arguments when passed by value. + + #define EASTL_TYPE_TRAIT_decay_CONFORMANCE 1 // decay is conforming. + + template + struct decay + { + typedef typename eastl::remove_reference::type U; + + typedef typename eastl::conditional< + eastl::is_array::value, + typename eastl::remove_extent::type*, + typename eastl::conditional< + eastl::is_function::value, + typename eastl::add_pointer::type, + typename eastl::remove_cv::type + >::type + >::type type; + }; + + + // decay_t is the C++14 using typedef for typename decay::type, though + // it requires only C++11 compiler functionality to implement. + // We provide a backwards-compatible means to access it through a macro for pre-C++11 compilers. 
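+
+    // Example usage (editorial sketch; each result follows from the conditional chain above):
+    //     decay<const int&>::type  => int             (reference removed, then top-level cv)
+    //     decay<int[4]>::type      => int*            (array-to-pointer conversion)
+    //     decay<void (int)>::type  => void (*)(int)   (function-to-pointer conversion)
+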
+ #if defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + #define EASTL_DECAY_T(T) typename decay::type + #else + template + using decay_t = typename decay::type; + #define EASTL_DECAY_T(T) decay_t + #endif + + + /////////////////////////////////////////////////////////////////////// + // common_type + // + // Determines the common type among all types T..., that is the type all T... + // can be implicitly converted to. + // + // It is intended that this be specialized by the user for cases where it + // is useful to do so. Example specialization: + // template + // struct common_type{ typedef MyBaseClassB type; }; + // + // The member typedef type shall be defined as set out in 20.9.7.6,p3. All types in + // the parameter pack T shall be complete or (possibly cv) void. A program may + // specialize this trait if at least one template parameter in the specialization + // is a user-defined type. Note: Such specializations are needed when only + // explicit conversions are desired among the template arguments. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_common_type_CONFORMANCE 1 // common_type is conforming. + + template + struct common_type; + + template + struct common_type + { typedef decay_t type; }; // Question: Should we use T or decay_t here? The C++11 Standard specifically (20.9.7.6,p3) specifies that it be without decay, but libc++ uses decay. + + template + struct common_type + { + typedef decay_t() : declval())> type; // The type of a tertiary expression is set by the compiler to be the common type of the two result types. + }; + + template + struct common_type + { typedef typename common_type::type, V...>::type type; }; + + + // common_type_t is the C++14 using typedef for typename common_type::type. + // We provide a backwards-compatible means to access it through a macro for pre-C++11 compilers. + #if defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + #define EASTL_COMMON_TYPE_T(...) typename common_type<__VA_ARGS__>::type + #else + template + using common_type_t = typename common_type::type; + #define EASTL_COMMON_TYPE_T(...) 
common_type_t<__VA_ARGS__> + #endif + + /////////////////////////////////////////////////////////////////////// + // is_final + /////////////////////////////////////////////////////////////////////// + #if EA_COMPILER_HAS_FEATURE(is_final) + template + struct is_final : public integral_constant {}; + #else + // no compiler support so we always return false + template + struct is_final : public false_type {}; + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_final_v = is_final::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_aggregate + // + // https://en.cppreference.com/w/cpp/language/aggregate_initialization + // + // An aggregate is one of the following types: + // * array type + // * class type (typically, struct or union), that has + // * no private or protected non-static data members + // * no user-provided constructors (explicitly defaulted or deleted constructors are allowed) + // * no user-provided, inherited, or explicit constructors + // * (explicitly defaulted or deleted constructors are allowed) + // * no virtual, private, or protected (since C++17) base classes + // * no virtual member functions + // * no default member initializers + // + /////////////////////////////////////////////////////////////////////// + #if EA_COMPILER_HAS_FEATURE(is_aggregate) || defined(_MSC_VER) && (_MSC_VER >= 1916) // VS2017 15.9+ + #define EASTL_TYPE_TRAIT_is_aggregate_CONFORMANCE 1 + + template + struct is_aggregate : public integral_constant {}; + #else + #define EASTL_TYPE_TRAIT_is_aggregate_CONFORMANCE 0 + + // no compiler support so we always return false + template + struct is_aggregate : public false_type {}; + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_aggregate_v = is_aggregate::value; + #endif +} // namespace eastl + + +#endif // Header include guard + + + + diff --git a/libkram/eastl/include/EASTL/internal/type_fundamental.h b/libkram/eastl/include/EASTL/internal/type_fundamental.h new file mode 100644 index 00000000..950d15e3 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/type_fundamental.h @@ -0,0 +1,289 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_TYPE_FUNDAMENTAL_H +#define EASTL_INTERNAL_TYPE_FUNDAMENTAL_H + + +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +namespace eastl +{ + + + /////////////////////////////////////////////////////////////////////// + // is_void + // + // is_void::value == true if and only if T is one of the following types: + // [const][volatile] void + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_void_CONFORMANCE 1 // is_void is conforming. + + template struct is_void : public false_type{}; + + template <> struct is_void : public true_type{}; + template <> struct is_void : public true_type{}; + template <> struct is_void : public true_type{}; + template <> struct is_void : public true_type{}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_void_v = is_void::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // has_void_arg + // + // utility which identifies if any of the given template arguments is void. 
+ // + // TODO(rparolin): refactor with fold expressions when C++17 compilers are widely available. + /////////////////////////////////////////////////////////////////////// + + template + struct has_void_arg; + + template <> + struct has_void_arg<> + : public eastl::false_type {}; + + template + struct has_void_arg + { static const bool value = (eastl::is_void::value || eastl::has_void_arg::value); }; + + + /////////////////////////////////////////////////////////////////////// + // is_null_pointer + // + // C++14 type trait. Refers only to nullptr_t and not NULL (0). + // eastl::is_null_pointer::value == true + // eastl::is_null_pointer::value == true + // eastl::is_null_pointer::value == false + // eastl::is_null_pointer::value == [cannot compile] + // + /////////////////////////////////////////////////////////////////////// + + #if defined(EA_COMPILER_CPP11_ENABLED) && !defined(EA_COMPILER_NO_DECLTYPE) && !defined(_MSC_VER) // VC++'s handling of decltype(nullptr) is broken. + #define EASTL_TYPE_TRAIT_is_null_pointer_CONFORMANCE 1 + + template + struct is_null_pointer : public eastl::is_same::type, decltype(nullptr)> {}; // A C++11 compiler defines nullptr, but you need a C++11 standard library to declare std::nullptr_t. So it's safer to compare against decltype(nullptr) than to use std::nullptr_t, because we may have a C++11 compiler but C++98 library (happens with Apple frequently). + #else + #define EASTL_TYPE_TRAIT_is_null_pointer_CONFORMANCE 1 + + template + struct is_null_pointer : public eastl::is_same::type, std::nullptr_t> {}; + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_null_pointer_v = is_null_pointer::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_integral + // + // is_integral::value == true if and only if T is one of the following types: + // [const] [volatile] bool + // [const] [volatile] char + // [const] [volatile] signed char + // [const] [volatile] unsigned char + // [const] [volatile] wchar_t + // [const] [volatile] short + // [const] [volatile] int + // [const] [volatile] long + // [const] [volatile] long long + // [const] [volatile] unsigned short + // [const] [volatile] unsigned int + // [const] [volatile] unsigned long + // [const] [volatile] unsigned long long + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_integral_CONFORMANCE 1 // is_integral is conforming. 
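+
+    // Example usage (editorial sketch):
+    //     is_integral<int>::value         => true
+    //     is_integral<const bool>::value  => true     (cv-qualifiers are stripped via remove_cv below)
+    //     is_integral<float>::value       => false
+    //     is_integral<int*>::value        => false
+    // A user-defined integer-like type can opt in with the EASTL_DECLARE_INTEGRAL(T) macro defined
+    // below, which specializes is_integral for T and its cv-qualified forms.
+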
+ + template struct is_integral_helper : public false_type{}; + + template <> struct is_integral_helper : public true_type{}; + template <> struct is_integral_helper : public true_type{}; + template <> struct is_integral_helper : public true_type{}; + template <> struct is_integral_helper : public true_type{}; + template <> struct is_integral_helper : public true_type{}; + + template <> struct is_integral_helper : public true_type{}; + template <> struct is_integral_helper : public true_type{}; + template <> struct is_integral_helper : public true_type{}; + template <> struct is_integral_helper : public true_type{}; + template <> struct is_integral_helper : public true_type{}; + + template <> struct is_integral_helper : public true_type{}; + template <> struct is_integral_helper : public true_type{}; + #if defined(EA_CHAR16_NATIVE) && EA_CHAR16_NATIVE + template <> struct is_integral_helper : public true_type{}; + #endif + #if defined(EA_CHAR32_NATIVE) && EA_CHAR32_NATIVE + template <> struct is_integral_helper : public true_type{}; + #endif + #ifndef EA_WCHAR_T_NON_NATIVE // If wchar_t is a native type instead of simply a define to an existing type which is already handled above... + template <> struct is_integral_helper : public true_type{}; + #endif + #if EASTL_INT128_SUPPORTED && (defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) + template <> struct is_integral_helper<__int128_t> : public true_type{}; + template <> struct is_integral_helper<__uint128_t> : public true_type{}; + #endif + + template + struct is_integral : public eastl::is_integral_helper::type>{}; + + #define EASTL_DECLARE_INTEGRAL(T) \ + namespace eastl{ \ + template <> struct is_integral : public true_type{}; \ + template <> struct is_integral : public true_type{}; \ + template <> struct is_integral : public true_type{}; \ + template <> struct is_integral : public true_type{}; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_integral_v = is_integral::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_floating_point + // + // is_floating_point::value == true if and only if T is one of the following types: + // [const] [volatile] float + // [const] [volatile] double + // [const] [volatile] long double + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_floating_point_CONFORMANCE 1 // is_floating_point is conforming. 
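The EASTL_DECLARE_INTEGRAL macro shown above is the escape hatch for integer-like types that the built-in specializations do not cover. A minimal sketch, using a hypothetical MyInt128 struct as a stand-in for such a compiler-extension type:

    #include <EASTL/type_traits.h>

    // Hypothetical stand-in for a compiler-specific extended integer type.
    struct MyInt128 { long long hi; unsigned long long lo; };

    // Must be used at global scope: the macro opens namespace eastl and
    // specializes is_integral for T, const T, volatile T and const volatile T.
    EASTL_DECLARE_INTEGRAL(MyInt128)

    static_assert(eastl::is_integral<MyInt128>::value, "declared integral");
    static_assert(eastl::is_integral<const volatile MyInt128>::value, "cv variants too");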
+ + template struct is_floating_point_helper : public false_type{}; + + template <> struct is_floating_point_helper : public true_type{}; + template <> struct is_floating_point_helper : public true_type{}; + template <> struct is_floating_point_helper : public true_type{}; + + template + struct is_floating_point : public eastl::is_floating_point_helper::type>{}; + + #define EASTL_DECLARE_FLOATING_POINT(T) \ + namespace eastl{ \ + template <> struct is_floating_point : public true_type{}; \ + template <> struct is_floating_point : public true_type{}; \ + template <> struct is_floating_point : public true_type{}; \ + template <> struct is_floating_point : public true_type{}; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_floating_point_v = is_floating_point::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_arithmetic + // + // is_arithmetic::value == true if and only if: + // is_floating_point::value == true, or + // is_integral::value == true + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_arithmetic_CONFORMANCE 1 // is_arithmetic is conforming. + + template + struct is_arithmetic + : public integral_constant::value || is_floating_point::value> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_arithmetic_v = is_arithmetic::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_fundamental + // + // is_fundamental::value == true if and only if: + // is_floating_point::value == true, or + // is_integral::value == true, or + // is_void::value == true + // is_null_pointer::value == true + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_fundamental_CONFORMANCE 1 // is_fundamental is conforming. + + template + struct is_fundamental + : public bool_constant || is_integral_v || is_floating_point_v || is_null_pointer_v> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_fundamental_v = is_fundamental::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_hat_type + // + // is_hat_type::value == true if and only if: + // underlying type is a C++/CX '^' type such as: Foo^ + // meaning the type is heap allocated and ref-counted + /////////////////////////////////////////////////////////////////////// + + template struct is_hat_type_helper : public false_type {}; + + #if (EABASE_VERSION_N > 20607 && defined(EA_COMPILER_WINRTCX_ENABLED)) || defined(__cplusplus_winrt) + template struct is_hat_type_helper : public true_type{}; + #endif + + template + struct is_hat_type : public eastl::is_hat_type_helper {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_hat_type_v = is_hat_type::value; + #endif + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/internal/type_pod.h b/libkram/eastl/include/EASTL/internal/type_pod.h new file mode 100644 index 00000000..8726a7e6 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/type_pod.h @@ -0,0 +1,1945 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_TYPE_POD_H +#define EASTL_INTERNAL_TYPE_POD_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include + +namespace eastl +{ + /////////////////////////////////////////////////////////////////////// + // is_empty + // + // is_empty::value == true if and only if T is an empty class or struct. + // is_empty may only be applied to complete types. + // + // is_empty cannot be used with union types until is_union can be made to work. + /////////////////////////////////////////////////////////////////////// + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_empty))) + #define EASTL_TYPE_TRAIT_is_empty_CONFORMANCE 1 // is_empty is conforming. + + template + struct is_empty : public integral_constant{}; + #else + #define EASTL_TYPE_TRAIT_is_empty_CONFORMANCE 1 // is_empty is fully conforming. + + template + struct is_empty_helper_t1 : public T { char m[64]; }; + struct is_empty_helper_t2 { char m[64]; }; + + // The inheritance in empty_helper_t1 will not work with non-class types + template + struct is_empty_helper : public eastl::false_type{}; + + template + struct is_empty_helper : public eastl::integral_constant) == sizeof(is_empty_helper_t2) + >{}; + + template + struct is_empty_helper2 + { + typedef typename eastl::remove_cv::type _T; + typedef eastl::is_empty_helper<_T, eastl::is_class<_T>::value> type; + }; + + template + struct is_empty : public eastl::is_empty_helper2::type {}; + #endif + + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_empty_v = is_empty::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_pod + // + // is_pod::value == true if and only if, for a given type T: + // - is_scalar::value == true, or + // - T is a class or struct that has no user-defined copy assignment + // operator or destructor, and T has no non-static data members M for + // which is_pod::value == false, and no members of reference type, or + // - T is the type of an array of objects E for which is_pod::value == true + // + // is_pod may only be applied to complete types. + // + // Without some help from the compiler or user, is_pod will not report + // that a struct or class is a POD, but will correctly report that + // built-in types such as int are PODs. The user can help the compiler + // by using the EASTL_DECLARE_POD macro on a class. + /////////////////////////////////////////////////////////////////////// + + #if defined(EA_COMPILER_MSVC) + #define EASTL_TYPE_TRAIT_is_pod_CONFORMANCE 1 // is_pod is conforming. Actually as of VS2008 it is apparently not fully conforming, as it flags the following as a non-pod: struct Pod{ Pod(){} }; + + EA_DISABLE_VC_WARNING(4647) + template // We check for has_trivial_constructor only because the VC++ is_pod does. Is it due to some compiler bug? + struct is_pod : public eastl::integral_constant::value) || eastl::is_void::value || eastl::is_scalar::value>{}; + EA_RESTORE_VC_WARNING() + + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(EA_COMPILER_GNUC) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_pod))) + #define EASTL_TYPE_TRAIT_is_pod_CONFORMANCE 1 // is_pod is conforming. 
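To make the is_empty/is_pod behavior concrete, here is a small sketch (hypothetical Tag and Point types, C++11 static_assert assumed). On compilers without the intrinsic, is_pod can return a false negative for a user struct, which is exactly what the EASTL_DECLARE_POD helper mentioned above is for:

    #include <EASTL/type_traits.h>

    struct Tag {};                      // no non-static data members: empty
    struct Point { float x, y, z; };    // scalars only, no user ctor/dtor

    static_assert(eastl::is_empty<Tag>::value, "empty class");
    static_assert(eastl::is_pod<int>::value, "scalars are always PODs");

    // Assert POD-ness explicitly so non-conforming compilers agree.
    EASTL_DECLARE_POD(Point)

    static_assert(eastl::is_pod<Point>::value, "declared POD");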
+ + template + struct is_pod : public eastl::integral_constant::value || eastl::is_scalar::value>{}; + #else + #define EASTL_TYPE_TRAIT_is_pod_CONFORMANCE 0 // is_pod is not conforming. Can return false negatives. + + template // There's not much we can do here without some compiler extension. + struct is_pod : public eastl::integral_constant::value || eastl::is_scalar::type>::value>{}; + #endif + + template + struct is_pod : public is_pod{}; + + template + struct is_POD : public is_pod{}; // Backwards compatibility. + + #define EASTL_DECLARE_IS_POD(T, isPod) \ + namespace eastl { \ + template <> struct is_pod : public eastl::integral_constant { }; \ + template <> struct is_pod : public eastl::integral_constant { }; \ + template <> struct is_pod : public eastl::integral_constant { }; \ + template <> struct is_pod : public eastl::integral_constant { }; \ + } + + // Old style macro, for bacwards compatibility: + #define EASTL_DECLARE_POD(T) namespace eastl{ template <> struct is_pod : public true_type{}; template <> struct is_pod : public true_type{}; } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_pod_v = is_pod::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_standard_layout + // + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && ((defined(EA_COMPILER_MSVC) && (_MSC_VER >= 1700)) || (defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4006)) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_standard_layout))) + #define EASTL_TYPE_TRAIT_is_standard_layout_CONFORMANCE 1 // is_standard_layout is conforming. + + template + struct is_standard_layout : public eastl::integral_constant::value || eastl::is_scalar::value>{}; + #else + #define EASTL_TYPE_TRAIT_is_standard_layout_CONFORMANCE 0 // is_standard_layout is not conforming. Can return false negatives. + + template // There's not much we can do here without some compiler extension. + struct is_standard_layout : public eastl::integral_constant::value || is_scalar::value>{}; + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_standard_layout_v = is_standard_layout::value; + #endif + + #define EASTL_DECLARE_IS_STANDARD_LAYOUT(T, isStandardLayout) \ + namespace eastl { \ + template <> struct is_standard_layout : public eastl::integral_constant { }; \ + template <> struct is_standard_layout : public eastl::integral_constant { }; \ + template <> struct is_standard_layout : public eastl::integral_constant { }; \ + template <> struct is_standard_layout : public eastl::integral_constant { }; \ + } + + // Old style macro, for bacwards compatibility: + #define EASTL_DECLARE_STANDARD_LAYOUT(T) namespace eastl{ template <> struct is_standard_layout : public true_type{}; template <> struct is_standard_layout : public true_type{}; } + + + + /////////////////////////////////////////////////////////////////////// + // has_trivial_constructor + // + // has_trivial_constructor::value == true if and only if T is a class + // or struct that has a trivial constructor. 
A constructor is trivial if + // - it is implicitly defined by the compiler, and + // - is_polymorphic::value == false, and + // - T has no virtual base classes, and + // - for every direct base class of T, has_trivial_constructor::value == true, + // where B is the type of the base class, and + // - for every nonstatic data member of T that has class type or array + // of class type, has_trivial_constructor::value == true, + // where M is the type of the data member + // + // has_trivial_constructor may only be applied to complete types. + // + // Without from the compiler or user, has_trivial_constructor will not + // report that a class or struct has a trivial constructor. + // The user can use EASTL_DECLARE_TRIVIAL_CONSTRUCTOR to help the compiler. + // + // A default constructor for a class X is a constructor of class X that + // can be called without an argument. + /////////////////////////////////////////////////////////////////////// + + #if defined(_MSC_VER) && (_MSC_VER >= 1600) // VS2010+ + #define EASTL_TYPE_TRAIT_has_trivial_constructor_CONFORMANCE 1 // has_trivial_constructor is conforming. + + template + struct has_trivial_constructor : public eastl::integral_constant::value) && !eastl::is_hat_type::value>{}; + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) + #define EASTL_TYPE_TRAIT_has_trivial_constructor_CONFORMANCE 1 // has_trivial_constructor is conforming. + + template + struct has_trivial_constructor : public eastl::integral_constant::value>{}; + #else + #define EASTL_TYPE_TRAIT_has_trivial_constructor_CONFORMANCE 0 // has_trivial_constructor is not fully conforming. Can return false negatives. + + // With current compilers, this is all we can do. + template + struct has_trivial_constructor : public eastl::is_pod {}; + #endif + + #define EASTL_DECLARE_HAS_TRIVIAL_CONSTRUCTOR(T, hasTrivialConstructor) \ + namespace eastl { \ + template <> struct has_trivial_constructor : public eastl::integral_constant { }; \ + } + + // Old style macro, for bacwards compatibility: + #define EASTL_DECLARE_TRIVIAL_CONSTRUCTOR(T) namespace eastl{ template <> struct has_trivial_constructor : public true_type{}; template <> struct has_trivial_constructor : public true_type{}; } + + + + + /////////////////////////////////////////////////////////////////////// + // has_trivial_copy + // + // has_trivial_copy::value == true if and only if T is a class or + // struct that has a trivial copy constructor. A copy constructor is + // trivial if + // - it is implicitly defined by the compiler, and + // - is_polymorphic::value == false, and + // - T has no virtual base classes, and + // - for every direct base class of T, has_trivial_copy::value == true, + // where B is the type of the base class, and + // - for every nonstatic data member of T that has class type or array + // of class type, has_trivial_copy::value == true, where M is the + // type of the data member + // + // has_trivial_copy may only be applied to complete types. + // + // Another way of looking at this is: + // A copy constructor for class X is trivial if it is implicitly + // declared and if all the following are true: + // - Class X has no virtual functions (10.3) and no virtual base classes (10.1). + // - Each direct base class of X has a trivial copy constructor. 
+ // - For all the nonstatic data members of X that are of class type + // (or array thereof), each such class type has a trivial copy constructor; + // otherwise the copy constructor is nontrivial. + // + // Without help from the compiler or user, has_trivial_copy will not report + // that a class or struct has a trivial copy constructor. The user can + // use EASTL_DECLARE_TRIVIAL_COPY to help the compiler. + /////////////////////////////////////////////////////////////////////// + + #if defined(_MSC_VER) + #define EASTL_TYPE_TRAIT_has_trivial_copy_CONFORMANCE 1 // has_trivial_copy is conforming. + + template + struct has_trivial_copy : public eastl::integral_constant::value) && !eastl::is_volatile::value && !eastl::is_hat_type::value>{}; + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) + #define EASTL_TYPE_TRAIT_has_trivial_copy_CONFORMANCE 1 // has_trivial_copy is conforming. + + template + struct has_trivial_copy : public eastl::integral_constant::value) && (!eastl::is_volatile::value && !eastl::is_reference::value)>{}; + #else + #define EASTL_TYPE_TRAIT_has_trivial_copy_CONFORMANCE 0 // has_trivial_copy is not fully conforming. Can return false negatives. + + template + struct has_trivial_copy : public eastl::integral_constant::value && !eastl::is_volatile::value>{}; + #endif + + #define EASTL_DECLARE_HAS_TRIVIAL_COPY(T, hasTrivialCopy) \ + namespace eastl { \ + template <> struct has_trivial_copy : public eastl::integral_constant { }; \ + } + + // Old style macro, for bacwards compatibility: + #define EASTL_DECLARE_TRIVIAL_COPY(T) namespace eastl{ template <> struct has_trivial_copy : public true_type{}; template <> struct has_trivial_copy : public true_type{}; } + + + + + /////////////////////////////////////////////////////////////////////// + // has_trivial_assign + // + // has_trivial_assign::value == true if and only if T is a class or + // struct that has a trivial copy assignment operator. A copy assignment + // operator is trivial if: + // - it is implicitly defined by the compiler, and + // - is_polymorphic::value == false, and + // - T has no virtual base classes, and + // - for every direct base class of T, has_trivial_assign::value == true, + // where B is the type of the base class, and + // - for every nonstatic data member of T that has class type or array + // of class type, has_trivial_assign::value == true, where M is + // the type of the data member. + // + // has_trivial_assign may only be applied to complete types. + // + // Without from the compiler or user, has_trivial_assign will not + // report that a class or struct has trivial assignment. The user + // can use EASTL_DECLARE_TRIVIAL_ASSIGN to help the compiler. + /////////////////////////////////////////////////////////////////////// + + #if defined(_MSC_VER) && (_MSC_VER >= 1600) + #define EASTL_TYPE_TRAIT_has_trivial_assign_CONFORMANCE 1 // has_trivial_assign is conforming. + + template + struct has_trivial_assign : public integral_constant::value) && !eastl::is_const::value && !eastl::is_volatile::value && !eastl::is_hat_type::value>{}; + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) + #define EASTL_TYPE_TRAIT_has_trivial_assign_CONFORMANCE 1 // has_trivial_assign is conforming. 
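As a sketch of how the trivial-copy machinery above is used in practice (Color is a hypothetical aggregate): scalars are always reported as trivially copyable, and the old-style declaration macro covers user types on the non-intrinsic fallback path:

    #include <EASTL/type_traits.h>

    struct Color { unsigned char r, g, b, a; };   // plain aggregate

    static_assert(eastl::has_trivial_copy<int>::value, "scalars copy trivially");

    // The fallback implementation may report a false negative for Color,
    // so state the property explicitly at global scope.
    EASTL_DECLARE_TRIVIAL_COPY(Color)

    static_assert(eastl::has_trivial_copy<Color>::value, "declared trivially copyable");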
+ + template + struct has_trivial_assign : public integral_constant::value) && !eastl::is_const::value && !eastl::is_volatile::value>{}; + #else + #define EASTL_TYPE_TRAIT_has_trivial_assign_CONFORMANCE 0 // is_pod is not fully conforming. Can return false negatives. + + template + struct has_trivial_assign : public integral_constant::value && !is_const::value && !is_volatile::value + >{}; + #endif + + #define EASTL_DECLARE_HAS_TRIVIAL_ASSIGN(T, hasTrivialAssign) \ + namespace eastl { \ + template <> struct has_trivial_assign : public eastl::integral_constant { }; \ + } + + // Old style macro, for bacwards compatibility: + #define EASTL_DECLARE_TRIVIAL_ASSIGN(T) namespace eastl{ template <> struct has_trivial_assign : public true_type{}; template <> struct has_trivial_assign : public true_type{}; } + + + + + /////////////////////////////////////////////////////////////////////// + // has_trivial_destructor + // + // has_trivial_destructor::value == true if and only if T is a class + // or struct that has a trivial destructor. A destructor is trivial if + // - it is implicitly defined by the compiler, and + // - for every direct base class of T, has_trivial_destructor::value == true, + // where B is the type of the base class, and + // - for every nonstatic data member of T that has class type or + // array of class type, has_trivial_destructor::value == true, + // where M is the type of the data member + // + // has_trivial_destructor may only be applied to complete types. + // + // Without from the compiler or user, has_trivial_destructor will not + // report that a class or struct has a trivial destructor. + // The user can use EASTL_DECLARE_TRIVIAL_DESTRUCTOR to help the compiler. + /////////////////////////////////////////////////////////////////////// + + #if defined(_MSC_VER) && (_MSC_VER >= 1600) + #define EASTL_TYPE_TRAIT_has_trivial_destructor_CONFORMANCE 1 // has_trivial_destructor is conforming. + + template + struct has_trivial_destructor : public eastl::integral_constant::value) && !eastl::is_hat_type::value>{}; + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) + #define EASTL_TYPE_TRAIT_has_trivial_destructor_CONFORMANCE 1 // has_trivial_destructor is conforming. + + template + struct has_trivial_destructor : public eastl::integral_constant::value>{}; + #else + #define EASTL_TYPE_TRAIT_has_trivial_destructor_CONFORMANCE 0 // is_pod is not fully conforming. Can return false negatives. + + // With current compilers, this is all we can do. + template + struct has_trivial_destructor : public eastl::is_pod{}; + #endif + + #define EASTL_DECLARE_HAS_TRIVIAL_DESTRUCTOR(T, hasTrivialDestructor) \ + namespace eastl { \ + template <> struct has_trivial_destructor : public eastl::integral_constant { }; \ + } + + // Old style macro, for bacwards compatibility: + #define EASTL_DECLARE_TRIVIAL_DESTRUCTOR(T) namespace eastl{ template <> struct has_trivial_destructor : public true_type{}; template <> struct has_trivial_destructor : public true_type{}; } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool has_trivial_destructor_v = has_trivial_destructor::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // has_trivial_relocate + // + // This is an EA extension to the type traits standard. 
+ // This trait is deprecated under conforming C++11 compilers, as C++11 + // move functionality supercedes this functionality and we want to + // migrate away from it in the future. + // + // A trivially relocatable object is one that can be safely memmove'd + // to uninitialized memory. construction, assignment, and destruction + // properties are not addressed by this trait. A type that has the + // is_fundamental trait would always have the has_trivial_relocate trait. + // A type that has the has_trivial_constructor, has_trivial_copy or + // has_trivial_assign traits would usally have the has_trivial_relocate + // trait, but this is not strictly guaranteed. + // + // The user can use EASTL_DECLARE_TRIVIAL_RELOCATE to help the compiler. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_has_trivial_relocate_CONFORMANCE 0 // is_pod is not fully conforming. Can return false negatives. + + template + struct has_trivial_relocate : public eastl::bool_constant && !eastl::is_volatile_v> {}; + + #define EASTL_DECLARE_TRIVIAL_RELOCATE(T) namespace eastl{ template <> struct has_trivial_relocate : public true_type{}; template <> struct has_trivial_relocate : public true_type{}; } + + + + + /////////////////////////////////////////////////////////////////////// + // has_nothrow_constructor + // + // has_nothrow_constructor::value == true if and only if T is a + // class or struct whose default constructor has an empty throw specification. + // + // has_nothrow_constructor may only be applied to complete types. + // + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) + #define EASTL_TYPE_TRAIT_has_nothrow_constructor_CONFORMANCE 1 + + template + struct has_nothrow_constructor + : public eastl::integral_constant{}; + + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && defined(_MSC_VER) + // Microsoft's implementation of __has_nothrow_constructor is crippled and returns true only if T is a class that has an explicit constructor. + // "Returns true if the default constructor has an empty exception specification." + #define EASTL_TYPE_TRAIT_has_nothrow_constructor_CONFORMANCE 0 + + template // This is mistakenly returning true for an unbounded array of scalar type. + struct has_nothrow_constructor : public eastl::integral_constant::type>::value || eastl::is_reference::value>{}; + + #else + #define EASTL_TYPE_TRAIT_has_nothrow_constructor_CONFORMANCE 0 // has_nothrow_constructor is not fully conforming. Can return false negatives. + + template + struct has_nothrow_constructor // To do: Improve this to include other types that can work. + { static const bool value = eastl::is_scalar::type>::value || eastl::is_reference::value; }; + #endif + + #define EASTL_DECLARE_HAS_NOTHROW_CONSTRUCTOR(T, hasNothrowConstructor) \ + namespace eastl { \ + template <> struct has_nothrow_constructor : public eastl::integral_constant { }; \ + } + + + + /////////////////////////////////////////////////////////////////////// + // has_nothrow_copy + // + // has_nothrow_copy::value == true if and only if T is a class or + // struct whose copy constructor has an empty throw specification. + // + // has_nothrow_copy may only be applied to complete types. 
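A sketch of the has_trivial_relocate extension described above, using a hypothetical Blob type: it owns heap memory (so it is not a POD), but it stores no pointers into itself, so moving its bytes to uninitialized memory is safe and the type can be opted in explicitly:

    #include <EASTL/type_traits.h>
    #include <cstddef>

    class Blob
    {
    public:
        Blob() : mData(nullptr), mSize(0) {}
        ~Blob() { delete[] mData; }
        // copy/move handling omitted for brevity
    private:
        unsigned char* mData;   // heap allocation, no self-references
        size_t         mSize;
    };

    EASTL_DECLARE_TRIVIAL_RELOCATE(Blob)

    static_assert(eastl::has_trivial_relocate<Blob>::value, "declared relocatable");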
+ // + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) + #define EASTL_TYPE_TRAIT_has_nothrow_copy_CONFORMANCE 1 + + template + struct has_nothrow_copy : public eastl::integral_constant{}; + + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && defined(_MSC_VER) + // Microsoft's implementation of __has_nothrow_copy is crippled and returns true only if T is a class that has a copy constructor. + // "Returns true if the copy constructor has an empty exception specification." + #define EASTL_TYPE_TRAIT_has_nothrow_copy_CONFORMANCE 0 + + template + struct has_nothrow_copy : public eastl::integral_constant::type>::value || eastl::is_reference::value>{}; + + #else + #define EASTL_TYPE_TRAIT_has_nothrow_copy_CONFORMANCE 0 // has_nothrow_copy is not fully conforming. Can return false negatives. + + template + struct has_nothrow_copy // To do: Improve this to include other types that can work. + { static const bool value = eastl::is_scalar::type>::value || eastl::is_reference::value; }; + #endif + + #define EASTL_DECLARE_HAS_NOTHROW_COPY(T, hasNothrowCopy) \ + namespace eastl { \ + template <> struct has_nothrow_copy : public eastl::integral_constant { }; \ + } + + + + /////////////////////////////////////////////////////////////////////// + // has_nothrow_assign + // + // has_nothrow_assign::value == true if and only if T is a class or + // struct whose copy assignment operator has an empty throw specification. + // + // has_nothrow_assign may only be applied to complete types. + // + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) + #define EASTL_TYPE_TRAIT_has_nothrow_assign_CONFORMANCE 1 + + template + struct has_nothrow_assign : public eastl::integral_constant{}; + + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && defined(_MSC_VER) + // Microsoft's implementation of __has_nothrow_assign is crippled and returns true only if T is a class that has an assignment operator. + // "Returns true if a copy assignment operator has an empty exception specification." + #define EASTL_TYPE_TRAIT_has_nothrow_assign_CONFORMANCE 0 + + template // This is mistakenly returning true for an unbounded array of scalar type. + struct has_nothrow_assign : public eastl::integral_constant::type>::value || eastl::is_reference::value>{}; + #else + #define EASTL_TYPE_TRAIT_has_nothrow_assign_CONFORMANCE 0 // has_nothrow_assign is not fully conforming. Can return false negatives. + + template + struct has_nothrow_assign // To do: Improve this to include other types that can work. + { static const bool value = eastl::is_scalar::type>::value || eastl::is_reference::value; } ; + #endif + + #define EASTL_DECLARE_HAS_NOTHROW_ASSIGN(T, hasNothrowAssign) \ + namespace eastl { \ + template <> struct has_nothrow_assign : public eastl::integral_constant { }; \ + } + + + + /////////////////////////////////////////////////////////////////////// + // has_virtual_destructor + // + // has_virtual_destructor::value == true if and only if T is a class + // or struct with a virtual destructor. + // + // has_virtual_destructor may only be applied to complete types. 
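For the has_nothrow_* traits above, scalars are reported as nothrow on every implementation path, while user types may need the declaration macros on the non-conforming paths. A small sketch with a hypothetical Handle type:

    #include <EASTL/type_traits.h>

    static_assert(eastl::has_nothrow_assign<int>::value,  "scalar assignment cannot throw");
    static_assert(eastl::has_nothrow_copy<double>::value, "scalar copy cannot throw");

    // Handle's copy constructor is known not to throw; state it explicitly
    // so the fallback implementations agree.
    struct Handle { int id; };

    EASTL_DECLARE_HAS_NOTHROW_COPY(Handle, true)

    static_assert(eastl::has_nothrow_copy<Handle>::value, "declared nothrow-copyable");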
+ // + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) + #define EASTL_TYPE_TRAIT_has_virtual_destructor_CONFORMANCE 1 + + template + struct has_virtual_destructor : public eastl::integral_constant{}; + #else + #define EASTL_TYPE_TRAIT_has_virtual_destructor_CONFORMANCE 0 // has_virtual_destructor is not fully conforming. Can return false negatives. + + template + struct has_virtual_destructor : public eastl::false_type{}; + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool has_virtual_destructor_v = has_virtual_destructor::value; + #endif + + #define EASTL_DECLARE_HAS_VIRTUAL_DESTRUCTOR(T, hasVirtualDestructor) \ + namespace eastl { \ + template <> struct has_virtual_destructor : public eastl::integral_constant { }; \ + template <> struct has_virtual_destructor : public eastl::integral_constant { }; \ + template <> struct has_virtual_destructor : public eastl::integral_constant { }; \ + template <> struct has_virtual_destructor : public eastl::integral_constant { }; \ + } + + + /////////////////////////////////////////////////////////////////////// + // is_literal_type + // + // See the C++11 Standard, section 2.9,p10. + // A type is a literal type if it is: + // - a scalar type; or + // - a reference type referring to a literal type; or + // - an array of literal type; or + // - a class type (Clause 9) that has all of the following properties: + // - it has a trivial destructor, + // - every constructor call and full-expression in the brace-or-equal-initializer s for non-static data members (if any) is a constant expression (5.19), + // - it is an aggregate type (8.5.1) or has at least one constexpr constructor or constructor template that is not a copy or move constructor, and + // - all of its non-static data members and base classes are of literal types. + // + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_literal)) + #define EASTL_TYPE_TRAIT_is_literal_type_CONFORMANCE 1 + + template + struct is_literal_type : public eastl::integral_constant{}; + + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && ((defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4006)) || (defined(_MSC_VER) && (_MSC_VER >= 1700))) // VS2012+ + #if defined(EA_COMPILER_GNUC) && (!defined(EA_COMPILER_CPP11_ENABLED) || (EA_COMPILER_VERSION < 4007)) + #define EASTL_TYPE_TRAIT_is_literal_type_CONFORMANCE 0 // It seems that in this case GCC supports the compiler intrinsic but reports it as false when it's true. + #else + #define EASTL_TYPE_TRAIT_is_literal_type_CONFORMANCE 1 + #endif + + template + struct is_literal_type : public eastl::integral_constant{}; + + #else + #define EASTL_TYPE_TRAIT_is_literal_type_CONFORMANCE 0 + + // It's not clear if this trait can be fully implemented without explicit compiler support. + // For now we assume that it can't be but implement something that gets it right at least + // some of the time. Recall that partial positives and false negatives are OK (though not ideal), + // while false positives are not OK for us to generate. + + template // This is not a complete implementation and will be true for only some literal types (the basic ones). 
+ struct is_literal_type : public eastl::integral_constant::type>::type>::value>{}; + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_literal_type_v = is_literal_type::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_abstract + // + // is_abstract::value == true if and only if T is a class or struct + // that has at least one pure virtual function. is_abstract may only + // be applied to complete types. + // + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_abstract))) + #define EASTL_TYPE_TRAIT_is_abstract_CONFORMANCE 1 // is_abstract is conforming. + + template + struct is_abstract : public integral_constant{}; + #else + #define EASTL_TYPE_TRAIT_is_abstract_CONFORMANCE 0 + + template::value> + class is_abstract_helper + { + template + static eastl::yes_type test(...); + + template + static eastl::no_type test(T1(*)[1]); // The following: 'typedef SomeAbstractClass (*SomeFunctionType)[1];' is invalid (can't have an array of abstract types) and thus doesn't choose this path. + + public: + static const bool value = (sizeof(test(NULL)) == sizeof(eastl::yes_type)); + }; + + template + struct is_abstract_helper + { static const bool value = false; }; + + template + struct is_abstract + : public integral_constant::value> { }; + + #endif + + #define EASTL_DECLARE_IS_ABSTRACT(T, isAbstract) \ + namespace eastl { \ + template <> struct is_abstract : public eastl::integral_constant { }; \ + template <> struct is_abstract : public eastl::integral_constant { }; \ + template <> struct is_abstract : public eastl::integral_constant { }; \ + template <> struct is_abstract : public eastl::integral_constant { }; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_abstract_v = is_abstract::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_trivially_copyable + // + // T is a trivially copyable type (3.9) T shall be a complete type, + // (possibly cv-qualified) void, or an array of unknown bound. + // + // 3.9,p3: For any trivially copyable type T, if two pointers to T + // point to distinct T objects obj1 and obj2, where neither obj1 nor + // obj2 is a base-class subobject, if the underlying bytes making + // up obj1 are copied into obj2, obj2 shall subsequently hold the + // same value as obj1. In other words, you can memcpy/memmove it. + /////////////////////////////////////////////////////////////////////// + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && ((defined(_MSC_VER) && (_MSC_VER >= 1700)) || (defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 5003)) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_trivially_copyable))) + #define EASTL_TYPE_TRAIT_is_trivially_copyable_CONFORMANCE 1 + + // https://connect.microsoft.com/VisualStudio/feedback/details/808827/c-std-is-trivially-copyable-produces-wrong-result-for-arrays + // + // From Microsoft: + // We're working on fixing this. When overhauling in VC 2013, I incorrectly believed that is_trivially_copyable was a synonym + // for is_trivially_copy_constructible. I've asked the compiler team to provide a compiler hook with 100% accurate answers. 
(Currently, the + // compiler hook has incorrect answers for volatile scalars, volatile data members, and various scenarios for defaulted/deleted/private + // special member functions - I wrote an exhaustive test case to exercise the complicated Standardese.) When the compiler hook is fixed, + // I'll change to invoke it. + // + // Microsoft broken VS2013 STL implementation: + // template + // struct is_trivially_copyable + // : is_trivially_copy_constructible<_Ty>::type + // { // determine whether _Ty has a trivial copy constructor + // }; + // + + template + struct is_trivially_copyable { static const bool value = __is_trivially_copyable(T); }; + + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(EA_COMPILER_MSVC) || defined(EA_COMPILER_GNUC)) + #define EASTL_TYPE_TRAIT_is_trivially_copyable_CONFORMANCE 1 + + // Micrsoft (prior to VS2012) and GCC have __has_trivial_copy, but it may not be identical with the goals of this type trait. + template + struct is_trivially_copyable : public integral_constant::type>::value) && (!eastl::is_void::value && !eastl::is_volatile::value && !eastl::is_reference::value)>{}; + #else + #define EASTL_TYPE_TRAIT_is_trivially_copyable_CONFORMANCE 0 // Generates false negatives. + + template + struct is_trivially_copyable { static const bool value = eastl::is_scalar::type>::value; }; + #endif + + #define EASTL_DECLARE_IS_TRIVIALLY_COPYABLE(T, isTriviallyCopyable) \ + namespace eastl { \ + template <> struct is_trivially_copyable : public eastl::integral_constant { }; \ + template <> struct is_trivially_copyable : public eastl::integral_constant { }; \ + template <> struct is_trivially_copyable : public eastl::integral_constant { }; \ + template <> struct is_trivially_copyable : public eastl::integral_constant { }; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_trivially_copyable_v = is_trivially_copyable::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_constructible + // + // See the C++11 Standard, section 20.9.4.3,p6. + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_constructible_CONFORMANCE 1 + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_constructible))) + template + struct is_constructible : public bool_constant<__is_constructible(T, Args...) > {}; + #else + // We implement a copy of move here has move_internal. We are currently stuck doing this because our move + // implementation is in and currently #includes us, and so we have a header + // chicken-and-egg problem. To do: Resolve this, probably by putting eastl::move somewhere else. + template + inline typename eastl::remove_reference::type&& move_internal(T&& x) EA_NOEXCEPT + { return ((typename eastl::remove_reference::type&&)x); } + + template + typename first_type_select()...)))>::type is(T&&, Args&& ...); + + template + struct can_construct_scalar_helper + { + static eastl::true_type can(T); + static eastl::false_type can(...); + }; + + template + eastl::false_type is(argument_sink, Args&& ...); + + // Except for scalars and references (handled below), check for constructibility via decltype. 
+ template + struct is_constructible_helper_2 // argument_sink will catch all T that is not constructible from the Args and denote false_type + : public eastl::identity(), eastl::declval()...))>::type {}; + + template + struct is_constructible_helper_2 + : public eastl::is_scalar {}; + + template // We handle the case of multiple arguments below (by disallowing them). + struct is_constructible_helper_2 + : public eastl::identity::can(eastl::declval()))>::type {}; + + // Scalars and references can be constructed only with 0 or 1 argument. e.g the following is an invalid expression: int(17, 23) + template + struct is_constructible_helper_2 + : public eastl::false_type {}; + + template + struct is_constructible_helper_1 + : public is_constructible_helper_2::value || eastl::is_reference::value, T, Args...> {}; + + // Unilaterally dismiss void, abstract, unknown bound arrays, and function types as not constructible. + template + struct is_constructible_helper_1 + : public false_type {}; + + // is_constructible + template + struct is_constructible + : public is_constructible_helper_1<(eastl::is_abstract::type>::value || + eastl::is_array_of_unknown_bounds::value || + eastl::is_function::type>::value || + eastl::has_void_arg::value), + T, Args...> {}; + + // Array types are constructible if constructed with no arguments and if their element type is default-constructible + template + struct is_constructible_helper_2 + : public eastl::is_constructible::type> {}; + + // Arrays with arguments are not constructible. e.g. the following is an invalid expression: int[3](37, 34, 12) + template + struct is_constructible_helper_2 + : public eastl::false_type {}; + + #endif + + + // You need to manually declare const/volatile variants individually if you want them. + #define EASTL_DECLARE_IS_CONSTRUCTIBLE(T, U, isConstructible) \ + namespace eastl { \ + template <> struct is_constructible : public eastl::integral_constant { }; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_constructible_v = is_constructible::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_trivially_constructible + // + // is_constructible::value is true and the variable definition + // for is_constructible, as defined below, is known to call no operation + // that is not trivial (3.9, 12). T and all types in the parameter pack + // Args shall be complete types, (possibly cv-qualified) void, or arrays + // of unknown bound. + // + // Note: + // C++11's is_trivially_constructible sounds the same as the pre-standard + // has_trivial_constructor type trait (which we also support here). However, + // the definition of has_trivial_constructor has never been formally standardized + // and so we can't just blindly equate the two to each other. Since we are + // moving forward with C++11 and deprecating the old type traits, we leave + // the old ones as-is, though we defer to them in cases where we don't seem + // to have a good alternative. + // + /////////////////////////////////////////////////////////////////////// + + #if defined(EA_COMPILER_NO_VARIADIC_TEMPLATES) + + #define EASTL_TYPE_TRAIT_is_trivially_constructible_CONFORMANCE 0 + + // In this version we allow only zero or one argument (Arg). We can add more arguments + // by creating a number of extra specializations. It's probably not possible to + // simplify the implementation with recursive templates because ctor argument + // presence is specific. 
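A short sketch of is_constructible in use (Meters is a hypothetical type constructible only from int; C++11 assumed). The scalar and reference cases at the end are the ones handled by the dedicated specializations above:

    #include <EASTL/type_traits.h>

    struct Meters { explicit Meters(int) {} };

    static_assert( eastl::is_constructible<Meters, int>::value,   "int -> Meters");
    static_assert(!eastl::is_constructible<Meters>::value,        "no default constructor");
    static_assert(!eastl::is_constructible<Meters, void*>::value, "no pointer overload");

    // Scalars and references accept at most one argument, and an int&
    // cannot bind to a prvalue int.
    static_assert( eastl::is_constructible<int, int>::value,  "scalar copy-init");
    static_assert(!eastl::is_constructible<int&, int>::value, "int& from a prvalue");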
+ // + // To consider: we can fold the two implementations below by making a macro that's defined + // has __is_trivially_constructible(T) or eastl::has_trivial_copy::value, depending on + // whether the __is_trivially_constructible compiler intrinsic is available. + + // If the compiler has this trait built-in (which ideally all compilers would have since it's necessary for full conformance) use it. + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_trivially_constructible)) + + template + struct is_trivially_constructible + : public eastl::false_type {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant {}; + + #else + + template + struct is_trivially_constructible + : public eastl::false_type {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_constructor::type>::value> {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_copy::value> {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_copy::value> {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_copy::value> {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_copy::value> {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_copy::value> {}; + + #endif + + #else + + // If the compiler has this trait built-in (which ideally all compilers would have since it's necessary for full conformance) use it. + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_trivially_constructible)) + #define EASTL_TYPE_TRAIT_is_trivially_constructible_CONFORMANCE 1 + + // We have a problem with clang here as of clang 3.4: __is_trivially_constructible(int[]) is false, yet I believe it should be true. + // Until it gets resolved, what we do is check for is_constructible along with __is_trivially_constructible(). + template + struct is_trivially_constructible + : public eastl::integral_constant::value && __is_trivially_constructible(T, Args...)> {}; + + #else + + #define EASTL_TYPE_TRAIT_is_trivially_constructible_CONFORMANCE 0 // This is 0 but in fact it will work for most real-world cases due to the has_trivial_constructor specialization below. + + template + struct is_trivially_constructible + : public eastl::false_type {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_constructor::type>::value> {}; + + // It's questionable whether we can use has_trivial_copy here, as it could theoretically create a false-positive. 
+ template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_copy::value> {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_copy::value> {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_copy::value> {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_copy::value> {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_copy::value> {}; + + template + struct is_trivially_constructible + : public eastl::integral_constant::value && eastl::has_trivial_copy::value> {}; + + #endif + + #endif + + + #define EASTL_DECLARE_IS_TRIVIALLY_CONSTRUCTIBLE(T, isTriviallyConstructible) \ + namespace eastl { \ + template <> struct is_trivially_constructible : public eastl::integral_constant { }; \ + template <> struct is_trivially_constructible : public eastl::integral_constant { }; \ + template <> struct is_trivially_constructible : public eastl::integral_constant { }; \ + template <> struct is_trivially_constructible : public eastl::integral_constant { }; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_trivially_constructible_v = is_trivially_constructible::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_trivially_default_constructible + // + // is_trivially_constructible::value is true. + // This is thus identical to is_trivially_constructible. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_trivially_default_constructible_CONFORMANCE EASTL_TYPE_TRAIT_is_trivially_constructible_CONFORMANCE + + template + struct is_trivially_default_constructible + : public eastl::is_trivially_constructible {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_trivially_default_constructible_v = is_trivially_default_constructible::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_trivial + // + // is_trivial::value == true if T is a scalar type, a trivially copyable + // class with a trivial default constructor, or array of such type/class, + // possibly cv-qualified), provides the member constant value equal true. + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_trivial_CONFORMANCE ((EASTL_TYPE_TRAIT_is_trivially_default_constructible_CONFORMANCE && EASTL_TYPE_TRAIT_is_trivially_copyable_CONFORMANCE) ? 
1 : 0) + + #if defined(_MSC_VER) && _MSC_VER == 1800 + template + struct is_trivial_helper + : public eastl::integral_constant::value && eastl::is_trivially_default_constructible::value>{}; + + template + struct is_trivial_helper + : public false_type{}; + + template + struct is_trivial + : public is_trivial_helper<(EA_ALIGN_OF(T) > EA_PLATFORM_MIN_MALLOC_ALIGNMENT), T>::type{}; + #else + // All other compilers seem to be able to handle aligned types passed as value + template + struct is_trivial + : public eastl::integral_constant::value && eastl::is_trivially_default_constructible::value> {}; + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_trivial_v = is_trivial::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_nothrow_constructible + // + // is_constructible::value is true and the variable definition + // for is_constructible, as defined below, is known not to throw any + // exceptions (5.3.7). T and all types in the parameter pack Args shall + // be complete types, (possibly cv-qualified) void, or arrays of unknown bound. + // + /////////////////////////////////////////////////////////////////////// + #if defined(EA_COMPILER_NO_NOEXCEPT) + + #define EASTL_TYPE_TRAIT_is_nothrow_constructible_CONFORMANCE 0 + + template + struct is_nothrow_constructible + : public eastl::false_type {}; + + template + struct is_nothrow_constructible + : public eastl::integral_constant::value> {}; + + template + struct is_nothrow_constructible + : public eastl::integral_constant::value> {}; + + template + struct is_nothrow_constructible + : public eastl::integral_constant::value> {}; + + template + struct is_nothrow_constructible + : public eastl::integral_constant::value> {}; + + template + struct is_nothrow_constructible + : public eastl::integral_constant::value> {}; + + #else + #if defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION < 4008) + #define EASTL_TYPE_TRAIT_is_nothrow_constructible_CONFORMANCE 0 // GCC up to v4.7's noexcept is broken and fails to generate true for the case of compiler-generated constructors. + #else + #define EASTL_TYPE_TRAIT_is_nothrow_constructible_CONFORMANCE EASTL_TYPE_TRAIT_is_constructible_CONFORMANCE + #endif + + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // *_noexcept_wrapper implements a workaround for VS2015 preview. A standards conforming noexcept operator allows variadic template expansion. + // There appears to be an issue with VS2015 preview that prevents variadic template expansion into a noexcept operator that is passed directly + // to a template parameter. + // + // The fix hoists the noexcept expression into a separate struct and caches the result of the expression. This result is then passed to integral_constant. 
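A minimal sketch of the hoisting pattern just described (the identifiers here are illustrative, not the ones EASTL uses): the noexcept operator is evaluated inside a helper struct, and only the cached boolean result is handed to integral_constant. The real trait only reaches this point once is_constructible has already been verified:

    // Illustrative only; assumes C++11 noexcept and variadic templates.
    template <typename T, typename... Args>
    struct nothrow_ctor_test
    {
        // Evaluate noexcept(...) here, outside any template argument list.
        static const bool value = noexcept(T(eastl::declval<Args>()...));
    };

    template <typename T, typename... Args>
    struct my_is_nothrow_constructible
        : public eastl::integral_constant<bool, nothrow_ctor_test<T, Args...>::value> {};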
+ // + // Example code from Clang libc++ + // template + // struct __libcpp_is_nothrow_constructible<[>is constructible*/true, /*is reference<]false, _Tp, _Args...> + // : public integral_constant()...))> { }; + // + + template + struct is_nothrow_constructible_helper_noexcept_wrapper + { static const bool value = noexcept(T(eastl::declval()...)); }; + + template + struct is_nothrow_constructible_helper; + + template + struct is_nothrow_constructible_helper + : public eastl::integral_constant::value> {}; + + template + struct is_nothrow_constructible_helper + : public eastl::integral_constant()))> {}; + + template + struct is_nothrow_constructible_helper + : public eastl::integral_constant {}; + + template + struct is_nothrow_constructible_helper + : public eastl::false_type {}; + + template + struct is_nothrow_constructible + : public eastl::is_nothrow_constructible_helper::value, T, Args...> {}; + + template + struct is_nothrow_constructible + : public eastl::is_nothrow_constructible_helper::value, T> {}; + #endif + + #define EASTL_DECLARE_IS_NOTHROW_CONSTRUCTIBLE(T, isNothrowConstructible) \ + namespace eastl{ \ + template <> struct is_nothrow_constructible : public eastl::integral_constant { }; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_nothrow_constructible_v = is_nothrow_constructible::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_default_constructible + // + // is_constructible::value is true. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_default_constructible_CONFORMANCE EASTL_TYPE_TRAIT_is_constructible_CONFORMANCE + + template + struct is_default_constructible + : public eastl::is_constructible {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_default_constructible_v = is_default_constructible::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_nothrow_default_constructible + /////////////////////////////////////////////////////////////////////// + // TODO(rparolin): implement type-trait + + + + /////////////////////////////////////////////////////////////////////// + // is_copy_constructible + // + // is_constructible::value is true. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_copy_constructible_CONFORMANCE EASTL_TYPE_TRAIT_is_constructible_CONFORMANCE + + template + struct is_copy_constructible + : public eastl::is_constructible::type>::type> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_copy_constructible_v = is_copy_constructible::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_trivially_copy_constructible + // + // is_trivially_constructible::value is true. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_trivially_copy_constructible_CONFORMANCE EASTL_TYPE_TRAIT_is_trivially_constructible_CONFORMANCE + + template + struct is_trivially_copy_constructible + : public eastl::is_trivially_constructible::type>::type> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_trivially_copy_constructible_v = is_trivially_copy_constructible::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_nothrow_copy_constructible + // + // is_nothrow_-constructible::value is true. 
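To illustrate the convenience wrappers above (NoCopy is a hypothetical type; deleted functions require C++11):

    #include <EASTL/type_traits.h>

    struct NoCopy
    {
        NoCopy() {}
        NoCopy(const NoCopy&) = delete;
    };

    static_assert( eastl::is_default_constructible<NoCopy>::value, "default ctor present");
    static_assert(!eastl::is_copy_constructible<NoCopy>::value,    "copy ctor deleted");

    // For scalars all of the copy-construction wrappers agree.
    static_assert(eastl::is_copy_constructible<int>::value,           "copyable");
    static_assert(eastl::is_trivially_copy_constructible<int>::value, "trivially copyable");
    static_assert(eastl::is_nothrow_copy_constructible<int>::value,   "nothrow copyable");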
+ /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_nothrow_copy_constructible_CONFORMANCE EASTL_TYPE_TRAIT_is_nothrow_constructible_CONFORMANCE + + template + struct is_nothrow_copy_constructible + : public is_nothrow_constructible::type>::type> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_nothrow_copy_constructible_v = is_nothrow_copy_constructible::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_move_constructible + // + // is_constructible::value is true. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_move_constructible_CONFORMANCE EASTL_TYPE_TRAIT_is_constructible_CONFORMANCE + + template + struct is_move_constructible + : public eastl::is_constructible::type> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_move_constructible_v = is_move_constructible::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_trivially_move_constructible + // + // is_trivially_constructible::value is true. + // T shall be a complete type, (possibly cv-qualified) void, or an + // array of unknown bound. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_trivially_move_constructible_CONFORMANCE EASTL_TYPE_TRAIT_is_trivially_constructible_CONFORMANCE + + template + struct is_trivially_move_constructible + : public eastl::is_trivially_constructible::type> {}; + + #define EASTL_DECLARE_IS_TRIVIALLY_MOVE_CONSTRUCTIBLE(T, isTrivallyMoveConstructible) \ + namespace eastl{ \ + template <> struct is_trivially_move_constructible : public eastl::integral_constant { }; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_trivially_move_constructible_v = is_trivially_move_constructible::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_assignable + // + // The expression declval() = declval() is well-formed when treated as an unevaluated operand. + // Access checking is performed as if in a context unrelated to T and U. Only the validity of + // the immediate context of the assignment expression is considered. The compilation of the expression + // can result in side effects such as the instantiation of class template specializations and function + // template specializations, the generation of implicitly-defined functions, and so on. Such side + // effects are not in the "immediate context" and can result in the program being ill-formed. + // + // Note: + // This type trait has a misleading and counter-intuitive name. It does not indicate whether an instance + // of U can be assigned to an instance of T (e.g. t = u). Instead it indicates whether the assignment can be + // done after adding rvalue references to both, as in add_rvalue_reference::type = add_rvalue_reference::type. + // A counterintuitive result of this is that is_assignable::value == false. The is_copy_assignable + // trait indicates if a type can be assigned to its own type, though there isn't a standard C++ way to tell + // if an arbitrary type is assignable to another type. + // http://stackoverflow.com/questions/19920213/why-is-stdis-assignable-counter-intuitive + // + // Note: + // A true is_assignable value doesn't guarantee that the expression is compile-able, the compiler checks + // only that the assignment matches before compilation. 
In particular, if you have templated operator= + // for a class, the compiler will always say is_assignable is true, regardless of what's being tested + // on the right hand side of the expression. It may actually turn out during compilation that the + // templated operator= fails to compile because in practice it doesn't accept every possible type for + // the right hand side of the expression. + // + // Expected results: + // is_assignable::value == false + // is_assignable::value == true + // is_assignable::value == false + // is_assignable::value == false + // is_assignable::value == false + // is_assignable::value == false + // is_assignable::value == false + // is_assignable::value == false + // is_assignable::value == false + // is_assignable::value == false + // is_assignable::value == false + // is_assignable::value == true + // is_assignable::value == false + // + // Note: + // Our implementation here yields different results than does the std::is_assignable from Dinkumware-based Standard + // Libraries, but yields similar results to the std::is_assignable from GCC's libstdc++ and clang's libc++. It may + // possibly be that the Dinkumware results are intentionally different for some practical purpose or because they + // represent the spirit or the Standard but not the letter of the Standard. + // + /////////////////////////////////////////////////////////////////////// + #define EASTL_TYPE_TRAIT_is_assignable_CONFORMANCE 1 + + template + struct is_assignable_helper + { + template + static eastl::no_type is(...); + + template + static decltype(eastl::declval() = eastl::declval(), eastl::yes_type()) is(int); + + static const bool value = (sizeof(is(0)) == sizeof(eastl::yes_type)); + }; + + template + struct is_assignable : + public eastl::integral_constant::value> {}; + + // The main purpose of this function is to help the non-conforming case above. + // Note: We don't handle const/volatile variations here, as we expect the user to + // manually specify any such variations via this macro. + // Example usage: + // EASTL_DECLARE_IS_ASSIGNABLE(int, int, false) + // + #define EASTL_DECLARE_IS_ASSIGNABLE(T, U, isAssignable) \ + namespace eastl { \ + template <> struct is_assignable : public eastl::integral_constant { }; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_assignable_v = is_assignable::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_lvalue_assignable + // + // This is an EASTL extension function which is like is_assignable but + // works for arbitrary assignments and not just rvalue assignments. + // This function provides an intuitive assignability test, as opposed + // to is_assignable. 
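Before continuing, a concrete illustration of the distinction just described: is_assignable tests assignment to declval<T>(), which is an rvalue for non-reference T, while the "intuitive" test adds lvalue references first. The sketch below mirrors the declval/sizeof detection of is_assignable_helper using standard-library names; the nested yes_type/no_type and the namespace are illustrative stand-ins for the EASTL equivalents:

    #include <type_traits>
    #include <utility>

    namespace sketch
    {
        template <typename T, typename U>
        struct is_assignable_helper
        {
            typedef char yes_type;                       // sizeof == 1
            struct no_type { char padding[8]; };         // sizeof != sizeof(yes_type)

            template <typename, typename>
            static no_type is(...);                      // fallback: not assignable

            template <typename T1, typename U1>          // chosen when T1 = U1 is well-formed
            static decltype(std::declval<T1>() = std::declval<U1>(), yes_type()) is(int);

            static const bool value = (sizeof(is<T, U>(0)) == sizeof(yes_type));
        };

        template <typename T, typename U>
        struct is_assignable
            : public std::integral_constant<bool, is_assignable_helper<T, U>::value> {};
    }

    // declval<int>() is an rvalue, and a scalar rvalue cannot be assigned to,
    // so is_assignable<int, int> is false...
    static_assert(!sketch::is_assignable<int, int>::value, "");
    // ...while an int lvalue can be assigned from an int, which is what the
    // lvalue/copy-assignable traits test by adding lvalue references first.
    static_assert(sketch::is_assignable<int&, int>::value, "");
    static_assert(std::is_assignable<int&, int>::value && !std::is_assignable<int, int>::value, "");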
+ // + // Note: is_lvalue_assignable === is_copy_assignable + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_lvalue_assignable_CONFORMANCE EASTL_TYPE_TRAIT_is_assignable_CONFORMANCE + + template + struct is_lvalue_assignable + : public eastl::is_assignable::type, + typename eastl::add_lvalue_reference::type>::type> {}; + + #define EASTL_DECLARE_IS_LVALUE_ASSIGNABLE(T, U, isLvalueAssignable) \ + namespace eastl { \ + template <> struct is_lvalue_assignable : public eastl::integral_constant { }; \ + } + + + + /////////////////////////////////////////////////////////////////////// + // is_trivially_assignable + // + // is_assignable::value is true and the assignment, as defined by + // is_assignable, is known to call no operation that is not trivial (3.9, 12). + // T and U shall be complete types, (possibly cv-qualified) void, or + // arrays of unknown bound + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_trivially_assignable)) + #define EASTL_TYPE_TRAIT_is_trivially_assignable_CONFORMANCE 1 + + template + struct is_trivially_assignable + : eastl::integral_constant {}; + + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) && (_MSC_VER >= 1800)) + #define EASTL_TYPE_TRAIT_is_trivially_assignable_CONFORMANCE EASTL_TYPE_TRAIT_is_assignable_CONFORMANCE + + // This code path is attempting to work around the issue with VS2013 __is_trivially_assignable compiler intrinsic documented in the link + // below. todo: Re-evaluate in VS2014. + // + // https://connect.microsoft.com/VisualStudio/feedback/details/806233/std-is-trivially-copyable-const-int-n-and-std-is-trivially-copyable-int-n-incorrect + + template + struct is_trivially_assignable_helper; + + template + struct is_trivially_assignable_helper : eastl::integral_constant{}; + + template + struct is_trivially_assignable_helper : false_type{}; + + template + struct is_trivially_assignable + : eastl::integral_constant::value, T, U >::value> {}; + + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(EA_COMPILER_MSVC) || defined(EA_COMPILER_GNUC)) + #define EASTL_TYPE_TRAIT_is_trivially_assignable_CONFORMANCE EASTL_TYPE_TRAIT_is_assignable_CONFORMANCE + + // Micrsoft (up till at least VS2012) and GCC have __has_trivial_assign, but it may not be identical with the goals of this type trait. + // The Microsoft type trait headers suggest that a future version of VS will have a __is_trivially_assignable intrinsic, but we + // need to come up with something in the meantime. To do: Re-evalulate this for VS2013+ when it becomes available. + template + struct is_trivially_assignable + : eastl::integral_constant::value && + (eastl::is_pod::type>::value || __has_trivial_assign(typename eastl::remove_reference::type))> {}; + #else + + #define EASTL_TYPE_TRAIT_is_trivially_assignable_CONFORMANCE 0 // Generates false negatives. 
+ + template + struct is_trivially_assignable + : public eastl::false_type {}; + + template + struct is_trivially_assignable + : public eastl::integral_constant::value> {}; + + template + struct is_trivially_assignable + : public eastl::integral_constant::value> {}; + + template + struct is_trivially_assignable + : public eastl::integral_constant::value> {}; + + template + struct is_trivially_assignable + : public eastl::integral_constant::value> {}; + + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_trivially_assignable_v = is_trivially_assignable::value; + #endif + + // The main purpose of this function is to help the non-conforming case above. + // Note: We don't handle const/volatile variations here, as we expect the user to + // manually specify any such variations via this macro. + // Example usage: + // EASTL_DECLARE_IS_TRIVIALLY_ASSIGNABLE(int, int, false) + // + #define EASTL_DECLARE_IS_TRIVIALLY_ASSIGNABLE(T, U, isTriviallyAssignable) \ + namespace eastl { \ + template <> struct is_trivially_assignable : public eastl::integral_constant { }; \ + } + + + + /////////////////////////////////////////////////////////////////////// + // is_nothrow_assignable + // + // is_assignable::value is true and the assignment is known + // not to throw any exceptions (5.3.7). T and U shall be complete + // types, (possibly cv-qualified) void, or arrays of unknown bound. + // + /////////////////////////////////////////////////////////////////////// + + #if defined(_MSC_VER) && (_MSC_VER >= 1800) // VS2013+ + #define EASTL_TYPE_TRAIT_is_nothrow_assignable_CONFORMANCE 1 + + template + struct is_nothrow_assignable + : eastl::integral_constant {}; + + #elif defined(EA_COMPILER_NO_NOEXCEPT) || defined(__EDG_VERSION__) // EDG mis-compiles the conforming code below and so must be placed here. + #define EASTL_TYPE_TRAIT_is_nothrow_assignable_CONFORMANCE 0 + + template + struct is_nothrow_assignable + : public false_type {}; + + // Note that the following are crippled in that they support only assignment of T types to other T types. + template + struct is_nothrow_assignable + : public eastl::integral_constant::value> {}; + + template + struct is_nothrow_assignable + : public eastl::integral_constant::value> {}; + + template + struct is_nothrow_assignable + : public eastl::integral_constant::value> {}; + + #else + #define EASTL_TYPE_TRAIT_is_nothrow_assignable_CONFORMANCE 1 + + template + struct is_nothrow_assignable_helper; + + template + struct is_nothrow_assignable_helper + : public false_type {}; + + template + struct is_nothrow_assignable_helper // Set to true if the assignment (same as is_assignable) cannot generate an exception. 
+ : public eastl::integral_constant() = eastl::declval()) > + { + }; + + template + struct is_nothrow_assignable + : public eastl::is_nothrow_assignable_helper::value, T, U> + { + }; + #endif + + #define EASTL_DECLARE_IS_NOTHROW_ASSIGNABLE(T, isNothrowAssignable) \ + namespace eastl{ \ + template <> struct is_nothrow_assignable : public eastl::integral_constant { }; \ + template <> struct is_nothrow_assignable : public eastl::integral_constant { }; \ + template <> struct is_nothrow_assignable : public eastl::integral_constant { }; \ + template <> struct is_nothrow_assignable : public eastl::integral_constant { }; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_nothrow_assignable_v = is_nothrow_assignable::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_copy_assignable + // + // is_assignable::value is true. T shall be a complete type, + // (possibly cv -qualified) void, or an array of unknown bound. + // + // This (and not is_assignable) is the type trait you use to tell if you + // can do an arbitrary assignment. is_assignable tells if you can do an + // assignment specifically to an rvalue and not in general. + // http://stackoverflow.com/a/19921030/725009 + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_copy_assignable_CONFORMANCE EASTL_TYPE_TRAIT_is_assignable_CONFORMANCE + + template + struct is_copy_assignable + : public eastl::is_assignable::type, + typename eastl::add_lvalue_reference::type>::type> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_copy_assignable_v = is_copy_assignable::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_trivially_copy_assignable + // + // is_trivially_assignable::value is true. T shall be a + // complete type, (possibly cv-qualified) void, or an array of unknown bound. 
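For reference, the conforming is_nothrow_assignable path above reduces to gating a noexcept test on is_assignable. A minimal sketch under that reading, using standard-library spellings rather than the EASTL internals:

    #include <type_traits>
    #include <utility>

    namespace sketch
    {
        // Not assignable at all, therefore not nothrow assignable.
        template <bool /*isAssignable*/, typename T, typename U>
        struct is_nothrow_assignable_helper : public std::false_type {};

        // Only evaluated when the assignment is known to be well-formed; reports
        // whether declval<T>() = declval<U>() is a non-throwing expression.
        template <typename T, typename U>
        struct is_nothrow_assignable_helper<true, T, U>
            : public std::integral_constant<bool, noexcept(std::declval<T>() = std::declval<U>())> {};

        template <typename T, typename U>
        struct is_nothrow_assignable
            : public is_nothrow_assignable_helper<std::is_assignable<T, U>::value, T, U> {};
    }

    static_assert(sketch::is_nothrow_assignable<int&, int>::value, "");
    static_assert(!sketch::is_nothrow_assignable<int, int>::value, "");   // not assignable at all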
+ // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_trivially_copy_assignable_CONFORMANCE EASTL_TYPE_TRAIT_is_trivially_assignable_CONFORMANCE + +#if EASTL_TYPE_TRAIT_is_trivially_copy_assignable_CONFORMANCE + template + struct is_trivially_copy_assignable + : public eastl::is_trivially_assignable::type, + typename eastl::add_lvalue_reference::type>::type> {}; +#else + template + struct is_trivially_copy_assignable + : public integral_constant::value || eastl::is_pod::value || eastl::is_trivially_assignable::type, typename eastl::add_lvalue_reference::type>::type>::value + > {}; +#endif + + #define EASTL_DECLARE_IS_TRIVIALLY_COPY_ASSIGNABLE(T, isTriviallyCopyAssignable) \ + namespace eastl { \ + template <> struct is_trivially_copy_assignable : public eastl::integral_constant { }; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_trivially_copy_assignable_v = is_trivially_copy_assignable::value; + #endif + + /////////////////////////////////////////////////////////////////////// + // is_nothrow_copy_assignable + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_nothrow_copy_assignable_CONFORMANCE EASTL_TYPE_TRAIT_is_nothrow_assignable_CONFORMANCE + + template + struct is_nothrow_copy_assignable + : public eastl::is_nothrow_assignable::type, + typename eastl::add_lvalue_reference::type>::type> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_nothrow_copy_assignable_v = is_nothrow_copy_assignable::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_move_assignable + // + // is_assignable::value is true. T shall be a complete type, + // (possibly cv -qualified) void, or an array of unknown bound. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_move_assignable_CONFORMANCE EASTL_TYPE_TRAIT_is_assignable_CONFORMANCE + + template + struct is_move_assignable + : public eastl::is_assignable::type, + typename eastl::add_rvalue_reference::type> {}; + + #define EASTL_DECLARE_IS_MOVE_ASSIGNABLE(T, isMoveAssignable) \ + namespace eastl{ \ + template <> struct is_move_assignable : public eastl::integral_constant { }; \ + template <> struct is_move_assignable : public eastl::integral_constant { }; \ + template <> struct is_move_assignable : public eastl::integral_constant { }; \ + template <> struct is_move_assignable : public eastl::integral_constant { }; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_move_assignable_v = is_move_assignable::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_trivially_move_assignable + // + // is_trivially_-assignable::value is true. T shall be a complete type, + // (possibly cv-qualified) void, or an array of unknown bound. 
+ // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_trivially_move_assignable_CONFORMANCE EASTL_TYPE_TRAIT_is_trivially_assignable_CONFORMANCE + + template + struct is_trivially_move_assignable + : public eastl::is_trivially_assignable::type, + typename eastl::add_rvalue_reference::type> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_trivially_move_assignable_v = is_trivially_move_assignable::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_nothrow_move_assignable + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_nothrow_move_assignable_CONFORMANCE EASTL_TYPE_TRAIT_is_nothrow_assignable_CONFORMANCE + + template + struct is_nothrow_move_assignable + : public eastl::is_nothrow_assignable::type, + typename eastl::add_rvalue_reference::type> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_nothrow_move_assignable_v = is_nothrow_move_assignable::value; + #endif + + /////////////////////////////////////////////////////////////////////// + // is_destructible + // + // For a complete type T and given + // template + // struct test { U u; }; + // test::~test() is not deleted (C++11 "= delete"). + // T shall be a complete type, (possibly cv-qualified) void, or an array of unknown bound. + // + /////////////////////////////////////////////////////////////////////// + + #if 0 // defined(_MSC_VER) && (_MSC_VER >= 1800) // VS2013+ -- Disabled due to __is_destructible being broken in VC++ versions up to at least VS2013. A ticket will be submitted for this + #define EASTL_TYPE_TRAIT_is_destructible_CONFORMANCE 1 + + template + struct is_destructible + : integral_constant {}; + + #elif defined(EA_COMPILER_NO_DECLTYPE) || defined(EA_COMPILER_NO_FUNCTION_TEMPLATE_DEFAULT_ARGS) || defined(_MSC_VER) || defined(__EDG_VERSION__) // VS2012 and EDG mis-compile the conforming code below and so must be placed here. + #define EASTL_TYPE_TRAIT_is_destructible_CONFORMANCE 0 + + // This implementation works for almost all cases, with the primary exception being the + // case that the user declared the destructor as deleted. To deal with that case the + // user needs to use EASTL_DECLARE_IS_NOT_DESTRUCTIBLE to cause is_destructible::value + // to be false. + + template + struct is_destructible + : public eastl::integral_constant::value && + !eastl::is_void::value && + !eastl::is_function::value && + !eastl::is_abstract::value> {}; + #else + #define EASTL_TYPE_TRAIT_is_destructible_CONFORMANCE 1 + + template + struct destructible_test_helper{ U u; }; + + template + eastl::false_type destructible_test_function(...); + + template >().~destructible_test_helper())> + eastl::true_type destructible_test_function(int); + + template ::value || // Exclude these types from being considered destructible. + eastl::is_void::value || + eastl::is_function::value || + eastl::is_abstract::value> + struct is_destructible_helper + : public eastl::identity(0))>::type {}; // Need to wrap decltype with identity because some compilers otherwise don't like the bare decltype usage. 
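A condensed, standalone version of this detection, mirroring destructible_test_helper / destructible_test_function above but omitting the extra exclusions (arrays, void, functions, abstract types) that the full helper applies; NoDtor and the namespace are illustrative only:

    #include <type_traits>
    #include <utility>

    namespace sketch
    {
        // Wrapping U as a data member means the wrapper's destructor is implicitly
        // deleted whenever U's destructor is deleted or inaccessible, which the
        // decltype default template argument then detects via SFINAE.
        template <typename U> struct destructible_test_helper { U u; };

        template <typename>
        std::false_type destructible_test(...);

        template <typename U,
                  typename = decltype(std::declval<destructible_test_helper<U>&>().~destructible_test_helper<U>())>
        std::true_type destructible_test(int);

        template <typename T>
        struct is_destructible : public decltype(destructible_test<T>(0)) {};
    }

    struct NoDtor { ~NoDtor() = delete; };
    static_assert(sketch::is_destructible<int>::value, "");
    static_assert(!sketch::is_destructible<NoDtor>::value, "");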
+ + template + struct is_destructible_helper + : public eastl::false_type {}; + + template + struct is_destructible + : public is_destructible_helper {}; + + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_destructible_v = is_destructible::value; + #endif + + #define EASTL_DECLARE_IS_DESTRUCTIBLE(T, isDestructible) \ + namespace eastl{ \ + template <> struct is_destructible : public eastl::integral_constant{}; \ + template <> struct is_destructible : public eastl::integral_constant{}; \ + template <> struct is_destructible : public eastl::integral_constant{}; \ + template <> struct is_destructible : public eastl::integral_constant{}; \ + } + + + + /////////////////////////////////////////////////////////////////////// + // is_trivially_destructible + // + // is_destructible::value is true and the indicated destructor is + // known to be trivial. T shall be a complete type, (possibly cv-qualified) + // void, or an array of unknown bound. + // + // A destructor is trivial if it is not user-provided and if: + // - the destructor is not virtual, + // - all of the direct base classes of its class have trivial destructors, and + // - for all of the non-static data members of its class that are of + // class type (or array thereof), each such class has a trivial destructor. + // + /////////////////////////////////////////////////////////////////////// + + #if 0 // defined(_MSC_VER) && (_MSC_VER >= 1800) // VS2013+ -- Disabled due to __is_trivially_destructible being broken in VC++ versions up to at least VS2013. A ticket will be submitted for this + #define EASTL_TYPE_TRAIT_is_trivially_destructible_CONFORMANCE 1 + + template + struct is_trivially_destructible + : integral_constant {}; + + #elif EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || defined(EA_COMPILER_CLANG)) + #define EASTL_TYPE_TRAIT_is_trivially_destructible_CONFORMANCE EASTL_TYPE_TRAIT_is_destructible_CONFORMANCE + + template + struct is_trivially_destructible // Can't use just __has_trivial_destructor(T) because some compilers give it slightly different meaning, and are just plain broken, such as VC++'s __has_trivial_destructor, which says false for fundamental types. 
+ : public integral_constant::value && ((__has_trivial_destructor(T) && !eastl::is_hat_type::value)|| eastl::is_scalar::type>::value)> {}; + + #else + #define EASTL_TYPE_TRAIT_is_trivially_destructible_CONFORMANCE 0 + + template + struct is_trivially_destructible_helper + : public integral_constant::value || eastl::is_scalar::value || eastl::is_reference::value) && !eastl::is_void::value> {}; + + template + struct is_trivially_destructible + : public eastl::is_trivially_destructible_helper::type> {}; + #endif + + #define EASTL_DECLARE_IS_TRIVIALLY_DESTRUCTIBLE(T, isTriviallyDestructible) \ + namespace eastl{ \ + template <> struct is_trivially_destructible : public eastl::integral_constant{}; \ + template <> struct is_trivially_destructible : public eastl::integral_constant{}; \ + template <> struct is_trivially_destructible : public eastl::integral_constant{}; \ + template <> struct is_trivially_destructible : public eastl::integral_constant{}; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_trivially_destructible_v = is_trivially_destructible::value; + #endif + + + + + /////////////////////////////////////////////////////////////////////// + // is_nothrow_destructible + // + // is_destructible::value is true and the indicated destructor is + // known not to throw any exceptions (5.3.7). T shall be a complete type, + // (possibly cv-qualified) void, or an array of unknown bound. + // + /////////////////////////////////////////////////////////////////////// + + #if 0 // defined(_MSC_VER) && (_MSC_VER >= 1800) // VS2013+ -- Disabled due to __is_nothrow_destructible being broken in VC++ versions up to at least VS2013. A ticket will be submitted for this + #define EASTL_TYPE_TRAIT_is_nothrow_destructible_CONFORMANCE ((_MSC_VER >= 1900) ? 1 : 0) // VS2013 (1800) doesn't support noexcept and so can't support all usage of this properly (in particular default exception specifications defined in [C++11 Standard, 15.4 paragraph 14]. + + template + struct is_nothrow_destructible + : integral_constant {}; + + #elif defined(EA_COMPILER_NO_NOEXCEPT) + #define EASTL_TYPE_TRAIT_is_nothrow_destructible_CONFORMANCE 0 + + template + struct is_nothrow_destructible_helper + : public eastl::integral_constant::value || eastl::is_reference::value> {}; + + template + struct is_nothrow_destructible + : public eastl::is_nothrow_destructible_helper::type> {}; + + #else + #if defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION < 4008) + #define EASTL_TYPE_TRAIT_is_nothrow_destructible_CONFORMANCE 0 // GCC up to v4.7's noexcept is broken and fails to generate true for the case of compiler-generated destructors. + #else + #define EASTL_TYPE_TRAIT_is_nothrow_destructible_CONFORMANCE EASTL_TYPE_TRAIT_is_destructible_CONFORMANCE + #endif + //////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + // *_noexcept_wrapper implements a workaround for VS2015. A standards conforming noexcept operator allows variadic template expansion. + // There appears to be an issue with VS2015 that prevents variadic template expansion into a noexcept operator that is passed directly + // to a template parameter. + // + // The fix hoists the noexcept expression into a separate struct and caches the result of the expression. This result is then passed to integral_constant. 
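The same wrapper trick applied to destruction looks like the following sketch, which parallels the constructible sketch earlier: the noexcept result is cached in a helper struct and only consulted for types that are destructible in the first place. Standard-library names are used; ThrowingDtor is illustrative:

    #include <type_traits>
    #include <utility>

    namespace sketch
    {
        // Cache whether calling T's destructor is a non-throwing expression.
        template <typename T>
        struct nothrow_dtor_noexcept_wrapper
        { static const bool value = noexcept(std::declval<T&>().~T()); };

        template <bool /*isDestructible*/, typename T>
        struct is_nothrow_destructible_helper : public std::false_type {};

        template <typename T>
        struct is_nothrow_destructible_helper<true, T>
            : public std::integral_constant<bool, nothrow_dtor_noexcept_wrapper<T>::value> {};

        template <typename T>
        struct is_nothrow_destructible
            : public is_nothrow_destructible_helper<std::is_destructible<T>::value, T> {};
    }

    struct ThrowingDtor { ~ThrowingDtor() noexcept(false) {} };
    static_assert(sketch::is_nothrow_destructible<int>::value, "");
    static_assert(!sketch::is_nothrow_destructible<ThrowingDtor>::value, "");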
+ // + // Example code from Clang libc++ + // template + // struct __libcpp_is_nothrow_constructible<[>is constructible*/true, /*is reference<]false, _Tp, _Args...> + // : public integral_constant()...))> { }; + // + + template + struct is_nothrow_destructible_helper_noexcept_wrapper + { static const bool value = noexcept(eastl::declval().~T()); }; + + template + struct is_nothrow_destructible_helper; + + template + struct is_nothrow_destructible_helper + : public eastl::false_type {}; + + template + struct is_nothrow_destructible_helper // If the expression T::~T is a noexcept expression then it's nothrow. + : public eastl::integral_constant::value > {}; + + template + struct is_nothrow_destructible // A type needs to at least be destructible before it could be nothrow destructible. + : public eastl::is_nothrow_destructible_helper::value> {}; + + template // An array is nothrow destructible if its element type is nothrow destructible. + struct is_nothrow_destructible // To consider: Replace this with a remove_all_extents pathway. + : public eastl::is_nothrow_destructible {}; + + template + struct is_nothrow_destructible // A reference type cannot throw while being destructed. It's just a reference. + : public eastl::true_type {}; + + template + struct is_nothrow_destructible // An rvalue reference type cannot throw while being destructed. + : public eastl::true_type {}; + + #endif + + #define EASTL_DECLARE_IS_NOTHROW_DESTRUCTIBLE(T, isNoThrowDestructible) \ + namespace eastl{ \ + template <> struct is_nothrow_destructible { static const bool value = isNoThrowDestructible; }; \ + template <> struct is_nothrow_destructible { static const bool value = isNoThrowDestructible; }; \ + template <> struct is_nothrow_destructible { static const bool value = isNoThrowDestructible; }; \ + template <> struct is_nothrow_destructible { static const bool value = isNoThrowDestructible; }; \ + } + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_nothrow_destructible_v = is_nothrow_destructible::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_nothrow_default_constructible + // + /////////////////////////////////////////////////////////////////////// + #define EASTL_TYPE_TRAIT_is_nothrow_default_constructible_CONFORMANCE EASTL_TYPE_TRAIT_is_nothrow_constructible_CONFORMANCE + + template + struct is_nothrow_default_constructible + : public eastl::is_nothrow_constructible {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_nothrow_default_constructible_v = is_nothrow_default_constructible::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_nothrow_move_constructible + // + /////////////////////////////////////////////////////////////////////// + #define EASTL_TYPE_TRAIT_is_nothrow_move_constructible_CONFORMANCE EASTL_TYPE_TRAIT_is_nothrow_constructible_CONFORMANCE + + template + struct is_nothrow_move_constructible + : public eastl::is_nothrow_constructible::type> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_nothrow_move_constructible_v = is_nothrow_move_constructible::value; + #endif + + +} // namespace eastl + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/internal/type_properties.h b/libkram/eastl/include/EASTL/internal/type_properties.h new file mode 100644 index 00000000..5276f878 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/type_properties.h @@ -0,0 +1,380 @@ 
+///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_TYPE_PROPERTIES_H +#define EASTL_INTERNAL_TYPE_PROPERTIES_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include +#include + + +namespace eastl +{ + + + /////////////////////////////////////////////////////////////////////// + // underlying_type + // + // Defines a member typedef type of type that is the underlying type for the enumeration T. + // Requires explicit compiler support to implement. + // + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && ((defined(_MSC_VER) && (_MSC_VER >= 1700)) || (defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4007)) || defined(EA_COMPILER_CLANG)) // VS2012+ + #define EASTL_TYPE_TRAIT_underlying_type_CONFORMANCE 1 // underlying_type is conforming. + + template + struct underlying_type{ typedef __underlying_type(T) type; }; + + #else + #define EASTL_TYPE_TRAIT_underlying_type_CONFORMANCE 0 + + template + struct underlying_type{ typedef int type; }; // This is of course wrong, but we emulate libstdc++ and typedef it as int. + #endif + + #if !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + template + using underlying_type_t = typename underlying_type::type; + #endif + + + /////////////////////////////////////////////////////////////////////// + // has_unique_object_representations + // + // If T is TriviallyCopyable and if any two objects of type T with the same + // value have the same object representation, value is true. For any other + // type, value is false. + // + // http://en.cppreference.com/w/cpp/types/has_unique_object_representations + /////////////////////////////////////////////////////////////////////// + #if EASTL_HAS_UNIQUE_OBJECT_REPRESENTATIONS_AVAILABLE + #define EASTL_TYPE_TRAIT_has_unique_object_representations_CONFORMANCE 1 + + template + struct has_unique_object_representations + : public integral_constant>)> + { + }; + + #else + #define EASTL_TYPE_TRAIT_has_unique_object_representations_CONFORMANCE 0 + + template + struct has_unique_object_representations + : public integral_constant>>> // only integral types (floating point types excluded). + { + }; + + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR auto has_unique_object_representations_v = has_unique_object_representations::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_signed + // + // is_signed::value == true if and only if T is one of the following types: + // [const] [volatile] char (maybe) + // [const] [volatile] signed char + // [const] [volatile] short + // [const] [volatile] int + // [const] [volatile] long + // [const] [volatile] long long + // [const] [volatile] float + // [const] [volatile] double + // [const] [volatile] long double + // + // Used to determine if a integral type is signed or unsigned. + // Given that there are some user-made classes which emulate integral + // types, we provide the EASTL_DECLARE_SIGNED macro to allow you to + // set a given class to be identified as a signed type. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_signed_CONFORMANCE 1 // is_signed is conforming. 
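A condensed sketch of the helper-specialization pattern this section uses: one specialization per signed fundamental type, with remove_cv applied so const/volatile qualified types are detected as well. The full version above also covers char and wchar_t depending on the platform's signedness; this sketch covers only the always-signed types:

    #include <type_traits>

    namespace sketch
    {
        template <typename T> struct is_signed_helper               : public std::false_type {};
        template <> struct is_signed_helper<signed char>            : public std::true_type {};
        template <> struct is_signed_helper<short>                  : public std::true_type {};
        template <> struct is_signed_helper<int>                    : public std::true_type {};
        template <> struct is_signed_helper<long>                   : public std::true_type {};
        template <> struct is_signed_helper<long long>              : public std::true_type {};
        template <> struct is_signed_helper<float>                  : public std::true_type {};
        template <> struct is_signed_helper<double>                 : public std::true_type {};
        template <> struct is_signed_helper<long double>            : public std::true_type {};

        // remove_cv ensures "const int" and "volatile int" also report as signed.
        template <typename T>
        struct is_signed : public is_signed_helper<typename std::remove_cv<T>::type> {};
    }

    static_assert(sketch::is_signed<const int>::value, "");
    static_assert(!sketch::is_signed<unsigned int>::value, "");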
+ + template struct is_signed_helper : public false_type{}; + + template <> struct is_signed_helper : public true_type{}; + template <> struct is_signed_helper : public true_type{}; + template <> struct is_signed_helper : public true_type{}; + template <> struct is_signed_helper : public true_type{}; + template <> struct is_signed_helper : public true_type{}; + template <> struct is_signed_helper : public true_type{}; + template <> struct is_signed_helper : public true_type{}; + template <> struct is_signed_helper : public true_type{}; + + #if (CHAR_MAX == SCHAR_MAX) + template <> struct is_signed_helper : public true_type{}; + #endif + #ifndef EA_WCHAR_T_NON_NATIVE // If wchar_t is a native type instead of simply a define to an existing type... + #if defined(__WCHAR_MAX__) && ((__WCHAR_MAX__ == 2147483647) || (__WCHAR_MAX__ == 32767)) // GCC defines __WCHAR_MAX__ for most platforms. + template <> struct is_signed_helper : public true_type{}; + #endif + #endif + + template + struct is_signed : public eastl::is_signed_helper::type>{}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_signed_v = is_signed::value; + #endif + + #define EASTL_DECLARE_SIGNED(T) \ + namespace eastl{ \ + template <> struct is_signed : public true_type{}; \ + template <> struct is_signed : public true_type{}; \ + template <> struct is_signed : public true_type{}; \ + template <> struct is_signed : public true_type{}; \ + } + + + + /////////////////////////////////////////////////////////////////////// + // is_unsigned + // + // is_unsigned::value == true if and only if T is one of the following types: + // [const] [volatile] char (maybe) + // [const] [volatile] unsigned char + // [const] [volatile] unsigned short + // [const] [volatile] unsigned int + // [const] [volatile] unsigned long + // [const] [volatile] unsigned long long + // + // Used to determine if a integral type is signed or unsigned. + // Given that there are some user-made classes which emulate integral + // types, we provide the EASTL_DECLARE_UNSIGNED macro to allow you to + // set a given class to be identified as an unsigned type. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_unsigned_CONFORMANCE 1 // is_unsigned is conforming. + + template struct is_unsigned_helper : public false_type{}; + + template <> struct is_unsigned_helper : public true_type{}; + template <> struct is_unsigned_helper : public true_type{}; + template <> struct is_unsigned_helper : public true_type{}; + template <> struct is_unsigned_helper : public true_type{}; + template <> struct is_unsigned_helper : public true_type{}; + + #if (CHAR_MAX == UCHAR_MAX) + template <> struct is_unsigned_helper : public true_type{}; + #endif + #ifndef EA_WCHAR_T_NON_NATIVE // If wchar_t is a native type instead of simply a define to an existing type... + #if defined(_MSC_VER) || (defined(__WCHAR_MAX__) && ((__WCHAR_MAX__ == 4294967295U) || (__WCHAR_MAX__ == 65535))) // GCC defines __WCHAR_MAX__ for most platforms. 
+ template <> struct is_unsigned_helper : public true_type{}; + #endif + #endif + + template + struct is_unsigned : public eastl::is_unsigned_helper::type>{}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_unsigned_v = is_unsigned::value; + #endif + + #define EASTL_DECLARE_UNSIGNED(T) \ + namespace eastl{ \ + template <> struct is_unsigned : public true_type{}; \ + template <> struct is_unsigned : public true_type{}; \ + template <> struct is_unsigned : public true_type{}; \ + template <> struct is_unsigned : public true_type{}; \ + } + + + + /////////////////////////////////////////////////////////////////////// + // alignment_of + // + // alignment_of::value is an integral value representing, in bytes, + // the memory alignment of objects of type T. + // + // alignment_of may only be applied to complete types. + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_alignment_of_CONFORMANCE 1 // alignment_of is conforming. + + template + struct alignment_of_value{ static const size_t value = EASTL_ALIGN_OF(T); }; + + template + struct alignment_of : public integral_constant::value>{}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR size_t alignment_of_v = alignment_of::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_aligned + // + // Defined as true if the type has alignment requirements greater + // than default alignment, which is taken to be 8. This allows for + // doing specialized object allocation and placement for such types. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_aligned_CONFORMANCE 1 // is_aligned is conforming. + + template + struct is_aligned_value{ static const bool value = (EASTL_ALIGN_OF(T) > 8); }; + + template + struct is_aligned : public integral_constant::value>{}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR size_t is_aligned_v = is_aligned::value; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // rank + // + // rank::value is an integral value representing the number of + // dimensions possessed by an array type. For example, given a + // multi-dimensional array type T[M][N], std::tr1::rank::value == 2. + // For a given non-array type T, std::tr1::rank::value == 0. + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_rank_CONFORMANCE 1 // rank is conforming. + + template + struct rank : public eastl::integral_constant {}; + + template + struct rank : public eastl::integral_constant::value + 1> {}; + + template + struct rank : public eastl::integral_constant::value + 1> {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR auto rank_v = rank::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_base_of + // + // Given two (possibly identical) types Base and Derived, is_base_of::value == true + // if and only if Base is a direct or indirect base class of Derived, + // or Base and Derived are the same type. + // + // is_base_of may only be applied to complete types. 
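Returning to alignment_of and rank above: alignment_of simply surfaces the compiler's alignment query as an integral_constant, and rank counts array dimensions by peeling one extent per partial specialization. A compact standalone sketch (standard-library spellings, illustrative only):

    #include <cstddef>
    #include <type_traits>

    namespace sketch
    {
        // alignment_of reports the alignment of T, in bytes.
        template <typename T>
        struct alignment_of : public std::integral_constant<std::size_t, alignof(T)> {};

        // rank counts array dimensions by stripping one extent per recursion step.
        template <typename T>
        struct rank : public std::integral_constant<std::size_t, 0> {};

        template <typename T>
        struct rank<T[]> : public std::integral_constant<std::size_t, rank<T>::value + 1> {};

        template <typename T, std::size_t N>
        struct rank<T[N]> : public std::integral_constant<std::size_t, rank<T>::value + 1> {};
    }

    static_assert(sketch::rank<int>::value == 0, "");
    static_assert(sketch::rank<int[3][4]>::value == 2, "");
    static_assert(sketch::alignment_of<double>::value == alignof(double), "");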
+ // + /////////////////////////////////////////////////////////////////////// + + #if EASTL_COMPILER_INTRINSIC_TYPE_TRAITS_AVAILABLE && (defined(_MSC_VER) || defined(EA_COMPILER_GNUC) || (defined(EA_COMPILER_CLANG) && EA_COMPILER_HAS_FEATURE(is_base_of))) + #define EASTL_TYPE_TRAIT_is_base_of_CONFORMANCE 1 // is_base_of is conforming. + + template + struct is_base_of : public eastl::integral_constant::value>{}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EASTL_CPP17_INLINE_VARIABLE EA_CONSTEXPR bool is_base_of_v = is_base_of::value; + #endif + #else + // Not implemented yet. + // This appears to be implementable. + #define EASTL_TYPE_TRAIT_is_base_of_CONFORMANCE 0 + #endif + + + + /////////////////////////////////////////////////////////////////////// + // is_lvalue_reference + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_lvalue_reference_CONFORMANCE 1 // is_lvalue_reference is conforming. + + template struct is_lvalue_reference : public eastl::false_type {}; + template struct is_lvalue_reference : public eastl::true_type {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_lvalue_reference_v = is_lvalue_reference::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // is_rvalue_reference + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_is_rvalue_reference_CONFORMANCE 1 // is_rvalue_reference is conforming. + + template struct is_rvalue_reference : public eastl::false_type {}; + template struct is_rvalue_reference : public eastl::true_type {}; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR bool is_rvalue_reference_v = is_rvalue_reference::value; + #endif + + + /////////////////////////////////////////////////////////////////////// + // result_of + // + /////////////////////////////////////////////////////////////////////// + #define EASTL_TYPE_TRAIT_result_of_CONFORMANCE 1 // result_of is conforming. + + template struct result_of; + + template + struct result_of + { typedef decltype(eastl::declval()(eastl::declval()...)) type; }; + + + // result_of_t is the C++14 using typedef for typename result_of::type. + // We provide a backwards-compatible means to access it through a macro for pre-C++11 compilers. + #if defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + #define EASTL_RESULT_OF_T(T) typename result_of::type + #else + template + using result_of_t = typename result_of::type; + #define EASTL_RESULT_OF_T(T) result_of_t + #endif + + + /////////////////////////////////////////////////////////////////////// + // has_equality + // + // Determines if the specified type can be tested for equality. 
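Both result_of and has_equality are built from unevaluated call expressions; has_equality in particular is the void_t detection idiom, where the partial specialization is viable only when the comparison expression is well-formed. A standalone sketch under that reading (Doubler and NoEq are illustrative placeholder types; a local void_t alias stands in for the one defined later in type_transformations.h):

    #include <type_traits>
    #include <utility>

    namespace sketch
    {
        template <typename...> using void_t = void;

        // result_of: the type of invoking F with Args, taken from the call
        // expression itself in an unevaluated context.
        template <typename> struct result_of;

        template <typename F, typename... Args>
        struct result_of<F(Args...)>
        { typedef decltype(std::declval<F>()(std::declval<Args>()...)) type; };

        // has_equality: true only when declval<T>() == declval<T>() compiles.
        template <typename, typename = void>
        struct has_equality : std::false_type {};

        template <typename T>
        struct has_equality<T, void_t<decltype(std::declval<T>() == std::declval<T>())>> : std::true_type {};
    }

    struct Doubler { double operator()(int x) const { return x * 2.0; } };
    struct NoEq {};
    static_assert(std::is_same<sketch::result_of<Doubler(int)>::type, double>::value, "");
    static_assert(sketch::has_equality<int>::value, "");
    static_assert(!sketch::has_equality<NoEq>::value, "");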
+ // + /////////////////////////////////////////////////////////////////////// + template > + struct has_equality : eastl::false_type {}; + + template + struct has_equality() == eastl::declval())>> : eastl::true_type + { + }; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + EA_CONSTEXPR auto has_equality_v = has_equality::value; + #endif + +} // namespace eastl + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/internal/type_transformations.h b/libkram/eastl/include/EASTL/internal/type_transformations.h new file mode 100644 index 00000000..cffa65e5 --- /dev/null +++ b/libkram/eastl/include/EASTL/internal/type_transformations.h @@ -0,0 +1,606 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +///////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTERNAL_TYPE_TRANFORMATIONS_H +#define EASTL_INTERNAL_TYPE_TRANFORMATIONS_H + + +#include +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once +#endif + +#include + + +namespace eastl +{ + + /////////////////////////////////////////////////////////////////////// + // add_const + // + // Add const to a type. + // + // Tor a given type T, add_const::type is equivalent to T + // const if is_const::value == false, and + // - is_void::value == true, or + // - is_object::value == true. + // + // Otherwise, add_const::type is equivalent to T. + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_add_const_CONFORMANCE 1 // add_const is conforming. + + template ::value || eastl::is_reference::value || eastl::is_function::value> + struct add_const_helper + { typedef T type; }; + + template + struct add_const_helper + { typedef const T type; }; + + template + struct add_const + { typedef typename eastl::add_const_helper::type type; }; + + // add_const_t is the C++17 using typedef for typename add_const::type. + // We provide a backwards-compatible means to access it through a macro for pre-C++11 compilers. + #if defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + #define EASTL_ADD_CONST_T(T) typename add_const::type + #else + template + using add_const_t = typename add_const::type; + #define EASTL_ADD_CONST_T(T) add_const_t + #endif + + + /////////////////////////////////////////////////////////////////////// + // add_volatile + // + // Add volatile to a type. + // + // For a given type T, add_volatile::type is equivalent to T volatile + // if is_volatile::value == false, and + // - is_void::value == true, or + // - is_object::value == true. + // + // Otherwise, add_volatile::type is equivalent to T. + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_add_volatile_CONFORMANCE 1 // add_volatile is conforming. + + template ::value || eastl::is_reference::value || eastl::is_function::value> + struct add_volatile_helper + { typedef T type; }; + + template + struct add_volatile_helper + { typedef volatile T type; }; + + template struct add_volatile + { typedef typename eastl::add_volatile_helper::type type; }; + + template using add_volatile_t = typename add_volatile::type; + + + /////////////////////////////////////////////////////////////////////// + // add_cv + // + // The add_cv transformation trait adds const and volatile qualification + // to the type to which it is applied. For a given type T, + // add_volatile::type is equivalent to add_const::type>::type. 
+ // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_add_cv_CONFORMANCE 1 // add_cv is conforming. + + template + struct add_cv + { + typedef typename add_const::type>::type type; + }; + + template using add_cv_t = typename add_cv::type; + + + /////////////////////////////////////////////////////////////////////// + // make_signed + // + // Used to convert an integral type to its signed equivalent, if not already. + // T shall be a (possibly const and/or volatile-qualified) integral type + // or enumeration but not a bool type.; + // + // The user can define their own make_signed overrides for their own + // types by making a template specialization like done below and adding + // it to the user's code. + /////////////////////////////////////////////////////////////////////// + + // To do: This implementation needs to be updated to support C++11 conformance (recognition of enums) and + // to support volatile-qualified types. It will probably be useful to have it fail for unsupported types. + #define EASTL_TYPE_TRAIT_make_signed_CONFORMANCE 0 // make_signed is only partially conforming. + + template struct make_signed { typedef T type; }; + + template <> struct make_signed { typedef signed char type; }; + template <> struct make_signed { typedef const signed char type; }; + template <> struct make_signed { typedef signed short type; }; + template <> struct make_signed { typedef const signed short type; }; + template <> struct make_signed { typedef signed int type; }; + template <> struct make_signed { typedef const signed int type; }; + template <> struct make_signed { typedef signed long type; }; + template <> struct make_signed { typedef const signed long type; }; + template <> struct make_signed { typedef signed long long type; }; + template <> struct make_signed { typedef const signed long long type; }; + + #if (defined(CHAR_MAX) && defined(UCHAR_MAX) && (CHAR_MAX == UCHAR_MAX)) // If char is unsigned, we convert char to signed char. However, if char is signed then make_signed returns char itself and not signed char. + template <> struct make_signed { typedef signed char type; }; + template <> struct make_signed { typedef signed char type; }; + #endif + + #ifndef EA_WCHAR_T_NON_NATIVE // If wchar_t is a native type instead of simply a define to an existing type... + #if (defined(__WCHAR_MAX__) && (__WCHAR_MAX__ == 4294967295U)) // If wchar_t is a 32 bit unsigned value... + template<> + struct make_signed + { typedef int32_t type; }; + #elif (defined(__WCHAR_MAX__) && (__WCHAR_MAX__ == 65535)) // If wchar_t is a 16 bit unsigned value... + template<> + struct make_signed + { typedef int16_t type; }; + #elif (defined(__WCHAR_MAX__) && (__WCHAR_MAX__ == 255)) // If wchar_t is an 8 bit unsigned value... + template<> + struct make_signed + { typedef int8_t type; }; + #endif + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + using make_signed_t = typename make_signed::type; + #endif + + + /////////////////////////////////////////////////////////////////////// + // add_signed + // + // This is not a C++11 type trait, and is here for backwards compatibility + // only. Use the C++11 make_unsigned type trait instead. 
+ /////////////////////////////////////////////////////////////////////// + + template + struct add_signed : public make_signed + { typedef typename eastl::make_signed::type type; }; + + + + + /////////////////////////////////////////////////////////////////////// + // make_unsigned + // + // Used to convert an integral type to its signed equivalent, if not already. + // T shall be a (possibly const and/or volatile-qualified) integral type + // or enumeration but not a bool type.; + // + // The user can define their own make_signed overrides for their own + // types by making a template specialization like done below and adding + // it to the user's code. + /////////////////////////////////////////////////////////////////////// + + // To do: This implementation needs to be updated to support C++11 conformance (recognition of enums) and + // to support volatile-qualified types. It will probably be useful to have it fail for unsupported types. + #define EASTL_TYPE_TRAIT_make_unsigned_CONFORMANCE 0 // make_unsigned is only partially conforming. + + template struct make_unsigned { typedef T type; }; + + template <> struct make_unsigned { typedef unsigned char type; }; + template <> struct make_unsigned { typedef const unsigned char type; }; + template <> struct make_unsigned { typedef unsigned short type; }; + template <> struct make_unsigned { typedef const unsigned short type; }; + template <> struct make_unsigned { typedef unsigned int type; }; + template <> struct make_unsigned { typedef const unsigned int type; }; + template <> struct make_unsigned { typedef unsigned long type; }; + template <> struct make_unsigned { typedef const unsigned long type; }; + template <> struct make_unsigned { typedef unsigned long long type; }; + template <> struct make_unsigned { typedef const unsigned long long type; }; + + #if (CHAR_MIN < 0) // If char is signed, we convert char to unsigned char. However, if char is unsigned then make_unsigned returns char itself and not unsigned char. + template <> struct make_unsigned { typedef unsigned char type; }; + template <> struct make_unsigned { typedef unsigned char type; }; + #endif + + #ifndef EA_WCHAR_T_NON_NATIVE // If wchar_t is a native type instead of simply a define to an existing type... + #if (defined(__WCHAR_MAX__) && (__WCHAR_MAX__ != 4294967295U)) // If wchar_t is a 32 bit signed value... + template<> + struct make_unsigned + { typedef uint32_t type; }; + #elif (defined(__WCHAR_MAX__) && (__WCHAR_MAX__ != 65535)) // If wchar_t is a 16 bit signed value... + template<> + struct make_unsigned + { typedef uint16_t type; }; + #elif (defined(__WCHAR_MAX__) && (__WCHAR_MAX__ != 255)) // If wchar_t is an 8 bit signed value... + template<> + struct make_unsigned + { typedef uint8_t type; }; + #endif + #endif + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + using make_unsigned_t = typename make_unsigned::type; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // add_unsigned + // + // This is not a C++11 type trait, and is here for backwards compatibility + // only. Use the C++11 make_unsigned type trait instead. + // + // Adds unsigned-ness to the given type. + // Modifies only integral values; has no effect on others. 
+ // add_unsigned::type is unsigned int + // add_unsigned::type is unsigned int + // + /////////////////////////////////////////////////////////////////////// + + template + struct add_unsigned : public make_unsigned + { typedef typename eastl::make_signed::type type; }; + + + + /////////////////////////////////////////////////////////////////////// + // remove_pointer + // + // Remove pointer from a type. + // + // The remove_pointer transformation trait removes top-level indirection + // by pointer (if any) from the type to which it is applied. Pointers to + // members are not affected. For a given type T, remove_pointer::type + // is equivalent to T. + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_remove_pointer_CONFORMANCE 1 + + template struct remove_pointer { typedef T type; }; + template struct remove_pointer { typedef T type; }; + template struct remove_pointer { typedef T type; }; + template struct remove_pointer { typedef T type; }; + template struct remove_pointer { typedef T type; }; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + using remove_pointer_t = typename remove_pointer::type; + #endif + + + /////////////////////////////////////////////////////////////////////// + // add_pointer + // + // Add pointer to a type. + // Provides the member typedef type which is the type T*. If T is a + // reference type, then type is a pointer to the referred type. + // + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_add_pointer_CONFORMANCE 1 + + template + struct add_pointer { typedef typename eastl::remove_reference::type* type; }; + + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + using add_pointer_t = typename add_pointer::type; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // remove_extent + // + // The remove_extent transformation trait removes a dimension from an array. + // For a given non-array type T, remove_extent::type is equivalent to T. + // For a given array type T[N], remove_extent::type is equivalent to T. + // For a given array type const T[N], remove_extent::type is equivalent to const T. + // For example, given a multi-dimensional array type T[M][N], remove_extent::type is equivalent to T[N]. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_remove_extent_CONFORMANCE 1 // remove_extent is conforming. + + template struct remove_extent { typedef T type; }; + template struct remove_extent { typedef T type; }; + template struct remove_extent { typedef T type; }; + + #if !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + template + using remove_extent_t = typename remove_extent::type; + #endif + + + /////////////////////////////////////////////////////////////////////// + // remove_all_extents + // + // The remove_all_extents transformation trait removes all dimensions from an array. + // For a given non-array type T, remove_all_extents::type is equivalent to T. + // For a given array type T[N], remove_all_extents::type is equivalent to T. + // For a given array type const T[N], remove_all_extents::type is equivalent to const T. + // For example, given a multi-dimensional array type T[M][N], remove_all_extents::type is equivalent to T. + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_remove_all_extents_CONFORMANCE 1 // remove_all_extents is conforming. 
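As a compact illustration of the two extent-removal traits described above, remove_extent strips exactly one array dimension while remove_all_extents recurses until none remain; const and volatile on the element type are preserved. A standalone sketch with standard-library spellings:

    #include <cstddef>
    #include <type_traits>

    namespace sketch
    {
        template <typename T>                struct remove_extent       { typedef T type; };
        template <typename T>                struct remove_extent<T[]>  { typedef T type; };
        template <typename T, std::size_t N> struct remove_extent<T[N]> { typedef T type; };

        template <typename T>                struct remove_all_extents       { typedef T type; };
        template <typename T>                struct remove_all_extents<T[]>  { typedef typename remove_all_extents<T>::type type; };
        template <typename T, std::size_t N> struct remove_all_extents<T[N]> { typedef typename remove_all_extents<T>::type type; };
    }

    static_assert(std::is_same<sketch::remove_extent<int[2][3]>::type, int[3]>::value, "");
    static_assert(std::is_same<sketch::remove_all_extents<const int[2][3]>::type, const int>::value, "");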
+ + template struct remove_all_extents { typedef T type; }; + template struct remove_all_extents { typedef typename eastl::remove_all_extents::type type; }; + template struct remove_all_extents { typedef typename eastl::remove_all_extents::type type; }; + + #if !defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + template + using remove_all_extents_t = typename remove_all_extents::type; + #endif + + + + /////////////////////////////////////////////////////////////////////// + // aligned_storage + // + // The aligned_storage transformation trait provides a type that is + // suitably aligned to store an object whose size is does not exceed length + // and whose alignment is a divisor of alignment. When using aligned_storage, + // length must be non-zero, and alignment must >= alignment_of::value + // for some type T. We require the alignment value to be a power-of-two. + // + // GCC versions prior to 4.4 don't properly support this with stack-based + // variables. The EABase EA_ALIGN_MAX_AUTOMATIC define identifies the + // extent to which stack (automatic) variables can be aligned for the + // given compiler/platform combination. + // + // Example usage: + // aligned_storage::type widget; + // Widget* pWidget = new(&widget) Widget; + // + // aligned_storage::type widgetAlignedTo64; + // Widget* pWidget = new(&widgetAlignedTo64) Widget; + // + // aligned_storage::type widgetArray[37]; + // Widget* pWidgetArray = new(widgetArray) Widget[37]; + /////////////////////////////////////////////////////////////////////// + + #define EASTL_TYPE_TRAIT_aligned_storage_CONFORMANCE 1 // aligned_storage is conforming. + + #if defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION >= 4008) + // New versions of GCC do not support using 'alignas' with a value greater than 128. + // However, this code using the GNU standard alignment attribute works properly. + template + struct aligned_storage + { + struct type { unsigned char mCharData[N]; } EA_ALIGN(Align); + }; + #elif (EABASE_VERSION_N >= 20040) && !defined(EA_COMPILER_NO_ALIGNAS) // If C++11 alignas is supported... + template + struct aligned_storage + { + typedef struct { + alignas(Align) unsigned char mCharData[N]; + } type; + }; + + #elif defined(EA_COMPILER_MSVC) || (defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION < 4007)) || defined(EA_COMPILER_EDG) // At some point GCC fixed their attribute(align) to support non-literals, though it's not clear what version aside from being no later than 4.7 and no earlier than 4.2. + // Some compilers don't allow you to to use EA_ALIGNED with anything by a numeric literal, + // so we can't use the simpler code like we do further below for other compilers. We support + // only up to so much of an alignment value here. 
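Before the per-alignment specializations below, here is a usage sketch of the pattern shown in the aligned_storage comment above, written against the C++11 alignas path. Widget is a placeholder type, and the simplified aligned_storage here is an assumption standing in for the EASTL version:

    #include <cstddef>
    #include <new>

    namespace sketch
    {
        // The alignas-based path: an N-byte POD buffer aligned to Align.
        template <std::size_t N, std::size_t Align>
        struct aligned_storage
        {
            struct type { alignas(Align) unsigned char mCharData[N]; };
        };
    }

    struct Widget { int x; float y; };

    int main()
    {
        // Raw storage suitably sized and aligned for a Widget.
        sketch::aligned_storage<sizeof(Widget), alignof(Widget)>::type storage;

        // Construct the object in place, use it, then destroy it manually
        // before the storage goes out of scope.
        Widget* pWidget = new (&storage) Widget{1, 2.0f};
        int value = pWidget->x;
        pWidget->~Widget();
        return value;
    }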
+ template + struct aligned_storage_helper { struct type{ unsigned char mCharData[N]; }; }; + + template struct aligned_storage_helper { struct EA_ALIGN( 2) type{ unsigned char mCharData[N]; }; }; + template struct aligned_storage_helper { struct EA_ALIGN( 4) type{ unsigned char mCharData[N]; }; }; + template struct aligned_storage_helper { struct EA_ALIGN( 8) type{ unsigned char mCharData[N]; }; }; + template struct aligned_storage_helper { struct EA_ALIGN( 16) type{ unsigned char mCharData[N]; }; }; + template struct aligned_storage_helper { struct EA_ALIGN( 32) type{ unsigned char mCharData[N]; }; }; + template struct aligned_storage_helper { struct EA_ALIGN( 64) type{ unsigned char mCharData[N]; }; }; + template struct aligned_storage_helper { struct EA_ALIGN( 128) type{ unsigned char mCharData[N]; }; }; + template struct aligned_storage_helper { struct EA_ALIGN( 256) type{ unsigned char mCharData[N]; }; }; + template struct aligned_storage_helper { struct EA_ALIGN( 512) type{ unsigned char mCharData[N]; }; }; + template struct aligned_storage_helper { struct EA_ALIGN(1024) type{ unsigned char mCharData[N]; }; }; + template struct aligned_storage_helper { struct EA_ALIGN(2048) type{ unsigned char mCharData[N]; }; }; + template struct aligned_storage_helper { struct EA_ALIGN(4096) type{ unsigned char mCharData[N]; }; }; + + template + struct aligned_storage + { + typedef typename aligned_storage_helper::type type; + }; + + #else + template + struct aligned_storage + { + union type + { + unsigned char mCharData[N]; + struct EA_ALIGN(Align) mStruct{ }; + }; + }; + #endif + + #if defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + #define EASTL_ALIGNED_STORAGE_T(N, Align) typename eastl::aligned_storage_t::type + #else + template + using aligned_storage_t = typename aligned_storage::type; + #define EASTL_ALIGNED_STORAGE_T(N, Align) eastl::aligned_storage_t + #endif + + + + /////////////////////////////////////////////////////////////////////// + // aligned_union + // + // The member typedef type shall be a POD type suitable for use as + // uninitialized storage for any object whose type is listed in Types; + // its size shall be at least Len. The static member alignment_value + // shall be an integral constant of type std::size_t whose value is + // the strictest alignment of all types listed in Types. + // Note that the resulting type is not a C/C++ union, but simply memory + // block (of pod type) that can be used to placement-new an actual + // C/C++ union of the types. The actual union you declare can be a non-POD union. + // + // Example usage: + // union MyUnion { + // char c; + // int i; + // float f; + // + // MyUnion(float fValue) : f(fValue) {} + // }; + // + // aligned_union::type myUnionStorage; + // MyUnion* pMyUnion = new(&myUnionStorage) MyUnion(21.4f); + // pMyUnion->i = 37; + // + /////////////////////////////////////////////////////////////////////// + + #if defined(EA_COMPILER_NO_VARIADIC_TEMPLATES) || !EASTL_TYPE_TRAIT_static_max_CONFORMANCE + #define EASTL_TYPE_TRAIT_aligned_union_CONFORMANCE 0 // aligned_union is not conforming, as it supports only a two-member unions. + + // To consider: Expand this to include more possible types. We may want to convert this to be a recursive + // template instead of like below. 
+ template + struct aligned_union + { + static const size_t size0 = eastl::static_max::value; + static const size_t size1 = eastl::static_max::value; + static const size_t size2 = eastl::static_max::value; + static const size_t size = eastl::static_max::value; + + static const size_t alignment0 = eastl::static_max::value; + static const size_t alignment1 = eastl::static_max::value; + static const size_t alignment_value = eastl::static_max::value; + + typedef typename eastl::aligned_storage::type type; + }; + + #if defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + // To do: define macro. + #else + template + using aligned_union_t = typename aligned_union::type; + #endif + #else + #define EASTL_TYPE_TRAIT_aligned_union_CONFORMANCE 1 // aligned_union is conforming. + + template + struct aligned_union + { + static const size_t size = eastl::static_max::value; + static const size_t alignment_value = eastl::static_max::value; + + typedef typename eastl::aligned_storage::type type; + }; + + #if defined(EA_COMPILER_NO_TEMPLATE_ALIASES) + // To do: define macro. + #else + template + using aligned_union_t = typename aligned_union::type; + #endif + + #endif + + + /////////////////////////////////////////////////////////////////////// + // union_cast + // + // Safely converts between unrelated types that have a binary equivalency. + // This appoach is required by strictly conforming C++ compilers because + // directly using a C or C++ cast between unrelated types is fraught with + // the possibility of undefined runtime behavior due to type aliasing. + // The Source and Dest types must be POD types due to the use of a union + // in C++ versions prior to C++11. C++11 relaxes the definition of a POD + // such that it allows a classes with trivial default constructors whereas + // previous versions did not, so beware of this when writing portable code. + // + // Example usage: + // float f32 = 1.234f; + // uint32_t n32 = union_cast(f32); + // + // Example possible mis-usage: + // The following is valid only if you are aliasing the pointer value and + // not what it points to. Most of the time the user intends the latter, + // which isn't strictly possible. + // Widget* pWidget = CreateWidget(); + // Foo* pFoo = union_cast(pWidget); + /////////////////////////////////////////////////////////////////////// + + template + DestType union_cast(SourceType sourceValue) + { + EASTL_CT_ASSERT((sizeof(DestType) == sizeof(SourceType)) && + (EA_ALIGN_OF(DestType) == EA_ALIGN_OF(SourceType))); // To support differening alignments, we would need to use a memcpy-based solution or find a way to make the two union members align with each other. + //EASTL_CT_ASSERT(is_pod::value && is_pod::value); // Disabled because we don't want to restrict what the user can do, as some compiler's definitions of is_pod aren't up to C++11 Standards. + //EASTL_CT_ASSERT(!is_pointer::value && !is_pointer::value); // Disabled because it's valid to alias pointers as long as you are aliasong the pointer value and not what it points to. + + union { + SourceType sourceValue; + DestType destValue; + } u; + u.sourceValue = sourceValue; + + return u.destValue; + } + + + + /////////////////////////////////////////////////////////////////////// + // void_t + // + // Maps a sequence of any types to void. This utility class is used in + // template meta programming to simplify compile time reflection mechanisms + // required by the standard library. 
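+	// Illustrative note (an assumed reconstruction, not upstream documentation): the alias itself is
+	// simply
+	//     template <typename...> using void_t = void;
+	// which lets a partial specialization be chosen only when the expressions it inspects are
+	// well-formed, as the is_iterable example below shows.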
+ // + // http://en.cppreference.com/w/cpp/types/void_t + // + // Example: + // template + // struct is_iterable : false_type {}; + // + // template + // struct is_iterable().begin()), + // decltype(declval().end())>> : true_type {}; + // + /////////////////////////////////////////////////////////////////////// + #if EASTL_VARIABLE_TEMPLATES_ENABLED + template + using void_t = void; + #endif + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/intrusive_hash_map.h b/libkram/eastl/include/EASTL/intrusive_hash_map.h new file mode 100644 index 00000000..37f16188 --- /dev/null +++ b/libkram/eastl/include/EASTL/intrusive_hash_map.h @@ -0,0 +1,98 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_INTRUSIVE_HASH_MAP_H +#define EASTL_INTRUSIVE_HASH_MAP_H + + +#include +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// intrusive_hash_map + /// + /// Template parameters: + /// Key The key object (key in the key/value pair). T must contain a member of type Key named mKey. + /// T The type of object the map holds (a.k.a. value). + /// bucketCount The number of buckets to use. Best if it's a prime number. + /// Hash Hash function. See functional.h for examples of hash functions. + /// Equal Equality testing predicate; tells if two elements are equal. + /// + template , typename Equal = eastl::equal_to > + class intrusive_hash_map : public intrusive_hashtable + { + public: + typedef intrusive_hashtable base_type; + typedef intrusive_hash_map this_type; + + public: + explicit intrusive_hash_map(const Hash& h = Hash(), const Equal& eq = Equal()) + : base_type(h, eq) + { + // Empty + } + + // To consider: Is this feasible, given how initializer_list works by creating a temporary array? Even if it is feasible, is it a good idea? + //intrusive_hash_map(std::initializer_list ilist); + + }; // intrusive_hash_map + + + + + /// intrusive_hash_multimap + /// + /// Implements a intrusive_hash_multimap, which is the same thing as a intrusive_hash_map + /// except that contained elements need not be unique. See the documentation + /// for intrusive_hash_map for details. + /// + /// Template parameters: + /// Key The key object (key in the key/value pair). T must contain a member of type Key named mKey. + /// T The type of object the map holds (a.k.a. value). + /// bucketCount The number of buckets to use. Best if it's a prime number. + /// Hash Hash function. See functional.h for examples of hash functions. + /// Equal Equality testing predicate; tells if two elements are equal. + /// + template , typename Equal = eastl::equal_to > + class intrusive_hash_multimap : public intrusive_hashtable + { + public: + typedef intrusive_hashtable base_type; + typedef intrusive_hash_multimap this_type; + + public: + explicit intrusive_hash_multimap(const Hash& h = Hash(), const Equal& eq = Equal()) + : base_type(h, eq) + { + // Empty + } + + // To consider: Is this feasible, given how initializer_list works by creating a temporary array? Even if it is feasible, is it a good idea? 
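+
+		// Illustrative sketch (hypothetical Widget type; an assumption, not upstream documentation):
+		// an element stored in these intrusive hash containers typically derives from
+		// eastl::intrusive_hash_node_key<Key>, which supplies the required mKey member, e.g.
+		//     struct Widget : public eastl::intrusive_hash_node_key<uint32_t> { /* payload */ };
+		//     eastl::intrusive_hash_multimap<uint32_t, Widget, 37> widgetMap;
+		//     Widget w; w.mKey = 12; widgetMap.insert(w);   // no allocation; w must outlive its entry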
+ //intrusive_hash_multimap(std::initializer_list ilist); + + }; // intrusive_hash_multimap + + + + +} // namespace eastl + + +#endif // Header include guard + + + + + + diff --git a/libkram/eastl/include/EASTL/intrusive_hash_set.h b/libkram/eastl/include/EASTL/intrusive_hash_set.h new file mode 100644 index 00000000..a25d03a6 --- /dev/null +++ b/libkram/eastl/include/EASTL/intrusive_hash_set.h @@ -0,0 +1,100 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +#ifndef EASTL_INTRUSIVE_HASH_SET_H +#define EASTL_INTRUSIVE_HASH_SET_H + + +#include +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// intrusive_hash_set + /// + /// Template parameters: + /// T The type of object the set holds (a.k.a. value). + /// bucketCount The number of buckets to use. Best if it's a prime number. + /// Hash Hash function. See functional.h for examples of hash functions. + /// Equal Equality testing predicate; tells if two elements are equal. + /// + template , typename Equal = eastl::equal_to > + class intrusive_hash_set : public intrusive_hashtable + { + public: + typedef intrusive_hashtable base_type; + typedef intrusive_hash_set this_type; + + public: + explicit intrusive_hash_set(const Hash& h = Hash(), const Equal& eq = Equal()) + : base_type(h, eq) + { + // Empty + } + + // To consider: Is this feasible, given how initializer_list works by creating a temporary array? Even if it is feasible, is it a good idea? + //intrusive_hash_set(std::initializer_list ilist); + + }; // intrusive_hash_set + + + + + /// intrusive_hash_multiset + /// + /// Implements a intrusive_hash_multiset, which is the same thing as a intrusive_hash_set + /// except that contained elements need not be unique. See the documentation + /// for intrusive_hash_set for details. + /// + /// Template parameters: + /// T The type of object the set holds (a.k.a. value). + /// bucketCount The number of buckets to use. Best if it's a prime number. + /// Hash Hash function. See functional.h for examples of hash functions. + /// Equal Equality testing predicate; tells if two elements are equal. + /// + template , typename Equal = eastl::equal_to > + class intrusive_hash_multiset : public intrusive_hashtable + { + public: + typedef intrusive_hashtable base_type; + typedef intrusive_hash_multiset this_type; + + public: + explicit intrusive_hash_multiset(const Hash& h = Hash(), const Equal& eq = Equal()) + : base_type(h, eq) + { + // Empty + } + + // To consider: Is this feasible, given how initializer_list works by creating a temporary array? Even if it is feasible, is it a good idea? + //intrusive_hash_multiset(std::initializer_list ilist); + + }; // intrusive_hash_multiset + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/intrusive_list.h b/libkram/eastl/include/EASTL/intrusive_list.h new file mode 100644 index 00000000..18d7e93a --- /dev/null +++ b/libkram/eastl/include/EASTL/intrusive_list.h @@ -0,0 +1,1315 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+/////////////////////////////////////////////////////////////////////////////// + + +/////////////////////////////////////////////////////////////////////////////// +// The intrusive list container is similar to a list, with the primary +// different being that intrusive lists allow you to control memory +// allocation. +// +// * Intrusive lists store the nodes directly in the data items. This +// is done by deriving the object from intrusive_list_node. +// +// * The container does no memory allocation -- it works entirely with +// the submitted nodes. This does mean that it is the client's job to +// free the nodes in an intrusive list, though. +// +// * Valid node pointers can be converted back to iterators in O(1). +// This is because objects in the list are also nodes in the list. +// +// * intrusive_list does not support copy construction or assignment; +// the push, pop, and insert operations take ownership of the +// passed object. +// +// Usage notes: +// +// * You can use an intrusive_list directly with the standard nodes +// if you have some other way of converting the node pointer back +// to your data pointer. +// +// * Remember that the list destructor doesn't deallocate nodes -- it can't. +// +// * The size is not cached; this makes size() linear time but splice() is +// constant time. This does mean that you can remove() an element without +// having to figure out which list it is in, however. +// +// * You can insert a node into multiple intrusive_lists. One way to do so +// is to (ab)use inheritance: +// +// struct NodeA : public intrusive_list_node {}; +// struct NodeB : public intrusive_list_node {}; +// struct Object : public NodeA, nodeB {}; +// +// intrusive_list listA; +// intrusive_list listB; +// +// listA.push_back(obj); +// listB.push_back(obj); +// +// * find() vs. locate() +// The find(v) algorithm returns an iterator p such that *p == v; intrusive_list::locate(v) +// returns an iterator p such that &*p == &v. intrusive_list<> doesn't have find() mainly +// because list<> doesn't have it either, but there's no reason it couldn't. intrusive_list +// uses the name 'find' because: +// - So as not to confuse the member function with the well-defined free function from algorithm.h. +// - Because it is not API-compatible with eastl::find(). +// - Because it simply locates an object within the list based on its node entry and doesn't perform before any value-based searches or comparisons. +// +// Differences between intrusive_list and std::list: +// +// Issue std::list intrusive_list +// -------------------------------------------------------------- +// Automatic node ctor/dtor Yes No +// Can memmove() container Maybe* No +// Same item in list twice Yes(copy/byref) No +// Can store non-copyable items No Yes +// size() O(1) or O(n) O(n) +// clear() O(n) O(1) +// erase(range) O(n) O(1) +// splice(range) O(1) or O(n) O(1) +// Convert reference to iterator No O(1) +// Remove without container No O(1) +// Nodes in mixed allocators No Yes +// +// *) Not required by standard but can be done with some STL implementations. +// +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTRUSIVE_LIST_H +#define EASTL_INTRUSIVE_LIST_H + + +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. 
+#endif + + + +namespace eastl +{ + + /// intrusive_list_node + /// + /// By design this must be a POD, as user structs will be inheriting from + /// it and they may wish to remain POD themselves. However, if the + /// EASTL_VALIDATE_INTRUSIVE_LIST option is enabled + /// + struct intrusive_list_node + { + intrusive_list_node* mpNext; + intrusive_list_node* mpPrev; + + #if EASTL_VALIDATE_INTRUSIVE_LIST + intrusive_list_node() // Implemented inline because GCC can't deal with member functions + { // of may-alias classes being defined outside the declaration. + mpNext = mpPrev = NULL; + } + + ~intrusive_list_node() + { + #if EASTL_ASSERT_ENABLED + if(mpNext || mpPrev) + EASTL_FAIL_MSG("~intrusive_list_node(): List is non-empty."); + #endif + } + #endif + } EASTL_MAY_ALIAS; // It's not clear if this really should be needed. An old GCC compatible compiler is generating some crashing optimized code when strict aliasing is enabled, but analysis of it seems to blame the compiler. However, this topic can be tricky. + + + + /// intrusive_list_iterator + /// + template + class intrusive_list_iterator + { + public: + typedef intrusive_list_iterator this_type; + typedef intrusive_list_iterator iterator; + typedef intrusive_list_iterator const_iterator; + typedef T value_type; + typedef T node_type; + typedef ptrdiff_t difference_type; + typedef Pointer pointer; + typedef Reference reference; + typedef EASTL_ITC_NS::bidirectional_iterator_tag iterator_category; + + public: + pointer mpNode; // Needs to be public for operator==() to work + + public: + intrusive_list_iterator(); + explicit intrusive_list_iterator(pointer pNode); // Note that you can also construct an iterator from T via this, since value_type == node_type. + intrusive_list_iterator(const iterator& x); + + reference operator*() const; + pointer operator->() const; + + intrusive_list_iterator& operator++(); + intrusive_list_iterator& operator--(); + + intrusive_list_iterator operator++(int); + intrusive_list_iterator operator--(int); + + }; // class intrusive_list_iterator + + + + /// intrusive_list_base + /// + class intrusive_list_base + { + public: + typedef eastl_size_t size_type; // See config.h for the definition of this, which defaults to size_t. + typedef ptrdiff_t difference_type; + + protected: + intrusive_list_node mAnchor; ///< Sentinel node (end). All data nodes are linked in a ring from this node. + + public: + intrusive_list_base(); + ~intrusive_list_base(); + + bool empty() const EA_NOEXCEPT; + eastl_size_t size() const EA_NOEXCEPT; ///< Returns the number of elements in the list; O(n). + void clear() EA_NOEXCEPT; ///< Clears the list; O(1). No deallocation occurs. + void pop_front(); ///< Removes an element from the front of the list; O(1). The element must exist, but is not deallocated. + void pop_back(); ///< Removes an element from the back of the list; O(1). The element must exist, but is not deallocated. + EASTL_API void reverse() EA_NOEXCEPT; ///< Reverses a list so that front and back are swapped; O(n). + + EASTL_API bool validate() const; ///< Scans a list for linkage inconsistencies; O(n) time, O(1) space. Returns false if errors are detected, such as loops or branching. 
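+
+		// Note (a summary of the implementation that follows, not additional API): mAnchor links to
+		// itself when the list is empty, so empty() is a single pointer comparison, while size()
+		// must walk the ring; this is why size() is documented as O(n) and clear() as O(1).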
+ + }; // class intrusive_list_base + + + + /// intrusive_list + /// + /// Example usage: + /// struct IntNode : public eastl::intrusive_list_node { + /// int mX; + /// IntNode(int x) : mX(x) { } + /// }; + /// + /// IntNode nodeA(0); + /// IntNode nodeB(1); + /// + /// intrusive_list intList; + /// intList.push_back(nodeA); + /// intList.push_back(nodeB); + /// intList.remove(nodeA); + /// + template + class intrusive_list : public intrusive_list_base + { + public: + typedef intrusive_list this_type; + typedef intrusive_list_base base_type; + typedef T node_type; + typedef T value_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::difference_type difference_type; + typedef T& reference; + typedef const T& const_reference; + typedef T* pointer; + typedef const T* const_pointer; + typedef intrusive_list_iterator iterator; + typedef intrusive_list_iterator const_iterator; + typedef eastl::reverse_iterator reverse_iterator; + typedef eastl::reverse_iterator const_reverse_iterator; + + public: + intrusive_list(); ///< Creates an empty list. + intrusive_list(const this_type& x); ///< Creates an empty list; ignores the argument. + //intrusive_list(std::initializer_list ilist); To consider: Is this feasible, given how initializer_list works by creating a temporary array? Even if it is feasible, is it a good idea? + + this_type& operator=(const this_type& x); ///< Clears the list; ignores the argument. + void swap(this_type&); ///< Swaps the contents of two intrusive lists; O(1). + + iterator begin() EA_NOEXCEPT; ///< Returns an iterator pointing to the first element in the list. + const_iterator begin() const EA_NOEXCEPT; ///< Returns a const_iterator pointing to the first element in the list. + const_iterator cbegin() const EA_NOEXCEPT; ///< Returns a const_iterator pointing to the first element in the list. + + iterator end() EA_NOEXCEPT; ///< Returns an iterator pointing one-after the last element in the list. + const_iterator end() const EA_NOEXCEPT; ///< Returns a const_iterator pointing one-after the last element in the list. + const_iterator cend() const EA_NOEXCEPT; ///< Returns a const_iterator pointing one-after the last element in the list. + + reverse_iterator rbegin() EA_NOEXCEPT; ///< Returns a reverse_iterator pointing at the end of the list (start of the reverse sequence). + const_reverse_iterator rbegin() const EA_NOEXCEPT; ///< Returns a const_reverse_iterator pointing at the end of the list (start of the reverse sequence). + const_reverse_iterator crbegin() const EA_NOEXCEPT; ///< Returns a const_reverse_iterator pointing at the end of the list (start of the reverse sequence). + + reverse_iterator rend() EA_NOEXCEPT; ///< Returns a reverse_iterator pointing at the start of the list (end of the reverse sequence). + const_reverse_iterator rend() const EA_NOEXCEPT; ///< Returns a const_reverse_iterator pointing at the start of the list (end of the reverse sequence). + const_reverse_iterator crend() const EA_NOEXCEPT; ///< Returns a const_reverse_iterator pointing at the start of the list (end of the reverse sequence). + + reference front(); ///< Returns a reference to the first element. The list must be non-empty. + const_reference front() const; ///< Returns a const reference to the first element. The list must be non-empty. + reference back(); ///< Returns a reference to the last element. The list must be non-empty. + const_reference back() const; ///< Returns a const reference to the last element. The list must be non-empty. 
+ + void push_front(value_type& x); ///< Adds an element to the front of the list; O(1). The element is not copied. The element must not be in any other list. + void push_back(value_type& x); ///< Adds an element to the back of the list; O(1). The element is not copied. The element must not be in any other list. + + bool contains(const value_type& x) const; ///< Returns true if the given element is in the list; O(n). Equivalent to (locate(x) != end()). + + iterator locate(value_type& x); ///< Converts a reference to an object in the list back to an iterator, or returns end() if it is not part of the list. O(n) + const_iterator locate(const value_type& x) const; ///< Converts a const reference to an object in the list back to a const iterator, or returns end() if it is not part of the list. O(n) + + iterator insert(const_iterator pos, value_type& x); ///< Inserts an element before the element pointed to by the iterator. O(1) + iterator erase(const_iterator pos); ///< Erases the element pointed to by the iterator. O(1) + iterator erase(const_iterator pos, const_iterator last); ///< Erases elements within the iterator range [pos, last). O(1) + + reverse_iterator erase(const_reverse_iterator pos); + reverse_iterator erase(const_reverse_iterator pos, const_reverse_iterator last); + + static void remove(value_type& value); ///< Erases an element from a list; O(1). Note that this is static so you don't need to know which list the element, although it must be in some list. + + void splice(const_iterator pos, value_type& x); + ///< Moves the given element into this list before the element pointed to by pos; O(1). + ///< Required: x must be in some list or have first/next pointers that point it itself. + + void splice(const_iterator pos, intrusive_list& x); + ///< Moves the contents of a list into this list before the element pointed to by pos; O(1). + ///< Required: &x != this (same as std::list). + + void splice(const_iterator pos, intrusive_list& x, const_iterator i); + ///< Moves the given element pointed to i within the list x into the current list before + ///< the element pointed to by pos; O(1). + + void splice(const_iterator pos, intrusive_list& x, const_iterator first, const_iterator last); + ///< Moves the range of elements [first, last) from list x into the current list before + ///< the element pointed to by pos; O(1). + ///< Required: pos must not be in [first, last). (same as std::list). + + public: + // Sorting functionality + // This is independent of the global sort algorithms, as lists are + // linked nodes and can be sorted more efficiently by moving nodes + // around in ways that global sort algorithms aren't privy to. + + void merge(this_type& x); + + template + void merge(this_type& x, Compare compare); + + void unique(); + + template + void unique(BinaryPredicate); + + void sort(); + + template + void sort(Compare compare); + + public: + // bool validate() const; // Inherited from parent. + int validate_iterator(const_iterator i) const; + + }; // intrusive_list + + + + + /////////////////////////////////////////////////////////////////////// + // intrusive_list_node + /////////////////////////////////////////////////////////////////////// + + // Moved to be inline within the class because the may-alias attribute is + // triggering what appears to be a bug in GCC that effectively requires + // may-alias structs to implement inline member functions within the class + // declaration. 
We don't have a .cpp file for + // #if EASTL_VALIDATE_INTRUSIVE_LIST + // inline intrusive_list_node::intrusive_list_node() + // { + // mpNext = mpPrev = NULL; + // } + // + // inline intrusive_list_node::~intrusive_list_node() + // { + // #if EASTL_ASSERT_ENABLED + // if(mpNext || mpPrev) + // EASTL_FAIL_MSG("~intrusive_list_node(): List is non-empty."); + // #endif + // } + // #endif + + + /////////////////////////////////////////////////////////////////////// + // intrusive_list_iterator + /////////////////////////////////////////////////////////////////////// + + template + inline intrusive_list_iterator::intrusive_list_iterator() + { + #if EASTL_DEBUG + mpNode = NULL; + #endif + } + + + template + inline intrusive_list_iterator::intrusive_list_iterator(pointer pNode) + : mpNode(pNode) + { + // Empty + } + + + template + inline intrusive_list_iterator::intrusive_list_iterator(const iterator& x) + : mpNode(x.mpNode) + { + // Empty + } + + + template + inline typename intrusive_list_iterator::reference + intrusive_list_iterator::operator*() const + { + return *mpNode; + } + + + template + inline typename intrusive_list_iterator::pointer + intrusive_list_iterator::operator->() const + { + return mpNode; + } + + + template + inline typename intrusive_list_iterator::this_type& + intrusive_list_iterator::operator++() + { + mpNode = static_cast(mpNode->mpNext); + return *this; + } + + + template + inline typename intrusive_list_iterator::this_type + intrusive_list_iterator::operator++(int) + { + intrusive_list_iterator it(*this); + mpNode = static_cast(mpNode->mpNext); + return it; + } + + + template + inline typename intrusive_list_iterator::this_type& + intrusive_list_iterator::operator--() + { + mpNode = static_cast(mpNode->mpPrev); + return *this; + } + + + template + inline typename intrusive_list_iterator::this_type + intrusive_list_iterator::operator--(int) + { + intrusive_list_iterator it(*this); + mpNode = static_cast(mpNode->mpPrev); + return it; + } + + + // The C++ defect report #179 requires that we support comparisons between const and non-const iterators. + // Thus we provide additional template paremeters here to support this. The defect report does not + // require us to support comparisons between reverse_iterators and const_reverse_iterators. + template + inline bool operator==(const intrusive_list_iterator& a, + const intrusive_list_iterator& b) + { + return a.mpNode == b.mpNode; + } + + + template + inline bool operator!=(const intrusive_list_iterator& a, + const intrusive_list_iterator& b) + { + return a.mpNode != b.mpNode; + } + + + // We provide a version of operator!= for the case where the iterators are of the + // same type. This helps prevent ambiguity errors in the presence of rel_ops. 
+ template + inline bool operator!=(const intrusive_list_iterator& a, + const intrusive_list_iterator& b) + { + return a.mpNode != b.mpNode; + } + + + + + /////////////////////////////////////////////////////////////////////// + // intrusive_list_base + /////////////////////////////////////////////////////////////////////// + + inline intrusive_list_base::intrusive_list_base() + { + mAnchor.mpNext = mAnchor.mpPrev = &mAnchor; + } + + inline intrusive_list_base::~intrusive_list_base() + { + #if EASTL_VALIDATE_INTRUSIVE_LIST + clear(); + mAnchor.mpNext = mAnchor.mpPrev = NULL; + #endif + } + + + inline bool intrusive_list_base::empty() const EA_NOEXCEPT + { + return mAnchor.mpPrev == &mAnchor; + } + + + inline intrusive_list_base::size_type intrusive_list_base::size() const EA_NOEXCEPT + { + const intrusive_list_node* p = &mAnchor; + size_type n = (size_type)-1; + + do { + ++n; + p = p->mpNext; + } while(p != &mAnchor); + + return n; + } + + + inline void intrusive_list_base::clear() EA_NOEXCEPT + { + #if EASTL_VALIDATE_INTRUSIVE_LIST + // Need to clear out all the next/prev pointers in the elements; + // this makes this operation O(n) instead of O(1). + intrusive_list_node* pNode = mAnchor.mpNext; + + while(pNode != &mAnchor) + { + intrusive_list_node* const pNextNode = pNode->mpNext; + pNode->mpNext = pNode->mpPrev = NULL; + pNode = pNextNode; + } + #endif + + mAnchor.mpNext = mAnchor.mpPrev = &mAnchor; + } + + + inline void intrusive_list_base::pop_front() + { + #if EASTL_VALIDATE_INTRUSIVE_LIST + intrusive_list_node* const pNode = mAnchor.mpNext; + #endif + + mAnchor.mpNext->mpNext->mpPrev = &mAnchor; + mAnchor.mpNext = mAnchor.mpNext->mpNext; + + #if EASTL_VALIDATE_INTRUSIVE_LIST + if(pNode != &mAnchor) + pNode->mpNext = pNode->mpPrev = NULL; + #if EASTL_ASSERT_ENABLED + else + EASTL_FAIL_MSG("intrusive_list::pop_front(): empty list."); + #endif + #endif + } + + + inline void intrusive_list_base::pop_back() + { + #if EASTL_VALIDATE_INTRUSIVE_LIST + intrusive_list_node* const pNode = mAnchor.mpPrev; + #endif + + mAnchor.mpPrev->mpPrev->mpNext = &mAnchor; + mAnchor.mpPrev = mAnchor.mpPrev->mpPrev; + + #if EASTL_VALIDATE_INTRUSIVE_LIST + if(pNode != &mAnchor) + pNode->mpNext = pNode->mpPrev = NULL; + #if EASTL_ASSERT_ENABLED + else + EASTL_FAIL_MSG("intrusive_list::pop_back(): empty list."); + #endif + #endif + } + + + + + /////////////////////////////////////////////////////////////////////// + // intrusive_list + /////////////////////////////////////////////////////////////////////// + + template + inline intrusive_list::intrusive_list() + { + } + + + template + inline intrusive_list::intrusive_list(const this_type& /*x*/) + : intrusive_list_base() + { + // We intentionally ignore argument x. + // To consider: Shouldn't this function simply not exist? Is there a useful purpose for having this function? + // There should be a comment here about it, though my first guess is that this exists to quell VC++ level 4/-Wall compiler warnings. + } + + + template + inline typename intrusive_list::this_type& intrusive_list::operator=(const this_type& /*x*/) + { + // We intentionally ignore argument x. + // See notes above in the copy constructor about questioning the existence of this function. 
+ return *this; + } + + + template + inline typename intrusive_list::iterator intrusive_list::begin() EA_NOEXCEPT + { + return iterator(static_cast(mAnchor.mpNext)); + } + + + template + inline typename intrusive_list::const_iterator intrusive_list::begin() const EA_NOEXCEPT + { + return const_iterator(static_cast(mAnchor.mpNext)); + } + + + template + inline typename intrusive_list::const_iterator intrusive_list::cbegin() const EA_NOEXCEPT + { + return const_iterator(static_cast(mAnchor.mpNext)); + } + + + template + inline typename intrusive_list::iterator intrusive_list::end() EA_NOEXCEPT + { + return iterator(static_cast(&mAnchor)); + } + + + template + inline typename intrusive_list::const_iterator intrusive_list::end() const EA_NOEXCEPT + { + return const_iterator(static_cast(&mAnchor)); + } + + + template + inline typename intrusive_list::const_iterator intrusive_list::cend() const EA_NOEXCEPT + { + return const_iterator(static_cast(&mAnchor)); + } + + + template + inline typename intrusive_list::reverse_iterator intrusive_list::rbegin() EA_NOEXCEPT + { + return reverse_iterator(iterator(static_cast(&mAnchor))); + } + + + template + inline typename intrusive_list::const_reverse_iterator intrusive_list::rbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(const_iterator(static_cast(&mAnchor))); + } + + + template + inline typename intrusive_list::const_reverse_iterator intrusive_list::crbegin() const EA_NOEXCEPT + { + return const_reverse_iterator(const_iterator(static_cast(&mAnchor))); + } + + + template + inline typename intrusive_list::reverse_iterator intrusive_list::rend() EA_NOEXCEPT + { + return reverse_iterator(iterator(static_cast(mAnchor.mpNext))); + } + + + template + inline typename intrusive_list::const_reverse_iterator intrusive_list::rend() const EA_NOEXCEPT + { + return const_reverse_iterator(const_iterator(static_cast(mAnchor.mpNext))); + } + + + template + inline typename intrusive_list::const_reverse_iterator intrusive_list::crend() const EA_NOEXCEPT + { + return const_reverse_iterator(const_iterator(static_cast(mAnchor.mpNext))); + } + + + template + inline typename intrusive_list::reference intrusive_list::front() + { + #if EASTL_VALIDATE_INTRUSIVE_LIST && EASTL_ASSERT_ENABLED + if(mAnchor.mpNext == &mAnchor) + EASTL_FAIL_MSG("intrusive_list::front(): empty list."); + #endif + + return *static_cast(mAnchor.mpNext); + } + + + template + inline typename intrusive_list::const_reference intrusive_list::front() const + { + #if EASTL_VALIDATE_INTRUSIVE_LIST && EASTL_ASSERT_ENABLED + if(mAnchor.mpNext == &mAnchor) + EASTL_FAIL_MSG("intrusive_list::front(): empty list."); + #endif + + return *static_cast(mAnchor.mpNext); + } + + + template + inline typename intrusive_list::reference intrusive_list::back() + { + #if EASTL_VALIDATE_INTRUSIVE_LIST && EASTL_ASSERT_ENABLED + if(mAnchor.mpNext == &mAnchor) + EASTL_FAIL_MSG("intrusive_list::back(): empty list."); + #endif + + return *static_cast(mAnchor.mpPrev); + } + + + template + inline typename intrusive_list::const_reference intrusive_list::back() const + { + #if EASTL_VALIDATE_INTRUSIVE_LIST && EASTL_ASSERT_ENABLED + if(mAnchor.mpNext == &mAnchor) + EASTL_FAIL_MSG("intrusive_list::back(): empty list."); + #endif + + return *static_cast(mAnchor.mpPrev); + } + + + template + inline void intrusive_list::push_front(value_type& x) + { + #if EASTL_VALIDATE_INTRUSIVE_LIST && EASTL_ASSERT_ENABLED + if(x.mpNext || x.mpPrev) + EASTL_FAIL_MSG("intrusive_list::push_front(): element already on a list."); + #endif + + 
x.mpNext = mAnchor.mpNext; + x.mpPrev = &mAnchor; + mAnchor.mpNext = &x; + x.mpNext->mpPrev = &x; + } + + + template + inline void intrusive_list::push_back(value_type& x) + { + #if EASTL_VALIDATE_INTRUSIVE_LIST && EASTL_ASSERT_ENABLED + if(x.mpNext || x.mpPrev) + EASTL_FAIL_MSG("intrusive_list::push_back(): element already on a list."); + #endif + + x.mpPrev = mAnchor.mpPrev; + x.mpNext = &mAnchor; + mAnchor.mpPrev = &x; + x.mpPrev->mpNext = &x; + } + + + template + inline bool intrusive_list::contains(const value_type& x) const + { + for(const intrusive_list_node* p = mAnchor.mpNext; p != &mAnchor; p = p->mpNext) + { + if(p == &x) + return true; + } + + return false; + } + + + template + inline typename intrusive_list::iterator intrusive_list::locate(value_type& x) + { + for(intrusive_list_node* p = (T*)mAnchor.mpNext; p != &mAnchor; p = p->mpNext) + { + if(p == &x) + return iterator(static_cast(p)); + } + + return iterator((T*)&mAnchor); + } + + + template + inline typename intrusive_list::const_iterator intrusive_list::locate(const value_type& x) const + { + for(const intrusive_list_node* p = mAnchor.mpNext; p != &mAnchor; p = p->mpNext) + { + if(p == &x) + return const_iterator(static_cast(p)); + } + + return const_iterator((T*)&mAnchor); + } + + + template + inline typename intrusive_list::iterator intrusive_list::insert(const_iterator pos, value_type& x) + { + #if EASTL_VALIDATE_INTRUSIVE_LIST && EASTL_ASSERT_ENABLED + if(x.mpNext || x.mpPrev) + EASTL_FAIL_MSG("intrusive_list::insert(): element already on a list."); + #endif + + intrusive_list_node& next = *const_cast(pos.mpNode); + intrusive_list_node& prev = *static_cast(next.mpPrev); + prev.mpNext = next.mpPrev = &x; + x.mpPrev = &prev; + x.mpNext = &next; + + return iterator(&x); + } + + + template + inline typename intrusive_list::iterator + intrusive_list::erase(const_iterator pos) + { + intrusive_list_node& prev = *static_cast(pos.mpNode->mpPrev); + intrusive_list_node& next = *static_cast(pos.mpNode->mpNext); + prev.mpNext = &next; + next.mpPrev = &prev; + + #if EASTL_VALIDATE_INTRUSIVE_LIST + iterator ii(const_cast(pos.mpNode)); + ii.mpNode->mpPrev = ii.mpNode->mpNext = NULL; + #endif + + return iterator(static_cast(&next)); + } + + + template + inline typename intrusive_list::iterator + intrusive_list::erase(const_iterator first, const_iterator last) + { + intrusive_list_node& prev = *static_cast(first.mpNode->mpPrev); + intrusive_list_node& next = *const_cast(last.mpNode); + + #if EASTL_VALIDATE_INTRUSIVE_LIST + // need to clear out all the next/prev pointers in the elements; + // this makes this operation O(n) instead of O(1), sadly, although + // it's technically amortized O(1) since you could count yourself + // as paying this cost with each insert. + intrusive_list_node* pCur = const_cast(first.mpNode); + + while(pCur != &next) + { + intrusive_list_node* const pCurNext = pCur->mpNext; + pCur->mpPrev = pCur->mpNext = NULL; + pCur = pCurNext; + } + #endif + + prev.mpNext = &next; + next.mpPrev = &prev; + + return iterator(const_cast(last.mpNode)); + } + + + template + inline typename intrusive_list::reverse_iterator + intrusive_list::erase(const_reverse_iterator position) + { + return reverse_iterator(erase((++position).base())); + } + + + template + inline typename intrusive_list::reverse_iterator + intrusive_list::erase(const_reverse_iterator first, const_reverse_iterator last) + { + // Version which erases in order from first to last. 
+ // difference_type i(first.base() - last.base()); + // while(i--) + // first = erase(first); + // return first; + + // Version which erases in order from last to first, but is slightly more efficient: + return reverse_iterator(erase((++last).base(), (++first).base())); + } + + + template + void intrusive_list::swap(intrusive_list& x) + { + // swap anchors + intrusive_list_node temp(mAnchor); + mAnchor = x.mAnchor; + x.mAnchor = temp; + + // Fixup node pointers into the anchor, since the addresses of + // the anchors must stay the same with each list. + if(mAnchor.mpNext == &x.mAnchor) + mAnchor.mpNext = mAnchor.mpPrev = &mAnchor; + else + mAnchor.mpNext->mpPrev = mAnchor.mpPrev->mpNext = &mAnchor; + + if(x.mAnchor.mpNext == &mAnchor) + x.mAnchor.mpNext = x.mAnchor.mpPrev = &x.mAnchor; + else + x.mAnchor.mpNext->mpPrev = x.mAnchor.mpPrev->mpNext = &x.mAnchor; + + #if EASTL_VALIDATE_INTRUSIVE_LIST + temp.mpPrev = temp.mpNext = NULL; + #endif + } + + + template + void intrusive_list::splice(const_iterator pos, value_type& value) + { + // Note that splice(pos, x, pos) and splice(pos+1, x, pos) + // are valid and need to be handled correctly. + + if(pos.mpNode != &value) + { + // Unlink item from old list. + intrusive_list_node& oldNext = *value.mpNext; + intrusive_list_node& oldPrev = *value.mpPrev; + oldNext.mpPrev = &oldPrev; + oldPrev.mpNext = &oldNext; + + // Relink item into new list. + intrusive_list_node& newNext = *const_cast(pos.mpNode); + intrusive_list_node& newPrev = *newNext.mpPrev; + + newPrev.mpNext = &value; + newNext.mpPrev = &value; + value.mpPrev = &newPrev; + value.mpNext = &newNext; + } + } + + + template + void intrusive_list::splice(const_iterator pos, intrusive_list& x) + { + // Note: &x == this is prohibited, so self-insertion is not a problem. + if(x.mAnchor.mpNext != &x.mAnchor) // If the list 'x' isn't empty... + { + intrusive_list_node& next = *const_cast(pos.mpNode); + intrusive_list_node& prev = *static_cast(next.mpPrev); + intrusive_list_node& insertPrev = *static_cast(x.mAnchor.mpNext); + intrusive_list_node& insertNext = *static_cast(x.mAnchor.mpPrev); + + prev.mpNext = &insertPrev; + insertPrev.mpPrev = &prev; + insertNext.mpNext = &next; + next.mpPrev = &insertNext; + x.mAnchor.mpPrev = x.mAnchor.mpNext = &x.mAnchor; + } + } + + + template + void intrusive_list::splice(const_iterator pos, intrusive_list& /*x*/, const_iterator i) + { + // Note: &x == this is prohibited, so self-insertion is not a problem. + + // Note that splice(pos, x, pos) and splice(pos + 1, x, pos) + // are valid and need to be handled correctly. + + // We don't need to check if the source list is empty, because + // this function expects a valid iterator from the source list, + // and thus the list cannot be empty in such a situation. + + iterator ii(const_cast(i.mpNode)); // Make a temporary non-const version. + + if(pos != ii) + { + // Unlink item from old list. + intrusive_list_node& oldNext = *ii.mpNode->mpNext; + intrusive_list_node& oldPrev = *ii.mpNode->mpPrev; + oldNext.mpPrev = &oldPrev; + oldPrev.mpNext = &oldNext; + + // Relink item into new list. 
+ intrusive_list_node& newNext = *const_cast(pos.mpNode); + intrusive_list_node& newPrev = *newNext.mpPrev; + + newPrev.mpNext = ii.mpNode; + newNext.mpPrev = ii.mpNode; + ii.mpNode->mpPrev = &newPrev; + ii.mpNode->mpNext = &newNext; + } + } + + + template + void intrusive_list::splice(const_iterator pos, intrusive_list& /*x*/, const_iterator first, const_iterator last) + { + // Note: &x == this is prohibited, so self-insertion is not a problem. + if(first != last) + { + intrusive_list_node& insertPrev = *const_cast(first.mpNode); + intrusive_list_node& insertNext = *static_cast(last.mpNode->mpPrev); + + // remove from old list + insertNext.mpNext->mpPrev = insertPrev.mpPrev; + insertPrev.mpPrev->mpNext = insertNext.mpNext; + + // insert into this list + intrusive_list_node& next = *const_cast(pos.mpNode); + intrusive_list_node& prev = *static_cast(next.mpPrev); + + prev.mpNext = &insertPrev; + insertPrev.mpPrev = &prev; + insertNext.mpNext = &next; + next.mpPrev = &insertNext; + } + } + + + template + inline void intrusive_list::remove(value_type& value) + { + intrusive_list_node& prev = *value.mpPrev; + intrusive_list_node& next = *value.mpNext; + prev.mpNext = &next; + next.mpPrev = &prev; + + #if EASTL_VALIDATE_INTRUSIVE_LIST + value.mpPrev = value.mpNext = NULL; + #endif + } + + + template + void intrusive_list::merge(this_type& x) + { + if(this != &x) + { + iterator first(begin()); + iterator firstX(x.begin()); + const iterator last(end()); + const iterator lastX(x.end()); + + while((first != last) && (firstX != lastX)) + { + if(*firstX < *first) + { + iterator next(firstX); + + splice(first, x, firstX, ++next); + firstX = next; + } + else + ++first; + } + + if(firstX != lastX) + splice(last, x, firstX, lastX); + } + } + + + template + template + void intrusive_list::merge(this_type& x, Compare compare) + { + if(this != &x) + { + iterator first(begin()); + iterator firstX(x.begin()); + const iterator last(end()); + const iterator lastX(x.end()); + + while((first != last) && (firstX != lastX)) + { + if(compare(*firstX, *first)) + { + iterator next(firstX); + + splice(first, x, firstX, ++next); + firstX = next; + } + else + ++first; + } + + if(firstX != lastX) + splice(last, x, firstX, lastX); + } + } + + + template + void intrusive_list::unique() + { + iterator first(begin()); + const iterator last(end()); + + if(first != last) + { + iterator next(first); + + while(++next != last) + { + if(*first == *next) + erase(next); + else + first = next; + next = first; + } + } + } + + + template + template + void intrusive_list::unique(BinaryPredicate predicate) + { + iterator first(begin()); + const iterator last(end()); + + if(first != last) + { + iterator next(first); + + while(++next != last) + { + if(predicate(*first, *next)) + erase(next); + else + first = next; + next = first; + } + } + } + + + template + void intrusive_list::sort() + { + // We implement the algorithm employed by Chris Caulfield whereby we use recursive + // function calls to sort the list. The sorting of a very large list may fail due to stack overflow + // if the stack is exhausted. The limit depends on the platform and the avaialble stack space. + + // Easier-to-understand version of the 'if' statement: + // iterator i(begin()); + // if((i != end()) && (++i != end())) // If the size is >= 2 (without calling the more expensive size() function)... 
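+		// Descriptive note: this is a top-down recursive merge sort. Each call splits the ring at its
+		// midpoint (found by walking two iterators toward each other, since size() is O(n)), sorts the
+		// halves recursively and merges them back; that gives O(n log n) comparisons and roughly
+		// log2(n) levels of recursion, which is the stack usage the warning above refers to.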
+ + // Faster, more inlinable version of the 'if' statement: + if((static_cast(mAnchor.mpNext) != &mAnchor) && + (static_cast(mAnchor.mpNext) != static_cast(mAnchor.mpPrev))) + { + // Split the array into 2 roughly equal halves. + this_type leftList; // This should cause no memory allocation. + this_type rightList; + + // We find an iterator which is in the middle of the list. The fastest way to do + // this is to iterate from the base node both forwards and backwards with two + // iterators and stop when they meet each other. Recall that our size() function + // is not O(1) but is instead O(n), at least when EASTL_LIST_SIZE_CACHE is disabled. + #if EASTL_LIST_SIZE_CACHE + iterator mid(begin()); + eastl::advance(mid, size() / 2); + #else + iterator mid(begin()), tail(end()); + + while((mid != tail) && (++mid != tail)) + --tail; + #endif + + // Move the left half of this into leftList and the right half into rightList. + leftList.splice(leftList.begin(), *this, begin(), mid); + rightList.splice(rightList.begin(), *this); + + // Sort the sub-lists. + leftList.sort(); + rightList.sort(); + + // Merge the two halves into this list. + splice(begin(), leftList); + merge(rightList); + } + } + + + template + template + void intrusive_list::sort(Compare compare) + { + // We implement the algorithm employed by Chris Caulfield whereby we use recursive + // function calls to sort the list. The sorting of a very large list may fail due to stack overflow + // if the stack is exhausted. The limit depends on the platform and the avaialble stack space. + + // Easier-to-understand version of the 'if' statement: + // iterator i(begin()); + // if((i != end()) && (++i != end())) // If the size is >= 2 (without calling the more expensive size() function)... + + // Faster, more inlinable version of the 'if' statement: + if((static_cast(mAnchor.mpNext) != &mAnchor) && + (static_cast(mAnchor.mpNext) != static_cast(mAnchor.mpPrev))) + { + // Split the array into 2 roughly equal halves. + this_type leftList; // This should cause no memory allocation. + this_type rightList; + + // We find an iterator which is in the middle of the list. The fastest way to do + // this is to iterate from the base node both forwards and backwards with two + // iterators and stop when they meet each other. Recall that our size() function + // is not O(1) but is instead O(n), at least when EASTL_LIST_SIZE_CACHE is disabled. + #if EASTL_LIST_SIZE_CACHE + iterator mid(begin()); + eastl::advance(mid, size() / 2); + #else + iterator mid(begin()), tail(end()); + + while((mid != tail) && (++mid != tail)) + --tail; + #endif + + // Move the left half of this into leftList and the right half into rightList. + leftList.splice(leftList.begin(), *this, begin(), mid); + rightList.splice(rightList.begin(), *this); + + // Sort the sub-lists. + leftList.sort(compare); + rightList.sort(compare); + + // Merge the two halves into this list. + splice(begin(), leftList); + merge(rightList, compare); + } + } + + + template + inline int intrusive_list::validate_iterator(const_iterator i) const + { + // To do: Come up with a more efficient mechanism of doing this. 
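+		// Walk the list: if i refers to a live element it is valid, current and dereferenceable;
+		// end() is valid and current but not dereferenceable; anything else reports isf_none.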
+ + for(const_iterator temp = begin(), tempEnd = end(); temp != tempEnd; ++temp) + { + if(temp == i) + return (isf_valid | isf_current | isf_can_dereference); + } + + if(i == end()) + return (isf_valid | isf_current); + + return isf_none; + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + bool operator==(const intrusive_list& a, const intrusive_list& b) + { + // If we store an mSize member for intrusive_list, we want to take advantage of it here. + typename intrusive_list::const_iterator ia = a.begin(); + typename intrusive_list::const_iterator ib = b.begin(); + typename intrusive_list::const_iterator enda = a.end(); + typename intrusive_list::const_iterator endb = b.end(); + + while((ia != enda) && (ib != endb) && (*ia == *ib)) + { + ++ia; + ++ib; + } + return (ia == enda) && (ib == endb); + } + + template + bool operator!=(const intrusive_list& a, const intrusive_list& b) + { + return !(a == b); + } + + template + bool operator<(const intrusive_list& a, const intrusive_list& b) + { + return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); + } + + template + bool operator>(const intrusive_list& a, const intrusive_list& b) + { + return b < a; + } + + template + bool operator<=(const intrusive_list& a, const intrusive_list& b) + { + return !(b < a); + } + + template + bool operator>=(const intrusive_list& a, const intrusive_list& b) + { + return !(a < b); + } + + template + void swap(intrusive_list& a, intrusive_list& b) + { + a.swap(b); + } + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/intrusive_ptr.h b/libkram/eastl/include/EASTL/intrusive_ptr.h new file mode 100644 index 00000000..af4e686f --- /dev/null +++ b/libkram/eastl/include/EASTL/intrusive_ptr.h @@ -0,0 +1,426 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_INTRUSIVE_PTR_H +#define EASTL_INTRUSIVE_PTR_H + + +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + // We provide default implementations of AddRef and Release in the eastl namespace. + // The user can override these on a per-class basis by defining their own specialized + // intrusive_ptr_add_ref and intrusive_ptr_release functions. User-defined specializations + // do not need to exist in the eastl namespace, but should preferably be in the namespace + // of the templated class T. + template + void intrusive_ptr_add_ref(T* p) + { + p->AddRef(); + } + + template + void intrusive_ptr_release(T* p) + { + p->Release(); + } + + + ////////////////////////////////////////////////////////////////////////////// + /// intrusive_ptr + /// + /// This is a class that acts like the C++ auto_ptr class except that instead + /// of deleting its member data when it goes out of scope, it releases its + /// member data when it goes out of scope. This class thus requires that the + /// templated data type have an AddRef and Release function (or whatever is + /// configured to be the two refcount functions). 
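+	///
+	/// Illustrative sketch (hypothetical Widget type; an assumption, not upstream documentation):
+	/// a minimal, non-thread-safe class usable with intrusive_ptr just maintains its own count:
+	///     class Widget {
+	///         int32_t mRefCount;
+	///     public:
+	///         Widget() : mRefCount(0) {}
+	///         void AddRef()  { ++mRefCount; }
+	///         void Release() { if(--mRefCount <= 0) delete this; }
+	///     };
+	///     eastl::intrusive_ptr<Widget> pWidget(new Widget);   // the constructor calls AddRef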
+ /// + /// This class is useful for automatically releasing an object when this + /// class goes out of scope. See below for some usage. + /// You should be careful about putting instances of this class as members of + /// another class. If you do so, then the intrusive_ptr destructor will only + /// be called if the object that owns it is destructed. This creates a potential + /// chicken-and-egg situation. What if the intrusive_ptr member contains a + /// pointer to an object that has a reference on the object that owns the + /// intrusive_ptr member? The answer is that the neither object can ever be + /// destructed. The solution is to: + /// 1) Be very careful about what objects you put into member intrusive_ptr objects. + /// 2) Clear out your intrusive_ptr members in your shutdown function. + /// 3) Simply don't use intrusive_ptr objects as class members. + /// + /// Example usage: + /// intrusive_ptr pWidget = new Widget; + /// pWidget = new Widget; + /// pWidget->Reset(); + /// + template + class intrusive_ptr + { + protected: + // Friend declarations. + template friend class intrusive_ptr; + typedef intrusive_ptr this_type; + + T* mpObject; + + public: + /// element_type + /// This typedef is present for consistency with the C++ standard library + /// auto_ptr template. It allows users to refer to the templated type via + /// a typedef. This is sometimes useful to be able to do. + /// + /// Example usage: + /// intrusive_ptr ip; + /// void DoSomething(intrusive_ptr::element_type someType); + /// + typedef T element_type; + + /// intrusive_ptr + /// Default constructor. The member object is set to NULL. + intrusive_ptr() + : mpObject(NULL) + { + // Empty + } + + /// intrusive_ptr + /// Provides a constructor which takes ownership of a pointer. + /// The incoming pointer is AddRefd. + /// + /// Example usage: + /// intrusive_ptr pWidget(new Widget); + intrusive_ptr(T* p, bool bAddRef = true) + : mpObject(p) + { + if(mpObject && bAddRef) + intrusive_ptr_add_ref(mpObject); // Intentionally do not prefix the call with eastl:: but instead allow namespace lookup to resolve the namespace. + } + + /// intrusive_ptr + /// Construction from self type. + intrusive_ptr(const intrusive_ptr& ip) + : mpObject(ip.mpObject) + { + if(mpObject) + intrusive_ptr_add_ref(mpObject); + } + + + /// intrusive_ptr + /// move constructor + intrusive_ptr(intrusive_ptr&& ip) + : mpObject(nullptr) + { + swap(ip); + } + + /// intrusive_ptr + /// Provides a constructor which copies a pointer from another intrusive_ptr. + /// The incoming pointer is AddRefd. The source intrusive_ptr object maintains + /// its AddRef on the pointer. + /// + /// Example usage: + /// intrusive_ptr pWidget1; + /// intrusive_ptr pWidget2(pWidget1); + template + intrusive_ptr(const intrusive_ptr& ip) + : mpObject(ip.mpObject) + { + if(mpObject) + intrusive_ptr_add_ref(mpObject); + } + + /// intrusive_ptr + /// Releases the owned pointer. + ~intrusive_ptr() + { + if(mpObject) + intrusive_ptr_release(mpObject); + } + + + /// operator= + /// Assignment to self type. + intrusive_ptr& operator=(const intrusive_ptr& ip) + { + return operator=(ip.mpObject); + } + + + /// operator= + /// Move assignment operator + intrusive_ptr& operator=(intrusive_ptr&& ip) + { + swap(ip); + return *this; + } + + + /// operator = + /// Assigns an intrusive_ptr object to this intrusive_ptr object. + /// The incoming pointer is AddRefd. The source intrusive_ptr object + /// maintains its AddRef on the pointer. 
If there is an existing member + /// pointer, it is Released before the incoming pointer is assigned. + /// If the incoming pointer is equal to the existing pointer, no + /// action is taken. The incoming pointer is AddRefd before any + /// member pointer is Released. + template + intrusive_ptr& operator=(const intrusive_ptr& ip) + { + return operator=(ip.mpObject); + } + + /// operator= + /// Assigns an intrusive_ptr object to this intrusive_ptr object. + /// The incoming pointer is AddRefd. If there is an existing member + /// pointer, it is Released before the incoming pointer is assigned. + /// If the incoming pointer is equal to the existing pointer, no + /// action is taken. The incoming pointer is AddRefd before any + /// member pointer is Released. + intrusive_ptr& operator=(T* pObject) + { + if(pObject != mpObject) + { + T* const pTemp = mpObject; // Create temporary to prevent possible problems with re-entrancy. + if(pObject) + intrusive_ptr_add_ref(pObject); + mpObject = pObject; + if(pTemp) + intrusive_ptr_release(pTemp); + } + return *this; + } + + /// operator * + /// Returns a reference to the contained object. + T& operator *() const + { + return *mpObject; + } + + /// operator * + /// Returns a pointer to the contained object, allowing the + /// user to use this container as if it were contained pointer itself. + T* operator ->() const + { + return mpObject; + } + + /// get() + /// Returns a pointer to the contained object. + T* get() const + { + return mpObject; + } + + /// reset + /// Releases the owned object and clears our reference to it. + void reset() + { + T* const pTemp = mpObject; + mpObject = NULL; + if(pTemp) + intrusive_ptr_release(pTemp); + } + + /// swap + /// Exchanges the owned pointer beween two intrusive_ptr objects. + void swap(this_type& ip) + { + T* const pTemp = mpObject; + mpObject = ip.mpObject; + ip.mpObject = pTemp; + } + + /// attach + /// Sets an intrusive_ptr pointer without calling AddRef() on + /// the pointed object. The intrusive_ptr thus eventually only does a + /// Release() on the object. This is useful for assuming a reference + /// that someone else has handed you and making sure it is always + /// released, even if you return in the middle of a function or an + /// exception is thrown. + /// + void attach(T* pObject) + { + T* const pTemp = mpObject; + mpObject = pObject; + if(pTemp) + intrusive_ptr_release(pTemp); + } + + /// detach + /// Surrenders the reference held by an intrusive_ptr pointer -- + /// it returns the current reference and nulls the pointer. If the returned + /// pointer is non-null it must be released. This is useful in functions + /// that must return a reference while possibly being aborted by a return + /// or thrown exception: + /// + /// bool GetFoo(T** pp){ + /// intrusive_ptr p(PrivateGetFoo()); + /// if(p->Method()) + /// return false; + /// *pp = p.detach(); + /// return true; + /// } + T* detach() + { + T* const pTemp = mpObject; + mpObject = NULL; + return pTemp; + } + + /// Implicit operator bool + /// Allows for using a intrusive_ptr as a boolean. + /// Example usage: + /// intrusive_ptr ptr = new Widget; + /// if(ptr) + /// ++*ptr; + /// + /// Note that below we do not use operator bool(). The reason for this + /// is that booleans automatically convert up to short, int, float, etc. + /// The result is that this: if(intrusivePtr == 1) would yield true (bad). 
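+		/// (The member-pointer typedef below is the classic pre-C++11 "safe bool" idiom; a C++11
+		/// 'explicit operator bool() const' would be the modern equivalent, but this form presumably
+		/// keeps the header usable on pre-C++11 compilers.)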
+ typedef T* (this_type::*bool_)() const; + operator bool_() const + { + if(mpObject) + return &this_type::get; + return NULL; + } + + /// operator! + /// This returns the opposite of operator bool; it returns true if + /// the owned pointer is null. Some compilers require this and some don't. + /// intrusive_ptr ptr = new Widget; + /// if(!ptr) + /// assert(false); + bool operator!() const + { + return (mpObject == NULL); + } + + }; // class intrusive_ptr + + + /// get_pointer + /// returns intrusive_ptr::get() via the input intrusive_ptr. + template + inline T* get_pointer(const intrusive_ptr& intrusivePtr) + { + return intrusivePtr.get(); + } + + /// swap + /// Exchanges the owned pointer beween two intrusive_ptr objects. + /// This non-member version is useful for compatibility of intrusive_ptr + /// objects with the C++ Standard Library and other libraries. + template + inline void swap(intrusive_ptr& intrusivePtr1, intrusive_ptr& intrusivePtr2) + { + intrusivePtr1.swap(intrusivePtr2); + } + + + template + bool operator==(intrusive_ptr const& iPtr1, intrusive_ptr const& iPtr2) + { + return (iPtr1.get() == iPtr2.get()); + } + + template + bool operator!=(intrusive_ptr const& iPtr1, intrusive_ptr const& iPtr2) + { + return (iPtr1.get() != iPtr2.get()); + } + + template + bool operator==(intrusive_ptr const& iPtr1, T* p) + { + return (iPtr1.get() == p); + } + + template + bool operator!=(intrusive_ptr const& iPtr1, T* p) + { + return (iPtr1.get() != p); + } + + template + bool operator==(T* p, intrusive_ptr const& iPtr2) + { + return (p == iPtr2.get()); + } + + template + bool operator!=(T* p, intrusive_ptr const& iPtr2) + { + return (p != iPtr2.get()); + } + + template + bool operator<(intrusive_ptr const& iPtr1, intrusive_ptr const& iPtr2) + { + return ((uintptr_t)iPtr1.get() < (uintptr_t)iPtr2.get()); + } + + + /// static_pointer_cast + /// Returns an intrusive_ptr static-casted from a intrusive_ptr. + template + intrusive_ptr static_pointer_cast(const intrusive_ptr& intrusivePtr) + { + return static_cast(intrusivePtr.get()); + } + + + #if EASTL_RTTI_ENABLED + + /// dynamic_pointer_cast + /// Returns an intrusive_ptr dynamic-casted from a intrusive_ptr. + template + intrusive_ptr dynamic_pointer_cast(const intrusive_ptr& intrusivePtr) + { + return dynamic_cast(intrusivePtr.get()); + } + + #endif + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/iterator.h b/libkram/eastl/include/EASTL/iterator.h new file mode 100644 index 00000000..d2dc8993 --- /dev/null +++ b/libkram/eastl/include/EASTL/iterator.h @@ -0,0 +1,1192 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_ITERATOR_H +#define EASTL_ITERATOR_H + + +#include +#include +#include + +EA_DISABLE_ALL_VC_WARNINGS(); + +#include + +EA_RESTORE_ALL_VC_WARNINGS(); + +// If the user has specified that we use std iterator +// categories instead of EASTL iterator categories, +// then #include . +#if EASTL_STD_ITERATOR_CATEGORY_ENABLED + EA_DISABLE_ALL_VC_WARNINGS(); + + #include + + EA_RESTORE_ALL_VC_WARNINGS(); +#endif + + +EA_DISABLE_VC_WARNING(4619); // There is no warning number 'number'. +EA_DISABLE_VC_WARNING(4217); // Member template functions cannot be used for copy-assignment or copy-construction. 
+ +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + /// iterator_status_flag + /// + /// Defines the validity status of an iterator. This is primarily used for + /// iterator validation in debug builds. These are implemented as OR-able + /// flags (as opposed to mutually exclusive values) in order to deal with + /// the nature of iterator status. In particular, an iterator may be valid + /// but not dereferencable, as in the case with an iterator to container end(). + /// An iterator may be valid but also dereferencable, as in the case with an + /// iterator to container begin(). + /// + enum iterator_status_flag + { + isf_none = 0x00, /// This is called none and not called invalid because it is not strictly the opposite of invalid. + isf_valid = 0x01, /// The iterator is valid, which means it is in the range of [begin, end]. + isf_current = 0x02, /// The iterator is valid and points to the same element it did when created. For example, if an iterator points to vector::begin() but an element is inserted at the front, the iterator is valid but not current. Modification of elements in place do not make iterators non-current. + isf_can_dereference = 0x04 /// The iterator is dereferencable, which means it is in the range of [begin, end). It may or may not be current. + }; + + + + // The following declarations are taken directly from the C++ standard document. + // input_iterator_tag, etc. + // iterator + // iterator_traits + // reverse_iterator + + // Iterator categories + // Every iterator is defined as belonging to one of the iterator categories that + // we define here. These categories come directly from the C++ standard. + #if !EASTL_STD_ITERATOR_CATEGORY_ENABLED // If we are to use our own iterator category definitions... + struct input_iterator_tag { }; + struct output_iterator_tag { }; + struct forward_iterator_tag : public input_iterator_tag { }; + struct bidirectional_iterator_tag : public forward_iterator_tag { }; + struct random_access_iterator_tag : public bidirectional_iterator_tag { }; + struct contiguous_iterator_tag : public random_access_iterator_tag { }; // Extension to the C++ standard. Contiguous ranges are more than random access, they are physically contiguous. + #endif + + + // struct iterator + template + struct iterator + { + typedef Category iterator_category; + typedef T value_type; + typedef Distance difference_type; + typedef Pointer pointer; + typedef Reference reference; + }; + + + // struct iterator_traits + template + struct iterator_traits + { + typedef typename Iterator::iterator_category iterator_category; + typedef typename Iterator::value_type value_type; + typedef typename Iterator::difference_type difference_type; + typedef typename Iterator::pointer pointer; + typedef typename Iterator::reference reference; + }; + + template + struct iterator_traits + { + typedef EASTL_ITC_NS::random_access_iterator_tag iterator_category; // To consider: Change this to contiguous_iterator_tag for the case that + typedef T value_type; // EASTL_ITC_NS is "eastl" instead of "std". 
+ typedef ptrdiff_t difference_type; + typedef T* pointer; + typedef T& reference; + }; + + template + struct iterator_traits + { + typedef EASTL_ITC_NS::random_access_iterator_tag iterator_category; + typedef T value_type; + typedef ptrdiff_t difference_type; + typedef const T* pointer; + typedef const T& reference; + }; + + + + + /// is_iterator_wrapper + /// + /// Tells if an Iterator type is a wrapper type as opposed to a regular type. + /// Relies on the class declaring a typedef called wrapped_iterator_type. + /// + /// Examples of wrapping iterators: + /// reverse_iterator + /// generic_iterator + /// move_iterator + /// Examples of non-wrapping iterators: + /// iterator + /// list::iterator + /// char* + /// + /// Example behavior: + /// is_iterator_wrapper(int*)::value => false + /// is_iterator_wrapper(eastl::array*)::value => false + /// is_iterator_wrapper(eastl::vector::iterator)::value => false + /// is_iterator_wrapper(eastl::generic_iterator)::value => true + /// is_iterator_wrapper(eastl::move_iterator::iterator>)::value => true + /// + template + class is_iterator_wrapper + { + template + static eastl::no_type test(...); + + template + static eastl::yes_type test(typename U::wrapped_iterator_type*, typename eastl::enable_if::value>::type* = 0); + + public: + EA_DISABLE_VC_WARNING(6334) + static const bool value = (sizeof(test(NULL)) == sizeof(eastl::yes_type)); + EA_RESTORE_VC_WARNING() + }; + + + /// unwrap_iterator + /// + /// Takes a wrapper Iterator (e.g. move_iterator, reverse_iterator, generic_iterator) instance + /// and returns the wrapped iterator type. If Iterator is not a wrapper (including being a pointer), + /// or is not an iterator, then this function returns it as-is. + /// unwrap_iterator unwraps only a single layer of iterator at a time. You need to call it twice, + /// for example, to unwrap two layers of iterators. + /// + /// Example usage: + /// int* pInt = unwrap_iterator(&pIntArray[15]); + /// int* pInt = unwrap_iterator(generic_iterator(&pIntArray[15])); + /// MyVector::iterator it = unwrap_iterator(myVector.begin()); + /// MyVector::iterator it = unwrap_iterator(move_iterator(myVector.begin())); + /// + template + struct is_iterator_wrapper_helper + { + typedef Iterator iterator_type; + + static iterator_type get_base(Iterator it) + { return it; } + }; + + + template + struct is_iterator_wrapper_helper + { + typedef typename Iterator::iterator_type iterator_type; + + static iterator_type get_base(Iterator it) + { return it.base(); } + }; + + template + inline typename is_iterator_wrapper_helper::value>::iterator_type unwrap_iterator(Iterator it) + { return eastl::is_iterator_wrapper_helper::value>::get_base(it); } + + + + /// reverse_iterator + /// + /// From the C++ standard: + /// Bidirectional and random access iterators have corresponding reverse + /// iterator adaptors that iterate through the data structure in the + /// opposite direction. They have the same signatures as the corresponding + /// iterators. The fundamental relation between a reverse iterator and its + /// corresponding iterator i is established by the identity: + /// &*(reverse_iterator(i)) == &*(i - 1). + /// This mapping is dictated by the fact that while there is always a pointer + /// past the end of an array, there might not be a valid pointer before the + /// beginning of an array. 
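+    ///
+    /// Illustrative sketch of the identity above (the int array is assumed purely
+    /// for the example):
+    ///     int a[] = { 1, 2, 3 };
+    ///     eastl::reverse_iterator<int*> rit(a + 3);   // wraps the one-past-the-end pointer
+    ///     // *rit == 3, and &*rit == &*(rit.base() - 1) == &a[2]
+    ///     ++rit;                                      // steps backward, so now *rit == 2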
+ /// + template + class reverse_iterator : public iterator::iterator_category, + typename eastl::iterator_traits::value_type, + typename eastl::iterator_traits::difference_type, + typename eastl::iterator_traits::pointer, + typename eastl::iterator_traits::reference> + { + public: + typedef Iterator iterator_type; + typedef iterator_type wrapped_iterator_type; // This is not in the C++ Standard; it's used by use to identify it as a wrapping iterator type. + typedef typename eastl::iterator_traits::pointer pointer; + typedef typename eastl::iterator_traits::reference reference; + typedef typename eastl::iterator_traits::difference_type difference_type; + + protected: + Iterator mIterator; + + public: + EA_CPP14_CONSTEXPR reverse_iterator() // It's important that we construct mIterator, because if Iterator + : mIterator() { } // is a pointer, there's a difference between doing it and not. + + EA_CPP14_CONSTEXPR explicit reverse_iterator(iterator_type i) + : mIterator(i) { } + + EA_CPP14_CONSTEXPR reverse_iterator(const reverse_iterator& ri) + : mIterator(ri.mIterator) { } + + template + EA_CPP14_CONSTEXPR reverse_iterator(const reverse_iterator& ri) + : mIterator(ri.base()) { } + + // This operator= isn't in the standard, but the the C++ + // library working group has tentatively approved it, as it + // allows const and non-const reverse_iterators to interoperate. + template + EA_CPP14_CONSTEXPR reverse_iterator& operator=(const reverse_iterator& ri) + { mIterator = ri.base(); return *this; } + + EA_CPP14_CONSTEXPR iterator_type base() const + { return mIterator; } + + EA_CPP14_CONSTEXPR reference operator*() const + { + iterator_type i(mIterator); + return *--i; + } + + EA_CPP14_CONSTEXPR pointer operator->() const + { return &(operator*()); } + + EA_CPP14_CONSTEXPR reverse_iterator& operator++() + { --mIterator; return *this; } + + EA_CPP14_CONSTEXPR reverse_iterator operator++(int) + { + reverse_iterator ri(*this); + --mIterator; + return ri; + } + + EA_CPP14_CONSTEXPR reverse_iterator& operator--() + { ++mIterator; return *this; } + + EA_CPP14_CONSTEXPR reverse_iterator operator--(int) + { + reverse_iterator ri(*this); + ++mIterator; + return ri; + } + + EA_CPP14_CONSTEXPR reverse_iterator operator+(difference_type n) const + { return reverse_iterator(mIterator - n); } + + EA_CPP14_CONSTEXPR reverse_iterator& operator+=(difference_type n) + { mIterator -= n; return *this; } + + EA_CPP14_CONSTEXPR reverse_iterator operator-(difference_type n) const + { return reverse_iterator(mIterator + n); } + + EA_CPP14_CONSTEXPR reverse_iterator& operator-=(difference_type n) + { mIterator += n; return *this; } + + // http://cplusplus.github.io/LWG/lwg-defects.html#386, + // http://llvm.org/bugs/show_bug.cgi?id=17883 + // random_access_iterator operator[] is merely required to return something convertible to reference. + // reverse_iterator operator[] can't necessarily know what to return as the underlying iterator + // operator[] may return something other than reference. + EA_CPP14_CONSTEXPR reference operator[](difference_type n) const + { return mIterator[-n - 1]; } + }; + + + // The C++ library working group has tentatively approved the usage of two + // template parameters (Iterator1 and Iterator2) in order to allow reverse_iterators + // and const_reverse iterators to be comparable. This is a similar issue to the + // C++ defect report #179 regarding comparison of container iterators and const_iterators. 
+ // + // libstdc++ reports that std::relops breaks the usage of two iterator types and if we + // want to support relops then we need to also make versions of each of below with + // a single template parameter to placate std::relops. But relops is hardly used due to + // the troubles it causes and so we are avoiding support here until somebody complains about it. + template + EA_CPP14_CONSTEXPR inline bool + operator==(const reverse_iterator& a, const reverse_iterator& b) + { return a.base() == b.base(); } + + + template + EA_CPP14_CONSTEXPR inline bool + operator<(const reverse_iterator& a, const reverse_iterator& b) + { return a.base() > b.base(); } + + + template + EA_CPP14_CONSTEXPR inline bool + operator!=(const reverse_iterator& a, const reverse_iterator& b) + { return a.base() != b.base(); } + + + template + EA_CPP14_CONSTEXPR inline bool + operator>(const reverse_iterator& a, const reverse_iterator& b) + { return a.base() < b.base(); } + + + template + EA_CPP14_CONSTEXPR inline bool + operator<=(const reverse_iterator& a, const reverse_iterator& b) + { return a.base() >= b.base(); } + + + template + EA_CPP14_CONSTEXPR inline bool + operator>=(const reverse_iterator& a, const reverse_iterator& b) + { return a.base() <= b.base(); } + + + template + EA_CPP14_CONSTEXPR inline typename reverse_iterator::difference_type + operator-(const reverse_iterator& a, const reverse_iterator& b) + { return b.base() - a.base(); } + + + template + EA_CPP14_CONSTEXPR inline reverse_iterator + operator+(typename reverse_iterator::difference_type n, const reverse_iterator& a) + { return reverse_iterator(a.base() - n); } + + + /// is_reverse_iterator + /// + /// This is a type traits extension utility. + /// Given an iterator, tells if it's a reverse_iterator vs anything else. + /// If it's a reverse iterator wrapped by another iterator then value is false. + /// To consider: Detect that if it's a move_iterator and unwrap + /// move_iterator so we can detect that underneath it's reverse_iterator. + /// + template + struct is_reverse_iterator + : public eastl::false_type {}; + + template + struct is_reverse_iterator< eastl::reverse_iterator > + : public eastl::true_type {}; + + + + /// unwrap_reverse_iterator + /// + /// Returns Iterator::get_base() if it's a reverse_iterator, else returns Iterator as-is. + /// + /// Example usage: + /// vector intVector; + /// eastl::reverse_iterator::iterator> reverseIterator(intVector.begin()); + /// vector::iterator it = unwrap_reverse_iterator(reverseIterator); + /// + /// Disabled until there is considered a good use for it. + /// template + /// inline typename eastl::is_iterator_wrapper_helper::value>::iterator_type unwrap_reverse_iterator(Iterator it) + /// { return eastl::is_iterator_wrapper_helper::value>::get_base(it); } + + + + /// move_iterator + /// + /// From the C++11 Standard, section 24.5.3.1: + /// Class template move_iterator is an iterator adaptor with the same behavior as the underlying iterator + /// except that its dereference operator implicitly converts the value returned by the underlying iterator's + /// dereference operator to an rvalue reference. Some generic algorithms can be called with move iterators to + /// replace copying with moving. + + template + class move_iterator // Don't inherit from iterator. + { + public: + typedef Iterator iterator_type; + typedef iterator_type wrapped_iterator_type; // This is not in the C++ Standard; it's used by use to identify it as a wrapping iterator type. 
+ typedef iterator_traits traits_type; + typedef typename traits_type::iterator_category iterator_category; + typedef typename traits_type::value_type value_type; + typedef typename traits_type::difference_type difference_type; + typedef Iterator pointer; + typedef value_type&& reference; + + protected: + iterator_type mIterator; + + public: + move_iterator() + : mIterator() + { + } + + explicit move_iterator(iterator_type mi) + : mIterator(mi) { } + + template + move_iterator(const move_iterator& mi) + : mIterator(mi.base()) + { + } + + iterator_type base() const + { return mIterator; } + + reference operator*() const + { return eastl::move(*mIterator); } + + pointer operator->() const + { return mIterator; } + + move_iterator& operator++() + { + ++mIterator; + return *this; + } + + move_iterator operator++(int) + { + move_iterator tempMoveIterator = *this; + ++mIterator; + return tempMoveIterator; + } + + move_iterator& operator--() + { + --mIterator; + return *this; + } + + move_iterator operator--(int) + { + move_iterator tempMoveIterator = *this; + --mIterator; + return tempMoveIterator; + } + + move_iterator operator+(difference_type n) const + { return move_iterator(mIterator + n); } + + move_iterator& operator+=(difference_type n) + { + mIterator += n; + return *this; + } + + move_iterator operator-(difference_type n) const + { return move_iterator(mIterator - n); } + + move_iterator& operator-=(difference_type n) + { + mIterator -= n; + return *this; + } + + reference operator[](difference_type n) const + { return eastl::move(mIterator[n]); } + }; + + template + inline bool + operator==(const move_iterator& a, const move_iterator& b) + { return a.base() == b.base(); } + + + template + inline bool + operator!=(const move_iterator& a, const move_iterator& b) + { return !(a == b); } + + + template + inline bool + operator<(const move_iterator& a, const move_iterator& b) + { return a.base() < b.base(); } + + + template + inline bool + operator<=(const move_iterator& a, const move_iterator& b) + { return !(b < a); } + + + template + inline bool + operator>(const move_iterator& a, const move_iterator& b) + { return b < a; } + + + template + inline bool + operator>=(const move_iterator& a, const move_iterator& b) + { return !(a < b); } + + + template + inline auto + operator-(const move_iterator& a, const move_iterator& b) -> decltype(a.base() - b.base()) + { return a.base() - b.base(); } + + + template + inline move_iterator + operator+(typename move_iterator::difference_type n, const move_iterator& a) + { return a + n; } + + + template + inline move_iterator make_move_iterator(Iterator i) + { return move_iterator(i); } + + + // make_move_if_noexcept_iterator returns move_iterator if the Iterator is of a noexcept type; + // otherwise returns Iterator as-is. The point of this is to be able to avoid moves that can generate exceptions and instead + // fall back to copies or whatever the default IteratorType::operator* returns for use by copy/move algorithms. + // To consider: merge the conditional expression usage here with the one used by move_if_noexcept, as they are the same condition. + #if EASTL_EXCEPTIONS_ENABLED + template ::value_type>::value || + !eastl::is_copy_constructible::value_type>::value, + eastl::move_iterator, Iterator>::type> + inline IteratorType make_move_if_noexcept_iterator(Iterator i) + { return IteratorType(i); } + #else + // Else there are no exceptions and thus we always return a move_iterator. 
+ template + inline eastl::move_iterator make_move_if_noexcept_iterator(Iterator i) + { return eastl::move_iterator(i); } + #endif + + + + /// is_move_iterator + /// + /// This is a type traits extension utility. + /// Given an iterator, tells if it's a move iterator vs anything else. + /// Example usage (though somewhat useless): + /// template + /// bool IsMoveIterator() { return typename eastl::is_move_iterator::value; } + /// + template + struct is_move_iterator + : public eastl::false_type {}; + + template + struct is_move_iterator< eastl::move_iterator > + : public eastl::true_type {}; + + + /// unwrap_move_iterator + /// + /// Returns Iterator::get_base() if it's a move_iterator, else returns Iterator as-is. + /// + /// Example usage: + /// vector intVector; + /// eastl::move_iterator::iterator> moveIterator(intVector.begin()); + /// vector::iterator it = unwrap_move_iterator(moveIterator); + /// + template + inline typename eastl::is_iterator_wrapper_helper::value>::iterator_type unwrap_move_iterator(Iterator it) + { return eastl::is_iterator_wrapper_helper::value>::get_base(it); } + + + + + /// back_insert_iterator + /// + /// A back_insert_iterator is simply a class that acts like an iterator but when you + /// assign a value to it, it calls push_back on the container with the value. + /// + template + class back_insert_iterator : public iterator + { + public: + typedef back_insert_iterator this_type; + typedef Container container_type; + typedef typename Container::const_reference const_reference; + + protected: + Container& container; + + public: + //back_insert_iterator(); // Not valid. Must construct with a Container. + + //back_insert_iterator(const this_type& x) // Compiler-implemented + // : container(x.container) { } + + explicit back_insert_iterator(Container& x) + : container(x) { } + + back_insert_iterator& operator=(const_reference value) + { container.push_back(value); return *this; } + + back_insert_iterator& operator=(typename Container::value_type&& value) + { container.push_back(eastl::move(value)); return *this; } + + back_insert_iterator& operator*() + { return *this; } + + back_insert_iterator& operator++() + { return *this; } // This is by design. + + back_insert_iterator operator++(int) + { return *this; } // This is by design. + + protected: + void operator=(const this_type&){} // Declared to avoid compiler warnings about inability to generate this function. + }; + + + /// back_inserter + /// + /// Creates an instance of a back_insert_iterator. + /// + template + inline back_insert_iterator + back_inserter(Container& x) + { return back_insert_iterator(x); } + + + + + /// front_insert_iterator + /// + /// A front_insert_iterator is simply a class that acts like an iterator but when you + /// assign a value to it, it calls push_front on the container with the value. + /// + template + class front_insert_iterator : public iterator + { + public: + typedef front_insert_iterator this_type; + typedef Container container_type; + typedef typename Container::const_reference const_reference; + + protected: + Container& container; + + public: + //front_insert_iterator(); // Not valid. Must construct with a Container. 
+ + //front_insert_iterator(const this_type& x) // Compiler-implemented + // : container(x.container) { } + + explicit front_insert_iterator(Container& x) + : container(x) { } + + front_insert_iterator& operator=(const_reference value) + { container.push_front(value); return *this; } + + front_insert_iterator& operator*() + { return *this; } + + front_insert_iterator& operator++() + { return *this; } // This is by design. + + front_insert_iterator operator++(int) + { return *this; } // This is by design. + + protected: + void operator=(const this_type&){} // Declared to avoid compiler warnings about inability to generate this function. + }; + + + /// front_inserter + /// + /// Creates an instance of a front_insert_iterator. + /// + template + inline front_insert_iterator + front_inserter(Container& x) + { return front_insert_iterator(x); } + + + + + /// insert_iterator + /// + /// An insert_iterator is like an iterator except that when you assign a value to it, + /// the insert_iterator inserts the value into the container and increments the iterator. + /// + /// insert_iterator is an iterator adaptor that functions as an OutputIterator: + /// assignment through an insert_iterator inserts an object into a container. + /// Specifically, if ii is an insert_iterator, then ii keeps track of a container c and + /// an insertion point p; the expression *ii = x performs the insertion container.insert(p, x). + /// + /// If you assign through an insert_iterator several times, then you will be inserting + /// several elements into the underlying container. In the case of a sequence, they will + /// appear at a particular location in the underlying sequence, in the order in which + /// they were inserted: one of the arguments to insert_iterator's constructor is an + /// iterator p, and the new range will be inserted immediately before p. + /// + template + class insert_iterator : public iterator + { + public: + typedef Container container_type; + typedef typename Container::iterator iterator_type; + typedef typename Container::const_reference const_reference; + + protected: + Container& container; + iterator_type it; + + public: + // This assignment operator is defined more to stop compiler warnings (e.g. VC++ C4512) + // than to be useful. However, it does allow an insert_iterator to be assigned to another + // insert iterator provided that they point to the same container. + insert_iterator& operator=(const insert_iterator& x) + { + EASTL_ASSERT(&x.container == &container); + it = x.it; + return *this; + } + + insert_iterator(Container& x, iterator_type itNew) + : container(x), it(itNew) {} + + insert_iterator& operator=(const_reference value) + { + it = container.insert(it, value); + ++it; + return *this; + } + + insert_iterator& operator*() + { return *this; } + + insert_iterator& operator++() + { return *this; } // This is by design. + + insert_iterator& operator++(int) + { return *this; } // This is by design. + + }; // insert_iterator + + + /// inserter + /// + /// Creates an instance of an insert_iterator. + /// + template + inline eastl::insert_iterator + inserter(Container& x, Iterator i) + { + typedef typename Container::iterator iterator; + return eastl::insert_iterator(x, iterator(i)); + } + + + /// is_insert_iterator + /// + /// This is a type traits extension utility. + /// Given an iterator, tells if it's an insert_iterator vs anything else. + /// If it's a insert_iterator wrapped by another iterator then value is false. 
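+    ///
+    /// Illustrative sketch (intList is an assumed eastl::list<int>):
+    ///     auto ii = eastl::inserter(intList, intList.begin());   // insert_iterator<eastl::list<int>>
+    ///     *ii = 42;                                               // performs intList.insert(intList.begin(), 42)
+    ///     static_assert(eastl::is_insert_iterator<decltype(ii)>::value, "inserter() yields an insert_iterator");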
+ /// + template + struct is_insert_iterator + : public eastl::false_type {}; + + template + struct is_insert_iterator< eastl::insert_iterator > + : public eastl::true_type {}; + + + + + ////////////////////////////////////////////////////////////////////////////////// + /// distance + /// + /// Implements the distance() function. There are two versions, one for + /// random access iterators (e.g. with vector) and one for regular input + /// iterators (e.g. with list). The former is more efficient. + /// + template + EA_CONSTEXPR + inline typename eastl::iterator_traits::difference_type + distance_impl(InputIterator first, InputIterator last, EASTL_ITC_NS::input_iterator_tag) + { + typename eastl::iterator_traits::difference_type n = 0; + + while(first != last) + { + ++first; + ++n; + } + return n; + } + + template + EA_CONSTEXPR + inline typename eastl::iterator_traits::difference_type + distance_impl(RandomAccessIterator first, RandomAccessIterator last, EASTL_ITC_NS::random_access_iterator_tag) + { + return last - first; + } + + // Special version defined so that std C++ iterators can be recognized by + // this function. Unfortunately, this function treats all foreign iterators + // as InputIterators and thus can seriously hamper performance in the case + // of large ranges of bidirectional_iterator_tag iterators. + //template + //inline typename eastl::iterator_traits::difference_type + //distance_impl(InputIterator first, InputIterator last, ...) + //{ + // typename eastl::iterator_traits::difference_type n = 0; + // + // while(first != last) + // { + // ++first; + // ++n; + // } + // return n; + //} + + template + EA_CONSTEXPR + inline typename eastl::iterator_traits::difference_type + distance(InputIterator first, InputIterator last) + { + typedef typename eastl::iterator_traits::iterator_category IC; + + return eastl::distance_impl(first, last, IC()); + } + + + + + ////////////////////////////////////////////////////////////////////////////////// + /// advance + /// + /// Implements the advance() function. There are three versions, one for + /// random access iterators (e.g. with vector), one for bidirectional + /// iterators (list) and one for regular input iterators (e.g. with slist). + /// + template + inline void + advance_impl(InputIterator& i, Distance n, EASTL_ITC_NS::input_iterator_tag) + { + while(n--) + ++i; + } + + template + struct advance_bi_impl + { + template + static void advance_impl(BidirectionalIterator& i, Distance n) // Specialization for unsigned distance type. + { + while(n--) + ++i; + } + }; + + template <> + struct advance_bi_impl + { + template + static void advance_impl(BidirectionalIterator& i, Distance n) // Specialization for signed distance type. + { + if(n > 0) + { + while(n--) + ++i; + } + else + { + while(n++) + --i; + } + } + }; + + template + inline void + advance_impl(BidirectionalIterator& i, Distance n, EASTL_ITC_NS::bidirectional_iterator_tag) + { + advance_bi_impl::value>::advance_impl(i, n); + } + + template + inline void + advance_impl(RandomAccessIterator& i, Distance n, EASTL_ITC_NS::random_access_iterator_tag) + { + i += n; + } + + // Special version defined so that std C++ iterators can be recognized by + // this function. Unfortunately, this function treats all foreign iterators + // as InputIterators and thus can seriously hamper performance in the case + // of large ranges of bidirectional_iterator_tag iterators. + //template + //inline void + //advance_impl(InputIterator& i, Distance n, ...) 
+ //{ + // while(n--) + // ++i; + //} + + template + inline void + advance(InputIterator& i, Distance n) + { + typedef typename eastl::iterator_traits::iterator_category IC; + + eastl::advance_impl(i, n, IC()); + } + + + // eastl::next / eastl::prev + // Return the nth/-nth successor of iterator it. + // + // http://en.cppreference.com/w/cpp/iterator/next + // + template + inline InputIterator + next(InputIterator it, typename eastl::iterator_traits::difference_type n = 1) + { + eastl::advance(it, n); + return it; + } + + template + inline InputIterator + prev(InputIterator it, typename eastl::iterator_traits::difference_type n = 1) + { + eastl::advance(it, -n); + return it; + } + + +#if defined(EA_COMPILER_CPP11_ENABLED) && EA_COMPILER_CPP11_ENABLED + + // eastl::data + // + // http://en.cppreference.com/w/cpp/iterator/data + // + template + EA_CPP14_CONSTEXPR auto data(Container& c) -> decltype(c.data()) + { return c.data(); } + + template + EA_CPP14_CONSTEXPR auto data(const Container& c) -> decltype(c.data()) + { return c.data(); } + + template + EA_CPP14_CONSTEXPR T* data(T(&array)[N]) EA_NOEXCEPT + { return array; } + + template + EA_CPP14_CONSTEXPR const E* data(std::initializer_list il) EA_NOEXCEPT + { return il.begin(); } + + + // eastl::size + // + // http://en.cppreference.com/w/cpp/iterator/size + // + template + EA_CPP14_CONSTEXPR auto size(const C& c) -> decltype(c.size()) + { return c.size(); } + + template + EA_CPP14_CONSTEXPR size_t size(const T (&)[N]) EA_NOEXCEPT + { return N; } + + + // eastl::ssize + // + // https://en.cppreference.com/w/cpp/iterator/size + // + template + EA_CPP14_CONSTEXPR ptrdiff_t ssize(const T(&)[N]) EA_NOEXCEPT + { return N; } + + template + EA_CPP14_CONSTEXPR auto ssize(const C& c) + -> eastl::common_type_t> + { + using R = eastl::common_type_t>; + return static_cast(c.size()); + } + + + // eastl::empty + // + // http://en.cppreference.com/w/cpp/iterator/empty + // + template + EA_CPP14_CONSTEXPR auto empty(const Container& c) -> decltype(c.empty()) + { return c.empty(); } + + template + EA_CPP14_CONSTEXPR bool empty(const T (&)[N]) EA_NOEXCEPT + { return false; } + + template + EA_CPP14_CONSTEXPR bool empty(std::initializer_list il) EA_NOEXCEPT + { return il.size() == 0; } + +#endif // defined(EA_COMPILER_CPP11_ENABLED) && EA_COMPILER_CPP11_ENABLED + + + // eastl::begin / eastl::end + // http://en.cppreference.com/w/cpp/iterator/begin + // + // In order to enable eastl::begin and eastl::end, the compiler needs to have conforming support + // for argument-dependent lookup if it supports C++11 range-based for loops. The reason for this is + // that in C++11 range-based for loops result in usage of std::begin/std::end, but allow that to + // be overridden by argument-dependent lookup: + // C++11 Standard, section 6.5.4, paragraph 1. + // "otherwise, begin-expr and end-expr are begin(__range) and end(__range), respectively, + // where begin and end are looked up with argument-dependent lookup (3.4.2). For the + // purposes of this name lookup, namespace std is an associated namespace." + // It turns out that one compiler has a problem: GCC 4.6. That version added support for + // range-based for loops but has broken argument-dependent lookup which was fixed in GCC 4.7. 
+ // + #if (defined(EA_COMPILER_GNUC) && (EA_COMPILER_VERSION == 4006)) + #define EASTL_BEGIN_END_ENABLED 0 + #else + #define EASTL_BEGIN_END_ENABLED 1 + #endif + + #if EASTL_BEGIN_END_ENABLED + template + EA_CPP14_CONSTEXPR inline auto begin(Container& container) -> decltype(container.begin()) + { + return container.begin(); + } + + template + EA_CPP14_CONSTEXPR inline auto begin(const Container& container) -> decltype(container.begin()) + { + return container.begin(); + } + + template + EA_CPP14_CONSTEXPR inline auto cbegin(const Container& container) -> decltype(container.begin()) + { + return container.begin(); + } + + template + EA_CPP14_CONSTEXPR inline auto end(Container& container) -> decltype(container.end()) + { + return container.end(); + } + + template + EA_CPP14_CONSTEXPR inline auto end(const Container& container) -> decltype(container.end()) + { + return container.end(); + } + + template + EA_CPP14_CONSTEXPR inline auto cend(const Container& container) -> decltype(container.end()) + { + return container.end(); + } + + template + EA_CPP14_CONSTEXPR inline auto rbegin(Container& container) -> decltype(container.rbegin()) + { + return container.rbegin(); + } + + template + EA_CPP14_CONSTEXPR inline auto rbegin(const Container& container) -> decltype(container.rbegin()) + { + return container.rbegin(); + } + + template + EA_CPP14_CONSTEXPR inline auto rend(Container& container) -> decltype(container.rend()) + { + return container.rend(); + } + + template + EA_CPP14_CONSTEXPR inline auto rend(const Container& container) -> decltype(container.rend()) + { + return container.rend(); + } + + template + EA_CPP14_CONSTEXPR inline auto crbegin(const Container& container) -> decltype(eastl::rbegin(container)) + { + return container.rbegin(); + } + + template + EA_CPP14_CONSTEXPR inline auto crend(const Container& container) -> decltype(eastl::rend(container)) + { + return container.rend(); + } + + template + EA_CPP14_CONSTEXPR inline T* begin(T (&arrayObject)[arraySize]) + { + return arrayObject; + } + + template + EA_CPP14_CONSTEXPR inline T* end(T (&arrayObject)[arraySize]) + { + return (arrayObject + arraySize); + } + + template + EA_CPP14_CONSTEXPR inline reverse_iterator rbegin(T (&arrayObject)[arraySize]) + { + return reverse_iterator(arrayObject + arraySize); + } + + template + EA_CPP14_CONSTEXPR inline reverse_iterator rend(T (&arrayObject)[arraySize]) + { + return reverse_iterator(arrayObject); + } + + template + EA_CPP14_CONSTEXPR inline reverse_iterator rbegin(std::initializer_list ilist) + { + return eastl::reverse_iterator(ilist.end()); + } + + template + EA_CPP14_CONSTEXPR inline reverse_iterator rend(std::initializer_list ilist) + { + return eastl::reverse_iterator(ilist.begin()); + } + + template + EA_CPP14_CONSTEXPR reverse_iterator make_reverse_iterator(Iterator i) + { return reverse_iterator(i); } + + #endif // EASTL_BEGIN_END_ENABLED + +} // namespace eastl + + + +// Some compilers (e.g. GCC 4.6) support range-based for loops, but have a bug with +// respect to argument-dependent lookup which results on them unilaterally using std::begin/end +// with range-based for loops. To work around this we #include for this case in +// order to make std::begin/end visible to users of , for portability. 
+#if !EASTL_BEGIN_END_ENABLED && !defined(EA_COMPILER_NO_RANGE_BASED_FOR_LOOP) + #include +#endif + + + +EA_RESTORE_VC_WARNING(); +EA_RESTORE_VC_WARNING(); + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/linked_array.h b/libkram/eastl/include/EASTL/linked_array.h new file mode 100644 index 00000000..88d99146 --- /dev/null +++ b/libkram/eastl/include/EASTL/linked_array.h @@ -0,0 +1,336 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This class implements a linked_array template, which is an array version +// of linked_ptr. See linked_ptr for detailed documentation. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_LINKED_ARRAY_H +#define EASTL_LINKED_ARRAY_H + + +#include +#include // Defines smart_array_deleter +#include // Defines linked_ptr_base +#include // Definition of ptrdiff_t + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// class linked_array + /// + /// This class implements a linked_array template, which is an array version + /// of linked_ptr. See linked_ptr for detailed documentation. + /// + template > + class linked_array + { + + protected: + + /// this_type + /// This is an alias for linked_array, this class. + typedef linked_array this_type; + + /// deleter_type + typedef Deleter deleter_type; + + T* mpArray; + mutable const this_type* mpPrev; + mutable const this_type* mpNext; + + void link(const linked_array& linkedArray) + { // This code can only be called when we are in a reset state. + // assert(!mpArray && (mpNext == mpPrev)); + mpNext = linkedArray.mpNext; + mpNext->mpPrev = this; + mpPrev = &linkedArray; + linkedArray.mpNext = this; + } + + public: + /// element_type + /// Synonym for type T, useful for external code to reference the + /// type in a generic way. + typedef T element_type; + + + /// linked_array + /// Takes ownership of the pointer. It is OK if the input pointer is null. + explicit linked_array(T* pArray = NULL) + : mpArray(pArray) + { + mpPrev = mpNext = this; + } + + + /// linked_array + /// Shares ownership of a pointer with another instance of linked_array. + linked_array(const linked_array& linkedArray) + : mpArray(linkedArray.mpArray) + { + if(mpArray) + link(linkedArray); + else + mpPrev = mpNext = this; + } + + + /// ~linked_array + /// Removes this object from the of objects using the shared pointer. + /// If this object is the last owner of the shared pointer, the shared + /// pointer is deleted. + ~linked_array() + { + reset(); + } + + + /// operator= + /// Copies another linked_array to this object. Note that this object + /// may already own a shared pointer with another different pointer + /// (but still of the same type) before this call. In that case, + /// this function removes ownership of the old pointer and takes shared + /// ownership of the new pointer and increments its reference count. 
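+        ///
+        /// Illustrative sketch (the array sizes are assumed for the example; the default
+        /// deleter is expected to delete[] an old array once no owner remains):
+        ///     linked_array<int> a(new int[8]);
+        ///     linked_array<int> b(new int[4]);
+        ///     b = a;   // b's old array is deleted; a and b now share one array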
+ linked_array& operator=(const linked_array& linkedArray) + { + if(linkedArray.mpArray != mpArray) + { + reset(linkedArray.mpArray); + if(linkedArray.mpArray) + link(linkedArray); + } + return *this; + } + + + /// operator= + /// Assigns a new pointer. If the new pointer is equivalent + /// to the current pointer, nothing is done. Otherwise the + /// current pointer is unlinked and possibly destroyed. + /// The new pointer can be NULL. + linked_array& operator=(T* pArray) + { + reset(pArray); + return *this; + } + + + /// reset + /// Releases the owned pointer and takes ownership of the + /// passed in pointer. If the passed in pointer is the same + /// as the owned pointer, nothing is done. The passed in pointer + /// can be null, in which case the use count is set to 1. + void reset(T* pArray = NULL) + { + if(pArray != mpArray) + { + if(unique()) + { + deleter_type del; + del(mpArray); + } + else + { + mpPrev->mpNext = mpNext; + mpNext->mpPrev = mpPrev; + mpPrev = mpNext = this; + } + mpArray = pArray; + } + } + + + /// swap + /// Exchanges the owned pointer beween two linkedArray objects. + /// + /// This function is disabled as it is currently deemed unsafe. + /// The problem is that the only way to implement this function + /// is to transfer pointers between the objects; you cannot + /// transfer the linked list membership between the objects. + /// Thus unless both linked_array objects were 'unique()', the + /// shared pointers would be duplicated amongst containers, + /// resulting in a crash. + //void swap(linked_array& linkedArray) + //{ + // if(linkedArray.mpArray != mpArray) + // { // This is only safe if both linked_arrays are unique(). + // linkedArray::element_type* const pArrayTemp = linkedArray.mpArray; + // linkedArray.reset(mpArray); + // reset(pArrayTemp); + // } + //} + + + /// operator[] + /// Returns a reference to the specified item in the owned pointer array. + T& operator[](ptrdiff_t i) const + { + // assert(mpArray && (i >= 0)); + return mpArray[i]; + } + + + /// operator* + /// Returns the owner pointer dereferenced. + T& operator*() const + { + return *mpArray; + } + + + /// operator-> + /// Allows access to the owned pointer via operator->() + T* operator->() const + { + return mpArray; + } + + + /// get + /// Returns the owned pointer. Note that this class does + /// not provide an operator T() function. This is because such + /// a thing (automatic conversion) is deemed unsafe. + T* get() const + { + return mpArray; + } + + + /// use_count + /// Returns the use count of the shared pointer. + /// The return value is one if the owned pointer is null. + /// This function is provided for compatibility with the + /// proposed C++ standard and for debugging purposes. It is not + /// intended for runtime use given that its execution time is + /// not constant. + int use_count() const + { + int useCount(1); + + for(const linked_ptr_base* pCurrent = this; pCurrent->mpNext != this; pCurrent = pCurrent->mpNext) + ++useCount; + + return useCount; + } + + + /// unique + /// Returns true if the use count of the owned pointer is one. + /// The return value is true if the owned pointer is null. + bool unique() const + { + return (mpNext == this); + } + + + /// Implicit operator bool + /// Allows for using a linked_array as a boolean. + /// Note that below we do not use operator bool(). The reason for this + /// is that booleans automatically convert up to short, int, float, etc. + /// The result is that this: if(linkedArray == 1) would yield true (bad). 
+ typedef T* (this_type::*bool_)() const; + operator bool_() const + { + if(mpArray) + return &this_type::get; + return NULL; + } + + + /// operator! + /// This returns the opposite of operator bool; it returns true if + /// the owned pointer is null. Some compilers require this and some don't. + bool operator!() + { + return (mpArray == NULL); + } + + + /// force_delete + /// Forces deletion of the shared pointer. Fixes all references to the + /// pointer by any other owners to be NULL. + void force_delete() + { + T* const pArray = mpArray; + + this_type* p = this; + do + { + this_type* const pNext = const_cast(p->mpNext); + p->mpArray = NULL; + p->mpNext = p->mpPrev = p; + p = pNext; + } + while(p != this); + + deleter_type del; + del(pArray); + } + + }; // class linked_array + + + + /// get_pointer + /// Returns linked_array::get() via the input linked_array. Provided for compatibility + /// with certain well-known libraries that use this functionality. + template + inline T* get_pointer(const linked_array& linkedArray) + { + return linkedArray.get(); + } + + + /// operator== + /// Compares two linked_array objects for equality. Equality is defined as + /// being true when the pointer shared between two linked_array objects is equal. + template + inline bool operator==(const linked_array& linkedArray1, const linked_array& linkedArray2) + { + return (linkedArray1.get() == linkedArray2.get()); + } + + + /// operator!= + /// Compares two linked_array objects for inequality. Equality is defined as + /// being true when the pointer shared between two linked_array objects is equal. + template + inline bool operator!=(const linked_array& linkedArray1, const linked_array& linkedArray2) + { + return (linkedArray1.get() != linkedArray2.get()); + } + + + /// operator< + /// Returns which linked_array is 'less' than the other. Useful when storing + /// sorted containers of linked_array objects. + template + inline bool operator<(const linked_array& linkedArray1, const linked_array& linkedArray2) + { + return (linkedArray1.get() < linkedArray2.get()); + } + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/linked_ptr.h b/libkram/eastl/include/EASTL/linked_ptr.h new file mode 100644 index 00000000..f57681a9 --- /dev/null +++ b/libkram/eastl/include/EASTL/linked_ptr.h @@ -0,0 +1,426 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_LINKED_PTR_H +#define EASTL_LINKED_PTR_H + + + +#include +#include // Defines smart_ptr_deleter +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// linked_ptr_base + /// + /// This class allows linked_ptr and linked_ptr to share the same + /// base nodes and thus be in the same linked list. + /// + struct linked_ptr_base + { + mutable linked_ptr_base* mpPrev; + mutable linked_ptr_base* mpNext; + }; + + + /// linked_ptr + /// + /// This class implements a linked_ptr template. A linked_ptr is like the C++ + /// Standard Library auto_ptr except that it allows sharing of pointers between + /// instances of auto_ptr via reference counting. 
linked_ptr objects can safely + /// be copied and can safely be used in C++ Standard Library containers such + /// as std::vector or std::list. This implementation, however, is not thread-safe. + /// you would need to use a separate linked_ptr_mt (multi-threaded) to get + /// thread safety. + /// + /// linked_ptr is a variation of shared_ptr (a.k.a. counted_ptr) which differs + /// in that instead of being implemented by a shared integer stored on the heap, + /// it is implemented by linked list stored within the linked_ptr object itself. + /// The result is that no memory is explicitly allocated from the heap, though + /// the cost of each linked_ptr object is 12 bytes of memory (32 bit machine) + /// instead of 4 bytes for the case of shared_ptr (depending on the heap). + /// + template > + class linked_ptr : public linked_ptr_base + { + protected: + template friend class linked_ptr; + + /// this_type + /// This is an alias for linked_ptr, this class. + typedef linked_ptr this_type; + + /// deleter_type + typedef Deleter deleter_type; + + T* mpValue; /// The owned pointer. + + template + void link(const linked_ptr& linkedPtr) + { // This code can only be called when we are in a reset state. + // assert(!mpValue && (mpNext == mpPrev)); + mpNext = linkedPtr.mpNext; + mpNext->mpPrev = this; + mpPrev = const_cast*>(&linkedPtr); + linkedPtr.mpNext = this; + } + + public: + /// element_type + /// Synonym for type T, useful for external code to reference the + /// type in a generic way. + typedef T element_type; + + + /// linked_ptr + /// Default constructor. + linked_ptr() + : mpValue(NULL) + { + mpPrev = mpNext = this; + } + + + /// linked_ptr + /// Takes ownership of the pointer. It is OK if the input pointer is null. + template + explicit linked_ptr(U* pValue) + : mpValue(pValue) + { + mpPrev = mpNext = this; + } + + + /// linked_ptr + /// Construction with self type. + /// If we want a shared_ptr constructor that is templated on linked_ptr, + /// then we need to make it in addition to this function, as otherwise + /// the compiler will generate this function and things will go wrong. + linked_ptr(const linked_ptr& linkedPtr) + : mpValue(linkedPtr.mpValue) + { + if(mpValue) + link(linkedPtr); + else + mpPrev = mpNext = this; + } + + + /// linked_ptr + /// Shares ownership of a pointer with another instance of linked_ptr. + template + linked_ptr(const linked_ptr& linkedPtr) + : mpValue(linkedPtr.mpValue) + { + if(mpValue) + link(linkedPtr); + else + mpPrev = mpNext = this; + } + + + /// ~linked_ptr + /// Removes this object from the of objects using the shared pointer. + /// If this object is the last owner of the shared pointer, the shared + /// pointer is deleted. + ~linked_ptr() + { + reset(); + } + + + /// operator= + /// If we want a shared_ptr operator= that is templated on linked_ptr, + /// then we need to make it in addition to this function, as otherwise + /// the compiler will generate this function and things will go wrong. + linked_ptr& operator=(const linked_ptr& linkedPtr) + { + if(linkedPtr.mpValue != mpValue) + { + reset(linkedPtr.mpValue); + if(linkedPtr.mpValue) + link(linkedPtr); + } + return *this; + } + + + /// operator= + /// Copies another linked_ptr to this object. Note that this object + /// may already own a shared pointer with another different pointer + /// (but still of the same type) before this call. In that case, + /// this function removes ownership of the old pointer and takes shared + /// ownership of the new pointer and increments its reference count. 
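+        ///
+        /// Illustrative sketch (Widget is an assumed type):
+        ///     linked_ptr<Widget> a(new Widget);
+        ///     linked_ptr<Widget> b;
+        ///     b = a;   // a and b are now linked into a single ownership ring
+        ///     // At this point a.use_count() == 2 and a.unique() == false.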
+ template + linked_ptr& operator=(const linked_ptr& linkedPtr) + { + if(linkedPtr.mpValue != mpValue) + { + reset(linkedPtr.mpValue); + if(linkedPtr.mpValue) + link(linkedPtr); + } + return *this; + } + + + /// operator= + /// Assigns a new pointer. If the new pointer is equivalent + /// to the current pointer, nothing is done. Otherwise the + /// current pointer is unlinked and possibly destroyed. + /// The new pointer can be NULL. + template + linked_ptr& operator=(U* pValue) + { + reset(pValue); + return *this; + } + + + /// reset + /// Releases the owned pointer and takes ownership of the + /// passed in pointer. If the passed in pointer is the same + /// as the owned pointer, nothing is done. The passed in pointer + /// can be NULL, in which case the use count is set to 1. + template + void reset(U* pValue) + { + if(pValue != mpValue) + { + if(unique()) + { + deleter_type del; + del(mpValue); + } + else + { + mpPrev->mpNext = mpNext; + mpNext->mpPrev = mpPrev; + mpPrev = mpNext = this; + } + mpValue = pValue; + } + } + + + /// reset + /// Resets the container with NULL. If the current pointer + /// is non-NULL, it is unlinked and possibly destroyed. + void reset() + { + reset((T*)NULL); + } + + + /// swap + /// Exchanges the owned pointer beween two linkedPtr objects. + /// + /// This function is disabled as it is currently deemed unsafe. + /// The problem is that the only way to implement this function + /// is to transfer pointers between the objects; you cannot + /// transfer the linked list membership between the objects. + /// Thus unless both linked_ptr objects were 'unique()', the + /// shared pointers would be duplicated amongst containers, + /// resulting in a crash. + //template + //void swap(linked_ptr& linkedPtr) + //{ + // if(linkedPtr.mpValue != mpValue) + // { // This is only safe if both linked_ptrs are unique(). + // linkedPtr::element_type* const pValueTemp = linkedPtr.mpValue; + // linkedPtr.reset(mpValue); + // reset(pValueTemp); + // } + //} + + + /// operator* + /// Returns the owner pointer dereferenced. + T& operator*() const + { + return *mpValue; + } + + + /// operator-> + /// Allows access to the owned pointer via operator->() + T* operator->() const + { + return mpValue; + } + + + /// get + /// Returns the owned pointer. Note that this class does + /// not provide an operator T() function. This is because such + /// a thing (automatic conversion) is deemed unsafe. + T* get() const + { + return mpValue; + } + + + /// use_count + /// Returns the use count of the shared pointer. + /// The return value is one if the owned pointer is null. + /// This function is provided for compatibility with the + /// proposed C++ standard and for debugging purposes. It is not + /// intended for runtime use given that its execution time is + /// not constant. + int use_count() const + { + int useCount(1); + + for(const linked_ptr_base* pCurrent = static_cast(this); + pCurrent->mpNext != static_cast(this); pCurrent = pCurrent->mpNext) + ++useCount; + + return useCount; + } + + + /// unique + /// Returns true if the use count of the owned pointer is one. + /// The return value is true if the owned pointer is null. + bool unique() const + { + return (mpNext == static_cast(this)); + } + + + /// Implicit operator bool + /// Allows for using a linked_ptr as a boolean. + /// Note that below we do not use operator bool(). The reason for this + /// is that booleans automatically convert up to short, int, float, etc. 
+ /// The result is that this: if(linkedPtr == 1) would yield true (bad). + typedef T* (this_type::*bool_)() const; + operator bool_() const + { + if(mpValue) + return &this_type::get; + return NULL; + } + + + /// operator! + /// This returns the opposite of operator bool; it returns true if + /// the owned pointer is null. Some compilers require this and some don't. + bool operator!() + { + return (mpValue == NULL); + } + + + /// detach + /// Returns ownership of the pointer to the caller. Fixes all + /// references to the pointer by any other owners to be NULL. + /// This function can work properly only if all entries in the list + /// refer to type T and none refer to any other type (e.g. U). + T* detach() + { + T* const pValue = mpValue; + + linked_ptr_base* p = this; + do + { + linked_ptr_base* const pNext = p->mpNext; + static_cast(p)->mpValue = NULL; + p->mpNext = p->mpPrev = p; + p = pNext; + } + while(p != this); + + return pValue; + } + + /// force_delete + /// Forces deletion of the shared pointer. Fixes all references to the + /// pointer by any other owners to be NULL. + /// This function can work properly only if all entries in the list + /// refer to type T and none refer to any other type (e.g. U). + void force_delete() + { + T* const pValue = detach(); + Deleter del; + del(pValue); + } + + }; // class linked_ptr + + + + /// get_pointer + /// Returns linked_ptr::get() via the input linked_ptr. Provided for compatibility + /// with certain well-known libraries that use this functionality. + template + inline T* get_pointer(const linked_ptr& linkedPtr) + { + return linkedPtr.get(); + } + + + /// operator== + /// Compares two linked_ptr objects for equality. Equality is defined as + /// being true when the pointer shared between two linked_ptr objects is equal. + template + inline bool operator==(const linked_ptr& linkedPtr1, const linked_ptr& linkedPtr2) + { + return (linkedPtr1.get() == linkedPtr2.get()); + } + + + /// operator!= + /// Compares two linked_ptr objects for inequality. Equality is defined as + /// being true when the pointer shared between two linked_ptr objects is equal. + template + inline bool operator!=(const linked_ptr& linkedPtr1, const linked_ptr& linkedPtr2) + { + return (linkedPtr1.get() != linkedPtr2.get()); + } + + + /// operator< + /// Returns which linked_ptr is 'less' than the other. Useful when storing + /// sorted containers of linked_ptr objects. + template + inline bool operator<(const linked_ptr& linkedPtr1, const linked_ptr& linkedPtr2) + { + return (linkedPtr1.get() < linkedPtr2.get()); + } + + +} // namespace eastl + + +#endif // Header include guard + + + + + + + + + + + + + + + + + + + + diff --git a/libkram/eastl/include/EASTL/list.h b/libkram/eastl/include/EASTL/list.h new file mode 100644 index 00000000..680dcad7 --- /dev/null +++ b/libkram/eastl/include/EASTL/list.h @@ -0,0 +1,2168 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements a doubly-linked list, much like the C++ std::list class. +// The primary distinctions between this list and std::list are: +// - list doesn't implement some of the less-frequently used functions +// of std::list. Any required functions can be added at a later time. 
+// - list has a couple extension functions that increase performance. +// - list can contain objects with alignment requirements. std::list cannot +// do so without a bit of tedious non-portable effort. +// - list has optimizations that don't exist in the STL implementations +// supplied by library vendors for our targeted platforms. +// - list supports debug memory naming natively. +// - list::size() by default is not a constant time function, like the list::size +// in some std implementations such as STLPort and SGI STL but unlike the +// list in Dinkumware and Metrowerks. The EASTL_LIST_SIZE_CACHE option can change this. +// - list provides a guaranteed portable node definition that allows users +// to write custom fixed size node allocators that are portable. +// - list is easier to read, debug, and visualize. +// - list is savvy to an environment that doesn't have exception handling, +// as is sometimes the case with console or embedded environments. +// - list has less deeply nested function calls and allows the user to +// enable forced inlining in debug builds in order to reduce bloat. +// - list doesn't keep a member size variable. This means that list is +// smaller than std::list (depends on std::list) and that for most operations +// it is faster than std::list. However, the list::size function is slower. +// - list::size_type is defined as eastl_size_t instead of size_t in order to +// save memory and run faster on 64 bit systems. +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_LIST_H +#define EASTL_LIST_H + + +#include +#include +#include +#include +#include +#include +#include + +EA_DISABLE_ALL_VC_WARNINGS() +#include +#include +EA_RESTORE_ALL_VC_WARNINGS() + + +// 4530 - C++ exception handler used, but unwind semantics are not enabled. Specify /EHsc +// 4345 - Behavior change: an object of POD type constructed with an initializer of the form () will be default-initialized +// 4571 - catch(...) semantics changed since Visual C++ 7.1; structured exceptions (SEH) are no longer caught. +// 4623 - default constructor was implicitly defined as deleted +EA_DISABLE_VC_WARNING(4530 4345 4571 4623); + + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// EASTL_LIST_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_LIST_DEFAULT_NAME + #define EASTL_LIST_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " list" // Unless the user overrides something, this is "EASTL list". + #endif + + + /// EASTL_LIST_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_LIST_DEFAULT_ALLOCATOR + #define EASTL_LIST_DEFAULT_ALLOCATOR allocator_type(EASTL_LIST_DEFAULT_NAME) + #endif + + + + /// ListNodeBase + /// + /// We define a ListNodeBase separately from ListNode (below), because it allows + /// us to have non-templated operations such as insert, remove (below), and it + /// makes it so that the list anchor node doesn't carry a T with it, which would + /// waste space and possibly lead to surprising the user due to extra Ts existing + /// that the user didn't explicitly create. The downside to all of this is that + /// it makes debug viewing of a list harder, given that the node pointers are of + /// type ListNodeBase and not ListNode. However, see ListNodeBaseProxy below. 
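// --- Illustrative aside (not part of the EASTL sources in this patch) ---
// The rationale above can be restated with a minimal standalone sketch of the
// same circular, sentinel-anchored linking. An empty ring is a sentinel whose
// mpNext and mpPrev point at itself (see ListBase::DoInit further below), so
// insertion and removal are a handful of pointer writes with no null checks or
// special cases for the empty list. The names SketchNode, sketch_insert_before
// and sketch_remove are invented for illustration only.

struct SketchNode
{
    SketchNode* mpNext;
    SketchNode* mpPrev;
};

// Links pNode into the ring immediately before pNext.
inline void sketch_insert_before(SketchNode* pNode, SketchNode* pNext)
{
    pNode->mpNext = pNext;
    pNode->mpPrev = pNext->mpPrev;
    pNext->mpPrev->mpNext = pNode;
    pNext->mpPrev = pNode;
}

// Unlinks pNode from whatever ring it is in; pNode's own pointers become stale.
inline void sketch_remove(SketchNode* pNode)
{
    pNode->mpNext->mpPrev = pNode->mpPrev;
    pNode->mpPrev->mpNext = pNode->mpNext;
}
// --- End aside ---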
+ /// + struct ListNodeBase + { + ListNodeBase* mpNext; + ListNodeBase* mpPrev; + + void insert(ListNodeBase* pNext) EA_NOEXCEPT; // Inserts this standalone node before the node pNext in pNext's list. + void remove() EA_NOEXCEPT; // Removes this node from the list it's in. Leaves this node's mpNext/mpPrev invalid. + void splice(ListNodeBase* pFirst, ListNodeBase* pLast) EA_NOEXCEPT; // Removes [pFirst,pLast) from the list it's in and inserts it before this in this node's list. + void reverse() EA_NOEXCEPT; // Reverses the order of nodes in the circular list this node is a part of. + static void swap(ListNodeBase& a, ListNodeBase& b) EA_NOEXCEPT; // Swaps the nodes a and b in the lists to which they belong. + + void insert_range(ListNodeBase* pFirst, ListNodeBase* pFinal) EA_NOEXCEPT; // Differs from splice in that first/final aren't in another list. + static void remove_range(ListNodeBase* pFirst, ListNodeBase* pFinal) EA_NOEXCEPT; // + } EASTL_LIST_PROXY_MAY_ALIAS; + + + #if EASTL_LIST_PROXY_ENABLED + + /// ListNodeBaseProxy + /// + /// In debug builds, we define ListNodeBaseProxy to be the same thing as + /// ListNodeBase, except it is templated on the parent ListNode class. + /// We do this because we want users in debug builds to be able to easily + /// view the list's contents in a debugger GUI. We do this only in a debug + /// build for the reasons described above: that ListNodeBase needs to be + /// as efficient as possible and not cause code bloat or extra function + /// calls (inlined or not). + /// + /// ListNodeBaseProxy *must* be separate from its parent class ListNode + /// because the list class must have a member node which contains no T value. + /// It is thus incorrect for us to have one single ListNode class which + /// has mpNext, mpPrev, and mValue. So we do a recursive template trick in + /// the definition and use of SListNodeBaseProxy. + /// + template + struct ListNodeBaseProxy + { + LN* mpNext; + LN* mpPrev; + }; + + template + struct ListNode : public ListNodeBaseProxy< ListNode > + { + T mValue; + }; + + #else + + EA_DISABLE_VC_WARNING(4625 4626) + template + struct ListNode : public ListNodeBase + { + T mValue; + }; + EA_RESTORE_VC_WARNING() + + #endif + + + + + /// ListIterator + /// + template + struct ListIterator + { + typedef ListIterator this_type; + typedef ListIterator iterator; + typedef ListIterator const_iterator; + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. + typedef ptrdiff_t difference_type; + typedef T value_type; + typedef ListNode node_type; + typedef Pointer pointer; + typedef Reference reference; + typedef EASTL_ITC_NS::bidirectional_iterator_tag iterator_category; + + public: + node_type* mpNode; + + public: + ListIterator() EA_NOEXCEPT; + ListIterator(const ListNodeBase* pNode) EA_NOEXCEPT; + ListIterator(const iterator& x) EA_NOEXCEPT; + + this_type next() const EA_NOEXCEPT; + this_type prev() const EA_NOEXCEPT; + + reference operator*() const EA_NOEXCEPT; + pointer operator->() const EA_NOEXCEPT; + + this_type& operator++() EA_NOEXCEPT; + this_type operator++(int) EA_NOEXCEPT; + + this_type& operator--() EA_NOEXCEPT; + this_type operator--(int) EA_NOEXCEPT; + + }; // ListIterator + + + + + /// ListBase + /// + /// See VectorBase (class vector) for an explanation of why we + /// create this separate base class. 
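// (In short, echoing the fuller VectorBase discussion: the base class owns the
// sentinel node and the allocator, and ~ListBase() calls DoClear(), so if a
// derived list constructor throws part-way through populating the container,
// the nodes allocated so far are still released without any try/catch in the
// derived class.)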
+ /// + template + class ListBase + { + public: + typedef T value_type; + typedef Allocator allocator_type; + typedef ListNode node_type; + typedef eastl_size_t size_type; // See config.h for the definition of eastl_size_t, which defaults to size_t. + typedef ptrdiff_t difference_type; + #if EASTL_LIST_PROXY_ENABLED + typedef ListNodeBaseProxy< ListNode > base_node_type; + #else + typedef ListNodeBase base_node_type; // We use ListNodeBase instead of ListNode because we don't want to create a T. + #endif + + protected: + eastl::compressed_pair mNodeAllocator; + #if EASTL_LIST_SIZE_CACHE + size_type mSize; + #endif + + base_node_type& internalNode() EA_NOEXCEPT { return mNodeAllocator.first(); } + base_node_type const& internalNode() const EA_NOEXCEPT { return mNodeAllocator.first(); } + allocator_type& internalAllocator() EA_NOEXCEPT { return mNodeAllocator.second(); } + const allocator_type& internalAllocator() const EA_NOEXCEPT { return mNodeAllocator.second(); } + + public: + const allocator_type& get_allocator() const EA_NOEXCEPT; + allocator_type& get_allocator() EA_NOEXCEPT; + void set_allocator(const allocator_type& allocator); + + protected: + ListBase(); + ListBase(const allocator_type& a); + ~ListBase(); + + node_type* DoAllocateNode(); + void DoFreeNode(node_type* pNode); + + void DoInit() EA_NOEXCEPT; + void DoClear(); + + }; // ListBase + + + + + /// list + /// + /// -- size() is O(n) -- + /// Note that as of this writing, list::size() is an O(n) operation when EASTL_LIST_SIZE_CACHE is disabled. + /// That is, getting the size of the list is not a fast operation, as it requires traversing the list and + /// counting the nodes. We could make list::size() be fast by having a member mSize variable. There are reasons + /// for having such functionality and reasons for not having such functionality. We currently choose + /// to not have a member mSize variable as it would add four bytes to the class, add a tiny amount + /// of processing to functions such as insert and erase, and would only serve to improve the size + /// function, but no others. The alternative argument is that the C++ standard states that std::list + /// should be an O(1) operation (i.e. have a member size variable), most C++ standard library list + /// implementations do so, the size is but an integer which is quick to update, and many users + /// expect to have a fast size function. The EASTL_LIST_SIZE_CACHE option changes this. + /// To consider: Make size caching an optional template parameter. + /// + /// Pool allocation + /// If you want to make a custom memory pool for a list container, your pool + /// needs to contain items of type list::node_type. So if you have a memory + /// pool that has a constructor that takes the size of pool items and the + /// count of pool items, you would do this (assuming that MemoryPool implements + /// the Allocator interface): + /// typedef list WidgetList; // Delare your WidgetList type. + /// MemoryPool myPool(sizeof(WidgetList::node_type), 100); // Make a pool of 100 Widget nodes. + /// WidgetList myList(&myPool); // Create a list that uses the pool. 
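// --- Illustrative aside (not part of the EASTL sources in this patch) ---
// One possible shape for the MemoryPool mentioned above, assuming the usual
// EASTL allocator interface (two allocate overloads, deallocate,
// get_name/set_name, and equality comparison). PoolCore, MemoryPool and every
// detail below are invented for illustration; alignment beyond malloc's
// default, thread safety and error handling are deliberately ignored.

#include <stdlib.h>  // malloc/free, used only by this sketch

// Shared pool state: one buffer carved into fixed-size blocks threaded onto an
// intrusive free list.
struct PoolCore
{
    PoolCore(size_t itemSize, size_t itemCount)
        : mItemSize(itemSize < sizeof(void*) ? sizeof(void*) : itemSize),
          mpStorage(static_cast<char*>(malloc(mItemSize * itemCount))),
          mpFreeList(NULL)
    {
        for(size_t i = 0; i < itemCount; ++i)
        {
            void** ppBlock = reinterpret_cast<void**>(mpStorage + (i * mItemSize));
            *ppBlock = mpFreeList;   // Push block i onto the free list.
            mpFreeList = ppBlock;
        }
    }
    ~PoolCore() { free(mpStorage); }

    size_t mItemSize;
    char*  mpStorage;
    void** mpFreeList;
};

// Copyable allocator handle. Containers hold allocators by value, so the
// handle only points at the shared PoolCore.
class MemoryPool
{
public:
    explicit MemoryPool(PoolCore* pCore, const char* pName = "MemoryPool")
        : mpCore(pCore), mpName(pName) {}

    void* allocate(size_t n, int /*flags*/ = 0)
    {
        if((n > mpCore->mItemSize) || (mpCore->mpFreeList == NULL))
            return NULL;   // A production pool would assert, grow, or fall back.
        void* const pBlock = mpCore->mpFreeList;
        mpCore->mpFreeList = static_cast<void**>(*mpCore->mpFreeList);
        return pBlock;
    }

    void* allocate(size_t n, size_t /*alignment*/, size_t /*offset*/, int flags = 0)
        { return allocate(n, flags); }   // Extra alignment is ignored in this sketch.

    void deallocate(void* p, size_t /*n*/)
    {
        if(p)
        {
            void** ppBlock = static_cast<void**>(p);
            *ppBlock = mpCore->mpFreeList;   // Return the block to the free list.
            mpCore->mpFreeList = ppBlock;
        }
    }

    const char* get_name() const            { return mpName; }
    void        set_name(const char* pName) { mpName = pName; }

    friend bool operator==(const MemoryPool& a, const MemoryPool& b) { return a.mpCore == b.mpCore; }
    friend bool operator!=(const MemoryPool& a, const MemoryPool& b) { return a.mpCore != b.mpCore; }

private:
    PoolCore*   mpCore;
    const char* mpName;
};

// One possible wiring, mirroring the comment above (Widget is hypothetical):
//     typedef eastl::list<Widget, MemoryPool> WidgetList;
//     PoolCore   core(sizeof(WidgetList::node_type), 100);
//     MemoryPool pool(&core);
//     WidgetList widgets(pool);
// --- End aside ---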
+ /// + template + class list : public ListBase + { + typedef ListBase base_type; + typedef list this_type; + + public: + typedef T value_type; + typedef T* pointer; + typedef const T* const_pointer; + typedef T& reference; + typedef const T& const_reference; + typedef ListIterator iterator; + typedef ListIterator const_iterator; + typedef eastl::reverse_iterator reverse_iterator; + typedef eastl::reverse_iterator const_reverse_iterator; + typedef typename base_type::size_type size_type; + typedef typename base_type::difference_type difference_type; + typedef typename base_type::allocator_type allocator_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::base_node_type base_node_type; + + using base_type::mNodeAllocator; + using base_type::DoAllocateNode; + using base_type::DoFreeNode; + using base_type::DoClear; + using base_type::DoInit; + using base_type::get_allocator; + #if EASTL_LIST_SIZE_CACHE + using base_type::mSize; + #endif + using base_type::internalNode; + using base_type::internalAllocator; + + public: + list(); + list(const allocator_type& allocator); + explicit list(size_type n, const allocator_type& allocator = EASTL_LIST_DEFAULT_ALLOCATOR); + list(size_type n, const value_type& value, const allocator_type& allocator = EASTL_LIST_DEFAULT_ALLOCATOR); + list(const this_type& x); + list(const this_type& x, const allocator_type& allocator); + list(this_type&& x); + list(this_type&&, const allocator_type&); + list(std::initializer_list ilist, const allocator_type& allocator = EASTL_LIST_DEFAULT_ALLOCATOR); + + template + list(InputIterator first, InputIterator last); // allocator arg removed because VC7.1 fails on the default arg. To do: Make a second version of this function without a default arg. + + this_type& operator=(const this_type& x); + this_type& operator=(std::initializer_list ilist); + this_type& operator=(this_type&& x); + + // In the case that the two containers' allocators are unequal, swap copies elements instead + // of replacing them in place. In this case swap is an O(n) operation instead of O(1). + void swap(this_type& x); + + void assign(size_type n, const value_type& value); + + template // It turns out that the C++ std::list specifies a two argument + void assign(InputIterator first, InputIterator last); // version of assign that takes (int size, int value). These are not + // iterators, so we need to do a template compiler trick to do the right thing. + void assign(std::initializer_list ilist); + + iterator begin() EA_NOEXCEPT; + const_iterator begin() const EA_NOEXCEPT; + const_iterator cbegin() const EA_NOEXCEPT; + + iterator end() EA_NOEXCEPT; + const_iterator end() const EA_NOEXCEPT; + const_iterator cend() const EA_NOEXCEPT; + + reverse_iterator rbegin() EA_NOEXCEPT; + const_reverse_iterator rbegin() const EA_NOEXCEPT; + const_reverse_iterator crbegin() const EA_NOEXCEPT; + + reverse_iterator rend() EA_NOEXCEPT; + const_reverse_iterator rend() const EA_NOEXCEPT; + const_reverse_iterator crend() const EA_NOEXCEPT; + + bool empty() const EA_NOEXCEPT; + size_type size() const EA_NOEXCEPT; + + void resize(size_type n, const value_type& value); + void resize(size_type n); + + reference front(); + const_reference front() const; + + reference back(); + const_reference back() const; + + template + void emplace_front(Args&&... args); + + template + void emplace_back(Args&&... 
args); + + void push_front(const value_type& value); + void push_front(value_type&& x); + reference push_front(); + void* push_front_uninitialized(); + + void push_back(const value_type& value); + void push_back(value_type&& x); + reference push_back(); + void* push_back_uninitialized(); + + void pop_front(); + void pop_back(); + + template + iterator emplace(const_iterator position, Args&&... args); + + iterator insert(const_iterator position); + iterator insert(const_iterator position, const value_type& value); + iterator insert(const_iterator position, value_type&& x); + iterator insert(const_iterator position, std::initializer_list ilist); + iterator insert(const_iterator position, size_type n, const value_type& value); + + template + iterator insert(const_iterator position, InputIterator first, InputIterator last); + + iterator erase(const_iterator position); + iterator erase(const_iterator first, const_iterator last); + + reverse_iterator erase(const_reverse_iterator position); + reverse_iterator erase(const_reverse_iterator first, const_reverse_iterator last); + + void clear() EA_NOEXCEPT; + void reset_lose_memory() EA_NOEXCEPT; // This is a unilateral reset to an initially empty state. No destructors are called, no deallocation occurs. + + void remove(const T& x); + + template + void remove_if(Predicate); + + void reverse() EA_NOEXCEPT; + + // splice inserts elements in the range [first,last) before position and removes the elements from x. + // In the case that the two containers' allocators are unequal, splice copies elements + // instead of splicing them. In this case elements are not removed from x, and iterators + // into the spliced elements from x continue to point to the original values in x. + void splice(const_iterator position, this_type& x); + void splice(const_iterator position, this_type& x, const_iterator i); + void splice(const_iterator position, this_type& x, const_iterator first, const_iterator last); + void splice(const_iterator position, this_type&& x); + void splice(const_iterator position, this_type&& x, const_iterator i); + void splice(const_iterator position, this_type&& x, const_iterator first, const_iterator last); + + public: + // For merge, see notes for splice regarding the handling of unequal allocators. + void merge(this_type& x); + void merge(this_type&& x); + + template + void merge(this_type& x, Compare compare); + + template + void merge(this_type&& x, Compare compare); + + void unique(); + + template + void unique(BinaryPredicate); + + // Sorting functionality + // This is independent of the global sort algorithms, as lists are + // linked nodes and can be sorted more efficiently by moving nodes + // around in ways that global sort algorithms aren't privy to. + void sort(); + + template + void sort(Compare compare); + + public: + bool validate() const; + int validate_iterator(const_iterator i) const; + + protected: + node_type* DoCreateNode(); + + template + node_type* DoCreateNode(Args&&... args); + + template + void DoAssign(Integer n, Integer value, true_type); + + template + void DoAssign(InputIterator first, InputIterator last, false_type); + + void DoAssignValues(size_type n, const value_type& value); + + template + void DoInsert(ListNodeBase* pNode, Integer n, Integer value, true_type); + + template + void DoInsert(ListNodeBase* pNode, InputIterator first, InputIterator last, false_type); + + void DoInsertValues(ListNodeBase* pNode, size_type n, const value_type& value); + + template + void DoInsertValue(ListNodeBase* pNode, Args&&... 
args); + + void DoErase(ListNodeBase* pNode); + + void DoSwap(this_type& x); + + template + iterator DoSort(iterator i1, iterator end2, size_type n, Compare& compare); + + }; // class list + + + + + + /////////////////////////////////////////////////////////////////////// + // ListNodeBase + /////////////////////////////////////////////////////////////////////// + + // Swaps the nodes a and b in the lists to which they belong. This is similar to + // splicing a into b's list and b into a's list at the same time. + // Works by swapping the members of a and b, and fixes up the lists that a and b + // were part of to point to the new members. + inline void ListNodeBase::swap(ListNodeBase& a, ListNodeBase& b) EA_NOEXCEPT + { + const ListNodeBase temp(a); + a = b; + b = temp; + + if(a.mpNext == &b) + a.mpNext = a.mpPrev = &a; + else + a.mpNext->mpPrev = a.mpPrev->mpNext = &a; + + if(b.mpNext == &a) + b.mpNext = b.mpPrev = &b; + else + b.mpNext->mpPrev = b.mpPrev->mpNext = &b; + } + + + // splices the [first,last) range from its current list into our list before this node. + inline void ListNodeBase::splice(ListNodeBase* first, ListNodeBase* last) EA_NOEXCEPT + { + // We assume that [first, last] are not within our list. + last->mpPrev->mpNext = this; + first->mpPrev->mpNext = last; + this->mpPrev->mpNext = first; + + ListNodeBase* const pTemp = this->mpPrev; + this->mpPrev = last->mpPrev; + last->mpPrev = first->mpPrev; + first->mpPrev = pTemp; + } + + + inline void ListNodeBase::reverse() EA_NOEXCEPT + { + ListNodeBase* pNode = this; + do + { + EA_ANALYSIS_ASSUME(pNode != NULL); + ListNodeBase* const pTemp = pNode->mpNext; + pNode->mpNext = pNode->mpPrev; + pNode->mpPrev = pTemp; + pNode = pNode->mpPrev; + } + while(pNode != this); + } + + + inline void ListNodeBase::insert(ListNodeBase* pNext) EA_NOEXCEPT + { + mpNext = pNext; + mpPrev = pNext->mpPrev; + pNext->mpPrev->mpNext = this; + pNext->mpPrev = this; + } + + + // Removes this node from the list that it's in. Assumes that the + // node is within a list and thus that its prev/next pointers are valid. + inline void ListNodeBase::remove() EA_NOEXCEPT + { + mpNext->mpPrev = mpPrev; + mpPrev->mpNext = mpNext; + } + + + // Inserts the standalone range [pFirst, pFinal] before pPosition. Assumes that the + // range is not within a list and thus that it's prev/next pointers are not valid. + // Assumes that this node is within a list and thus that its prev/next pointers are valid. + inline void ListNodeBase::insert_range(ListNodeBase* pFirst, ListNodeBase* pFinal) EA_NOEXCEPT + { + mpPrev->mpNext = pFirst; + pFirst->mpPrev = mpPrev; + mpPrev = pFinal; + pFinal->mpNext = this; + } + + + // Removes the range [pFirst, pFinal] from the list that it's in. Assumes that the + // range is within a list and thus that its prev/next pointers are valid. + inline void ListNodeBase::remove_range(ListNodeBase* pFirst, ListNodeBase* pFinal) EA_NOEXCEPT + { + pFinal->mpNext->mpPrev = pFirst->mpPrev; + pFirst->mpPrev->mpNext = pFinal->mpNext; + } + + + /////////////////////////////////////////////////////////////////////// + // ListIterator + /////////////////////////////////////////////////////////////////////// + + template + inline ListIterator::ListIterator() EA_NOEXCEPT + : mpNode() // To consider: Do we really need to intialize mpNode? 
+ { + // Empty + } + + + template + inline ListIterator::ListIterator(const ListNodeBase* pNode) EA_NOEXCEPT + : mpNode(static_cast((ListNode*)const_cast(pNode))) // All this casting is in the name of making runtime debugging much easier on the user. + { + // Empty + } + + + template + inline ListIterator::ListIterator(const iterator& x) EA_NOEXCEPT + : mpNode(const_cast(x.mpNode)) + { + // Empty + } + + + template + inline typename ListIterator::this_type + ListIterator::next() const EA_NOEXCEPT + { + return ListIterator(mpNode->mpNext); + } + + + template + inline typename ListIterator::this_type + ListIterator::prev() const EA_NOEXCEPT + { + return ListIterator(mpNode->mpPrev); + } + + + template + inline typename ListIterator::reference + ListIterator::operator*() const EA_NOEXCEPT + { + return mpNode->mValue; + } + + + template + inline typename ListIterator::pointer + ListIterator::operator->() const EA_NOEXCEPT + { + return &mpNode->mValue; + } + + + template + inline typename ListIterator::this_type& + ListIterator::operator++() EA_NOEXCEPT + { + mpNode = static_cast(mpNode->mpNext); + return *this; + } + + + template + inline typename ListIterator::this_type + ListIterator::operator++(int) EA_NOEXCEPT + { + this_type temp(*this); + mpNode = static_cast(mpNode->mpNext); + return temp; + } + + + template + inline typename ListIterator::this_type& + ListIterator::operator--() EA_NOEXCEPT + { + mpNode = static_cast(mpNode->mpPrev); + return *this; + } + + + template + inline typename ListIterator::this_type + ListIterator::operator--(int) EA_NOEXCEPT + { + this_type temp(*this); + mpNode = static_cast(mpNode->mpPrev); + return temp; + } + + + // The C++ defect report #179 requires that we support comparisons between const and non-const iterators. + // Thus we provide additional template paremeters here to support this. The defect report does not + // require us to support comparisons between reverse_iterators and const_reverse_iterators. + template + inline bool operator==(const ListIterator& a, + const ListIterator& b) EA_NOEXCEPT + { + return a.mpNode == b.mpNode; + } + + + template + inline bool operator!=(const ListIterator& a, + const ListIterator& b) EA_NOEXCEPT + { + return a.mpNode != b.mpNode; + } + + + // We provide a version of operator!= for the case where the iterators are of the + // same type. This helps prevent ambiguity errors in the presence of rel_ops. 
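// For example (illustrative; not from the original source):
//     eastl::list<int>::iterator       it  = intList.begin();
//     eastl::list<int>::const_iterator cit = intList.cbegin();
//     bool same = (it == cit);   // mixed const/non-const comparison, per DR #179
// The same-type overload that follows keeps such comparisons unambiguous when a
// generic rel_ops-style operator!= template is also visible.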
+ template + inline bool operator!=(const ListIterator& a, + const ListIterator& b) EA_NOEXCEPT + { + return a.mpNode != b.mpNode; + } + + + + /////////////////////////////////////////////////////////////////////// + // ListBase + /////////////////////////////////////////////////////////////////////// + + template + inline ListBase::ListBase() + : mNodeAllocator(base_node_type(), allocator_type(EASTL_LIST_DEFAULT_NAME)) + #if EASTL_LIST_SIZE_CACHE + , mSize(0) + #endif + { + DoInit(); + } + + template + inline ListBase::ListBase(const allocator_type& allocator) + : mNodeAllocator(base_node_type(), allocator) + #if EASTL_LIST_SIZE_CACHE + , mSize(0) + #endif + { + DoInit(); + } + + + template + inline ListBase::~ListBase() + { + DoClear(); + } + + + template + const typename ListBase::allocator_type& + ListBase::get_allocator() const EA_NOEXCEPT + { + return internalAllocator(); + } + + + template + typename ListBase::allocator_type& + ListBase::get_allocator() EA_NOEXCEPT + { + return internalAllocator(); + } + + + template + inline void ListBase::set_allocator(const allocator_type& allocator) + { + EASTL_ASSERT((internalAllocator() == allocator) || (static_cast(internalNode().mpNext) == &internalNode())); // We can only assign a different allocator if we are empty of elements. + internalAllocator() = allocator; + } + + + template + inline typename ListBase::node_type* + ListBase::DoAllocateNode() + { + node_type* pNode = (node_type*)allocate_memory(internalAllocator(), sizeof(node_type), EASTL_ALIGN_OF(T), 0); + EASTL_ASSERT(pNode != nullptr); + return pNode; + } + + + template + inline void ListBase::DoFreeNode(node_type* p) + { + EASTLFree(internalAllocator(), p, sizeof(node_type)); + } + + + template + inline void ListBase::DoInit() EA_NOEXCEPT + { + internalNode().mpNext = (ListNode*)&internalNode(); + internalNode().mpPrev = (ListNode*)&internalNode(); + } + + + template + inline void ListBase::DoClear() + { + node_type* p = static_cast(internalNode().mpNext); + + while(p != &internalNode()) + { + node_type* const pTemp = p; + p = static_cast(p->mpNext); + pTemp->~node_type(); + EASTLFree(internalAllocator(), pTemp, sizeof(node_type)); + } + } + + + + /////////////////////////////////////////////////////////////////////// + // list + /////////////////////////////////////////////////////////////////////// + + template + inline list::list() + : base_type() + { + // Empty + } + + + template + inline list::list(const allocator_type& allocator) + : base_type(allocator) + { + // Empty + } + + + template + inline list::list(size_type n, const allocator_type& allocator) + : base_type(allocator) + { + DoInsertValues((ListNodeBase*)&internalNode(), n, value_type()); + } + + + template + inline list::list(size_type n, const value_type& value, const allocator_type& allocator) + : base_type(allocator) + { + DoInsertValues((ListNodeBase*)&internalNode(), n, value); + } + + + template + inline list::list(const this_type& x) + : base_type(x.internalAllocator()) + { + DoInsert((ListNodeBase*)&internalNode(), const_iterator((ListNodeBase*)x.internalNode().mpNext), const_iterator((ListNodeBase*)&x.internalNode()), false_type()); + } + + + template + inline list::list(const this_type& x, const allocator_type& allocator) + : base_type(allocator) + { + DoInsert((ListNodeBase*)&internalNode(), const_iterator((ListNodeBase*)x.internalNode().mpNext), const_iterator((ListNodeBase*)&x.internalNode()), false_type()); + } + + + template + inline list::list(this_type&& x) + : 
base_type(eastl::move(x.internalAllocator())) + { + swap(x); + } + + + template + inline list::list(this_type&& x, const allocator_type& allocator) + : base_type(allocator) + { + swap(x); // member swap handles the case that x has a different allocator than our allocator by doing a copy. + } + + + template + inline list::list(std::initializer_list ilist, const allocator_type& allocator) + : base_type(allocator) + { + DoInsert((ListNodeBase*)&internalNode(), ilist.begin(), ilist.end(), false_type()); + } + + + template + template + list::list(InputIterator first, InputIterator last) + : base_type(EASTL_LIST_DEFAULT_ALLOCATOR) + { + //insert(const_iterator((ListNodeBase*)&internalNode()), first, last); + DoInsert((ListNodeBase*)&internalNode(), first, last, is_integral()); + } + + + template + typename list::iterator + inline list::begin() EA_NOEXCEPT + { + return iterator((ListNodeBase*)internalNode().mpNext); + } + + + template + inline typename list::const_iterator + list::begin() const EA_NOEXCEPT + { + return const_iterator((ListNodeBase*)internalNode().mpNext); + } + + + template + inline typename list::const_iterator + list::cbegin() const EA_NOEXCEPT + { + return const_iterator((ListNodeBase*)internalNode().mpNext); + } + + + template + inline typename list::iterator + list::end() EA_NOEXCEPT + { + return iterator((ListNodeBase*)&internalNode()); + } + + + template + inline typename list::const_iterator + list::end() const EA_NOEXCEPT + { + return const_iterator((ListNodeBase*)&internalNode()); + } + + + template + inline typename list::const_iterator + list::cend() const EA_NOEXCEPT + { + return const_iterator((ListNodeBase*)&internalNode()); + } + + + template + inline typename list::reverse_iterator + list::rbegin() EA_NOEXCEPT + { + return reverse_iterator((ListNodeBase*)&internalNode()); + } + + + template + inline typename list::const_reverse_iterator + list::rbegin() const EA_NOEXCEPT + { + return const_reverse_iterator((ListNodeBase*)&internalNode()); + } + + + template + inline typename list::const_reverse_iterator + list::crbegin() const EA_NOEXCEPT + { + return const_reverse_iterator((ListNodeBase*)&internalNode()); + } + + + template + inline typename list::reverse_iterator + list::rend() EA_NOEXCEPT + { + return reverse_iterator((ListNodeBase*)internalNode().mpNext); + } + + + template + inline typename list::const_reverse_iterator + list::rend() const EA_NOEXCEPT + { + return const_reverse_iterator((ListNodeBase*)internalNode().mpNext); + } + + + template + inline typename list::const_reverse_iterator + list::crend() const EA_NOEXCEPT + { + return const_reverse_iterator((ListNodeBase*)internalNode().mpNext); + } + + + template + inline typename list::reference + list::front() + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(static_cast(internalNode().mpNext) == &internalNode())) + EASTL_FAIL_MSG("list::front -- empty container"); + #else + // We allow the user to reference an empty container. + #endif + + return static_cast(internalNode().mpNext)->mValue; + } + + + template + inline typename list::const_reference + list::front() const + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(static_cast(internalNode().mpNext) == &internalNode())) + EASTL_FAIL_MSG("list::front -- empty container"); + #else + // We allow the user to reference an empty container. 
+ #endif + + return static_cast(internalNode().mpNext)->mValue; + } + + + template + inline typename list::reference + list::back() + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(static_cast(internalNode().mpNext) == &internalNode())) + EASTL_FAIL_MSG("list::back -- empty container"); + #else + // We allow the user to reference an empty container. + #endif + + return static_cast(internalNode().mpPrev)->mValue; + } + + + template + inline typename list::const_reference + list::back() const + { + #if EASTL_ASSERT_ENABLED && EASTL_EMPTY_REFERENCE_ASSERT_ENABLED + if (EASTL_UNLIKELY(static_cast(internalNode().mpNext) == &internalNode())) + EASTL_FAIL_MSG("list::back -- empty container"); + #else + // We allow the user to reference an empty container. + #endif + + return static_cast(internalNode().mpPrev)->mValue; + } + + + template + inline bool list::empty() const EA_NOEXCEPT + { + #if EASTL_LIST_SIZE_CACHE + return (mSize == 0); + #else + return static_cast(internalNode().mpNext) == &internalNode(); + #endif + } + + + template + inline typename list::size_type + list::size() const EA_NOEXCEPT + { + #if EASTL_LIST_SIZE_CACHE + return mSize; + #else + #if EASTL_DEBUG + const ListNodeBase* p = (ListNodeBase*)internalNode().mpNext; + size_type n = 0; + while(p != (ListNodeBase*)&internalNode()) + { + ++n; + p = (ListNodeBase*)p->mpNext; + } + return n; + #else + // The following optimizes to slightly better code than the code above. + return (size_type)eastl::distance(const_iterator((ListNodeBase*)internalNode().mpNext), const_iterator((ListNodeBase*)&internalNode())); + #endif + #endif + } + + + template + typename list::this_type& + list::operator=(const this_type& x) + { + if(this != &x) // If not assigning to self... + { + // If (EASTL_ALLOCATOR_COPY_ENABLED == 1) and the current contents are allocated by an + // allocator that's unequal to x's allocator, we need to reallocate our elements with + // our current allocator and reallocate it with x's allocator. If the allocators are + // equal then we can use a more optimal algorithm that doesn't reallocate our elements + // but instead can copy them in place. + + #if EASTL_ALLOCATOR_COPY_ENABLED + bool bSlowerPathwayRequired = (internalAllocator() != x.internalAllocator()); + #else + bool bSlowerPathwayRequired = false; + #endif + + if(bSlowerPathwayRequired) + { + clear(); + + #if EASTL_ALLOCATOR_COPY_ENABLED + internalAllocator() = x.internalAllocator(); + #endif + } + + DoAssign(x.begin(), x.end(), eastl::false_type()); + } + + return *this; + } + + + template + typename list::this_type& + list::operator=(this_type&& x) + { + if(this != &x) + { + clear(); // To consider: Are we really required to clear here? x is going away soon and will clear itself in its dtor. + swap(x); // member swap handles the case that x has a different allocator than our allocator by doing a copy. + } + return *this; + } + + + template + typename list::this_type& + list::operator=(std::initializer_list ilist) + { + DoAssign(ilist.begin(), ilist.end(), false_type()); + return *this; + } + + + template + inline void list::assign(size_type n, const value_type& value) + { + DoAssignValues(n, value); + } + + + // It turns out that the C++ std::list specifies a two argument + // version of assign that takes (int size, int value). These are not + // iterators, so we need to do a template compiler trick to do the right thing. 
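// Concretely (illustrative; not from the original source):
//     eastl::list<int> intList;
//     intList.assign(5, 3);   // intent: five copies of the value 3
// deduces InputIterator as int, so without the dispatch below the arguments 5
// and 3 would be walked as if they were iterators. is_integral<InputIterator>()
// sends integral arguments to DoAssign(n, value, true_type), which forwards to
// DoAssignValues(), while genuine iterator pairs take the false_type path.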
+ template + template + inline void list::assign(InputIterator first, InputIterator last) + { + DoAssign(first, last, is_integral()); + } + + + template + inline void list::assign(std::initializer_list ilist) + { + DoAssign(ilist.begin(), ilist.end(), false_type()); + } + + + template + inline void list::clear() EA_NOEXCEPT + { + DoClear(); + DoInit(); + #if EASTL_LIST_SIZE_CACHE + mSize = 0; + #endif + } + + + template + inline void list::reset_lose_memory() EA_NOEXCEPT + { + // The reset_lose_memory function is a special extension function which unilaterally + // resets the container to an empty state without freeing the memory of + // the contained objects. This is useful for very quickly tearing down a + // container built into scratch memory. + DoInit(); + #if EASTL_LIST_SIZE_CACHE + mSize = 0; + #endif + } + + + template + void list::resize(size_type n, const value_type& value) + { + iterator current((ListNodeBase*)internalNode().mpNext); + size_type i = 0; + + while((current.mpNode != &internalNode()) && (i < n)) + { + ++current; + ++i; + } + if(i == n) + erase(current, (ListNodeBase*)&internalNode()); + else + insert((ListNodeBase*)&internalNode(), n - i, value); + } + + + template + inline void list::resize(size_type n) + { + resize(n, value_type()); + } + + + template + template + void list::emplace_front(Args&&... args) + { + DoInsertValue((ListNodeBase*)internalNode().mpNext, eastl::forward(args)...); + } + + template + template + void list::emplace_back(Args&&... args) + { + DoInsertValue((ListNodeBase*)&internalNode(), eastl::forward(args)...); + } + + + template + inline void list::push_front(const value_type& value) + { + DoInsertValue((ListNodeBase*)internalNode().mpNext, value); + } + + + template + inline void list::push_front(value_type&& value) + { + emplace(begin(), eastl::move(value)); + } + + + template + inline typename list::reference + list::push_front() + { + node_type* const pNode = DoCreateNode(); + ((ListNodeBase*)pNode)->insert((ListNodeBase*)internalNode().mpNext); + #if EASTL_LIST_SIZE_CACHE + ++mSize; + #endif + return static_cast(internalNode().mpNext)->mValue; // Same as return front(); + } + + + template + inline void* list::push_front_uninitialized() + { + node_type* const pNode = DoAllocateNode(); + ((ListNodeBase*)pNode)->insert((ListNodeBase*)internalNode().mpNext); + #if EASTL_LIST_SIZE_CACHE + ++mSize; + #endif + return &pNode->mValue; + } + + + template + inline void list::pop_front() + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(static_cast(internalNode().mpNext) == &internalNode())) + EASTL_FAIL_MSG("list::pop_front -- empty container"); + #endif + + DoErase((ListNodeBase*)internalNode().mpNext); + } + + + template + inline void list::push_back(const value_type& value) + { + DoInsertValue((ListNodeBase*)&internalNode(), value); + } + + + template + inline void list::push_back(value_type&& value) + { + emplace(end(), eastl::move(value)); + } + + + template + inline typename list::reference + list::push_back() + { + node_type* const pNode = DoCreateNode(); + ((ListNodeBase*)pNode)->insert((ListNodeBase*)&internalNode()); + #if EASTL_LIST_SIZE_CACHE + ++mSize; + #endif + return static_cast(internalNode().mpPrev)->mValue; // Same as return back(); + } + + + template + inline void* list::push_back_uninitialized() + { + node_type* const pNode = DoAllocateNode(); + ((ListNodeBase*)pNode)->insert((ListNodeBase*)&internalNode()); + #if EASTL_LIST_SIZE_CACHE + ++mSize; + #endif + return &pNode->mValue; + } + + + template + inline void 
list::pop_back() + { + #if EASTL_ASSERT_ENABLED + if(EASTL_UNLIKELY(static_cast(internalNode().mpNext) == &internalNode())) + EASTL_FAIL_MSG("list::pop_back -- empty container"); + #endif + + DoErase((ListNodeBase*)internalNode().mpPrev); + } + + + template + template + inline typename list::iterator + list::emplace(const_iterator position, Args&&... args) + { + DoInsertValue(position.mpNode, eastl::forward(args)...); + return iterator(position.mpNode->mpPrev); + } + + + template + inline typename list::iterator + list::insert(const_iterator position) + { + node_type* const pNode = DoCreateNode(value_type()); + ((ListNodeBase*)pNode)->insert((ListNodeBase*)position.mpNode); + #if EASTL_LIST_SIZE_CACHE + ++mSize; + #endif + return (ListNodeBase*)pNode; + } + + + template + inline typename list::iterator + list::insert(const_iterator position, const value_type& value) + { + node_type* const pNode = DoCreateNode(value); + ((ListNodeBase*)pNode)->insert((ListNodeBase*)position.mpNode); + #if EASTL_LIST_SIZE_CACHE + ++mSize; + #endif + return (ListNodeBase*)pNode; + } + + + template + inline typename list::iterator + list::insert(const_iterator position, value_type&& value) + { + return emplace(position, eastl::move(value)); + } + + template + inline typename list::iterator + list::insert(const_iterator position, size_type n, const value_type& value) + { + iterator itPrev(position.mpNode); + --itPrev; + DoInsertValues((ListNodeBase*)position.mpNode, n, value); + return ++itPrev; // Inserts in front of position, returns iterator to new elements. + } + + + template + template + inline typename list::iterator + list::insert(const_iterator position, InputIterator first, InputIterator last) + { + iterator itPrev(position.mpNode); + --itPrev; + DoInsert((ListNodeBase*)position.mpNode, first, last, is_integral()); + return ++itPrev; // Inserts in front of position, returns iterator to new elements. + } + + + template + inline typename list::iterator + list::insert(const_iterator position, std::initializer_list ilist) + { + iterator itPrev(position.mpNode); + --itPrev; + DoInsert((ListNodeBase*)position.mpNode, ilist.begin(), ilist.end(), false_type()); + return ++itPrev; // Inserts in front of position, returns iterator to new elements. + } + + + template + inline typename list::iterator + list::erase(const_iterator position) + { + ++position; + DoErase((ListNodeBase*)position.mpNode->mpPrev); + return iterator(position.mpNode); + } + + + template + typename list::iterator + list::erase(const_iterator first, const_iterator last) + { + while(first != last) + first = erase(first); + return iterator(last.mpNode); + } + + + template + inline typename list::reverse_iterator + list::erase(const_reverse_iterator position) + { + return reverse_iterator(erase((++position).base())); + } + + + template + typename list::reverse_iterator + list::erase(const_reverse_iterator first, const_reverse_iterator last) + { + // Version which erases in order from first to last. 
+ // difference_type i(first.base() - last.base()); + // while(i--) + // first = erase(first); + // return first; + + // Version which erases in order from last to first, but is slightly more efficient: + const_iterator itLastBase((++last).base()); + const_iterator itFirstBase((++first).base()); + + return reverse_iterator(erase(itLastBase, itFirstBase)); + } + + + template + void list::remove(const value_type& value) + { + iterator current((ListNodeBase*)internalNode().mpNext); + + while(current.mpNode != &internalNode()) + { + if(EASTL_LIKELY(!(*current == value))) + ++current; // We have duplicate '++current' statements here and below, but the logic here forces this. + else + { + ++current; + DoErase((ListNodeBase*)current.mpNode->mpPrev); + } + } + } + + + template + template + inline void list::remove_if(Predicate predicate) + { + for(iterator first((ListNodeBase*)internalNode().mpNext), last((ListNodeBase*)&internalNode()); first != last; ) + { + iterator temp(first); + ++temp; + if(predicate(first.mpNode->mValue)) + DoErase((ListNodeBase*)first.mpNode); + first = temp; + } + } + + + template + inline void list::reverse() EA_NOEXCEPT + { + ((ListNodeBase&)internalNode()).reverse(); + } + + + template + inline void list::splice(const_iterator position, this_type& x) + { + // Splicing operations cannot succeed if the two containers use unequal allocators. + // This issue is not addressed in the C++ 1998 standard but is discussed in the + // LWG defect reports, such as #431. There is no simple solution to this problem. + // One option is to throw an exception. Another option which probably captures the + // user intent most of the time is to copy the range from the source to the dest and + // remove it from the source. + + if(internalAllocator() == x.internalAllocator()) + { + #if EASTL_LIST_SIZE_CACHE + if(x.mSize) + { + ((ListNodeBase*)position.mpNode)->splice((ListNodeBase*)x.internalNode().mpNext, (ListNodeBase*)&x.internalNode()); + mSize += x.mSize; + x.mSize = 0; + } + #else + if(!x.empty()) + ((ListNodeBase*)position.mpNode)->splice((ListNodeBase*)x.internalNode().mpNext, (ListNodeBase*)&x.internalNode()); + #endif + } + else + { + insert(position, x.begin(), x.end()); + x.clear(); + } + } + + template + inline void list::splice(const_iterator position, this_type&& x) + { + return splice(position, x); // This will call splice(const_iterator, const this_type&); + } + + + template + inline void list::splice(const_iterator position, list& x, const_iterator i) + { + if(internalAllocator() == x.internalAllocator()) + { + iterator i2(i.mpNode); + ++i2; + if((position != i) && (position != i2)) + { + ((ListNodeBase*)position.mpNode)->splice((ListNodeBase*)i.mpNode, (ListNodeBase*)i2.mpNode); + + #if EASTL_LIST_SIZE_CACHE + ++mSize; + --x.mSize; + #endif + } + } + else + { + insert(position, *i); + x.erase(i); + } + } + + + template + inline void list::splice(const_iterator position, list&& x, const_iterator i) + { + return splice(position, x, i); // This will call splice(const_iterator, const this_type&, const_iterator); + } + + + template + inline void list::splice(const_iterator position, this_type& x, const_iterator first, const_iterator last) + { + if(internalAllocator() == x.internalAllocator()) + { + #if EASTL_LIST_SIZE_CACHE + const size_type n = (size_type)eastl::distance(first, last); + + if(n) + { + ((ListNodeBase*)position.mpNode)->splice((ListNodeBase*)first.mpNode, (ListNodeBase*)last.mpNode); + mSize += n; + x.mSize -= n; + } + #else + if(first != last) + 
((ListNodeBase*)position.mpNode)->splice((ListNodeBase*)first.mpNode, (ListNodeBase*)last.mpNode); + #endif + } + else + { + insert(position, first, last); + x.erase(first, last); + } + } + + + template + inline void list::splice(const_iterator position, list&& x, const_iterator first, const_iterator last) + { + return splice(position, x, first, last); // This will call splice(const_iterator, const this_type&, const_iterator, const_iterator); + } + + + template + inline void list::swap(this_type& x) + { + if(internalAllocator() == x.internalAllocator()) // If allocators are equivalent... + DoSwap(x); + else // else swap the contents. + { + const this_type temp(*this); // Can't call eastl::swap because that would + *this = x; // itself call this member swap function. + x = temp; + } + } + + + template + void list::merge(this_type& x) + { + if(this != &x) + { + iterator first(begin()); + iterator firstX(x.begin()); + const iterator last(end()); + const iterator lastX(x.end()); + + while((first != last) && (firstX != lastX)) + { + if(*firstX < *first) + { + iterator next(firstX); + + splice(first, x, firstX, ++next); + firstX = next; + } + else + ++first; + } + + if(firstX != lastX) + splice(last, x, firstX, lastX); + } + } + + + template + void list::merge(this_type&& x) + { + return merge(x); // This will call merge(this_type&) + } + + + template + template + void list::merge(this_type& x, Compare compare) + { + if(this != &x) + { + iterator first(begin()); + iterator firstX(x.begin()); + const iterator last(end()); + const iterator lastX(x.end()); + + while((first != last) && (firstX != lastX)) + { + if(compare(*firstX, *first)) + { + iterator next(firstX); + + splice(first, x, firstX, ++next); + firstX = next; + } + else + ++first; + } + + if(firstX != lastX) + splice(last, x, firstX, lastX); + } + } + + + template + template + void list::merge(this_type&& x, Compare compare) + { + return merge(x, compare); // This will call merge(this_type&, Compare) + } + + + template + void list::unique() + { + iterator first(begin()); + const iterator last(end()); + + if(first != last) + { + iterator next(first); + + while(++next != last) + { + if(*first == *next) + DoErase((ListNodeBase*)next.mpNode); + else + first = next; + next = first; + } + } + } + + + template + template + void list::unique(BinaryPredicate predicate) + { + iterator first(begin()); + const iterator last(end()); + + if(first != last) + { + iterator next(first); + + while(++next != last) + { + if(predicate(*first, *next)) + DoErase((ListNodeBase*)next.mpNode); + else + first = next; + next = first; + } + } + } + + + template + void list::sort() + { + eastl::less compare; + DoSort(begin(), end(), size(), compare); + } + + + template + template + void list::sort(Compare compare) + { + DoSort(begin(), end(), size(), compare); + } + + + template + template + typename list::iterator + list::DoSort(iterator i1, iterator end2, size_type n, Compare& compare) + { + // A previous version of this function did this by creating temporary lists, + // but that was incompatible with fixed_list because the sizes could be too big. + // We sort subsegments by recursive descent. Then merge as we ascend. + // Return an iterator to the beginning of the sorted subsegment. + // Start with a special case for small node counts. + switch (n) + { + case 0: + case 1: + return i1; + + case 2: + // Potentialy swap these two nodes and return the resulting first of them. 
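// (At this point end2 is one past the two-node range, so *--end2 is the second
// node; if it orders before the first, it is unlinked and re-inserted ahead of
// i1 and becomes the new front of the sub-range.)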
+ if(compare(*--end2, *i1)) + { + end2.mpNode->remove(); + end2.mpNode->insert(i1.mpNode); + return end2; + } + return i1; + + case 3: + { + // We do a list insertion sort. Measurements showed this improved performance 3-12%. + iterator lowest = i1; + + for(iterator current = i1.next(); current != end2; ++current) + { + if(compare(*current, *lowest)) + lowest = current; + } + + if(lowest == i1) + ++i1; + else + { + lowest.mpNode->remove(); + lowest.mpNode->insert(i1.mpNode); + } + + if(compare(*--end2, *i1)) // At this point, i1 refers to the second element in this three element segment. + { + end2.mpNode->remove(); + end2.mpNode->insert(i1.mpNode); + } + + return lowest; + } + } + + // Divide the range into two parts are recursively sort each part. Upon return we will have + // two halves that are each sorted but we'll need to merge the two together before returning. + iterator result; + size_type nMid = (n / 2); + iterator end1 = eastl::next(i1, (difference_type)nMid); + i1 = DoSort(i1, end1, nMid, compare); // Return the new beginning of the first sorted sub-range. + iterator i2 = DoSort(end1, end2, n - nMid, compare); // Return the new beginning of the second sorted sub-range. + + // If the start of the second list is before the start of the first list, insert the first list + // into the second at an appropriate starting place. + if(compare(*i2, *i1)) + { + // Find the position to insert the first list into the second list. + iterator ix = i2.next(); + while((ix != end2) && compare(*ix, *i1)) + ++ix; + + // Cut out the initial segment of the second list and move it to be in front of the first list. + ListNodeBase* i2Cut = i2.mpNode; + ListNodeBase* i2CutLast = ix.mpNode->mpPrev; + result = i2; + end1 = i2 = ix; + ListNodeBase::remove_range(i2Cut, i2CutLast); + i1.mpNode->insert_range(i2Cut, i2CutLast); + } + else + { + result = i1; + end1 = i2; + } + + // Merge the two segments. We do this by merging the second sub-segment into the first, by walking forward in each of the two sub-segments. + for(++i1; (i1 != end1) && (i2 != end2); ++i1) // while still working on either segment... + { + if(compare(*i2, *i1)) // If i2 is less than i1 and it needs to be merged in front of i1... + { + // Find the position to insert the i2 list into the i1 list. + iterator ix = i2.next(); + while((ix != end2) && compare(*ix, *i1)) + ++ix; + + // Cut this section of the i2 sub-segment out and merge into the appropriate place in the i1 list. + ListNodeBase* i2Cut = i2.mpNode; + ListNodeBase* i2CutLast = ix.mpNode->mpPrev; + if(end1 == i2) + end1 = ix; + i2 = ix; + ListNodeBase::remove_range(i2Cut, i2CutLast); + i1.mpNode->insert_range(i2Cut, i2CutLast); + } + } + + return result; + } + + + template + template + inline typename list::node_type* + list::DoCreateNode(Args&&... args) + { + node_type* const pNode = DoAllocateNode(); // pNode is of type node_type, but it's uninitialized memory. + + #if EASTL_EXCEPTIONS_ENABLED + try + { + ::new((void*)&pNode->mValue) value_type(eastl::forward(args)...); + } + catch(...) + { + DoFreeNode(pNode); + throw; + } + #else + ::new((void*)&pNode->mValue) value_type(eastl::forward(args)...); + #endif + + return pNode; + } + + + template + inline typename list::node_type* + list::DoCreateNode() + { + node_type* const pNode = DoAllocateNode(); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + ::new((void*)&pNode->mValue) value_type(); + } + catch(...) 
+ { + DoFreeNode(pNode); + throw; + } + #else + ::new((void*)&pNode->mValue) value_type; + #endif + + return pNode; + } + + + template + template + inline void list::DoAssign(Integer n, Integer value, true_type) + { + DoAssignValues(static_cast(n), static_cast(value)); + } + + + template + template + void list::DoAssign(InputIterator first, InputIterator last, false_type) + { + node_type* pNode = static_cast(internalNode().mpNext); + + for(; (pNode != &internalNode()) && (first != last); ++first) + { + pNode->mValue = *first; + pNode = static_cast(pNode->mpNext); + } + + if(first == last) + erase(const_iterator((ListNodeBase*)pNode), (ListNodeBase*)&internalNode()); + else + DoInsert((ListNodeBase*)&internalNode(), first, last, false_type()); + } + + + template + void list::DoAssignValues(size_type n, const value_type& value) + { + node_type* pNode = static_cast(internalNode().mpNext); + + for(; (pNode != &internalNode()) && (n > 0); --n) + { + pNode->mValue = value; + pNode = static_cast(pNode->mpNext); + } + + if(n) + DoInsertValues((ListNodeBase*)&internalNode(), n, value); + else + erase(const_iterator((ListNodeBase*)pNode), (ListNodeBase*)&internalNode()); + } + + + template + template + inline void list::DoInsert(ListNodeBase* pNode, Integer n, Integer value, true_type) + { + DoInsertValues(pNode, static_cast(n), static_cast(value)); + } + + + template + template + inline void list::DoInsert(ListNodeBase* pNode, InputIterator first, InputIterator last, false_type) + { + for(; first != last; ++first) + DoInsertValue(pNode, *first); + } + + + template + inline void list::DoInsertValues(ListNodeBase* pNode, size_type n, const value_type& value) + { + for(; n > 0; --n) + DoInsertValue(pNode, value); + } + + + template + template + inline void list::DoInsertValue(ListNodeBase* pNode, Args&&... args) + { + node_type* const pNodeNew = DoCreateNode(eastl::forward(args)...); + ((ListNodeBase*)pNodeNew)->insert(pNode); + #if EASTL_LIST_SIZE_CACHE + ++mSize; + #endif + } + + + template + inline void list::DoErase(ListNodeBase* pNode) + { + pNode->remove(); + ((node_type*)pNode)->~node_type(); + DoFreeNode(((node_type*)pNode)); + #if EASTL_LIST_SIZE_CACHE + --mSize; + #endif + + /* Test version that uses union intermediates + union + { + ListNodeBase* mpBase; + node_type* mpNode; + } node = { pNode }; + + node.mpNode->~node_type(); + node.mpBase->remove(); + DoFreeNode(node.mpNode); + #if EASTL_LIST_SIZE_CACHE + --mSize; + #endif + */ + } + + + template + inline void list::DoSwap(this_type& x) + { + ListNodeBase::swap((ListNodeBase&)internalNode(), (ListNodeBase&)x.internalNode()); // We need to implement a special swap because we can't do a shallow swap. + eastl::swap(internalAllocator(), x.internalAllocator()); // We do this even if EASTL_ALLOCATOR_COPY_ENABLED is 0. + #if EASTL_LIST_SIZE_CACHE + eastl::swap(mSize, x.mSize); + #endif + } + + + template + inline bool list::validate() const + { + #if EASTL_LIST_SIZE_CACHE + size_type n = 0; + + for(const_iterator i(begin()), iEnd(end()); i != iEnd; ++i) + ++n; + + if(n != mSize) + return false; + #endif + + // To do: More validation. + return true; + } + + + template + inline int list::validate_iterator(const_iterator i) const + { + // To do: Come up with a more efficient mechanism of doing this. 
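// (The scan below classifies i as: found before end() -> valid, current and
// dereferenceable; equal to end() -> valid and current but not dereferenceable;
// otherwise it does not belong to this container and isf_none is returned.)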
+ + for(const_iterator temp = begin(), tempEnd = end(); temp != tempEnd; ++temp) + { + if(temp == i) + return (isf_valid | isf_current | isf_can_dereference); + } + + if(i == end()) + return (isf_valid | isf_current); + + return isf_none; + } + + + + /////////////////////////////////////////////////////////////////////// + // global operators + /////////////////////////////////////////////////////////////////////// + + template + bool operator==(const list& a, const list& b) + { + typename list::const_iterator ia = a.begin(); + typename list::const_iterator ib = b.begin(); + typename list::const_iterator enda = a.end(); + + #if EASTL_LIST_SIZE_CACHE + if(a.size() == b.size()) + { + while((ia != enda) && (*ia == *ib)) + { + ++ia; + ++ib; + } + return (ia == enda); + } + return false; + #else + typename list::const_iterator endb = b.end(); + + while((ia != enda) && (ib != endb) && (*ia == *ib)) + { + ++ia; + ++ib; + } + return (ia == enda) && (ib == endb); + #endif + } + + template + bool operator<(const list& a, const list& b) + { + return eastl::lexicographical_compare(a.begin(), a.end(), b.begin(), b.end()); + } + + template + bool operator!=(const list& a, const list& b) + { + return !(a == b); + } + + template + bool operator>(const list& a, const list& b) + { + return b < a; + } + + template + bool operator<=(const list& a, const list& b) + { + return !(b < a); + } + + template + bool operator>=(const list& a, const list& b) + { + return !(a < b); + } + + template + void swap(list& a, list& b) + { + a.swap(b); + } + + + /////////////////////////////////////////////////////////////////////// + // erase / erase_if + // + // https://en.cppreference.com/w/cpp/container/list/erase2 + /////////////////////////////////////////////////////////////////////// + template + void erase(list& c, const U& value) + { + // Erases all elements that compare equal to value from the container. + c.remove_if([&](auto& elem) { return elem == value; }); + } + + template + void erase_if(list& c, Predicate predicate) + { + // Erases all elements that satisfy the predicate pred from the container. + c.remove_if(predicate); + } + + +} // namespace eastl + + +EA_RESTORE_SN_WARNING() + +EA_RESTORE_VC_WARNING(); + + +#endif // Header include guard diff --git a/libkram/eastl/include/EASTL/map.h b/libkram/eastl/include/EASTL/map.h new file mode 100644 index 00000000..0e6c1d0f --- /dev/null +++ b/libkram/eastl/include/EASTL/map.h @@ -0,0 +1,684 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. +////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_MAP_H +#define EASTL_MAP_H + + +#include +#include +#include +#include + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + + +namespace eastl +{ + + /// EASTL_MAP_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_MAP_DEFAULT_NAME + #define EASTL_MAP_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " map" // Unless the user overrides something, this is "EASTL map". + #endif + + + /// EASTL_MULTIMAP_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. 
+ /// + #ifndef EASTL_MULTIMAP_DEFAULT_NAME + #define EASTL_MULTIMAP_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " multimap" // Unless the user overrides something, this is "EASTL multimap". + #endif + + + /// EASTL_MAP_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_MAP_DEFAULT_ALLOCATOR + #define EASTL_MAP_DEFAULT_ALLOCATOR allocator_type(EASTL_MAP_DEFAULT_NAME) + #endif + + /// EASTL_MULTIMAP_DEFAULT_ALLOCATOR + /// + #ifndef EASTL_MULTIMAP_DEFAULT_ALLOCATOR + #define EASTL_MULTIMAP_DEFAULT_ALLOCATOR allocator_type(EASTL_MULTIMAP_DEFAULT_NAME) + #endif + + + + /// map + /// + /// Implements a canonical map. + /// + /// The large majority of the implementation of this class is found in the rbtree + /// base class. We control the behaviour of rbtree via template parameters. + /// + /// Pool allocation + /// If you want to make a custom memory pool for a map container, your pool + /// needs to contain items of type map::node_type. So if you have a memory + /// pool that has a constructor that takes the size of pool items and the + /// count of pool items, you would do this (assuming that MemoryPool implements + /// the Allocator interface): + /// typedef map, MemoryPool> WidgetMap; // Delare your WidgetMap type. + /// MemoryPool myPool(sizeof(WidgetMap::node_type), 100); // Make a pool of 100 Widget nodes. + /// WidgetMap myMap(&myPool); // Create a map that uses the pool. + /// + template , typename Allocator = EASTLAllocatorType> + class map + : public rbtree, Compare, Allocator, eastl::use_first >, true, true> + { + public: + typedef rbtree, Compare, Allocator, + eastl::use_first >, true, true> base_type; + typedef map this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::key_type key_type; + typedef T mapped_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::iterator iterator; + typedef typename base_type::const_iterator const_iterator; + typedef typename base_type::allocator_type allocator_type; + typedef typename base_type::insert_return_type insert_return_type; + typedef typename base_type::extract_key extract_key; + // Other types are inherited from the base class. + + using base_type::begin; + using base_type::end; + using base_type::find; + using base_type::lower_bound; + using base_type::upper_bound; + using base_type::insert; + using base_type::erase; + + protected: + using base_type::compare; + using base_type::get_compare; + + public: + class value_compare + { + protected: + friend class map; + Compare compare; + value_compare(Compare c) : compare(c) {} + + public: + typedef bool result_type; + typedef value_type first_argument_type; + typedef value_type second_argument_type; + + bool operator()(const value_type& x, const value_type& y) const + { return compare(x.first, y.first); } + }; + + public: + map(const allocator_type& allocator = EASTL_MAP_DEFAULT_ALLOCATOR); + map(const Compare& compare, const allocator_type& allocator = EASTL_MAP_DEFAULT_ALLOCATOR); + map(const this_type& x); + map(this_type&& x); + map(this_type&& x, const allocator_type& allocator); + map(std::initializer_list ilist, const Compare& compare = Compare(), const allocator_type& allocator = EASTL_MAP_DEFAULT_ALLOCATOR); + + template + map(Iterator itBegin, Iterator itEnd); // allocator arg removed because VC7.1 fails on the default arg. To consider: Make a second version of this function without a default arg. 
+ + this_type& operator=(const this_type& x) { return (this_type&)base_type::operator=(x); } + this_type& operator=(std::initializer_list ilist) { return (this_type&)base_type::operator=(ilist); } + this_type& operator=(this_type&& x) { return (this_type&)base_type::operator=(eastl::move(x)); } + + public: + /// This is an extension to the C++ standard. We insert a default-constructed + /// element with the given key. The reason for this is that we can avoid the + /// potentially expensive operation of creating and/or copying a mapped_type + /// object on the stack. Note that C++11 move insertions and variadic emplace + /// support make this extension mostly no longer necessary. + insert_return_type insert(const Key& key); + + value_compare value_comp() const; + + size_type erase(const Key& key); + size_type count(const Key& key) const; + + eastl::pair equal_range(const Key& key); + eastl::pair equal_range(const Key& key) const; + + T& operator[](const Key& key); // Of map, multimap, set, and multimap, only map has operator[]. + T& operator[](Key&& key); + + T& at(const Key& key); + const T& at(const Key& key) const; + + }; // map + + + + + + + /// multimap + /// + /// Implements a canonical multimap. + /// + /// The large majority of the implementation of this class is found in the rbtree + /// base class. We control the behaviour of rbtree via template parameters. + /// + /// Pool allocation + /// If you want to make a custom memory pool for a multimap container, your pool + /// needs to contain items of type multimap::node_type. So if you have a memory + /// pool that has a constructor that takes the size of pool items and the + /// count of pool items, you would do this (assuming that MemoryPool implements + /// the Allocator interface): + /// typedef multimap, MemoryPool> WidgetMap; // Delare your WidgetMap type. + /// MemoryPool myPool(sizeof(WidgetMap::node_type), 100); // Make a pool of 100 Widget nodes. + /// WidgetMap myMap(&myPool); // Create a map that uses the pool. + /// + template , typename Allocator = EASTLAllocatorType> + class multimap + : public rbtree, Compare, Allocator, eastl::use_first >, true, false> + { + public: + typedef rbtree, Compare, Allocator, + eastl::use_first >, true, false> base_type; + typedef multimap this_type; + typedef typename base_type::size_type size_type; + typedef typename base_type::key_type key_type; + typedef T mapped_type; + typedef typename base_type::value_type value_type; + typedef typename base_type::node_type node_type; + typedef typename base_type::iterator iterator; + typedef typename base_type::const_iterator const_iterator; + typedef typename base_type::allocator_type allocator_type; + typedef typename base_type::insert_return_type insert_return_type; + typedef typename base_type::extract_key extract_key; + // Other types are inherited from the base class. 
+ + using base_type::begin; + using base_type::end; + using base_type::find; + using base_type::lower_bound; + using base_type::upper_bound; + using base_type::insert; + using base_type::erase; + + protected: + using base_type::compare; + using base_type::get_compare; + + public: + class value_compare + { + protected: + friend class multimap; + Compare compare; + value_compare(Compare c) : compare(c) {} + + public: + typedef bool result_type; + typedef value_type first_argument_type; + typedef value_type second_argument_type; + + bool operator()(const value_type& x, const value_type& y) const + { return compare(x.first, y.first); } + }; + + public: + multimap(const allocator_type& allocator = EASTL_MULTIMAP_DEFAULT_ALLOCATOR); + multimap(const Compare& compare, const allocator_type& allocator = EASTL_MULTIMAP_DEFAULT_ALLOCATOR); + multimap(const this_type& x); + multimap(this_type&& x); + multimap(this_type&& x, const allocator_type& allocator); + multimap(std::initializer_list ilist, const Compare& compare = Compare(), const allocator_type& allocator = EASTL_MULTIMAP_DEFAULT_ALLOCATOR); + + template + multimap(Iterator itBegin, Iterator itEnd); // allocator arg removed because VC7.1 fails on the default arg. To consider: Make a second version of this function without a default arg. + + this_type& operator=(const this_type& x) { return (this_type&)base_type::operator=(x); } + this_type& operator=(std::initializer_list ilist) { return (this_type&)base_type::operator=(ilist); } + this_type& operator=(this_type&& x) { return (this_type&)base_type::operator=(eastl::move(x)); } + + public: + /// This is an extension to the C++ standard. We insert a default-constructed + /// element with the given key. The reason for this is that we can avoid the + /// potentially expensive operation of creating and/or copying a mapped_type + /// object on the stack. Note that C++11 move insertions and variadic emplace + /// support make this extension mostly no longer necessary. + insert_return_type insert(const Key& key); + + value_compare value_comp() const; + + size_type erase(const Key& key); + size_type count(const Key& key) const; + + eastl::pair equal_range(const Key& key); + eastl::pair equal_range(const Key& key) const; + + /// equal_range_small + /// This is a special version of equal_range which is optimized for the + /// case of there being few or no duplicated keys in the tree. 
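+             /// A minimal usage sketch (illustrative only; Widget is a hypothetical mapped type and
+             /// process() a placeholder for user code):
+             ///     eastl::multimap<int, Widget> widgetMap;
+             ///     auto range = widgetMap.equal_range_small(37);   // expects at most a few duplicates of key 37
+             ///     for(auto it = range.first; it != range.second; ++it)
+             ///         process(it->second);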
+ eastl::pair equal_range_small(const Key& key); + eastl::pair equal_range_small(const Key& key) const; + + private: + // these base member functions are not included in multimaps + using base_type::try_emplace; + using base_type::insert_or_assign; + }; // multimap + + + + + + /////////////////////////////////////////////////////////////////////// + // map + /////////////////////////////////////////////////////////////////////// + + template + inline map::map(const allocator_type& allocator) + : base_type(allocator) + { + } + + + template + inline map::map(const Compare& compare, const allocator_type& allocator) + : base_type(compare, allocator) + { + } + + + template + inline map::map(const this_type& x) + : base_type(x) + { + } + + + template + inline map::map(this_type&& x) + : base_type(eastl::move(x)) + { + } + + template + inline map::map(this_type&& x, const allocator_type& allocator) + : base_type(eastl::move(x), allocator) + { + } + + + template + inline map::map(std::initializer_list ilist, const Compare& compare, const allocator_type& allocator) + : base_type(ilist.begin(), ilist.end(), compare, allocator) + { + } + + + template + template + inline map::map(Iterator itBegin, Iterator itEnd) + : base_type(itBegin, itEnd, Compare(), EASTL_MAP_DEFAULT_ALLOCATOR) + { + } + + + template + inline typename map::insert_return_type + map::insert(const Key& key) + { + return base_type::DoInsertKey(true_type(), key); + } + + + template + inline typename map::value_compare + map::value_comp() const + { + return value_compare(get_compare()); + } + + + template + inline typename map::size_type + map::erase(const Key& key) + { + const iterator it(find(key)); + + if(it != end()) // If it exists... + { + base_type::erase(it); + return 1; + } + return 0; + } + + + template + inline typename map::size_type + map::count(const Key& key) const + { + const const_iterator it(find(key)); + return (it != end()) ? 1 : 0; + } + + + template + inline eastl::pair::iterator, + typename map::iterator> + map::equal_range(const Key& key) + { + // The resulting range will either be empty or have one element, + // so instead of doing two tree searches (one for lower_bound and + // one for upper_bound), we do just lower_bound and see if the + // result is a range of size zero or one. + const iterator itLower(lower_bound(key)); + + if((itLower == end()) || compare(key, itLower.mpNode->mValue.first)) // If at the end or if (key is < itLower)... + return eastl::pair(itLower, itLower); + + iterator itUpper(itLower); + return eastl::pair(itLower, ++itUpper); + } + + + template + inline eastl::pair::const_iterator, + typename map::const_iterator> + map::equal_range(const Key& key) const + { + // See equal_range above for comments. + const const_iterator itLower(lower_bound(key)); + + if((itLower == end()) || compare(key, itLower.mpNode->mValue.first)) // If at the end or if (key is < itLower)... + return eastl::pair(itLower, itLower); + + const_iterator itUpper(itLower); + return eastl::pair(itLower, ++itUpper); + } + + + template + inline T& map::operator[](const Key& key) + { + iterator itLower(lower_bound(key)); // itLower->first is >= key. 
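+         // A single lower_bound walk either lands on the node already holding 'key' or on the
+         // position where it belongs; the check below reuses that result as an insertion hint,
+         // so the common "insert or find" path costs one tree search instead of two.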
+ + if((itLower == end()) || compare(key, (*itLower).first)) + { + itLower = base_type::DoInsertKey(true_type(), itLower, key); + } + + return (*itLower).second; + + // Reference implementation of this function, which may not be as fast: + //iterator it(base_type::insert(eastl::pair(key, T())).first); + //return it->second; + } + + + template + inline T& map::operator[](Key&& key) + { + iterator itLower(lower_bound(key)); // itLower->first is >= key. + + if((itLower == end()) || compare(key, (*itLower).first)) + { + itLower = base_type::DoInsertKey(true_type(), itLower, eastl::move(key)); + } + + return (*itLower).second; + + // Reference implementation of this function, which may not be as fast: + //iterator it(base_type::insert(eastl::pair(key, T())).first); + //return it->second; + } + + + template + inline T& map::at(const Key& key) + { + iterator itLower(lower_bound(key)); // itLower->first is >= key. + + if(itLower == end()) + { + #if EASTL_EXCEPTIONS_ENABLED + throw std::out_of_range("map::at key does not exist"); + #else + EASTL_FAIL_MSG("map::at key does not exist"); + #endif + } + + return (*itLower).second; + } + + + template + inline const T& map::at(const Key& key) const + { + const_iterator itLower(lower_bound(key)); // itLower->first is >= key. + + if(itLower == end()) + { + #if EASTL_EXCEPTIONS_ENABLED + throw std::out_of_range("map::at key does not exist"); + #else + EASTL_FAIL_MSG("map::at key does not exist"); + #endif + } + + return (*itLower).second; + } + + + /////////////////////////////////////////////////////////////////////// + // erase_if + // + // https://en.cppreference.com/w/cpp/container/map/erase_if + /////////////////////////////////////////////////////////////////////// + template + void erase_if(map& c, Predicate predicate) + { + for (auto i = c.begin(), last = c.end(); i != last;) + { + if (predicate(*i)) + { + i = c.erase(i); + } + else + { + ++i; + } + } + } + + + /////////////////////////////////////////////////////////////////////// + // multimap + /////////////////////////////////////////////////////////////////////// + + template + inline multimap::multimap(const allocator_type& allocator) + : base_type(allocator) + { + } + + + template + inline multimap::multimap(const Compare& compare, const allocator_type& allocator) + : base_type(compare, allocator) + { + } + + + template + inline multimap::multimap(const this_type& x) + : base_type(x) + { + } + + + template + inline multimap::multimap(this_type&& x) + : base_type(eastl::move(x)) + { + } + + template + inline multimap::multimap(this_type&& x, const allocator_type& allocator) + : base_type(eastl::move(x), allocator) + { + } + + + template + inline multimap::multimap(std::initializer_list ilist, const Compare& compare, const allocator_type& allocator) + : base_type(ilist.begin(), ilist.end(), compare, allocator) + { + } + + + template + template + inline multimap::multimap(Iterator itBegin, Iterator itEnd) + : base_type(itBegin, itEnd, Compare(), EASTL_MULTIMAP_DEFAULT_ALLOCATOR) + { + } + + + template + inline typename multimap::insert_return_type + multimap::insert(const Key& key) + { + return base_type::DoInsertKey(false_type(), key); + } + + + template + inline typename multimap::value_compare + multimap::value_comp() const + { + return value_compare(get_compare()); + } + + + template + inline typename multimap::size_type + multimap::erase(const Key& key) + { + const eastl::pair range(equal_range(key)); + const size_type n = (size_type)eastl::distance(range.first, range.second); + 
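+         // The count has to be taken before the erase call below, because erasing the
+         // [first, second) range invalidates both iterators held in 'range'.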
base_type::erase(range.first, range.second); + return n; + } + + + template + inline typename multimap::size_type + multimap::count(const Key& key) const + { + const eastl::pair range(equal_range(key)); + return (size_type)eastl::distance(range.first, range.second); + } + + + template + inline eastl::pair::iterator, + typename multimap::iterator> + multimap::equal_range(const Key& key) + { + // There are multiple ways to implement equal_range. The implementation mentioned + // in the C++ standard and which is used by most (all?) commercial STL implementations + // is this: + // return eastl::pair(lower_bound(key), upper_bound(key)); + // + // This does two tree searches -- one for the lower bound and one for the + // upper bound. This works well for the case whereby you have a large container + // and there are lots of duplicated values. We provide an alternative version + // of equal_range called equal_range_small for cases where the user is confident + // that the number of duplicated items is only a few. + + return eastl::pair(lower_bound(key), upper_bound(key)); + } + + + template + inline eastl::pair::const_iterator, + typename multimap::const_iterator> + multimap::equal_range(const Key& key) const + { + // See comments above in the non-const version of equal_range. + return eastl::pair(lower_bound(key), upper_bound(key)); + } + + + template + inline eastl::pair::iterator, + typename multimap::iterator> + multimap::equal_range_small(const Key& key) + { + // We provide alternative version of equal_range here which works faster + // for the case where there are at most small number of potential duplicated keys. + const iterator itLower(lower_bound(key)); + iterator itUpper(itLower); + + while((itUpper != end()) && !compare(key, itUpper.mpNode->mValue.first)) + ++itUpper; + + return eastl::pair(itLower, itUpper); + } + + + template + inline eastl::pair::const_iterator, + typename multimap::const_iterator> + multimap::equal_range_small(const Key& key) const + { + // We provide alternative version of equal_range here which works faster + // for the case where there are at most small number of potential duplicated keys. + const const_iterator itLower(lower_bound(key)); + const_iterator itUpper(itLower); + + while((itUpper != end()) && !compare(key, itUpper.mpNode->mValue.first)) + ++itUpper; + + return eastl::pair(itLower, itUpper); + } + + + + /////////////////////////////////////////////////////////////////////// + // erase_if + // + // https://en.cppreference.com/w/cpp/container/multimap/erase_if + /////////////////////////////////////////////////////////////////////// + template + void erase_if(multimap& c, Predicate predicate) + { + // Erases all elements that satisfy the predicate pred from the container. + for (auto i = c.begin(), last = c.end(); i != last;) + { + if (predicate(*i)) + { + i = c.erase(i); + } + else + { + ++i; + } + } + } + +} // namespace eastl + + +#endif // Header include guard + + + + diff --git a/libkram/eastl/include/EASTL/memory.h b/libkram/eastl/include/EASTL/memory.h new file mode 100644 index 00000000..cf24b41a --- /dev/null +++ b/libkram/eastl/include/EASTL/memory.h @@ -0,0 +1,1685 @@ +/////////////////////////////////////////////////////////////////////////////// +// Copyright (c) Electronic Arts Inc. All rights reserved. 
+/////////////////////////////////////////////////////////////////////////////// + +/////////////////////////////////////////////////////////////////////////////// +// This file implements the following functions from the C++ standard that +// are found in the header: +// +// Temporary memory: +// get_temporary_buffer +// return_temporary_buffer +// +// Utility: +// late_constructed - Extention to standard functionality. +// +// Uninitialized operations: +// These are the same as the copy, fill, and fill_n algorithms, except that +// they *construct* the destination with the source values rather than assign +// the destination with the source values. +// +// uninitialized_copy +// uninitialized_copy_n +// uninitialized_default_construct +// uninitialized_default_construct_n +// uninitialized_move +// uninitialized_move_if_noexcept - Extention to standard functionality. +// uninitialized_move_n +// uninitialized_fill +// uninitialized_fill_n +// uninitialized_value_construct +// uninitialized_value_construct_n +// uninitialized_default_fill - Extention to standard functionality. +// uninitialized_default_fill_n - Extention to standard functionality. +// uninitialized_relocate - Extention to standard functionality. +// uninitialized_copy_ptr - Extention to standard functionality. +// uninitialized_move_ptr - Extention to standard functionality. +// uninitialized_move_ptr_if_noexcept- Extention to standard functionality. +// uninitialized_fill_ptr - Extention to standard functionality. +// uninitialized_fill_n_ptr - Extention to standard functionality. +// uninitialized_copy_fill - Extention to standard functionality. +// uninitialized_fill_copy - Extention to standard functionality. +// uninitialized_copy_copy - Extention to standard functionality. +// +// In-place destructor helpers: +// destruct(T*) - Non-standard extension. +// destruct(first, last) - Non-standard extension. +// destroy_at(T*) +// destroy(first, last) +// destroy_n(first, n) +// +// Alignment +// align +// align_advance - Extention to standard functionality. +// +// Allocator-related +// uses_allocator +// allocator_arg_t +// allocator_arg +// +// Pointers +// pointer_traits +// +/////////////////////////////////////////////////////////////////////////////// + + +#ifndef EASTL_MEMORY_H +#define EASTL_MEMORY_H + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +EA_DISABLE_ALL_VC_WARNINGS() +#include +#include +EA_RESTORE_ALL_VC_WARNINGS() + + +// 4530 - C++ exception handler used, but unwind semantics are not enabled. Specify /EHsc +// 4146 - unary minus operator applied to unsigned type, result still unsigned +// 4571 - catch(...) semantics changed since Visual C++ 7.1; structured exceptions (SEH) are no longer caught. +EA_DISABLE_VC_WARNING(4530 4146 4571); + + +#if defined(EA_PRAGMA_ONCE_SUPPORTED) + #pragma once // Some compilers (e.g. VC++) benefit significantly from using this. We've measured 3-4% build speed improvements in apps as a result. +#endif + + +namespace eastl +{ + + /// EASTL_TEMP_DEFAULT_NAME + /// + /// Defines a default container name in the absence of a user-provided name. + /// + #ifndef EASTL_TEMP_DEFAULT_NAME + #define EASTL_TEMP_DEFAULT_NAME EASTL_DEFAULT_NAME_PREFIX " temp" // Unless the user overrides something, this is "EASTL temp". + #endif + + + /// get_temporary_buffer + /// + /// From the C++ standard, section 20.4.3: + /// 1 Effects: Obtains a pointer to storage sufficient to store up to n adjacent T objects. 
+ /// 2 Returns: A pair containing the buffer's address and capacity (in the units of sizeof(T)), + /// or a pair of 0 values if no storage can be obtained. + /// + /// Note: The return value is space to hold T elements, but no T elements are constructed. + /// + /// Our implementation here differs slightly in that we have alignment, alignmentOffset, and pName arguments. + /// Note that you can use the EASTL_NAME_VAL macro to make names go away in release builds. + /// + /// Example usage: + /// pair pr = get_temporary_buffer(100, 0, 0, EASTL_NAME_VAL("Temp int array")); + /// memset(pr.first, 0, 100 * sizeof(int)); + /// return_temporary_buffer(pr.first); + /// + template + eastl::pair get_temporary_buffer(ptrdiff_t n, size_t alignment = 1, size_t alignmentOffset = 0, const char* pName = EASTL_TEMP_DEFAULT_NAME) + { + EASTLAllocatorType allocator(*EASTLAllocatorDefault(), pName); + return eastl::pair(static_cast(EASTLAllocAligned(allocator, n * sizeof(T), alignment, alignmentOffset)), n); + } + + + /// return_temporary_buffer + /// + /// From the C++ standard, section 20.4.3: + /// 3 Effects: Deallocates the buffer to which p points. + /// 4 Requires: The buffer shall have been previously allocated by get_temporary_buffer. + /// + /// Note: This function merely frees space and does not destruct any T elements. + /// + /// Example usage: + /// pair pr = get_temporary_buffer(300); + /// memset(pr.first, 0, 300 * sizeof(int)); + /// return_temporary_buffer(pr.first, pr.second); + /// + template + void return_temporary_buffer(T* p, ptrdiff_t n = 0) + { + EASTLAllocatorType& allocator(*EASTLAllocatorDefault()); + EASTLFree(allocator, p, n * sizeof(T)); + } + + + + /// late_constructed + /// + /// Implements a smart pointer type which separates the memory allocation of an object from + /// the object's construction. The primary use case is to declare a global variable of the + /// late_construction type, which allows the memory to be global but the constructor executes + /// at some point after main() begins as opposed to before main, which is often dangerous + /// for non-trivial types. + /// + /// The autoConstruct template parameter controls whether the object is automatically default + /// constructed upon first reference or must be manually constructed upon the first use of + /// operator * or ->. autoConstruct is convenient but it causes * and -> to be slightly slower + /// and may result in construction at an inconvenient time. + /// + /// The autoDestruct template parameter controls whether the object, if constructed, is automatically + /// destructed when ~late_constructed() is called or must be manually destructed via a call to + /// destruct(). + /// + /// While construction can be automatic or manual, automatic destruction support is always present. + /// Thus you aren't required in any case to manually call destruct. However, you may safely manually + /// destruct the object at any time before the late_constructed destructor is executed. + /// + /// You may still use late_constructed after calling destruct(), including calling construct() + /// again to reconstruct the instance. destruct returns the late_constructed instance to a + /// state equivalent to before construct was called. 
+ /// + /// Caveat: While late_constructed instances can be declared in global scope and initialize + /// prior to main() executing, you cannot otherwise use such globally declared instances prior + /// to main with guaranteed behavior unless you can ensure that the late_constructed instance + /// is itself constructed prior to your use of it. + /// + /// Example usage (demonstrating manual-construction): + /// late_constructed gWidget; + /// + /// void main(){ + /// gWidget.construct(kScrollbarType, kVertical, "MyScrollbar"); + /// gWidget->SetValue(15); + /// gWidget.destruct(); + /// } + /// + /// Example usage (demonstrating auto-construction): + /// late_constructed gWidget; + /// + /// void main(){ + /// gWidget->SetValue(15); + /// // You may want to call destruct here, but aren't required to do so unless the Widget type requires it. + /// } + /// + template + class late_constructed + { + public: + using this_type = late_constructed; + using value_type = T; + using storage_type = eastl::aligned_storage_t>; + + late_constructed() EA_NOEXCEPT // In the case of the late_constructed instance being at global scope, we rely on the + : mStorage(), mpValue(nullptr) {} // compiler executing this constructor or placing the instance in auto-zeroed-at-startup memory. + + ~late_constructed() + { + if (autoDestruct && mpValue) + (*mpValue).~value_type(); + } + + template + void construct(Args&&... args) + { + if(!mpValue) + mpValue = new (&mStorage) value_type(eastl::forward(args)...); + } + + bool is_constructed() const EA_NOEXCEPT + { return mpValue != nullptr; } + + void destruct() + { + if(mpValue) + { + (*mpValue).~value_type(); + mpValue = nullptr; + } + } + + value_type& operator*() EA_NOEXCEPT + { + if(!mpValue) + construct(); + + EA_ANALYSIS_ASSUME(mpValue); + return *mpValue; + } + + const value_type& operator*() const EA_NOEXCEPT + { + if(!mpValue) + construct(); + + EA_ANALYSIS_ASSUME(mpValue); + return *mpValue; + } + + value_type* operator->() EA_NOEXCEPT + { + if(!mpValue) + construct(); + return mpValue; + } + + const value_type* operator->() const EA_NOEXCEPT + { + if(!mpValue) + construct(); + return mpValue; + } + + value_type* get() EA_NOEXCEPT + { + if(!mpValue) + construct(); + return mpValue; + } + + const value_type* get() const EA_NOEXCEPT + { + if(!mpValue) + construct(); + return mpValue; + } + + protected: + storage_type mStorage; // Declared first because it may have aligment requirements, and it would be more space-efficient if it was first. + value_type* mpValue; + }; + + + // Specialization that doesn't auto-construct on demand. 
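+     // In this specialization operator*, operator-> and get() assert instead of constructing on
+     // demand, so a late_constructed<Widget, false> instance (Widget being a hypothetical type)
+     // must have had construct() called on it before the first dereference.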
+ template + class late_constructed : public late_constructed + { + public: + typedef late_constructed base_type; + + typename base_type::value_type& operator*() EA_NOEXCEPT + { EASTL_ASSERT(base_type::mpValue); return *base_type::mpValue; } + + const typename base_type::value_type& operator*() const EA_NOEXCEPT + { EASTL_ASSERT(base_type::mpValue); return *base_type::mpValue; } + + typename base_type::value_type* operator->() EA_NOEXCEPT + { EASTL_ASSERT(base_type::mpValue); return base_type::mpValue; } + + const typename base_type::value_type* operator->() const EA_NOEXCEPT + { EASTL_ASSERT(base_type::mpValue); return base_type::mpValue; } + + typename base_type::value_type* get() EA_NOEXCEPT + { return base_type::mpValue; } + + const typename base_type::value_type* get() const EA_NOEXCEPT + { return base_type::mpValue; } + }; + + + + /// raw_storage_iterator + /// + /// From the C++11 Standard, section 20.6.10 p1 + /// raw_storage_iterator is provided to enable algorithms to store their results into uninitialized memory. + /// The formal template parameter OutputIterator is required to have its operator* return an object for + /// which operator& is defined and returns a pointer to T, and is also required to satisfy the requirements + /// of an output iterator (24.2.4). + + template + class raw_storage_iterator : public iterator + { + protected: + OutputIterator mIterator; + + public: + explicit raw_storage_iterator(OutputIterator iterator) + : mIterator(iterator) + { + } + + raw_storage_iterator& operator*() + { + return *this; + } + + raw_storage_iterator& operator=(const T& value) + { + ::new(eastl::addressof(*mIterator)) T(value); + return *this; + } + + raw_storage_iterator& operator++() + { + ++mIterator; + return *this; + } + + raw_storage_iterator operator++(int) + { + raw_storage_iterator tempIterator = *this; + ++mIterator; + return tempIterator; + } + }; + + + /// uninitialized_relocate (formerly named uninitialized_move prior to C++11) + /// + /// This utility is deprecated in favor of C++11 rvalue move functionality. + /// + /// uninitialized_relocate takes a constructed sequence of objects and an + /// uninitialized destination buffer. In the case of any exception thrown + /// while moving the objects, any newly constructed objects are guaranteed + /// to be destructed and the input left fully constructed. + /// + /// In the case where you need to do multiple moves atomically, split the + /// calls into uninitialized_relocate_start/abort/commit. + /// + /// uninitialized_relocate_start can possibly throw an exception. If it does, + /// you don't need to do anything. However, if it returns without throwing + /// an exception you need to guarantee that either uninitialized_relocate_abort + /// or uninitialized_relocate_commit is called. + /// + /// Both uninitialized_relocate_abort and uninitialize_move_commit are + /// guaranteed to not throw C++ exceptions. + namespace Internal + { + template + struct uninitialized_relocate_impl + { + template + static ForwardIteratorDest do_move_start(ForwardIterator first, ForwardIterator last, ForwardIteratorDest dest) + { + typedef typename eastl::iterator_traits::value_type value_type; + + #if EASTL_EXCEPTIONS_ENABLED + ForwardIteratorDest origDest(dest); + try + { + #endif + for(; first != last; ++first, ++dest) + ::new((void*)eastl::addressof(*dest)) value_type(*first); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) 
+ { + for(; origDest < dest; ++origDest) + (*origDest).~value_type(); + throw; + } + #endif + + return dest; + } + + template + static ForwardIteratorDest do_move_commit(ForwardIterator first, ForwardIterator last, ForwardIteratorDest dest) //throw() + { + typedef typename eastl::iterator_traits::value_type value_type; + for(; first != last; ++first, ++dest) + (*first).~value_type(); + + return dest; + } + + template + static ForwardIteratorDest do_move_abort(ForwardIterator first, ForwardIterator last, ForwardIteratorDest dest) //throw() + { + typedef typename eastl::iterator_traits::value_type value_type; + for(; first != last; ++first, ++dest) + (*dest).~value_type(); + return dest; + } + }; + + template <> + struct uninitialized_relocate_impl + { + template + static T* do_move_start(T* first, T* last, T* dest) + { + return (T*)memcpy(dest, first, (size_t)((uintptr_t)last - (uintptr_t)first)) + (last - first); + } + + template + static T* do_move_commit(T* first, T* last, T* dest) + { + return dest + (last - first); + } + + template + static T* do_move_abort(T* first, T* last, T* dest) + { + return dest + (last - first); + } + }; + } + + + /// uninitialized_relocate_start, uninitialized_relocate_commit, uninitialized_relocate_abort + /// + /// This utility is deprecated in favor of C++11 rvalue move functionality. + /// + /// After calling uninitialized_relocate_start, if it doesn't throw an exception, + /// both the source and destination iterators point to undefined data. If it + /// does throw an exception, the destination remains uninitialized and the source + /// is as it was before. + /// + /// In order to make the iterators valid again you need to call either uninitialized_relocate_abort + /// or uninitialized_relocate_commit. The abort call makes the original source + /// iterator valid again, and commit makes the destination valid. Both abort + /// and commit are guaranteed to not throw C++ exceptions. + /// + /// Example usage: + /// iterator dest2 = uninitialized_relocate_start(first, last, dest); + /// try { + /// // some code here that might throw an exception + /// } + /// catch(...) 
+ /// { + /// uninitialized_relocate_abort(first, last, dest); + /// throw; + /// } + /// uninitialized_relocate_commit(first, last, dest); + /// + template + inline ForwardIteratorDest uninitialized_relocate_start(ForwardIterator first, ForwardIterator last, ForwardIteratorDest dest) + { + typedef typename eastl::iterator_traits::iterator_category IC; + typedef typename eastl::iterator_traits::value_type value_type_input; + typedef typename eastl::iterator_traits::value_type value_type_output; + + const bool bHasTrivialMove = type_and::value, + is_pointer::value, + is_pointer::value, + is_same::value>::value; + + return Internal::uninitialized_relocate_impl::do_move_start(first, last, dest); + } + + template + inline ForwardIteratorDest uninitialized_relocate_commit(ForwardIterator first, ForwardIterator last, ForwardIteratorDest dest) + { + typedef typename eastl::iterator_traits::iterator_category IC; + typedef typename eastl::iterator_traits::value_type value_type_input; + typedef typename eastl::iterator_traits::value_type value_type_output; + + const bool bHasTrivialMove = type_and::value, + is_pointer::value, + is_pointer::value, + is_same::value>::value; + + return Internal::uninitialized_relocate_impl::do_move_commit(first, last, dest); + } + + template + inline ForwardIteratorDest uninitialized_relocate_abort(ForwardIterator first, ForwardIterator last, ForwardIteratorDest dest) + { + typedef typename eastl::iterator_traits::iterator_category IC; + typedef typename eastl::iterator_traits::value_type value_type_input; + typedef typename eastl::iterator_traits::value_type value_type_output; + + const bool bHasTrivialMove = type_and::value, + is_pointer::value, + is_pointer::value, + is_same::value>::value; + + return Internal::uninitialized_relocate_impl::do_move_abort(first, last, dest); + } + + /// uninitialized_relocate + /// + /// See above for documentation. + /// + template + inline ForwardIteratorDest uninitialized_relocate(ForwardIterator first, ForwardIterator last, ForwardIteratorDest dest) + { + ForwardIteratorDest result = uninitialized_relocate_start(first, last, dest); + eastl::uninitialized_relocate_commit(first, last, dest); + + return result; + } + + + + + + // uninitialized_copy + // + namespace Internal + { + template + inline ForwardIterator uninitialized_copy_impl(InputIterator first, InputIterator last, ForwardIterator dest, true_type) + { + return eastl::copy(first, last, dest); // The copy() in turn will use memcpy for POD types. + } + + template + inline ForwardIterator uninitialized_copy_impl(InputIterator first, InputIterator last, ForwardIterator dest, false_type) + { + typedef typename eastl::iterator_traits::value_type value_type; + ForwardIterator currentDest(dest); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for(; first != last; ++first, ++currentDest) + ::new(static_cast(eastl::addressof(*currentDest))) value_type(*first); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + for(; dest < currentDest; ++dest) + (*dest).~value_type(); + throw; + } + #endif + + return currentDest; + } + } + + /// uninitialized_copy + /// + /// Copies a source range to a destination, copy-constructing the destination with + /// the source values (and not *assigning* the destination with the source values). + /// Returns the end of the destination range (i.e. dest + (last - first)). 
+ /// + /// Declaration: + /// template + /// ForwardIterator uninitialized_copy(InputIterator sourceFirst, InputIterator sourceLast, ForwardIterator destination); + /// + /// Example usage: + /// SomeClass* pArray = malloc(10 * sizeof(SomeClass)); + /// uninitialized_copy(pSourceDataBegin, pSourceDataBegin + 10, pArray); + /// + template + inline ForwardIterator uninitialized_copy(InputIterator first, InputIterator last, ForwardIterator result) + { + typedef typename eastl::iterator_traits::value_type value_type; + + // We use is_trivial, which in the C++11 Standard means is_trivially_copyable and is_trivially_default_constructible. + return Internal::uninitialized_copy_impl(first, last, result, eastl::is_trivial()); + } + + + /// uninitialized_copy_n + /// + /// Copies count elements from a range beginning at first to an uninitialized memory area + /// beginning at dest. The elements in the uninitialized area are constructed using copy constructor. + /// If an exception is thrown during the initialization, the function has no final effects. + /// + /// first: Beginning of the range of the elements to copy. + /// dest: Beginning of the destination range. + /// return value: Iterator of dest type to the element past the last element copied. + /// + namespace Internal + { + template + struct uninitialized_copy_n_impl + { + static ForwardIterator impl(InputIterator first, Count n, ForwardIterator dest) + { + typedef typename eastl::iterator_traits::value_type value_type; + ForwardIterator currentDest(dest); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for(; n > 0; --n, ++first, ++currentDest) + ::new((void*)(eastl::addressof(*currentDest))) value_type(*first); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + for(; dest < currentDest; ++dest) + (*dest).~value_type(); + throw; + } + #endif + + return currentDest; + } + }; + + template + struct uninitialized_copy_n_impl + { + static inline ForwardIterator impl(InputIterator first, Count n, ForwardIterator dest) + { + return eastl::uninitialized_copy(first, first + n, dest); + } + }; + } + + template + inline ForwardIterator uninitialized_copy_n(InputIterator first, Count n, ForwardIterator dest) + { + typedef typename eastl::iterator_traits::iterator_category IC; + return Internal::uninitialized_copy_n_impl::impl(first, n, dest); + } + + + + /// uninitialized_copy_ptr + /// + /// This is a specialization of uninitialized_copy for iterators that are pointers. We use it because + /// internally it uses generic_iterator to make pointers act like regular eastl::iterator. + /// + template + inline Result uninitialized_copy_ptr(First first, Last last, Result result) + { + typedef typename eastl::iterator_traits >::value_type value_type; + const generic_iterator i(Internal::uninitialized_copy_impl(eastl::generic_iterator(first), // generic_iterator makes a pointer act like an iterator. + eastl::generic_iterator(last), + eastl::generic_iterator(result), + eastl::is_trivially_copy_assignable())); + return i.base(); + } + + + + /// uninitialized_move_ptr + /// + /// This is a specialization of uninitialized_move for iterators that are pointers. We use it because + /// internally it uses generic_iterator to make pointers act like regular eastl::iterator. + /// + namespace Internal + { + template + inline ForwardIterator uninitialized_move_impl(InputIterator first, InputIterator last, ForwardIterator dest, true_type) + { + return eastl::copy(first, last, dest); // The copy() in turn will use memcpy for is_trivially_copy_assignable (e.g. 
POD) types. + } + + template + inline ForwardIterator uninitialized_move_impl(InputIterator first, InputIterator last, ForwardIterator dest, false_type) + { + typedef typename eastl::iterator_traits::value_type value_type; + ForwardIterator currentDest(dest); + + // We must run a loop over every element and move-construct it at the new location. + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for(; first != last; ++first, ++currentDest) + ::new((void*)eastl::addressof(*currentDest)) value_type(eastl::move(*first)); // If value_type has a move constructor then it will be used here. + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + // We have a problem here: If an exception occurs while doing the loop below then we will + // have values that were moved from the source to the dest that may need to be moved back + // in the catch. What does the C++11 Standard say about this? And what happens if there's an + // exception while moving them back? We may want to trace through a conforming C++11 Standard + // Library to see what it does and do something similar. Given that rvalue references are + // objects that are going away, we may not need to move the values back, though that has the + // side effect of a certain kind of lost elements problem. + for(; dest < currentDest; ++dest) + (*dest).~value_type(); + throw; + } + #endif + + return currentDest; + } + } + + template + inline Result uninitialized_move_ptr(First first, Last last, Result dest) + { + typedef typename eastl::iterator_traits >::value_type value_type; + const generic_iterator i(Internal::uninitialized_move_impl(eastl::generic_iterator(first), // generic_iterator makes a pointer act like an iterator. + eastl::generic_iterator(last), + eastl::generic_iterator(dest), + eastl::is_trivially_copy_assignable())); // is_trivially_copy_assignable identifies if copy assignment would be as valid as move assignment, which means we have the opportunity to memcpy/memmove optimization. + return i.base(); + } + + + + + /// uninitialized_move + /// + /// Moves a source range to a destination, move-constructing the destination with + /// the source values (and not *assigning* the destination with the source values). + /// Returns the end of the destination range (i.e. dest + (last - first)). + /// + /// uninitialized_move is not part of any current C++ Standard, up to C++14. + /// + /// Declaration: + /// template + /// ForwardIterator uninitialized_move(InputIterator sourceFirst, InputIterator sourceLast, ForwardIterator destination); + /// + /// Example usage: + /// SomeClass* pArray = malloc(10 * sizeof(SomeClass)); + /// uninitialized_move(pSourceDataBegin, pSourceDataBegin + 10, pArray); + /// + template + inline ForwardIterator uninitialized_move(InputIterator first, InputIterator last, ForwardIterator dest) + { + return eastl::uninitialized_copy(eastl::make_move_iterator(first), eastl::make_move_iterator(last), dest); + } + + + /// uninitialized_move_if_noexcept + /// + /// If the iterated type can be moved without exceptions, move construct the dest with the input. Else copy-construct + /// the dest witih the input. If move isn't supported by the compiler, do regular copy. 
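+     /// A usage sketch in the style of the examples above (illustrative; Widget, pSource and
+     /// pArray are hypothetical):
+     ///     Widget* pArray = (Widget*)malloc(10 * sizeof(Widget));
+     ///     eastl::uninitialized_move_if_noexcept(pSource, pSource + 10, pArray);
+     ///     // Widgets whose move constructor is not noexcept are copy-constructed instead,
+     ///     // so a throw part-way through cannot leave the source range half-moved.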
+ /// + template + inline ForwardIterator uninitialized_move_if_noexcept(InputIterator first, InputIterator last, ForwardIterator dest) + { + return eastl::uninitialized_copy(eastl::make_move_if_noexcept_iterator(first), eastl::make_move_if_noexcept_iterator(last), dest); + } + + + /// uninitialized_move_ptr_if_noexcept + /// + template + inline Result uninitialized_move_ptr_if_noexcept(First first, Last last, Result dest) + { + #if EASTL_EXCEPTIONS_ENABLED + return eastl::uninitialized_move_if_noexcept(first, last, dest); + #else + return eastl::uninitialized_move_ptr(first, last, dest); + #endif + } + + + /// uninitialized_move_n + /// + /// Moves count elements from a range beginning at first to an uninitialized memory area + /// beginning at dest. The elements in the uninitialized area are constructed using copy constructor. + /// If an exception is thrown during the initialization, the function has no final effects. + /// + /// first: Beginning of the range of the elements to move. + /// dest: Beginning of the destination range. + /// return value: Iterator of dest type to the element past the last element moved. + /// + template + inline ForwardIterator uninitialized_move_n(InputIterator first, Count n, ForwardIterator dest) + { + return eastl::uninitialized_copy_n(eastl::make_move_iterator(first), n, dest); + } + + // Disable warning C4345 - behavior change: an object of POD type constructed with an initializer of the form () + // will be default-initialized. + // This is the behavior we intend below. + EA_DISABLE_VC_WARNING(4345) + /// uninitialized_default_fill + /// + /// Default-constructs the elements in the destination range. + /// Returns void. It wouldn't be useful to return the end of the destination range, + /// as that is the same as the 'last' input parameter. + /// + /// Declaration: + /// template + /// void uninitialized_default_fill(ForwardIterator destinationFirst, ForwardIterator destinationLast); + /// + template + inline void uninitialized_default_fill(ForwardIterator first, ForwardIterator last) + { + typedef typename eastl::iterator_traits::value_type value_type; + ForwardIterator currentDest(first); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for (; currentDest != last; ++currentDest) + ::new (eastl::addressof(*currentDest)) value_type(); + #if EASTL_EXCEPTIONS_ENABLED + } + catch (...) + { + for (; first < currentDest; ++first) + (*first).~value_type(); + throw; + } + #endif + } + + /// uninitialized_default_fill_n + /// + /// Default-constructs the range of [first, first + n). + /// Returns void as per the C++ standard, though returning the end input iterator + /// value may be of use. + /// + /// Declaration: + /// template + /// void uninitialized_default_fill_n(ForwardIterator destination, Count n); + /// + namespace Internal + { + template + inline void uninitialized_default_fill_n_impl(ForwardIterator first, Count n, false_type) + { + typedef typename eastl::iterator_traits::value_type value_type; + ForwardIterator currentDest(first); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for (; n > 0; --n, ++currentDest) + ::new (eastl::addressof(*currentDest)) value_type(); + #if EASTL_EXCEPTIONS_ENABLED + } + catch (...) 
+ { + for (; first < currentDest; ++first) + (*first).~value_type(); + throw; + } + #endif + } + + template + inline void uninitialized_default_fill_n_impl(ForwardIterator first, Count n, true_type) + { + typedef typename eastl::iterator_traits::value_type value_type; + memset(first, 0, sizeof(value_type) * n); + } + } + + template + inline void uninitialized_default_fill_n(ForwardIterator first, Count n) + { + typedef typename eastl::iterator_traits::value_type value_type; + Internal::uninitialized_default_fill_n_impl(first, n, is_scalar()); + } + EA_RESTORE_VC_WARNING() + + /// uninitialized_default_construct + /// + /// Constructs objects in the uninitialized storage designated by the range [first, last) by default-initialization. + /// + /// Default-initialization: + /// If T is a class, the default constructor is called; otherwise, no initialization is done, resulting in + /// indeterminate values. + /// + /// http://en.cppreference.com/w/cpp/memory/uninitialized_default_construct + /// + template + inline void uninitialized_default_construct(ForwardIterator first, ForwardIterator last) + { + typedef typename eastl::iterator_traits::value_type value_type; + ForwardIterator currentDest(first); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for (; currentDest != last; ++currentDest) + ::new (eastl::addressof(*currentDest)) value_type; + #if EASTL_EXCEPTIONS_ENABLED + } + catch (...) + { + for (; first < currentDest; ++first) + (*first).~value_type(); + throw; + } + #endif + } + + /// uninitialized_default_construct_n + /// + /// Constructs n objects in the uninitialized storage starting at first by default-initialization. + /// + /// http://en.cppreference.com/w/cpp/memory/uninitialized_default_construct_n + /// + template + inline ForwardIterator uninitialized_default_construct_n(ForwardIterator first, Count n) + { + typedef typename eastl::iterator_traits::value_type value_type; + ForwardIterator currentDest(first); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for (; n > 0; --n, ++currentDest) + ::new (eastl::addressof(*currentDest)) value_type; + return currentDest; + #if EASTL_EXCEPTIONS_ENABLED + } + catch (...) + { + for (; first < currentDest; ++first) + (*first).~value_type(); + throw; + } + #endif + } + + /// uninitialized_fill + /// + /// Copy-constructs the elements in the destination range with the given input value. + /// Returns void. It wouldn't be useful to return the end of the destination range, + /// as that is the same as the 'last' input parameter. + /// + /// Declaration: + /// template + /// void uninitialized_fill(ForwardIterator destinationFirst, ForwardIterator destinationLast, const T& value); + /// + namespace Internal + { + template + inline void uninitialized_fill_impl(ForwardIterator first, ForwardIterator last, const T& value, true_type) + { + eastl::fill(first, last, value); + } + + template + void uninitialized_fill_impl(ForwardIterator first, ForwardIterator last, const T& value, false_type) + { + typedef typename eastl::iterator_traits::value_type value_type; + ForwardIterator currentDest(first); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for(; currentDest != last; ++currentDest) + ::new((void*)eastl::addressof(*currentDest)) value_type(value); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) 
+ { + for(; first < currentDest; ++first) + (*first).~value_type(); + throw; + } + #endif + } + } + + template + inline void uninitialized_fill(ForwardIterator first, ForwardIterator last, const T& value) + { + typedef typename eastl::iterator_traits::value_type value_type; + Internal::uninitialized_fill_impl(first, last, value, eastl::is_trivially_copy_assignable()); + } + + /// uninitialized_value_construct + /// + /// Constructs objects in the uninitialized storage range [first, last) by value-initialization. + /// + /// Value-Initialization: + /// If T is a class, the object is default-initialized (after being zero-initialized if T's default + /// constructor is not user-provided/deleted); otherwise, the object is zero-initialized. + /// + /// http://en.cppreference.com/w/cpp/memory/uninitialized_value_construct + /// + template + void uninitialized_value_construct(ForwardIterator first, ForwardIterator last) + { + typedef typename eastl::iterator_traits::value_type value_type; + ForwardIterator currentDest(first); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for (; currentDest != last; ++currentDest) + ::new (eastl::addressof(*currentDest)) value_type(); + #if EASTL_EXCEPTIONS_ENABLED + } + catch (...) + { + for (; first < currentDest; ++first) + (*first).~value_type(); + throw; + } + #endif + } + + /// uninitialized_value_construct_n + /// + /// Constructs n objects in the uninitialized storage starting at first by value-initialization. + /// + /// Value-Initialization: + /// If T is a class, the object is default-initialized (after being zero-initialized if T's default + /// constructor is not user-provided/deleted); otherwise, the object is zero-initialized. + /// + /// http://en.cppreference.com/w/cpp/memory/uninitialized_value_construct_n + /// + template + ForwardIterator uninitialized_value_construct_n(ForwardIterator first, Count n) + { + typedef typename eastl::iterator_traits::value_type value_type; + ForwardIterator currentDest(first); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for (; n > 0; --n, ++currentDest) + ::new (eastl::addressof(*currentDest)) value_type(); + return currentDest; + #if EASTL_EXCEPTIONS_ENABLED + } + catch (...) + { + for (; first < currentDest; ++first) + (*first).~value_type(); + throw; + } + #endif + } + + /// uninitialized_fill_ptr + /// + /// This is a specialization of uninitialized_fill for iterators that are pointers. + /// It exists so that we can declare a value_type for the iterator, which you + /// can't do with a pointer by itself. + /// + template + inline void uninitialized_fill_ptr(T* first, T* last, const T& value) + { + typedef typename eastl::iterator_traits >::value_type value_type; + Internal::uninitialized_fill_impl(eastl::generic_iterator(first), + eastl::generic_iterator(last), value, + eastl::is_trivially_copy_assignable()); + } + + /// uninitialized_fill_n + /// + /// Copy-constructs the range of [first, first + n) with the given input value. + /// Returns void as per the C++ standard, though returning the end input iterator + /// value may be of use. 
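+     /// A brief sketch (illustrative; Widget and pArray are hypothetical):
+     ///     Widget* pArray = (Widget*)malloc(10 * sizeof(Widget));
+     ///     const Widget prototype;
+     ///     eastl::uninitialized_fill_n(pArray, 10, prototype);   // copy-constructs ten Widgets from 'prototype'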
+ /// + /// Declaration: + /// template + /// void uninitialized_fill_n(ForwardIterator destination, Count n, const T& value); + /// + namespace Internal + { + template + inline void uninitialized_fill_n_impl(ForwardIterator first, Count n, const T& value, true_type) + { + eastl::fill_n(first, n, value); + } + + template + void uninitialized_fill_n_impl(ForwardIterator first, Count n, const T& value, false_type) + { + typedef typename eastl::iterator_traits::value_type value_type; + ForwardIterator currentDest(first); + + #if EASTL_EXCEPTIONS_ENABLED + try + { + #endif + for(; n > 0; --n, ++currentDest) + ::new((void*)eastl::addressof(*currentDest)) value_type(value); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + for(; first < currentDest; ++first) + (*first).~value_type(); + throw; + } + #endif + } + } + + template + inline void uninitialized_fill_n(ForwardIterator first, Count n, const T& value) + { + typedef typename eastl::iterator_traits::value_type value_type; + Internal::uninitialized_fill_n_impl(first, n, value, eastl::is_trivially_copy_assignable()); + } + + + + /// uninitialized_fill_n_ptr + /// + /// This is a specialization of uninitialized_fill_n for iterators that are pointers. + /// It exists so that we can declare a value_type for the iterator, which you + /// can't do with a pointer by itself. + /// + template + inline void uninitialized_fill_n_ptr(T* first, Count n, const T& value) + { + typedef typename eastl::iterator_traits >::value_type value_type; + Internal::uninitialized_fill_n_impl(eastl::generic_iterator(first), n, value, eastl::is_trivially_copy_assignable()); + } + + + + + /// uninitialized_copy_fill + /// + /// Copies [first1, last1) into [first2, first2 + (last1 - first1)) then + /// fills [first2 + (last1 - first1), last2) with value. + /// + template + inline void uninitialized_copy_fill(InputIterator first1, InputIterator last1, + ForwardIterator first2, ForwardIterator last2, const T& value) + { + const ForwardIterator mid(eastl::uninitialized_copy(first1, last1, first2)); + + #if EASTL_EXCEPTIONS_ENABLED + typedef typename eastl::iterator_traits::value_type value_type; + try + { + #endif + eastl::uninitialized_fill(mid, last2, value); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + for(; first2 < mid; ++first2) + (*first2).~value_type(); + throw; + } + #endif + } + + + /// uninitialized_move_fill + /// + /// Moves [first1, last1) into [first2, first2 + (last1 - first1)) then + /// fills [first2 + (last1 - first1), last2) with value. + /// + template + inline void uninitialized_move_fill(InputIterator first1, InputIterator last1, + ForwardIterator first2, ForwardIterator last2, const T& value) + { + const ForwardIterator mid(eastl::uninitialized_move(first1, last1, first2)); + + #if EASTL_EXCEPTIONS_ENABLED + typedef typename eastl::iterator_traits::value_type value_type; + try + { + #endif + eastl::uninitialized_fill(mid, last2, value); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + for(; first2 < mid; ++first2) + (*first2).~value_type(); + throw; + } + #endif + } + + + + + + /// uninitialized_fill_copy + /// + /// Fills [result, mid) with value then copies [first, last) into [mid, mid + (last - first)). 
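+     /// A small sketch (illustrative; Widget, pArray and pSource are hypothetical), given raw
+     /// storage for 10 Widgets at pArray and 6 constructed source Widgets at pSource:
+     ///     eastl::uninitialized_fill_copy(pArray, pArray + 4, Widget(), pSource, pSource + 6);
+     ///     // slots [0, 4) are copy-constructed from Widget(), slots [4, 10) from the source range.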
+ /// + template + inline ForwardIterator + uninitialized_fill_copy(ForwardIterator result, ForwardIterator mid, const T& value, InputIterator first, InputIterator last) + { + eastl::uninitialized_fill(result, mid, value); + + #if EASTL_EXCEPTIONS_ENABLED + typedef typename eastl::iterator_traits::value_type value_type; + try + { + #endif + return eastl::uninitialized_copy(first, last, mid); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + for(; result < mid; ++result) + (*result).~value_type(); + throw; + } + #endif + } + + + /// uninitialized_fill_move + /// + /// Fills [result, mid) with value then copies [first, last) into [mid, mid + (last - first)). + /// + template + inline ForwardIterator + uninitialized_fill_move(ForwardIterator result, ForwardIterator mid, const T& value, InputIterator first, InputIterator last) + { + eastl::uninitialized_fill(result, mid, value); + + #if EASTL_EXCEPTIONS_ENABLED + typedef typename eastl::iterator_traits::value_type value_type; + try + { + #endif + return eastl::uninitialized_move(first, last, mid); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + for(; result < mid; ++result) + (*result).~value_type(); + throw; + } + #endif + } + + + + /// uninitialized_copy_copy + /// + /// Copies [first1, last1) into [result, result + (last1 - first1)) then + /// copies [first2, last2) into [result, result + (last1 - first1) + (last2 - first2)). + /// + template + inline ForwardIterator + uninitialized_copy_copy(InputIterator1 first1, InputIterator1 last1, + InputIterator2 first2, InputIterator2 last2, + ForwardIterator result) + { + const ForwardIterator mid(eastl::uninitialized_copy(first1, last1, result)); + + #if EASTL_EXCEPTIONS_ENABLED + typedef typename eastl::iterator_traits::value_type value_type; + try + { + #endif + return eastl::uninitialized_copy(first2, last2, mid); + #if EASTL_EXCEPTIONS_ENABLED + } + catch(...) + { + for(; result < mid; ++result) + (*result).~value_type(); + throw; + } + #endif + } + + + + /// destruct + /// + /// Calls the destructor of a given object. + /// + /// Note that we don't have a specialized version of this for objects + /// with trivial destructors, such as integers. This is because the + /// compiler can already see in our version here that the destructor + /// is a no-op. + /// + template + inline void destruct(T* p) + { + // https://msdn.microsoft.com/query/dev14.query?appId=Dev14IDEF1&l=EN-US&k=k(C4100)&rd=true + // "C4100 can also be issued when code calls a destructor on a otherwise unreferenced parameter + // of primitive type. This is a limitation of the Visual C++ compiler." + EA_UNUSED(p); + p->~T(); + } + + + + // destruct(first, last) + // + template + inline void destruct_impl(ForwardIterator /*first*/, ForwardIterator /*last*/, true_type) // true means the type has a trivial destructor. + { + // Empty. The type has a trivial destructor. + } + + template + inline void destruct_impl(ForwardIterator first, ForwardIterator last, false_type) // false means the type has a significant destructor. + { + typedef typename eastl::iterator_traits::value_type value_type; + + for(; first != last; ++first) + (*first).~value_type(); + } + + /// destruct + /// + /// Calls the destructor on a range of objects. + /// + /// We have a specialization for objects with trivial destructors, such as + /// PODs. In this specialization the destruction of the range is a no-op. 
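+     /// A usage sketch (illustrative; pairs with the uninitialized_* examples above):
+     ///     eastl::destruct(pArray, pArray + 10);   // compiles to nothing when the element type is trivially destructible
+     ///     free(pArray);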
+ /// + template + inline void destruct(ForwardIterator first, ForwardIterator last) + { + typedef typename eastl::iterator_traits::value_type value_type; + destruct_impl(first, last, eastl::has_trivial_destructor()); + } + + + /// destroy_at + /// + /// Calls the destructor of a given object. + /// + /// Note that we don't have a specialized version of this for objects + /// with trivial destructors, such as integers. This is because the + /// compiler can already see in our version here that the destructor + /// is a no-op. + /// + /// This is the same as eastl::destruct but we included for C++17 compliance. + /// + /// http://en.cppreference.com/w/cpp/memory/destroy_at + /// + template + inline void destroy_at(T* p) + { + EA_UNUSED(p); + p->~T(); + } + + + /// destroy + /// + /// Calls the destructor on a range of objects. + /// + /// http://en.cppreference.com/w/cpp/memory/destroy + /// + template + inline void destroy(ForwardIterator first, ForwardIterator last) + { + for (; first != last; ++first) + eastl::destroy_at(eastl::addressof(*first)); + } + + + /// destroy_n + /// + /// Calls the destructor on the n objects in the range. + /// + /// http://en.cppreference.com/w/cpp/memory/destroy_n + /// + template + ForwardIterator destroy_n(ForwardIterator first, Size n) + { + for (; n > 0; ++first, --n) + eastl::destroy_at(eastl::addressof(*first)); + + return first; + } + + + /// align + /// + /// Same as C++11 std::align. http://en.cppreference.com/w/cpp/memory/align + /// If it is possible to fit size bytes of storage aligned by alignment into the buffer pointed to by + /// ptr with length space, the function updates ptr to point to the first possible address of such storage, + /// decreases space by the number of bytes used for alignment, and returns the new ptr value. Otherwise, + /// the function returns NULL and leaves ptr and space unmodified. + /// + /// Example usage: + /// char buffer[512]; + /// size_t space = sizeof(buffer); + /// void* p = buffer; + /// void* p1 = eastl::align(16, 3, p, space); p = (char*)p + 3; space -= 3; + /// void* p2 = eastl::align(32, 78, p, space); p = (char*)p + 78; space -= 78; + /// void* p3 = eastl::align(64, 9, p, space); p = (char*)p + 9; space -= 9; + + inline void* align(size_t alignment, size_t size, void*& ptr, size_t& space) + { + if(space >= size) + { + char* ptrAligned = (char*)(((size_t)ptr + (alignment - 1)) & -alignment); + size_t offset = (size_t)(ptrAligned - (char*)ptr); + + if((space - size) >= offset) // Have to implement this in terms of subtraction instead of addition in order to handle possible overflow. + { + ptr = ptrAligned; + space -= offset; + + return ptrAligned; + } + } + + return NULL; + } + + + /// align_advance + /// + /// Same as align except ptr and space can be adjusted to reflect remaining space. + /// Not present in the C++ Standard. + /// Note that the example code here is similar to align but simpler. + /// + /// Example usage: + /// char buffer[512]; + /// size_t space = sizeof(buffer); + /// void* p = buffer; + /// void* p1 = eastl::align_advance(16, 3, p, space, &p, &space); // p is advanced and space reduced accordingly. 
+ /// align
+ ///
+ /// Same as C++11 std::align. http://en.cppreference.com/w/cpp/memory/align
+ /// If it is possible to fit size bytes of storage aligned by alignment into the buffer pointed to by
+ /// ptr with length space, the function updates ptr to point to the first possible address of such storage,
+ /// decreases space by the number of bytes used for alignment, and returns the new ptr value. Otherwise,
+ /// the function returns NULL and leaves ptr and space unmodified.
+ ///
+ /// Example usage:
+ ///     char   buffer[512];
+ ///     size_t space = sizeof(buffer);
+ ///     void*  p  = buffer;
+ ///     void*  p1 = eastl::align(16,  3, p, space); p = (char*)p +  3; space -=  3;
+ ///     void*  p2 = eastl::align(32, 78, p, space); p = (char*)p + 78; space -= 78;
+ ///     void*  p3 = eastl::align(64,  9, p, space); p = (char*)p +  9; space -=  9;
+ ///
+ inline void* align(size_t alignment, size_t size, void*& ptr, size_t& space)
+ {
+     if(space >= size)
+     {
+         char*  ptrAligned = (char*)(((size_t)ptr + (alignment - 1)) & -alignment);
+         size_t offset     = (size_t)(ptrAligned - (char*)ptr);
+
+         if((space - size) >= offset) // Have to implement this in terms of subtraction instead of addition in order to handle possible overflow.
+         {
+             ptr    = ptrAligned;
+             space -= offset;
+
+             return ptrAligned;
+         }
+     }
+
+     return NULL;
+ }
+
+
+ /// align_advance
+ ///
+ /// Same as align, except that the advanced pointer and remaining space are returned through the
+ /// optional ptrAdvanced/spaceReduced out-parameters instead of modifying ptr and space in place.
+ /// Not present in the C++ Standard.
+ /// The example usage here parallels align's but is simpler, since the caller no longer has to
+ /// advance p and reduce space manually.
+ ///
+ /// Example usage:
+ ///     char   buffer[512];
+ ///     size_t space = sizeof(buffer);
+ ///     void*  p  = buffer;
+ ///     void*  p1 = eastl::align_advance(16,  3, p, space, &p, &space); // p is advanced and space reduced accordingly.
+ ///     void*  p2 = eastl::align_advance(32, 78, p, space, &p, &space);
+ ///     void*  p3 = eastl::align_advance(64,  9, p, space, &p, &space);
+ ///     void*  p4 = eastl::align_advance(16, 33, p, space);
+ ///
+ inline void* align_advance(size_t alignment, size_t size, void* ptr, size_t space, void** ptrAdvanced = NULL, size_t* spaceReduced = NULL)
+ {
+     if(space >= size)
+     {
+         char*  ptrAligned = (char*)(((size_t)ptr + (alignment - 1)) & -alignment);
+         size_t offset     = (size_t)(ptrAligned - (char*)ptr);
+
+         if((space - size) >= offset) // Have to implement this in terms of subtraction instead of addition in order to handle possible overflow.
+         {
+             if(ptrAdvanced)
+                 *ptrAdvanced = (ptrAligned + size);
+             if(spaceReduced)
+                 *spaceReduced = (space - (offset + size));
+
+             return ptrAligned;
+         }
+     }
+
+     return NULL;
+ }
+
+
+ ///////////////////////////////////////////////////////////////////////
+ // uses_allocator
+ //
+ // Determines if the class T has an allocator_type member typedef
+ // to which Allocator is convertible.
+ //
+ // http://en.cppreference.com/w/cpp/memory/uses_allocator
+ //
+ // A program may specialize this template to derive from true_type for a
+ // user-defined type T that does not have a nested allocator_type but
+ // nonetheless can be constructed with an allocator where either:
+ //     - the first argument of a constructor has type allocator_arg_t and
+ //       the second argument has type Allocator.
+ //     or
+ //     - the last argument of a constructor has type Allocator.
+ //
+ // Example behavior:
+ //     uses_allocator<vector, allocator>::value => true
+ //     uses_allocator<int, allocator>::value    => false
+ //
+ // This is useful for writing generic code for containers when you can't
+ // know ahead of time that the container has an allocator_type.
+ ///////////////////////////////////////////////////////////////////////
+
+ template <typename T>
+ struct has_allocator_type_helper
+ {
+ private:
+     template <typename U>
+     static eastl::no_type test(...);
+
+     template <typename U>
+     static eastl::yes_type test(typename U::allocator_type* = NULL);
+
+ public:
+     static const bool value = sizeof(test<T>(NULL)) == sizeof(eastl::yes_type);
+ };
+
+
+ template <typename T, typename Allocator, bool = has_allocator_type_helper<T>::value>
+ struct uses_allocator_impl
+     : public integral_constant<bool, eastl::is_convertible<Allocator, typename T::allocator_type>::value>
+ {
+ };
+
+ template <typename T, typename Allocator>
+ struct uses_allocator_impl<T, Allocator, false>
+     : public eastl::false_type
+ {
+ };
+
+ template <typename T, typename Allocator>
+ struct uses_allocator
+     : public uses_allocator_impl<T, Allocator>{ };
+
+
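+ // Illustrative usage sketch for uses_allocator (an addition for this patch, not from the
+ // upstream EASTL sources). MyContainer and MyAllocator are hypothetical names; the trait
+ // reports true only when the nested allocator_type is convertible from the queried allocator.
+ //
+ //     struct MyAllocator { /* EASTL-style allocator interface */ };
+ //
+ //     struct MyContainer
+ //     {
+ //         typedef MyAllocator allocator_type; // detected by has_allocator_type_helper
+ //     };
+ //
+ //     static_assert( eastl::uses_allocator<MyContainer, MyAllocator>::value, "detected via allocator_type");
+ //     static_assert(!eastl::uses_allocator<int, MyAllocator>::value, "int has no allocator_type");
+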
+ ///////////////////////////////////////////////////////////////////////
+ // pointer_traits
+ //
+ // C++11 Standard section 20.6.3
+ // Provides information about a pointer type, mostly for the purpose
+ // of handling the case where the pointer type isn't a built-in T* but
+ // rather is a class that acts like a pointer.
+ //
+ // A user-defined Pointer has the following properties, by example:
+ //     template <class T1, class T2>
+ //     struct Pointer
+ //     {
+ //         typedef Pointer pointer;            // required for use by pointer_traits.
+ //         typedef T1      element_type;       // optional for use by pointer_traits.
+ //         typedef T2      difference_type;    // optional for use by pointer_traits.
+ //
+ //         template <class Other>
+ //         using rebind = typename Ptr<Other>; // optional for use by pointer_traits.
+ //
+ //         static pointer pointer_to(element_type& obj); // required for use by pointer_traits.
+ //     };
+ //
+ //
+ // Example usage:
+ //     template <typename Pointer>
+ //     typename pointer_traits<Pointer>::element_type& GetElementPointedTo(Pointer p)
+ //         { return *p; }
+ //
+ ///////////////////////////////////////////////////////////////////////
+
+ namespace Internal
+ {
+     // pointer_element_type
+     template <typename T>
+     struct has_element_type // has_element_type<T>::value is true if T has an element_type member typedef.
+     {
+     private:
+         template <typename U> static eastl::no_type  test(...);
+         template <typename U> static eastl::yes_type test(typename U::element_type* = 0);
+     public:
+         static const bool value = sizeof(test<T>(0)) == sizeof(eastl::yes_type);
+     };
+
+     template <typename Pointer, bool = has_element_type<Pointer>::value>
+     struct pointer_element_type
+     {
+         using type = Pointer;
+     };
+
+     template <typename Pointer>
+     struct pointer_element_type<Pointer, true>
+         { typedef typename Pointer::element_type type; };
+
+     template