From ebe36394f22f6dab15bbd7f342cf2eaeeaa0bac3 Mon Sep 17 00:00:00 2001 From: Kai Zhao Date: Wed, 1 Nov 2023 09:55:22 -0400 Subject: [PATCH 01/23] new huffman --- include/SZ3/encoder/HuffmanEncoder.hpp | 1151 ++++++++++++------------ include/SZ3/utils/ByteUtil.hpp | 63 +- 2 files changed, 648 insertions(+), 566 deletions(-) diff --git a/include/SZ3/encoder/HuffmanEncoder.hpp b/include/SZ3/encoder/HuffmanEncoder.hpp index d515f04d..a1de6308 100644 --- a/include/SZ3/encoder/HuffmanEncoder.hpp +++ b/include/SZ3/encoder/HuffmanEncoder.hpp @@ -1,5 +1,5 @@ -#ifndef _SZ_HUFFMAN_ENCODER_HPP -#define _SZ_HUFFMAN_ENCODER_HPP +#ifndef _SZ_HUFFMAN_ENCODER_LZ_HPP +#define _SZ_HUFFMAN_ENCODER_LZ_HPP #include "SZ3/def.hpp" #include "SZ3/encoder/Encoder.hpp" @@ -7,642 +7,663 @@ #include "SZ3/utils/MemoryUtil.hpp" #include "SZ3/utils/Timer.hpp" #include "SZ3/utils/ska_hash/unordered_map.hpp" -#include -#include -#include -#include #include +#include #include +#include #include #include -#include +#include +#include + +namespace SZ3{ + + template + class HuffmanEncoder:public concepts::EncoderInterface{ + private: -namespace SZ3 { + class Node{ + public: - template - class HuffmanEncoder : public concepts::EncoderInterface { + Node(T c_=0,Node *lp=nullptr,Node *rp=nullptr){ - public: + c=c_; + p[0]=lp; + p[1]=rp; + } - typedef struct node_t { - struct node_t *left, *right; - size_t freq; - char t; //in_node:0; otherwise:1 T c; - } *node; - - typedef struct HuffmanTree { - unsigned int stateNum; - unsigned int allNodes; - struct node_t *pool; - node *qqq, *qq; //the root node of the HuffmanTree is qq[1] - int n_nodes; //n_nodes is for compression - int qend; - uint64_t **code; - unsigned char *cout; - int n_inode; //n_inode is for decompression - int maxBitCount; - } HuffmanTree; - - - HuffmanEncoder() { - int x = 1; - char *y = (char *) &x; - if (*y == 1) - sysEndianType = 0; - else //=0 - sysEndianType = 1; - } + Node *p[2]; - ~HuffmanEncoder() { - SZ_FreeHuffman(); - } + inline 
uchar isLeaf(){ - //build huffman tree - HuffmanTree *createHuffmanTree(int stateNum) { - HuffmanTree *huffmanTree = (HuffmanTree *) malloc(sizeof(HuffmanTree)); - memset(huffmanTree, 0, sizeof(HuffmanTree)); - huffmanTree->stateNum = stateNum; - huffmanTree->allNodes = 2 * stateNum; - - huffmanTree->pool = (struct node_t *) malloc(huffmanTree->allNodes * 2 * sizeof(struct node_t)); - huffmanTree->qqq = (node *) malloc(huffmanTree->allNodes * 2 * sizeof(node)); - huffmanTree->code = (uint64_t **) malloc(huffmanTree->stateNum * sizeof(uint64_t *)); - huffmanTree->cout = (unsigned char *) malloc(huffmanTree->stateNum * sizeof(unsigned char)); - - memset(huffmanTree->pool, 0, huffmanTree->allNodes * 2 * sizeof(struct node_t)); - memset(huffmanTree->qqq, 0, huffmanTree->allNodes * 2 * sizeof(node)); - memset(huffmanTree->code, 0, huffmanTree->stateNum * sizeof(uint64_t *)); - memset(huffmanTree->cout, 0, huffmanTree->stateNum * sizeof(unsigned char)); - huffmanTree->qq = huffmanTree->qqq - 1; - huffmanTree->n_nodes = 0; - huffmanTree->n_inode = 0; - huffmanTree->qend = 1; - - return huffmanTree; - } + return p[0]==nullptr; + } + }; - /** - * build huffman tree using bins - * @param bins - * @param stateNum - */ - void preprocess_encode(const std::vector &bins, int stateNum) { - preprocess_encode(bins.data(), bins.size(), stateNum); - } + class HuffmanTree{ + + private: + + uchar _constructed=0; - /** - * build huffman tree using bins - * @param bins - * @param num_bin - * @param stateNum - */ - void preprocess_encode(const T *bins, size_t num_bin, int stateNum) { - nodeCount = 0; - if (num_bin == 0) { - printf("Huffman bins should not be empty\n"); - exit(0); + uchar len=0; + int vec=0; + + void dfs(Node* u){ + + if(u->isLeaf()){ + + mplen[u->c]=len; + mpcode[u->c]=vec; + + limit=std::max(limit,len); + + return; + } + + ++len; + dfs(u->p[0]); + --len; + + vec^=1<p[1]); + vec^=1<<--len; } - init(bins, num_bin); - for (int i = 0; i < huffmanTree->stateNum; i++) - if 
(huffmanTree->code[i]) nodeCount++; - nodeCount = nodeCount * 2 - 1; - } - //save the huffman Tree in the compressed data - void save(uchar *&c) { - auto cc = c; - write(offset, c); - int32ToBytes_bigEndian(c, nodeCount); - c += sizeof(int); - int32ToBytes_bigEndian(c, huffmanTree->stateNum / 2); - c += sizeof(int); - uint totalSize = 0;// = convert_HuffTree_to_bytes_anyStates(nodeCount, c); - // std::cout << "nodeCount = " << nodeCount << std::endl; - if (nodeCount <= 256) - totalSize = convert_HuffTree_to_bytes_anyStates(nodeCount, c); - else if (nodeCount <= 65536) - totalSize = convert_HuffTree_to_bytes_anyStates(nodeCount, c); - else - totalSize = convert_HuffTree_to_bytes_anyStates(nodeCount, c); - c += totalSize; -// return c - cc; - } + class cmp{ + public: + bool operator()(const std::pair& u, const std::pair& v) { + return u.second==v.second?u.first>v.first:u.second>v.second; + } + }; - size_t size_est() { - size_t b = (nodeCount <= 256) ? sizeof(unsigned char) : ((nodeCount <= 65536) ? sizeof(unsigned short) : sizeof(unsigned int)); - return 1 + 2 * nodeCount * b + nodeCount * sizeof(unsigned char) + nodeCount * sizeof(T) + sizeof(int) + sizeof(int) + sizeof(T); - } + public: - //perform encoding - size_t encode(const std::vector &bins, uchar *&bytes) { - return encode(bins.data(), bins.size(), bytes); - } + std::vector mplen; + std::vector mpcode; - //perform encoding - size_t encode(const T *bins, size_t num_bin, uchar *&bytes) { - size_t outSize = 0; - size_t i = 0; - unsigned char bitSize = 0, byteSize, byteSizep; - int state; - uchar *p = bytes + sizeof(size_t); - int lackBits = 0; - //int64_t totalBitSize = 0, maxBitSize = 0, bitSize21 = 0, bitSize32 = 0; - for (i = 0; i < num_bin; i++) { - state = bins[i] - offset; - bitSize = huffmanTree->cout[state]; - - if (lackBits == 0) { - byteSize = bitSize % 8 == 0 ? 
bitSize / 8 : bitSize / 8 + - 1; //it's equal to the number of bytes involved (for *outSize) - byteSizep = bitSize / 8; //it's used to move the pointer p for next data - if (byteSize <= 8) { - int64ToBytes_bigEndian(p, (huffmanTree->code[state])[0]); - p += byteSizep; - } else //byteSize>8 - { - int64ToBytes_bigEndian(p, (huffmanTree->code[state])[0]); - p += 8; - int64ToBytes_bigEndian(p, (huffmanTree->code[state])[1]); - p += (byteSizep - 8); - } - outSize += byteSize; - lackBits = bitSize % 8 == 0 ? 0 : 8 - bitSize % 8; - } else { - *p = (*p) | (unsigned char) ((huffmanTree->code[state])[0] >> (64 - lackBits)); - if (lackBits < bitSize) { - p++; - - int64_t newCode = (huffmanTree->code[state])[0] << lackBits; - int64ToBytes_bigEndian(p, newCode); - - if (bitSize <= 64) { - bitSize -= lackBits; - byteSize = bitSize % 8 == 0 ? bitSize / 8 : bitSize / 8 + 1; - byteSizep = bitSize / 8; - p += byteSizep; - outSize += byteSize; - lackBits = bitSize % 8 == 0 ? 0 : 8 - bitSize % 8; - } else //bitSize > 64 - { - byteSizep = 7; //must be 7 bytes, because lackBits!=0 - p += byteSizep; - outSize += byteSize; - - bitSize -= 64; - if (lackBits < bitSize) { - *p = (*p) | (unsigned char) ((huffmanTree->code[state])[0] >> (64 - lackBits)); - p++; - newCode = (huffmanTree->code[state])[1] << lackBits; - int64ToBytes_bigEndian(p, newCode); - bitSize -= lackBits; - byteSize = bitSize % 8 == 0 ? bitSize / 8 : bitSize / 8 + 1; - byteSizep = bitSize / 8; - p += byteSizep; - outSize += byteSize; - lackBits = bitSize % 8 == 0 ? 
0 : 8 - bitSize % 8; - } else //lackBits >= bitSize - { - *p = (*p) | (unsigned char) ((huffmanTree->code[state])[0] >> (64 - bitSize)); - lackBits -= bitSize; - } - } - } else //lackBits >= bitSize - { - lackBits -= bitSize; - if (lackBits == 0) - p++; - } + T offset; + // minimum bits for T + uchar mbft; + uchar limit; + + void init(){ + + _constructed=0; + ht.clear(); + mplen.clear(); + mpcode.clear(); + freq.clear(); + + offset=0; + mbft=0; + root=0; + n=0; + maxval=0; + limit=0; + } + + HuffmanTree(){ + + init(); + } + + int root; + int n; + int maxval; + std::vector ht; + std::vector freq; + + void addElement(T c,size_t freqc){ + + assert(!_constructed); + + ht.push_back(Node(c)); + freq[c]=freqc; + ++n; + } + + void constructHuffmanTree(){ + + assert(!_constructed); + assert(ht.size()>1); + + if(maxval==1){ + + mbft=1; + ht.push_back(Node(0,&ht[0],nullptr)); + mplen[0]=1; + mpcode[0]=0; + limit=1; + setConstructed(); + return; } + + Timer timer(true); + + mbft=1; + while((1<,std::vector>,cmp> q; + + for(int i=0;i1){ + + int u=q.top().first; + size_t freq_u=q.top().second; + q.pop(); + int v=q.top().first; + size_t freq_v=q.top().second; + q.pop(); + + ht.push_back(Node(0,&ht[u],&ht[v])); + + q.push({ht.size()-1,freq_u+freq_v}); + } + + root=ht.size()-1; + + dfs(&ht[root]); + + setConstructed(); + + timer.stop("construct huffman tree"); } - *reinterpret_cast(bytes) = outSize; - bytes += sizeof(size_t) + outSize; - return outSize; - } - void postprocess_encode() { - SZ_FreeHuffman(); - } + uchar isConstructed(){ + + return _constructed; + } - void preprocess_decode() {}; - - //perform decoding - std::vector decode(const uchar *&bytes, size_t targetLength) { - node t = treeRoot; - std::vector out(targetLength); - size_t i = 0, byteIndex = 0, count = 0; - int r; - node n = treeRoot; - size_t encodedLength = *reinterpret_cast(bytes); - bytes += sizeof(size_t); - if (n->t) //root->t==1 means that all state values are the same (constant) - { - for (count = 0; count 
< targetLength; count++) - out[count] = n->c + offset; - return out; + void setConstructed(){ + + _constructed=1; } + }; + + HuffmanTree tree; + + public: - for (i = 0; count < targetLength; i++) { - byteIndex = i >> 3; //i/8 - r = i % 8; - if (((bytes[byteIndex] >> (7 - r)) & 0x01) == 0) - n = n->left; - else - n = n->right; - - if (n->t) { - out[count] = n->c + offset; - n = t; - count++; + void preprocess_encode(const T *const bins,size_t num_bin,int stateNum){ + + Timer timer(true); + + tree.init(); + + T __minval,__maxval; + + if(stateNum==0){ + + __minval=*bins; + __maxval=*bins; + for(int i=1;inew_node2(C[0], t[0]); - this->unpad_tree(L, R, C, t, 0, root); - free(L); - free(R); - free(C); - free(t); - return root; - } else if (nodeCount <= 65536) { - unsigned short *L = (unsigned short *) malloc(nodeCount * sizeof(unsigned short)); - memset(L, 0, nodeCount * sizeof(unsigned short)); - unsigned short *R = (unsigned short *) malloc(nodeCount * sizeof(unsigned short)); - memset(R, 0, nodeCount * sizeof(unsigned short)); - T *C = (T *) malloc(nodeCount * sizeof(T)); - memset(C, 0, nodeCount * sizeof(T)); - unsigned char *t = (unsigned char *) malloc(nodeCount * sizeof(unsigned char)); - memset(t, 0, nodeCount * sizeof(unsigned char)); - - // TODO: Endian type - // unsigned char cmpSysEndianType = bytes[0]; - // if(cmpSysEndianType!=(unsigned char)sysEndianType) - // { - // unsigned char* p = (unsigned char*)(bytes+1); - // size_t i = 0, size = 3*nodeCount*sizeof(unsigned int); - // while(1) - // { - // symTransform_4bytes(p); - // i+=sizeof(unsigned int); - // if(inew_node2(0, 0); - this->unpad_tree(L, R, C, t, 0, root); - free(L); - free(R); - free(C); - free(t); - return root; - } else //nodeCount>65536 - { - unsigned int *L = (unsigned int *) malloc(nodeCount * sizeof(unsigned int)); - memset(L, 0, nodeCount * sizeof(unsigned int)); - unsigned int *R = (unsigned int *) malloc(nodeCount * sizeof(unsigned int)); - memset(R, 0, nodeCount * sizeof(unsigned int)); 
- T *C = (T *) malloc(nodeCount * sizeof(T)); - memset(C, 0, nodeCount * sizeof(T)); - unsigned char *t = (unsigned char *) malloc(nodeCount * sizeof(unsigned char)); - memset(t, 0, nodeCount * sizeof(unsigned char)); - // TODO: Endian type - // unsigned char cmpSysEndianType = bytes[0]; - // if(cmpSysEndianType!=(unsigned char)sysEndianType) - // { - // unsigned char* p = (unsigned char*)(bytes+1); - // size_t i = 0, size = 3*nodeCount*sizeof(unsigned int); - // while(1) - // { - // symTransform_4bytes(p); - // i+=sizeof(unsigned int); - // if(inew_node2(0, 0); - this->unpad_tree(L, R, C, t, 0, root); - free(L); - free(R); - free(C); - free(t); - return root; + // ska::unordered_map freq; + std::vector freq(tree.maxval); + // freq.reserve(4*stateNum); + + for(int i=0;ipool + huffmanTree->n_nodes++; - if (freq) { - n->c = c; - n->freq = freq; - n->t = 1; - } else { - n->left = a; - n->right = b; - n->freq = a->freq + b->freq; - n->t = 0; - //n->c = 0; + tree.ht.reserve(freq.size()<<1); + + for(int i=0;ipool[huffmanTree->n_nodes].c = c; - huffmanTree->pool[huffmanTree->n_nodes].t = t; - return huffmanTree->pool + huffmanTree->n_nodes++; + // printf("begins to construct huffman tree\n"); + + tree.constructHuffmanTree(); + + timer.stop("preprocess_encode"); } - /* priority queue */ - void qinsert(node n) { - int j, i = huffmanTree->qend++; - while ((j = (i >> 1))) //j=i/2 - { - if (huffmanTree->qq[j]->freq <= n->freq) break; - huffmanTree->qq[i] = huffmanTree->qq[j], i = j; - } - huffmanTree->qq[i] = n; + void preprocess_encode(const std::vector &bins,int stateNum){ + + preprocess_encode(bins.data(),bins.size(),stateNum); } - node qremove() { - int i, l; - node n = huffmanTree->qq[i = 1]; - node p; - if (huffmanTree->qend < 2) return 0; - huffmanTree->qend--; - huffmanTree->qq[i] = huffmanTree->qq[huffmanTree->qend]; - - while ((l = (i << 1)) < huffmanTree->qend) { //l=(i*2) - if (l + 1 < huffmanTree->qend && huffmanTree->qq[l + 1]->freq < huffmanTree->qq[l]->freq) 
l++; - if (huffmanTree->qq[i]->freq > huffmanTree->qq[l]->freq) { - p = huffmanTree->qq[i]; - huffmanTree->qq[i] = huffmanTree->qq[l]; - huffmanTree->qq[l] = p; - i = l; - } else { - break; - } + void saveAsCode(uchar *&c){ + + Timer timer(true); + + uchar *head=c; + + // whether the tree is full binary tree + + uchar& limit=tree.limit; + + std::vector> mp(limit+1); + + for(int i=0;it) { - huffmanTree->code[n->c] = (uint64_t *) malloc(2 * sizeof(uint64_t)); - if (len <= 64) { - (huffmanTree->code[n->c])[0] = out1 << (64 - len); - (huffmanTree->code[n->c])[1] = out2; - } else { - (huffmanTree->code[n->c])[0] = out1; - (huffmanTree->code[n->c])[1] = out2 << (128 - len); + uchar mask=0; + uchar index=0; + + assert(sizeof(T)<=8); + + if(mp[limit].size()==tree.n){ + + // 00 XXXXXX (mbft) + if(tree.maxval>1) writeBytesByte(c,tree.mbft); + else writeBytesByte(c,0x80|tree.mbft); + + writeBytesByte(c,((sizeof(T)-1)<<5)|(limit-1)); + + writeBytes(c,tree.offset,sizeof(T)<<3,mask,index); + + int32ToBytes_bigEndian(c,tree.n); + c+=4; + + int cnt=mp[limit].size(); + + uchar logcnt=0; + while(logcnt<32&&(1<cout[n->c] = (unsigned char) len; + + writeBytesClearMask(c,mask,index); + return; } - int index = len >> 6; //=len/64 - if (index == 0) { - out1 = out1 << 1; - out1 = out1 | 0; - build_code(n->left, len + 1, out1, 0); - out1 = out1 | 1; - build_code(n->right, len + 1, out1, 0); - } else { - if (len % 64 != 0) - out2 = out2 << 1; - out2 = out2 | 0; - build_code(n->left, len + 1, out1, out2); - out2 = out2 | 1; - build_code(n->right, len + 1, out1, out2); + + writeBytesByte(c,0x40|tree.mbft); + + writeBytesByte(c,((sizeof(T)-1)<<5)|(limit-1)); + + writeBytes(c,tree.offset,sizeof(T)<<3,mask,index); + + int32ToBytes_bigEndian(c,tree.maxval); + c+=4; + + for(uchar len=1;len<=limit;len++){ + + int cnt=mp[len].size(); + + writeBytes(c,cnt,len,mask,index); + + if(cnt){ + + for(const T& it:mp[len]){ + + writeBytes(c,it,tree.mbft,mask,index); + + const int code=tree.mpcode[it]; + + 
writeBytes(c,code,len,mask,index); + } + } } + + writeBytesClearMask(c,mask,index); + + timer.stop("saveAsCode"); + + // printf("huffman tree size = %d\n",(int)(c-head)); + + // Lossless_zstd zstd; + // size_t compressed_tree_size; + + // // uchar *compressed_tree = zstd.compress(head,c-head,compressed_tree_size); + // delete[] zstd.compress(head,c-head,compressed_tree_size); + + // printf("compressed huffman tree size = %d\n",(int)compressed_tree_size); + + return; } - /** - * Compute the frequency of the data and build the Huffman tree - * @param HuffmanTree* huffmanTree (output) - * @param int *s (input) - * @param size_t length (input) - * */ - void init(const T *s, size_t length) { - T max = s[0]; - offset = s[0]; //offset is min - - ska::unordered_map frequency; - for (size_t i = 0; i < length; i++) { - frequency[s[i]]++; + void loadAsCode(const uchar *&bytes,size_t &remaining_length){ + + Timer timer(true); + + tree.init(); + + uchar feature=(*bytes)>>6; + tree.mbft=(*bytes)&0x3f; + ++bytes; + + uchar szT=((*bytes)>>5)+1; + tree.limit=((*bytes)&0x1f)+1; + ++bytes; + + assert(szT==sizeof(T)); + + for(int i=0;i max) { - max = k; - } - if (k < offset) { - offset = k; + tree.maxval=bytesToInt32_bigEndian(bytes); + bytes+=4; + + tree.ht.reserve(tree.maxval<<1); + tree.freq.resize(tree.maxval); + tree.mplen.resize(tree.maxval); + tree.mpcode.resize(tree.maxval); + + tree.ht.push_back(Node()); + + if(feature==0x00||feature==0x02){ + + int i=0; + tree.n=1<p[e]==nullptr){ + + tree.ht.push_back(Node()); + u->p[e]=&tree.ht[tree.ht.size()-1]; + } + + u=u->p[e]; + } + + u->c=c; + assert(c>3; + + return; } - int stateNum = max - offset + 2; - huffmanTree = createHuffmanTree(stateNum); + tree.n=0; + + int i=0; + + for(uchar len=1;len<=tree.limit;len++){ - for (const auto &f: frequency) { - qinsert(new_node(f.second, f.first - offset, 0, 0)); + int cnt=0; + + for(uchar j=0;jp[e]==nullptr){ + + tree.ht.push_back(Node()); + u->p[e]=&tree.ht[tree.ht.size()-1]; + } + + 
u=u->p[e]; + } + + u->c=c; + ++tree.n; + tree.mplen[c]=len; + tree.mpcode[c]=vec; + } } - while (huffmanTree->qend > 2) - qinsert(new_node(0, 0, qremove(), qremove())); + bytes+=(i+7)>>3; - build_code(huffmanTree->qq[1], 0, 0, 0); - treeRoot = huffmanTree->qq[1]; + timer.stop("loadAsCode"); + tree.setConstructed(); } - template - void pad_tree(T1 *L, T1 *R, T *C, unsigned char *t, unsigned int i, node root) { - C[i] = root->c; - t[i] = root->t; - node lroot = root->left; - if (lroot != 0) { - huffmanTree->n_inode++; - L[i] = huffmanTree->n_inode; - pad_tree(L, R, C, t, huffmanTree->n_inode, lroot); + size_t encode(const T *bins, size_t num_bin, uchar *&bytes){ + + if(tree.maxval==1){ + + int32ToBytes_bigEndian(bytes,num_bin^0x1234abcd); + bytes+=4; + return 4; } - node rroot = root->right; - if (rroot != 0) { - huffmanTree->n_inode++; - R[i] = huffmanTree->n_inode; - pad_tree(L, R, C, t, huffmanTree->n_inode, rroot); + + Timer timer(true); + + assert(tree.isConstructed()); + + uchar *head=bytes; + bytes+=4; + + int len=0; + + uchar mask=0; + uchar index=0; + + for(int i=0;i - void unpad_tree(T1 *L, T1 *R, T *C, unsigned char *t, unsigned int i, node root) { - //root->c = C[i]; - if (root->t == 0) { - T1 l, r; - l = L[i]; - if (l != 0) { - node lroot = new_node2(C[l], t[l]); - root->left = lroot; - unpad_tree(L, R, C, t, l, lroot); + size_t encode(const std::vector &bins, uchar *&bytes){ + + return encode(bins.data(),bins.size(),bytes); + } + + void postprocess_encode(){ + + } + + void preprocess_decode(){ + + } + + std::vector decode(const uchar *&bytes, size_t targetLength){ + + if(tree.maxval==1){ + + int len=bytesToInt32_bigEndian(bytes)^0x1234abcd; + bytes+=4; + assert(len==targetLength); + return std::vector(len,tree.offset); + } + + Timer timer(true); + + assert(tree.isConstructed()); + + assert(targetLength>4); + + Node *u=&tree.ht[tree.root]; + + int len=bytesToInt32_bigEndian(bytes)^0x1234abcd; + bytes+=4; + + std::vector a(targetLength); + int sza=0; + // 
a.reserve(targetLength); + + // for(int i=0;ip[readBit(bytes,i++)]; + + // if(u->isLeaf()){ + + // a[sza++]=u->c+tree.offset; + // u=&tree.ht[tree.root]; + // } + // } + + // use unroll loops to optimize the above code + + int byteIndex=0; + int i=0; + uchar b; + for(;i+8p[b&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>1)&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>2)&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>3)&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; } - r = R[i]; - if (r != 0) { - node rroot = new_node2(C[r], t[r]); - root->right = rroot; - unpad_tree(L, R, C, t, r, rroot); + u=u->p[(b>>4)&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>5)&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>6)&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>7)&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; } } - } - template - unsigned int convert_HuffTree_to_bytes_anyStates(unsigned int nodeCount, unsigned char *out) { - T1 *L = (T1 *) malloc(nodeCount * sizeof(T1)); - memset(L, 0, nodeCount * sizeof(T1)); - T1 *R = (T1 *) malloc(nodeCount * sizeof(T1)); - memset(R, 0, nodeCount * sizeof(T1)); - T *C = (T *) malloc(nodeCount * sizeof(T)); - memset(C, 0, nodeCount * sizeof(T)); - unsigned char *t = (unsigned char *) malloc(nodeCount * sizeof(unsigned char)); - memset(t, 0, nodeCount * sizeof(unsigned char)); - - pad_tree(L, R, C, t, 0, huffmanTree->qq[1]); - - unsigned int totalSize = - 1 + 2 * nodeCount * sizeof(T1) + nodeCount * sizeof(unsigned char) + nodeCount * sizeof(T); - //*out = (unsigned char*)malloc(totalSize); - out[0] = (unsigned char) sysEndianType; - memcpy(out + 1, L, nodeCount * sizeof(T1)); - memcpy(out + 1 + 
nodeCount * sizeof(T1), R, nodeCount * sizeof(T1)); - memcpy(out + 1 + 2 * nodeCount * sizeof(T1), C, nodeCount * sizeof(T)); - memcpy(out + 1 + 2 * nodeCount * sizeof(T1) + nodeCount * sizeof(T), t, nodeCount * sizeof(unsigned char)); - - free(L); - free(R); - free(C); - free(t); - return totalSize; - } + b=bytes[byteIndex]; + + for(int j=0;jpool); - huffmanTree->pool = NULL; - free(huffmanTree->qqq); - huffmanTree->qqq = NULL; - for (i = 0; i < huffmanTree->stateNum; i++) { - if (huffmanTree->code[i] != NULL) - free(huffmanTree->code[i]); + u=u->p[(b>>j)&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; } - free(huffmanTree->code); - huffmanTree->code = NULL; - free(huffmanTree->cout); - huffmanTree->cout = NULL; - free(huffmanTree); - huffmanTree = NULL; } + + bytes+=(len+7)>>3; + + timer.stop("decode"); + + return a; + } + + void postprocess_decode(){ + + } + + void save(uchar *&c){ + + saveAsCode(c); + } + + void load(const uchar *&c,size_t &remaining_length){ + + loadAsCode(c,remaining_length); } }; + } -#endif +#endif \ No newline at end of file diff --git a/include/SZ3/utils/ByteUtil.hpp b/include/SZ3/utils/ByteUtil.hpp index f408fd67..b0e21eec 100644 --- a/include/SZ3/utils/ByteUtil.hpp +++ b/include/SZ3/utils/ByteUtil.hpp @@ -7,7 +7,6 @@ #include "SZ3/def.hpp" #include -#include namespace SZ3 { @@ -230,5 +229,67 @@ namespace SZ3 { return lfBuf_cur.value; } + inline void writeBytesBit(uchar *&c, uchar val, uchar &mask, uchar &index) { + + assert(val == 0 || val == 1); + + mask |= val << index++; + if (index == 8) { + *c++ = mask; + mask = index = 0; + } + } + + template + inline void writeBytes(uchar *&c, T val, uchar len, uchar &mask, uchar &index) { + + assert(len >= 1 && len <= sizeof(T) * 8); + + if (len + index >= 8) { + + mask |= (val & ((1 << (8 - index)) - 1)) << index; + val >>= 8 - index; + len -= 8 - index; + *c++ = mask; + mask = index = 0; + + while (len >= 8) { + + *c++ = val & (1 << 8) - 1; + val >>= 8; + len -= 8; 
+ } + } + + mask |= (val & (1 << len) - 1) << index; + index += len; + + // for(int i=0;i>=1; + // } + } + + inline void writeBytesByte(uchar *&c, uchar val) { + *c++ = val; + } + + inline void writeBytesClearMask(uchar *&c, uchar &mask, uchar &index) { + + if (index > 0) { + *c++ = mask; + // mask=i=0; + } + } + + inline uchar readBit(const uchar *const &c, int i) { + + return ((*(c + (i >> 3))) >> (i & 7)) & 1; + } + }; #endif //SZ3_BYTEUTIL_HPP From 65207f62fd9b71cdde61f7427200072ff139c4f7 Mon Sep 17 00:00:00 2001 From: LangdaoZhang Date: Fri, 10 Nov 2023 17:26:03 -0500 Subject: [PATCH 02/23] use unordered_map when num_bin is small --- include/SZ3/encoder/HuffmanEncoder.hpp | 384 ++++++++++++++++++------- 1 file changed, 288 insertions(+), 96 deletions(-) diff --git a/include/SZ3/encoder/HuffmanEncoder.hpp b/include/SZ3/encoder/HuffmanEncoder.hpp index a1de6308..454d8270 100644 --- a/include/SZ3/encoder/HuffmanEncoder.hpp +++ b/include/SZ3/encoder/HuffmanEncoder.hpp @@ -52,7 +52,7 @@ namespace SZ3{ uchar len=0; int vec=0; - void dfs(Node* u){ + void dfs_mp(Node* u){ if(u->isLeaf()){ @@ -65,11 +65,32 @@ namespace SZ3{ } ++len; - dfs(u->p[0]); + dfs_mp(u->p[0]); --len; vec^=1<p[1]); + dfs_mp(u->p[1]); + vec^=1<<--len; + } + + void dfs_vec(Node* u){ + + if(u->isLeaf()){ + + veclen[u->c]=len; + veccode[u->c]=vec; + + limit=std::max(limit,len); + + return; + } + + ++len; + dfs_vec(u->p[0]); + --len; + + vec^=1<p[1]); vec^=1<<--len; } @@ -82,8 +103,16 @@ namespace SZ3{ public: - std::vector mplen; - std::vector mpcode; + uchar usemp; + // 0 : vec + // 1 : mp + + std::vector veclen; + std::vector veccode; +// ska::unordered_map mplen; +// ska::unordered_map mpcode; + std::unordered_map mplen; + std::unordered_map mpcode; T offset; // minimum bits for T @@ -94,9 +123,12 @@ namespace SZ3{ _constructed=0; ht.clear(); + veclen.clear(); + veccode.clear(); mplen.clear(); mpcode.clear(); - freq.clear(); + vecfreq.clear(); + mpfreq.clear(); offset=0; mbft=0; @@ -115,28 +147,44 
@@ namespace SZ3{ int n; int maxval; std::vector ht; - std::vector freq; + std::vector vecfreq; +// ska::unordered_map mpfreq; + std::unordered_map mpfreq; + + void addElementInMap(T c,size_t freqc){ + + assert(!_constructed); + + ht.push_back(Node(c)); + mpfreq[c]=freqc; + ++n; + } - void addElement(T c,size_t freqc){ + void addElementInVector(T c,size_t freqc){ assert(!_constructed); ht.push_back(Node(c)); - freq[c]=freqc; + vecfreq[c]=freqc; ++n; } void constructHuffmanTree(){ assert(!_constructed); - assert(ht.size()>1); if(maxval==1){ mbft=1; ht.push_back(Node(0,&ht[0],nullptr)); - mplen[0]=1; - mpcode[0]=0; + if(usemp){ + mplen[0]=1; + mpcode[0]=0; + } + else { + veclen[0]=1; + veccode[0]=0; + } limit=1; setConstructed(); return; @@ -149,9 +197,15 @@ namespace SZ3{ std::priority_queue,std::vector>,cmp> q; - for(int i=0;i1){ @@ -170,7 +224,8 @@ namespace SZ3{ root=ht.size()-1; - dfs(&ht[root]); + if(usemp) dfs_mp(&ht[root]); + else dfs_vec(&ht[root]); setConstructed(); @@ -196,12 +251,14 @@ namespace SZ3{ Timer timer(true); + tree.usemp=stateNum>=(1<<12)&&num_bin<2*stateNum?1:0; + tree.init(); T __minval,__maxval; if(stateNum==0){ - + printf("please input the stateNum\n"); __minval=*bins; __maxval=*bins; for(int i=1;i freq; - std::vector freq(tree.maxval); - // freq.reserve(4*stateNum); +// ska::unordered_map freq; + std::unordered_map freq; +// freq.reserve(num_bin); - for(int i=0;i freq(tree.maxval); - for(int i=0;i> mp(limit+1); - for(int i=0;i=(1<<12)&&(1<<(tree.limit-1))c=c; - assert(c>3; @@ -467,8 +575,14 @@ namespace SZ3{ u->c=c; ++tree.n; - tree.mplen[c]=len; - tree.mpcode[c]=vec; + if(tree.usemp){ + tree.mplen[c]=len; + tree.mpcode[c]=vec; + } + else { + tree.veclen[c]=len; + tree.veccode[c]=vec; + } } } @@ -500,16 +614,45 @@ namespace SZ3{ uchar mask=0; uchar index=0; - for(int i=0;i4); - Node *u=&tree.ht[tree.root]; int len=bytesToInt32_bigEndian(bytes)^0x1234abcd; @@ -584,49 +725,100 @@ namespace SZ3{ int byteIndex=0; int i=0; uchar b; - 
for(;i+8p[b&1]; - if(u->isLeaf()){ - a[sza++]=u->c+tree.offset; - u=&tree.ht[tree.root]; - } - u=u->p[(b>>1)&1]; - if(u->isLeaf()){ - a[sza++]=u->c+tree.offset; - u=&tree.ht[tree.root]; - } - u=u->p[(b>>2)&1]; - if(u->isLeaf()){ - a[sza++]=u->c+tree.offset; - u=&tree.ht[tree.root]; - } - u=u->p[(b>>3)&1]; - if(u->isLeaf()){ - a[sza++]=u->c+tree.offset; - u=&tree.ht[tree.root]; - } - u=u->p[(b>>4)&1]; - if(u->isLeaf()){ - a[sza++]=u->c+tree.offset; - u=&tree.ht[tree.root]; - } - u=u->p[(b>>5)&1]; - if(u->isLeaf()){ - a[sza++]=u->c+tree.offset; - u=&tree.ht[tree.root]; - } - u=u->p[(b>>6)&1]; - if(u->isLeaf()){ - a[sza++]=u->c+tree.offset; - u=&tree.ht[tree.root]; + b=bytes[byteIndex]; + + u=u->p[b&1]; + if(u->isLeaf()){ + a[sza++]=u->c; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>1)&1]; + if(u->isLeaf()){ + a[sza++]=u->c; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>2)&1]; + if(u->isLeaf()){ + a[sza++]=u->c; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>3)&1]; + if(u->isLeaf()){ + a[sza++]=u->c; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>4)&1]; + if(u->isLeaf()){ + a[sza++]=u->c; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>5)&1]; + if(u->isLeaf()){ + a[sza++]=u->c; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>6)&1]; + if(u->isLeaf()){ + a[sza++]=u->c; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>7)&1]; + if(u->isLeaf()){ + a[sza++]=u->c; + u=&tree.ht[tree.root]; + } } - u=u->p[(b>>7)&1]; - if(u->isLeaf()){ - a[sza++]=u->c+tree.offset; - u=&tree.ht[tree.root]; + } + else{ + + for(;i+8p[b&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>1)&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>2)&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>3)&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>4)&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>5)&1]; + 
if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>6)&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; + } + u=u->p[(b>>7)&1]; + if(u->isLeaf()){ + a[sza++]=u->c+tree.offset; + u=&tree.ht[tree.root]; + } } } From fe0d6a5189936ef54313ea8914feeec71a1826e8 Mon Sep 17 00:00:00 2001 From: Kai Zhao Date: Sat, 11 Nov 2023 20:28:13 -0500 Subject: [PATCH 03/23] add bio module --- include/SZ3/api/impl/SZBioMD.hpp | 52 +++++++ include/SZ3/api/impl/SZDispatcher.hpp | 5 + include/SZ3/frontend/SZBioMDFrontend.hpp | 189 +++++++++++++++++++++++ include/SZ3/utils/Config.hpp | 6 +- 4 files changed, 249 insertions(+), 3 deletions(-) create mode 100644 include/SZ3/api/impl/SZBioMD.hpp create mode 100644 include/SZ3/frontend/SZBioMDFrontend.hpp diff --git a/include/SZ3/api/impl/SZBioMD.hpp b/include/SZ3/api/impl/SZBioMD.hpp new file mode 100644 index 00000000..087b79e4 --- /dev/null +++ b/include/SZ3/api/impl/SZBioMD.hpp @@ -0,0 +1,52 @@ +#ifndef SZ3_SZ_BIOMD_HPP +#define SZ3_SZ_BIOMD_HPP + +#include "SZ3/compressor/SZGeneralCompressor.hpp" +#include "SZ3/frontend/SZBioMDFrontend.hpp" +#include "SZ3/quantizer/IntegerQuantizer.hpp" +#include "SZ3/predictor/ComposedPredictor.hpp" +#include "SZ3/predictor/LorenzoPredictor.hpp" +#include "SZ3/predictor/RegressionPredictor.hpp" +#include "SZ3/predictor/PolyRegressionPredictor.hpp" +#include "SZ3/lossless/Lossless_zstd.hpp" +#include "SZ3/utils/Iterator.hpp" +#include "SZ3/utils/Statistic.hpp" +#include "SZ3/utils/Extraction.hpp" +#include "SZ3/utils/QuantOptimizatioin.hpp" +#include "SZ3/utils/Config.hpp" +#include "SZ3/def.hpp" +#include +#include + +namespace SZ3 { + + + template + char *SZ_compress_bioMD(Config &conf, T *data, size_t &outSize) { + + assert(N == conf.N); + assert(conf.cmprAlgo == ALGO_BIOMD); + calAbsErrorBound(conf, data); + + char *cmpData; + auto quantizer = LinearQuantizer(conf.absErrorBound, conf.quantbinCnt / 2); + auto sz = 
make_sz_general_compressor(make_sz_bio_frontend(conf, quantizer), HuffmanEncoder(), + Lossless_zstd()); + cmpData = (char *) sz->compress(conf, data, outSize); + return cmpData; + } + + + template + void SZ_decompress_bioMD(const Config &conf, char *cmpData, size_t cmpSize, T *decData) { + assert(conf.cmprAlgo == ALGO_BIOMD); + + uchar const *cmpDataPos = (uchar *) cmpData; + LinearQuantizer quantizer; + auto sz = make_sz_general_compressor(make_sz_bio_frontend(conf, quantizer), + HuffmanEncoder(), Lossless_zstd()); + sz->decompress(cmpDataPos, cmpSize, decData); + + } +} +#endif \ No newline at end of file diff --git a/include/SZ3/api/impl/SZDispatcher.hpp b/include/SZ3/api/impl/SZDispatcher.hpp index e95ec4ca..694bf822 100644 --- a/include/SZ3/api/impl/SZDispatcher.hpp +++ b/include/SZ3/api/impl/SZDispatcher.hpp @@ -6,6 +6,7 @@ #include "SZ3/utils/Config.hpp" #include "SZ3/api/impl/SZInterp.hpp" #include "SZ3/api/impl/SZLorenzoReg.hpp" +#include "SZ3/api/impl/SZBioMD.hpp" #include namespace SZ3 { @@ -25,6 +26,8 @@ namespace SZ3 { cmpData = (char *) SZ_compress_Interp(conf, data, outSize); } else if (conf.cmprAlgo == ALGO_INTERP_LORENZO) { cmpData = (char *) SZ_compress_Interp_lorenzo(conf, data, outSize); + } else if (conf.cmprAlgo == ALGO_BIOMD) { + cmpData = (char *) SZ_compress_bioMD(conf, data, outSize); } return cmpData; } @@ -40,6 +43,8 @@ namespace SZ3 { SZ_decompress_LorenzoReg(conf, cmpData, cmpSize, decData); } else if (conf.cmprAlgo == ALGO_INTERP) { SZ_decompress_Interp(conf, cmpData, cmpSize, decData); + } else if (conf.cmprAlgo == ALGO_BIOMD) { + SZ_decompress_bioMD(conf, cmpData, cmpSize, decData); } else { printf("SZ_decompress_dispatcher, Method not supported\n"); exit(0); diff --git a/include/SZ3/frontend/SZBioMDFrontend.hpp b/include/SZ3/frontend/SZBioMDFrontend.hpp new file mode 100644 index 00000000..aa55bc4a --- /dev/null +++ b/include/SZ3/frontend/SZBioMDFrontend.hpp @@ -0,0 +1,189 @@ +#ifndef SZ3_SZBIOMD_FRONTEND +#define 
SZ3_SZBIOMD_FRONTEND + +/** + */ + +#include "Frontend.hpp" +//#include "SZ3/utils/MemoryUtil.hpp" +#include "SZ3/utils/Config.hpp" +#include + +namespace SZ3 { + + template + class SZBioMDFrontend : public concepts::FrontendInterface { + public: + SZBioMDFrontend(const Config &conf, Quantizer quantizer) : + quantizer(quantizer), + conf(conf) { + if (N != 1 && N != 3) { + throw std::invalid_argument("SZBioFront only support 1D or 3D data"); + } + } + + ~SZBioMDFrontend() { + clear(); + } + + void print() {}; + + + std::vector compress(T *data) { + if (N == 1) { + return compress_1d(data); + } else { + return compress_3d(data); + } + }; + + T *decompress(std::vector &quant_inds, T *dec_data) { + if (N == 1) { + return decompress_1d(quant_inds, dec_data); + } else { + return decompress_3d(quant_inds, dec_data); + } + }; + + + void save(uchar *&c) { + quantizer.save(c); + } + + void load(const uchar *&c, size_t &remaining_length) { + clear(); + const uchar *c_pos = c; + quantizer.load(c, remaining_length); + remaining_length -= c_pos - c; + } + + + void clear() { + quantizer.clear(); + } + + size_t size_est() { + return quantizer.size_est(); //unpred + } + + int get_radius() const { + return quantizer.get_radius(); + } + + size_t get_num_elements() const { + return conf.num; + }; + + private: + std::vector compress_1d(T *data) { + std::vector quant_bins(conf.num); + quant_bins[0] = quantizer.quantize_and_overwrite(data[0], 0); + for (size_t i = 1; i < conf.num; i++) { + quant_bins[i] = quantizer.quantize_and_overwrite(data[i], data[i - 1]); + } + return quant_bins; + } + + T *decompress_1d(std::vector &quant_inds, T *dec_data) { + dec_data[0] = quantizer.recover(0, quant_inds[0]); + for (size_t i = 1; i < conf.num; i++) { + dec_data[i] = quantizer.recover(dec_data[i - 1], quant_inds[i]); + } + return dec_data; + } + + std::vector compress_3d(T *data) { + std::vector quant_bins(conf.num); + auto dims = conf.dims; + std::vector stride({dims[1] * dims[2], dims[2], 1}); + 
//TODO determine the # of system + //i==0 & j==0 + for (size_t k = 0; k < dims[2]; k++) { //xyz + size_t idx = k; + quant_bins[idx] = quantizer.quantize_and_overwrite(data[idx], 0); + } + + //i==0 + for (size_t j = 1; j < dims[1]; j++) { //atoms + for (size_t k = 0; k < dims[2]; k++) { //xyz + size_t idx = j * stride[1] + k; + size_t idx1 = (j - 1) * stride[1] + k; + quant_bins[idx] = quantizer.quantize_and_overwrite(data[idx], data[idx1]); + } + } + + for (size_t i = 1; i < dims[0]; i++) {//time + for (size_t j = 0; j < dims[1]; j++) { //atoms + for (size_t k = 0; k < dims[2]; k++) { //xyz + size_t idx = i * stride[0] + j * stride[1] + k; + size_t idx1 = (i - 1) * stride[0] + j * stride[1] + k; + size_t idx2 = i * stride[0] + (j - 1) * stride[1] + k; + size_t idx3 = (i - 1) * stride[0] + (j - 1) * stride[1] + k; + if (j % 3 == 0) {// time -1 + quant_bins[idx] = + quantizer.quantize_and_overwrite(data[idx], data[idx1]); + } else { // time -1 & atom -1 + quant_bins[idx] = + quantizer.quantize_and_overwrite(data[idx], data[idx1] + data[idx2] - data[idx3]); + } + } + } + } + + return quant_bins; + } + + + T *decompress_3d(std::vector &quant_inds, T *dec_data) { + + auto dims = conf.dims; + std::vector stride({dims[1] * dims[2], dims[2], 1}); +// quant_bins[0] = quantizer.quantize_and_overwrite(data[0], 0); + + //i==0 & j==0 + for (size_t k = 0; k < dims[2]; k++) { //xyz + size_t idx = k; + dec_data[idx] = quantizer.recover(0, quant_inds[idx]); + } + + //i==0 + for (size_t j = 1; j < dims[1]; j++) { //atoms + for (size_t k = 0; k < dims[2]; k++) { //xyz + size_t idx = j * stride[1] + k; + size_t idx1 = (j - 1) * stride[1] + k; + dec_data[idx] = quantizer.recover(dec_data[idx1], quant_inds[idx]); + } + } + + for (size_t i = 1; i < dims[0]; i++) {//time + for (size_t j = 0; j < dims[1]; j++) { //atoms + for (size_t k = 0; k < dims[2]; k++) { //xyz + size_t idx = i * stride[0] + j * stride[1] + k; + size_t idx1 = (i - 1) * stride[0] + j * stride[1] + k; + size_t idx2 = i 
* stride[0] + (j - 1) * stride[1] + k; + size_t idx3 = (i - 1) * stride[0] + (j - 1) * stride[1] + k; + if (j % 3 == 0) {// time -1 + dec_data[idx] = quantizer.recover(dec_data[idx1], quant_inds[idx]); + } else { // time -1 & atom -1 + dec_data[idx] = quantizer.recover(dec_data[idx1] + dec_data[idx2] - dec_data[idx3], quant_inds[idx]); + } + } + } + } + return dec_data; + } + + Quantizer quantizer; + Config conf; + + }; + + template + SZBioMDFrontend + make_sz_bio_frontend(const Config &conf, Predictor predictor) { + return SZBioMDFrontend(conf, predictor); + } +} + + +#endif diff --git a/include/SZ3/utils/Config.hpp b/include/SZ3/utils/Config.hpp index 65cd4c8a..77649cc7 100644 --- a/include/SZ3/utils/Config.hpp +++ b/include/SZ3/utils/Config.hpp @@ -22,10 +22,10 @@ namespace SZ3 { constexpr EB EB_OPTIONS[] = {EB_ABS, EB_REL, EB_PSNR, EB_L2NORM, EB_ABS_AND_REL, EB_ABS_OR_REL}; enum ALGO { - ALGO_LORENZO_REG, ALGO_INTERP_LORENZO, ALGO_INTERP + ALGO_LORENZO_REG, ALGO_INTERP_LORENZO, ALGO_INTERP, ALGO_BIOMD }; - constexpr const char *ALGO_STR[] = {"ALGO_LORENZO_REG", "ALGO_INTERP_LORENZO", "ALGO_INTERP"}; - constexpr const ALGO ALGO_OPTIONS[] = {ALGO_LORENZO_REG, ALGO_INTERP_LORENZO, ALGO_INTERP}; + constexpr const char *ALGO_STR[] = {"ALGO_LORENZO_REG", "ALGO_INTERP_LORENZO", "ALGO_INTERP", "ALGO_BIOMD"}; + constexpr const ALGO ALGO_OPTIONS[] = {ALGO_LORENZO_REG, ALGO_INTERP_LORENZO, ALGO_INTERP, ALGO_BIOMD}; enum INTERP_ALGO { INTERP_ALGO_LINEAR, INTERP_ALGO_CUBIC From c2cd6079ccab188acc938ddee906265a7fac98db Mon Sep 17 00:00:00 2001 From: LangdaoZhang Date: Sat, 11 Nov 2023 21:05:04 -0700 Subject: [PATCH 04/23] fixed some bug when n=1 --- include/SZ3/encoder/HuffmanEncoder.hpp | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/include/SZ3/encoder/HuffmanEncoder.hpp b/include/SZ3/encoder/HuffmanEncoder.hpp index 454d8270..2844f646 100644 --- a/include/SZ3/encoder/HuffmanEncoder.hpp +++ b/include/SZ3/encoder/HuffmanEncoder.hpp 
@@ -173,9 +173,11 @@ namespace SZ3{ assert(!_constructed); - if(maxval==1){ + if(n==1||maxval==1){ mbft=1; + maxval=1; + offset=ht[0].c; ht.push_back(Node(0,&ht[0],nullptr)); if(usemp){ mplen[0]=1; @@ -386,16 +388,19 @@ namespace SZ3{ while(logcnt<32&&(1<1){ - writeBytes(c,it,tree.mbft,mask,index); + for(T it:mp[limit]){ - const int code=tree.usemp?tree.mpcode[it]:tree.veccode[it]; + writeBytes(c,it,tree.mbft,mask,index); - writeBytes(c,code,logcnt,mask,index); - } + const int code=tree.usemp?tree.mpcode[it]:tree.veccode[it]; - writeBytesClearMask(c,mask,index); + writeBytes(c,code,logcnt,mask,index); + } + + writeBytesClearMask(c,mask,index); + } return; } @@ -692,7 +697,8 @@ namespace SZ3{ int len=bytesToInt32_bigEndian(bytes)^0x1234abcd; bytes+=4; - assert(len==targetLength); +// assert(len==targetLength); + return std::vector(len,tree.offset); } From d26cd44b4826e47d12eb87e4f7fd27f6fd714f1a Mon Sep 17 00:00:00 2001 From: Kai Zhao Date: Sun, 12 Nov 2023 10:15:33 -0500 Subject: [PATCH 05/23] add bio module --- include/SZ3/utils/Config.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/SZ3/utils/Config.hpp b/include/SZ3/utils/Config.hpp index 77649cc7..67eb0d2a 100644 --- a/include/SZ3/utils/Config.hpp +++ b/include/SZ3/utils/Config.hpp @@ -82,6 +82,8 @@ namespace SZ3 { cmprAlgo = ALGO_INTERP_LORENZO; } else if (cmprAlgoStr == ALGO_STR[ALGO_INTERP]) { cmprAlgo = ALGO_INTERP; + } else if (cmprAlgoStr == ALGO_STR[ALGO_BIOMD]) { + cmprAlgo = ALGO_BIOMD; } auto ebModeStr = cfg.Get("GlobalSettings", "ErrorBoundMode", ""); if (ebModeStr == EB_STR[EB_ABS]) { From cb04420d5c7e5624d9411fc5413c5f3165a6fb9b Mon Sep 17 00:00:00 2001 From: LangdaoZhang Date: Sun, 12 Nov 2023 10:59:56 -0700 Subject: [PATCH 06/23] fixed some bug when n=1, bytes does not properly increase. 
--- include/SZ3/encoder/HuffmanEncoder.hpp | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/include/SZ3/encoder/HuffmanEncoder.hpp b/include/SZ3/encoder/HuffmanEncoder.hpp index 2844f646..6ab0c1d3 100644 --- a/include/SZ3/encoder/HuffmanEncoder.hpp +++ b/include/SZ3/encoder/HuffmanEncoder.hpp @@ -111,8 +111,10 @@ namespace SZ3{ std::vector veccode; // ska::unordered_map mplen; // ska::unordered_map mpcode; - std::unordered_map mplen; - std::unordered_map mpcode; +// std::unordered_map mplen; +// std::unordered_map mpcode; + std::map mplen; + std::map mpcode; T offset; // minimum bits for T @@ -149,7 +151,8 @@ namespace SZ3{ std::vector ht; std::vector vecfreq; // ska::unordered_map mpfreq; - std::unordered_map mpfreq; +// std::unordered_map mpfreq; + std::map mpfreq; void addElementInMap(T c,size_t freqc){ @@ -283,7 +286,8 @@ namespace SZ3{ // tree.mpcode.reserve(num_bin); // ska::unordered_map freq; - std::unordered_map freq; +// std::unordered_map freq; + std::map freq; // freq.reserve(num_bin); if(tree.offset==0){ @@ -496,7 +500,16 @@ namespace SZ3{ int i=0; tree.n=1< Date: Sun, 12 Nov 2023 13:54:27 -0700 Subject: [PATCH 07/23] add site dedector --- include/SZ3/frontend/SZBioMDFrontend.hpp | 36 +++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/include/SZ3/frontend/SZBioMDFrontend.hpp b/include/SZ3/frontend/SZBioMDFrontend.hpp index aa55bc4a..78e88687 100644 --- a/include/SZ3/frontend/SZBioMDFrontend.hpp +++ b/include/SZ3/frontend/SZBioMDFrontend.hpp @@ -92,10 +92,44 @@ namespace SZ3 { return dec_data; } + + int cal_site_3d(T *data, std::vector dims) { + std::vector sites; + for (int j = 0; j < std::min(dims[2], 5); j++) { + size_t lprev = 0, lavg = 0, lcnt = 0; + for (size_t i = 1; i < std::min(dims[1], 100); i++) { + auto c = data[i * dims[2] + j], p = data[(i - 1) * dims[2] + j]; + if (fabs(c - p) / c > 0.5) { + sites.push_back(i - lprev); +// printf("%d %d\n", i, i - lprev); + 
lprev = i; + } + } + } + ska::unordered_map frequency; + for (size_t i = 0; i < sites.size(); i++) { + frequency[sites[i]]++; + } + int maxCount = 0, res = 0; + for (const auto &kv: frequency) { + auto k = kv.first; + auto f = kv.second; +// printf("k %d f %d\n", k ,f); + if (maxCount < f) { + res = k; + maxCount = f; + } + } + return (res <= 1 || res > 20) ? 0 : res; + } + std::vector compress_3d(T *data) { + std::vector quant_bins(conf.num); auto dims = conf.dims; std::vector stride({dims[1] * dims[2], dims[2], 1}); + int site = cal_site_3d(data + stride[0], conf.dims); + printf("#of site = %d\n", site); //TODO determine the # of system //i==0 & j==0 for (size_t k = 0; k < dims[2]; k++) { //xyz @@ -119,7 +153,7 @@ namespace SZ3 { size_t idx1 = (i - 1) * stride[0] + j * stride[1] + k; size_t idx2 = i * stride[0] + (j - 1) * stride[1] + k; size_t idx3 = (i - 1) * stride[0] + (j - 1) * stride[1] + k; - if (j % 3 == 0) {// time -1 + if (site != 0 && j % site == 0) {// time -1 quant_bins[idx] = quantizer.quantize_and_overwrite(data[idx], data[idx1]); } else { // time -1 & atom -1 From 7e49720607740bd392267b231bd89324bb95e7f4 Mon Sep 17 00:00:00 2001 From: Kai Zhao Date: Sun, 12 Nov 2023 16:09:35 -0700 Subject: [PATCH 08/23] add site dedector --- include/SZ3/frontend/SZBioMDFrontend.hpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/include/SZ3/frontend/SZBioMDFrontend.hpp b/include/SZ3/frontend/SZBioMDFrontend.hpp index 78e88687..c9aa4f21 100644 --- a/include/SZ3/frontend/SZBioMDFrontend.hpp +++ b/include/SZ3/frontend/SZBioMDFrontend.hpp @@ -47,12 +47,14 @@ namespace SZ3 { void save(uchar *&c) { + write(site, c); quantizer.save(c); } void load(const uchar *&c, size_t &remaining_length) { clear(); const uchar *c_pos = c; + read(site, c, remaining_length); quantizer.load(c, remaining_length); remaining_length -= c_pos - c; } @@ -124,12 +126,11 @@ namespace SZ3 { } std::vector compress_3d(T *data) { - std::vector quant_bins(conf.num); auto 
dims = conf.dims; std::vector stride({dims[1] * dims[2], dims[2], 1}); int site = cal_site_3d(data + stride[0], conf.dims); - printf("#of site = %d\n", site); + printf("# of site in the MD simulation guessed by SZ3 = %d\n", site); //TODO determine the # of system //i==0 & j==0 for (size_t k = 0; k < dims[2]; k++) { //xyz @@ -153,7 +154,7 @@ namespace SZ3 { size_t idx1 = (i - 1) * stride[0] + j * stride[1] + k; size_t idx2 = i * stride[0] + (j - 1) * stride[1] + k; size_t idx3 = (i - 1) * stride[0] + (j - 1) * stride[1] + k; - if (site != 0 && j % site == 0) {// time -1 + if (j == 0 || (site != 0 && j % site == 0)) {// time -1 quant_bins[idx] = quantizer.quantize_and_overwrite(data[idx], data[idx1]); } else { // time -1 & atom -1 @@ -196,7 +197,7 @@ namespace SZ3 { size_t idx1 = (i - 1) * stride[0] + j * stride[1] + k; size_t idx2 = i * stride[0] + (j - 1) * stride[1] + k; size_t idx3 = (i - 1) * stride[0] + (j - 1) * stride[1] + k; - if (j % 3 == 0) {// time -1 + if (j == 0 || (site != 0 && j % site == 0)) {// time -1 dec_data[idx] = quantizer.recover(dec_data[idx1], quant_inds[idx]); } else { // time -1 & atom -1 dec_data[idx] = quantizer.recover(dec_data[idx1] + dec_data[idx2] - dec_data[idx3], quant_inds[idx]); @@ -209,6 +210,7 @@ namespace SZ3 { Quantizer quantizer; Config conf; + int site = 0; }; From 0ab5ea1eb14e41db58263a4ec90bdb1f0669ee4d Mon Sep 17 00:00:00 2001 From: Kai Zhao Date: Sun, 12 Nov 2023 16:10:21 -0700 Subject: [PATCH 09/23] config bugfix --- include/SZ3/utils/Config.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/include/SZ3/utils/Config.hpp b/include/SZ3/utils/Config.hpp index 67eb0d2a..1f4ff06c 100644 --- a/include/SZ3/utils/Config.hpp +++ b/include/SZ3/utils/Config.hpp @@ -64,6 +64,7 @@ namespace SZ3 { dims = std::vector(begin, end); N = dims.size(); num = std::accumulate(dims.begin(), dims.end(), (size_t) 1, std::multiplies()); + pred_dim = N; return num; } From 9e3fa3b8bf542c191eaa058deb4c3c14b1770003 Mon Sep 17 00:00:00 2001 From: 
Kai Zhao Date: Mon, 13 Nov 2023 15:36:11 -0700 Subject: [PATCH 10/23] site --- include/SZ3/frontend/SZBioMDFrontend.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/SZ3/frontend/SZBioMDFrontend.hpp b/include/SZ3/frontend/SZBioMDFrontend.hpp index c9aa4f21..259d5ab9 100644 --- a/include/SZ3/frontend/SZBioMDFrontend.hpp +++ b/include/SZ3/frontend/SZBioMDFrontend.hpp @@ -122,7 +122,7 @@ namespace SZ3 { maxCount = f; } } - return (res <= 1 || res > 20) ? 0 : res; + return (res <= 2 || res > 10) ? 0 : res; } std::vector compress_3d(T *data) { From df57b204294d22af128673dfebbfe3372411b4a0 Mon Sep 17 00:00:00 2001 From: Kai Zhao Date: Thu, 16 Nov 2023 07:28:28 -0700 Subject: [PATCH 11/23] site --- include/SZ3/frontend/SZBioMDFrontend.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/SZ3/frontend/SZBioMDFrontend.hpp b/include/SZ3/frontend/SZBioMDFrontend.hpp index 259d5ab9..4b0c2c5c 100644 --- a/include/SZ3/frontend/SZBioMDFrontend.hpp +++ b/include/SZ3/frontend/SZBioMDFrontend.hpp @@ -129,7 +129,7 @@ namespace SZ3 { std::vector quant_bins(conf.num); auto dims = conf.dims; std::vector stride({dims[1] * dims[2], dims[2], 1}); - int site = cal_site_3d(data + stride[0], conf.dims); + site = cal_site_3d(data + stride[0], conf.dims); printf("# of site in the MD simulation guessed by SZ3 = %d\n", site); //TODO determine the # of system //i==0 & j==0 From a26c82889d9f4dfdc88e6d440d055c0f66e7899d Mon Sep 17 00:00:00 2001 From: Kai Zhao Date: Fri, 8 Mar 2024 11:43:06 -0500 Subject: [PATCH 12/23] increase huffman decoding speed by adding a cache of the huffman codebook --- include/SZ3/encoder/HuffmanEncoder.hpp | 854 +++++++++++++------------ 1 file changed, 428 insertions(+), 426 deletions(-) diff --git a/include/SZ3/encoder/HuffmanEncoder.hpp b/include/SZ3/encoder/HuffmanEncoder.hpp index 6ab0c1d3..16cfd620 100644 --- a/include/SZ3/encoder/HuffmanEncoder.hpp +++ b/include/SZ3/encoder/HuffmanEncoder.hpp @@ -16,50 +16,50 @@ 
#include #include -namespace SZ3{ +namespace SZ3 { template - class HuffmanEncoder:public concepts::EncoderInterface{ + class HuffmanEncoder : public concepts::EncoderInterface { private: - class Node{ + class Node { public: - Node(T c_=0,Node *lp=nullptr,Node *rp=nullptr){ + Node(T c_ = 0, Node *lp = nullptr, Node *rp = nullptr) { - c=c_; - p[0]=lp; - p[1]=rp; + c = c_; + p[0] = lp; + p[1] = rp; } T c; Node *p[2]; - inline uchar isLeaf(){ + inline uchar isLeaf() { - return p[0]==nullptr; + return p[0] == nullptr; } }; - class HuffmanTree{ + class HuffmanTree { private: - uchar _constructed=0; + uchar _constructed = 0; - uchar len=0; - int vec=0; + uchar len = 0; + int vec = 0; - void dfs_mp(Node* u){ + void dfs_mp(Node *u) { - if(u->isLeaf()){ + if (u->isLeaf()) { - mplen[u->c]=len; - mpcode[u->c]=vec; + mplen[u->c] = len; + mpcode[u->c] = vec; - limit=std::max(limit,len); + limit = std::max(limit, len); return; } @@ -68,19 +68,19 @@ namespace SZ3{ dfs_mp(u->p[0]); --len; - vec^=1<p[1]); - vec^=1<<--len; + vec ^= 1 << --len; } - void dfs_vec(Node* u){ + void dfs_vec(Node *u) { - if(u->isLeaf()){ + if (u->isLeaf()) { - veclen[u->c]=len; - veccode[u->c]=vec; + veclen[u->c] = len; + veccode[u->c] = vec; - limit=std::max(limit,len); + limit = std::max(limit, len); return; } @@ -89,15 +89,15 @@ namespace SZ3{ dfs_vec(u->p[0]); --len; - vec^=1<p[1]); - vec^=1<<--len; + vec ^= 1 << --len; } - class cmp{ + class cmp { public: - bool operator()(const std::pair& u, const std::pair& v) { - return u.second==v.second?u.first>v.first:u.second>v.second; + bool operator()(const std::pair &u, const std::pair &v) { + return u.second == v.second ? 
u.first > v.first : u.second > v.second; } }; @@ -113,17 +113,17 @@ namespace SZ3{ // ska::unordered_map mpcode; // std::unordered_map mplen; // std::unordered_map mpcode; - std::map mplen; - std::map mpcode; + std::map mplen; + std::map mpcode; T offset; // minimum bits for T uchar mbft; uchar limit; - void init(){ + void init() { - _constructed=0; + _constructed = 0; ht.clear(); veclen.clear(); veccode.clear(); @@ -132,15 +132,15 @@ namespace SZ3{ vecfreq.clear(); mpfreq.clear(); - offset=0; - mbft=0; - root=0; - n=0; - maxval=0; - limit=0; + offset = 0; + mbft = 0; + root = 0; + n = 0; + maxval = 0; + limit = 0; } - HuffmanTree(){ + HuffmanTree() { init(); } @@ -152,84 +152,82 @@ namespace SZ3{ std::vector vecfreq; // ska::unordered_map mpfreq; // std::unordered_map mpfreq; - std::map mpfreq; + std::map mpfreq; - void addElementInMap(T c,size_t freqc){ + void addElementInMap(T c, size_t freqc) { assert(!_constructed); ht.push_back(Node(c)); - mpfreq[c]=freqc; + mpfreq[c] = freqc; ++n; } - void addElementInVector(T c,size_t freqc){ + void addElementInVector(T c, size_t freqc) { assert(!_constructed); ht.push_back(Node(c)); - vecfreq[c]=freqc; + vecfreq[c] = freqc; ++n; } - void constructHuffmanTree(){ + void constructHuffmanTree() { assert(!_constructed); - if(n==1||maxval==1){ + if (n == 1 || maxval == 1) { - mbft=1; - maxval=1; - offset=ht[0].c; - ht.push_back(Node(0,&ht[0],nullptr)); - if(usemp){ - mplen[0]=1; - mpcode[0]=0; + mbft = 1; + maxval = 1; + offset = ht[0].c; + ht.push_back(Node(0, &ht[0], nullptr)); + if (usemp) { + mplen[0] = 1; + mpcode[0] = 0; + } else { + veclen[0] = 1; + veccode[0] = 0; } - else { - veclen[0]=1; - veccode[0]=0; - } - limit=1; + limit = 1; setConstructed(); return; } Timer timer(true); - mbft=1; - while((1<,std::vector>,cmp> q; + std::priority_queue, std::vector>, cmp> q; - if(usemp){ - for(int i=0;i1){ + while (q.size() > 1) { - int u=q.top().first; - size_t freq_u=q.top().second; + int u = q.top().first; + size_t freq_u = 
q.top().second; q.pop(); - int v=q.top().first; - size_t freq_v=q.top().second; + int v = q.top().first; + size_t freq_v = q.top().second; q.pop(); - ht.push_back(Node(0,&ht[u],&ht[v])); + ht.push_back(Node(0, &ht[u], &ht[v])); - q.push({ht.size()-1,freq_u+freq_v}); + q.push({ht.size() - 1, freq_u + freq_v}); } - root=ht.size()-1; + root = ht.size() - 1; - if(usemp) dfs_mp(&ht[root]); + if (usemp) dfs_mp(&ht[root]); else dfs_vec(&ht[root]); setConstructed(); @@ -237,14 +235,14 @@ namespace SZ3{ timer.stop("construct huffman tree"); } - uchar isConstructed(){ + uchar isConstructed() { return _constructed; } - void setConstructed(){ + void setConstructed() { - _constructed=1; + _constructed = 1; } }; @@ -252,34 +250,33 @@ namespace SZ3{ public: - void preprocess_encode(const T *const bins,size_t num_bin,int stateNum){ + void preprocess_encode(const T *const bins, size_t num_bin, int stateNum) { Timer timer(true); - tree.usemp=stateNum>=(1<<12)&&num_bin<2*stateNum?1:0; + tree.usemp = stateNum >= (1 << 12) && num_bin < 2 * stateNum ? 
1 : 0; tree.init(); - T __minval,__maxval; + T __minval, __maxval; - if(stateNum==0){ + if (stateNum == 0) { printf("please input the stateNum\n"); - __minval=*bins; - __maxval=*bins; - for(int i=1;i freq; // std::unordered_map freq; - std::map freq; + std::map freq; // freq.reserve(num_bin); - if(tree.offset==0){ - for(int i=0;i freq(tree.maxval); - if(tree.offset==0){ - for(int i=0;i &bins,int stateNum){ + void preprocess_encode(const std::vector &bins, int stateNum) { - preprocess_encode(bins.data(),bins.size(),stateNum); + preprocess_encode(bins.data(), bins.size(), stateNum); } - void saveAsCode(uchar *&c){ + void saveAsCode(uchar *&c) { Timer timer(true); - uchar *head=c; + uchar *head = c; // whether the tree is full binary tree - uchar& limit=tree.limit; + uchar &limit = tree.limit; - std::vector> mp(limit+1); + std::vector> mp(limit + 1); - if(tree.usemp){ - for(auto it:tree.mplen){ + if (tree.usemp) { + for (auto it: tree.mplen) { mp[it.second].push_back(it.first); } - } - else{ - for(int i=0;i1) writeBytesByte(c,tree.mbft); - else writeBytesByte(c,0x80|tree.mbft); + if (tree.maxval > 1) writeBytesByte(c, tree.mbft); + else writeBytesByte(c, 0x80 | tree.mbft); - writeBytesByte(c,((sizeof(T)-1)<<5)|(limit-1)); + writeBytesByte(c, ((sizeof(T) - 1) << 5) | (limit - 1)); - writeBytes(c,tree.offset,sizeof(T)<<3,mask,index); + writeBytes(c, tree.offset, sizeof(T) << 3, mask, index); - int32ToBytes_bigEndian(c,tree.n); - c+=4; + int32ToBytes_bigEndian(c, tree.n); + c += 4; - int cnt=mp[limit].size(); + int cnt = mp[limit].size(); - uchar logcnt=0; - while(logcnt<32&&(1<1){ + if (tree.n > 1) { - for(T it:mp[limit]){ + for (T it: mp[limit]) { - writeBytes(c,it,tree.mbft,mask,index); + writeBytes(c, it, tree.mbft, mask, index); - const int code=tree.usemp?tree.mpcode[it]:tree.veccode[it]; + const int code = tree.usemp ? 
tree.mpcode[it] : tree.veccode[it]; - writeBytes(c,code,logcnt,mask,index); + writeBytes(c, code, logcnt, mask, index); } - writeBytesClearMask(c,mask,index); + writeBytesClearMask(c, mask, index); } return; } - writeBytesByte(c,0x40|tree.mbft); + writeBytesByte(c, 0x40 | tree.mbft); - writeBytesByte(c,((sizeof(T)-1)<<5)|(limit-1)); + writeBytesByte(c, ((sizeof(T) - 1) << 5) | (limit - 1)); - writeBytes(c,tree.offset,sizeof(T)<<3,mask,index); + writeBytes(c, tree.offset, sizeof(T) << 3, mask, index); - int32ToBytes_bigEndian(c,tree.maxval); - c+=4; + int32ToBytes_bigEndian(c, tree.maxval); + c += 4; - for(uchar len=1;len<=limit;len++){ + for (uchar len = 1; len <= limit; len++) { - int cnt=mp[len].size(); + int cnt = mp[len].size(); - writeBytes(c,cnt,len,mask,index); + writeBytes(c, cnt, len, mask, index); - if(cnt){ + if (cnt) { - for(const T& it:mp[len]){ + for (const T &it: mp[len]) { - writeBytes(c,it,tree.mbft,mask,index); + writeBytes(c, it, tree.mbft, mask, index); - const int code=tree.usemp?tree.mpcode[it]:tree.veccode[it]; + const int code = tree.usemp ? tree.mpcode[it] : tree.veccode[it]; - writeBytes(c,code,len,mask,index); + writeBytes(c, code, len, mask, index); } } } - writeBytesClearMask(c,mask,index); + writeBytesClearMask(c, mask, index); timer.stop("saveAsCode"); @@ -454,41 +447,40 @@ namespace SZ3{ return; } - void loadAsCode(const uchar *&bytes,size_t &remaining_length){ + void loadAsCode(const uchar *&bytes, size_t &remaining_length) { Timer timer(true); tree.init(); - uchar feature=(*bytes)>>6; - tree.mbft=(*bytes)&0x3f; + uchar feature = (*bytes) >> 6; + tree.mbft = (*bytes) & 0x3f; ++bytes; - uchar szT=((*bytes)>>5)+1; - tree.limit=((*bytes)&0x1f)+1; + uchar szT = ((*bytes) >> 5) + 1; + tree.limit = ((*bytes) & 0x1f) + 1; ++bytes; - assert(szT==sizeof(T)); + assert(szT == sizeof(T)); - for(int i=0;i=(1<<12)&&(1<<(tree.limit-1))= (1 << 12) && (1 << (tree.limit - 1)) < tree.maxval ? 
1 : 0; - if(tree.usemp){ - tree.ht.reserve(2<p[e]==nullptr){ + if (u->p[e] == nullptr) { tree.ht.push_back(Node()); - u->p[e]=&tree.ht[tree.ht.size()-1]; + u->p[e] = &tree.ht[tree.ht.size() - 1]; } - u=u->p[e]; + u = u->p[e]; } - u->c=c; - if(tree.usemp){ - tree.mplen[c]=tree.limit; - tree.mpcode[c]=vec; - } - else{ - tree.veclen[c]=tree.limit; - tree.veccode[c]=vec; + u->c = c; + if (tree.usemp) { + tree.mplen[c] = tree.limit; + tree.mpcode[c] = vec; + } else { + tree.veclen[c] = tree.limit; + tree.veccode[c] = vec; } } - bytes+=(i+7)>>3; + bytes += (i + 7) >> 3; return; } - tree.n=0; + tree.n = 0; - int i=0; + int i = 0; - for(uchar len=1;len<=tree.limit;len++){ + for (uchar len = 1; len <= tree.limit; len++) { - int cnt=0; + int cnt = 0; - for(uchar j=0;jp[e]==nullptr){ + int e = readBit(bytes, i++); + vec |= e << k; + if (u->p[e] == nullptr) { tree.ht.push_back(Node()); - u->p[e]=&tree.ht[tree.ht.size()-1]; + u->p[e] = &tree.ht[tree.ht.size() - 1]; } - u=u->p[e]; + u = u->p[e]; } - u->c=c; + u->c = c; ++tree.n; - if(tree.usemp){ - tree.mplen[c]=len; - tree.mpcode[c]=vec; - } - else { - tree.veclen[c]=len; - tree.veccode[c]=vec; + if (tree.usemp) { + tree.mplen[c] = len; + tree.mpcode[c] = vec; + } else { + tree.veclen[c] = len; + tree.veccode[c] = vec; } } } - bytes+=(i+7)>>3; + bytes += (i + 7) >> 3; timer.stop("loadAsCode"); tree.setConstructed(); } - size_t encode(const T *bins, size_t num_bin, uchar *&bytes){ - if(tree.maxval==1){ + size_t encode(const T *bins, size_t num_bin, uchar *&bytes) { - int32ToBytes_bigEndian(bytes,num_bin^0x1234abcd); - bytes+=4; + if (tree.maxval == 1) { + + int32ToBytes_bigEndian(bytes, num_bin ^ 0x1234abcd); + bytes += 4; return 4; } @@ -624,58 +615,55 @@ namespace SZ3{ assert(tree.isConstructed()); - uchar *head=bytes; - bytes+=4; + uchar *head = bytes; + bytes += 4; - int len=0; + int len = 0; - uchar mask=0; - uchar index=0; + uchar mask = 0; + uchar index = 0; - if(tree.offset==0){ - if(tree.usemp){ - for(int i=0;i &bins, 
uchar *&bytes){ + size_t encode(const std::vector &bins, uchar *&bytes) { - return encode(bins.data(),bins.size(),bytes); + return encode(bins.data(), bins.size(), bytes); } - void postprocess_encode(){ + void postprocess_encode() { } - void preprocess_decode(){ + void preprocess_decode() { } - std::vector decode(const uchar *&bytes, size_t targetLength){ - - if(tree.maxval==1){ + std::vector decode(const uchar *&bytes, size_t targetLength) { - int len=bytesToInt32_bigEndian(bytes)^0x1234abcd; - bytes+=4; -// assert(len==targetLength); + if (tree.maxval == 1) { - return std::vector(len,tree.offset); + int len = bytesToInt32_bigEndian(bytes) ^ 0x1234abcd; + bytes += 4; + return std::vector(len, tree.offset); } - Timer timer(true); - assert(tree.isConstructed()); - Node *u=&tree.ht[tree.root]; - - int len=bytesToInt32_bigEndian(bytes)^0x1234abcd; - bytes+=4; - - std::vector a(targetLength); - int sza=0; - // a.reserve(targetLength); - - // for(int i=0;ip[readBit(bytes,i++)]; - - // if(u->isLeaf()){ - - // a[sza++]=u->c+tree.offset; - // u=&tree.ht[tree.root]; - // } - // } - - // use unroll loops to optimize the above code - int byteIndex=0; - int i=0; - uchar b; - if(tree.offset==0){ - - for(;i+8p[b&1]; - if(u->isLeaf()){ - a[sza++]=u->c; - u=&tree.ht[tree.root]; - } - u=u->p[(b>>1)&1]; - if(u->isLeaf()){ - a[sza++]=u->c; - u=&tree.ht[tree.root]; - } - u=u->p[(b>>2)&1]; - if(u->isLeaf()){ - a[sza++]=u->c; - u=&tree.ht[tree.root]; - } - u=u->p[(b>>3)&1]; - if(u->isLeaf()){ - a[sza++]=u->c; - u=&tree.ht[tree.root]; - } - u=u->p[(b>>4)&1]; - if(u->isLeaf()){ - a[sza++]=u->c; - u=&tree.ht[tree.root]; - } - u=u->p[(b>>5)&1]; - if(u->isLeaf()){ - a[sza++]=u->c; - u=&tree.ht[tree.root]; - } - u=u->p[(b>>6)&1]; - if(u->isLeaf()){ - a[sza++]=u->c; - u=&tree.ht[tree.root]; - } - u=u->p[(b>>7)&1]; - if(u->isLeaf()){ - a[sza++]=u->c; - u=&tree.ht[tree.root]; + int len = bytesToInt32_bigEndian(bytes) ^ 0x1234abcd; + bytes += 4; + + std::vector out(targetLength); + int outLen = 
0; + + if (tree.limit > 16) { + //if huffman tree is large, a cache of huffman codebook is used to increase the performance + //Reference paper: Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang, Haijun Zhang, Sheng Di, Dingwen Tao, and Franck Cappello, "Performance Optimization for Relative-Error-Bounded Lossy Compression on Scientific Data", IEEE Transactions on Parallel and Distributed Systems (IEEE TPDS), 2020. + //Reference code: https://github.com/szcompressor/SZ/blob/a92658e785c072de1061f549c6cbc6d42d0f7f22/sz/src/Huffman.c#L345 + + int maxBits = 16; + size_t count = 0; + Node *t = &tree.ht[tree.root]; + Node *n = t; + + int tableSize = 1 << maxBits; + std::vector valueTable(tableSize); + std::vector lengthTable(tableSize); + std::vector nodeTable(tableSize); + int j; + for (uint32_t i = 0; i < tableSize; i++) { + n = t; + j = 0; + uint32_t res = i; + while (!n->isLeaf() && j < maxBits) { + n = n->p[res & 0x00000001]; + res >>= 1; + j++; + } + if (!n->isLeaf()) { + nodeTable[i] = n; + valueTable[i] = -1; + lengthTable[i] = maxBits; + } else { + valueTable[i] = n->c + tree.offset; + lengthTable[i] = j; } } - } - else{ - - for(;i+8p[b&1]; - if(u->isLeaf()){ - a[sza++]=u->c+tree.offset; - u=&tree.ht[tree.root]; - } - u=u->p[(b>>1)&1]; - if(u->isLeaf()){ - a[sza++]=u->c+tree.offset; - u=&tree.ht[tree.root]; - } - u=u->p[(b>>2)&1]; - if(u->isLeaf()){ - a[sza++]=u->c+tree.offset; - u=&tree.ht[tree.root]; - } - u=u->p[(b>>3)&1]; - if(u->isLeaf()){ - a[sza++]=u->c+tree.offset; - u=&tree.ht[tree.root]; - } - u=u->p[(b>>4)&1]; - if(u->isLeaf()){ - a[sza++]=u->c+tree.offset; - u=&tree.ht[tree.root]; - } - u=u->p[(b>>5)&1]; - if(u->isLeaf()){ - a[sza++]=u->c+tree.offset; - u=&tree.ht[tree.root]; - } - u=u->p[(b>>6)&1]; - if(u->isLeaf()){ - a[sza++]=u->c+tree.offset; - u=&tree.ht[tree.root]; + int leftBits = 0; + uint32_t currentValue = 0; + size_t i = 0; + + while (count < targetLength) { + while (leftBits < maxBits) { + currentValue += (bytes[i] << leftBits); + 
leftBits += 8; + i++; + } + + uint32_t index = currentValue & ((1 << maxBits) - 1); + int value = valueTable[index]; + if (value != -1) { + out[count] = value; + int bitLength = lengthTable[index]; + leftBits -= bitLength; + currentValue >>= bitLength; + count++; + } else { + int bitLength = lengthTable[index]; + leftBits -= bitLength; + currentValue >>= bitLength; + n = nodeTable[index]; + while (!n->isLeaf()) { + if (!leftBits) { + currentValue += (bytes[i] << leftBits); + leftBits += 8; + i++; + } + n = n->p[(currentValue & 0x01)]; + leftBits--; + currentValue >>= 1; + } + out[count] = n->c + tree.offset; + count++; } - u=u->p[(b>>7)&1]; - if(u->isLeaf()){ - a[sza++]=u->c+tree.offset; - u=&tree.ht[tree.root]; + } + } else { + + // for small huffman tree, use loop unrolling to increase the performance + // for(int i=0;ip[readBit(bytes,i++)]; + // if(u->isLeaf()){ + // out[outLen++]=u->c+tree.offset; + // u=&tree.ht[tree.root]; + // } + // } + + int byteIndex = 0; + int i = 0; + uchar b; + Node *u = &tree.ht[tree.root]; + auto offset = tree.offset; + + for (; i + 8 < len; i += 8, byteIndex++) { + + b = bytes[byteIndex]; + + u = u->p[b & 1]; + if (u->isLeaf()) { + out[outLen++] = u->c + offset; + u = &tree.ht[tree.root]; + } + u = u->p[(b >> 1) & 1]; + if (u->isLeaf()) { + out[outLen++] = u->c + offset; + u = &tree.ht[tree.root]; + } + u = u->p[(b >> 2) & 1]; + if (u->isLeaf()) { + out[outLen++] = u->c + offset; + u = &tree.ht[tree.root]; + } + u = u->p[(b >> 3) & 1]; + if (u->isLeaf()) { + out[outLen++] = u->c + offset; + u = &tree.ht[tree.root]; + } + u = u->p[(b >> 4) & 1]; + if (u->isLeaf()) { + out[outLen++] = u->c + offset; + u = &tree.ht[tree.root]; + } + u = u->p[(b >> 5) & 1]; + if (u->isLeaf()) { + out[outLen++] = u->c + offset; + u = &tree.ht[tree.root]; + } + u = u->p[(b >> 6) & 1]; + if (u->isLeaf()) { + out[outLen++] = u->c + offset; + u = &tree.ht[tree.root]; + } + u = u->p[(b >> 7) & 1]; + if (u->isLeaf()) { + out[outLen++] = u->c + offset; + u = 
&tree.ht[tree.root]; } } - } - b=bytes[byteIndex]; + b = bytes[byteIndex]; - for(int j=0;jp[(b>>j)&1]; - if(u->isLeaf()){ - a[sza++]=u->c+tree.offset; - u=&tree.ht[tree.root]; + u = u->p[(b >> j) & 1]; + if (u->isLeaf()) { + out[outLen++] = u->c + tree.offset; + u = &tree.ht[tree.root]; + } } } + bytes += (len + 7) >> 3; - bytes+=(len+7)>>3; - - timer.stop("decode"); - - return a; + return out; } - void postprocess_decode(){ + void postprocess_decode() { } - void save(uchar *&c){ + void save(uchar *&c) { saveAsCode(c); } - void load(const uchar *&c,size_t &remaining_length){ + void load(const uchar *&c, size_t &remaining_length) { - loadAsCode(c,remaining_length); + loadAsCode(c, remaining_length); } }; From e69da8b6b355e006b6359b3ef58377b61417265b Mon Sep 17 00:00:00 2001 From: Kai Zhao Date: Sat, 29 Jun 2024 16:09:50 -0400 Subject: [PATCH 13/23] add XTC and water-based bio compressors --- include/SZ3/api/impl/SZBioMD.hpp | 41 +- include/SZ3/api/impl/SZDispatcher.hpp | 15 +- .../SZ3/compressor/SZGeneralCompressor.hpp | 53 +- include/SZ3/encoder/XtcBasedEncoder.hpp | 797 ++++++++++++++++++ include/SZ3/frontend/SZBioMDFrontend.hpp | 254 ++++-- .../SZ3/frontend/SZBioMDXtcBasedFrontend.hpp | 208 +++++ include/SZ3/lossless/Lossless_zstd.hpp | 27 +- include/SZ3/utils/ByteUtil.hpp | 120 +-- include/SZ3/utils/Config.hpp | 132 ++- tools/sz3/sz3.cpp | 85 +- 10 files changed, 1468 insertions(+), 264 deletions(-) create mode 100644 include/SZ3/encoder/XtcBasedEncoder.hpp create mode 100644 include/SZ3/frontend/SZBioMDXtcBasedFrontend.hpp diff --git a/include/SZ3/api/impl/SZBioMD.hpp b/include/SZ3/api/impl/SZBioMD.hpp index 087b79e4..8cc362d6 100644 --- a/include/SZ3/api/impl/SZBioMD.hpp +++ b/include/SZ3/api/impl/SZBioMD.hpp @@ -4,11 +4,14 @@ #include "SZ3/compressor/SZGeneralCompressor.hpp" #include "SZ3/frontend/SZBioMDFrontend.hpp" #include "SZ3/quantizer/IntegerQuantizer.hpp" +#include "SZ3/frontend/SZBioMDXtcBasedFrontend.hpp" +#include "SZ3/encoder/XtcBasedEncoder.hpp" 
#include "SZ3/predictor/ComposedPredictor.hpp" #include "SZ3/predictor/LorenzoPredictor.hpp" #include "SZ3/predictor/RegressionPredictor.hpp" #include "SZ3/predictor/PolyRegressionPredictor.hpp" #include "SZ3/lossless/Lossless_zstd.hpp" +#include "SZ3/lossless/Lossless_bypass.hpp" #include "SZ3/utils/Iterator.hpp" #include "SZ3/utils/Statistic.hpp" #include "SZ3/utils/Extraction.hpp" @@ -19,15 +22,13 @@ #include namespace SZ3 { - - + template char *SZ_compress_bioMD(Config &conf, T *data, size_t &outSize) { - assert(N == conf.N); assert(conf.cmprAlgo == ALGO_BIOMD); calAbsErrorBound(conf, data); - + char *cmpData; auto quantizer = LinearQuantizer(conf.absErrorBound, conf.quantbinCnt / 2); auto sz = make_sz_general_compressor(make_sz_bio_frontend(conf, quantizer), HuffmanEncoder(), @@ -35,18 +36,40 @@ namespace SZ3 { cmpData = (char *) sz->compress(conf, data, outSize); return cmpData; } - - + template void SZ_decompress_bioMD(const Config &conf, char *cmpData, size_t cmpSize, T *decData) { assert(conf.cmprAlgo == ALGO_BIOMD); - + uchar const *cmpDataPos = (uchar *) cmpData; LinearQuantizer quantizer; auto sz = make_sz_general_compressor(make_sz_bio_frontend(conf, quantizer), HuffmanEncoder(), Lossless_zstd()); sz->decompress(cmpDataPos, cmpSize, decData); - } + + template + char *SZ_compress_bioMDXtcBased(Config &conf, T *data, size_t &outSize) { + assert(N == conf.N); + assert(conf.cmprAlgo == ALGO_BIOMDXTC); + calAbsErrorBound(conf, data); + + char *cmpData; + auto sz = make_sz_general_compressor(SZBioMDXtcBasedFrontend(conf), XtcBasedEncoder(), + Lossless_bypass()); + cmpData = (char *) sz->compress(conf, data, outSize); + return cmpData; + } + + template + void SZ_decompress_bioMDXtcBased(const Config &conf, char *cmpData, size_t cmpSize, T *decData) { + assert(conf.cmprAlgo == ALGO_BIOMDXTC); + + const uchar *cmpDataPos = (uchar *) cmpData; + auto sz = make_sz_general_compressor(SZBioMDXtcBasedFrontend(conf), + XtcBasedEncoder(), Lossless_bypass()); + 
sz->decompress(cmpDataPos, cmpSize, decData); + } + } -#endif \ No newline at end of file +#endif diff --git a/include/SZ3/api/impl/SZDispatcher.hpp b/include/SZ3/api/impl/SZDispatcher.hpp index 694bf822..57b04917 100644 --- a/include/SZ3/api/impl/SZDispatcher.hpp +++ b/include/SZ3/api/impl/SZDispatcher.hpp @@ -12,10 +12,10 @@ namespace SZ3 { template char *SZ_compress_dispatcher(Config &conf, T *data, size_t &outSize) { - + assert(N == conf.N); calAbsErrorBound(conf, data); - + char *cmpData; if (conf.absErrorBound == 0) { auto zstd = Lossless_zstd(); @@ -28,11 +28,12 @@ namespace SZ3 { cmpData = (char *) SZ_compress_Interp_lorenzo(conf, data, outSize); } else if (conf.cmprAlgo == ALGO_BIOMD) { cmpData = (char *) SZ_compress_bioMD(conf, data, outSize); + } else if (conf.cmprAlgo == ALGO_BIOMDXTC) { + cmpData = (char *) SZ_compress_bioMDXtcBased(conf, data, outSize); } return cmpData; } - - + template void SZ_decompress_dispatcher(Config &conf, char *cmpData, size_t cmpSize, T *decData) { if (conf.absErrorBound == 0) { @@ -45,11 +46,13 @@ namespace SZ3 { SZ_decompress_Interp(conf, cmpData, cmpSize, decData); } else if (conf.cmprAlgo == ALGO_BIOMD) { SZ_decompress_bioMD(conf, cmpData, cmpSize, decData); + } else if (conf.cmprAlgo == ALGO_BIOMDXTC) { + SZ_decompress_bioMDXtcBased(conf, cmpData, cmpSize, decData); } else { printf("SZ_decompress_dispatcher, Method not supported\n"); exit(0); } - + } } -#endif \ No newline at end of file +#endif diff --git a/include/SZ3/compressor/SZGeneralCompressor.hpp b/include/SZ3/compressor/SZGeneralCompressor.hpp index 15be3950..b1c9f08c 100644 --- a/include/SZ3/compressor/SZGeneralCompressor.hpp +++ b/include/SZ3/compressor/SZGeneralCompressor.hpp @@ -14,11 +14,10 @@ namespace SZ3 { template class SZGeneralCompressor : public concepts::CompressorInterface { - public: - - + public: + SZGeneralCompressor(Frontend frontend, Encoder encoder, Lossless lossless) : - frontend(frontend), encoder(encoder), lossless(lossless) { + 
frontend(frontend), encoder(encoder), lossless(lossless) { static_assert(std::is_base_of, Frontend>::value, "must implement the frontend interface"); static_assert(std::is_base_of, Encoder>::value, @@ -26,74 +25,72 @@ namespace SZ3 { static_assert(std::is_base_of::value, "must implement the lossless interface"); } - + uchar *compress(const Config &conf, T *data, size_t &compressed_size) { - + std::vector quant_inds = frontend.compress(data); - + encoder.preprocess_encode(quant_inds, frontend.get_radius() * 2); - size_t bufferSize = 1.2 * (frontend.size_est() + encoder.size_est() + sizeof(T) * quant_inds.size()); - + size_t bufferSize = 1.2 * (conf.size_est() + frontend.size_est() + encoder.size_est() + sizeof(T) * quant_inds.size()); + uchar *buffer = new uchar[bufferSize]; uchar *buffer_pos = buffer; - + frontend.save(buffer_pos); - + encoder.save(buffer_pos); encoder.encode(quant_inds, buffer_pos); encoder.postprocess_encode(); - + assert(buffer_pos - buffer < bufferSize); - + uchar *lossless_data = lossless.compress(buffer, buffer_pos - buffer, compressed_size); lossless.postcompress_data(buffer); - + return lossless_data; } - + T *decompress(uchar const *cmpData, const size_t &cmpSize, size_t num) { T *dec_data = new T[num]; return decompress(cmpData, cmpSize, dec_data); } - + T *decompress(uchar const *cmpData, const size_t &cmpSize, T *decData) { size_t remaining_length = cmpSize; - + Timer timer(true); auto compressed_data = lossless.decompress(cmpData, remaining_length); uchar const *compressed_data_pos = compressed_data; // timer.stop("Lossless"); - + frontend.load(compressed_data_pos, remaining_length); - + encoder.load(compressed_data_pos, remaining_length); - + timer.start(); auto quant_inds = encoder.decode(compressed_data_pos, frontend.get_num_elements()); encoder.postprocess_decode(); // timer.stop("Decoder"); - + lossless.postdecompress_data(compressed_data); - + timer.start(); frontend.decompress(quant_inds, decData); // timer.stop("Prediction & 
Recover"); return decData; } - - - private: + + private: Frontend frontend; Encoder encoder; Lossless lossless; }; - + template std::shared_ptr> make_sz_general_compressor(Frontend frontend, Encoder encoder, Lossless lossless) { return std::make_shared>(frontend, encoder, lossless); } - - + } #endif diff --git a/include/SZ3/encoder/XtcBasedEncoder.hpp b/include/SZ3/encoder/XtcBasedEncoder.hpp new file mode 100644 index 00000000..9af9f5fd --- /dev/null +++ b/include/SZ3/encoder/XtcBasedEncoder.hpp @@ -0,0 +1,797 @@ +/*! \file XTC compression encoder + * This is based on libxdrf.cpp from GROMACS (2024-05-18) + * License: LGPL 2.1 or later + * \author The GROMACS Authors + * \author Magnus Lundborg: Modifications to fit as SZ3 encoder + */ + +#ifndef _SZ_XTC3_ENCODER_HPP +#define _SZ_XTC3_ENCODER_HPP + +#include + +#include + +#include "SZ3/def.hpp" +#include "SZ3/encoder/Encoder.hpp" + +//#define DEBUG_OUTPUT + +/* What follows are the C routine to read/write compressed coordinates together + * with some routines to assist in this task (those are marked + * static and cannot be called from user programs) + */ + +// Integers above 2^24 do not have unique representations in +// 32-bit floats ie with 24 bits of precision. We use maxAbsoluteInt +// to check that float values can be transformed into an in-range +// 32-bit integer. There is no need to ensure we are within the range +// of ints with exact floating-point representations. However, we should +// reject all floats above that which converts to an in-range 32-bit integer. 
+const float maxAbsoluteInt = std::nextafterf(float(INT_MAX), 0.F); // NOLINT(cert-err58-cpp)
+
+#ifndef SQR
+# define SQR(x) ((x) * (x))
+#endif
+/* Table of per-component ranges used for the "small" (delta) encoding.
+ * The encoder packs three values, each smaller than magicInts[k], into k bits
+ * (it passes the table index itself as the bit count), e.g. magicInts[9] == 8
+ * and 8 * 8 * 8 == 2^9. Entries below FIRSTIDX are unused placeholders.
+ */
+static const int magicInts[] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 8,
+    10, 12, 16, 20, 25, 32, 40, 50, 64, 80,
+    101, 128, 161, 203, 256, 322, 406, 512, 645, 812,
+    1024, 1290, 1625, 2048, 2580, 3250, 4096, 5060, 6501, 8192,
+    10321, 13003, 16384, 20642, 26007, 32768, 41285, 52015, 65536, 82570,
+    104031, 131072, 165140, 208063, 262144, 330280, 416127, 524287, 660561, 832255,
+    1048576, 1321122, 1664510, 2097152, 2642245, 3329021, 4194304, 5284491, 6658042, 8388607,
+    10568983, 13316085, 16777216
+};
+
+/* First table index that holds a usable (non-zero) range. */
+#define FIRSTIDX 9
+/* note that magicInts[FIRSTIDX-1] == 0 */
+/* Number of entries in magicInts, i.e. one past the last valid index.
+ * The cast target was missing in this copy; int matches how the value is
+ * compared against the int table indices.
+ */
+#define LASTIDX static_cast<int>((sizeof(magicInts) / sizeof(*magicInts)))
+
+namespace SZ3 {
+
+    /* State of the bit stream used by sendbits()/receivebits(). */
+    struct DataBuffer {
+        std::size_t index;      /* position in data of the next byte to write/read */
+        int lastbits;           /* number of bits held in lastbyte that are not yet written/consumed */
+        unsigned int lastbyte;  /* bit accumulator; its low lastbits bits are the pending ones */
+        unsigned char *data;    /* byte storage, allocated and released by the caller */
+    };
+
+/*! \brief encode num into buf using the specified number of bits
+ *
+ * This routines appends the value of num to the bits already present in
+ * the databuffer. You need to give it the number of bits to use and you
+ * better make sure that this number of bits is enough to hold the value
+ * Also num must be positive.
+ *
+ * \param buffer      bit stream to append to; no bounds check is done on buffer->data
+ * \param num_of_bits number of bits of num to append
+ * \param num         value to append, written most significant bits first
+ */
+
+    static void sendbits(struct DataBuffer *buffer, int num_of_bits, int num) {
+
+        unsigned int lastbyte;
+        int lastbits;
+
+        lastbits = buffer->lastbits;
+        lastbyte = buffer->lastbyte;
+        /* emit whole bytes first, highest-order byte of num first */
+        while (num_of_bits >= CHAR_BIT) {
+            lastbyte = (lastbyte << CHAR_BIT) | ((num >> (num_of_bits - CHAR_BIT)) /* & 0xff*/);
+            buffer->data[buffer->index++] = lastbyte >> lastbits;
+            num_of_bits -= CHAR_BIT;
+        }
+        /* then the remaining (< CHAR_BIT) bits; flush a byte once the accumulator holds one */
+        if (num_of_bits > 0) {
+            lastbyte = (lastbyte << num_of_bits) | num;
+            lastbits += num_of_bits;
+            if (lastbits >= CHAR_BIT) {
+                lastbits -= CHAR_BIT;
+                buffer->data[buffer->index++] = lastbyte >> lastbits;
+            }
+        }
+        buffer->lastbits = lastbits;
+        buffer->lastbyte = lastbyte;
+        /* keep the pending bits visible, left-aligned, in the not-yet-completed byte */
+        if (lastbits > 0) {
+            buffer->data[buffer->index] = lastbyte << (CHAR_BIT - lastbits);
+        }
+    }
+
+/*! \brief calculate bitSize of an integer
+ *
+ * return the number of bits needed to store an integer with given max size
+ *
+ * The probe value is 64 bits wide. The previous int probe was shifted past
+ * INT_MAX for size >= 2^30, which is signed overflow (undefined behaviour);
+ * such sizes now yield 31. A size that is zero or negative yields 0.
+ */
+
+    static int sizeofint(const int size) {
+        long long num = 1;
+        int num_of_bits = 0;
+
+        while (size >= num && num_of_bits < 32) {
+            num_of_bits++;
+            num <<= 1;
+        }
+        return num_of_bits;
+    }
+
+/*! \brief calculate 'bitSize' of compressed ints
+ *
+ * given the number of small unsigned integers and the maximum value
+ * return the number of bits needed to read or write them with the
+ * routines receiveints and sendints. You need this parameter when
+ * calling these routines. Note that for many calls I can use
+ * the variable 'smallIdx' which is exactly the number of bits, and
+ * so I don't need to call 'sizeofints' for those calls.
+ *
+ * \param num_of_ints number of entries in sizes
+ * \param sizes       exclusive upper bound of each integer
+ * \return bit length of the product of all sizes, computed byte by byte
+ */
+
+    static int sizeofints(const int num_of_ints, const unsigned int sizes[]) {
+        int i, num;
+        int bytes[32];
+        unsigned int num_of_bytes, num_of_bits, bytecnt, tmp;
+        /* bytes[] holds the running product as a little-endian base-256 number, starting at 1 */
+        num_of_bytes = 1;
+        bytes[0] = 1;
+        num_of_bits = 0;
+        for (i = 0; i < num_of_ints; i++) {
+            /* multiply the running product by sizes[i]; tmp carries into the next byte */
+            tmp = 0;
+            for (bytecnt = 0; bytecnt < num_of_bytes; bytecnt++) {
+                tmp = bytes[bytecnt] * sizes[i] + tmp;
+                bytes[bytecnt] = tmp & 0xff;
+                tmp >>= CHAR_BIT;
+            }
+            /* the remaining carry extends the number by new high-order bytes */
+            while (tmp != 0) {
+                bytes[bytecnt++] = tmp & 0xff;
+                tmp >>= CHAR_BIT;
+            }
+            num_of_bytes = bytecnt;
+        }
+        /* count the bits used in the most significant byte, then add CHAR_BIT per lower byte */
+        num = 1;
+        num_of_bytes--;
+        while (bytes[num_of_bytes] >= num) {
+            num_of_bits++;
+            num *= 2;
+        }
+        return num_of_bits + num_of_bytes * CHAR_BIT;
+    }
+
+/*! \brief send a small set of small integers in compressed format
+ *
+ * this routine is used internally by xdr3dfcoord, to send a set of
+ * small integers to the buffer.
+ * Multiplication with fixed (specified maximum ) sizes is used to get
+ * to one big, multibyte integer. Although the routine could be
+ * modified to handle sizes bigger than 16777216, or more than just
+ * a few integers, this is not done, because the gain in compression
+ * isn't worth the effort. Note that overflowing the multiplication
+ * or the byte buffer (32 bytes) is unchecked and causes bad results.
+ *
+ * \param buffer      bit stream to append to
+ * \param num_of_ints number of entries in sizes and nums
+ * \param num_of_bits total number of bits to emit for the combined value
+ * \param sizes       exclusive upper bound of each entry of nums
+ * \param nums        values to pack; nums[i] >= sizes[i] (for i >= 1) terminates the program
+ */
+
+    static void sendints(struct DataBuffer *buffer,
+                         const int num_of_ints,
+                         const int num_of_bits,
+                         unsigned int sizes[],
+                         unsigned int nums[]) {
+
+        int i, num_of_bytes, bytecnt;
+        unsigned int bytes[32], tmp;
+
+        /* start the little-endian base-256 accumulator with nums[0] */
+        tmp = nums[0];
+        num_of_bytes = 0;
+        do {
+            bytes[num_of_bytes++] = tmp & 0xff;
+            tmp >>= CHAR_BIT;
+        } while (tmp != 0);
+
+        /* fold in each further value: accumulator = accumulator * sizes[i] + nums[i] */
+        for (i = 1; i < num_of_ints; i++) {
+            if (nums[i] >= sizes[i]) {
+                fprintf(stderr,
+                        "major breakdown in sendints num %u doesn't "
+                        "match size %u\n",
+                        nums[i],
+                        sizes[i]);
+                exit(1);
+            }
+            /* use one step multiply */
+            tmp = nums[i];
+            for (bytecnt = 0; bytecnt < num_of_bytes; bytecnt++) {
+                tmp = bytes[bytecnt] * sizes[i] + tmp;
+                bytes[bytecnt] = tmp & 0xff;
+                tmp >>= CHAR_BIT;
+            }
+            while (tmp != 0) {
+                bytes[bytecnt++] = tmp & 0xff;
+                tmp >>= CHAR_BIT;
+            }
+            num_of_bytes = bytecnt;
+        }
+        /* write the accumulator low byte first, using exactly num_of_bits bits in total */
+        if (num_of_bits >= num_of_bytes * CHAR_BIT) {
+            for (i = 0; i < num_of_bytes; i++) {
+                sendbits(buffer, CHAR_BIT, bytes[i]);
+            }
+            /* pad with zero bits up to num_of_bits */
+            sendbits(buffer, num_of_bits - num_of_bytes * CHAR_BIT, 0);
+        } else {
+            for (i = 0; i < num_of_bytes - 1; i++) {
+                sendbits(buffer, CHAR_BIT, bytes[i]);
+            }
+            /* the top byte only gets the bits that are left */
+            sendbits(buffer, num_of_bits - (num_of_bytes - 1) * CHAR_BIT, bytes[i]);
+        }
+    }
+
+/*! \brief decode number from buffer using specified number of bits
+ *
+ * extract the number of bits from the data array in buffer and construct an integer
+ * from it. Return that value.
+ *
+ * \param buffer      bit stream to read from; index, lastbits and lastbyte are advanced
+ * \param num_of_bits number of bits to read (0..32)
+ * \return the decoded value, most significant bits read first
+ */
+
+    static int receivebits(struct DataBuffer *buffer, int num_of_bits) {
+
+        unsigned int num;
+        int lastbits;
+        unsigned int lastbyte;
+        /* Built in unsigned arithmetic: `(1 << num_of_bits) - 1` on int is undefined for
+         * num_of_bits >= 31, and callers may request up to 32 bits (see sizeofint()). */
+        const unsigned int mask = (num_of_bits >= 32) ? ~0U : ((1U << num_of_bits) - 1U);
+
+        lastbits = buffer->lastbits;
+        lastbyte = buffer->lastbyte;
+
+        num = 0;
+        /* whole bytes first; each new byte is placed at its final position in num */
+        while (num_of_bits >= CHAR_BIT) {
+            lastbyte = (lastbyte << CHAR_BIT) | buffer->data[buffer->index++];
+            num |= (lastbyte >> lastbits) << (num_of_bits - CHAR_BIT);
+            num_of_bits -= CHAR_BIT;
+        }
+        /* then the remaining (< CHAR_BIT) bits, fetching one more byte only if needed */
+        if (num_of_bits > 0) {
+            if (lastbits < num_of_bits) {
+                lastbits += CHAR_BIT;
+                lastbyte = (lastbyte << CHAR_BIT) | buffer->data[buffer->index++];
+            }
+            lastbits -= num_of_bits;
+            num |= (lastbyte >> lastbits) & ((1U << num_of_bits) - 1U);
+        }
+        /* drop stale accumulator bits that leaked in above the requested width */
+        num &= mask;
+        buffer->lastbits = lastbits;
+        buffer->lastbyte = lastbyte;
+        return static_cast<int>(num);
+    }
+
+/*! \brief decode 'small' integers from the buf array
+ *
+ * this routine is the inverse from sendints() and decodes the small integers
+ * written to buf by calculating the remainder and doing divisions with
+ * the given sizes[]. You need to specify the total number of bits to be
+ * used from buf in num_of_bits.
+ *
+ * \param buffer      bit stream to read from
+ * \param num_of_ints number of entries in sizes and nums
+ * \param num_of_bits total number of bits that hold the packed value
+ * \param sizes       exclusive upper bound of each packed integer
+ * \param nums        receives the unpacked integers
+ */
+
+    static void receiveints(struct DataBuffer *buffer,
+                            const int num_of_ints,
+                            int num_of_bits,
+                            const unsigned int sizes[],
+                            int nums[]) {
+        int bytes[32];
+        int i, j, num_of_bytes, p, num;
+
+        /* read the packed value back as a little-endian base-256 number */
+        bytes[0] = bytes[1] = bytes[2] = bytes[3] = 0;
+        num_of_bytes = 0;
+        while (num_of_bits > CHAR_BIT) {
+            bytes[num_of_bytes++] = receivebits(buffer, CHAR_BIT);
+            num_of_bits -= CHAR_BIT;
+        }
+        if (num_of_bits > 0) {
+            bytes[num_of_bytes++] = receivebits(buffer, num_of_bits);
+        }
+        /* peel off the integers last to first: long division by sizes[i], remainder is nums[i] */
+        for (i = num_of_ints - 1; i > 0; i--) {
+            num = 0;
+            for (j = num_of_bytes - 1; j >= 0; j--) {
+                num = (num << CHAR_BIT) | bytes[j];
+                p = num / sizes[i];
+                bytes[j] = p;
+                num = num - p * sizes[i];
+            }
+            nums[i] = num;
+        }
+        /* what is left of the quotient is the first integer */
+        nums[0] = bytes[0] | (bytes[1] << CHAR_BIT) | (bytes[2] << 16) | (bytes[3] << 24);
+    }
+
+    /*! \brief Encoder that packs quantized 3d coordinates with the XTC bit-packing scheme.
+     *
+     * The element type is read through int pointers below, so T is expected to be int.
+     * Template arguments in this class were missing in this copy of the patch and have
+     * been restored from the declared variable types.
+     */
+    template <class T>
+    class XtcBasedEncoder : public concepts::EncoderInterface<T> {
+
+    private:
+        Config conf_;
+
+    public:
+        void preprocess_encode(const std::vector<T> &quantData, int stateNum) {}
+
+        /*! \brief Compress 3d coordinates to memory.
+         *
+         * this routine writes a large number of, already quantized (as integers with a
+         * given precision) 3d coordinates.
+         * The minimum and maximum value are calculated to determine the range.
+         * The limited range of integers so found, is used to compress the coordinates.
+         * In addition the differences between successive coordinates is calculated.
+         * If the difference happens to be 'small' then only the difference is saved,
+         * compressing the data even more. The notion of 'small' is changed dynamically
+         * and is enlarged or reduced whenever needed or possible.
+         * Extra compression is achieved in the case of commonly used water models.
+         * In those the Oxygen position is followed by the two hydrogens. In order to
+         * make the differences smaller (and thereby compression the data better) the
+         * order is changed into first one hydrogen then the oxygen, followed by the
+         * other hydrogen. This is rather special, but it shouldn't harm in the general case.
+         *
+         * Output layout: minInt[3], maxInt[3], smallIdx (one int each), the payload
+         * length as uint64_t, then the packed payload bytes.
+         *
+         * \param quantData quantized coordinates as x,y,z triplets; elements beyond the
+         *                  last complete triplet are ignored
+         * \param bytes     output position; advanced past the written data. No capacity
+         *                  check is done on the destination.
+         * \return number of bytes written
+         */
+        size_t encode(const std::vector<T> &quantData, unsigned char *&bytes) {
+            size_t size3 = quantData.size();
+
+            size_t bufferSize = size3 * 1.2;
+            struct DataBuffer buffer;
+            int *intBufferPoiner = reinterpret_cast<int *>(malloc(size3 * sizeof(*intBufferPoiner)));
+            buffer.data = reinterpret_cast<unsigned char *>(malloc(bufferSize * sizeof(int)));
+            /* the coordinate scratch buffer is checked too (malloc(0) may legitimately return null) */
+            if (buffer.data == nullptr || (intBufferPoiner == nullptr && size3 > 0)) {
+                fprintf(stderr, "malloc failed\n");
+                exit(1);
+            }
+            buffer.index = 0;
+            buffer.lastbits = 0;
+            buffer.lastbyte = 0;
+
+            unsigned char *charOutputPtr = bytes;
+            unsigned int *intOutputPtr = reinterpret_cast<unsigned int *>(charOutputPtr);
+            uint64_t numTriplets = size3 / 3;
+
+#ifdef DEBUG_OUTPUT
+            printf("Encoding: %llu triplets to write.\n", numTriplets);
+            for (size_t i = 0; i < numTriplets; i++)
+            {
+                printf("Triplet %ld: %d %d %d\n", i, quantData[i * 3], quantData[i * 3 + 1], quantData[i * 3 + 2]);
+            }
+#endif
+
+            int *localIntBufferPointer = intBufferPoiner;
+            int minInt[3] = {INT_MAX, INT_MAX, INT_MAX};
+            int maxInt[3] = {INT_MIN, INT_MIN, INT_MIN};
+            int minDiff = INT_MAX;
+            int oldLocalValue1 = 0;
+            int oldLocalValue2 = 0;
+            int oldLocalValue3 = 0;
+            const int *inputDataPtr = quantData.data();
+            /* Stop at the last complete triplet: the loop consumes three values per
+             * iteration and would otherwise read and write past both buffers when the
+             * element count is not a multiple of 3. */
+            const int *inputDataEnd = quantData.data() + numTriplets * 3;
+            while (inputDataPtr < inputDataEnd) {
+                int localValue1 = *inputDataPtr++;
+                if (localValue1 < minInt[0]) {
+                    minInt[0] = localValue1;
+                }
+                if (localValue1 > maxInt[0]) {
+                    maxInt[0] = localValue1;
+                }
+                *localIntBufferPointer++ = localValue1;
+
+                int localValue2 = *inputDataPtr++;
+                if (localValue2 < minInt[1]) {
+                    minInt[1] = localValue2;
+                }
+                if (localValue2 > maxInt[1]) {
+                    maxInt[1] = localValue2;
+                }
+                *localIntBufferPointer++ = localValue2;
+
+                int localValue3 = *inputDataPtr++;
+                if (localValue3 < minInt[2]) {
+                    minInt[2] = localValue3;
+                }
+                if (localValue3 > maxInt[2]) {
+                    maxInt[2] = localValue3;
+                }
+                *localIntBufferPointer++ = localValue3;
+                int diff = std::abs(oldLocalValue1 - localValue1) + std::abs(oldLocalValue2 - localValue2)
+                           + std::abs(oldLocalValue3 - localValue3);
+                /* the first triplet has no predecessor, so it does not contribute to minDiff */
+                if (diff < minDiff && inputDataPtr > quantData.data() + 3) {
+                    minDiff = diff;
+                }
+                oldLocalValue1 = localValue1;
+                oldLocalValue2 = localValue2;
+                oldLocalValue3 = localValue3;
+            }
+
+            for (int i = 0; i < 3; i++) {
+                *intOutputPtr++ = minInt[i];
+            }
+            for (int i = 0; i < 3; i++) {
+                *intOutputPtr++ = maxInt[i];
+            }
+
+            if (static_cast<float>(maxInt[0]) - static_cast<float>(minInt[0]) >= maxAbsoluteInt
+                || static_cast<float>(maxInt[1]) - static_cast<float>(minInt[1]) >= maxAbsoluteInt
+                || static_cast<float>(maxInt[2]) - static_cast<float>(minInt[2]) >= maxAbsoluteInt) {
+                /* turning value in unsigned by subtracting minInt
+                 * would cause overflow
+                 */
+                /* NOTE(review): this only reports the problem and then carries on. */
+                fprintf(stderr,
+                        "Error. Turning value in unsigned by subtracting minInt would cause "
+                        "overflow.\n");
+            }
+            unsigned int sizeInt[3];
+            sizeInt[0] = maxInt[0] - minInt[0] + 1;
+            sizeInt[1] = maxInt[1] - minInt[1] + 1;
+            sizeInt[2] = maxInt[2] - minInt[2] + 1;
+            unsigned int bitSizeInt[3];
+            int bitSize;
+
+#ifdef DEBUG_OUTPUT
+            printf(" minInt %d %d %d, maxInt %d %d %d\n",
+                   minInt[0],
+                   minInt[1],
+                   minInt[2],
+                   maxInt[0],
+                   maxInt[1],
+                   maxInt[2]);
+#endif
+
+            /* check if one of the sizes is too big to be multiplied */
+            if ((sizeInt[0] | sizeInt[1] | sizeInt[2]) > 0xffffff) {
+                bitSizeInt[0] = sizeofint(sizeInt[0]);
+                bitSizeInt[1] = sizeofint(sizeInt[1]);
+                bitSizeInt[2] = sizeofint(sizeInt[2]);
+                bitSize = 0; /* flag the use of large sizes */
+            } else {
+                bitSize = sizeofints(3, sizeInt);
+            }
+            /* LASTIDX is one past the end of magicInts. Table indices are capped at the
+             * last valid entry; previously smallIdx/maxIdx could reach LASTIDX and
+             * magicInts was read out of bounds (always so for a single-triplet input,
+             * where minDiff stays INT_MAX). */
+            const int lastMagicIdx = LASTIDX - 1;
+            int smallIdx = FIRSTIDX;
+            while (smallIdx < lastMagicIdx && magicInts[smallIdx] < minDiff) {
+                smallIdx++;
+            }
+            *intOutputPtr++ = smallIdx;
+
+            int maxIdx = std::min(lastMagicIdx, smallIdx + CHAR_BIT);
+            int minIdx = maxIdx - CHAR_BIT; /* often this equal smallIdx */
+            int smaller = magicInts[std::max(FIRSTIDX, smallIdx - 1)] / 2;
+            int smallNum = magicInts[smallIdx] / 2;
+            unsigned int sizeSmall[3];
+            sizeSmall[0] = magicInts[smallIdx];
+            sizeSmall[1] = magicInts[smallIdx];
+            sizeSmall[2] = magicInts[smallIdx];
+            int larger = magicInts[maxIdx] / 2;
+            size_t i = 0;
+            unsigned int *localUnsignedIntBufferPointer = reinterpret_cast<unsigned int *>(intBufferPoiner);
+            int prevCoord[3] = {0, 0, 0};
+            int prevRun = -1;
+            while (i < numTriplets) {
+                bool isSmall = false;
+                int *thisCoord = reinterpret_cast<int *>(localUnsignedIntBufferPointer) + i * 3;
+                /* +1: grow the 'small' range after this group, -1: shrink it, 0: keep it */
+                int isSmaller;
+                if (smallIdx < maxIdx && i >= 1 && std::abs(thisCoord[0] - prevCoord[0]) < larger
+                    && std::abs(thisCoord[1] - prevCoord[1]) < larger
+                    && std::abs(thisCoord[2] - prevCoord[2]) < larger) {
+                    isSmaller = 1;
+                } else if (smallIdx > minIdx) {
+                    isSmaller = -1;
+                } else {
+                    isSmaller = 0;
+                }
+                if (i + 1 < numTriplets) {
+                    if (std::abs(thisCoord[0] - thisCoord[3]) < smallNum
+                        && std::abs(thisCoord[1] - thisCoord[4]) < smallNum
+                        && std::abs(thisCoord[2] - thisCoord[5]) < smallNum) {
+                        /* interchange first with second atom for better
+                         * compression of water molecules
+                         */
+                        int tmp = thisCoord[0];
+                        thisCoord[0] = thisCoord[3];
+                        thisCoord[3] = tmp;
+                        tmp = thisCoord[1];
+                        thisCoord[1] = thisCoord[4];
+                        thisCoord[4] = tmp;
+                        tmp = thisCoord[2];
+                        thisCoord[2] = thisCoord[5];
+                        thisCoord[5] = tmp;
+                        isSmall = true;
+                    }
+                }
+                /* the group leader is stored in full, relative to minInt */
+                unsigned int tmpCoord[30];
+                tmpCoord[0] = thisCoord[0] - minInt[0];
+                tmpCoord[1] = thisCoord[1] - minInt[1];
+                tmpCoord[2] = thisCoord[2] - minInt[2];
+                if (bitSize == 0) {
+                    sendbits(&buffer, bitSizeInt[0], tmpCoord[0]);
+                    sendbits(&buffer, bitSizeInt[1], tmpCoord[1]);
+                    sendbits(&buffer, bitSizeInt[2], tmpCoord[2]);
+                } else {
+                    sendints(&buffer, 3, bitSize, sizeInt, tmpCoord);
+                }
+                prevCoord[0] = thisCoord[0];
+                prevCoord[1] = thisCoord[1];
+                prevCoord[2] = thisCoord[2];
+                thisCoord = thisCoord + 3;
+                i++;
+
+                /* collect the following triplets that are close to their predecessor as deltas */
+                int run = 0;
+                if (!isSmall && isSmaller == -1) {
+                    isSmaller = 0;
+                }
+                while (isSmall && run < CHAR_BIT * 3) {
+                    if (isSmaller == -1
+                        && (SQR(thisCoord[0] - prevCoord[0]) + SQR(thisCoord[1] - prevCoord[1])
+                                    + SQR(thisCoord[2] - prevCoord[2])
+                            >= smaller * smaller)) {
+                        isSmaller = 0;
+                    }
+
+                    tmpCoord[run++] = thisCoord[0] - prevCoord[0] + smallNum;
+                    tmpCoord[run++] = thisCoord[1] - prevCoord[1] + smallNum;
+                    tmpCoord[run++] = thisCoord[2] - prevCoord[2] + smallNum;
+
+                    prevCoord[0] = thisCoord[0];
+                    prevCoord[1] = thisCoord[1];
+                    prevCoord[2] = thisCoord[2];
+
+                    i++;
+                    thisCoord = thisCoord + 3;
+                    isSmall = 0;
+                    if (i < numTriplets && std::abs(thisCoord[0] - prevCoord[0]) < smallNum
+                        && std::abs(thisCoord[1] - prevCoord[1]) < smallNum
+                        && std::abs(thisCoord[2] - prevCoord[2]) < smallNum) {
+                        isSmall = true;
+                    }
+                }
+                if (run != prevRun || isSmaller != 0) {
+                    prevRun = run;
+                    sendbits(&buffer, 1, 1); /* flag the change in run-length */
+                    sendbits(&buffer, 5, run + isSmaller + 1);
+                } else {
+                    sendbits(&buffer, 1, 0); /* flag the fact that runlength did not change */
+                }
+                for (int k = 0; k < run; k += 3) {
+                    sendints(&buffer, 3, smallIdx, sizeSmall, &tmpCoord[k]);
+                }
+                if (isSmaller != 0) {
+                    smallIdx += isSmaller;
+                    if (isSmaller < 0) {
+                        smallNum = smaller;
+                        smaller = magicInts[smallIdx - 1] / 2;
+                    } else {
+                        smaller = smallNum;
+                        smallNum = magicInts[smallIdx] / 2;
+                    }
+                    sizeSmall[0] = sizeSmall[1] = sizeSmall[2] = magicInts[smallIdx];
+                }
+            }
+            /* count the partially filled last byte */
+            if (buffer.lastbits != 0) {
+                buffer.index++;
+            }
+
+            *(reinterpret_cast<uint64_t *>(intOutputPtr)) = buffer.index;
+            intOutputPtr += sizeof(uint64_t) / sizeof(int);
+
+            // Since this file is full of old code, and many signed-to-unsigned conversions, we
+            // copy data in batches of the largest multiple of 4 that fits in a signed
+            // integer, to keep data access aligned if possible.
+            size_t offset = 0;
+            size_t remain = buffer.index;
+            charOutputPtr = reinterpret_cast<unsigned char *>(intOutputPtr);
+            do {
+                // Max batch size is largest 4-tuple that fits in signed 32-bit int
+                size_t batchSize = std::min(remain, static_cast<size_t>(2147483644));
+                memcpy(charOutputPtr, buffer.data + offset, batchSize);
+                charOutputPtr += batchSize;
+                offset += batchSize;
+                remain -= batchSize;
+            } while (remain > 0);
+
+            free(buffer.data);
+            free(intBufferPoiner);
+
+            size_t outputSize = charOutputPtr - bytes;
+
+#ifdef DEBUG_OUTPUT
+            printf("Finished encoding. Output length = %ld bytes, compression = %f.\n\n",
+                   outputSize,
+                   (float)size3 / outputSize);
+#endif
+
+            bytes = charOutputPtr;
+            return outputSize;
+        }
+
+        void postprocess_encode() {}
+
+        void preprocess_decode() {}
+
+        /*! \brief Decompress 3d coordinates to memory.
+         *
+         * this routine decompresses a large number of compressed 3d coordinates.
+         *
+         * \param bytes        input position; advanced past the consumed data, mirroring encode()
+         * \param targetLength number of integers (3 per coordinate) to produce
+         * \return the decoded quantized coordinates; elements beyond the last complete
+         *         triplet stay 0
+         */
+        std::vector<T> decode(const unsigned char *&bytes, size_t targetLength) {
+#ifdef DEBUG_OUTPUT
+            printf("\nDecoding, targetLength: %ld\n", targetLength);
+#endif
+            std::vector<T> quantData(targetLength, 0);
+
+            const unsigned char *inputBytesPointer = bytes;
+            const int *inputIntPtr = reinterpret_cast<const int *>(inputBytesPointer);
+
+            size_t bufferSize = targetLength * 1.2;
+            struct DataBuffer buffer;
+            /* Allocated exactly once: a second malloc further down used to overwrite
+             * this pointer and leak the first allocation. */
+            buffer.data = reinterpret_cast<unsigned char *>(malloc(bufferSize * sizeof(int)));
+            if (buffer.data == nullptr) {
+                fprintf(stderr, "malloc failed\n");
+                exit(1); /* as in encode(); continuing would dereference a null pointer */
+            }
+            buffer.index = 0;
+            buffer.lastbits = 0;
+            buffer.lastbyte = 0;
+
+            int minInt[3];
+            int maxInt[3];
+
+            minInt[0] = *inputIntPtr++;
+            minInt[1] = *inputIntPtr++;
+            minInt[2] = *inputIntPtr++;
+            maxInt[0] = *inputIntPtr++;
+            maxInt[1] = *inputIntPtr++;
+            maxInt[2] = *inputIntPtr++;
+
+#ifdef DEBUG_OUTPUT
+            printf(" minInt %d %d %d, maxInt %d %d %d\n",
+                   minInt[0],
+                   minInt[1],
+                   minInt[2],
+                   maxInt[0],
+                   maxInt[1],
+                   maxInt[2]);
+#endif
+
+            unsigned int sizeInt[3];
+            sizeInt[0] = maxInt[0] - minInt[0] + 1;
+            sizeInt[1] = maxInt[1] - minInt[1] + 1;
+            sizeInt[2] = maxInt[2] - minInt[2] + 1;
+            unsigned int bitSizeInt[3];
+            int bitSize;
+            /* check if one of the sizes is too big to be multiplied */
+            if ((sizeInt[0] | sizeInt[1] | sizeInt[2]) > 0xffffff) {
+                bitSizeInt[0] = sizeofint(sizeInt[0]);
+                bitSizeInt[1] = sizeofint(sizeInt[1]);
+                bitSizeInt[2] = sizeofint(sizeInt[2]);
+                bitSize = 0; /* flag the use of large sizes */
+            } else {
+                bitSize = sizeofints(3, sizeInt);
+            }
+
+            int smallIdx = *inputIntPtr++;
+            /* smallIdx comes from the stream and indexes magicInts: reject values outside the table */
+            if (smallIdx < FIRSTIDX || smallIdx >= LASTIDX) {
+                fprintf(stderr, "invalid smallIdx %d in compressed data\n", smallIdx);
+                exit(1);
+            }
+
+            int smaller = magicInts[std::max(FIRSTIDX, smallIdx - 1)] / 2;
+            int smallNum = magicInts[smallIdx] / 2;
+            unsigned int sizeSmall[3];
+            sizeSmall[0] = magicInts[smallIdx];
+            sizeSmall[1] = magicInts[smallIdx];
+            sizeSmall[2] = magicInts[smallIdx];
+
+            size_t size3 = targetLength;
+            /* payload length in bytes, as stored by encode() */
+            /* NOTE(review): this length is not checked against the size of buffer.data. */
+            buffer.index = *(reinterpret_cast<const uint64_t *>(inputIntPtr));
+            inputIntPtr += sizeof(uint64_t) / sizeof(int);
+
+            size_t offset = 0;
+            size_t remain = buffer.index;
+            inputBytesPointer = reinterpret_cast<const unsigned char *>(inputIntPtr);
+            do {
+                // Max batch size is largest 4-tuple that fits in signed 32-bit int
+                size_t batchSize = std::min(remain, static_cast<size_t>(2147483644));
+                memcpy(buffer.data + offset, inputBytesPointer, batchSize);
+                inputBytesPointer += batchSize;
+                offset += batchSize;
+                remain -= batchSize;
+            } while (remain > 0);
+
+            buffer.index = 0;
+            buffer.lastbits = 0;
+            buffer.lastbyte = 0;
+
+            int run = 0;
+            size_t i = 0;
+            int *intBufferPoiner = reinterpret_cast<int *>(malloc(size3 * sizeof(*intBufferPoiner)));
+            if (intBufferPoiner == nullptr && size3 > 0) {
+                fprintf(stderr, "malloc failed\n");
+                exit(1);
+            }
+            int *localIntBufferPointer = intBufferPoiner;
+            unsigned char *charOutputPtr = reinterpret_cast<unsigned char *>(quantData.data());
+            int *intOutputPtr = reinterpret_cast<int *>(charOutputPtr);
+            int prevCoord[3] = {0, 0, 0};
+            uint64_t numTriplets = targetLength / 3;
+            while (i < numTriplets) {
+                int *thisCoord = reinterpret_cast<int *>(localIntBufferPointer) + i * 3;
+
+                /* group leader: stored in full, relative to minInt */
+                if (bitSize == 0) {
+                    thisCoord[0] = receivebits(&buffer, bitSizeInt[0]);
+                    thisCoord[1] = receivebits(&buffer, bitSizeInt[1]);
+                    thisCoord[2] = receivebits(&buffer, bitSizeInt[2]);
+                } else {
+                    receiveints(&buffer, 3, bitSize, sizeInt, thisCoord);
+                }
+
+                i++;
+                thisCoord[0] += minInt[0];
+                thisCoord[1] += minInt[1];
+                thisCoord[2] += minInt[2];
+
+                prevCoord[0] = thisCoord[0];
+                prevCoord[1] = thisCoord[1];
+                prevCoord[2] = thisCoord[2];
+
+                /* 1 flag bit; if set, 5 bits carrying run + isSmaller + 1 (run is a multiple of 3) */
+                int flag = receivebits(&buffer, 1);
+                int isSmaller = 0;
+                if (flag == 1) {
+                    run = receivebits(&buffer, 5);
+                    isSmaller = run % 3;
+                    run -= isSmaller;
+                    isSmaller--;
+                }
+                if (run > 0) {
+                    thisCoord += 3;
+                    for (int k = 0; k < run; k += 3) {
+                        receiveints(&buffer, 3, smallIdx, sizeSmall, thisCoord);
+                        i++;
+                        thisCoord[0] += prevCoord[0] - smallNum;
+                        thisCoord[1] += prevCoord[1] - smallNum;
+                        thisCoord[2] += prevCoord[2] - smallNum;
+                        if (k == 0) {
+                            /* interchange first with second atom for better
+                             * compression of water molecules
+                             */
+                            int tmp = thisCoord[0];
+                            thisCoord[0] = prevCoord[0];
+                            prevCoord[0] = tmp;
+                            tmp = thisCoord[1];
+                            thisCoord[1] = prevCoord[1];
+                            prevCoord[1] = tmp;
+                            tmp = thisCoord[2];
+                            thisCoord[2] = prevCoord[2];
+                            prevCoord[2] = tmp;
+                            *intOutputPtr++ = prevCoord[0];
+                            *intOutputPtr++ = prevCoord[1];
+                            *intOutputPtr++ = prevCoord[2];
+                        } else {
+                            prevCoord[0] = thisCoord[0];
+                            prevCoord[1] = thisCoord[1];
+                            prevCoord[2] = thisCoord[2];
+                        }
+                        *intOutputPtr++ = thisCoord[0];
+                        *intOutputPtr++ = thisCoord[1];
+                        *intOutputPtr++ = thisCoord[2];
+                    }
+                } else {
+                    *intOutputPtr++ = thisCoord[0];
+                    *intOutputPtr++ = thisCoord[1];
+                    *intOutputPtr++ = thisCoord[2];
+                }
+
+                /* follow the encoder's adjustment of the 'small' range */
+                smallIdx += isSmaller;
+                if (isSmaller < 0) {
+                    smallNum = smaller;
+                    if (smallIdx > FIRSTIDX) {
+                        smaller = magicInts[smallIdx - 1] / 2;
+                    } else {
+                        smaller = 0;
+                    }
+                } else if (isSmaller > 0) {
+                    smaller = smallNum;
+                    smallNum = magicInts[smallIdx] / 2;
+                }
+                sizeSmall[0] = sizeSmall[1] = sizeSmall[2] = magicInts[smallIdx];
+            }
+            free(buffer.data);
+            free(intBufferPoiner);
+
+#ifdef DEBUG_OUTPUT
+            printf("Decoded %llu triplets.\n", numTriplets);
+            for (size_t i = 0; i < numTriplets; i++)
+            {
+                printf("Triplet %ld: %d %d %d\n", i, quantData[i * 3], quantData[i * 3 + 1], quantData[i * 3 + 2]);
+            }
+#endif
+
+            /* leave the cursor after the consumed bytes, as encode() does for its output */
+            bytes = inputBytesPointer;
+            return quantData;
+        }
+
+        void postprocess_decode() {}
+
+        void save(uchar *&c) {}
+
+        void load(const uchar *&c, size_t &remaining_length) {}
+    };
+
+} // namespace SZ3
+
+
+#endif
diff --git a/include/SZ3/frontend/SZBioMDFrontend.hpp b/include/SZ3/frontend/SZBioMDFrontend.hpp index 4b0c2c5c..9c8ea18d 100644 --- a/include/SZ3/frontend/SZBioMDFrontend.hpp +++ b/include/SZ3/frontend/SZBioMDFrontend.hpp @@ -10,73 +10,78 @@ #include namespace SZ3 { - + template class SZBioMDFrontend : public concepts::FrontendInterface { - public: + public: SZBioMDFrontend(const Config &conf, Quantizer quantizer) : - quantizer(quantizer), - conf(conf) { - if (N != 1 && N != 3) { - throw std::invalid_argument("SZBioFront only support 1D or 3D data"); + quantizer(quantizer), + conf(conf) { + if (N != 1 && N != 2 && N != 3) { + throw std::invalid_argument("SZBioFront only support 1D, 2D or 3D data"); } } - + ~SZBioMDFrontend() { clear(); } - + void print() {}; - - + std::vector compress(T *data) { if (N == 1) { return compress_1d(data); + } else if (N == 2) { + return compress_2d(data); } else { return compress_3d(data); } }; - + T *decompress(std::vector &quant_inds, T *dec_data) { if (N == 1) { return decompress_1d(quant_inds, dec_data); + } else if (N == 2) { + return decompress_2d(quant_inds, dec_data); } else { return decompress_3d(quant_inds, dec_data); } }; - - + void save(uchar *&c) { write(site, c); + write(firstFillFrame_, c); + write(fillValue_, c); quantizer.save(c); } - + void load(const uchar *&c, size_t &remaining_length) { clear(); const uchar *c_pos = c; read(site, c, remaining_length); + read(firstFillFrame_, c, remaining_length); + read(fillValue_, c, remaining_length); quantizer.load(c, remaining_length); remaining_length -= c_pos - c; } - + void clear() { quantizer.clear(); }
- + size_t size_est() { return quantizer.size_est(); //unpred } - + int get_radius() const { return quantizer.get_radius(); } - + size_t get_num_elements() const { return conf.num; }; - - private: + + private: std::vector compress_1d(T *data) { std::vector quant_bins(conf.num); quant_bins[0] = quantizer.quantize_and_overwrite(data[0], 0); @@ -85,7 +90,7 @@ namespace SZ3 { } return quant_bins; } - + T *decompress_1d(std::vector &quant_inds, T *dec_data) { dec_data[0] = quantizer.recover(0, quant_inds[0]); for (size_t i = 1; i < conf.num; i++) { @@ -93,14 +98,17 @@ namespace SZ3 { } return dec_data; } - - - int cal_site_3d(T *data, std::vector dims) { + + int cal_site(T *data, std::vector dims) { + size_t numDims = dims.size(); + if (numDims < 2) { + return 0; + } std::vector sites; - for (int j = 0; j < std::min(dims[2], 5); j++) { + for (int j = 0; j < std::min(dims[numDims - 1], 5); j++) { size_t lprev = 0, lavg = 0, lcnt = 0; - for (size_t i = 1; i < std::min(dims[1], 100); i++) { - auto c = data[i * dims[2] + j], p = data[(i - 1) * dims[2] + j]; + for (size_t i = 1; i < std::min(dims[numDims - 2], 100); i++) { + auto c = data[i * dims[numDims - 1] + j], p = data[(i - 1) * dims[numDims - 1] + j]; if (fabs(c - p) / c > 0.5) { sites.push_back(i - lprev); // printf("%d %d\n", i, i - lprev); @@ -113,7 +121,7 @@ namespace SZ3 { frequency[sites[i]]++; } int maxCount = 0, res = 0; - for (const auto &kv: frequency) { + for (const auto &kv : frequency) { auto k = kv.first; auto f = kv.second; // printf("k %d f %d\n", k ,f); @@ -124,96 +132,235 @@ namespace SZ3 { } return (res <= 2 || res > 10) ? 0 : res; } - + + /* Start from the last frame and look for frames filled with the same value. + * Those frames do not need to be compressed - they can all just be filled. 
*/ + std::tuple findFillValueAndFirstFilledFrame(T *data, std::vector dims) { + size_t numDims = dims.size(); + if (numDims < 3) { + return std::make_tuple(dims[0], 0); + } + size_t frameStride = dims[1] * dims[2]; + size_t firstFillFrame = dims[0]; + + if (firstFillFrame == 0) { + return std::make_tuple(firstFillFrame, 0); + } + /* Assume that the first value of the last frame is the potential fill value */ + T fillFrameValue = data[(dims[0] - 1) * frameStride]; + /* To simplify compression/decompression below, assume that the first frame needs + * to be compressed. */ + for (size_t i = dims[0] - 1; i > 0; i--) { + size_t idx = i * frameStride; + bool allFrameValuesAreFillValue = true; + + for (size_t j = 0; j < dims[1] * dims[2]; j++) { + size_t idy = idx + j; + if (data[idy] != fillFrameValue) { + allFrameValuesAreFillValue = false; + break; + } + } + if (allFrameValuesAreFillValue) { + firstFillFrame = i; + } else { + break; + } + } + return std::make_tuple(firstFillFrame, fillFrameValue); + } + + std::vector compress_2d(T *data) { + std::vector quant_bins(conf.num); + auto dims = conf.dims; + site = cal_site(data, conf.dims); + printf("# of site in the MD simulation guessed by SZ3 = %d\n", site); + + firstFillFrame_ = dims[0]; + fillValue_ = 0; + + //i==0 & j==0 + for (size_t k = 0; k < dims[1]; k++) { //xyz + size_t idx = k; + quant_bins[idx] = quantizer.quantize_and_overwrite(data[idx], 0); + } + + //i==0 + for (size_t j = 1; j < dims[0]; j++) { //atoms + /* Assume that we are compressing water coordinates. Water models are constructed the following way: + * 3-site: O-H-H + * 4-site: O-H-H-VS + * 5-site: O-H-H-VS-VS + * The oxygen atom is always most centrally located (on average closest to the others). + * Use the oxygen atom as reference. + */ + size_t site_reference_offset = site != 0 ? 
std::max(1, j % site) : 1; + for (size_t k = 0; k < dims[1]; k++) { //xyz + size_t idx = j * dims[1] + k; + size_t idx1 = (j - site_reference_offset) * dims[1] + k; + quant_bins[idx] = quantizer.quantize_and_overwrite(data[idx], data[idx1]); + } + } + + return quant_bins; + } + + T *decompress_2d(std::vector &quant_inds, T *dec_data) { + + auto dims = conf.dims; + + //i==0 & j==0 + for (size_t k = 0; k < dims[1]; k++) { //xyz + size_t idx = k; + dec_data[idx] = quantizer.recover(0, quant_inds[idx]); + } + + //i==0 + for (size_t j = 1; j < dims[0]; j++) { //atoms + /* Assume that we are decompressing water coordinates. Water models are constructed the following way: + * 3-site: O-H-H + * 4-site: O-H-H-VS + * 5-site: O-H-H-VS-VS + * The oxygen atom is always most centrally located (on average closest to the others). + * Use the oxygen atom as reference. + */ + size_t site_reference_offset = site != 0 ? std::max(1, j % site) : 1; + for (size_t k = 0; k < dims[1]; k++) { //xyz + size_t idx = j * dims[1] + k; + size_t idx1 = (j - site_reference_offset) * dims[1] + k; + dec_data[idx] = quantizer.recover(dec_data[idx1], quant_inds[idx]); + } + } + + return dec_data; + } + std::vector compress_3d(T *data) { std::vector quant_bins(conf.num); auto dims = conf.dims; std::vector stride({dims[1] * dims[2], dims[2], 1}); - site = cal_site_3d(data + stride[0], conf.dims); + site = cal_site(data + stride[0], conf.dims); printf("# of site in the MD simulation guessed by SZ3 = %d\n", site); + + /* Find out if the last frames are all filled with the same value. 
*/ + std::tuple fillValueSettings = findFillValueAndFirstFilledFrame(data, dims); + firstFillFrame_ = std::get<0>(fillValueSettings); + fillValue_ = std::get<1>(fillValueSettings); + size_t lastFrame = std::min(dims[0], firstFillFrame_); + //TODO determine the # of system //i==0 & j==0 for (size_t k = 0; k < dims[2]; k++) { //xyz size_t idx = k; quant_bins[idx] = quantizer.quantize_and_overwrite(data[idx], 0); } - + //i==0 for (size_t j = 1; j < dims[1]; j++) { //atoms + /* Assume that we are compressing water coordinates. Water models are constructed the following way: + * 3-site: O-H-H + * 4-site: O-H-H-VS + * 5-site: O-H-H-VS-VS + * The oxygen atom is always most centrally located (on average closest to the others). + * Use the oxygen atom as reference. + */ + size_t site_reference_offset = site != 0 ? std::max(1, j % site) : 1; for (size_t k = 0; k < dims[2]; k++) { //xyz size_t idx = j * stride[1] + k; - size_t idx1 = (j - 1) * stride[1] + k; + size_t idx1 = (j - site_reference_offset) * stride[1] + k; quant_bins[idx] = quantizer.quantize_and_overwrite(data[idx], data[idx1]); } } - - for (size_t i = 1; i < dims[0]; i++) {//time + + for (size_t i = 1; i < lastFrame; i++) {//time for (size_t j = 0; j < dims[1]; j++) { //atoms + size_t site_reference_offset = site != 0 ? 
j % site : 1; for (size_t k = 0; k < dims[2]; k++) { //xyz size_t idx = i * stride[0] + j * stride[1] + k; size_t idx1 = (i - 1) * stride[0] + j * stride[1] + k; - size_t idx2 = i * stride[0] + (j - 1) * stride[1] + k; - size_t idx3 = (i - 1) * stride[0] + (j - 1) * stride[1] + k; if (j == 0 || (site != 0 && j % site == 0)) {// time -1 quant_bins[idx] = - quantizer.quantize_and_overwrite(data[idx], data[idx1]); + quantizer.quantize_and_overwrite(data[idx], data[idx1]); } else { // time -1 & atom -1 + size_t idx2 = i * stride[0] + (j - site_reference_offset) * stride[1] + k; + size_t idx3 = (i - 1) * stride[0] + (j - site_reference_offset) * stride[1] + k; quant_bins[idx] = - quantizer.quantize_and_overwrite(data[idx], data[idx1] + data[idx2] - data[idx3]); + quantizer.quantize_and_overwrite(data[idx], data[idx1] + data[idx2] - data[idx3]); } } } } - + return quant_bins; } - - + T *decompress_3d(std::vector &quant_inds, T *dec_data) { - + auto dims = conf.dims; std::vector stride({dims[1] * dims[2], dims[2], 1}); // quant_bins[0] = quantizer.quantize_and_overwrite(data[0], 0); - + + size_t lastFrame = std::min(dims[0], firstFillFrame_); + //i==0 & j==0 for (size_t k = 0; k < dims[2]; k++) { //xyz size_t idx = k; dec_data[idx] = quantizer.recover(0, quant_inds[idx]); } - + //i==0 for (size_t j = 1; j < dims[1]; j++) { //atoms + /* Assume that we are decompressing water coordinates. Water models are constructed the following way: + * 3-site: O-H-H + * 4-site: O-H-H-VS + * 5-site: O-H-H-VS-VS + * The oxygen atom is always most centrally located (on average closest to the others). + * Use the oxygen atom as reference. + */ + size_t site_reference_offset = site != 0 ? 
std::max(1, j % site) : 1; for (size_t k = 0; k < dims[2]; k++) { //xyz size_t idx = j * stride[1] + k; - size_t idx1 = (j - 1) * stride[1] + k; + size_t idx1 = (j - site_reference_offset) * stride[1] + k; dec_data[idx] = quantizer.recover(dec_data[idx1], quant_inds[idx]); } } - - for (size_t i = 1; i < dims[0]; i++) {//time + + for (size_t i = 1; i < lastFrame; i++) {//time for (size_t j = 0; j < dims[1]; j++) { //atoms + size_t site_reference_offset = site != 0 ? j % site : 1; for (size_t k = 0; k < dims[2]; k++) { //xyz size_t idx = i * stride[0] + j * stride[1] + k; size_t idx1 = (i - 1) * stride[0] + j * stride[1] + k; - size_t idx2 = i * stride[0] + (j - 1) * stride[1] + k; - size_t idx3 = (i - 1) * stride[0] + (j - 1) * stride[1] + k; if (j == 0 || (site != 0 && j % site == 0)) {// time -1 dec_data[idx] = quantizer.recover(dec_data[idx1], quant_inds[idx]); } else { // time -1 & atom -1 + size_t idx2 = i * stride[0] + (j - site_reference_offset) * stride[1] + k; + size_t idx3 = (i - 1) * stride[0] + (j - site_reference_offset) * stride[1] + k; dec_data[idx] = quantizer.recover(dec_data[idx1] + dec_data[idx2] - dec_data[idx3], quant_inds[idx]); } } } } + + /* Fill frames at the end with the fill value. 
*/ + for (size_t i = firstFillFrame_; i < dims[0]; i++) { + size_t idx = i * stride[0]; + for (size_t j = 0; j < dims[1] * dims[2]; j++) { + size_t idy = idx + j; + dec_data[idy] = fillValue_; + } + } return dec_data; } - + Quantizer quantizer; Config conf; int site = 0; - + size_t firstFillFrame_; + T fillValue_; + }; - + template SZBioMDFrontend make_sz_bio_frontend(const Config &conf, Predictor predictor) { @@ -221,5 +368,4 @@ namespace SZ3 { } } - #endif diff --git a/include/SZ3/frontend/SZBioMDXtcBasedFrontend.hpp b/include/SZ3/frontend/SZBioMDXtcBasedFrontend.hpp new file mode 100644 index 00000000..e514a06d --- /dev/null +++ b/include/SZ3/frontend/SZBioMDXtcBasedFrontend.hpp @@ -0,0 +1,208 @@ +/* + * Based on SZBioMDFrontend.hpp + * \author: Magnus Lundborg + */ + +#ifndef SZ3_SZBIOMDXTCBASED_FRONTEND +#define SZ3_SZBIOMDXTCBASED_FRONTEND + +#include "Frontend.hpp" +#include "SZ3/utils/Config.hpp" + +#include + +namespace SZ3 { + + template + class SZBioMDXtcBasedFrontend : public concepts::FrontendInterface { + public: + SZBioMDXtcBasedFrontend(const Config &conf) : conf(conf) { + if (N != 1 && N != 2 && N != 3) { + throw std::invalid_argument("SZBioFront only support 1D, 2D or 3D data"); + } + } + + ~SZBioMDXtcBasedFrontend() { clear(); } + + void print() {}; + + std::vector compress(T *data) { + if (N <= 2) { + return compressSingleFrame(data); + } else { + return compressMultiFrame(data); + } + }; + + T *decompress(std::vector &quantData, T *decData) { + if (N <= 2) { + return decompressSingleFrame(quantData, decData); + } else { + return decompressMultiFrame(quantData, decData); + } + }; + + void save(uchar *&c) { + write(firstFillFrame_, c); + write(fillValue_, c); + } + + void load(const uchar *&c, size_t &remaining_length) { + clear(); + const uchar *c_pos = c; + read(firstFillFrame_, c, remaining_length); + read(fillValue_, c, remaining_length); + } + + void clear() {} + + size_t size_est() { return 0; } + + int get_radius() const { return 0; } + + 
size_t get_num_elements() const { + if (N == 3) { + return firstFillFrame_ * conf.dims[1] * conf.dims[2]; + } + return conf.num; + } + + private: + std::vector compressSingleFrame(T *data) { + std::vector quantData(conf.num); + + /* To prevent that potential rounding errors make the error slightly larger than the + * absolute error bound, scale down the error limit slightly. + * The precision is twice the required maximum error. */ + double reciprocalPrecision = 1.0 / (conf.absErrorBound * 0.99999 * 2.0); + + for (size_t i = 0; i < conf.num; i++) { + quantData[i] = std::floor(data[i] * reciprocalPrecision + 0.5); + } + return quantData; + } + + T *decompressSingleFrame(std::vector &quantData, T *decData) { + /* To prevent that potential rounding errors make the error slightly larger than the + * absolute error bound, scale down the error limit slightly. + * The precision is twice the required maximum error. */ + double precision = conf.absErrorBound * 0.99999 * 2.0; + + for (size_t i = 0; i < conf.num; i++) { + decData[i] = quantData[i] * precision; + } + return decData; + } + + /* Start from the last frame and look for frames filled with the same value. + * Those frames do not need to be compressed - they can all just be filled. */ + std::tuple findFillValueAndFirstFilledFrame(T *data, std::vector dims) { + size_t numDims = dims.size(); + if (numDims < 3) { + return std::make_tuple(dims[0], 0); + } + size_t frameStride = dims[1] * dims[2]; + size_t firstFillFrame = dims[0]; + + if (firstFillFrame == 0) { + return std::make_tuple(firstFillFrame, 0); + } + /* Assume that the first value of the last frame is the potential fill value */ + T fillFrameValue = data[(dims[0] - 1) * frameStride]; + /* To simplify compression/decompression below, assume that the first frame needs + * to be compressed. 
*/ + for (size_t i = dims[0] - 1; i > 0; i--) { + size_t idx = i * frameStride; + bool allFrameValuesAreFillValue = true; + + for (size_t j = 0; j < dims[1] * dims[2]; j++) { + size_t idy = idx + j; + if (data[idy] != fillFrameValue) { + allFrameValuesAreFillValue = false; + break; + } + } + if (allFrameValuesAreFillValue) { + firstFillFrame = i; + } else { + break; + } + } + return std::make_tuple(firstFillFrame, fillFrameValue); + } + + /* This just converts float to integer based on the absolute error. */ + std::vector compressMultiFrame(T *data) { + auto dims = conf.dims; + std::vector stride({dims[1] * dims[2], dims[2], 1}); + + /* Find out if the last frames are all filled with the same value. */ + std::tuple fillValueSettings = findFillValueAndFirstFilledFrame(data, dims); + firstFillFrame_ = std::get<0>(fillValueSettings); + fillValue_ = std::get<1>(fillValueSettings); + size_t lastFrame = std::min(dims[0], firstFillFrame_); + std::vector quantData(lastFrame * dims[1] * dims[2]); + + /* To prevent that potential rounding errors make the error slightly larger than the + * absolute error bound, scale down the error limit slightly. + * The precision is twice the required maximum error. */ + double reciprocalPrecision = 1.0 / (conf.absErrorBound * 0.99999 * 2.0); + + for (size_t i = 0; i < lastFrame; i++) // time + { + for (size_t j = 0; j < dims[1]; j++) // atoms + { + for (size_t k = 0; k < dims[2]; k++) // xyz + { + size_t idx = i * stride[0] + j * stride[1] + k; + quantData[idx] = std::floor(data[idx] * reciprocalPrecision + 0.5); + } + } + } + + return quantData; + } + + /* This just converts integer to float based on the absolute error. 
*/ + T *decompressMultiFrame(std::vector &quantData, T *decData) { + + // printf("Decompressing 3D.\n"); + auto dims = conf.dims; + std::vector stride({dims[1] * dims[2], dims[2], 1}); + + size_t lastFrame = std::min(dims[0], firstFillFrame_); + + /* To prevent that potential rounding errors make the error slightly larger than the + * absolute error bound, scale down the error limit slightly. + * The precision is twice the required maximum error. */ + double precision = conf.absErrorBound * 0.99999 * 2.0; + + for (size_t i = 0; i < lastFrame; i++) { // time + for (size_t j = 0; j < dims[1]; j++) { // atoms + for (size_t k = 0; k < dims[2]; k++) { // xyz + size_t idx = i * stride[0] + j * stride[1] + k; + decData[idx] = quantData[idx] * precision; + } + } + } + + /* Fill frames at the end with the fill value. */ + for (size_t i = firstFillFrame_; i < dims[0]; i++) { + size_t idx = i * stride[0]; + for (size_t j = 0; j < dims[1] * dims[2]; j++) { + size_t idy = idx + j; + decData[idy] = fillValue_; + } + } + return decData; + } + + Config conf; + size_t firstFillFrame_; + T fillValue_; + }; + +} // namespace SZ3 + + +#endif diff --git a/include/SZ3/lossless/Lossless_zstd.hpp b/include/SZ3/lossless/Lossless_zstd.hpp index 6cb36a9f..8e91a562 100644 --- a/include/SZ3/lossless/Lossless_zstd.hpp +++ b/include/SZ3/lossless/Lossless_zstd.hpp @@ -13,44 +13,45 @@ namespace SZ3 { class Lossless_zstd : public concepts::LosslessInterface { - - public: + + public: Lossless_zstd() = default; - + Lossless_zstd(int comp_level) : compression_level(comp_level) {}; - + uchar *compress(uchar *data, size_t dataLength, size_t &outSize) { - size_t estimatedCompressedSize = std::max(size_t(dataLength * 1.2), size_t(400)); - uchar *compressBytes = new uchar[estimatedCompressedSize]; + size_t estimatedCompressedSize = std::max(size_t(dataLength * 1.2), size_t(500)); + /* Using malloc to match free(), called in H5Dwrite. If using new[] valgrind complains. 
*/ + uchar *compressBytes = (uchar *) malloc(estimatedCompressedSize * sizeof(uchar)); uchar *compressBytesPos = compressBytes; write(dataLength, compressBytesPos); - + outSize = ZSTD_compress(compressBytesPos, estimatedCompressedSize, data, dataLength, compression_level); outSize += sizeof(size_t); return compressBytes; } - + void postcompress_data(uchar *data) { delete[] data; } - + uchar *decompress(const uchar *data, size_t &compressedSize) { const uchar *dataPos = data; size_t dataLength = 0; read(dataLength, dataPos, compressedSize); - + uchar *oriData = new uchar[dataLength]; ZSTD_decompress(oriData, dataLength, dataPos, compressedSize); compressedSize = dataLength; return oriData; } - + void postdecompress_data(uchar *data) { delete[] data; } - - private: + + private: int compression_level = 3; //default setting of level is 3 }; } diff --git a/include/SZ3/utils/ByteUtil.hpp b/include/SZ3/utils/ByteUtil.hpp index b0e21eec..9a70643f 100644 --- a/include/SZ3/utils/ByteUtil.hpp +++ b/include/SZ3/utils/ByteUtil.hpp @@ -6,141 +6,141 @@ #define SZ3_BYTEUTIL_HPP #include "SZ3/def.hpp" -#include +#include +#include +#include namespace SZ3 { - + typedef union lint16 { unsigned short usvalue; short svalue; unsigned char byte[2]; } lint16; - + typedef union lint32 { int ivalue; unsigned int uivalue; unsigned char byte[4]; } lint32; - + typedef union lint64 { int64_t lvalue; uint64_t ulvalue; unsigned char byte[8]; } lint64; - + typedef union ldouble { double value; uint64_t lvalue; unsigned char byte[8]; } ldouble; - + typedef union lfloat { float value; unsigned int ivalue; unsigned char byte[4]; uint16_t int16[2]; } lfloat; - + inline void symTransform_4bytes(uchar data[4]) { unsigned char tmp = data[0]; data[0] = data[3]; data[3] = tmp; - + tmp = data[1]; data[1] = data[2]; data[2] = tmp; } - + inline int16_t bytesToInt16_bigEndian(unsigned char *bytes) { int16_t temp = 0; int16_t res = 0; - + temp = bytes[0] & 0xff; res |= temp; - + res <<= 8; temp = bytes[1] & 
0xff; res |= temp; - + return res; } - + inline int32_t bytesToInt32_bigEndian(const unsigned char *bytes) { int32_t temp = 0; int32_t res = 0; - + res <<= 8; temp = bytes[0] & 0xff; res |= temp; - + res <<= 8; temp = bytes[1] & 0xff; res |= temp; - + res <<= 8; temp = bytes[2] & 0xff; res |= temp; - + res <<= 8; temp = bytes[3] & 0xff; res |= temp; - + return res; } - + inline int64_t bytesToInt64_bigEndian(const unsigned char *b) { int64_t temp = 0; int64_t res = 0; - + res <<= 8; temp = b[0] & 0xff; res |= temp; - + res <<= 8; temp = b[1] & 0xff; res |= temp; - + res <<= 8; temp = b[2] & 0xff; res |= temp; - + res <<= 8; temp = b[3] & 0xff; res |= temp; - + res <<= 8; temp = b[4] & 0xff; res |= temp; - + res <<= 8; temp = b[5] & 0xff; res |= temp; - + res <<= 8; temp = b[6] & 0xff; res |= temp; - + res <<= 8; temp = b[7] & 0xff; res |= temp; - + return res; } - - + inline void int16ToBytes_bigEndian(unsigned char *b, int16_t num) { b[0] = (unsigned char) (num >> 8); b[1] = (unsigned char) (num); } - + inline void int32ToBytes_bigEndian(unsigned char *b, int32_t num) { b[0] = (unsigned char) (num >> 24); b[1] = (unsigned char) (num >> 16); b[2] = (unsigned char) (num >> 8); b[3] = (unsigned char) (num); } - - + inline void int64ToBytes_bigEndian(unsigned char *b, int64_t num) { b[0] = (unsigned char) (num >> 56); b[1] = (unsigned char) (num >> 48); @@ -151,7 +151,7 @@ namespace SZ3 { b[6] = (unsigned char) (num >> 8); b[7] = (unsigned char) (num); } - + std::string floatToBinary(float f) { lfloat u; u.value = f; @@ -162,7 +162,7 @@ namespace SZ3 { } return str; } - + template void truncateArray(T data, size_t n, int byteLen, uchar *&binary) { lfloat bytes; @@ -174,7 +174,7 @@ namespace SZ3 { } } } - + template void truncateArrayRecover(uchar *binary, size_t n, int byteLen, T *data) { lfloat bytes; @@ -187,15 +187,15 @@ namespace SZ3 { data[i] = bytes.value; } } - + std::vector LeadingBitsEncode(float pre, float data) { lfloat lfBuf_pre; lfloat lfBuf_cur; - + 
lfBuf_pre.value = pre; lfBuf_cur.value = data; lfBuf_pre.ivalue = lfBuf_cur.ivalue ^ lfBuf_pre.ivalue; - + std::vector bytes; int n = 0; if (lfBuf_pre.ivalue == 0) { @@ -209,61 +209,61 @@ namespace SZ3 { } else { n = 4; } - + for (int i = 0; i < n; i++) { bytes.push_back(lfBuf_cur.byte[i]); } return bytes; } - + float LeadingBitsDecode(float pre, std::vector bytes) { lfloat lfBuf_pre; lfloat lfBuf_cur; - + lfBuf_pre.value = pre; lfBuf_cur = lfBuf_pre; - + for (int i = 0; i < bytes.size(); i++) { lfBuf_cur.byte[i] = bytes[i]; } return lfBuf_cur.value; } - + inline void writeBytesBit(uchar *&c, uchar val, uchar &mask, uchar &index) { - + assert(val == 0 || val == 1); - + mask |= val << index++; if (index == 8) { *c++ = mask; mask = index = 0; } } - + template inline void writeBytes(uchar *&c, T val, uchar len, uchar &mask, uchar &index) { - + assert(len >= 1 && len <= sizeof(T) * 8); - + if (len + index >= 8) { - + mask |= (val & ((1 << (8 - index)) - 1)) << index; val >>= 8 - index; len -= 8 - index; *c++ = mask; mask = index = 0; - + while (len >= 8) { - + *c++ = val & (1 << 8) - 1; val >>= 8; len -= 8; } } - + mask |= (val & (1 << len) - 1) << index; index += len; - + // for(int i=0;i>=1; // } } - + inline void writeBytesByte(uchar *&c, uchar val) { *c++ = val; } - + inline void writeBytesClearMask(uchar *&c, uchar &mask, uchar &index) { - + if (index > 0) { *c++ = mask; // mask=i=0; } } - + inline uchar readBit(const uchar *const &c, int i) { - + return ((*(c + (i >> 3))) >> (i & 7)) & 1; } - + }; #endif //SZ3_BYTEUTIL_HPP diff --git a/include/SZ3/utils/Config.hpp b/include/SZ3/utils/Config.hpp index 1f4ff06c..07e08f76 100644 --- a/include/SZ3/utils/Config.hpp +++ b/include/SZ3/utils/Config.hpp @@ -13,26 +13,42 @@ #include "MemoryUtil.hpp" #include "SZ3/utils/inih/INIReader.h" -namespace SZ3 { +#define SZ_FLOAT 0 +#define SZ_DOUBLE 1 +#define SZ_UINT8 2 +#define SZ_INT8 3 +#define SZ_UINT16 4 +#define SZ_INT16 5 +#define SZ_UINT32 6 +#define SZ_INT32 7 +#define 
SZ_UINT64 8 +#define SZ_INT64 9 +namespace SZ3 { enum EB { EB_ABS, EB_REL, EB_PSNR, EB_L2NORM, EB_ABS_AND_REL, EB_ABS_OR_REL }; + constexpr const char *EB_STR[] = {"ABS", "REL", "PSNR", "NORM", "ABS_AND_REL", "ABS_OR_REL"}; constexpr EB EB_OPTIONS[] = {EB_ABS, EB_REL, EB_PSNR, EB_L2NORM, EB_ABS_AND_REL, EB_ABS_OR_REL}; - + enum ALGO { - ALGO_LORENZO_REG, ALGO_INTERP_LORENZO, ALGO_INTERP, ALGO_BIOMD + ALGO_LORENZO_REG, ALGO_INTERP_LORENZO, ALGO_INTERP, ALGO_BIOMD, ALGO_BIOMDXTC }; - constexpr const char *ALGO_STR[] = {"ALGO_LORENZO_REG", "ALGO_INTERP_LORENZO", "ALGO_INTERP", "ALGO_BIOMD"}; - constexpr const ALGO ALGO_OPTIONS[] = {ALGO_LORENZO_REG, ALGO_INTERP_LORENZO, ALGO_INTERP, ALGO_BIOMD}; - + constexpr const char *ALGO_STR[] = { + "ALGO_LORENZO_REG", "ALGO_INTERP_LORENZO", "ALGO_INTERP", "ALGO_BIOMD", "ALGO_BIOMDXTC" + }; + constexpr const ALGO ALGO_OPTIONS[] = { + ALGO_LORENZO_REG, ALGO_INTERP_LORENZO, ALGO_INTERP, ALGO_BIOMD, ALGO_BIOMDXTC + }; + enum INTERP_ALGO { INTERP_ALGO_LINEAR, INTERP_ALGO_CUBIC }; + constexpr const char *INTERP_ALGO_STR[] = {"INTERP_ALGO_LINEAR", "INTERP_ALGO_CUBIC"}; constexpr INTERP_ALGO INTERP_ALGO_OPTIONS[] = {INTERP_ALGO_LINEAR, INTERP_ALGO_CUBIC}; - + template const char *enum2Str(T e) { if (std::is_same::value) { @@ -46,36 +62,43 @@ namespace SZ3 { exit(0); } } - + class Config { - public: - template - Config(Dims ... args) { + public: + template + Config(Dims... args) { dims = std::vector{static_cast(std::forward(args))...}; - N = dims.size(); - num = std::accumulate(dims.begin(), dims.end(), (size_t) 1, std::multiplies()); - blockSize = (N == 1 ? 128 : (N == 2 ? 
16 : 6)); - pred_dim = N; - stride = blockSize; + setDims(dims.begin(), dims.end()); } - + template size_t setDims(Iter begin, Iter end) { - dims = std::vector(begin, end); + auto dims_ = std::vector(begin, end); + dims.clear(); + for (auto dim : dims_) { + if (dim > 1) { + dims.push_back(dim); + } + } + if (dims.empty()) { + dims = {1}; + } N = dims.size(); num = std::accumulate(dims.begin(), dims.end(), (size_t) 1, std::multiplies()); pred_dim = N; + blockSize = (N == 1 ? 128 : (N == 2 ? 16 : 6)); + stride = blockSize; return num; } - + void loadcfg(const std::string &cfgpath) { INIReader cfg(cfgpath); - + if (cfg.ParseError() != 0) { std::cout << "Can't load cfg file " << cfgpath << std::endl; exit(0); } - + auto cmprAlgoStr = cfg.Get("GlobalSettings", "CmprAlgo", ""); if (cmprAlgoStr == ALGO_STR[ALGO_LORENZO_REG]) { cmprAlgo = ALGO_LORENZO_REG; @@ -85,6 +108,8 @@ namespace SZ3 { cmprAlgo = ALGO_INTERP; } else if (cmprAlgoStr == ALGO_STR[ALGO_BIOMD]) { cmprAlgo = ALGO_BIOMD; + } else if (cmprAlgoStr == ALGO_STR[ALGO_BIOMDXTC]) { + cmprAlgo = ALGO_BIOMDXTC; } auto ebModeStr = cfg.Get("GlobalSettings", "ErrorBoundMode", ""); if (ebModeStr == EB_STR[EB_ABS]) { @@ -104,13 +129,13 @@ namespace SZ3 { relErrorBound = cfg.GetReal("GlobalSettings", "RelErrorBound", relErrorBound); psnrErrorBound = cfg.GetReal("GlobalSettings", "PSNRErrorBound", psnrErrorBound); l2normErrorBound = cfg.GetReal("GlobalSettings", "L2NormErrorBound", l2normErrorBound); - + openmp = cfg.GetBoolean("GlobalSettings", "OpenMP", openmp); lorenzo = cfg.GetBoolean("AlgoSettings", "Lorenzo", lorenzo); lorenzo2 = cfg.GetBoolean("AlgoSettings", "Lorenzo2ndOrder", lorenzo2); regression = cfg.GetBoolean("AlgoSettings", "Regression", regression); regression2 = cfg.GetBoolean("AlgoSettings", "Regression2ndOrder", regression2); - + auto interpAlgoStr = cfg.Get("AlgoSettings", "InterpolationAlgo", ""); if (interpAlgoStr == INTERP_ALGO_STR[INTERP_ALGO_LINEAR]) { interpAlgo = INTERP_ALGO_LINEAR; @@ -121,11 
+146,8 @@ namespace SZ3 { interpBlockSize = cfg.GetInteger("AlgoSettings", "InterpolationBlockSize", interpBlockSize); blockSize = cfg.GetInteger("AlgoSettings", "BlockSize", blockSize); quantbinCnt = cfg.GetInteger("AlgoSettings", "QuantizationBinTotal", quantbinCnt); - - } - - + void save(unsigned char *&c) { write(N, c); write(dims.data(), dims.size(), c); @@ -148,8 +170,9 @@ namespace SZ3 { write(stride, c); write(pred_dim, c); write(openmp, c); + write(dataType, c); }; - + void load(const unsigned char *&c) { read(N, c); dims.resize(N); @@ -173,32 +196,64 @@ namespace SZ3 { read(stride, c); read(pred_dim, c); read(openmp, c); + read(dataType, c); } - + void print() { + printf("===================== Begin SZ3 Configuration =====================\n"); + printf("N = %d\n", N); + printf("dims = "); + for (auto dim : dims) { + printf("%zu ", dim); + } + printf("\nnum = %zu\n", num); printf("CmprAlgo = %s\n", enum2Str((ALGO) cmprAlgo)); + printf("ErrorBoundMode = %s\n", enum2Str((EB) errorBoundMode)); + printf("AbsErrorBound = %f\n", absErrorBound); + printf("RelErrorBound = %f\n", relErrorBound); + printf("PSNRErrorBound = %f\n", psnrErrorBound); + printf("L2NormErrorBound = %f\n", l2normErrorBound); + printf("Lorenzo = %d\n", lorenzo); + printf("Lorenzo2ndOrder = %d\n", lorenzo2); + printf("Regression = %d\n", regression); + printf("Regression2ndOrder = %d\n", regression2); + printf("OpenMP = %d\n", openmp); + printf("DataType = %d\n", dataType); + printf("Lossless = %d\n", lossless); + printf("Encoder = %d\n", encoder); + printf("InterpolationAlgo = %s\n", enum2Str((INTERP_ALGO) interpAlgo)); + printf("InterpolationDirection = %d\n", interpDirection); + printf("InterpolationBlockSize = %d\n", interpBlockSize); + printf("QuantizationBinTotal = %d\n", quantbinCnt); + printf("BlockSize = %d\n", blockSize); + printf("Stride = %d\n", stride); + printf("PredDim = %d\n", pred_dim); + printf("===================== End SZ3 Configuration =====================\n"); } - + 
static size_t size_est() { - return sizeof(size_t) * 5 + sizeof(double) * 4 + sizeof(bool) * 5 + sizeof(uint8_t) * 6 + sizeof(int) * 5 + 50; //50 is for redundancy + return sizeof(Config) + sizeof(size_t) * 5 + 32; //sizeof(size_t) * 5 is for dims vector, 32 is for redundancy + return sizeof(size_t) * 5 + sizeof(double) * 4 + sizeof(bool) * 5 + sizeof(uint8_t) * 7 + sizeof(int) * 5 + + 50; //50 is for redundancy } - + char N; std::vector dims; size_t num; uint8_t cmprAlgo = ALGO_INTERP_LORENZO; uint8_t errorBoundMode = EB_ABS; - double absErrorBound; - double relErrorBound; - double psnrErrorBound; - double l2normErrorBound; + double absErrorBound = 1e-3; + double relErrorBound = 0; + double psnrErrorBound = 0; + double l2normErrorBound = 0; bool lorenzo = true; bool lorenzo2 = false; bool regression = true; bool regression2 = false; bool openmp = false; + uint8_t dataType = SZ_FLOAT; // dataType is only used in HDF5 filter uint8_t lossless = 1; // 0-> skip lossless(use lossless_bypass); 1-> zstd - uint8_t encoder = 1;// 0-> skip encoder; 1->HuffmanEncoder; 2->ArithmeticEncoder + uint8_t encoder = 1; // 0-> skip encoder; 1->HuffmanEncoder; 2->ArithmeticEncoder uint8_t interpAlgo = INTERP_ALGO_CUBIC; uint8_t interpDirection = 0; int interpBlockSize = 32; @@ -206,10 +261,7 @@ namespace SZ3 { int blockSize; int stride; //not used now int pred_dim; // not used now - }; - - } #endif //SZ_CONFIG_HPP diff --git a/tools/sz3/sz3.cpp b/tools/sz3/sz3.cpp index 15a9282c..17293a79 100644 --- a/tools/sz3/sz3.cpp +++ b/tools/sz3/sz3.cpp @@ -3,18 +3,6 @@ #include #include "SZ3/api/sz.hpp" - -#define SZ_FLOAT 0 -#define SZ_DOUBLE 1 -#define SZ_UINT8 2 -#define SZ_INT8 3 -#define SZ_UINT16 4 -#define SZ_INT16 5 -#define SZ_UINT32 6 -#define SZ_INT32 7 -#define SZ_UINT64 8 -#define SZ_INT64 9 - void usage() { printf("Note: SZ3 command line arguments are backward compatible with SZ2, \n"); printf(" use -h2 to show the supported SZ2 command line arguments. 
\n"); @@ -126,12 +114,12 @@ template void compress(char *inPath, char *cmpPath, SZ3::Config conf) { T *data = new T[conf.num]; SZ3::readfile(inPath, conf.num, data); - + size_t outSize; SZ3::Timer timer(true); char *bytes = SZ_compress(conf, data, outSize); double compress_time = timer.stop(); - + char outputFilePath[1024]; if (cmpPath == nullptr) { snprintf(outputFilePath, 1024, "%s.sz", inPath); @@ -139,11 +127,11 @@ void compress(char *inPath, char *cmpPath, SZ3::Config conf) { strcpy(outputFilePath, cmpPath); } SZ3::writefile(outputFilePath, bytes, outSize); - + printf("compression ratio = %.2f \n", conf.num * 1.0 * sizeof(T) / outSize); printf("compression time = %f\n", compress_time); printf("compressed data file = %s\n", outputFilePath); - + delete[]data; delete[]bytes; } @@ -152,14 +140,14 @@ template void decompress(char *inPath, char *cmpPath, char *decPath, SZ3::Config conf, int binaryOutput, int printCmpResults) { - + size_t cmpSize; auto cmpData = SZ3::readfile(cmpPath, cmpSize); - + SZ3::Timer timer(true); T *decData = SZ_decompress(conf, cmpData.get(), cmpSize); double compress_time = timer.stop(); - + char outputFilePath[1024]; if (decPath == nullptr) { snprintf(outputFilePath, 1024, "%s.out", cmpPath); @@ -179,7 +167,7 @@ void decompress(char *inPath, char *cmpPath, char *decPath, SZ3::verify(ori_data.get(), decData, conf.num); } delete[]decData; - + printf("compression ratio = %f\n", conf.num * sizeof(T) * 1.0 / cmpSize); printf("decompression time = %f seconds.\n", compress_time); printf("decompressed file = %s\n", outputFilePath); @@ -196,7 +184,7 @@ int main(int argc, char *argv[]) { char *conPath = nullptr; char *decPath = nullptr; bool delCmpPath = false; - + char *errBoundMode = nullptr; char *errBound = nullptr; char *absErrorBound = nullptr; @@ -204,20 +192,20 @@ int main(int argc, char *argv[]) { char *pwrErrorBound = nullptr; char *psnrErrorBound = nullptr; char *normErrorBound = nullptr; - + bool sz2mode = false; - + size_t r4 = 0; 
size_t r3 = 0; size_t r2 = 0; size_t r1 = 0; - + size_t i = 0; int status; if (argc == 1) usage(); int width = -1; - + for (i = 1; i < argc; i++) { if (argv[i][0] != '-' || argv[i][2]) { if (argv[i][1] == 'h' && argv[i][2] == '2') { @@ -227,23 +215,17 @@ int main(int argc, char *argv[]) { } } switch (argv[i][1]) { - case 'h': - usage(); + case 'h':usage(); exit(0); - case 'v': - printf("version: %s\n", SZ3_VER); + case 'v':printf("version: %s\n", SZ3_VER); exit(0); - case 'b': - binaryOutput = true; + case 'b':binaryOutput = true; break; - case 't': - binaryOutput = false; + case 't':binaryOutput = false; break; - case 'a': - printCmpResults = 1; + case 'a':printCmpResults = 1; break; - case 'z': - compression = true; + case 'z':compression = true; if (i + 1 < argc) { cmpPath = argv[i + 1]; if (cmpPath[0] != '-') @@ -252,8 +234,7 @@ int main(int argc, char *argv[]) { cmpPath = nullptr; } break; - case 'x': - sz2mode = true; + case 'x':sz2mode = true; decompression = true; if (i + 1 < argc) { decPath = argv[i + 1]; @@ -263,11 +244,9 @@ int main(int argc, char *argv[]) { decPath = nullptr; } break; - case 'f': - dataType = SZ_FLOAT; + case 'f':dataType = SZ_FLOAT; break; - case 'd': - dataType = SZ_DOUBLE; + case 'd':dataType = SZ_DOUBLE; break; case 'I': if (++i == argc || sscanf(argv[i], "%d", &width) != 1) { @@ -291,8 +270,7 @@ int main(int argc, char *argv[]) { usage(); decPath = argv[i]; break; - case 's': - sz2mode = true; + case 's':sz2mode = true; if (++i == argc) usage(); cmpPath = argv[i]; @@ -357,18 +335,17 @@ int main(int argc, char *argv[]) { usage(); psnrErrorBound = argv[i]; break; - default: - usage(); + default:usage(); break; } } - + if ((inPath == nullptr) && (cmpPath == nullptr)) { printf("Error: you need to specify either a raw binary data file or a compressed data file as input\n"); usage(); exit(0); } - + if (!sz2mode && inPath != nullptr && cmpPath != nullptr) { compression = true; } @@ -390,7 +367,7 @@ int main(int argc, char *argv[]) { 
usage(); exit(0); } - + SZ3::Config conf; if (r2 == 0) { conf = SZ3::Config(r1); @@ -404,7 +381,7 @@ int main(int argc, char *argv[]) { if (compression && conPath != nullptr) { conf.loadcfg(conPath); } - + if (errBoundMode != nullptr) { { // backward compatible with SZ2 @@ -451,9 +428,9 @@ int main(int argc, char *argv[]) { exit(0); } } - + if (compression) { - + if (dataType == SZ_FLOAT) { compress(inPath, cmpPath, conf); } else if (dataType == SZ_DOUBLE) { @@ -473,7 +450,7 @@ int main(int argc, char *argv[]) { printf("Error: Since you add -a option (analysis), please specify the original data path by -i .\n"); exit(0); } - + if (dataType == SZ_FLOAT) { decompress(inPath, cmpPath, decPath, conf, binaryOutput, printCmpResults); } else if (dataType == SZ_DOUBLE) { From 3b24d9708e934f6a864610152377c2fe0ad15ce9 Mon Sep 17 00:00:00 2001 From: Kai Zhao Date: Sat, 29 Jun 2024 16:10:57 -0400 Subject: [PATCH 14/23] completely rewrite the SZ3 HDF5 filter. The config is stored in cd_values now. --- tools/H5Z-SZ3/include/H5Z_SZ3.hpp | 64 +- tools/H5Z-SZ3/src/H5Z_SZ3.cpp | 1120 ++------------------ tools/H5Z-SZ3/test/CMakeLists.txt | 1 - tools/H5Z-SZ3/test/dsz3FromHDF5.cpp | 544 +++++----- tools/H5Z-SZ3/test/print_h5repack_args.cpp | 219 ---- tools/H5Z-SZ3/test/sz3ToHDF5.cpp | 617 +++-------- 6 files changed, 527 insertions(+), 2038 deletions(-) delete mode 100644 tools/H5Z-SZ3/test/print_h5repack_args.cpp diff --git a/tools/H5Z-SZ3/include/H5Z_SZ3.hpp b/tools/H5Z-SZ3/include/H5Z_SZ3.hpp index cd26fe87..e8b1fd47 100644 --- a/tools/H5Z-SZ3/include/H5Z_SZ3.hpp +++ b/tools/H5Z-SZ3/include/H5Z_SZ3.hpp @@ -6,31 +6,13 @@ #define SZ3_H5Z_SZ3_H #define H5Z_FILTER_SZ3 32024 -#define SZ_FLOAT 0 -#define SZ_DOUBLE 1 -#define SZ_UINT8 2 -#define SZ_INT8 3 -#define SZ_UINT16 4 -#define SZ_INT16 5 -#define SZ_UINT32 6 -#define SZ_INT32 7 -#define SZ_UINT64 8 -#define SZ_INT64 9 #include "hdf5.h" -#include -#include -#include -#include +#include +#include +#include +#include #include - - - - 
-#define LITTLE_ENDIAN_SYSTEM 0 -#define BIG_ENDIAN_SYSTEM 1 -#define LITTLE_ENDIAN_DATA 0 -#define BIG_ENDIAN_DATA 1 #ifdef __cplusplus extern "C" { @@ -48,45 +30,15 @@ do { \ #define H5Z_SZ_PUSH_AND_GOTO(MAJ, MIN, RET, MSG) \ do \ { \ - H5Epush(H5E_DEFAULT,__FILE__,_funcname_,__LINE__,H5Z_SZ_ERRCLASS,MAJ,MIN,MSG); \ - return RET; \ + H5Epush(H5E_DEFAULT,__FILE__,_funcname_,__LINE__,H5Z_SZ_ERRCLASS,MAJ,MIN,MSG); \ + return RET; \ } while(0) -extern int sysEndianType; -extern int dataEndianType; - -void SZ_refreshDimForCdArray(int dataType, size_t old_cd_nelmts, unsigned int *old_cd_values, size_t* new_cd_nelmts, unsigned int **new_cd_values, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1); - -void SZ_errConfigToCdArray(size_t* cd_nelmts, unsigned int **cd_values, int error_bound_mode, double abs_error, double rel_error, double l2normErrorBound, double psnr); static herr_t H5Z_sz3_set_local(hid_t dcpl_id, hid_t type_id, hid_t chunk_space_id); -static size_t H5Z_filter_sz3(unsigned int flags, size_t cd_nelmts, const unsigned int cd_values[], size_t nbytes, size_t* buf_size, void** buf); - -void SZ_cdArrayToMetaData(size_t cd_nelmts, const unsigned int cd_values[], int* dimSize, int* dataType, size_t* r5, size_t* r4, size_t* r3, size_t* r2, size_t* r1); - -void SZ_cdArrayToMetaDataErr(size_t cd_nelmts, const unsigned int cd_values[], int* dimSize, int* dataType, size_t* r5, size_t* r4, size_t* r3, size_t* r2, size_t* r1, - int* error_bound_mode, double* abs_error, double* rel_error, double* l2norm_error, double* psnr); - -void SZ_copymetaDataToCdArray(size_t* cd_nelmts, unsigned int *cd_values, int dataType, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1); - -int checkCDValuesWithErrors(size_t cd_nelmts, const unsigned int cd_values[]); - -size_t computeDataLength(size_t r5, size_t r4, size_t r3, size_t r2, size_t r1); -int computeDimension(size_t r5, size_t r4, size_t r3, size_t r2, size_t r1); -void init_dims_chunk(int dim, hsize_t dims[5], hsize_t 
chunk[5], size_t nbEle, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1); - -double bytesToDouble(unsigned char* bytes); -void doubleToBytes(unsigned char *b, double num); - -void longToBytes_bigEndian(unsigned char *b, uint64_t num) ; - -int bytesToInt_bigEndian(unsigned char* bytes); -int64_t bytesToLong_bigEndian(unsigned char* b); - -void detectSysEndianType(); -void symTransform_8bytes(unsigned char data[8]); +static size_t H5Z_filter_sz3(unsigned int flags, size_t cd_nelmts, const unsigned int cd_values[], size_t nbytes, size_t *buf_size, void **buf); -int filterDimension(size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t* correctedDimension); +const void *H5PLget_plugin_info(void); #ifdef __cplusplus } diff --git a/tools/H5Z-SZ3/src/H5Z_SZ3.cpp b/tools/H5Z-SZ3/src/H5Z_SZ3.cpp index adee0cc6..952e4013 100644 --- a/tools/H5Z-SZ3/src/H5Z_SZ3.cpp +++ b/tools/H5Z-SZ3/src/H5Z_SZ3.cpp @@ -3,39 +3,26 @@ // #include -#include "H5Z_SZ3.hpp" #include +#include #include "H5PLextern.h" +#include "H5Z_SZ3.hpp" #include "SZ3/api/sz.hpp" -#include "SZ3/utils/ByteUtil.hpp" - - -int sysEndianType = LITTLE_ENDIAN_SYSTEM; -int dataEndianType = LITTLE_ENDIAN_DATA; hid_t H5Z_SZ_ERRCLASS = -1; -using namespace SZ3; - //h5repack -f UD=32024,0 /home/arham23/Software/SZ3/test/testfloat_8_8_128.dat.h5 tf_8_8_128.dat.sz.h5 -//load from "sz3.config" in local directory if 1 else use default values or cd values - -#define SZ3_CONFIG_PATH "SZ3_CONFIG_PATH" -SZ3::Config sz3_conf; -bool sz3_conf_loaded = false; -int MAX_CHUNK_SIZE = INT_MAX; - //filter definition const H5Z_class2_t H5Z_SZ3[1] = {{ - H5Z_CLASS_T_VERS, /* H5Z_class_t version */ - (H5Z_filter_t) H5Z_FILTER_SZ3, /* Filter id number */ - 1, /* encoder_present flag (set to true) */ - 1, /* decoder_present flag (set to true) */ - "SZ3 compressor/decompressor for floating-point data.", /* Filter name for debugging */ - NULL, /* The "can apply" callback */ - H5Z_sz3_set_local, /* The "set local" callback */ - 
(H5Z_func_t) H5Z_filter_sz3, /* The actual filter function */ + H5Z_CLASS_T_VERS, /* H5Z_class_t version */ + (H5Z_filter_t) H5Z_FILTER_SZ3, /* Filter id number */ + 1, /* encoder_present flag (set to true) */ + 1, /* decoder_present flag (set to true) */ + "SZ3 compressor/decompressor for floating-point data.", /* Filter name for debugging */ + NULL, /* The "can apply" callback */ + H5Z_sz3_set_local, /* The "set local" callback */ + (H5Z_func_t) H5Z_filter_sz3, /* The actual filter function */ } }; @@ -47,1042 +34,163 @@ const void *H5PLget_plugin_info(void) { return H5Z_SZ3; } -/*FILTER FUNCTIONS*/ - -/** - * to be used in compression, and to be called outside H5Z_filter_sz(). - * */ - -void SZ_refreshDimForCdArray(int dataType, size_t old_cd_nelmts, unsigned int *old_cd_values, size_t *new_cd_nelmts, unsigned int **new_cd_values, - size_t r5, size_t r4, size_t r3, size_t r2, size_t r1) { - unsigned char bytes[8] = {0}; - *new_cd_values = (unsigned int *) malloc(sizeof(unsigned int) * 16); - memset(*new_cd_values, 0, sizeof(unsigned int) * 16); - - //correct dimension if needed - size_t _r[5]; - filterDimension(r5, r4, r3, r2, r1, _r); - size_t _r5 = _r[4]; - size_t _r4 = _r[3]; - size_t _r3 = _r[2]; - size_t _r2 = _r[1]; - size_t _r1 = _r[0]; - - int i = 0; - int oldDim = computeDimension(r5, r4, r3, r2, r1); - int newDim = computeDimension(_r5, _r4, _r3, _r2, _r1); - (*new_cd_values)[0] = newDim; - (*new_cd_values)[1] = dataType; - - - switch (newDim) { - case 1: - longToBytes_bigEndian(bytes, (uint64_t) r1); - (*new_cd_values)[2] = bytesToInt_bigEndian(bytes); - (*new_cd_values)[3] = bytesToInt_bigEndian(&bytes[4]); - if (old_cd_nelmts == 0) - *new_cd_nelmts = 4; - else { - (*new_cd_values)[4] = old_cd_values[0]; - (*new_cd_values)[5] = old_cd_values[1]; - (*new_cd_values)[6] = old_cd_values[2]; - (*new_cd_values)[7] = old_cd_values[3]; - (*new_cd_values)[8] = old_cd_values[4]; - (*new_cd_values)[9] = old_cd_values[5]; - (*new_cd_values)[10] = 
old_cd_values[6]; - (*new_cd_values)[11] = old_cd_values[7]; - (*new_cd_values)[12] = old_cd_values[8]; - *new_cd_nelmts = 13; - } - break; - case 2: - (*new_cd_values)[2] = (unsigned int) _r2; - (*new_cd_values)[3] = (unsigned int) _r1; - if (old_cd_nelmts == 0) - *new_cd_nelmts = 4; - else { - (*new_cd_values)[4] = old_cd_values[0]; - (*new_cd_values)[5] = old_cd_values[1]; - (*new_cd_values)[6] = old_cd_values[2]; - (*new_cd_values)[7] = old_cd_values[3]; - (*new_cd_values)[8] = old_cd_values[4]; - (*new_cd_values)[9] = old_cd_values[5]; - (*new_cd_values)[10] = old_cd_values[6]; - (*new_cd_values)[11] = old_cd_values[7]; - (*new_cd_values)[12] = old_cd_values[8]; - *new_cd_nelmts = 13; - } - break; - case 3: - (*new_cd_values)[2] = (unsigned int) _r3; - (*new_cd_values)[3] = (unsigned int) _r2; - (*new_cd_values)[4] = (unsigned int) _r1; - if (old_cd_nelmts == 0) - *new_cd_nelmts = 5; - else { - (*new_cd_values)[5] = old_cd_values[0]; - (*new_cd_values)[6] = old_cd_values[1]; - (*new_cd_values)[7] = old_cd_values[2]; - (*new_cd_values)[8] = old_cd_values[3]; - (*new_cd_values)[9] = old_cd_values[4]; - (*new_cd_values)[10] = old_cd_values[5]; - (*new_cd_values)[11] = old_cd_values[6]; - (*new_cd_values)[12] = old_cd_values[7]; - (*new_cd_values)[13] = old_cd_values[8]; - *new_cd_nelmts = 14; - } - break; - case 4: - (*new_cd_values)[2] = (unsigned int) _r4; - (*new_cd_values)[3] = (unsigned int) _r3; - (*new_cd_values)[4] = (unsigned int) _r2; - (*new_cd_values)[5] = (unsigned int) _r1; - if (old_cd_nelmts == 0) - *new_cd_nelmts = 6; - else { - (*new_cd_values)[6] = old_cd_values[0]; - (*new_cd_values)[7] = old_cd_values[1]; - (*new_cd_values)[8] = old_cd_values[2]; - (*new_cd_values)[9] = old_cd_values[3]; - (*new_cd_values)[10] = old_cd_values[4]; - (*new_cd_values)[11] = old_cd_values[5]; - (*new_cd_values)[12] = old_cd_values[6]; - (*new_cd_values)[13] = old_cd_values[7]; - (*new_cd_values)[14] = old_cd_values[8]; - *new_cd_nelmts = 15; - break; - } - 
default: - (*new_cd_values)[2] = (unsigned int) _r5; - (*new_cd_values)[3] = (unsigned int) _r4; - (*new_cd_values)[4] = (unsigned int) _r3; - (*new_cd_values)[5] = (unsigned int) _r2; - (*new_cd_values)[6] = (unsigned int) _r1; - if (old_cd_nelmts == 0) - *new_cd_nelmts = 7; - else { - (*new_cd_values)[7] = old_cd_values[0]; - (*new_cd_values)[8] = old_cd_values[1]; - (*new_cd_values)[9] = old_cd_values[2]; - (*new_cd_values)[10] = old_cd_values[3]; - (*new_cd_values)[11] = old_cd_values[4]; - (*new_cd_values)[12] = old_cd_values[5]; - (*new_cd_values)[13] = old_cd_values[6]; - (*new_cd_values)[14] = old_cd_values[7]; - (*new_cd_values)[15] = old_cd_values[8]; - *new_cd_nelmts = 16; - } - } -} - - -void -SZ_errConfigToCdArray(size_t *cd_nelmts, unsigned int **cd_values, int error_bound_mode, double abs_error, double rel_error, double l2normErrorBound, - double psnr) { - *cd_values = (unsigned int *) malloc(sizeof(unsigned int) * 9); - int k = 0; - (*cd_values)[k++] = error_bound_mode; - unsigned char b[8]; - doubleToBytes(b, abs_error); - (*cd_values)[k++] = bytesToInt32_bigEndian(b); - (*cd_values)[k++] = bytesToInt32_bigEndian(b + 4); - doubleToBytes(b, rel_error); - (*cd_values)[k++] = bytesToInt32_bigEndian(b); - (*cd_values)[k++] = bytesToInt32_bigEndian(b + 4); - doubleToBytes(b, l2normErrorBound); - (*cd_values)[k++] = bytesToInt32_bigEndian(b); - (*cd_values)[k++] = bytesToInt32_bigEndian(b + 4); - doubleToBytes(b, psnr); - (*cd_values)[k++] = bytesToInt32_bigEndian(b); - (*cd_values)[k++] = bytesToInt32_bigEndian(b + 4); - *cd_nelmts = k; -} - static herr_t H5Z_sz3_set_local(hid_t dcpl_id, hid_t type_id, hid_t chunk_space_id) { - - //printf("get into H5Z_sz3_set_local\n"); - detectSysEndianType(); - + + printf("start H5Z_sz3_set_local\n"); + //printf("start in H5Z_sz3_set_local, dcpl_id = %d\n", dcpl_id); static char const *_funcname_ = "H5Z_sz3_set_local"; - size_t r5 = 0, r4 = 0, r3 = 0, r2 = 0, r1 = 0, dsize; - - int i, ndims, ndims_used = 0; - hsize_t 
dims[H5S_MAX_RANK], dims_used[5] = {0, 0, 0, 0, 0}; - herr_t retval = 0; - H5T_class_t dclass; - H5T_sign_t dsign; + + // herr_t ret = H5Zregister(H5Z_SZ3); + + SZ3::Config conf; + unsigned int flags = 0; - size_t mem_cd_nelmts = 9, cd_nelmts = 0; - unsigned int mem_cd_values[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - - //H5Z_FILTER_SZ - //note that mem_cd_nelmts must be non-zero, otherwise, mem_cd_values cannot be filled. - if (0 > H5Pget_filter_by_id(dcpl_id, H5Z_FILTER_SZ3, &flags, &mem_cd_nelmts, mem_cd_values, 0, NULL, NULL)) + size_t cd_nelmts = conf.size_est(); + std::vector cd_values(conf.size_est(), 0); + + //read cd_values from HDF5 + //note that cd_nelmts must be non-zero, otherwise, cd_values cannot be filled. + if (0 > H5Pget_filter_by_id(dcpl_id, H5Z_FILTER_SZ3, &flags, &cd_nelmts, cd_values.data(), 0, NULL, NULL)) H5Z_SZ_PUSH_AND_GOTO(H5E_PLINE, H5E_CANTGET, 0, "unable to get current SZ cd_values"); - - //set default value for error bound - sz3_conf.errorBoundMode = EB_ABS; - sz3_conf.absErrorBound = 1e-3; - if (!sz3_conf_loaded) { - if (const char *conf_file = std::getenv(SZ3_CONFIG_PATH)) { - sz3_conf.loadcfg(conf_file); - sz3_conf_loaded = true; - } + + //load cd_values into config + if (cd_nelmts != 0) { + auto buffer = (const unsigned char *) (cd_values.data()); + conf.load(buffer); } -// if (mem_cd_nelmts == 0) //this means that the error information is missing from the cd_values -// { -// //printf("mem_cd_nelmets is 0, so let's try using sz3.config to load error configuration....\n"); -// std::ifstream f(CONFIG_PATH); -// if (f.good()) { -// printf("sz3.config found!\n"); -// sz3_conf_loaded = 1; -// } else -// printf("sz3.config not found, using default parameters\n"); -// f.close(); -// } else //this means that the error information is included in the cd_values -// { -// sz3_conf_loaded = 0; -// //printf("mem_cd_nelmets is non-zero, so let's use the parameters set through cd_values.....\n"); -// } - herr_t ret = 
H5Zregister(H5Z_SZ3); - - int dataType = SZ_FLOAT; - - //printf("DC\n"); + + //read datatype and dims from HDF5 + H5T_class_t dclass; if (0 > (dclass = H5Tget_class(type_id))) H5Z_SZ_PUSH_AND_GOTO(H5E_ARGS, H5E_BADTYPE, -1, "not a datatype"); - - //printf("DS\n"); + + size_t dsize; if (0 == (dsize = H5Tget_size(type_id))) H5Z_SZ_PUSH_AND_GOTO(H5E_ARGS, H5E_BADTYPE, -1, "size is smaller than 0!"); - - //printf("ND\n"); - if (0 > (ndims = H5Sget_simple_extent_dims(chunk_space_id, dims, 0))) + + int ndims; + hsize_t dims_all[H5S_MAX_RANK]; + if (0 > (ndims = H5Sget_simple_extent_dims(chunk_space_id, dims_all, 0))) H5Z_SZ_PUSH_AND_GOTO(H5E_ARGS, H5E_BADTYPE, -1, "not a data space"); - - for (i = 0; i < ndims; i++) - dims_used[i] = dims[i]; - - - //printf("NDIM: %i\n", ndims); - //printf("N_USE: %i\n", ndims_used); - //printf("DCLASS: %i\n", dclass); - //printf("DSIZE: %zu\n", dsize); - - //for(i = 0; i < ndims_used; i++){ - // printf("DIMS[%i] : %zu\n", i, dims_used[i]); - //} - //printf("\nDCEQ\n"); - + std::vector dims(dims_all, dims_all + ndims); + + //update conf with datatype + conf.dataType = SZ_FLOAT; if (dclass == H5T_FLOAT) - dataType = dsize == 4 ? SZ_FLOAT : SZ_DOUBLE; + conf.dataType = dsize == 4 ? 
SZ_FLOAT : SZ_DOUBLE; else if (dclass == H5T_INTEGER) { + H5T_sign_t dsign; if (0 > (dsign = H5Tget_sign(type_id))) H5Z_SZ_PUSH_AND_GOTO(H5E_ARGS, H5E_BADTYPE, -1, "Error in calling H5Tget_sign(type_id)...."); if (dsign == H5T_SGN_NONE) //unsigned { switch (dsize) { - case 1: - dataType = SZ_UINT8; + case 1:conf.dataType = SZ_UINT8; break; - case 2: - dataType = SZ_UINT16; + case 2:conf.dataType = SZ_UINT16; break; - case 4: - dataType = SZ_UINT32; + case 4:conf.dataType = SZ_UINT32; break; - case 8: - dataType = SZ_UINT64; + case 8:conf.dataType = SZ_UINT64; break; } } else { switch (dsize) { - case 1: - dataType = SZ_INT8; + case 1:conf.dataType = SZ_INT8; break; - case 2: - dataType = SZ_INT16; + case 2:conf.dataType = SZ_INT16; break; - case 4: - dataType = SZ_INT32; + case 4:conf.dataType = SZ_INT32; break; - case 8: - dataType = SZ_INT64; + case 8:conf.dataType = SZ_INT64; break; } } } else { H5Z_SZ_PUSH_AND_GOTO(H5E_PLINE, H5E_BADTYPE, 0, "datatype class must be H5T_FLOAT or H5T_INTEGER"); } - - unsigned int *cd_values = NULL; - if (mem_cd_nelmts != 0 && mem_cd_nelmts != 9) { - H5Epush(H5E_DEFAULT, __FILE__, "H5Z_sz3_set_local", __LINE__, H5E_ERR_CLS, H5E_ARGS, H5E_BADVALUE, - "Wrong number of cd_values: The new version has 9 integer elements in cd_values. 
Please check 'test/print_h5repack_args' to get the correct cd_values."); - H5Eprint(H5E_DEFAULT, stderr); - return -1; - } - SZ_refreshDimForCdArray(dataType, mem_cd_nelmts, mem_cd_values, &cd_nelmts, &cd_values, dims_used[4], dims_used[3], dims_used[2], dims_used[1], - dims_used[0]); - - /* Now, update cd_values for the filter */ - if (0 > H5Pmodify_filter(dcpl_id, H5Z_FILTER_SZ3, flags, cd_nelmts, cd_values)) + + //update conf with dims + conf.setDims(std::begin(dims), std::end(dims)); + + //save conf into cd_values + cd_nelmts = std::ceil(conf.size_est() / 1.0 / sizeof(int)); + auto buffer = (unsigned char *) (cd_values.data()); + conf.save(buffer); + + /* update cd_values for the filter */ + if (0 > H5Pmodify_filter(dcpl_id, H5Z_FILTER_SZ3, flags, cd_nelmts, cd_values.data())) H5Z_SZ_PUSH_AND_GOTO(H5E_PLINE, H5E_BADVALUE, 0, "failed to modify cd_values"); - - free(cd_values); - - retval = 1; - done: + + herr_t retval = 1; return retval; } - -static size_t H5Z_filter_sz3(unsigned int flags, size_t cd_nelmts, const unsigned int cd_values[], size_t nbytes, size_t *buf_size, void **buf) { - //printf("get into H5Z_filter_sz3\n"); - size_t r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0; - int dimSize = 0, dataType = 0; - - if (cd_nelmts == 0) //this is special data such as string, which should not be treated as values. 
- return nbytes; - - int withErrInfo = checkCDValuesWithErrors(cd_nelmts, cd_values); - int error_mode = 0; -// int cmp_algo = 1; -// int interp_algo = 1; - double abs_error = 0, rel_error = 0, l2norm_error = 0, psnr = 0; - if (withErrInfo) - SZ_cdArrayToMetaDataErr(cd_nelmts, cd_values, &dimSize, &dataType, &r5, &r4, &r3, &r2, &r1, &error_mode, &abs_error, &rel_error, - &l2norm_error, &psnr); - else - SZ_cdArrayToMetaData(cd_nelmts, cd_values, &dimSize, &dataType, &r5, &r4, &r3, &r2, &r1); - - /*int i=0; - for(i=0;i +void process_data(SZ3::Config &conf, void **buf, size_t *buf_size, size_t nbytes, bool is_decompress) { + if (is_decompress) { + T *processedData = (T *) malloc(conf.num * sizeof(T)); + SZ_decompress(conf, (char *) *buf, nbytes, processedData); + free(*buf); + *buf = processedData; + *buf_size = conf.num * sizeof(T); } else { - /*compress data*/ - //based on # dimensions, get relevant dimensions and load config object with them - if (dimSize <= 0) { - printf("Error: Number of Dimensions is <= 0"); - exit(0); - } - //printf("\nDIMS_CMP:\n"); - //printf("r1 %u r2 %u r3 %u r4 %u r5 %u\n", r1,r2,r3,r4,r5); - - SZ3::Config conf(sz3_conf); - std::vector dims; - if (r2 == 0) { - dims = {r1}; - } else if (r3 == 0) { - dims = {r2, r1}; - } else if (r4 == 0) { - dims = {r3, r2, r1}; - } else if (r5 == 0) { - dims = {r4, r3, r2, r1}; - } else { - dims = {r5, r4, r3, r2, r1}; - } - conf.setDims(dims.begin(), dims.end()); - - - - //if config file found and no user defined params, read the config file - if (withErrInfo) { - if (error_mode < 0 || error_mode > 5) { - printf("Invalid error mode: %i, error mode should be in [0,5]", error_mode); - exit(0); - } - conf.errorBoundMode = error_mode; - conf.absErrorBound = abs_error; - conf.relErrorBound = rel_error; - conf.l2normErrorBound = l2norm_error; - conf.psnrErrorBound = psnr; - } - //printf("PARAMS: mode|%i, abs_eb|%f, rel_eb|%f, l2_eb|%f, psnr_eb|%f\n", error_mode, abs_error, rel_error, l2norm_error, psnr); - 
size_t outSize = 0; - char *compressedData = NULL; - - - switch (dataType) { - case SZ_FLOAT: //FLOAT - { - compressedData = SZ_compress(conf, (float *) *buf, outSize); - break; - } - - case SZ_DOUBLE: //DOUBLE - { - compressedData = SZ_compress(conf, (double *) *buf, outSize); - break; - } - - case SZ_INT8: //INT 8 - { - compressedData = SZ_compress(conf, (int8_t *) *buf, outSize); - break; - } - - case SZ_UINT8: //UINT 8 - { - compressedData = SZ_compress(conf, (uint8_t *) *buf, outSize); - break; - } - - case SZ_INT16: //INT 16 - { - compressedData = SZ_compress(conf, (int16_t *) *buf, outSize); - break; - } - - case SZ_UINT16: //UINT 16 - { - compressedData = SZ_compress(conf, (uint16_t *) *buf, outSize); - break; - } - - case SZ_INT32: //INT 32 - { - compressedData = SZ_compress(conf, (int32_t *) *buf, outSize); - break; - } - - case SZ_UINT32: //UINT 32 - { - compressedData = SZ_compress(conf, (uint32_t *) *buf, outSize); - break; - } - - case SZ_INT64: //INT 64 - { - compressedData = SZ_compress(conf, (int64_t *) *buf, outSize); - break; - } - - case SZ_UINT64: //UINT 64 - { - compressedData = SZ_compress(conf, (uint64_t *) *buf, outSize); - break; - } - - default: { - printf("Compression Error: Unknown Datatype"); - exit(0); - } - } - - //printf("\nOS: %u \n", outSize); + char *processedData = SZ_compress(conf, (T *) *buf, outSize); free(*buf); - *buf = compressedData; + *buf = processedData; *buf_size = outSize; - - - } - - return *buf_size; -} - -/*HELPER FUNCTIONS*/ -//use to convert HDF5 cd_array to SZ params inside filter -void -SZ_cdArrayToMetaData(size_t cd_nelmts, const unsigned int cd_values[], int *dimSize, int *dataType, size_t *r5, size_t *r4, size_t *r3, size_t *r2, - size_t *r1) { - assert(cd_nelmts >= 4); - unsigned char bytes[8]; - *dimSize = cd_values[0]; - *dataType = cd_values[1]; - - switch (*dimSize) { - case 1: - SZ3::int32ToBytes_bigEndian(bytes, cd_values[2]); - SZ3::int32ToBytes_bigEndian(&bytes[4], cd_values[3]); - if 
(sizeof(size_t) == 4) - *r1 = (unsigned int) SZ3::bytesToInt64_bigEndian(bytes); - else - *r1 = (uint64_t) SZ3::bytesToInt64_bigEndian(bytes); - *r2 = *r3 = *r4 = *r5 = 0; - break; - case 2: - *r3 = *r4 = *r5 = 0; - *r2 = cd_values[3]; - *r1 = cd_values[2]; - break; - case 3: - *r4 = *r5 = 0; - *r3 = cd_values[4]; - *r2 = cd_values[3]; - *r1 = cd_values[2]; - break; - case 4: - *r5 = 0; - *r4 = cd_values[5]; - *r3 = cd_values[4]; - *r2 = cd_values[3]; - *r1 = cd_values[2]; - break; - default: - *r5 = cd_values[6]; - *r4 = cd_values[5]; - *r3 = cd_values[4]; - *r2 = cd_values[3]; - *r1 = cd_values[2]; } } -void -SZ_cdArrayToMetaDataErr(size_t cd_nelmts, const unsigned int cd_values[], int *dimSize, int *dataType, size_t *r5, size_t *r4, size_t *r3, size_t *r2, - size_t *r1, int *error_bound_mode, double *abs_error, double *rel_error, double *l2norm_error, double *psnr) { - //get dimension, datatype metadata from cd_values - SZ_cdArrayToMetaData(cd_nelmts, cd_values, dimSize, dataType, r5, r4, r3, r2, r1); - //read in error bound value information - int dim = *dimSize; - int k = dim == 1 ? 
4 : dim + 2; - unsigned char b[8]; - int32ToBytes_bigEndian(b, cd_values[k++]); - *error_bound_mode = bytesToInt32_bigEndian(b); - int32ToBytes_bigEndian(b, cd_values[k++]); - int32ToBytes_bigEndian(b + 4, cd_values[k++]); - *abs_error = bytesToDouble(b); - int32ToBytes_bigEndian(b, cd_values[k++]); - int32ToBytes_bigEndian(b + 4, cd_values[k++]); - *rel_error = bytesToDouble(b); - int32ToBytes_bigEndian(b, cd_values[k++]); - int32ToBytes_bigEndian(b + 4, cd_values[k++]); - *l2norm_error = bytesToDouble(b); - int32ToBytes_bigEndian(b, cd_values[k++]); - int32ToBytes_bigEndian(b + 4, cd_values[k++]); - *psnr = bytesToDouble(b); -} - -void SZ_copymetaDataToCdArray(size_t *cd_nelmts, unsigned int *cd_values, int dataType, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1) { - unsigned char bytes[8] = {0}; - uint64_t size; - int dim = computeDimension(r5, r4, r3, r2, r1); - cd_values[0] = dim; - cd_values[1] = dataType; //0: FLOAT ; 1: DOUBLE ; 2,3,4,....: INTEGER.... - - switch (dim) { - case 1: - size = (uint64_t) r1; - SZ3::int64ToBytes_bigEndian(bytes, size); - cd_values[2] = SZ3::bytesToInt32_bigEndian(bytes); - cd_values[3] = SZ3::bytesToInt32_bigEndian(&bytes[4]); - *cd_nelmts = 4; - break; - case 2: - cd_values[2] = (unsigned int) r2; - cd_values[3] = (unsigned int) r1; - *cd_nelmts = 4; +/** + * https://docs.hdfgroup.org/hdf5/v1_14/_f_i_l_t_e_r.html + * The flags, cd_nelmts, and cd_values are the same as for the H5Pset_filter() function with the additional flag H5Z_FLAG_REVERSE which is set when the filter is called as part of the input pipeline. + * The input buffer is pointed to by *buf and has a total size of *buf_size bytes but only nbytes are valid data. + * The filter should perform the transformation in place if possible and return the number of valid bytes or zero for failure. 
+ * If the transformation cannot be done in place then the filter should allocate a new buffer with malloc() and assign it to *buf, assigning the allocated size of that buffer to *buf_size. + * The old buffer should be freed by calling free(). + */ +static size_t H5Z_filter_sz3(unsigned int flags, size_t cd_nelmts, const unsigned int cd_values[], size_t nbytes, size_t *buf_size, void **buf) { + printf("start H5Z_filter_sz3\n"); + + if (cd_nelmts == 0) //this is special data such as string, which should not be treated as values. + return nbytes; + + SZ3::Config conf; + + auto buffer = (const unsigned char *) (cd_values); + conf.load(buffer); +// conf.print(); + + if (conf.num < 20) + return nbytes; + + bool is_decompress = flags & H5Z_FLAG_REVERSE; + switch (conf.dataType) { + case SZ_FLOAT: process_data(conf, buf, buf_size, nbytes, is_decompress); break; - case 3: - cd_values[2] = (unsigned int) r3; - cd_values[3] = (unsigned int) r2; - cd_values[4] = (unsigned int) r1; - *cd_nelmts = 5; + case SZ_DOUBLE: process_data(conf, buf, buf_size, nbytes, is_decompress); break; - case 4: - cd_values[2] = (unsigned int) r4; - cd_values[3] = (unsigned int) r3; - cd_values[4] = (unsigned int) r2; - cd_values[5] = (unsigned int) r1; - *cd_nelmts = 6; + case SZ_INT8: process_data(conf, buf, buf_size, nbytes, is_decompress); break; - default: - cd_values[2] = (unsigned int) r5; - cd_values[3] = (unsigned int) r4; - cd_values[4] = (unsigned int) r3; - cd_values[5] = (unsigned int) r2; - cd_values[6] = (unsigned int) r1; - *cd_nelmts = 7; - } -} - -int checkCDValuesWithErrors(size_t cd_nelmts, const unsigned int cd_values[]) { - int result = 0; //0 means no-error-information-in-cd_values; 1 means cd_values contains error information - int dimSize = cd_values[0]; - //printf("nc_nelmts = %d\n", cd_nelmts); - switch (dimSize) { - case 1: - if (cd_nelmts > 4) - result = 1; + case SZ_UINT8: process_data(conf, buf, buf_size, nbytes, is_decompress); break; - case 2: - if (cd_nelmts > 4) - 
result = 1; + case SZ_INT16: process_data(conf, buf, buf_size, nbytes, is_decompress); break; - case 3: - if (cd_nelmts > 5) - result = 1; + case SZ_UINT16: process_data(conf, buf, buf_size, nbytes, is_decompress); break; - case 4: - if (cd_nelmts > 6) - result = 1; + case SZ_INT32: process_data(conf, buf, buf_size, nbytes, is_decompress); break; - case 5: - if (cd_nelmts > 7) - result = 1; + case SZ_UINT32: process_data(conf, buf, buf_size, nbytes, is_decompress); break; - } - return result; -} - -size_t computeDataLength(size_t r5, size_t r4, size_t r3, size_t r2, size_t r1) { - size_t dataLength; - if (r1 == 0) { - dataLength = 0; - } else if (r2 == 0) { - dataLength = r1; - } else if (r3 == 0) { - dataLength = r1 * r2; - } else if (r4 == 0) { - dataLength = r1 * r2 * r3; - } else if (r5 == 0) { - dataLength = r1 * r2 * r3 * r4; - } else { - dataLength = r1 * r2 * r3 * r4 * r5; - } - return dataLength; -} - -int computeDimension(size_t r5, size_t r4, size_t r3, size_t r2, size_t r1) { - int dimension; - if (r1 == 0) { - dimension = 0; - } else if (r2 == 0) { - dimension = 1; - } else if (r3 == 0) { - dimension = 2; - } else if (r4 == 0) { - dimension = 3; - } else if (r5 == 0) { - dimension = 4; - } else { - dimension = 5; - } - return dimension; -} - -void init_dims_chunk(int dim, hsize_t dims[5], hsize_t chunk[5], size_t nbEle, size_t r5, size_t r4, size_t r3, size_t r2, size_t r1) { - switch (dim) { - case 1: - dims[0] = r1; - if (nbEle <= MAX_CHUNK_SIZE) //2^32-1 - chunk[0] = r1; - else - chunk[0] = 2147483648;//2^31 + case SZ_INT64: process_data(conf, buf, buf_size, nbytes, is_decompress); break; - case 2: - dims[0] = r2; - dims[1] = r1; - if (nbEle <= MAX_CHUNK_SIZE) //2^32-1 - { - chunk[0] = r2; - chunk[1] = r1; - } else { - printf("Error: size is too big!\n"); - exit(0); - } + case SZ_UINT64: process_data(conf, buf, buf_size, nbytes, is_decompress); break; - case 3: - dims[0] = r3; - dims[1] = r2; - dims[2] = r1; - if (nbEle <= MAX_CHUNK_SIZE) //2^32-1 - 
{ - chunk[0] = r3; - chunk[1] = r2; - chunk[2] = r1; - } else { - printf("Error: size is too big!\n"); - exit(0); - } - break; - case 4: - dims[0] = r4; - dims[1] = r3; - dims[2] = r2; - dims[3] = r1; - if (nbEle <= MAX_CHUNK_SIZE) //2^32-1 - { - chunk[0] = r4; - chunk[1] = r3; - chunk[2] = r2; - chunk[3] = r1; - } else { - printf("Error: size is too big!\n"); - exit(0); - } - break; - default: - dims[0] = r5; - dims[1] = r4; - dims[2] = r3; - dims[3] = r2; - dims[4] = r1; - if (nbEle <= MAX_CHUNK_SIZE) //2^32-1 - { - chunk[0] = r5; - chunk[1] = r4; - chunk[2] = r3; - chunk[3] = r2; - chunk[4] = r1; - } else { - printf("Error: size is too big!\n"); - exit(0); - } - } -} - -//detect sys endian type -inline void detectSysEndianType() { - //get sys endian type - int x_temp = 1; - char *y_temp = (char *) &x_temp; - - if (*y_temp == 1) - sysEndianType = LITTLE_ENDIAN_SYSTEM; - else //=0 - sysEndianType = BIG_ENDIAN_SYSTEM; -} - -inline void symTransform_8bytes(unsigned char data[8]) { - unsigned char tmp = data[0]; - data[0] = data[7]; - data[7] = tmp; - - tmp = data[1]; - data[1] = data[6]; - data[6] = tmp; - - tmp = data[2]; - data[2] = data[5]; - data[5] = tmp; - - tmp = data[3]; - data[3] = data[4]; - data[4] = tmp; -} - -//the byte to input is in the big-endian format -inline double bytesToDouble(unsigned char *bytes) { - ldouble buf; - memcpy(buf.byte, bytes, 8); - if (sysEndianType == LITTLE_ENDIAN_SYSTEM) - symTransform_8bytes(buf.byte); - return buf.value; -} - -inline void doubleToBytes(unsigned char *b, double num) { - ldouble buf; - buf.value = num; - memcpy(b, buf.byte, 8); - if (sysEndianType == LITTLE_ENDIAN_SYSTEM) - symTransform_8bytes(b); -} - - -/** - * @brief check dimension and correct it if needed - * @return 0 (didn't change dimension) - * 1 (dimension is changed) - * 2 (dimension is problematic) - **/ -int filterDimension(size_t r5, size_t r4, size_t r3, size_t r2, size_t r1, size_t *correctedDimension) { - int dimensionCorrected = 0; - int dim = 
computeDimension(r5, r4, r3, r2, r1); - correctedDimension[0] = r1; - correctedDimension[1] = r2; - correctedDimension[2] = r3; - correctedDimension[3] = r4; - correctedDimension[4] = r5; - size_t *c = correctedDimension; - if (dim == 1) { - if (r1 < 1) - return 2; - } else if (dim == 2) { - if (r2 == 1) { - c[1] = 0; - dimensionCorrected = 1; - } - if (r1 == 1) //remove this dimension - { - c[0] = c[1]; - c[1] = c[2]; - dimensionCorrected = 1; - } - } else if (dim == 3) { - if (r3 == 1) { - c[2] = 0; - dimensionCorrected = 1; - } - if (r2 == 1) { - c[1] = c[2]; - c[2] = c[3]; - dimensionCorrected = 1; - } - if (r1 == 1) { - c[0] = c[1]; - c[1] = c[2]; - c[2] = c[3]; - dimensionCorrected = 1; - } - } else if (dim == 4) { - if (r4 == 1) { - c[3] = 0; - dimensionCorrected = 1; - } - if (r3 == 1) { - c[2] = c[3]; - c[3] = c[4]; - dimensionCorrected = 1; - } - if (r2 == 1) { - c[1] = c[2]; - c[2] = c[3]; - c[3] = c[4]; - dimensionCorrected = 1; - } - if (r1 == 1) { - c[0] = c[1]; - c[1] = c[2]; - c[2] = c[3]; - c[3] = c[4]; - dimensionCorrected = 1; - } - } else if (dim == 5) { - if (r5 == 1) { - c[4] = 0; - dimensionCorrected = 1; - } - if (r4 == 1) { - c[3] = c[4]; - c[4] = 0; - dimensionCorrected = 1; - } - if (r3 == 1) { - c[2] = c[3]; - c[3] = c[4]; - c[4] = 0; - dimensionCorrected = 1; - } - if (r2 == 1) { - c[1] = c[2]; - c[2] = c[3]; - c[3] = c[4]; - c[4] = 0; - dimensionCorrected = 1; - } - if (r1 == 1) { - c[0] = c[1]; - c[1] = c[2]; - c[2] = c[3]; - c[3] = c[4]; - c[4] = 0; - dimensionCorrected = 1; - } + default: std::cerr << (is_decompress ? 
"Decompression" : "Compression") << " Error: Unknown Datatype" << std::endl; + std::exit(EXIT_FAILURE); } - - return dimensionCorrected; - -} - -inline void longToBytes_bigEndian(unsigned char *b, uint64_t num) { - b[0] = (unsigned char) (num >> 56); - b[1] = (unsigned char) (num >> 48); - b[2] = (unsigned char) (num >> 40); - b[3] = (unsigned char) (num >> 32); - b[4] = (unsigned char) (num >> 24); - b[5] = (unsigned char) (num >> 16); - b[6] = (unsigned char) (num >> 8); - b[7] = (unsigned char) (num); -// if(dataEndianType==LITTLE_ENDIAN_DATA) -// symTransform_8bytes(*b); -} - -inline int bytesToInt_bigEndian(unsigned char *bytes) { - int temp = 0; - int res = 0; - - res <<= 8; - temp = bytes[0] & 0xff; - res |= temp; - - res <<= 8; - temp = bytes[1] & 0xff; - res |= temp; - - res <<= 8; - temp = bytes[2] & 0xff; - res |= temp; - - res <<= 8; - temp = bytes[3] & 0xff; - res |= temp; - - return res; -} - -/** - * @endianType: refers to the endian_type of unsigned char* b. - * */ -inline int64_t bytesToLong_bigEndian(unsigned char *b) { - int64_t temp = 0; - int64_t res = 0; - - res <<= 8; - temp = b[0] & 0xff; - res |= temp; - - res <<= 8; - temp = b[1] & 0xff; - res |= temp; - - res <<= 8; - temp = b[2] & 0xff; - res |= temp; - - res <<= 8; - temp = b[3] & 0xff; - res |= temp; - - res <<= 8; - temp = b[4] & 0xff; - res |= temp; - - res <<= 8; - temp = b[5] & 0xff; - res |= temp; - - res <<= 8; - temp = b[6] & 0xff; - res |= temp; - - res <<= 8; - temp = b[7] & 0xff; - res |= temp; - - return res; -} + return *buf_size; +} \ No newline at end of file diff --git a/tools/H5Z-SZ3/test/CMakeLists.txt b/tools/H5Z-SZ3/test/CMakeLists.txt index 8a765486..e4cfcd01 100644 --- a/tools/H5Z-SZ3/test/CMakeLists.txt +++ b/tools/H5Z-SZ3/test/CMakeLists.txt @@ -4,7 +4,6 @@ function(build_hdf5_test) target_link_libraries(${test_name} PUBLIC SZ3 hdf5sz3) endfunction(build_hdf5_test) -build_hdf5_test(print_h5repack_args.cpp) build_hdf5_test(sz3ToHDF5.cpp) 
build_hdf5_test(dsz3FromHDF5.cpp) build_hdf5_test(convertBinToHDF5.cpp) diff --git a/tools/H5Z-SZ3/test/dsz3FromHDF5.cpp b/tools/H5Z-SZ3/test/dsz3FromHDF5.cpp index 32ed97ce..f93aa6ba 100644 --- a/tools/H5Z-SZ3/test/dsz3FromHDF5.cpp +++ b/tools/H5Z-SZ3/test/dsz3FromHDF5.cpp @@ -17,295 +17,269 @@ #define DATASET "testdata_compressed" #define MAX_CHUNK_SIZE 4294967295 //2^32-1 -int main(int argc, char * argv[]) -{ - int dimSize = 0; - size_t r5=0,r4=0,r3=0,r2=0,r1=0,nbEle = 0; - char hdf5FilePath[640], outputFilePath[640]; - hid_t file, dset, dcpl, space_id, dtype; /*Handles*/ - hid_t fid, sid, cpid, idsid; /*Output Handles*/ - H5Z_filter_t filter_id = 0; - herr_t status; - H5T_class_t type_class; - H5T_sign_t dsign; - H5T_order_t dorder; - - htri_t avail; - char filter_name[80]; - unsigned int flags = 0; - size_t nelmts = 0, dsize; - unsigned int values_out[7] = {0,0,0,0,0,0,0}; //at most 7 parameters - - //hold dims - hsize_t dims[H5S_MAX_RANK], dims_used[5] = {0,0,0,0,0}; - int ndims, dim; - - - if(argc < 2) - { - printf("Test case: dszFromHDF5 [hdf5FilePath]\n"); - printf("Example 1: dszFromHDF5 testdata/x86/testfloat_8_8_128.dat.sz3.h5\n"); - printf("Example 2: dszFromHDF5 testdata/x86/testint32_8x8x8.dat.sz3.h5\n"); - exit(0); - } - - snprintf(hdf5FilePath, 640, "%s", argv[1]); - snprintf(outputFilePath, 640, "%s.out.h5", hdf5FilePath); - - /*Open the hdf5 file with SZ-compressed data*/ +int main(int argc, char *argv[]) { + int dimSize = 0; + size_t r5 = 0, r4 = 0, r3 = 0, r2 = 0, r1 = 0, nbEle = 0; + char hdf5FilePath[640], outputFilePath[640]; + hid_t file, dset, dcpl, space_id, dtype; /*Handles*/ + hid_t fid, sid, cpid, idsid; /*Output Handles*/ + H5Z_filter_t filter_id = 0; + herr_t status; + H5T_class_t type_class; + H5T_sign_t dsign; + H5T_order_t dorder; + + htri_t avail; + char filter_name[80]; + unsigned int flags = 0; + size_t nelmts = 0, dsize; + unsigned int values_out[7] = {0, 0, 0, 0, 0, 0, 0}; //at most 7 parameters + + //hold dims + hsize_t 
dims[H5S_MAX_RANK], dims_used[5] = {0, 0, 0, 0, 0}; + int ndims, dim; + + if (argc < 2) { + printf("Test case: dszFromHDF5 [hdf5FilePath]\n"); + printf("Example 1: dszFromHDF5 testdata/x86/testfloat_8_8_128.dat.sz3.h5\n"); + printf("Example 2: dszFromHDF5 testdata/x86/testint32_8x8x8.dat.sz3.h5\n"); + exit(0); + } + + snprintf(hdf5FilePath, 640, "%s", argv[1]); + snprintf(outputFilePath, 640, "%s.out.h5", hdf5FilePath); + + /*Open the hdf5 file with SZ-compressed data*/ file = H5Fopen(hdf5FilePath, H5F_ACC_RDONLY, H5P_DEFAULT); dset = H5Dopen(file, DATASET, H5P_DEFAULT); /*Retrieve dataset creation property list.*/ dcpl = H5Dget_create_plist(dset); - + + herr_t ret = H5Zregister(H5PLget_plugin_info()); + /*Check that filter is not registered with the library yet*/ - avail = H5Zfilter_avail(H5Z_FILTER_SZ3); - if(!avail) - printf("sz3 filter is not yet available after the H5Pget_filter call.\n"); - else - printf("sz3 filter is available.\n"); - - space_id = H5Dget_space(dset); - nbEle = H5Sget_simple_extent_npoints(space_id); - - if((dtype = H5Dget_type(dset)) < 0) - printf("Error: H5Dget_type(dset) < 0\n"); - - /*Read the data using the default properties.*/ - printf("....Reading SZ3 compressed data .....................\n"); - - if((type_class = H5Tget_class(dtype)) < 0) - { - printf("Error: H5Tget_class<0\n"); - exit(0); - } - if (0 == (dsize = H5Tget_size(dtype))) - { - printf("Error: H5Tget_size==0\n"); - exit(0); - } - - if((dorder = H5Tget_order(dtype)) < 0) - printf("Error: H5Tget_order<0\n"); - - //create output file /*Get Dimensions for writing*/ - - - if (0 > (ndims = H5Sget_simple_extent_dims(space_id, dims, 0))) ERROR(H5Sget_simple_extent_dims); - - int ndims_used = 0; - for(int i = 0; i < ndims; i++){ - if(dims[i] <= 1) continue; - dims_used[ndims_used] = dims[i]; - ndims_used++; - } - - dim = ndims_used; - - - /* create HDF5 file */ - if (0 > (fid = H5Fcreate(outputFilePath, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT))) ERROR(H5Fcreate); - - /*Create 
dataspace. Setting maximum size */ - if (0 > (sid = H5Screate_simple(dim, dims, NULL))) ERROR(H5Screate_simple); - - /* setup dataset creation properties */ - if (0 > (cpid = H5Pcreate(H5P_DATASET_CREATE))) ERROR(H5Pcreate); - - - switch (type_class) - { - case H5T_FLOAT: - if (H5Tequal(dtype, H5T_IEEE_F32BE) == 1 || H5Tequal(dtype, H5T_IEEE_F32LE) == 1 - || H5Tequal(dtype, H5T_NATIVE_FLOAT) == 1) - { - printf("data type: float\n"); - float* data = (float*)malloc(sizeof(float)*nbEle); - if(dorder==H5T_ORDER_LE) - status = H5Dread(dset, H5T_IEEE_F32LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); - else //H5T_ORDER_BE - status = H5Dread(dset, H5T_IEEE_F32BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); - /*Print the first 20 data values to check the correctness.*/ - int i; - printf("reconstructed data = "); - for(i=0;i<20;i++) - printf("%f ", data[i]); - printf("\n"); - - //write out file data - if (0 > (idsid = H5Dcreate(fid, DATASET, H5T_IEEE_F32LE, sid, H5P_DEFAULT, cpid, H5P_DEFAULT))) ERROR(H5Dcreate); - if (0 > H5Dwrite(idsid, H5T_IEEE_F32LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)) ERROR(H5Dwrite); - if (0 > H5Dclose(idsid)) ERROR(H5Dclose); - - - free(data); - } - else //64bit: double - { - printf("data type: double\n"); - double* data = (double*)malloc(sizeof(double)*nbEle); - if(dorder==H5T_ORDER_LE) - status = H5Dread(dset, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); - else - status = H5Dread(dset, H5T_IEEE_F64BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); - /*Print the first 10 data values to check the correctness.*/ - int i; - printf("reconstructed data = "); - for(i=0;i<20;i++) - printf("%f ", data[i]); - printf("\n"); - free(data); - } - break; - case H5T_INTEGER: - if (0 > (dsign = H5Tget_sign(dtype))) - { - printf("Error in calling H5Tget_sign(type_id)....\n"); - exit(0); - } - if(dsign == H5T_SGN_NONE) //unsigned - { - if(dsize==1) - { - printf("data type: unsigned char\n"); - unsigned char* data = (unsigned char*)malloc(sizeof(unsigned char)*nbEle); - 
if(dorder==H5T_ORDER_LE) - status = H5Dread(dset, H5T_STD_U8LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); - else - status = H5Dread(dset, H5T_STD_U8BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); - int i; - printf("reconstructed data = "); - for(i=0;i<20;i++) - printf("%d ", data[i]); - printf("\n"); - free(data); - } - else if(dsize==2) - { - printf("data type: unsigned short\n"); - unsigned short* data = (unsigned short*)malloc(sizeof(unsigned short)*nbEle); - if(dorder==H5T_ORDER_LE) - status = H5Dread(dset, H5T_STD_U16LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); - else - status = H5Dread(dset, H5T_STD_U16BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); - int i; - printf("reconstructed data = "); - for(i=0;i<20;i++) - printf("%d ", data[i]); - printf("\n"); - free(data); - } - else if(dsize==4) - { - printf("data type: unsigned int\n"); - unsigned int* data = (unsigned int*)malloc(sizeof(unsigned int)*nbEle); - if(dorder==H5T_ORDER_LE) - status = H5Dread(dset, H5T_STD_U32LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); - else - status = H5Dread(dset, H5T_STD_U32BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); - int i; - printf("reconstructed data = "); - for(i=0;i<20;i++) - printf("%d ", data[i]); - printf("\n"); - free(data); - } - else if(dsize==8) - { - printf("data type: unsigned long\n"); - uint64_t* data = (uint64_t*)malloc(sizeof(uint64_t)*nbEle); - if(dorder==H5T_ORDER_LE) - status = H5Dread(dset, H5T_STD_U64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); - else - status = H5Dread(dset, H5T_STD_U64BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); - int i; - printf("reconstructed data = "); - for(i=0;i<20;i++) - printf("%ld ", data[i]); - printf("\n"); - free(data); - } - } - else - { - if(dsize==1) - { - printf("data type: char\n"); - char *data = (char*)malloc(sizeof(char)*nbEle); - if(dorder==H5T_ORDER_LE) - status = H5Dread(dset, H5T_STD_I8LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); - else - status = H5Dread(dset, H5T_STD_I8BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); - int i; - printf("reconstructed 
data = "); - for(i=0;i<20;i++) - printf("%d ", data[i]); - printf("\n"); - free(data); - } - else if(dsize==2) - { - printf("data type: short\n"); - short *data = (short*)malloc(sizeof(short)*nbEle); - if(dorder==H5T_ORDER_LE) - status = H5Dread(dset, H5T_STD_I16LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); - else - status = H5Dread(dset, H5T_STD_I16BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); - int i; - printf("reconstructed data = "); - for(i=0;i<20;i++) - printf("%d ", data[i]); - printf("\n"); - free(data); - } - else if(dsize==4) - { - printf("data type: int\n"); - int *data = (int*)malloc(sizeof(int)*nbEle); - if(dorder==H5T_ORDER_LE) - status = H5Dread(dset, H5T_STD_I32LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); - else - status = H5Dread(dset, H5T_STD_I32BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); - int i; - printf("reconstructed data = "); - for(i=0;i<20;i++) - printf("%d ", data[i]); - printf("\n"); - free(data); - } - else if(dsize==8) - { - printf("data type: long\n"); - int64_t *data = (int64_t*)malloc(sizeof(int64_t)*nbEle); - if(dorder==H5T_ORDER_LE) - status = H5Dread(dset, H5T_STD_I64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); - else - status = H5Dread(dset, H5T_STD_I64BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); - int i; - printf("reconstructed data = "); - for(i=0;i<20;i++) - printf("%ld ", data[i]); - printf("\n"); - free(data); - } - } - - break; - default: - printf("Error: H5Z-SZ3 supports only float, double or integers.\n"); - exit(1); - } - - //Close reading resources - status = H5Pclose(dcpl); - status = H5Dclose(dset); - status = H5Fclose(file); - - /*Close and release writing resources*/ - if (0 > H5Sclose(sid)) ERROR(H5Sclose); - if (0 > H5Pclose(cpid)) ERROR(H5Pclose); - if (0 > H5Fclose(fid)) ERROR(H5Fclose); - - return 0; + avail = H5Zfilter_avail(H5Z_FILTER_SZ3); + if (!avail) + printf("sz3 filter is not yet available after the H5Pget_filter call.\n"); + else + printf("sz3 filter is available.\n"); + + space_id = H5Dget_space(dset); + nbEle = 
H5Sget_simple_extent_npoints(space_id); + + if ((dtype = H5Dget_type(dset)) < 0) + printf("Error: H5Dget_type(dset) < 0\n"); + + /*Read the data using the default properties.*/ + printf("....Reading SZ3 compressed data .....................\n"); + + if ((type_class = H5Tget_class(dtype)) < 0) { + printf("Error: H5Tget_class<0\n"); + exit(0); + } + if (0 == (dsize = H5Tget_size(dtype))) { + printf("Error: H5Tget_size==0\n"); + exit(0); + } + + if ((dorder = H5Tget_order(dtype)) < 0) + printf("Error: H5Tget_order<0\n"); + + //create output file /*Get Dimensions for writing*/ + + + if (0 > (ndims = H5Sget_simple_extent_dims(space_id, dims, 0))) ERROR(H5Sget_simple_extent_dims); + + int ndims_used = 0; + for (int i = 0; i < ndims; i++) { + if (dims[i] <= 1) continue; + dims_used[ndims_used] = dims[i]; + ndims_used++; + } + + dim = ndims_used; + + + /* create HDF5 file */ + if (0 > (fid = H5Fcreate(outputFilePath, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT))) ERROR(H5Fcreate); + + /*Create dataspace. 
Setting maximum size */ + if (0 > (sid = H5Screate_simple(dim, dims, NULL))) ERROR(H5Screate_simple); + + /* setup dataset creation properties */ + if (0 > (cpid = H5Pcreate(H5P_DATASET_CREATE))) ERROR(H5Pcreate); + + switch (type_class) { + case H5T_FLOAT: + if (H5Tequal(dtype, H5T_IEEE_F32BE) == 1 || H5Tequal(dtype, H5T_IEEE_F32LE) == 1 + || H5Tequal(dtype, H5T_NATIVE_FLOAT) == 1) { + printf("data type: float\n"); + float *data = (float *) malloc(sizeof(float) * nbEle); + if (dorder == H5T_ORDER_LE) + status = H5Dread(dset, H5T_IEEE_F32LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + else //H5T_ORDER_BE + status = H5Dread(dset, H5T_IEEE_F32BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + /*Print the first 20 data values to check the correctness.*/ + int i; + printf("reconstructed data = "); + for (i = 0; i < 20; i++) + printf("%f ", data[i]); + printf("\n"); + + //write out file data + if (0 > (idsid = H5Dcreate(fid, DATASET, H5T_IEEE_F32LE, sid, H5P_DEFAULT, cpid, H5P_DEFAULT))) ERROR(H5Dcreate); + if (0 > H5Dwrite(idsid, H5T_IEEE_F32LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)) ERROR(H5Dwrite); + if (0 > H5Dclose(idsid)) ERROR(H5Dclose); + + free(data); + } else //64bit: double + { + printf("data type: double\n"); + double *data = (double *) malloc(sizeof(double) * nbEle); + if (dorder == H5T_ORDER_LE) + status = H5Dread(dset, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + else + status = H5Dread(dset, H5T_IEEE_F64BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + /*Print the first 10 data values to check the correctness.*/ + int i; + printf("reconstructed data = "); + for (i = 0; i < 20; i++) + printf("%f ", data[i]); + printf("\n"); + free(data); + } + break; + case H5T_INTEGER: + if (0 > (dsign = H5Tget_sign(dtype))) { + printf("Error in calling H5Tget_sign(type_id)....\n"); + exit(0); + } + if (dsign == H5T_SGN_NONE) //unsigned + { + if (dsize == 1) { + printf("data type: unsigned char\n"); + unsigned char *data = (unsigned char *) malloc(sizeof(unsigned char) * 
nbEle); + if (dorder == H5T_ORDER_LE) + status = H5Dread(dset, H5T_STD_U8LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + else + status = H5Dread(dset, H5T_STD_U8BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + int i; + printf("reconstructed data = "); + for (i = 0; i < 20; i++) + printf("%d ", data[i]); + printf("\n"); + free(data); + } else if (dsize == 2) { + printf("data type: unsigned short\n"); + unsigned short *data = (unsigned short *) malloc(sizeof(unsigned short) * nbEle); + if (dorder == H5T_ORDER_LE) + status = H5Dread(dset, H5T_STD_U16LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + else + status = H5Dread(dset, H5T_STD_U16BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + int i; + printf("reconstructed data = "); + for (i = 0; i < 20; i++) + printf("%d ", data[i]); + printf("\n"); + free(data); + } else if (dsize == 4) { + printf("data type: unsigned int\n"); + unsigned int *data = (unsigned int *) malloc(sizeof(unsigned int) * nbEle); + if (dorder == H5T_ORDER_LE) + status = H5Dread(dset, H5T_STD_U32LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + else + status = H5Dread(dset, H5T_STD_U32BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + int i; + printf("reconstructed data = "); + for (i = 0; i < 20; i++) + printf("%d ", data[i]); + printf("\n"); + free(data); + } else if (dsize == 8) { + printf("data type: unsigned long\n"); + uint64_t *data = (uint64_t *) malloc(sizeof(uint64_t) * nbEle); + if (dorder == H5T_ORDER_LE) + status = H5Dread(dset, H5T_STD_U64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + else + status = H5Dread(dset, H5T_STD_U64BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + int i; + printf("reconstructed data = "); + for (i = 0; i < 20; i++) + printf("%ld ", data[i]); + printf("\n"); + free(data); + } + } else { + if (dsize == 1) { + printf("data type: char\n"); + char *data = (char *) malloc(sizeof(char) * nbEle); + if (dorder == H5T_ORDER_LE) + status = H5Dread(dset, H5T_STD_I8LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + else + status = H5Dread(dset, H5T_STD_I8BE, H5S_ALL, 
H5S_ALL, H5P_DEFAULT, data); + int i; + printf("reconstructed data = "); + for (i = 0; i < 20; i++) + printf("%d ", data[i]); + printf("\n"); + free(data); + } else if (dsize == 2) { + printf("data type: short\n"); + short *data = (short *) malloc(sizeof(short) * nbEle); + if (dorder == H5T_ORDER_LE) + status = H5Dread(dset, H5T_STD_I16LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + else + status = H5Dread(dset, H5T_STD_I16BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + int i; + printf("reconstructed data = "); + for (i = 0; i < 20; i++) + printf("%d ", data[i]); + printf("\n"); + free(data); + } else if (dsize == 4) { + printf("data type: int\n"); + int *data = (int *) malloc(sizeof(int) * nbEle); + if (dorder == H5T_ORDER_LE) + status = H5Dread(dset, H5T_STD_I32LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + else + status = H5Dread(dset, H5T_STD_I32BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + int i; + printf("reconstructed data = "); + for (i = 0; i < 20; i++) + printf("%d ", data[i]); + printf("\n"); + free(data); + } else if (dsize == 8) { + printf("data type: long\n"); + int64_t *data = (int64_t *) malloc(sizeof(int64_t) * nbEle); + if (dorder == H5T_ORDER_LE) + status = H5Dread(dset, H5T_STD_I64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + else + status = H5Dread(dset, H5T_STD_I64BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data); + int i; + printf("reconstructed data = "); + for (i = 0; i < 20; i++) + printf("%ld ", data[i]); + printf("\n"); + free(data); + } + } + + break; + default: printf("Error: H5Z-SZ3 supports only float, double or integers.\n"); + exit(1); + } + + //Close reading resources + status = H5Pclose(dcpl); + status = H5Dclose(dset); + status = H5Fclose(file); + + /*Close and release writing resources*/ + if (0 > H5Sclose(sid)) ERROR(H5Sclose); + if (0 > H5Pclose(cpid)) ERROR(H5Pclose); + if (0 > H5Fclose(fid)) ERROR(H5Fclose); + + return 0; } diff --git a/tools/H5Z-SZ3/test/print_h5repack_args.cpp b/tools/H5Z-SZ3/test/print_h5repack_args.cpp deleted file mode 100644 
index 3549df2b..00000000 --- a/tools/H5Z-SZ3/test/print_h5repack_args.cpp +++ /dev/null @@ -1,219 +0,0 @@ -#include -#include -#include -#include -#include - -#define ABS 0 -#define REL 1 -#define NORM2 2 -#define PSNR 3 - - -#define LITTLE_ENDIAN_SYSTEM 0 -#define BIG_ENDIAN_SYSTEM 1 -#define LITTLE_ENDIAN_DATA 0 -#define BIG_ENDIAN_DATA 1 - - -int sysEndianType = LITTLE_ENDIAN_SYSTEM; -int dataEndianType = LITTLE_ENDIAN_DATA; - -typedef union ldouble -{ - double value; - uint64_t lvalue; - unsigned char byte[8]; -} ldouble; - -ldouble buf; - -void usage() -{ - printf("Usage: print_h5repack_args \n"); - printf("Options:\n"); - printf(" -M : 10 options as follows. \n"); - printf(" ABS (absolute error bound)\n"); - printf(" REL (value range based error bound, so a.k.a., VR_REL)\n"); - printf(" PSNR (peak signal-to-noise ratio)\n"); - printf(" NORM2 (norm2)\n"); - printf(" -A : specifying absolute error bound\n"); - printf(" -R : specifying relative error bound\n"); - printf(" -N : specifying norm2 error bound\n"); - printf(" -S : specifying PSNR\n"); - printf("* examples: \n"); - printf(" print_h5repack_args -M ABS -A 1E-3 (output: -f UD=32024,0,9,0,1062232653,3539053052,0,0,0,0,0,0)\n"); - printf(" print_h5repack_args -M REL -R 1E-4 (output: -f UD=32024,0,9,1,0,0,1058682594,3944497965,0,0,0,0)\n"); - exit(0); -} - -int bytesToInt32_bigEndian(unsigned char* bytes) -{ - int temp = 0; - int res = 0; - - res <<= 8; - temp = bytes[0] & 0xff; - res |= temp; - - res <<= 8; - temp = bytes[1] & 0xff; - res |= temp; - - res <<= 8; - temp = bytes[2] & 0xff; - res |= temp; - - res <<= 8; - temp = bytes[3] & 0xff; - res |= temp; - - return res; -} - -void symTransform_8bytes(unsigned char data[8]) -{ - unsigned char tmp = data[0]; - data[0] = data[7]; - data[7] = tmp; - - tmp = data[1]; - data[1] = data[6]; - data[6] = tmp; - - tmp = data[2]; - data[2] = data[5]; - data[5] = tmp; - - tmp = data[3]; - data[3] = data[4]; - data[4] = tmp; -} - -void doubleToBytes(unsigned char *b, 
double num) -{ - ldouble buf; - buf.value = num; - memcpy(b, buf.byte, 8); - if(sysEndianType==LITTLE_ENDIAN_SYSTEM) - symTransform_8bytes(b); -} - -int main(int argc, char* argv[]) -{ - char* errBoundMode = NULL; - char* absErrorBound = NULL; - char* relErrorBound = NULL; - char* norm2ErrorBound = NULL; - char* psnr_ = NULL; - - if(argc==1) - usage(); - - int i = 0; - - for(i=1;i #include #include +#include + #include "hdf5.h" +#include "H5Cpp.h" #include "H5Z_SZ3.hpp" #include "SZ3/utils/FileUtil.hpp" +#include "SZ3/utils/Config.hpp" +#define LITTLE_ENDIAN_SYSTEM 0 +#define BIG_ENDIAN_SYSTEM 1 +#define LITTLE_ENDIAN_DATA 0 +#define BIG_ENDIAN_DATA 1 int sysEndianType = LITTLE_ENDIAN_SYSTEM; int dataEndianType = LITTLE_ENDIAN_DATA; #define DATASET "testdata_compressed" +int MAX_CHUNK_SIZE = INT_MAX; + +//detect sys endian type +inline void detectSysEndianType() { + //get sys endian type + int x_temp = 1; + char *y_temp = (char *) &x_temp; + + if (*y_temp == 1) + sysEndianType = LITTLE_ENDIAN_SYSTEM; + else //=0 + sysEndianType = BIG_ENDIAN_SYSTEM; +} -using namespace SZ3; +template +void process_data(const SZ3::Config &conf, + const char *oriFilePath, + int dataEndianType, + hid_t fid, + hid_t sid, + hid_t cpid, + const char *datasetName, + hid_t h5TypeLE, + hid_t h5TypeBE) { + T *data = new T[conf.num]; + SZ3::readfile(oriFilePath, conf.num, data); + + std::cout << "original data = "; + for (int i = 0; i < 20; ++i) + std::cout << data[i] << " "; + std::cout << "....\n"; + + hid_t dataset; + hid_t h5Type = (dataEndianType == LITTLE_ENDIAN_DATA) ? 
h5TypeLE : h5TypeBE; + + if ((dataset = H5Dcreate(fid, datasetName, h5Type, sid, H5P_DEFAULT, cpid, H5P_DEFAULT)) < 0) { + std::cerr << "Error in H5Dcreate\n"; + delete[] data; + exit(EXIT_FAILURE); + } + if (H5Dwrite(dataset, h5Type, H5S_ALL, H5S_ALL, H5P_DEFAULT, data) < 0) { + std::cerr << "Error in H5Dwrite\n"; + delete[] data; + exit(EXIT_FAILURE); + } + + delete[] data; + if (H5Dclose(dataset) < 0) { + std::cerr << "Error in H5Dclose\n"; + exit(EXIT_FAILURE); + } +} int main(int argc, char *argv[]) { - - //(void) helper fn to detect system endian type - //detectSysEndianType(); - //by default sysEndianType and dataEndianType are little endian, can set them manually here - //dataEndianType = BIG_ENDIAN_DATA; - - size_t r5 = 0, r4 = 0, r3 = 0, r2 = 0, r1 = 0; - int cmp_algo, interp_algo; //select compression and interpolation for SZ3 - char outDir[640], oriFilePath[640], outputFilePath[640]; - size_t cd_nelmts=0, nbEle; - unsigned int *cd_values = NULL; - //unsigned int cd_values[7]; - - herr_t status; - htri_t avail; - unsigned filter_config; - - hid_t sid, idsid, cpid, fid; - + + char oriFilePath[640], outputFilePath[640]; + if (argc < 3) { printf("Test case: sz3ToHDF5 [dataType] [srcFilePath] [dimension sizes...]\n"); printf("Example1 : sz3ToHDF5 -f testdata/x86/testfloat_8_8_128.dat 8 8 128\n"); printf("Example 2: sz3ToHDF5 -i32 testdata/x86/testint32_8x8x8.dat 8 8 8\n"); exit(0); } - - //printf("config file = %s\n", argv[2]); - + + std::map dataTypeMap = { + {"-f", SZ_FLOAT}, {"-d", SZ_DOUBLE}, {"-i8", SZ_INT8}, {"-u8", SZ_UINT8}, + {"-i16", SZ_INT16}, {"-u16", SZ_UINT16}, {"-i32", SZ_INT32}, {"-u32", SZ_UINT32}, + {"-i64", SZ_INT64}, {"-u64", SZ_UINT64} + }; + int dataType = 0; - if (strcmp(argv[1], "-f") == 0) - dataType = SZ_FLOAT; - else if (strcmp(argv[1], "-d") == 0) - dataType = SZ_DOUBLE; - else if (strcmp(argv[1], "-i8") == 0) - dataType = SZ_INT8; - else if (strcmp(argv[1], "-u8") == 0) - dataType = SZ_UINT8; - else if (strcmp(argv[1], "-i16") == 
0) - dataType = SZ_INT16; - else if (strcmp(argv[1], "-u16") == 0) - dataType = SZ_UINT16; - else if (strcmp(argv[1], "-i32") == 0) - dataType = SZ_INT32; - else if (strcmp(argv[1], "-u32") == 0) - dataType = SZ_UINT32; - else if (strcmp(argv[1], "-i64") == 0) - dataType = SZ_INT64; - else if (strcmp(argv[1], "-u64") == 0) - dataType = SZ_UINT64; - else { - printf("Error: unknown data type in sz3ToHDF5.c!\n"); - exit(0); + auto it = dataTypeMap.find(argv[1]); + if (it != dataTypeMap.end()) { + dataType = it->second; + } else { + std::cerr << "Error: unknown data type in sz3ToHDF5.c!\n"; + return 0; } - - printf("DTYPE: %i", dataType); + snprintf(oriFilePath, 640, "%s", argv[2]); - if (argc >= 4) { - r1 = atoi(argv[3]); //8 - } - if (argc >= 5) { - r2 = atoi(argv[4]); //8 + + std::vector dimensions; + for (int i = 3; i < argc && i < 8; ++i) { + dimensions.push_back(std::atoi(argv[i])); } - if (argc >= 6) { - r3 = atoi(argv[5]); //128 - } - if (argc >= 7) { - r4 = atoi(argv[6]); - } - if (argc >= 8) { - r5 = atoi(argv[7]); - } - - //read in compression and interp algo - //for testing set these here as defaults in config - cmp_algo = 1; - interp_algo = 1; - - //printf("cfgFile=%s\n", cfgFile); + std::reverse(dimensions.begin(), dimensions.end()); // slowest to fastest + snprintf(outputFilePath, 640, "%s.sz3.h5", oriFilePath); - -// printf("argv[1]=%s, dataType=%d\n", argv[1], dataType); - nbEle = computeDataLength(r5, r4, r3, r2, r1); - -// printf("nbEle=%u\n", nbEle); - - //Create cd_values - printf("Dimension sizes: n5=%u, n4=%u, n3=%u, n2=%u, n1=%u\n", r5, r4, r3, r2, r1); - int mode = 0; //0: ABS, 1: REL, ... - SZ_errConfigToCdArray(&cd_nelmts, &cd_values, mode, 0.001, 0.001, 0, - 0); //SZ_FLOAT or SZ_DOUBLE or SZ_INT 100x500x500 : 0, 0, 100, 500, 500, ABS, REL (0.01, 0.01*(max-min), PW_REL (0.01, 5, 6, 7, 8, 9 --> 5*0.01, 6*0.01, ...), PSNR (mean squared error)). 
- //load_conffile_flag = 0; - // REL - //SZ_metaDataErrToCdArray(&cd_nelmts, &cd_values, dataType, r5, r4, r3, r2, r1, 1, 0.01, 0.01, 0, 0); //SZ_FLOAT or SZ_DOUBLE or SZ_INT 100x500x500 : 0, 0, 100, 500, 500, ABS, REL (0.01, 0.01*(max-min), PW_REL (0.01, 5, 6, 7, 8, 9 --> 5*0.01, 6*0.01, ...), PSNR (mean squared error)). - /*cd_nelmts = 5; - cd_values[0] = 3; - cd_values[1] = 0; - cd_values[2] = 128; - cd_values[3] = 8; - cd_values[4] = 8; - cd_values[5] = 0; - cd_values[6] = 0;*/ - - int i = 0; -// for(i=0;i (fid = H5Fcreate(outputFilePath, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT))) { printf("Error in H5Pcreate"); exit(0); } - + + + //set up SZ configuration + SZ3::Config conf; + // use config.loadcfg("path_to_sz3_conf") to load configuration from a file if needed + // setting up data related attributes (data type, dims, etc.) is not necessary, as they will be updated in H5Z_sz3_set_local automatically + conf.setDims(dimensions.begin(), dimensions.end()); + // Set compression related attributes here + conf.cmprAlgo = SZ3::ALGO_BIOMD; + + //save conf to cd_values + size_t cd_nelmts = std::ceil(conf.size_est() / 1.0 / sizeof(int)); + std::vector cd_values(cd_nelmts); + auto buffer = (unsigned char *) (cd_values.data()); + conf.save(buffer); + // conf.print(); + + std::vector hdims(conf.dims.begin(), conf.dims.end()); /*Create dataspace. 
Setting maximum size */ - if (0 > (sid = H5Screate_simple(dim, dims, NULL))) { + if (0 > (sid = H5Screate_simple(conf.N, hdims.data(), NULL))) { printf("Error in H5Screate_simple"); exit(0); } - /* setup dataset creation properties */ if (0 > (cpid = H5Pcreate(H5P_DATASET_CREATE))) { printf("Error in H5Pcreate"); exit(0); } - + /* Add the SZ compression filter and set the chunk size */ - if (0 > H5Pset_filter(cpid, H5Z_FILTER_SZ3, H5Z_FLAG_MANDATORY, cd_nelmts, cd_values)) { + if (0 > H5Pset_filter(cpid, H5Z_FILTER_SZ3, H5Z_FLAG_MANDATORY, cd_nelmts, cd_values.data())) { printf("Error in H5Pcreate"); exit(0); } - avail = H5Zfilter_avail(H5Z_FILTER_SZ3); + herr_t ret = H5Zregister(H5PLget_plugin_info()); + if (ret < 0) { + printf("Error in H5Zregister"); + exit(0); + } + htri_t avail = H5Zfilter_avail(H5Z_FILTER_SZ3); if (avail) { - status = H5Zget_filter_info(H5Z_FILTER_SZ3, &filter_config); - + unsigned filter_config; + auto status = H5Zget_filter_info(H5Z_FILTER_SZ3, &filter_config); + if (filter_config & H5Z_FILTER_CONFIG_ENCODE_ENABLED) printf("sz filter is available for encoding and decoding.\n"); } - if (0 > H5Pset_chunk(cpid, dim, chunk)) { + + std::vector hchunk(hdims); + hchunk[0] = 10; + if (0 > H5Pset_chunk(cpid, conf.N, hchunk.data())) { printf("Error in H5Pcreate"); exit(0); } - - //Initialize the configuration for SZ - //You can also use the global variable conf_params to set the configuration for sz without cfgFile. 
- //Example of setting an absolute error bound: - // sz_params* params = H5Z_SZ_Init_Default(); - // params->errorBoundMode = ABS; - // params->absErrBound = 1E-4; - - //H5Z_SZ_Init(cfgFile); - + printf("....Writing SZ compressed data.............\n"); - - if (dataType == SZ_FLOAT) { - float *data = new float[nbEle]; - readfile(oriFilePath, nbEle, data); - - printf("original data = "); - for (i = 0; i < 20; i++) - printf("%f ", data[i]); - printf("....\n"); - - if (dataEndianType == LITTLE_ENDIAN_DATA) { - if (0 > (idsid = H5Dcreate(fid, DATASET, H5T_IEEE_F32LE, sid, H5P_DEFAULT, cpid, H5P_DEFAULT))) { - printf("Error in H5Dcreate"); - exit(0); - } - if (0 > H5Dwrite(idsid, H5T_IEEE_F32LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)) { - printf("Error in H5Dwrite"); - exit(0); - } - } else //BIG_ENDIAN_DATA - { - if (0 > (idsid = H5Dcreate(fid, DATASET, H5T_IEEE_F32BE, sid, H5P_DEFAULT, cpid, H5P_DEFAULT))) { - printf("Error in H5Dcreate"); - exit(0); - } - if (0 > H5Dwrite(idsid, H5T_IEEE_F32BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)) { - printf("Error in H5Dwrite"); - exit(0); - } - } - delete[] data; - if (0 > H5Dclose(idsid)) { - printf("Error in H5Dclose"); - exit(0); - }; - } else if (dataType == SZ_DOUBLE) { - double *data = new double[nbEle]; - readfile(oriFilePath, nbEle, data); - - printf("original data = "); - for (i = 0; i < 20; i++) - printf("%f ", data[i]); - printf("....\n"); - - if (dataEndianType == LITTLE_ENDIAN_DATA) { - if (0 > (idsid = H5Dcreate(fid, DATASET, H5T_IEEE_F64LE, sid, H5P_DEFAULT, cpid, H5P_DEFAULT))) { - printf("Error in H5Dcreate"); - exit(0); - } - if (0 > H5Dwrite(idsid, H5T_IEEE_F64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)) { - printf("Error in H5Dwrite"); - exit(0); - } - } else //BIG_ENDIAN_DATA - { - if (0 > (idsid = H5Dcreate(fid, DATASET, H5T_IEEE_F64BE, sid, H5P_DEFAULT, cpid, H5P_DEFAULT))) { - printf("Error in H5Dcreate"); - exit(0); - } - if (0 > H5Dwrite(idsid, H5T_IEEE_F64BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)) { - 
printf("Error in H5Dwrite"); - exit(0); - } - } - delete[] data; - if (0 > H5Dclose(idsid)) { - printf("Error in H5Dclose"); - exit(0); - }; - } else if (dataType == SZ_INT8) { - int8_t *data = new int8_t[nbEle]; - readfile(oriFilePath, nbEle, data); - - printf("original data = "); - for (i = 0; i < 20; i++) - printf("%d ", data[i]); - printf("....\n"); - - if (dataEndianType == LITTLE_ENDIAN_DATA) { - if (0 > (idsid = H5Dcreate(fid, DATASET, H5T_STD_I8LE, sid, H5P_DEFAULT, cpid, H5P_DEFAULT))) { - printf("Error in H5Dcreate"); - exit(0); - } - if (0 > H5Dwrite(idsid, H5T_STD_I8LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)) { - printf("Error in H5Dwrite"); - exit(0); - } - } else //BIG_ENDIAN_DATA - { - if (0 > (idsid = H5Dcreate(fid, DATASET, H5T_STD_I8BE, sid, H5P_DEFAULT, cpid, H5P_DEFAULT))) { - printf("Error in H5Dcreate"); - exit(0); - } - if (0 > H5Dwrite(idsid, H5T_STD_I8BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)) { - printf("Error in H5Dwrite"); - exit(0); - } - } - delete[] data; - if (0 > H5Dclose(idsid)) { - printf("Error in H5Dclose"); - exit(0); - } - } else if (dataType == SZ_UINT8) { - uint8_t *data = new uint8_t[nbEle]; - readfile(oriFilePath, nbEle, data); - - printf("original data = "); - for (i = 0; i < 20; i++) - printf("%d ", data[i]); - printf("....\n"); - - if (dataEndianType == LITTLE_ENDIAN_DATA) { - if (0 > (idsid = H5Dcreate(fid, DATASET, H5T_STD_U8LE, sid, H5P_DEFAULT, cpid, H5P_DEFAULT))) { - printf("Error in H5Dcreate"); - exit(0); - } - if (0 > H5Dwrite(idsid, H5T_STD_U8LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)) { - printf("Error in H5Dwrite"); - exit(0); - } - } else //BIG_ENDIAN_DATA - { - if (0 > (idsid = H5Dcreate(fid, DATASET, H5T_STD_U8BE, sid, H5P_DEFAULT, cpid, H5P_DEFAULT))) { - printf("Error in H5Dcreate"); - exit(0); - } - if (0 > H5Dwrite(idsid, H5T_STD_U8BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)) { - printf("Error in H5Dwrite"); - exit(0); - } - } - delete[] data; - if (0 > H5Dclose(idsid)) { - printf("Error in H5Dclose"); - 
exit(0); - } - } else if (dataType == SZ_INT16) { - - int16_t *data = new int16_t[nbEle]; - readfile(oriFilePath, nbEle, data); - - printf("original data = "); - for (i = 0; i < 20; i++) - printf("%d ", data[i]); - printf("....\n"); - - if (dataEndianType == LITTLE_ENDIAN_DATA) { - if (0 > (idsid = H5Dcreate(fid, DATASET, H5T_STD_I16LE, sid, H5P_DEFAULT, cpid, H5P_DEFAULT))) { - printf("Error in H5Dcreate"); - exit(0); - } - if (0 > H5Dwrite(idsid, H5T_STD_I16LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)) { - printf("Error in H5Dwrite"); - exit(0); - } - } else //BIG_ENDIAN_DATA - { - if (0 > (idsid = H5Dcreate(fid, DATASET, H5T_STD_I16BE, sid, H5P_DEFAULT, cpid, H5P_DEFAULT))) { - printf("Error in H5Dcreate"); - exit(0); - } - if (0 > H5Dwrite(idsid, H5T_STD_I16BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)) { - printf("Error in H5Dwrite"); - exit(0); - } - } - delete[] data; - if (0 > H5Dclose(idsid)) { - printf("Error in H5Dclose"); - exit(0); - } - } else if (dataType == SZ_UINT16) { - uint16_t *data = new uint16_t[nbEle]; - readfile(oriFilePath, nbEle, data); - - printf("original data = "); - for (i = 0; i < 20; i++) - printf("%d ", data[i]); - printf("....\n"); - - if (dataEndianType == LITTLE_ENDIAN_DATA) { - if (0 > (idsid = H5Dcreate(fid, DATASET, H5T_STD_U16LE, sid, H5P_DEFAULT, cpid, H5P_DEFAULT))) { - printf("Error in H5Dcreate"); - exit(0); - } - if (0 > H5Dwrite(idsid, H5T_STD_U16LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)) { - printf("Error in H5Dwrite"); - exit(0); - } - } else //BIG_ENDIAN_DATA - { - if (0 > (idsid = H5Dcreate(fid, DATASET, H5T_STD_U16BE, sid, H5P_DEFAULT, cpid, H5P_DEFAULT))) { - printf("Error in H5Dcreate"); - exit(0); - } - if (0 > H5Dwrite(idsid, H5T_STD_U16BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)) { - printf("Error in H5Dwrite"); - exit(0); - } - } - delete[] data; - if (0 > H5Dclose(idsid)) { - printf("Error in H5Dclose"); - exit(0); - } - } else if (dataType == SZ_INT32) { - //printf("%i \t %i\n", sizeof(int), sizeof(int32_t)); - int32_t 
*data = new int32_t[nbEle]; - readfile(oriFilePath, nbEle, data); - - printf("original data = "); - for (i = 0; i < 20; i++) - printf("%d ", data[i]); - printf("....\n"); - - if (dataEndianType == LITTLE_ENDIAN_DATA) { - if (0 > (idsid = H5Dcreate(fid, DATASET, H5T_STD_I32LE, sid, H5P_DEFAULT, cpid, H5P_DEFAULT))) { - printf("Error in H5Dcreate"); - exit(0); - } - if (0 > H5Dwrite(idsid, H5T_STD_I32LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)) { - printf("Error in H5Dwrite"); - exit(0); - } - } else //BIG_ENDIAN_DATA - { - if (0 > (idsid = H5Dcreate(fid, DATASET, H5T_STD_I32BE, sid, H5P_DEFAULT, cpid, H5P_DEFAULT))) { - printf("Error in H5Dcreate"); - exit(0); - } - if (0 > H5Dwrite(idsid, H5T_STD_I32BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)) { - printf("Error in H5Dwrite"); - exit(0); - } - } - delete[] data; - if (0 > H5Dclose(idsid)) { - printf("Error in H5Dclose"); - exit(0); - } - } else if (dataType == SZ_UINT32) { - uint32_t *data = new uint32_t[nbEle]; - readfile(oriFilePath, nbEle, data); - - printf("original data = "); - for (i = 0; i < 20; i++) - printf("%d ", data[i]); - printf("....\n"); - - if (dataEndianType == LITTLE_ENDIAN_DATA) { - if (0 > (idsid = H5Dcreate(fid, DATASET, H5T_STD_U32LE, sid, H5P_DEFAULT, cpid, H5P_DEFAULT))) { - printf("Error in H5Dcreate"); - exit(0); - } - if (0 > H5Dwrite(idsid, H5T_STD_U32LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)) { - printf("Error in H5Dwrite"); - exit(0); - } - } else //BIG_ENDIAN_DATA - { - if (0 > (idsid = H5Dcreate(fid, DATASET, H5T_STD_U32BE, sid, H5P_DEFAULT, cpid, H5P_DEFAULT))) { - printf("Error in H5Dcreate"); - exit(0); - } - if (0 > H5Dwrite(idsid, H5T_STD_U32BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)) { - printf("Error in H5Dwrite"); - exit(0); - } - } - delete[] data; - if (0 > H5Dclose(idsid)) { - printf("Error in H5Dclose"); - exit(0); - } - } else if (dataType == SZ_INT64) { - int64_t *data = new int64_t[nbEle]; - readfile(oriFilePath, nbEle, data); - - printf("original data = "); - for (i = 0; i < 
20; i++) - printf("%ld ", data[i]); - printf("....\n"); - - if (dataEndianType == LITTLE_ENDIAN_DATA) { - if (0 > (idsid = H5Dcreate(fid, DATASET, H5T_STD_I64LE, sid, H5P_DEFAULT, cpid, H5P_DEFAULT))) { - printf("Error in H5Dcreate"); - exit(0); - } - if (0 > H5Dwrite(idsid, H5T_STD_I64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)) { - printf("Error in H5Dwrite"); - exit(0); - } - } else //BIG_ENDIAN_DATA - { - if (0 > (idsid = H5Dcreate(fid, DATASET, H5T_STD_I64BE, sid, H5P_DEFAULT, cpid, H5P_DEFAULT))) { - printf("Error in H5Dcreate"); - exit(0); - } - if (0 > H5Dwrite(idsid, H5T_STD_I64BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)) { - printf("Error in H5Dwrite"); - exit(0); - } - } - delete[] data; - if (0 > H5Dclose(idsid)) { - printf("Error in H5Dclose"); - exit(0); - } - } else if (dataType == SZ_UINT64) { - uint64_t *data = new uint64_t[nbEle]; - readfile(oriFilePath, nbEle, data); - - printf("original data = "); - for (i = 0; i < 20; i++) - printf("%ld ", data[i]); - printf("....\n"); - - if (dataEndianType == LITTLE_ENDIAN_DATA) { - if (0 > (idsid = H5Dcreate(fid, DATASET, H5T_STD_U64LE, sid, H5P_DEFAULT, cpid, H5P_DEFAULT))) { - printf("Error in H5Dcreate"); - exit(0); - } - if (0 > H5Dwrite(idsid, H5T_STD_U64LE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)) { - printf("Error in H5Dwrite"); - exit(0); - } - } else //BIG_ENDIAN_DATA - { - if (0 > (idsid = H5Dcreate(fid, DATASET, H5T_STD_U64BE, sid, H5P_DEFAULT, cpid, H5P_DEFAULT))) { - printf("Error in H5Dcreate"); - exit(0); - } - if (0 > H5Dwrite(idsid, H5T_STD_U64BE, H5S_ALL, H5S_ALL, H5P_DEFAULT, data)) { - printf("Error in H5Dwrite"); - exit(0); - } - } - delete[] data; - if (0 > H5Dclose(idsid)) { - printf("Error in H5Dclose"); - exit(0); - } - } else { - printf("Error: unknown data type in sz3ToHDF5.cpp!\n"); - exit(0); - } - + + switch (dataType) { + case SZ_FLOAT:process_data(conf, oriFilePath, dataEndianType, fid, sid, cpid, DATASET, H5T_IEEE_F32LE, H5T_IEEE_F32BE); + break; + case SZ_DOUBLE:process_data(conf, 
oriFilePath, dataEndianType, fid, sid, cpid, DATASET, H5T_IEEE_F64LE, H5T_IEEE_F64BE); + break; + case SZ_INT8:process_data(conf, oriFilePath, dataEndianType, fid, sid, cpid, DATASET, H5T_STD_I8LE, H5T_STD_I8BE); + break; + case SZ_UINT8:process_data(conf, oriFilePath, dataEndianType, fid, sid, cpid, DATASET, H5T_STD_U8LE, H5T_STD_U8BE); + break; + case SZ_INT16:process_data(conf, oriFilePath, dataEndianType, fid, sid, cpid, DATASET, H5T_STD_I16LE, H5T_STD_I16BE); + break; + case SZ_UINT16:process_data(conf, oriFilePath, dataEndianType, fid, sid, cpid, DATASET, H5T_STD_U16LE, H5T_STD_U16BE); + break; + case SZ_INT32:process_data(conf, oriFilePath, dataEndianType, fid, sid, cpid, DATASET, H5T_STD_I32LE, H5T_STD_I32BE); + break; + case SZ_UINT32:process_data(conf, oriFilePath, dataEndianType, fid, sid, cpid, DATASET, H5T_STD_U32LE, H5T_STD_U32BE); + break; + case SZ_INT64:process_data(conf, oriFilePath, dataEndianType, fid, sid, cpid, DATASET, H5T_STD_I64LE, H5T_STD_I64BE); + break; + case SZ_UINT64:process_data(conf, oriFilePath, dataEndianType, fid, sid, cpid, DATASET, H5T_STD_U64LE, H5T_STD_U64BE); + break; + default:std::cerr << "Error: Unknown data type\n"; + exit(EXIT_FAILURE); + } + /*Close and release resources*/ if (0 > H5Sclose(sid)) { printf("Error in H5Sclose"); @@ -542,9 +218,8 @@ int main(int argc, char *argv[]) { printf("Error in H5Fclose"); exit(0); } - free(cd_values); printf("Output hdf5 file: %s\n", outputFilePath); - herr_t ret = H5Zunregister(H5Z_FILTER_SZ3); + ret = H5Zunregister(H5Z_FILTER_SZ3); if (ret < 0) return -1; H5close(); return 0; From 1ac337f3ffd7373c314c99e371c17d66daffd9e4 Mon Sep 17 00:00:00 2001 From: Kai Zhao Date: Mon, 1 Jul 2024 11:55:05 -0400 Subject: [PATCH 15/23] add two helper func to get/set SZ3 conf from H5 --- tools/H5Z-SZ3/include/H5Z_SZ3.hpp | 6 ++++ tools/H5Z-SZ3/src/H5Z_SZ3.cpp | 55 +++++++++++++++++++------------ tools/H5Z-SZ3/test/sz3ToHDF5.cpp | 11 +++++-- 3 files changed, 49 insertions(+), 23 deletions(-) diff 
--git a/tools/H5Z-SZ3/include/H5Z_SZ3.hpp b/tools/H5Z-SZ3/include/H5Z_SZ3.hpp index e8b1fd47..b2999719 100644 --- a/tools/H5Z-SZ3/include/H5Z_SZ3.hpp +++ b/tools/H5Z-SZ3/include/H5Z_SZ3.hpp @@ -13,6 +13,7 @@ #include #include #include +#include "SZ3/api/sz.hpp" #ifdef __cplusplus extern "C" { @@ -40,8 +41,13 @@ static size_t H5Z_filter_sz3(unsigned int flags, size_t cd_nelmts, const unsigne const void *H5PLget_plugin_info(void); +herr_t set_SZ3_conf_to_H5(const hid_t propertyList, SZ3::Config &conf); + +herr_t get_SZ3_conf_from_H5(const hid_t propertyList, SZ3::Config &conf); + #ifdef __cplusplus } #endif + #endif //SZ3_H5Z_SZ3_H diff --git a/tools/H5Z-SZ3/src/H5Z_SZ3.cpp b/tools/H5Z-SZ3/src/H5Z_SZ3.cpp index 952e4013..634dfda2 100644 --- a/tools/H5Z-SZ3/src/H5Z_SZ3.cpp +++ b/tools/H5Z-SZ3/src/H5Z_SZ3.cpp @@ -7,7 +7,6 @@ #include #include "H5PLextern.h" #include "H5Z_SZ3.hpp" -#include "SZ3/api/sz.hpp" hid_t H5Z_SZ_ERRCLASS = -1; @@ -34,24 +33,32 @@ const void *H5PLget_plugin_info(void) { return H5Z_SZ3; } -static herr_t H5Z_sz3_set_local(hid_t dcpl_id, hid_t type_id, hid_t chunk_space_id) { +herr_t set_SZ3_conf_to_H5(const hid_t propertyList, SZ3::Config & conf) { + static char const *_funcname_ = "set_SZ3_conf_to_H5"; - printf("start H5Z_sz3_set_local\n"); + //save conf into cd_values + size_t cd_nelmts = std::ceil(conf.size_est() / 1.0 / sizeof(int)); + std::vector cd_values(cd_nelmts, 0); + auto buffer = (unsigned char *) (cd_values.data()); - //printf("start in H5Z_sz3_set_local, dcpl_id = %d\n", dcpl_id); - static char const *_funcname_ = "H5Z_sz3_set_local"; + conf.save(buffer); - // herr_t ret = H5Zregister(H5Z_SZ3); + /* update cd_values for the filter */ + if (0 > H5Pmodify_filter(propertyList, H5Z_FILTER_SZ3, H5Z_FLAG_MANDATORY, cd_nelmts, cd_values.data())) + H5Z_SZ_PUSH_AND_GOTO(H5E_PLINE, H5E_BADVALUE, 0, "failed to modify cd_values"); - SZ3::Config conf; + return (herr_t) 1; +} + +herr_t get_SZ3_conf_from_H5(const hid_t propertyList, SZ3::Config & 
conf) { + static char const *_funcname_ = "get_SZ3_conf_from_H5"; - unsigned int flags = 0; - size_t cd_nelmts = conf.size_est(); - std::vector cd_values(conf.size_est(), 0); + size_t cd_nelmts = std::ceil(conf.size_est() / 1.0 / sizeof(int)); + std::vector cd_values(cd_nelmts, 0); //read cd_values from HDF5 //note that cd_nelmts must be non-zero, otherwise, cd_values cannot be filled. - if (0 > H5Pget_filter_by_id(dcpl_id, H5Z_FILTER_SZ3, &flags, &cd_nelmts, cd_values.data(), 0, NULL, NULL)) + if (0 > H5Pget_filter_by_id(propertyList, H5Z_FILTER_SZ3, H5Z_FLAG_MANDATORY, &cd_nelmts, cd_values.data(), 0, NULL, NULL)) H5Z_SZ_PUSH_AND_GOTO(H5E_PLINE, H5E_CANTGET, 0, "unable to get current SZ cd_values"); //load cd_values into config @@ -59,6 +66,20 @@ static herr_t H5Z_sz3_set_local(hid_t dcpl_id, hid_t type_id, hid_t chunk_space_ auto buffer = (const unsigned char *) (cd_values.data()); conf.load(buffer); } + return (herr_t) 1; +} + +static herr_t H5Z_sz3_set_local(hid_t dcpl_id, hid_t type_id, hid_t chunk_space_id) { + + printf("start H5Z_sz3_set_local\n"); + + //printf("start in H5Z_sz3_set_local, dcpl_id = %d\n", dcpl_id); + static char const *_funcname_ = "H5Z_sz3_set_local"; + + // herr_t ret = H5Zregister(H5Z_SZ3); + + SZ3::Config conf; + get_SZ3_conf_from_H5(dcpl_id, conf); //read datatype and dims from HDF5 H5T_class_t dclass; @@ -114,17 +135,9 @@ static herr_t H5Z_sz3_set_local(hid_t dcpl_id, hid_t type_id, hid_t chunk_space_ //update conf with dims conf.setDims(std::begin(dims), std::end(dims)); - //save conf into cd_values - cd_nelmts = std::ceil(conf.size_est() / 1.0 / sizeof(int)); - auto buffer = (unsigned char *) (cd_values.data()); - conf.save(buffer); - - /* update cd_values for the filter */ - if (0 > H5Pmodify_filter(dcpl_id, H5Z_FILTER_SZ3, flags, cd_nelmts, cd_values.data())) - H5Z_SZ_PUSH_AND_GOTO(H5E_PLINE, H5E_BADVALUE, 0, "failed to modify cd_values"); + set_SZ3_conf_to_H5(dcpl_id, conf); - herr_t retval = 1; - return retval; + return 
(herr_t) 1; } template diff --git a/tools/H5Z-SZ3/test/sz3ToHDF5.cpp b/tools/H5Z-SZ3/test/sz3ToHDF5.cpp index 65e40410..26ce3f96 100644 --- a/tools/H5Z-SZ3/test/sz3ToHDF5.cpp +++ b/tools/H5Z-SZ3/test/sz3ToHDF5.cpp @@ -152,7 +152,7 @@ int main(int argc, char *argv[]) { exit(0); } - /* Add the SZ compression filter and set the chunk size */ + /* Add the SZ compression filter */ if (0 > H5Pset_filter(cpid, H5Z_FILTER_SZ3, H5Z_FLAG_MANDATORY, cd_nelmts, cd_values.data())) { printf("Error in H5Pcreate"); exit(0); @@ -171,13 +171,20 @@ int main(int argc, char *argv[]) { printf("sz filter is available for encoding and decoding.\n"); } + /* set the chunk size*/ std::vector hchunk(hdims); - hchunk[0] = 10; +// hchunk[0] = 10; if (0 > H5Pset_chunk(cpid, conf.N, hchunk.data())) { printf("Error in H5Pcreate"); exit(0); } + {//This is an example to get/set SZ configuration from HDF5 file + SZ3::Config conf1; + get_SZ3_conf_from_H5(cpid, conf1); +// conf1.absErrorBound = 1; + set_SZ3_conf_to_H5(cpid, conf1); + } printf("....Writing SZ compressed data.............\n"); switch (dataType) { From 7602f702aa57c61c20913f571423edc040ce0130 Mon Sep 17 00:00:00 2001 From: Kai Zhao Date: Fri, 5 Jul 2024 12:23:27 -0400 Subject: [PATCH 16/23] add two helper func to get/set SZ3 conf from H5 --- tools/H5Z-SZ3/src/H5Z_SZ3.cpp | 17 +++++++++++------ tools/H5Z-SZ3/test/sz3ToHDF5.cpp | 27 ++++++--------------------- 2 files changed, 17 insertions(+), 27 deletions(-) diff --git a/tools/H5Z-SZ3/src/H5Z_SZ3.cpp b/tools/H5Z-SZ3/src/H5Z_SZ3.cpp index 634dfda2..c9452b5e 100644 --- a/tools/H5Z-SZ3/src/H5Z_SZ3.cpp +++ b/tools/H5Z-SZ3/src/H5Z_SZ3.cpp @@ -33,7 +33,7 @@ const void *H5PLget_plugin_info(void) { return H5Z_SZ3; } -herr_t set_SZ3_conf_to_H5(const hid_t propertyList, SZ3::Config & conf) { +herr_t set_SZ3_conf_to_H5(const hid_t propertyList, SZ3::Config &conf) { static char const *_funcname_ = "set_SZ3_conf_to_H5"; //save conf into cd_values @@ -42,15 +42,20 @@ herr_t set_SZ3_conf_to_H5(const 
hid_t propertyList, SZ3::Config & conf) { auto buffer = (unsigned char *) (cd_values.data()); conf.save(buffer); - - /* update cd_values for the filter */ - if (0 > H5Pmodify_filter(propertyList, H5Z_FILTER_SZ3, H5Z_FLAG_MANDATORY, cd_nelmts, cd_values.data())) - H5Z_SZ_PUSH_AND_GOTO(H5E_PLINE, H5E_BADVALUE, 0, "failed to modify cd_values"); + auto szfilter = H5Pget_filter_by_id(propertyList, H5Z_FILTER_SZ3, H5Z_FLAG_MANDATORY, NULL, NULL, 0, NULL, NULL); //check if filter is set + if (0 > szfilter) { //filter not set, set filter. Notice that calling H5Pset_filter twice with the same filter id will cause unexpected errors for decompression + if (0 > H5Pset_filter(propertyList, H5Z_FILTER_SZ3, H5Z_FLAG_MANDATORY, cd_nelmts, cd_values.data())) { + H5Z_SZ_PUSH_AND_GOTO(H5E_PLINE, H5E_BADVALUE, 0, "failed to modify cd_values"); + } + } else { // filter already set, update filter + if (0 > H5Pmodify_filter(propertyList, H5Z_FILTER_SZ3, H5Z_FLAG_MANDATORY, cd_nelmts, cd_values.data())) + H5Z_SZ_PUSH_AND_GOTO(H5E_PLINE, H5E_BADVALUE, 0, "failed to modify cd_values"); + } return (herr_t) 1; } -herr_t get_SZ3_conf_from_H5(const hid_t propertyList, SZ3::Config & conf) { +herr_t get_SZ3_conf_from_H5(const hid_t propertyList, SZ3::Config &conf) { static char const *_funcname_ = "get_SZ3_conf_from_H5"; size_t cd_nelmts = std::ceil(conf.size_est() / 1.0 / sizeof(int)); diff --git a/tools/H5Z-SZ3/test/sz3ToHDF5.cpp b/tools/H5Z-SZ3/test/sz3ToHDF5.cpp index 26ce3f96..99226572 100644 --- a/tools/H5Z-SZ3/test/sz3ToHDF5.cpp +++ b/tools/H5Z-SZ3/test/sz3ToHDF5.cpp @@ -133,13 +133,6 @@ int main(int argc, char *argv[]) { // Set compression related attributes here conf.cmprAlgo = SZ3::ALGO_BIOMD; - //save conf to cd_values - size_t cd_nelmts = std::ceil(conf.size_est() / 1.0 / sizeof(int)); - std::vector cd_values(cd_nelmts); - auto buffer = (unsigned char *) (cd_values.data()); - conf.save(buffer); - // conf.print(); - std::vector hdims(conf.dims.begin(), conf.dims.end()); /*Create 
dataspace. Setting maximum size */ if (0 > (sid = H5Screate_simple(conf.N, hdims.data(), NULL))) { @@ -152,22 +145,15 @@ int main(int argc, char *argv[]) { exit(0); } - /* Add the SZ compression filter */ - if (0 > H5Pset_filter(cpid, H5Z_FILTER_SZ3, H5Z_FLAG_MANDATORY, cd_nelmts, cd_values.data())) { - printf("Error in H5Pcreate"); - exit(0); - } - herr_t ret = H5Zregister(H5PLget_plugin_info()); - if (ret < 0) { + set_SZ3_conf_to_H5(cpid, conf); + + if (0 > H5Zregister(H5PLget_plugin_info())) { printf("Error in H5Zregister"); exit(0); } - htri_t avail = H5Zfilter_avail(H5Z_FILTER_SZ3); - if (avail) { + if (H5Zfilter_avail(H5Z_FILTER_SZ3)) { unsigned filter_config; - auto status = H5Zget_filter_info(H5Z_FILTER_SZ3, &filter_config); - - if (filter_config & H5Z_FILTER_CONFIG_ENCODE_ENABLED) + if (H5Zget_filter_info(H5Z_FILTER_SZ3, &filter_config) & H5Z_FILTER_CONFIG_ENCODE_ENABLED) printf("sz filter is available for encoding and decoding.\n"); } @@ -226,8 +212,7 @@ int main(int argc, char *argv[]) { exit(0); } printf("Output hdf5 file: %s\n", outputFilePath); - ret = H5Zunregister(H5Z_FILTER_SZ3); - if (ret < 0) return -1; + if (H5Zunregister(H5Z_FILTER_SZ3) < 0) return -1; H5close(); return 0; } From d2fb5e5ada826cf1c6e13c7ffc2fc59ba92482ae Mon Sep 17 00:00:00 2001 From: Kai Zhao Date: Fri, 23 Aug 2024 21:19:27 -0400 Subject: [PATCH 17/23] remove warning during make --- tools/H5Z-SZ3/include/H5Z_SZ3.hpp | 4 ---- tools/H5Z-SZ3/test/dsz3FromHDF5.cpp | 4 ++-- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/tools/H5Z-SZ3/include/H5Z_SZ3.hpp b/tools/H5Z-SZ3/include/H5Z_SZ3.hpp index b2999719..16582349 100644 --- a/tools/H5Z-SZ3/include/H5Z_SZ3.hpp +++ b/tools/H5Z-SZ3/include/H5Z_SZ3.hpp @@ -1,7 +1,3 @@ -// -// Created by arham23 on 2/8/22. 
-// - #ifndef SZ3_H5Z_SZ3_H #define SZ3_H5Z_SZ3_H diff --git a/tools/H5Z-SZ3/test/dsz3FromHDF5.cpp b/tools/H5Z-SZ3/test/dsz3FromHDF5.cpp index f93aa6ba..07c005e2 100644 --- a/tools/H5Z-SZ3/test/dsz3FromHDF5.cpp +++ b/tools/H5Z-SZ3/test/dsz3FromHDF5.cpp @@ -206,7 +206,7 @@ int main(int argc, char *argv[]) { int i; printf("reconstructed data = "); for (i = 0; i < 20; i++) - printf("%ld ", data[i]); + printf("%llu ", data[i]); printf("\n"); free(data); } @@ -260,7 +260,7 @@ int main(int argc, char *argv[]) { int i; printf("reconstructed data = "); for (i = 0; i < 20; i++) - printf("%ld ", data[i]); + printf("%llu ", data[i]); printf("\n"); free(data); } From 751f41ea2e65216f707b197d5e4d3d482f7a9246 Mon Sep 17 00:00:00 2001 From: Kai Zhao Date: Tue, 27 Aug 2024 19:56:49 -0400 Subject: [PATCH 18/23] mergew with sz v3.2.0 --- include/SZ3/api/impl/SZAlgoBioMD.hpp | 59 +++++++++++++++ include/SZ3/api/impl/SZBioMD.hpp | 75 ------------------- include/SZ3/api/impl/SZDispatcher.hpp | 6 +- ...DFrontend.hpp => SZBioMDDecomposition.hpp} | 32 ++++---- ...ontend.hpp => SZBioMDXtcDecomposition.hpp} | 14 ++-- include/SZ3/utils/Config.hpp | 1 + 6 files changed, 86 insertions(+), 101 deletions(-) create mode 100644 include/SZ3/api/impl/SZAlgoBioMD.hpp delete mode 100644 include/SZ3/api/impl/SZBioMD.hpp rename include/SZ3/decomposition/{SZBioMDFrontend.hpp => SZBioMDDecomposition.hpp} (95%) rename include/SZ3/decomposition/{SZBioMDXtcBasedFrontend.hpp => SZBioMDXtcDecomposition.hpp} (94%) diff --git a/include/SZ3/api/impl/SZAlgoBioMD.hpp b/include/SZ3/api/impl/SZAlgoBioMD.hpp new file mode 100644 index 00000000..1633bd5b --- /dev/null +++ b/include/SZ3/api/impl/SZAlgoBioMD.hpp @@ -0,0 +1,59 @@ +#ifndef SZ3_SZ_BIOMD_HPP +#define SZ3_SZ_BIOMD_HPP + +#include "SZ3/quantizer/IntegerQuantizer.hpp" +#include "SZ3/decomposition/SZBioMDXtcDecomposition.hpp" +#include "SZ3/decomposition/SZBioMDDecomposition.hpp" +#include "SZ3/encoder/XtcBasedEncoder.hpp" +#include 
"SZ3/lossless/Lossless_zstd.hpp" +#include "SZ3/lossless/Lossless_bypass.hpp" +#include "SZ3/utils/Statistic.hpp" +#include "SZ3/utils/Config.hpp" +#include "SZ3/def.hpp" + +namespace SZ3 { + + template + size_t SZ_compress_bioMD(Config &conf, T *data, uchar *cmpData, size_t cmpCap) { + assert(N == conf.N); + assert(conf.cmprAlgo == ALGO_BIOMD); + calAbsErrorBound(conf, data); + + auto quantizer = LinearQuantizer(conf.absErrorBound, conf.quantbinCnt / 2); + auto sz = make_compressor_sz_generic(make_decomposition_biomd(conf, quantizer), HuffmanEncoder(), + Lossless_zstd()); + return sz->compress(conf, data, cmpData, cmpCap); + } + + template + void SZ_decompress_bioMD(const Config &conf, const uchar *cmpData, size_t cmpSize, T *decData) { + assert(conf.cmprAlgo == ALGO_BIOMD); + + LinearQuantizer quantizer; + auto sz = make_compressor_sz_generic(make_decomposition_biomd(conf, quantizer), + HuffmanEncoder(), Lossless_zstd()); + sz->decompress(conf, cmpData, cmpSize, decData); + } + + template + size_t SZ_compress_bioMDXtcBased(Config &conf, T *data, uchar *cmpData, size_t cmpCap) { + assert(N == conf.N); + assert(conf.cmprAlgo == ALGO_BIOMDXTC); + calAbsErrorBound(conf, data); + + auto sz = make_compressor_sz_generic(SZBioMDXtcDecomposition(conf), XtcBasedEncoder(), + Lossless_bypass()); + return sz->compress(conf, data, cmpData, cmpCap); + } + + template + void SZ_decompress_bioMDXtcBased(const Config &conf, const uchar *cmpData, size_t cmpSize, T *decData) { + assert(conf.cmprAlgo == ALGO_BIOMDXTC); + + auto sz = make_compressor_sz_generic(SZBioMDXtcDecomposition(conf), + XtcBasedEncoder(), Lossless_bypass()); + sz->decompress(conf, cmpData, cmpSize, decData); + } + +} +#endif diff --git a/include/SZ3/api/impl/SZBioMD.hpp b/include/SZ3/api/impl/SZBioMD.hpp deleted file mode 100644 index 8cc362d6..00000000 --- a/include/SZ3/api/impl/SZBioMD.hpp +++ /dev/null @@ -1,75 +0,0 @@ -#ifndef SZ3_SZ_BIOMD_HPP -#define SZ3_SZ_BIOMD_HPP - -#include 
"SZ3/compressor/SZGeneralCompressor.hpp" -#include "SZ3/frontend/SZBioMDFrontend.hpp" -#include "SZ3/quantizer/IntegerQuantizer.hpp" -#include "SZ3/frontend/SZBioMDXtcBasedFrontend.hpp" -#include "SZ3/encoder/XtcBasedEncoder.hpp" -#include "SZ3/predictor/ComposedPredictor.hpp" -#include "SZ3/predictor/LorenzoPredictor.hpp" -#include "SZ3/predictor/RegressionPredictor.hpp" -#include "SZ3/predictor/PolyRegressionPredictor.hpp" -#include "SZ3/lossless/Lossless_zstd.hpp" -#include "SZ3/lossless/Lossless_bypass.hpp" -#include "SZ3/utils/Iterator.hpp" -#include "SZ3/utils/Statistic.hpp" -#include "SZ3/utils/Extraction.hpp" -#include "SZ3/utils/QuantOptimizatioin.hpp" -#include "SZ3/utils/Config.hpp" -#include "SZ3/def.hpp" -#include -#include - -namespace SZ3 { - - template - char *SZ_compress_bioMD(Config &conf, T *data, size_t &outSize) { - assert(N == conf.N); - assert(conf.cmprAlgo == ALGO_BIOMD); - calAbsErrorBound(conf, data); - - char *cmpData; - auto quantizer = LinearQuantizer(conf.absErrorBound, conf.quantbinCnt / 2); - auto sz = make_sz_general_compressor(make_sz_bio_frontend(conf, quantizer), HuffmanEncoder(), - Lossless_zstd()); - cmpData = (char *) sz->compress(conf, data, outSize); - return cmpData; - } - - template - void SZ_decompress_bioMD(const Config &conf, char *cmpData, size_t cmpSize, T *decData) { - assert(conf.cmprAlgo == ALGO_BIOMD); - - uchar const *cmpDataPos = (uchar *) cmpData; - LinearQuantizer quantizer; - auto sz = make_sz_general_compressor(make_sz_bio_frontend(conf, quantizer), - HuffmanEncoder(), Lossless_zstd()); - sz->decompress(cmpDataPos, cmpSize, decData); - } - - template - char *SZ_compress_bioMDXtcBased(Config &conf, T *data, size_t &outSize) { - assert(N == conf.N); - assert(conf.cmprAlgo == ALGO_BIOMDXTC); - calAbsErrorBound(conf, data); - - char *cmpData; - auto sz = make_sz_general_compressor(SZBioMDXtcBasedFrontend(conf), XtcBasedEncoder(), - Lossless_bypass()); - cmpData = (char *) sz->compress(conf, data, outSize); - 
return cmpData; - } - - template - void SZ_decompress_bioMDXtcBased(const Config &conf, char *cmpData, size_t cmpSize, T *decData) { - assert(conf.cmprAlgo == ALGO_BIOMDXTC); - - const uchar *cmpDataPos = (uchar *) cmpData; - auto sz = make_sz_general_compressor(SZBioMDXtcBasedFrontend(conf), - XtcBasedEncoder(), Lossless_bypass()); - sz->decompress(cmpDataPos, cmpSize, decData); - } - -} -#endif diff --git a/include/SZ3/api/impl/SZDispatcher.hpp b/include/SZ3/api/impl/SZDispatcher.hpp index 237a1143..b7661bd8 100644 --- a/include/SZ3/api/impl/SZDispatcher.hpp +++ b/include/SZ3/api/impl/SZDispatcher.hpp @@ -7,7 +7,7 @@ #include "SZ3/api/impl/SZAlgoInterp.hpp" #include "SZ3/api/impl/SZAlgoLorenzoReg.hpp" #include "SZ3/api/impl/SZAlgo.hpp" -#include "SZ3/api/impl/SZBioMD.hpp" +#include "SZ3/api/impl/SZAlgoBioMD.hpp" #include namespace SZ3 { @@ -30,9 +30,9 @@ namespace SZ3 { } else if (conf.cmprAlgo == ALGO_NOPRED) { return SZ_compress_nopred(conf, data, cmpData, cmpCap); } else if (conf.cmprAlgo == ALGO_BIOMD) { - cmpData = (char *) SZ_compress_bioMD(conf, data, outSize); + return SZ_compress_bioMD(conf, data, cmpData, cmpCap); } else if (conf.cmprAlgo == ALGO_BIOMDXTC) { - cmpData = (char *) SZ_compress_bioMDXtcBased(conf, data, outSize); + return SZ_compress_bioMDXtcBased(conf, data, cmpData, cmpCap); } return 0; // return cmpData; diff --git a/include/SZ3/decomposition/SZBioMDFrontend.hpp b/include/SZ3/decomposition/SZBioMDDecomposition.hpp similarity index 95% rename from include/SZ3/decomposition/SZBioMDFrontend.hpp rename to include/SZ3/decomposition/SZBioMDDecomposition.hpp index 9c8ea18d..7cca79d7 100644 --- a/include/SZ3/decomposition/SZBioMDFrontend.hpp +++ b/include/SZ3/decomposition/SZBioMDDecomposition.hpp @@ -4,7 +4,7 @@ /** */ -#include "Frontend.hpp" +#include "Decomposition.hpp" //#include "SZ3/utils/MemoryUtil.hpp" #include "SZ3/utils/Config.hpp" #include @@ -12,9 +12,9 @@ namespace SZ3 { template - class SZBioMDFrontend : public 
concepts::FrontendInterface { + class SZBioMDDecomposition : public concepts::DecompositionInterface { public: - SZBioMDFrontend(const Config &conf, Quantizer quantizer) : + SZBioMDDecomposition(const Config &conf, Quantizer quantizer) : quantizer(quantizer), conf(conf) { if (N != 1 && N != 2 && N != 3) { @@ -22,13 +22,13 @@ namespace SZ3 { } } - ~SZBioMDFrontend() { - clear(); + ~SZBioMDDecomposition() { +// clear(); } void print() {}; - - std::vector compress(T *data) { + + std::vector compress(const Config &conf, T *data) { if (N == 1) { return compress_1d(data); } else if (N == 2) { @@ -37,8 +37,8 @@ namespace SZ3 { return compress_3d(data); } }; - - T *decompress(std::vector &quant_inds, T *dec_data) { + + T *decompress(const Config &conf, std::vector &quant_inds, T *dec_data) { if (N == 1) { return decompress_1d(quant_inds, dec_data); } else if (N == 2) { @@ -56,7 +56,7 @@ namespace SZ3 { } void load(const uchar *&c, size_t &remaining_length) { - clear(); +// clear(); const uchar *c_pos = c; read(site, c, remaining_length); read(firstFillFrame_, c, remaining_length); @@ -65,9 +65,9 @@ namespace SZ3 { remaining_length -= c_pos - c; } - void clear() { - quantizer.clear(); - } +// void clear() { +// quantizer.clear(); +// } size_t size_est() { return quantizer.size_est(); //unpred @@ -362,9 +362,9 @@ namespace SZ3 { }; template - SZBioMDFrontend - make_sz_bio_frontend(const Config &conf, Predictor predictor) { - return SZBioMDFrontend(conf, predictor); + SZBioMDDecomposition + make_decomposition_biomd(const Config &conf, Predictor predictor) { + return SZBioMDDecomposition(conf, predictor); } } diff --git a/include/SZ3/decomposition/SZBioMDXtcBasedFrontend.hpp b/include/SZ3/decomposition/SZBioMDXtcDecomposition.hpp similarity index 94% rename from include/SZ3/decomposition/SZBioMDXtcBasedFrontend.hpp rename to include/SZ3/decomposition/SZBioMDXtcDecomposition.hpp index e514a06d..90b085d6 100644 --- a/include/SZ3/decomposition/SZBioMDXtcBasedFrontend.hpp +++ 
b/include/SZ3/decomposition/SZBioMDXtcDecomposition.hpp @@ -1,12 +1,12 @@ /* - * Based on SZBioMDFrontend.hpp + * Based on SZBioMDDecomposition.hpp * \author: Magnus Lundborg */ #ifndef SZ3_SZBIOMDXTCBASED_FRONTEND #define SZ3_SZBIOMDXTCBASED_FRONTEND -#include "Frontend.hpp" +#include "Decomposition.hpp" #include "SZ3/utils/Config.hpp" #include @@ -14,19 +14,19 @@ namespace SZ3 { template - class SZBioMDXtcBasedFrontend : public concepts::FrontendInterface { + class SZBioMDXtcDecomposition : public concepts::DecompositionInterface { public: - SZBioMDXtcBasedFrontend(const Config &conf) : conf(conf) { + SZBioMDXtcDecomposition(const Config &conf) : conf(conf) { if (N != 1 && N != 2 && N != 3) { throw std::invalid_argument("SZBioFront only support 1D, 2D or 3D data"); } } - ~SZBioMDXtcBasedFrontend() { clear(); } + // ~SZBioMDXtcDecomposition() { clear(); } void print() {}; - std::vector compress(T *data) { + std::vector compress(const Config &conf, T *data) { if (N <= 2) { return compressSingleFrame(data); } else { @@ -34,7 +34,7 @@ namespace SZ3 { } }; - T *decompress(std::vector &quantData, T *decData) { + T *decompress(const Config &conf, std::vector &quantData, T *decData) { if (N <= 2) { return decompressSingleFrame(quantData, decData); } else { diff --git a/include/SZ3/utils/Config.hpp b/include/SZ3/utils/Config.hpp index 5aa06142..95401f59 100644 --- a/include/SZ3/utils/Config.hpp +++ b/include/SZ3/utils/Config.hpp @@ -11,6 +11,7 @@ #include #include "SZ3/utils/MemoryUtil.hpp" #include "SZ3/utils/inih/INIReader.h" +#include "SZ3/version.hpp" #define SZ_FLOAT 0 #define SZ_DOUBLE 1 From 857682803acf4073817144e5fbbfefbe35b45f3f Mon Sep 17 00:00:00 2001 From: Kai Zhao Date: Thu, 14 Nov 2024 12:04:26 -0500 Subject: [PATCH 19/23] bugfix for BioMDXTC when input is 1D or 2D and length is not divided by 3 --- .../decomposition/SZBioMDXtcDecomposition.hpp | 3 -- include/SZ3/encoder/HuffmanEncoder.hpp | 8 ++--- include/SZ3/encoder/XtcBasedEncoder.hpp | 30 
+++++++++++++++++-- include/SZ3/lossless/Lossless_bypass.hpp | 9 ++++-- 4 files changed, 37 insertions(+), 13 deletions(-) diff --git a/include/SZ3/decomposition/SZBioMDXtcDecomposition.hpp b/include/SZ3/decomposition/SZBioMDXtcDecomposition.hpp index f2a142c7..66eb8141 100644 --- a/include/SZ3/decomposition/SZBioMDXtcDecomposition.hpp +++ b/include/SZ3/decomposition/SZBioMDXtcDecomposition.hpp @@ -22,9 +22,6 @@ class SZBioMDXtcDecomposition : public concepts::DecompositionInterface compress(const Config &conf, T *data) override { if (N <= 2) { return compressSingleFrame(data); diff --git a/include/SZ3/encoder/HuffmanEncoder.hpp b/include/SZ3/encoder/HuffmanEncoder.hpp index 5ef86011..d485c9d4 100644 --- a/include/SZ3/encoder/HuffmanEncoder.hpp +++ b/include/SZ3/encoder/HuffmanEncoder.hpp @@ -308,7 +308,7 @@ class HuffmanEncoder : public concepts::EncoderInterface { } void saveAsCode(uchar *&c) { - Timer timer(true); + // Timer timer(true); // uchar *head = c; @@ -395,7 +395,7 @@ class HuffmanEncoder : public concepts::EncoderInterface { writeBytesClearMask(c, mask, index); - timer.stop("saveAsCode"); + // timer.stop("saveAsCode"); // printf("huffman tree size = %d\n",(int)(c-head)); @@ -411,7 +411,7 @@ class HuffmanEncoder : public concepts::EncoderInterface { } void loadAsCode(const uchar *&bytes, size_t &remaining_length) { - Timer timer(true); + // Timer timer(true); tree.init(); @@ -546,7 +546,7 @@ class HuffmanEncoder : public concepts::EncoderInterface { bytes += (i + 7) >> 3; - timer.stop("loadAsCode"); + // timer.stop("loadAsCode"); tree.setConstructed(); } diff --git a/include/SZ3/encoder/XtcBasedEncoder.hpp b/include/SZ3/encoder/XtcBasedEncoder.hpp index 076f0462..88346a56 100644 --- a/include/SZ3/encoder/XtcBasedEncoder.hpp +++ b/include/SZ3/encoder/XtcBasedEncoder.hpp @@ -281,7 +281,15 @@ class XtcBasedEncoder : public concepts::EncoderInterface { Config conf_; public: - void preprocess_encode(const std::vector &quantData, int stateNum) override {} + 
void preprocess_encode(const std::vector &quantData, int stateNum) override { + auto nreminder = quantData.size() % 3; + if (nreminder == 1) { + reminder1 = quantData[quantData.size() - 1]; + } else if (nreminder == 2) { + reminder1 = quantData[quantData.size() - 1]; + reminder2 = quantData[quantData.size() - 2]; + } + } /*! \brief Compress 3d coordinates to memory. * @@ -751,14 +759,30 @@ class XtcBasedEncoder : public concepts::EncoderInterface { } #endif + auto nreminder = quantData.size() % 3; + if (nreminder == 1) { + quantData[quantData.size() - 1] = reminder1; + } else if (nreminder == 2) { + quantData[quantData.size() - 1] = reminder1; + quantData[quantData.size() - 2] = reminder2; + } return quantData; } void postprocess_decode() override {} - void save(uchar *&c) override {} + void save(uchar *&c) override { + write(reminder1, c); + write(reminder2, c); + } - void load(const uchar *&c, size_t &remaining_length) override {} + void load(const uchar *&c, size_t &remaining_length) override { + read(reminder1, c, remaining_length); + read(reminder2, c, remaining_length); + } + + private: + int reminder1 = 0, reminder2 = 0; }; } // namespace SZ3 diff --git a/include/SZ3/lossless/Lossless_bypass.hpp b/include/SZ3/lossless/Lossless_bypass.hpp index a9b52300..f911fdea 100644 --- a/include/SZ3/lossless/Lossless_bypass.hpp +++ b/include/SZ3/lossless/Lossless_bypass.hpp @@ -18,9 +18,12 @@ class Lossless_bypass : public concepts::LosslessInterface { } size_t decompress(const uchar *src, const size_t srcLen, uchar *&dst, size_t &dstLen) override { - std::memcpy(dst, src, srcLen); - // dst = (uchar *)src; - return srcLen; + dstLen = srcLen; + if (dst == nullptr) { + dst = static_cast(malloc(dstLen)); + } + std::memcpy(dst, src, dstLen); + return dstLen; } }; } // namespace SZ3 From f34abb31c08f80c5f675c2730efb11396170d1f9 Mon Sep 17 00:00:00 2001 From: Kai Zhao Date: Wed, 27 Nov 2024 10:59:02 -0500 Subject: [PATCH 20/23] new huffman --- 
include/SZ3/encoder/HuffmanEncoder.hpp | 1563 ++++++++++++++++-------- 1 file changed, 1027 insertions(+), 536 deletions(-) diff --git a/include/SZ3/encoder/HuffmanEncoder.hpp b/include/SZ3/encoder/HuffmanEncoder.hpp index 5859de8e..1fdc200f 100644 --- a/include/SZ3/encoder/HuffmanEncoder.hpp +++ b/include/SZ3/encoder/HuffmanEncoder.hpp @@ -1,635 +1,1126 @@ #ifndef _SZ_HUFFMAN_ENCODER_HPP #define _SZ_HUFFMAN_ENCODER_HPP -#include +#include +#include +#include +#include +#include +#include #include "SZ3/def.hpp" #include "SZ3/encoder/Encoder.hpp" #include "SZ3/utils/ByteUtil.hpp" #include "SZ3/utils/MemoryUtil.hpp" #include "SZ3/utils/Timer.hpp" -#if INTPTR_MAX == INT64_MAX // 64bit system #include "SZ3/utils/ska_hash/unordered_map.hpp" -#endif // INTPTR_MAX == INT64_MAX -#include -#include -#include -#include -#include -#include -#include -#include -#include namespace SZ3 { template class HuffmanEncoder : public concepts::EncoderInterface { - public: - typedef struct node_t { - struct node_t *left, *right; - size_t freq; - char t; // in_node:0; otherwise:1 + private: + class Node { + public: + Node(T c_ = 0, Node *lp = nullptr, Node *rp = nullptr) { + c = c_; + p[0] = lp; + p[1] = rp; + } + T c; - } *node; - - typedef struct HuffmanTree { - unsigned int stateNum; - unsigned int allNodes; - struct node_t *pool; - node *qqq, *qq; // the root node of the HuffmanTree is qq[1] - int n_nodes; // n_nodes is for compression - int qend; - uint64_t **code; - unsigned char *cout; - int n_inode; // n_inode is for decompression - int maxBitCount; - } HuffmanTree; - - HuffmanEncoder() { - int x = 1; - char *y = reinterpret_cast(&x); - if (*y == 1) - sysEndianType = 0; - else //=0 - sysEndianType = 1; - } + Node *p[2]; + + inline uchar isLeaf() { return p[0] == nullptr; } + }; + + class HuffmanTree { + private: + uchar _constructed = 0; + + uchar len = 0; + int vec = 0; + + class cmp { + public: + bool operator()(const std::pair &u, const std::pair &v) { + return u.second == 
v.second ? u.first > v.first : u.second > v.second; + } + }; + + public: + void dfs_mp(Node *u) { + if (u->isLeaf()) { + mplen[u->c] = len; + mpcode[u->c] = vec; + + limit = std::max(limit, len); + + return; + } + + ++len; + dfs_mp(u->p[0]); + --len; + + vec ^= 1 << len++; + dfs_mp(u->p[1]); + vec ^= 1 << --len; + } + + void dfs_vec(Node *u) { + if (u->isLeaf()) { + veclen[u->c] = len; + veccode[u->c] = vec; + + limit = std::max(limit, len); + + return; + } + + ++len; + dfs_vec(u->p[0]); + --len; + + vec ^= 1 << len++; + dfs_vec(u->p[1]); + vec ^= 1 << --len; + } + + uchar usemp; + // 0 : vec + // 1 : mp + + std::vector veclen; + std::vector veccode; + ska::unordered_map mplen; + ska::unordered_map mpcode; + // std::unordered_map mplen; + // std::unordered_map mpcode; + // std::map mplen; + // std::map mpcode; + + T offset; + // minimum bits for T + uchar mbft; + uchar limit; + + void init() { + _constructed = 0; + ht.clear(); + veclen.clear(); + veccode.clear(); + mplen.clear(); + mpcode.clear(); + vecfreq.clear(); + mpfreq.clear(); + + offset = 0; + mbft = 0; + root = 0; + n = 0; + maxval = 0; + limit = 0; + } + + HuffmanTree() { init(); } + + int root; + int n; + T maxval; + std::vector ht; + std::vector vecfreq; + ska::unordered_map mpfreq; + // std::unordered_map mpfreq; + // std::map mpfreq; + + void addElementInMap(T c, size_t freqc) { + assert(!_constructed); + + ht.push_back(Node(c)); + mpfreq[c] = freqc; + ++n; + } + + void addElementInVector(T c, size_t freqc) { + assert(!_constructed); + + ht.push_back(Node(c)); + vecfreq[c] = freqc; + ++n; + } + + void constructHuffmanTree() { + assert(!_constructed); + + if (n == 1 || maxval == 1) { + mbft = 1; + maxval = 1; + offset += ht[0].c; + ht[0].c = 0; + ht.push_back(Node(0, &ht[0], nullptr)); + if (usemp) { + mplen[0] = 1; + mpcode[0] = 0; + } else { + veclen[0] = 1; + veccode[0] = 0; + } + limit = 1; + setConstructed(); + return; + } + + // Timer timer(true); + + mbft = 1; + while ((1llu << mbft) < maxval) 
++mbft; + + std::priority_queue, std::vector>, cmp> q; + + if (usemp) { + for (int i = 0; i < ht.size(); i++) { + q.push({i, mpfreq[ht[i].c]}); + } + } else { + for (int i = 0; i < ht.size(); i++) { + q.push({i, vecfreq[ht[i].c]}); + } + } + + while (q.size() > 1) { + int u = q.top().first; + size_t freq_u = q.top().second; + q.pop(); + int v = q.top().first; + size_t freq_v = q.top().second; + q.pop(); + + ht.push_back(Node(0, &ht[u], &ht[v])); + + q.push({ht.size() - 1, freq_u + freq_v}); + } + + root = ht.size() - 1; + + if (usemp) + dfs_mp(&ht[root]); + else + dfs_vec(&ht[root]); + + setConstructed(); + + // timer.stop("construct huffman tree"); + } + + uchar isConstructed() { return _constructed; } + + void setConstructed() { _constructed = 1; } + }; + + HuffmanTree tree; + + public: + void preprocess_encode(const T *const bins, size_t num_bin, int stateNum, uchar flag = 0x00) { + // Timer timer(true); + + tree.init(); + + T __minval, __maxval; + + if (stateNum == 0) { + __minval = *bins; + __maxval = *bins; + for (int i = 1; i < num_bin; i++) { + __minval = std::min(__minval, *(bins + i)); + __maxval = std::max(__maxval, *(bins + i)); + } + } else { + __minval = 0; + __maxval = stateNum - 1; + } + + tree.offset = __minval; + tree.maxval = __maxval - __minval + 1; + + switch ((flag & 0xc0) >> 6) { + case 0: { + tree.usemp = tree.maxval >= (1 << 12) && num_bin < 2 * __maxval || tree.maxval >= (1 << 28) ? 
1 : 0; + break; + } + case 1: { + tree.usemp = 1; + break; + } + case 2: { + tree.usemp = 0; + break; + } + case 3: { + tree.usemp = 0; + break; + } + } + + if ((flag & 0x01) == 0x01) { + tree.mbft = 1; + while ((1llu << tree.mbft) < tree.maxval) ++tree.mbft; + tree.n = 0; + tree.setConstructed(); + return; + } + + if (tree.usemp) { + // tree.mpfreq.reserve(num_bin); + // tree.mplen.reserve(num_bin); + // tree.mpcode.reserve(num_bin); + + // ska::unordered_map freq; + // std::unordered_map freq; + std::map freq; + // freq.reserve(num_bin); + + if (tree.offset == 0) { + for (int i = 0; i < num_bin; i++) { + ++freq[bins[i]]; + } + } else { + for (int i = 0; i < num_bin; i++) { + ++freq[bins[i] - tree.offset]; + } + } + + tree.ht.reserve(freq.size() << 1); + + for (auto it : freq) { + tree.addElementInMap(it.first, it.second); + } + } else { + tree.vecfreq.resize(tree.maxval); + tree.veclen.resize(tree.maxval); + tree.veccode.resize(tree.maxval); + + std::vector freq(tree.maxval); + + if (tree.offset == 0) { + for (int i = 0; i < num_bin; i++) { + ++freq[bins[i]]; + } + } else { + for (int i = 0; i < num_bin; i++) { + ++freq[bins[i] - tree.offset]; + } + } + + tree.ht.reserve(freq.size() << 1); - ~HuffmanEncoder() override { SZ_FreeHuffman(); } - - // build huffman tree - HuffmanTree *createHuffmanTree(int stateNum) { - HuffmanTree *huffmanTree = static_cast(malloc(sizeof(HuffmanTree))); - memset(huffmanTree, 0, sizeof(HuffmanTree)); - huffmanTree->stateNum = stateNum; - huffmanTree->allNodes = 2 * stateNum; - - huffmanTree->pool = static_cast(malloc(huffmanTree->allNodes * 2 * sizeof(struct node_t))); - huffmanTree->qqq = static_cast(malloc(huffmanTree->allNodes * 2 * sizeof(node))); - huffmanTree->code = static_cast(malloc(huffmanTree->stateNum * sizeof(uint64_t *))); - huffmanTree->cout = static_cast(malloc(huffmanTree->stateNum * sizeof(unsigned char))); - - memset(huffmanTree->pool, 0, huffmanTree->allNodes * 2 * sizeof(struct node_t)); - memset(huffmanTree->qqq, 
0, huffmanTree->allNodes * 2 * sizeof(node)); - memset(huffmanTree->code, 0, huffmanTree->stateNum * sizeof(uint64_t *)); - memset(huffmanTree->cout, 0, huffmanTree->stateNum * sizeof(unsigned char)); - huffmanTree->qq = huffmanTree->qqq - 1; - huffmanTree->n_nodes = 0; - huffmanTree->n_inode = 0; - huffmanTree->qend = 1; - - return huffmanTree; + for (int i = 0; i < tree.maxval; i++) { + if (freq[i]) tree.addElementInVector(i, freq[i]); + } + } + + // printf("begins to construct huffman tree\n"); + + tree.constructHuffmanTree(); + + // timer.stop("preprocess_encode"); } - /** - * build huffman tree using bins - * @param bins - * @param stateNum - */ - void preprocess_encode(const std::vector &bins, int stateNum)override { + void preprocess_encode(const std::vector &bins, int stateNum) override { preprocess_encode(bins.data(), bins.size(), stateNum); } - /** - * build huffman tree using bins - * @param bins - * @param num_bin - * @param stateNum - */ - void preprocess_encode(const T *bins, size_t num_bin, int stateNum) { - nodeCount = 0; - if (num_bin == 0) { - printf("Huffman bins should not be empty\n"); - exit(0); - } - init(bins, num_bin); - for (int i = 0; i < huffmanTree->stateNum; i++) - if (huffmanTree->code[i]) nodeCount++; - nodeCount = nodeCount * 2 - 1; + size_t encode(const std::vector &bins, uchar *&bytes) override { + return encode(bins.data(), bins.size(), bytes); } - // save the huffman Tree in the compressed data - void save(uchar *&c) override { - // auto cc = c; - write(offset, c); - int32ToBytes_bigEndian(c, nodeCount); - c += sizeof(int); - int32ToBytes_bigEndian(c, huffmanTree->stateNum / 2); - c += sizeof(int); - uint totalSize = 0; // = convert_HuffTree_to_bytes_anyStates(nodeCount, c); - // std::cout << "nodeCount = " << nodeCount << std::endl; - if (nodeCount <= 256) - totalSize = convert_HuffTree_to_bytes_anyStates(nodeCount, c); - else if (nodeCount <= 65536) - totalSize = convert_HuffTree_to_bytes_anyStates(nodeCount, c); - else - 
totalSize = convert_HuffTree_to_bytes_anyStates(nodeCount, c); - c += totalSize; - // return c - cc; - } + size_t encode(const T *bins, size_t num_bin, uchar *&bytes) { + if (tree.maxval == 1) { + int64ToBytes_bigEndian(bytes, num_bin ^ 0x1234abcd); + bytes += 8; + return 8; + } - size_t size_est() override{ - size_t b = (nodeCount <= 256) ? sizeof(unsigned char) - : ((nodeCount <= 65536) ? sizeof(unsigned short) : sizeof(unsigned int)); - return 1 + 2 * nodeCount * b + nodeCount * sizeof(unsigned char) + nodeCount * sizeof(T) + sizeof(int) + - sizeof(int) + sizeof(T); - } + // Timer timer(true); - // perform encoding - size_t encode(const std::vector &bins, uchar *&bytes) override{ return encode(bins.data(), bins.size(), bytes); } + assert(tree.isConstructed()); - // perform encoding - size_t encode(const T *bins, size_t num_bin, uchar *&bytes) { - size_t outSize = 0; - size_t i = 0; - unsigned char bitSize = 0, byteSize, byteSizep; - int state; - uchar *p = bytes + sizeof(size_t); - int lackBits = 0; - // int64_t totalBitSize = 0, maxBitSize = 0, bitSize21 = 0, bitSize32 = 0; - for (i = 0; i < num_bin; i++) { - state = bins[i] - offset; - bitSize = huffmanTree->cout[state]; - - if (lackBits == 0) { - byteSize = bitSize % 8 == 0 - ? bitSize / 8 - : bitSize / 8 + 1; // it's equal to the number of bytes involved (for *outSize) - byteSizep = bitSize / 8; // it's used to move the pointer p for next data - if (byteSize <= 8) { - int64ToBytes_bigEndian(p, (huffmanTree->code[state])[0]); - p += byteSizep; - } else // byteSize>8 - { - int64ToBytes_bigEndian(p, (huffmanTree->code[state])[0]); - p += 8; - int64ToBytes_bigEndian(p, (huffmanTree->code[state])[1]); - p += (byteSizep - 8); - } - outSize += byteSize; - lackBits = bitSize % 8 == 0 ? 
0 : 8 - bitSize % 8; + uchar *head = bytes; + bytes += 8; + + size_t len = 0; + + uchar mask = 0; + uchar index = 0; + + if (tree.n == 0) { + if (tree.offset == 0) { + for (size_t i = 0; i < num_bin; i++) { + writeBytes(bytes, bins[i], tree.mbft, mask, index); + } } else { - *p = (*p) | static_cast((huffmanTree->code[state])[0] >> (64 - lackBits)); - if (lackBits < bitSize) { - p++; - - int64_t newCode = (huffmanTree->code[state])[0] << lackBits; - int64ToBytes_bigEndian(p, newCode); - - if (bitSize <= 64) { - bitSize -= lackBits; - byteSize = bitSize % 8 == 0 ? bitSize / 8 : bitSize / 8 + 1; - byteSizep = bitSize / 8; - p += byteSizep; - outSize += byteSize; - lackBits = bitSize % 8 == 0 ? 0 : 8 - bitSize % 8; - } else // bitSize > 64 - { - byteSizep = 7; // must be 7 bytes, because lackBits!=0 - p += byteSizep; - outSize += byteSize; - - bitSize -= 64; - if (lackBits < bitSize) { - *p = (*p) | static_cast((huffmanTree->code[state])[0] >> (64 - lackBits)); - p++; - newCode = (huffmanTree->code[state])[1] << lackBits; - int64ToBytes_bigEndian(p, newCode); - bitSize -= lackBits; - byteSize = bitSize % 8 == 0 ? bitSize / 8 : bitSize / 8 + 1; - byteSizep = bitSize / 8; - p += byteSizep; - outSize += byteSize; - lackBits = bitSize % 8 == 0 ? 
0 : 8 - bitSize % 8; - } else // lackBits >= bitSize - { - *p = (*p) | static_cast((huffmanTree->code[state])[0] >> (64 - bitSize)); - lackBits -= bitSize; - } - } - } else // lackBits >= bitSize - { - lackBits -= bitSize; - if (lackBits == 0) p++; + for (size_t i = 0; i < num_bin; i++) { + writeBytes(bytes, bins[i] - tree.offset, tree.mbft, mask, index); } } + writeBytesClearMask(bytes, mask, index); + len = tree.mbft * num_bin; + int64ToBytes_bigEndian(head, len ^ 0x1234abcd); + return bytes - head; } - *reinterpret_cast(bytes) = outSize; - bytes += sizeof(size_t) + outSize; - return outSize; + + if (tree.offset == 0) { + if (tree.usemp) { + for (size_t i = 0; i < num_bin; i++) { + const T &it = bins[i]; + const uchar &len_i = tree.mplen[it]; + const int &code_i = tree.mpcode[it]; + len += len_i; + writeBytes(bytes, code_i, len_i, mask, index); + } + } else { + for (size_t i = 0; i < num_bin; i++) { + const T &it = bins[i]; + const uchar &len_i = tree.veclen[it]; + const int &code_i = tree.veccode[it]; + len += len_i; + writeBytes(bytes, code_i, len_i, mask, index); + } + } + } else { + if (tree.usemp) { + for (size_t i = 0; i < num_bin; i++) { + const T &it = bins[i]; + const uchar &len_i = tree.mplen[it - tree.offset]; + const int &code_i = tree.mpcode[it - tree.offset]; + len += len_i; + writeBytes(bytes, code_i, len_i, mask, index); + } + } else { + for (size_t i = 0; i < num_bin; i++) { + const T &it = bins[i]; + const uchar &len_i = tree.veclen[it - tree.offset]; + const int &code_i = tree.veccode[it - tree.offset]; + len += len_i; + writeBytes(bytes, code_i, len_i, mask, index); + } + } + } + + writeBytesClearMask(bytes, mask, index); + + int64ToBytes_bigEndian(head, len ^ 0x1234abcd); + + // timer.stop("encode"); + + // printf("code size = %d\n",(int)(bytes-head)); + + // Lossless_zstd zstd; + // size_t compressed_code_size; + + // delete[] zstd.compress(head,bytes-head,compressed_code_size); + + // printf("compressed code size = 
%d\n",(int)compressed_code_size); + + return bytes - head; } - void postprocess_encode() override{ SZ_FreeHuffman(); } + void postprocess_encode() override {} - void preprocess_decode() override{} + void preprocess_decode() override {} - // perform decoding std::vector decode(const uchar *&bytes, size_t targetLength) override { - node t = treeRoot; + if (tree.maxval == 1) { + size_t len = bytesToInt64_bigEndian(bytes) ^ 0x1234abcd; + bytes += 8; + // assert(len==targetLength); + + return std::vector(len, tree.offset); + } + + // Timer timer(true); + + assert(tree.isConstructed()); + + // Node *u = &tree.ht[tree.root]; + + size_t len = bytesToInt64_bigEndian(bytes) ^ 0x1234abcd; + bytes += 8; std::vector out(targetLength); - size_t i = 0, byteIndex = 0, count = 0; - int r; - node n = treeRoot; - size_t encodedLength = *reinterpret_cast(bytes); - bytes += sizeof(size_t); - if (n->t) // root->t==1 means that all state values are the same (constant) - { - for (count = 0; count < targetLength; count++) out[count] = n->c + offset; + size_t outLen = 0; + + // For fixed length encoding + if (tree.n == 0) { + size_t byteIndex = 0; + size_t i = 0; + size_t b; + + T c = 0; + int j = 0; + + if (tree.offset == 0) { + for (; i + 8 < len; i += 8, byteIndex++) { + b = bytes[byteIndex]; + + c |= ((b) & 1) << j, ++j; + if (j == tree.mbft) out[outLen++] = c, c = j = 0; + c |= ((b >> 1) & 1) << j, ++j; + if (j == tree.mbft) out[outLen++] = c, c = j = 0; + c |= ((b >> 2) & 1) << j, ++j; + if (j == tree.mbft) out[outLen++] = c, c = j = 0; + c |= ((b >> 3) & 1) << j, ++j; + if (j == tree.mbft) out[outLen++] = c, c = j = 0; + c |= ((b >> 4) & 1) << j, ++j; + if (j == tree.mbft) out[outLen++] = c, c = j = 0; + c |= ((b >> 5) & 1) << j, ++j; + if (j == tree.mbft) out[outLen++] = c, c = j = 0; + c |= ((b >> 6) & 1) << j, ++j; + if (j == tree.mbft) out[outLen++] = c, c = j = 0; + c |= ((b >> 7) & 1) << j, ++j; + if (j == tree.mbft) out[outLen++] = c, c = j = 0; + } + } else { + for (; i + 8 < 
len; i += 8, byteIndex++) { + b = bytes[byteIndex]; + + c |= ((b) & 1) << j, ++j; + if (j == tree.mbft) out[outLen++] = c + tree.offset, c = j = 0; + c |= ((b >> 1) & 1) << j, ++j; + if (j == tree.mbft) out[outLen++] = c + tree.offset, c = j = 0; + c |= ((b >> 2) & 1) << j, ++j; + if (j == tree.mbft) out[outLen++] = c + tree.offset, c = j = 0; + c |= ((b >> 3) & 1) << j, ++j; + if (j == tree.mbft) out[outLen++] = c + tree.offset, c = j = 0; + c |= ((b >> 4) & 1) << j, ++j; + if (j == tree.mbft) out[outLen++] = c + tree.offset, c = j = 0; + c |= ((b >> 5) & 1) << j, ++j; + if (j == tree.mbft) out[outLen++] = c + tree.offset, c = j = 0; + c |= ((b >> 6) & 1) << j, ++j; + if (j == tree.mbft) out[outLen++] = c + tree.offset, c = j = 0; + c |= ((b >> 7) & 1) << j, ++j; + if (j == tree.mbft) out[outLen++] = c + tree.offset, c = j = 0; + } + } + + b = bytes[byteIndex]; + + for (size_t k = 0; k < len - i; k++) { + c |= ((b >> k) & 1) << j, ++j; + if (j == tree.mbft) out[outLen++] = c + tree.offset, c = j = 0; + } + + bytes += (len + 7) >> 3; + return out; } - for (i = 0; count < targetLength; i++) { - byteIndex = i >> 3; // i/8 - r = i % 8; - if (((bytes[byteIndex] >> (7 - r)) & 0x01) == 0) - n = n->left; - else - n = n->right; + if (tree.limit > 16) { + // if huffman tree is large, a cache of huffman codebook is used to increase the performance + // Reference paper: Xiangyu Zou, Tao Lu, Wen Xia, Xuan Wang, Weizhe Zhang, Haijun Zhang, Sheng Di, Dingwen + // Tao, and Franck Cappello, "Performance Optimization for Relative-Error-Bounded Lossy Compression on + // Scientific Data", IEEE Transactions on Parallel and Distributed Systems (IEEE TPDS), 2020. 
Reference + // code: + // https://github.com/szcompressor/SZ/blob/a92658e785c072de1061f549c6cbc6d42d0f7f22/sz/src/Huffman.c#L345 + + int maxBits = 16; + size_t count = 0; + Node *t = &tree.ht[tree.root]; + Node *n = t; - if (n->t) { - out[count] = n->c + offset; + size_t tableSize = 1 << maxBits; + std::vector valueTable(tableSize); + std::vector lengthTable(tableSize); + std::vector nodeTable(tableSize); + size_t j; + for (size_t i = 0; i < tableSize; i++) { n = t; - count++; + j = 0; + size_t res = i; + while (!n->isLeaf() && j < maxBits) { + n = n->p[res & 0x00000001]; + res >>= 1; + j++; + } + if (!n->isLeaf()) { + nodeTable[i] = n; + valueTable[i] = -1; + lengthTable[i] = maxBits; + } else { + valueTable[i] = n->c + tree.offset; + lengthTable[i] = j; + } + } + + size_t leftBits = 0; + T currentValue = 0; + size_t i = 0; + + while (count < targetLength) { + while (leftBits < maxBits) { + currentValue += (bytes[i] << leftBits); + leftBits += 8; + i++; + } + + size_t index = currentValue & ((1 << maxBits) - 1); + T value = valueTable[index]; + if (value != -1) { + out[count] = value; + int bitLength = lengthTable[index]; + leftBits -= bitLength; + currentValue >>= bitLength; + count++; + } else { + int bitLength = lengthTable[index]; + leftBits -= bitLength; + currentValue >>= bitLength; + n = nodeTable[index]; + while (!n->isLeaf()) { + if (!leftBits) { + currentValue += (bytes[i] << leftBits); + leftBits += 8; + i++; + } + n = n->p[(currentValue & 0x01)]; + leftBits--; + currentValue >>= 1; + } + out[count] = n->c + tree.offset; + count++; + } + } + } else { + // for small huffman tree, use loop unrolling to increase the performance + // for(int i=0;ip[readBit(bytes,i++)]; + // if(u->isLeaf()){ + // out[outLen++]=u->c+tree.offset; + // u=&tree.ht[tree.root]; + // } + // } + + int byteIndex = 0; + int i = 0; + uchar b; + Node *u = &tree.ht[tree.root]; + auto offset = tree.offset; + + for (; i + 8 < len; i += 8, byteIndex++) { + b = bytes[byteIndex]; + + u = 
u->p[b & 1]; + if (u->isLeaf()) { + out[outLen++] = u->c + offset; + u = &tree.ht[tree.root]; + } + u = u->p[(b >> 1) & 1]; + if (u->isLeaf()) { + out[outLen++] = u->c + offset; + u = &tree.ht[tree.root]; + } + u = u->p[(b >> 2) & 1]; + if (u->isLeaf()) { + out[outLen++] = u->c + offset; + u = &tree.ht[tree.root]; + } + u = u->p[(b >> 3) & 1]; + if (u->isLeaf()) { + out[outLen++] = u->c + offset; + u = &tree.ht[tree.root]; + } + u = u->p[(b >> 4) & 1]; + if (u->isLeaf()) { + out[outLen++] = u->c + offset; + u = &tree.ht[tree.root]; + } + u = u->p[(b >> 5) & 1]; + if (u->isLeaf()) { + out[outLen++] = u->c + offset; + u = &tree.ht[tree.root]; + } + u = u->p[(b >> 6) & 1]; + if (u->isLeaf()) { + out[outLen++] = u->c + offset; + u = &tree.ht[tree.root]; + } + u = u->p[(b >> 7) & 1]; + if (u->isLeaf()) { + out[outLen++] = u->c + offset; + u = &tree.ht[tree.root]; + } + } + + b = bytes[byteIndex]; + + for (int j = 0; j < len - i; j++) { + u = u->p[(b >> j) & 1]; + if (u->isLeaf()) { + out[outLen++] = u->c + tree.offset; + u = &tree.ht[tree.root]; + } } } - bytes += encodedLength; + bytes += (len + 7) >> 3; + + // timer.stop("decode"); + return out; } - // empty function - void postprocess_decode() override { SZ_FreeHuffman(); } + void postprocess_decode() override {} - // load Huffman tree - void load(const uchar *&c, size_t &remaining_length) override { - read(offset, c, remaining_length); - nodeCount = bytesToInt32_bigEndian(c); - int stateNum = bytesToInt32_bigEndian(c + sizeof(int)) * 2; - size_t encodeStartIndex; - if (nodeCount <= 256) - encodeStartIndex = 1 + 3 * nodeCount * sizeof(unsigned char) + nodeCount * sizeof(T); - else if (nodeCount <= 65536) - encodeStartIndex = - 1 + 2 * nodeCount * sizeof(unsigned short) + nodeCount * sizeof(unsigned char) + nodeCount * sizeof(T); - else - encodeStartIndex = - 1 + 2 * nodeCount * sizeof(unsigned int) + nodeCount * sizeof(unsigned char) + nodeCount * sizeof(T); - - huffmanTree = createHuffmanTree(stateNum); - treeRoot = 
reconstruct_HuffTree_from_bytes_anyStates(c + sizeof(int) + sizeof(int), nodeCount); - c += sizeof(int) + sizeof(int) + encodeStartIndex; - loaded = true; + void save(uchar *&c) override { + // saveAsCode(c); + saveAsDFSOrder(c); } - bool isLoaded() const { return loaded; } + void load(const uchar *&c, size_t &remaining_length) override { + // loadAsCode(c,remaining_length); + loadAsDFSOrder(c, remaining_length); + } private: - HuffmanTree *huffmanTree = NULL; - node treeRoot; - unsigned int nodeCount = 0; - uchar sysEndianType; // 0: little endian, 1: big endian - bool loaded = false; - T offset; - - node reconstruct_HuffTree_from_bytes_anyStates(const unsigned char *bytes, uint nodeCount) { - if (nodeCount <= 256) { - unsigned char *L = static_cast(malloc(nodeCount * sizeof(unsigned char))); - memset(L, 0, nodeCount * sizeof(unsigned char)); - unsigned char *R = static_cast(malloc(nodeCount * sizeof(unsigned char))); - memset(R, 0, nodeCount * sizeof(unsigned char)); - T *C = static_cast(malloc(nodeCount * sizeof(T))); - memset(C, 0, nodeCount * sizeof(T)); - unsigned char *t = static_cast(malloc(nodeCount * sizeof(unsigned char))); - memset(t, 0, nodeCount * sizeof(unsigned char)); - // TODO: Endian type - // unsigned char cmpSysEndianType = bytes[0]; - // if(cmpSysEndianType!=(unsigned char)sysEndianType) - // { - // unsigned char* p = (unsigned char*)(bytes+1+2*nodeCount*sizeof(unsigned char)); - // size_t i = 0, size = nodeCount*sizeof(unsigned int); - // while(1) - // { - // symTransform_4bytes(p); - // i+=sizeof(unsigned int); - // if(inew_node2(C[0], t[0]); - this->unpad_tree(L, R, C, t, 0, root); - free(L); - free(R); - free(C); - free(t); - return root; - } else if (nodeCount <= 65536) { - unsigned short *L = static_cast(malloc(nodeCount * sizeof(unsigned short))); - memset(L, 0, nodeCount * sizeof(unsigned short)); - unsigned short *R = static_cast(malloc(nodeCount * sizeof(unsigned short))); - memset(R, 0, nodeCount * sizeof(unsigned short)); - T *C = 
static_cast(malloc(nodeCount * sizeof(T))); - memset(C, 0, nodeCount * sizeof(T)); - unsigned char *t = static_cast(malloc(nodeCount * sizeof(unsigned char))); - memset(t, 0, nodeCount * sizeof(unsigned char)); - - // TODO: Endian type - // unsigned char cmpSysEndianType = bytes[0]; - // if(cmpSysEndianType!=(unsigned char)sysEndianType) - // { - // unsigned char* p = (unsigned char*)(bytes+1); - // size_t i = 0, size = 3*nodeCount*sizeof(unsigned int); - // while(1) - // { - // symTransform_4bytes(p); - // i+=sizeof(unsigned int); - // if(inew_node2(0, 0); - this->unpad_tree(L, R, C, t, 0, root); - free(L); - free(R); - free(C); - free(t); - return root; - } else // nodeCount>65536 - { - unsigned int *L = static_cast(malloc(nodeCount * sizeof(unsigned int))); - memset(L, 0, nodeCount * sizeof(unsigned int)); - unsigned int *R = static_cast(malloc(nodeCount * sizeof(unsigned int))); - memset(R, 0, nodeCount * sizeof(unsigned int)); - T *C = static_cast(malloc(nodeCount * sizeof(T))); - memset(C, 0, nodeCount * sizeof(T)); - unsigned char *t = static_cast(malloc(nodeCount * sizeof(unsigned char))); - memset(t, 0, nodeCount * sizeof(unsigned char)); - // TODO: Endian type - // unsigned char cmpSysEndianType = bytes[0]; - // if(cmpSysEndianType!=(unsigned char)sysEndianType) - // { - // unsigned char* p = (unsigned char*)(bytes+1); - // size_t i = 0, size = 3*nodeCount*sizeof(unsigned int); - // while(1) - // { - // symTransform_4bytes(p); - // i+=sizeof(unsigned int); - // if(i= 1 && len <= sizeof(T) * 8); - memcpy(t, bytes + 1 + 2 * nodeCount * sizeof(unsigned int) + nodeCount * sizeof(T), - nodeCount * sizeof(unsigned char)); + if (len + index >= 8) { + mask |= (val & ((1 << (8 - index)) - 1)) << index; + val >>= 8 - index; + len -= 8 - index; + *c++ = mask; + mask = index = 0; - node root = this->new_node2(0, 0); - this->unpad_tree(L, R, C, t, 0, root); - free(L); - free(R); - free(C); - free(t); - return root; + while (len >= 8) { + *c++ = val & (1 << 8) - 1; + 
val >>= 8; + len -= 8; + } } + + mask |= (val & (1 << len) - 1) << index; + index += len; + + // for(int i=0;i>=1; + // } } - node new_node(size_t freq, T c, node a, node b) { - node n = huffmanTree->pool + huffmanTree->n_nodes++; - if (freq) { - n->c = c; - n->freq = freq; - n->t = 1; - } else { - n->left = a; - n->right = b; - n->freq = a->freq + b->freq; - n->t = 0; - // n->c = 0; + static void writeBytesByte(uchar *&c, uchar val) { *c++ = val; } + + static void writeBytesClearMask(uchar *&c, uchar &mask, uchar &index) { + if (index > 0) { + *c++ = mask; + // mask=i=0; } - return n; } - node new_node2(T c, unsigned char t) { - huffmanTree->pool[huffmanTree->n_nodes].c = c; - huffmanTree->pool[huffmanTree->n_nodes].t = t; - return huffmanTree->pool + huffmanTree->n_nodes++; - } + static uchar readBit(const uchar *const &c, int i) { return ((*(c + (i >> 3))) >> (i & 7)) & 1; } + + void saveAsCode(uchar *&c) { + // Timer timer(true); - /* priority queue */ - void qinsert(node n) { - int j, i = huffmanTree->qend++; - while ((j = (i >> 1))) // j=i/2 - { - if (huffmanTree->qq[j]->freq <= n->freq) break; - huffmanTree->qq[i] = huffmanTree->qq[j], i = j; + // uchar *head = c; + // whether the tree is full binary tree + + uchar &limit = tree.limit; + + std::vector> mp(limit + 1); + + if (tree.usemp) { + for (auto it : tree.mplen) { + mp[it.second].push_back(it.first); + } + } else { + for (int i = 0; i < tree.maxval; i++) { + mp[tree.veclen[i]].push_back(i); + } } - huffmanTree->qq[i] = n; - } - node qremove() { - int i, l; - node n = huffmanTree->qq[i = 1]; - node p; - if (huffmanTree->qend < 2) return nullptr; - huffmanTree->qend--; - huffmanTree->qq[i] = huffmanTree->qq[huffmanTree->qend]; - - while ((l = (i << 1)) < huffmanTree->qend) { // l=(i*2) - if (l + 1 < huffmanTree->qend && huffmanTree->qq[l + 1]->freq < huffmanTree->qq[l]->freq) l++; - if (huffmanTree->qq[i]->freq > huffmanTree->qq[l]->freq) { - p = huffmanTree->qq[i]; - huffmanTree->qq[i] = 
huffmanTree->qq[l]; - huffmanTree->qq[l] = p; - i = l; - } else { - break; + uchar mask = 0; + uchar index = 0; + + assert(sizeof(T) <= 8); + + if (mp[limit].size() == tree.n) { + // 00 XXXXXX (mbft) + if (tree.maxval > 1) + writeBytesByte(c, tree.mbft); + else + writeBytesByte(c, 0x80 | tree.mbft); + + writeBytesByte(c, ((sizeof(T) - 1) << 5) | (limit - 1)); + + writeBytes(c, tree.offset, sizeof(T) << 3, mask, index); + + int32ToBytes_bigEndian(c, tree.n); + c += 4; + + int cnt = mp[limit].size(); + + uchar logcnt = 0; + while (logcnt < 32 && (1 << logcnt) != cnt) ++logcnt; + assert(logcnt != 32); + + if (tree.n > 1) { + for (T it : mp[limit]) { + writeBytes(c, it, tree.mbft, mask, index); + + const int code = tree.usemp ? tree.mpcode[it] : tree.veccode[it]; + + writeBytes(c, code, logcnt, mask, index); + } + + writeBytesClearMask(c, mask, index); } + + return; } - return n; - } - /* walk the tree and put 0s and 1s */ - /** - * @out1 should be set to 0. - * @out2 should be 0 as well. - * @index: the index of the byte - * */ - void build_code(node n, int len, uint64_t out1, uint64_t out2) { - if (n->t) { - huffmanTree->code[n->c] = static_cast(malloc(2 * sizeof(uint64_t))); - if (len <= 64) { - (huffmanTree->code[n->c])[0] = out1 << (64 - len); - (huffmanTree->code[n->c])[1] = out2; - } else { - (huffmanTree->code[n->c])[0] = out1; - (huffmanTree->code[n->c])[1] = out2 << (128 - len); + writeBytesByte(c, 0x40 | tree.mbft); + + writeBytesByte(c, ((sizeof(T) - 1) << 5) | (limit - 1)); + + writeBytes(c, tree.offset, sizeof(T) << 3, mask, index); + + int32ToBytes_bigEndian(c, tree.maxval); + c += 4; + + for (uchar len = 1; len <= limit; len++) { + int cnt = mp[len].size(); + + writeBytes(c, cnt, len, mask, index); + + if (cnt) { + for (const T &it : mp[len]) { + writeBytes(c, it, tree.mbft, mask, index); + + const int code = tree.usemp ? 
tree.mpcode[it] : tree.veccode[it]; + + writeBytes(c, code, len, mask, index); + } } - huffmanTree->cout[n->c] = static_cast(len); + } + + writeBytesClearMask(c, mask, index); + + // timer.stop("saveAsCode"); + + // printf("n = %d\n",tree.n); + // + // printf("huffman tree size = %d\n",(int)(c-head)); + // + // Lossless_zstd zstd; + // size_t compressed_tree_size; + // + // // uchar *compressed_tree = zstd.compress(head,c-head,compressed_tree_size); + // delete[] zstd.compress(head,c-head,compressed_tree_size); + // + // printf("compressed huffman tree size = %d\n",(int)compressed_tree_size); + + // return; + } + + void saveAsDFSOrder(uchar *&c) { + // uchar *head = c; + + uchar mask = 0, index = 0; + + writeBytesByte(c, (tree.usemp << 7) | ((tree.n == 1) << 6) | tree.mbft); + writeBytes(c, tree.offset, sizeof(T) << 3, mask, index); + int64ToBytes_bigEndian(c, tree.n); + c += sizeof(size_t); + if (tree.usemp == 0x00) { + int64ToBytes_bigEndian(c, tree.maxval); + c += sizeof(size_t); + } + + if (tree.n == 0 || tree.n == 1) { + writeBytesClearMask(c, mask, index); return; } - int index = len >> 6; //=len/64 - if (index == 0) { - out1 = out1 << 1; - out1 = out1 | 0; - build_code(n->left, len + 1, out1, 0); - out1 = out1 | 1; - build_code(n->right, len + 1, out1, 0); - } else { - if (len % 64 != 0) out2 = out2 << 1; - out2 = out2 | 0; - build_code(n->left, len + 1, out1, out2); - out2 = out2 | 1; - build_code(n->right, len + 1, out1, out2); + + std::stack stk; + + stk.push(&tree.ht[tree.root]); + + while (!stk.empty()) { + Node *u = stk.top(); + stk.pop(); + if (u->isLeaf()) { + writeBytesBit(c, 0x01, mask, index); + writeBytes(c, u->c, tree.mbft, mask, index); + } else { + writeBytesBit(c, 0x00, mask, index); + stk.push(u->p[1]); + stk.push(u->p[0]); + } } + + writeBytesClearMask(c, mask, index); + + // printf("n = %d\n",tree.n); + // + // printf("huffman tree size = %d\n",(int)(c-head)); + // + // Lossless_zstd zstd; + // size_t compressed_tree_size; + // + // // 
uchar *compressed_tree = zstd.compress(head,c-head,compressed_tree_size); + // delete[] zstd.compress(head,c-head,compressed_tree_size); + // + // printf("compressed huffman tree size = %d\n",(int)compressed_tree_size); } - /** - * Compute the frequency of the data and build the Huffman tree - * @param HuffmanTree* huffmanTree (output) - * @param int *s (input) - * @param size_t length (input) - * */ - void init(const T *s, size_t length) { - T max = s[0]; - offset = s[0]; // offset is min + void loadAsCode(const uchar *&bytes, size_t &remaining_length) { + // Timer timer(true); + + tree.init(); + + uchar feature = (*bytes) >> 6; + tree.mbft = (*bytes) & 0x3f; + ++bytes; -#if INTPTR_MAX == INT64_MAX // 64bit system - ska::unordered_map frequency; -#else // most likely 32bit system - std::unordered_map frequency; -#endif // INTPTR_MAX == INT64_MAX + // uchar szT = ((*bytes) >> 5) + 1; + tree.limit = ((*bytes) & 0x1f) + 1; + ++bytes; - for (size_t i = 0; i < length; i++) { - frequency[s[i]]++; + // assert(szT == sizeof(T)); + + for (int i = 0; i < sizeof(T); i++) { + tree.offset |= static_cast(*bytes) << (i << 3); + ++bytes; + } + + tree.maxval = bytesToInt32_bigEndian(bytes); + bytes += 4; + + tree.usemp = tree.maxval >= (1 << 12) && (1 << (tree.limit - 1)) < tree.maxval ? 
1 : 0; + + if (tree.usemp) { + tree.ht.reserve(2 << tree.limit); + // tree.mpfreq.reserve(1< max) { - max = k; + tree.ht.push_back(Node()); + + if (feature == 0x00 || feature == 0x02) { + int i = 0; + tree.n = 1 << tree.limit; + if (feature == 0x02) { + tree.n = 1; + tree.ht.resize(2); + tree.root = 0; + tree.ht[0] = Node(0, &tree.ht[1]); + tree.ht[1] = Node(0); + tree.mplen[0] = 1; + tree.mpcode[0] = 0; + tree.setConstructed(); + return; } - if (k < offset) { - offset = k; + + for (int j = 0; j < tree.n; j++) { + T c = 0; + + for (uchar k = 0; k < tree.mbft; k++) { + c |= static_cast(readBit(bytes, i++)) << k; + } + + Node *u = &tree.ht[tree.root]; + int vec = 0; + + for (uchar k = 0; k < tree.limit; k++) { + int e = readBit(bytes, i++); + vec |= e << k; + + if (u->p[e] == nullptr) { + tree.ht.push_back(Node()); + u->p[e] = &tree.ht[tree.ht.size() - 1]; + } + + u = u->p[e]; + } + + u->c = c; + if (tree.usemp) { + tree.mplen[c] = tree.limit; + tree.mpcode[c] = vec; + } else { + tree.veclen[c] = tree.limit; + tree.veccode[c] = vec; + } } + + bytes += (i + 7) >> 3; + + tree.setConstructed(); + + return; } - int stateNum = max - offset + 2; - huffmanTree = createHuffmanTree(stateNum); + tree.n = 0; + + int i = 0; + + for (uchar len = 1; len <= tree.limit; len++) { + int cnt = 0; + + for (uchar j = 0; j < len; j++) { + cnt |= static_cast(readBit(bytes, i++)) << j; + } - for (const auto &f : frequency) { - qinsert(new_node(f.second, f.first - offset, nullptr, nullptr)); + for (int j = 0; j < cnt; j++) { + T c = 0; + + for (uchar k = 0; k < tree.mbft; k++) { + c |= static_cast(readBit(bytes, i++)) << k; + } + + Node *u = &tree.ht[0]; + int vec = 0; + + for (int k = 0; k < len; k++) { + int e = readBit(bytes, i++); + vec |= e << k; + if (u->p[e] == nullptr) { + tree.ht.push_back(Node()); + u->p[e] = &tree.ht[tree.ht.size() - 1]; + } + + u = u->p[e]; + } + + u->c = c; + ++tree.n; + if (tree.usemp) { + tree.mplen[c] = len; + tree.mpcode[c] = vec; + } else { + tree.veclen[c] 
= len; + tree.veccode[c] = vec; + } + } } - while (huffmanTree->qend > 2) qinsert(new_node(0, 0, qremove(), qremove())); + bytes += (i + 7) >> 3; - build_code(huffmanTree->qq[1], 0, 0, 0); - treeRoot = huffmanTree->qq[1]; + tree.setConstructed(); + + // timer.stop("loadAsCode"); } - template - void pad_tree(T1 *L, T1 *R, T *C, unsigned char *t, unsigned int i, node root) { - C[i] = root->c; - t[i] = root->t; - node lroot = root->left; - if (lroot != nullptr) { - huffmanTree->n_inode++; - L[i] = huffmanTree->n_inode; - pad_tree(L, R, C, t, huffmanTree->n_inode, lroot); - } - node rroot = root->right; - if (rroot != nullptr) { - huffmanTree->n_inode++; - R[i] = huffmanTree->n_inode; - pad_tree(L, R, C, t, huffmanTree->n_inode, rroot); + void loadAsDFSOrder(const uchar *&bytes, size_t &remaining_length) { + tree.init(); + + tree.usemp = (*bytes) >> 7; + tree.mbft = (*bytes) & 0x3f; + ++bytes; + + for (int i = 0; i < sizeof(T); i++) { + tree.offset |= static_cast(*bytes) << (i << 3); + ++bytes; } - } - template - void unpad_tree(T1 *L, T1 *R, T *C, unsigned char *t, unsigned int i, node root) { - // root->c = C[i]; - if (root->t == 0) { - T1 l, r; - l = L[i]; - if (l != 0) { - node lroot = new_node2(C[l], t[l]); - root->left = lroot; - unpad_tree(L, R, C, t, l, lroot); + tree.n = bytesToInt64_bigEndian(bytes); + bytes += sizeof(size_t); + tree.ht.reserve(tree.n << 1); + + if (tree.usemp == 0x00) { + tree.maxval = bytesToInt64_bigEndian(bytes); + bytes += sizeof(size_t); + if (tree.n > 0) { + tree.veccode.resize(tree.maxval); + tree.veclen.resize(tree.maxval); } - r = R[i]; - if (r != 0) { - node rroot = new_node2(C[r], t[r]); - root->right = rroot; - unpad_tree(L, R, C, t, r, rroot); + } + + if (tree.n == 0) { + tree.setConstructed(); + return; + } + + if (tree.n == 1) { + tree.ht.resize(2); + tree.root = 0; + tree.ht[0] = Node(0, &tree.ht[1]); + tree.ht[1] = Node(0); + if (tree.usemp) { + tree.mplen[0] = 1; + tree.mpcode[0] = 0; + } else { + tree.veclen[0] = 1; + 
tree.veccode[0] = 0; } + tree.setConstructed(); + return; } - } - template - unsigned int convert_HuffTree_to_bytes_anyStates(unsigned int nodeCount, unsigned char *out) { - T1 *L = static_cast(malloc(nodeCount * sizeof(T1))); - memset(L, 0, nodeCount * sizeof(T1)); - T1 *R = static_cast(malloc(nodeCount * sizeof(T1))); - memset(R, 0, nodeCount * sizeof(T1)); - T *C = static_cast(malloc(nodeCount * sizeof(T))); - memset(C, 0, nodeCount * sizeof(T)); - unsigned char *t = static_cast(malloc(nodeCount * sizeof(unsigned char))); - memset(t, 0, nodeCount * sizeof(unsigned char)); - - pad_tree(L, R, C, t, 0, huffmanTree->qq[1]); - - unsigned int totalSize = - 1 + 2 * nodeCount * sizeof(T1) + nodeCount * sizeof(unsigned char) + nodeCount * sizeof(T); - //*out = (unsigned char*)malloc(totalSize); - out[0] = sysEndianType; - memcpy(out + 1, L, nodeCount * sizeof(T1)); - memcpy(out + 1 + nodeCount * sizeof(T1), R, nodeCount * sizeof(T1)); - memcpy(out + 1 + 2 * nodeCount * sizeof(T1), C, nodeCount * sizeof(T)); - memcpy(out + 1 + 2 * nodeCount * sizeof(T1) + nodeCount * sizeof(T), t, nodeCount * sizeof(unsigned char)); - - free(L); - free(R); - free(C); - free(t); - return totalSize; - } + tree.root = 0; + tree.ht.push_back(Node()); + std::stack stk; + stk.push(&tree.ht[0]); + size_t i = 1; - void SZ_FreeHuffman() { - if (huffmanTree != NULL) { - size_t i; - free(huffmanTree->pool); - huffmanTree->pool = NULL; - free(huffmanTree->qqq); - huffmanTree->qqq = NULL; - for (i = 0; i < huffmanTree->stateNum; i++) { - if (huffmanTree->code[i] != NULL) free(huffmanTree->code[i]); - } - free(huffmanTree->code); - huffmanTree->code = NULL; - free(huffmanTree->cout); - huffmanTree->cout = NULL; - free(huffmanTree); - huffmanTree = NULL; + while (!stk.empty()) { + Node *u = stk.top(); + + if (readBit(bytes, i++) == 0x00) { + tree.ht.push_back(Node()); + if (u->p[0] == nullptr) { + u->p[0] = &tree.ht[tree.ht.size() - 1]; + } else { + u->p[1] = &tree.ht[tree.ht.size() - 1]; + } + 
stk.push(&tree.ht[tree.ht.size() - 1]); + } else { + T c = 0; + for (int j = 0; j < tree.mbft; j++) c |= static_cast(readBit(bytes, i++)) << j; + tree.ht.push_back(Node(c)); + if (u->p[0] == nullptr) + u->p[0] = &tree.ht[tree.ht.size() - 1]; + else + u->p[1] = &tree.ht[tree.ht.size() - 1]; + while (!stk.empty() && stk.top()->p[1] != nullptr) { + // Node *tem=stk.top(); + stk.pop(); + // if(!stk.empty()){ + // if(stk.top()->p[0]==nullptr) stk.top()->p[0]=tem; + // else stk.top()->p[1]=tem; + // } + } + } } + + bytes += (i + 7) >> 3; + + if (tree.usemp) { + tree.dfs_mp(&tree.ht[tree.root]); + } else { + tree.dfs_vec(&tree.ht[tree.root]); + } + + tree.setConstructed(); } }; } // namespace SZ3 From f1c21b66cb3dec3592ae5d58ef33291ac1e9b168 Mon Sep 17 00:00:00 2001 From: Kai Zhao Date: Sat, 7 Dec 2024 16:08:24 -0500 Subject: [PATCH 21/23] BioMDXTC bugfix 1. checking the double value of quantization before casting it to int, avoid out of range error 2. adjusting XTCEncoder to support arbitrary input length. Previous it requires input length is a multiple of 3. 3. 
checking the input value range before XTC encoding, throw exception if |value| > maxInt/2 --- .../decomposition/SZBioMDXtcDecomposition.hpp | 21 ++++++++++++++++--- include/SZ3/encoder/XtcBasedEncoder.hpp | 11 ++++++++-- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/include/SZ3/decomposition/SZBioMDXtcDecomposition.hpp b/include/SZ3/decomposition/SZBioMDXtcDecomposition.hpp index 66eb8141..3f1088c7 100644 --- a/include/SZ3/decomposition/SZBioMDXtcDecomposition.hpp +++ b/include/SZ3/decomposition/SZBioMDXtcDecomposition.hpp @@ -71,9 +71,17 @@ class SZBioMDXtcDecomposition : public concepts::DecompositionInterface(std::numeric_limits::min())/4; + const auto INT_MAX_D = static_cast(std::numeric_limits::max())/4; for (size_t i = 0; i < conf.num; i++) { - quantData[i] = std::floor(data[i] * reciprocalPrecision + 0.5); + double quant = std::floor(data[i] * reciprocalPrecision + 0.5); + // Range checking + if (quant < INT_MIN_D || quant > INT_MAX_D) { + throw std::out_of_range("Quantization value out of int range in SZBioMDXtcDecomposition, consider " + "increasing the error bound"); + } + quantData[i] = static_cast(quant); } return quantData; } @@ -143,6 +151,8 @@ class SZBioMDXtcDecomposition : public concepts::DecompositionInterface(std::numeric_limits::min())/4; + const auto INT_MAX_D = static_cast(std::numeric_limits::max())/4; for (size_t i = 0; i < lastFrame; i++) // time { @@ -151,7 +161,12 @@ class SZBioMDXtcDecomposition : public concepts::DecompositionInterface INT_MAX_D) { + throw std::out_of_range("Quantization value out of int range in SZBioMDXtcDecomposition, consider " + "increasing the error bound"); + } + quantData[idx] = static_cast(quant); } } } diff --git a/include/SZ3/encoder/XtcBasedEncoder.hpp b/include/SZ3/encoder/XtcBasedEncoder.hpp index 88346a56..bc429612 100644 --- a/include/SZ3/encoder/XtcBasedEncoder.hpp +++ b/include/SZ3/encoder/XtcBasedEncoder.hpp @@ -343,7 +343,7 @@ class XtcBasedEncoder : public 
concepts::EncoderInterface { int oldLocalValue2 = 0; int oldLocalValue3 = 0; const int *inputDataPtr = quantData.data(); - while (inputDataPtr < quantData.data() + size3) { + while (inputDataPtr + 2 < quantData.data() + size3) { int localValue1 = *inputDataPtr++; if (localValue1 < minInt[0]) { minInt[0] = localValue1; @@ -389,13 +389,20 @@ class XtcBasedEncoder : public concepts::EncoderInterface { if (static_cast(maxInt[0]) - static_cast(minInt[0]) >= maxAbsoluteInt || static_cast(maxInt[1]) - static_cast(minInt[1]) >= maxAbsoluteInt || - static_cast(maxInt[2]) - static_cast(minInt[2]) >= maxAbsoluteInt) { + static_cast(maxInt[2]) - static_cast(minInt[2]) >= maxAbsoluteInt || + static_cast(maxInt[0]) >= maxAbsoluteInt/4 || + static_cast(maxInt[1]) >= maxAbsoluteInt/4 || + static_cast(maxInt[2]) >= maxAbsoluteInt/4 || + static_cast(minInt[0]) <= -maxAbsoluteInt/4 || + static_cast(minInt[1]) <= -maxAbsoluteInt/4 || + static_cast(minInt[2]) <= -maxAbsoluteInt/4 ){ /* turning value in unsigned by subtracting minInt * would cause overflow */ fprintf(stderr, "Error. Turning value in unsigned by subtracting minInt would cause " "overflow.\n"); + throw std::runtime_error("Error. 
Turning value in unsigned by subtracting minInt would cause overflow."); } unsigned int sizeInt[3]; sizeInt[0] = maxInt[0] - minInt[0] + 1; From d3e8732fd88c6bdeacb12120933bafd6ee553ed5 Mon Sep 17 00:00:00 2001 From: Kai Zhao Date: Sat, 7 Dec 2024 16:08:53 -0500 Subject: [PATCH 22/23] tools update --- tools/H5Z-SZ3/src/H5Z_SZ3.cpp | 4 ++-- tools/H5Z-SZ3/test/h5repack.sh | 2 +- tools/H5Z-SZ3/test/print_h5repack_args.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/H5Z-SZ3/src/H5Z_SZ3.cpp b/tools/H5Z-SZ3/src/H5Z_SZ3.cpp index 35a11b93..a4e68a1a 100644 --- a/tools/H5Z-SZ3/src/H5Z_SZ3.cpp +++ b/tools/H5Z-SZ3/src/H5Z_SZ3.cpp @@ -72,7 +72,7 @@ herr_t get_SZ3_conf_from_H5(const hid_t propertyList, SZ3::Config &conf) { } static herr_t H5Z_sz3_set_local(hid_t dcpl_id, hid_t type_id, hid_t chunk_space_id) { - printf("start H5Z_sz3_set_local\n"); + // printf("start H5Z_sz3_set_local\n"); // printf("start in H5Z_sz3_set_local, dcpl_id = %d\n", dcpl_id); static char const *_funcname_ = "H5Z_sz3_set_local"; @@ -172,7 +172,7 @@ void process_data(SZ3::Config &conf, void **buf, size_t *buf_size, size_t nbytes */ static size_t H5Z_filter_sz3(unsigned int flags, size_t cd_nelmts, const unsigned int cd_values[], size_t nbytes, size_t *buf_size, void **buf) { - printf("start H5Z_filter_sz3\n"); + // printf("start H5Z_filter_sz3\n"); if (cd_nelmts == 0) // this is special data such as string, which should not be treated as values. 
return nbytes; diff --git a/tools/H5Z-SZ3/test/h5repack.sh b/tools/H5Z-SZ3/test/h5repack.sh index 24fead4a..18539e8d 100755 --- a/tools/H5Z-SZ3/test/h5repack.sh +++ b/tools/H5Z-SZ3/test/h5repack.sh @@ -16,4 +16,4 @@ repackArgs=$("${script_dir}/print_h5repack_args" -c "$configFile") echo $repackArgs echo "Executing h5repack-shared with the following parameters: $repackArgs $inputFile $outputFile" -h5repack-shared $repackArgs $inputFile $outputFile \ No newline at end of file +h5repack-shared -f $repackArgs $inputFile $outputFile \ No newline at end of file diff --git a/tools/H5Z-SZ3/test/print_h5repack_args.cpp b/tools/H5Z-SZ3/test/print_h5repack_args.cpp index 72088820..3c05c199 100644 --- a/tools/H5Z-SZ3/test/print_h5repack_args.cpp +++ b/tools/H5Z-SZ3/test/print_h5repack_args.cpp @@ -49,7 +49,7 @@ int main(int argc, char *argv[]) { printf("h5repack can only take 20 cd_values, but got %d\n cd_values in SZ3", cd_nelmts); return 0; } - printf("-f UD=32024,0,%d", cd_nelmts); + printf("UD=32024,0,%d", cd_nelmts); for (int i = 0; i < cd_nelmts; i++) { printf(",%u", cd_values[i]); } From d3e02201755f6018d3777289d2b93da658d4e9c3 Mon Sep 17 00:00:00 2001 From: Kai Zhao Date: Tue, 10 Dec 2024 21:13:41 -0500 Subject: [PATCH 23/23] update BIOMDXTC to use LinearQuantizer --- include/SZ3/api/impl/SZAlgoBioMD.hpp | 98 ++++++++++--------- .../decomposition/SZBioMDDecomposition.hpp | 13 +-- .../decomposition/SZBioMDXtcDecomposition.hpp | 77 +++++---------- 3 files changed, 77 insertions(+), 111 deletions(-) diff --git a/include/SZ3/api/impl/SZAlgoBioMD.hpp b/include/SZ3/api/impl/SZAlgoBioMD.hpp index 3e1a9d02..df5103ed 100644 --- a/include/SZ3/api/impl/SZAlgoBioMD.hpp +++ b/include/SZ3/api/impl/SZAlgoBioMD.hpp @@ -1,59 +1,61 @@ #ifndef SZ3_SZ_BIOMD_HPP #define SZ3_SZ_BIOMD_HPP -#include "SZ3/quantizer/LinearQuantizer.hpp" -#include "SZ3/decomposition/SZBioMDXtcDecomposition.hpp" #include "SZ3/decomposition/SZBioMDDecomposition.hpp" +#include 
"SZ3/decomposition/SZBioMDXtcDecomposition.hpp" +#include "SZ3/def.hpp" #include "SZ3/encoder/XtcBasedEncoder.hpp" -#include "SZ3/lossless/Lossless_zstd.hpp" #include "SZ3/lossless/Lossless_bypass.hpp" -#include "SZ3/utils/Statistic.hpp" +#include "SZ3/lossless/Lossless_zstd.hpp" +#include "SZ3/quantizer/LinearQuantizer.hpp" #include "SZ3/utils/Config.hpp" -#include "SZ3/def.hpp" +#include "SZ3/utils/Statistic.hpp" namespace SZ3 { - - template - size_t SZ_compress_bioMD(Config &conf, T *data, uchar *cmpData, size_t cmpCap) { - assert(N == conf.N); - assert(conf.cmprAlgo == ALGO_BIOMD); - calAbsErrorBound(conf, data); - - auto quantizer = LinearQuantizer(conf.absErrorBound, conf.quantbinCnt / 2); - auto sz = make_compressor_sz_generic(make_decomposition_biomd(conf, quantizer), HuffmanEncoder(), - Lossless_zstd()); - return sz->compress(conf, data, cmpData, cmpCap); - } - - template - void SZ_decompress_bioMD(const Config &conf, const uchar *cmpData, size_t cmpSize, T *decData) { - assert(conf.cmprAlgo == ALGO_BIOMD); - - LinearQuantizer quantizer; - auto sz = make_compressor_sz_generic(make_decomposition_biomd(conf, quantizer), - HuffmanEncoder(), Lossless_zstd()); - sz->decompress(conf, cmpData, cmpSize, decData); - } - - template - size_t SZ_compress_bioMDXtcBased(Config &conf, T *data, uchar *cmpData, size_t cmpCap) { - assert(N == conf.N); - assert(conf.cmprAlgo == ALGO_BIOMDXTC); - calAbsErrorBound(conf, data); - - auto sz = make_compressor_sz_generic(SZBioMDXtcDecomposition(conf), XtcBasedEncoder(), - Lossless_bypass()); - return sz->compress(conf, data, cmpData, cmpCap); - } - - template - void SZ_decompress_bioMDXtcBased(const Config &conf, const uchar *cmpData, size_t cmpSize, T *decData) { - assert(conf.cmprAlgo == ALGO_BIOMDXTC); - - auto sz = make_compressor_sz_generic(SZBioMDXtcDecomposition(conf), - XtcBasedEncoder(), Lossless_bypass()); - sz->decompress(conf, cmpData, cmpSize, decData); - } - + +template +size_t SZ_compress_bioMD(Config &conf, T 
*data, uchar *cmpData, size_t cmpCap) { + assert(N == conf.N); + assert(conf.cmprAlgo == ALGO_BIOMD); + calAbsErrorBound(conf, data); + + auto quantizer = LinearQuantizer(conf.absErrorBound, conf.quantbinCnt / 2); + auto sz = make_compressor_sz_generic(make_decomposition_biomd(conf, quantizer), HuffmanEncoder(), + Lossless_zstd()); + return sz->compress(conf, data, cmpData, cmpCap); +} + +template +void SZ_decompress_bioMD(const Config &conf, const uchar *cmpData, size_t cmpSize, T *decData) { + assert(conf.cmprAlgo == ALGO_BIOMD); + + LinearQuantizer quantizer; + auto sz = make_compressor_sz_generic(make_decomposition_biomd(conf, quantizer), HuffmanEncoder(), + Lossless_zstd()); + sz->decompress(conf, cmpData, cmpSize, decData); } + +template +size_t SZ_compress_bioMDXtcBased(Config &conf, T *data, uchar *cmpData, size_t cmpCap) { + assert(N == conf.N); + assert(conf.cmprAlgo == ALGO_BIOMDXTC); + calAbsErrorBound(conf, data); + + auto quantizer = LinearQuantizer(conf.absErrorBound, std::numeric_limits::max() / 16); + auto sz = make_compressor_sz_generic(make_decomposition_biomdxtc(conf, quantizer), + XtcBasedEncoder(), Lossless_bypass()); + return sz->compress(conf, data, cmpData, cmpCap); +} + +template +void SZ_decompress_bioMDXtcBased(const Config &conf, const uchar *cmpData, size_t cmpSize, T *decData) { + assert(conf.cmprAlgo == ALGO_BIOMDXTC); + + LinearQuantizer quantizer; + auto sz = make_compressor_sz_generic(make_decomposition_biomdxtc(conf, quantizer), + XtcBasedEncoder(), Lossless_bypass()); + sz->decompress(conf, cmpData, cmpSize, decData); +} + +} // namespace SZ3 #endif diff --git a/include/SZ3/decomposition/SZBioMDDecomposition.hpp b/include/SZ3/decomposition/SZBioMDDecomposition.hpp index 2bed33f1..47aa1370 100644 --- a/include/SZ3/decomposition/SZBioMDDecomposition.hpp +++ b/include/SZ3/decomposition/SZBioMDDecomposition.hpp @@ -17,15 +17,10 @@ class SZBioMDDecomposition : public concepts::DecompositionInterface public: SZBioMDDecomposition(const 
Config &conf, Quantizer quantizer) : quantizer(quantizer), conf(conf) { if (N != 1 && N != 2 && N != 3) { - throw std::invalid_argument("SZBioFront only support 1D, 2D or 3D data"); + throw std::invalid_argument("SZBioMDDecomposition only support 1D, 2D or 3D data"); } } - ~SZBioMDDecomposition() { - // clear(); - } - - std::vector compress(const Config &conf, T *data) override { if (N == 1) { return compress_1d(data); @@ -353,9 +348,9 @@ class SZBioMDDecomposition : public concepts::DecompositionInterface T fillValue_; }; -template -SZBioMDDecomposition make_decomposition_biomd(const Config &conf, Predictor predictor) { - return SZBioMDDecomposition(conf, predictor); +template +SZBioMDDecomposition make_decomposition_biomd(const Config &conf, Quantizer quantizer) { + return SZBioMDDecomposition(conf, quantizer); } } // namespace SZ3 diff --git a/include/SZ3/decomposition/SZBioMDXtcDecomposition.hpp b/include/SZ3/decomposition/SZBioMDXtcDecomposition.hpp index 3f1088c7..0a4e39dd 100644 --- a/include/SZ3/decomposition/SZBioMDXtcDecomposition.hpp +++ b/include/SZ3/decomposition/SZBioMDXtcDecomposition.hpp @@ -13,12 +13,12 @@ namespace SZ3 { -template +template class SZBioMDXtcDecomposition : public concepts::DecompositionInterface { public: - SZBioMDXtcDecomposition(const Config &conf) : conf(conf) { + SZBioMDXtcDecomposition(const Config &conf, Quantizer quantizer) : quantizer(quantizer), conf(conf) { if (N != 1 && N != 2 && N != 3) { - throw std::invalid_argument("SZBioFront only support 1D, 2D or 3D data"); + throw std::invalid_argument("SZBioMDXtcDecomposition only support 1D, 2D or 3D data"); } } @@ -41,20 +41,18 @@ class SZBioMDXtcDecomposition : public concepts::DecompositionInterface get_out_range() override { return {0, 0}; } + std::pair get_out_range() override { return quantizer.get_out_range(); } size_t get_num_elements() const { if (N == 3) { @@ -66,35 +64,18 @@ class SZBioMDXtcDecomposition : public concepts::DecompositionInterface compressSingleFrame(T 
*data) { std::vector quantData(conf.num); - - /* To prevent that potential rounding errors make the error slightly larger than the - * absolute error bound, scale down the error limit slightly. - * The precision is twice the required maximum error. */ - double reciprocalPrecision = 1.0 / (conf.absErrorBound * 0.99999 * 2.0); - // Define the range limits for int as double - const auto INT_MIN_D = static_cast(std::numeric_limits::min())/4; - const auto INT_MAX_D = static_cast(std::numeric_limits::max())/4; for (size_t i = 0; i < conf.num; i++) { - double quant = std::floor(data[i] * reciprocalPrecision + 0.5); - // Range checking - if (quant < INT_MIN_D || quant > INT_MAX_D) { - throw std::out_of_range("Quantization value out of int range in SZBioMDXtcDecomposition, consider " - "increasing the error bound"); - } - quantData[i] = static_cast(quant); + quantData[i] = quantizer.quantize_and_overwrite(data[i], 0); } + quantizer.postcompress_data(); return quantData; } T *decompressSingleFrame(std::vector &quantData, T *decData) { - /* To prevent that potential rounding errors make the error slightly larger than the - * absolute error bound, scale down the error limit slightly. - * The precision is twice the required maximum error. */ - double precision = conf.absErrorBound * 0.99999 * 2.0; - for (size_t i = 0; i < conf.num; i++) { - decData[i] = quantData[i] * precision; + decData[i] = quantizer.recover(0, quantData[i]); } + quantizer.postdecompress_data(); return decData; } @@ -147,29 +128,15 @@ class SZBioMDXtcDecomposition : public concepts::DecompositionInterface quantData(lastFrame * dims[1] * dims[2]); - /* To prevent that potential rounding errors make the error slightly larger than the - * absolute error bound, scale down the error limit slightly. - * The precision is twice the required maximum error. 
*/ - double reciprocalPrecision = 1.0 / (conf.absErrorBound * 0.99999 * 2.0); - const auto INT_MIN_D = static_cast(std::numeric_limits::min())/4; - const auto INT_MAX_D = static_cast(std::numeric_limits::max())/4; - - for (size_t i = 0; i < lastFrame; i++) // time - { - for (size_t j = 0; j < dims[1]; j++) // atoms - { - for (size_t k = 0; k < dims[2]; k++) // xyz - { + for (size_t i = 0; i < lastFrame; i++) { // time + for (size_t j = 0; j < dims[1]; j++) { // atoms + for (size_t k = 0; k < dims[2]; k++) { // xyz size_t idx = i * stride[0] + j * stride[1] + k; - double quant = std::floor(data[idx] * reciprocalPrecision + 0.5); - if (quant < INT_MIN_D || quant > INT_MAX_D) { - throw std::out_of_range("Quantization value out of int range in SZBioMDXtcDecomposition, consider " - "increasing the error bound"); - } - quantData[idx] = static_cast(quant); + quantData[idx] = quantizer.quantize_and_overwrite(data[idx], 0); } } } + quantizer.postcompress_data(); return quantData; } @@ -182,19 +149,15 @@ class SZBioMDXtcDecomposition : public concepts::DecompositionInterface +SZBioMDXtcDecomposition make_decomposition_biomdxtc(const Config &conf, Quantizer quantizer) { + return SZBioMDXtcDecomposition(conf, quantizer); +} + } // namespace SZ3 #endif