Update LZ4 to 1.10.0 by Sewer56 · Pull Request #25 · picoHz/lzzzz · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Update LZ4 to 1.10.0 #25

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/common/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use std::ffi::CStr;
/// # Example
///
/// ```
/// assert_eq!(lzzzz::version_number(), 10904); // 1.9.4
/// assert_eq!(lzzzz::version_number(), 11000); // 1.10.0
/// ```
pub fn version_number() -> u32 {
unsafe { binding::LZ4_versionNumber() as u32 }
Expand All @@ -19,7 +19,7 @@ pub fn version_number() -> u32 {
/// # Example
///
/// ```
/// assert_eq!(lzzzz::version_string(), "1.9.4");
/// assert_eq!(lzzzz::version_string(), "1.10.0");
/// ```
pub fn version_string() -> &'static str {
unsafe {
Expand Down
6 changes: 6 additions & 0 deletions src/lz4/binding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ extern "C" {
pub fn LZ4_freeStream(ptr: *mut LZ4Stream) -> c_int;
pub fn LZ4_initStream(buffer: *mut c_void, size: usize) -> *mut LZ4Stream;
pub fn LZ4_loadDict(ptr: *mut LZ4Stream, dictionary: *const c_char, dict_size: c_int) -> c_int;
/// Binding to the C function `LZ4_loadDictSlow`; it is declared with the same
/// argument list and return type as `LZ4_loadDict`.
pub fn LZ4_loadDictSlow(ptr: *mut LZ4Stream, dictionary: *const c_char, dict_size: c_int) -> c_int;
pub fn LZ4_saveDict(
ptr: *mut LZ4Stream,
safe_buffer: *mut c_char,
Expand Down Expand Up @@ -101,4 +102,9 @@ extern "C" {
src_size: c_int,
dst_capacity: c_int,
) -> c_int;
/// Binding to the C function `LZ4_attach_dictionary`.
///
/// The stream wrapper in this crate passes a null `dictionary_stream` when it
/// is asked to attach no dictionary.
pub fn LZ4_attach_dictionary(
working_stream: *mut LZ4Stream,
dictionary_stream: *const LZ4Stream,
);
/// Binding to the C function `LZ4_resetStream_fast`; the stream wrapper in this
/// crate calls it before detaching a dictionary.
pub fn LZ4_resetStream_fast(streamPtr: *mut LZ4Stream);
}
24 changes: 24 additions & 0 deletions src/lz4/stream/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use std::{
mem::{size_of, MaybeUninit},
os::raw::{c_char, c_int, c_void},
ptr::NonNull,
ptr::null_mut
};

#[allow(clippy::large_enum_variant)]
Expand Down Expand Up @@ -75,6 +76,16 @@ impl CompressionContext {
}
}

/// Loads `dict` into this stream through `LZ4_loadDictSlow`.
///
/// The C function receives the dictionary length as a `c_int`. A plain
/// `as c_int` cast would wrap for slices longer than `c_int::MAX` bytes and
/// hand LZ4 a negative or unrelated length, so at most the trailing
/// `c_int::MAX` bytes of `dict` are passed. Shorter dictionaries are passed
/// unchanged.
///
/// The value returned by `LZ4_loadDictSlow` is ignored.
pub fn load_dict_slow(&mut self, dict: &[u8]) {
    // Keep the tail rather than the head, so the end of the region handed to
    // LZ4 is still the true end of `dict`.
    let max_len = c_int::MAX as usize;
    let tail = &dict[dict.len().saturating_sub(max_len)..];

    // SAFETY: `tail` is a live slice, so its pointer is valid for reads of
    // `tail.len()` bytes, and `tail.len()` is at most `c_int::MAX`, so the
    // cast below is lossless. `self.get_ptr()` is presumed to return the
    // stream owned by this context (its definition is not visible here).
    unsafe {
        binding::LZ4_loadDictSlow(
            self.get_ptr(),
            tail.as_ptr() as *const c_char,
            tail.len() as c_int,
        );
    }
}

pub fn save_dict(&mut self, dict: &mut [u8]) {
unsafe {
binding::LZ4_saveDict(
Expand All @@ -84,6 +95,19 @@ impl CompressionContext {
);
}
}

/// Attaches `dict_stream` to this stream with `LZ4_attach_dictionary`.
///
/// With `None`, this stream is first reset through `LZ4_resetStream_fast`
/// and a null dictionary pointer is then passed, which detaches any
/// dictionary.
pub fn attach_dict(&mut self, dict_stream: Option<&mut CompressionContext>) {
    unsafe {
        let dict_ptr = match dict_stream {
            Some(ctx) => ctx.get_ptr(),
            None => {
                // Note(sewer56): When detaching dictionary, we need to reset the stream state.
                // This behaviour is consistent with what the LZ4 library itself does internally.
                binding::LZ4_resetStream_fast(self.get_ptr());
                null_mut()
            }
        };
        binding::LZ4_attach_dictionary(self.get_ptr(), dict_ptr);
    }
}
}

impl Drop for CompressionContext {
Expand Down
58 changes: 58 additions & 0 deletions src/lz4/stream/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,21 @@ impl<'a> Compressor<'a> {
Ok(comp)
}

/// Creates a new `Compressor` with a dictionary.
///
/// This variant spends more time during initialization to reference the
/// dictionary more thoroughly, which yields slightly better compression
/// ratios at the expense of setup time.
pub fn with_dict_slow<D>(dict: D) -> Result<Self>
where
    D: Into<Cow<'a, [u8]>>,
{
    // Convert the dictionary first, then build the rest of the compressor.
    let dict = Pin::new(dict.into());
    let mut comp = Self {
        dict,
        ..Self::new()?
    };
    // Load from the copy stored in `comp`, not from the caller's argument.
    comp.ctx.load_dict_slow(&comp.dict);
    Ok(comp)
}

/// Performs LZ4 streaming compression.
///
/// Returns the number of bytes written into the destination buffer.
Expand Down Expand Up @@ -103,6 +118,49 @@ impl<'a> Compressor<'a> {
self.safe_buf.resize(DICTIONARY_SIZE, 0);
self.ctx.save_dict(&mut self.safe_buf);
}

/// Attaches a dictionary stream for efficient dictionary reuse.
///
/// This allows efficient re-use of a static dictionary multiple times by referencing
/// the dictionary stream in-place rather than copying it.
///
/// # Arguments
///
/// * `dict_stream` - The dictionary stream to attach, or None to unset any existing dictionary
///
/// # Notes
///
/// - The dictionary stream must have been prepared using `with_dict()` or `with_dict_slow()`
/// - The dictionary will only remain attached through the first compression call
/// - The dictionary stream (and its source buffer) must remain valid through the compression session
///
/// # Example
///
/// ```
/// use lzzzz::lz4;
9E7A ///
/// let dict_data = b"some dictionary data";
/// let data = b"data to compress";
///
/// // Create dictionary stream
/// let mut dict_comp = lz4::Compressor::with_dict(dict_data)?;
///
/// // Create working stream and attach dictionary
/// let mut comp = lz4::Compressor::new()?;
/// comp.attach_dict(Some(&mut dict_comp));
///
/// // Compress data using the attached dictionary
/// let mut buf = [0u8; 256];
/// let len = comp.next(data, &mut buf, lz4::ACC_LEVEL_DEFAULT)?;
/// # Ok::<(), std::io::Error>(())
/// ```
pub fn attach_dict(&mut self, dict_stream: Option<&mut Compressor<'a>>) {
if let Some(dict) = dict_stream {
self.ctx.attach_dict(Some(&mut dict.ctx));
} else {
self.ctx.attach_dict(None);
}
}
}

/// Streaming LZ4 decompressor.
Expand Down
9 changes: 9 additions & 0 deletions src/lz4_hc/binding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,13 @@ extern "C" {
) -> c_int;
pub fn LZ4_setCompressionLevel(ptr: *mut LZ4StreamHC, compression_level: c_int);
pub fn LZ4_favorDecompressionSpeed(ptr: *mut LZ4StreamHC, favor: c_int);
/// Binding to the C function `LZ4_attach_HC_dictionary`.
///
/// The HC stream wrapper in this crate passes a null `dictionary_stream` when
/// it is asked to attach no dictionary.
pub fn LZ4_attach_HC_dictionary(
working_stream: *mut LZ4StreamHC,
dictionary_stream: *const LZ4StreamHC
);

/// Binding to the C function `LZ4_resetStreamHC_fast`; the HC stream wrapper in
/// this crate calls it, with the caller-supplied level, before detaching a
/// dictionary.
pub fn LZ4_resetStreamHC_fast(
streamPtr: *mut LZ4StreamHC,
compressionLevel: c_int
);
}
15 changes: 15 additions & 0 deletions src/lz4_hc/stream/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use crate::{Error, ErrorKind, Result};
use std::{
os::raw::{c_char, c_int},
ptr::NonNull,
ptr::null_mut
};

pub struct CompressionContext {
Expand Down Expand Up @@ -91,6 +92,20 @@ impl CompressionContext {
Err(Error::new(ErrorKind::CompressionFailed))
}
}

/// Attaches `dict_stream` to this stream with `LZ4_attach_HC_dictionary`.
///
/// With `None`, this stream is first reset through `LZ4_resetStreamHC_fast`
/// using `compression_level`, and a null dictionary pointer is then passed,
/// which detaches any dictionary. `compression_level` is not used when a
/// dictionary stream is supplied.
pub fn attach_dict(&mut self, dict_stream: Option<&CompressionContext>, compression_level: i32) {
    unsafe {
        let dict_ptr = match dict_stream {
            Some(ctx) => ctx.stream.as_ptr(),
            None => {
                // Note(sewer56): When detaching dictionary, we need to reset the stream state.
                // This behaviour is consistent with what the LZ4 library itself does internally.
                // The LZ4HC API does not have a way to retrieve compression level, so we must
                // pass it manually, since the HC API differs here.
                binding::LZ4_resetStreamHC_fast(self.stream.as_ptr(), compression_level);
                null_mut()
            }
        };
        binding::LZ4_attach_HC_dictionary(self.stream.as_ptr(), dict_ptr);
    }
}
}

impl Drop for CompressionContext {
Expand Down
54 changes: 53 additions & 1 deletion src/lz4_hc/stream/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,22 @@ impl<'a> Compressor<'a> {
}

/// Creates a new `Compressor` with a dictionary.
///
/// `compression_level` is applied to the new stream before `dict` is loaded.
pub fn with_dict<D>(dict: D) -> Result<Self>
pub fn with_dict<D>(dict: D, compression_level: i32) -> Result<Self>
where
D: Into<Cow<'a, [u8]>>,
{
// Note(sewer56):
// The LZ4 documentation states that, in order for LZ4_loadDictHC() to
// create the correct data structure, it is essential to set the
// compression level _before_ loading the dictionary.
// Therefore this API requires a `compression_level`.

let mut comp = Self {
dict: Pin::new(dict.into()),
..Self::new()?
};

// Order matters here: set the level first, then load the dictionary.
comp.ctx.set_compression_level(compression_level);
comp.ctx.load_dict(&comp.dict);
Ok(comp)
}
Expand Down Expand Up @@ -151,4 +159,48 @@ impl<'a> Compressor<'a> {
self.safe_buf.resize(DICTIONARY_SIZE, 0);
self.ctx.save_dict(&mut self.safe_buf);
}

/// Attaches a dictionary stream for efficient dictionary reuse.
///
/// A static dictionary can be reused many times this way, because the
/// dictionary stream is referenced in place instead of being copied.
///
/// # Arguments
///
/// * `dict_stream` - The dictionary stream to attach, or `None` to unset any existing dictionary
/// * `compression_level` - The compression level to use (CLEVEL_MIN to CLEVEL_MAX)
///
/// # Notes
///
/// - The dictionary stream must have been prepared using `with_dict()`
/// - The dictionary will only remain attached through the first compression call
/// - The dictionary stream (and its source buffer) must remain valid through the compression session
/// - NOTE(review): the borrow passed as `dict_stream` ends when this call returns, so keeping
///   the dictionary stream alive appears to be the caller's responsibility
///
/// # Example
///
/// ```
/// use lzzzz::lz4_hc;
///
/// let dict_data = b"dictionary data";
/// let data = b"data to compress";
///
/// // Create dictionary stream
/// let dict_comp = lz4_hc::Compressor::with_dict(dict_data, lz4_hc::CLEVEL_DEFAULT)?;
///
/// // Create working stream and attach dictionary
/// let mut comp = lz4_hc::Compressor::new()?;
/// comp.attach_dict(Some(&dict_comp), lz4_hc::CLEVEL_DEFAULT);
///
/// // Compress data using the attached dictionary
/// let mut buf = [0u8; 256];
/// let len = comp.next(data, &mut buf)?;
/// # Ok::<(), std::io::Error>(())
/// ```
pub fn attach_dict(&mut self, dict_stream: Option<&Compressor<'a>>, compression_level: i32) {
    // Project the optional compressor onto its inner context and forward it.
    self.ctx
        .attach_dict(dict_stream.map(|dict| &dict.ctx), compression_level);
}
}
1 change: 0 additions & 1 deletion src/lz4f/stream/comp/bufread.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ use std::{
/// ```
///
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html

pub struct BufReadCompressor<R: BufRead> {
pub(super) inner: R,
pub(super) comp: Compressor,
Expand Down
1 change: 0 additions & 1 deletion src/lz4f/stream/comp/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ use std::{
/// ```
///
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html

pub struct ReadCompressor<R: Read> {
inner: BufReadCompressor<BufReader<R>>,
}
Expand Down
1 change: 0 additions & 1 deletion src/lz4f/stream/comp/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ use std::{fmt, io::Write};
/// ```
///
/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html

pub struct WriteCompressor<W: Write> {
inner: Option<W>,
comp: Compressor,
Expand Down
1 change: 0 additions & 1 deletion src/lz4f/stream/decomp/bufread.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ use std::{
/// ```
///
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html

pub struct BufReadDecompressor<'a, R: BufRead> {
pub(super) inner: R,
decomp: Decompressor<'a>,
Expand Down
1 change: 0 additions & 1 deletion src/lz4f/stream/decomp/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ use std::{
/// ```
///
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html

pub struct ReadDecompressor<'a, R: Read> {
inner: BufReadDecompressor<'a, BufReader<R>>,
}
Expand Down
1 change: 0 additions & 1 deletion src/lz4f/stream/decomp/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ use std::{borrow::Cow, fmt, io::Write};
/// ```
///
/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html

pub struct WriteDecompressor<'a, W: Write> {
inner: W,
decomp: Decompressor<'a>,
Expand Down
16 changes: 9 additions & 7 deletions tests/lz4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,14 +82,16 @@ mod decompress {
}

#[test]
fn with_dict() {
fn with_dict_and_dict_slow() {
lz4_test_set().par_bridge().for_each(|(src, mode)| {
let mut comp_buf = Vec::new();
let mut decomp_buf = vec![0; src.len()];
let mut comp = lz4::Compressor::with_dict(src.as_ref()).unwrap();
comp.next_to_vec(&src, &mut comp_buf, mode).unwrap();
lz4::decompress_with_dict(&comp_buf, &mut decomp_buf, &src).unwrap();
assert_eq!(src, &decomp_buf);
for with_dict in [lz4::Compressor::with_dict, lz4::Compressor::with_dict_slow] {
let mut comp_buf = Vec::new();
let mut decomp_buf = vec![0; src.len()];
let mut comp = with_dict(src.as_ref()).unwrap();
comp.next_to_vec(&src, &mut comp_buf, mode).unwrap();
lz4::decompress_with_dict(&comp_buf, &mut decomp_buf, &src).unwrap();
assert_eq!(src, &decomp_buf);
}
});
}
}
38 changes: 37 additions & 1 deletion tests/lz4_hc_stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ mod compressor {
.sample_iter(Standard)
.take(64 * 1024)
.collect::<Vec<_>>();
let mut comp = lz4_hc::Compressor::with_dict(&dict).unwrap();
let mut comp = lz4_hc::Compressor::with_dict(&dict, lz4_hc::CLEVEL_DEFAULT).unwrap();
let mut decomp = lz4::Decompressor::with_dict(&dict).unwrap();
comp.set_compression_level(level);
for src in src_set {
Expand Down Expand Up @@ -70,4 +70,40 @@ mod compressor {
}
});
}

#[test]
fn attach_dictionary() {
    // Basic test data; the same bytes serve as both the dictionary and the input.
    let data = b"The quick brown fox jumps over the lazy dog";
    let level = lz4_hc::CLEVEL_DEFAULT;

    // Create the dictionary stream at the compression level used below.
    // `attach_dict` only needs a shared reference, so no `mut` binding is required.
    let dict_comp = lz4_hc::Compressor::with_dict(data, level).unwrap();

    // Create working stream
    let mut comp = lz4_hc::Compressor::new().unwrap();
    comp.set_compression_level(level); // Set level before attachment

    // Compress with attached dictionary
    comp.attach_dict(Some(&dict_comp), level);
    let mut output_attached_dict = Vec::new();
    comp.next_to_vec(data, &mut output_attached_dict).unwrap();

    // Compress with no dictionary
    comp.attach_dict(None, level);
    let mut output_no_dict = Vec::new();
    comp.next_to_vec(data, &mut output_no_dict).unwrap();

    // The two outputs must differ: one was produced with the dictionary, one without.
    assert_ne!(output_attached_dict, output_no_dict, "Data with no dict should be different");

    // Code below is disabled because it (unexpectedly) does not work.
    // Seems to be an upstream lz4 issue.

    // Create regular dictionary compressor with same level
    // let mut comp_regular_dict = lz4_hc::Compressor::with_dict(data, level).unwrap();
    // let mut output_regular_dict = Vec::new();
    // comp_regular_dict.next_to_vec(data, &mut output_regular_dict).unwrap();
    // assert_eq!(output_attached_dict, output_regular_dict, "Compressed data should match");
}
}
Loading
Loading
0