From e78df5adb510f5e32a7e7b98614e43324eb47991 Mon Sep 17 00:00:00 2001 From: Pavel Zwerschke Date: Mon, 3 Mar 2025 02:06:56 +0100 Subject: [PATCH 01/10] impl --- crates/rattler_index/src/lib.rs | 84 ++++++++++++++++++++++++++++++--- 1 file changed, 78 insertions(+), 6 deletions(-) diff --git a/crates/rattler_index/src/lib.rs b/crates/rattler_index/src/lib.rs index 83fd0deea..a93a65686 100644 --- a/crates/rattler_index/src/lib.rs +++ b/crates/rattler_index/src/lib.rs @@ -10,13 +10,16 @@ use fxhash::FxHashMap; use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; use rattler_conda_types::{ package::{ArchiveType, IndexJson, PackageFile}, - ChannelInfo, PackageRecord, Platform, RepoData, + ChannelInfo, PackageRecord, PatchInstructions, Platform, RepoData, }; use rattler_networking::{Authentication, AuthenticationStorage}; -use rattler_package_streaming::{read, seek}; +use rattler_package_streaming::{ + read, + seek::{self, stream_conda_content}, +}; use std::{ collections::{HashMap, HashSet}, - io::{Cursor, Read}, + io::{Cursor, Read, Seek}, path::{Path, PathBuf}, str::FromStr, sync::Arc, @@ -71,6 +74,40 @@ pub fn package_record_from_index_json( Ok(package_record) } +fn repodata_patch_from_package_stream<'a>( + package: impl Read + Seek + 'a, +) -> std::io::Result { + let mut subdirs = FxHashMap::default(); + + // todo: what about .tar.bz2? + let mut content_reader = stream_conda_content(package).unwrap(); + let entries = content_reader.entries().unwrap(); + for entry in entries { + let mut entry = entry.unwrap(); + if !entry.header().entry_type().is_file() { + todo!(); + } + let mut buf = Vec::new(); + entry.read_to_end(&mut buf).unwrap(); + let path = entry.path().unwrap(); + let components = path.components().into_iter().collect::>(); + let subdir = if components.len() != 2 { + todo!(); + } else { + if components[1].as_os_str() != "patch_instructions.json" { + todo!(); + } + components[0].as_os_str().to_string_lossy().to_string() + }; + + // println!("Contents: {:?}", String::from_utf8_lossy(&buf)); + let instructions: PatchInstructions = serde_json::from_slice(&buf).unwrap(); + subdirs.insert(subdir, instructions); + } + + Ok(rattler_conda_types::RepoDataPatch { subdirs }) +} + /// Extract the package record from a `.tar.bz2` package file. /// This function will look for the `info/index.json` file in the conda package /// and extract the package record from it. @@ -132,12 +169,17 @@ async fn index_subdir( subdir: Platform, op: Operator, force: bool, + repodata_patch: Option, progress: Option, semaphore: Arc, ) -> Result<()> { + let repodata_path = if repodata_patch.is_some() { + format!("{subdir}/repodata_from_packages.json") + } else { + format!("{subdir}/repodata.json") + }; let mut registered_packages: FxHashMap = HashMap::default(); if !force { - let repodata_path = format!("{subdir}/repodata.json"); let repodata_bytes = op.read(&repodata_path).await; let repodata: RepoData = match repodata_bytes { Ok(bytes) => serde_json::from_slice(&bytes.to_vec())?, @@ -304,9 +346,16 @@ async fn index_subdir( version: Some(2), }; - let repodata_path = format!("{subdir}/repodata.json"); let repodata_bytes = serde_json::to_vec(&repodata)?; op.write(&repodata_path, repodata_bytes).await?; + + if let Some(instructions) = repodata_patch { + let mut patched_repodata = repodata.clone(); + patched_repodata.apply_patches(&instructions); + let patched_repodata_bytes = serde_json::to_vec(&patched_repodata)?; + op.write(&format!("{subdir}/repodata.json"), patched_repodata_bytes) + .await?; + } // todo: also write repodata.json.bz2, repodata.json.zst, repodata.json.jlap and sharded repodata once available in rattler // https://github.com/conda/rattler/issues/1096 @@ -323,7 +372,15 @@ pub async fn index_fs( ) -> anyhow::Result<()> { let mut config = FsConfig::default(); config.root = Some(channel.into().canonicalize()?.to_string_lossy().to_string()); - index(target_platform, config, force, max_parallel, multi_progress).await + index( + target_platform, + config, + force, + max_parallel, + multi_progress, + Some("conda-forge-repodata-patches-20250228.14.29.06-hd8ed1ab_1.conda"), + ) + .await } /// Create a new `repodata.json` for all packages in the channel at the given S3 URL. @@ -379,6 +436,7 @@ pub async fn index_s3( force, max_parallel, multi_progress, + Some("conda-forge-repodata-patches-20250228.14.29.06-hd8ed1ab_1.conda"), ) .await } @@ -401,6 +459,7 @@ pub async fn index( force: bool, max_parallel: usize, multi_progress: Option, + repodata_patch: Option<&str>, ) -> anyhow::Result<()> { let builder = config.into_builder(); @@ -443,6 +502,16 @@ pub async fn index( subdirs.insert(Platform::NoArch); } + let repodata_patch = if let Some(path) = repodata_patch { + let repodata_patch_path = format!("noarch/{path}"); + let repodata_patch_bytes = op.read(&repodata_patch_path).await?.to_bytes(); + let reader = Cursor::new(repodata_patch_bytes); + let repodata_patch = repodata_patch_from_package_stream(reader)?; + Some(repodata_patch) + } else { + None + }; + let semaphore = Semaphore::new(max_parallel); let semaphore = Arc::new(semaphore); @@ -453,6 +522,9 @@ pub async fn index( *subdir, op.clone(), force, + repodata_patch + .as_ref() + .and_then(|p| p.subdirs.get(&subdir.to_string()).cloned()), multi_progress.clone(), semaphore.clone(), )) From afa31c18f20ab7778bd3de649e74963c4bde1193 Mon Sep 17 00:00:00 2001 From: Pavel Zwerschke Date: Mon, 3 Mar 2025 02:36:41 +0100 Subject: [PATCH 02/10] . --- crates/rattler_index/src/lib.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/crates/rattler_index/src/lib.rs b/crates/rattler_index/src/lib.rs index a93a65686..e18ddd306 100644 --- a/crates/rattler_index/src/lib.rs +++ b/crates/rattler_index/src/lib.rs @@ -252,7 +252,7 @@ async fn index_subdir( .cloned() .collect::>(); - tracing::debug!( + tracing::info!( "Adding {} packages to subdir {}.", packages_to_add.len(), subdir @@ -310,7 +310,7 @@ async fn index_subdir( pb.finish_with_message(format!("Finished {}", subdir.as_str())); - tracing::debug!( + tracing::info!( "Successfully added {} packages to subdir {}.", results.len(), subdir @@ -346,14 +346,17 @@ async fn index_subdir( version: Some(2), }; + tracing::info!("Writing repodata to {}", repodata_path); let repodata_bytes = serde_json::to_vec(&repodata)?; op.write(&repodata_path, repodata_bytes).await?; if let Some(instructions) = repodata_patch { + let patched_repodata_path = format!("{subdir}/repodata.json"); + tracing::info!("Writing patched repodata to {}", patched_repodata_path); let mut patched_repodata = repodata.clone(); patched_repodata.apply_patches(&instructions); let patched_repodata_bytes = serde_json::to_vec(&patched_repodata)?; - op.write(&format!("{subdir}/repodata.json"), patched_repodata_bytes) + op.write(&patched_repodata_path, patched_repodata_bytes) .await?; } // todo: also write repodata.json.bz2, repodata.json.zst, repodata.json.jlap and sharded repodata once available in rattler @@ -530,7 +533,7 @@ pub async fn index( )) }) .collect::>(); - try_join_all(tasks).await?; - + let results = try_join_all(tasks).await?; + results.into_iter().collect::>>()?; Ok(()) } From 29430c414177c47ec9af86ed286d1f8cad62cdfb Mon Sep 17 00:00:00 2001 From: Pavel Zwerschke Date: Mon, 3 Mar 2025 03:09:45 +0100 Subject: [PATCH 03/10] fail earlier --- crates/rattler_index/src/lib.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/crates/rattler_index/src/lib.rs b/crates/rattler_index/src/lib.rs index e18ddd306..c6e0dc72e 100644 --- a/crates/rattler_index/src/lib.rs +++ b/crates/rattler_index/src/lib.rs @@ -307,7 +307,12 @@ async fn index_subdir( }) .collect::>(); let results = try_join_all(tasks).await?; - + let results = results.into_iter().collect::>>(); + if let Err(err) = results { + pb.set_message("Failed"); + return Err(err.into()); + } + let results = results.unwrap(); pb.finish_with_message(format!("Finished {}", subdir.as_str())); tracing::info!( @@ -316,8 +321,7 @@ async fn index_subdir( subdir ); - for result in results { - let (filename, record) = result?; + for (filename, record) in results { registered_packages.insert(filename, record); } From 01a2da5dc941bf84b86bed57283589acec40154b Mon Sep 17 00:00:00 2001 From: Pavel Zwerschke Date: Wed, 5 Mar 2025 01:39:10 +0100 Subject: [PATCH 04/10] wip --- Cargo.lock | 7 -- crates/rattler_index/src/lib.rs | 185 +++++++++++++++++++------------ crates/rattler_index/src/main.rs | 9 +- 3 files changed, 123 insertions(+), 78 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5915dd13c..c9ca34d54 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6462,13 +6462,6 @@ dependencies = [ "wit-bindgen-rt", ] -[[package]] -name = "wasm-bin" -version = "0.1.0" -dependencies = [ - "rattler_solve", -] - [[package]] name = "wasm-bindgen" version = "0.2.100" diff --git a/crates/rattler_index/src/lib.rs b/crates/rattler_index/src/lib.rs index c6e0dc72e..f9f84b39e 100644 --- a/crates/rattler_index/src/lib.rs +++ b/crates/rattler_index/src/lib.rs @@ -5,7 +5,7 @@ use anyhow::Result; use bytes::buf::Buf; use fs_err::{self as fs}; -use futures::future::try_join_all; +use futures::{stream::FuturesUnordered, StreamExt}; use fxhash::FxHashMap; use indicatif::{MultiProgress, ProgressBar, ProgressStyle}; use rattler_conda_types::{ @@ -74,12 +74,11 @@ pub fn package_record_from_index_json( Ok(package_record) } -fn repodata_patch_from_package_stream<'a>( +fn repodata_patch_from_conda_package_stream<'a>( package: impl Read + Seek + 'a, ) -> std::io::Result { let mut subdirs = FxHashMap::default(); - // todo: what about .tar.bz2? let mut content_reader = stream_conda_content(package).unwrap(); let entries = content_reader.entries().unwrap(); for entry in entries { @@ -90,17 +89,16 @@ fn repodata_patch_from_package_stream<'a>( let mut buf = Vec::new(); entry.read_to_end(&mut buf).unwrap(); let path = entry.path().unwrap(); - let components = path.components().into_iter().collect::>(); - let subdir = if components.len() != 2 { - todo!(); - } else { + let components = path.components().collect::>(); + let subdir = if components.len() == 2 { if components[1].as_os_str() != "patch_instructions.json" { todo!(); } components[0].as_os_str().to_string_lossy().to_string() + } else { + todo!(); }; - // println!("Contents: {:?}", String::from_utf8_lossy(&buf)); let instructions: PatchInstructions = serde_json::from_slice(&buf).unwrap(); subdirs.insert(subdir, instructions); } @@ -271,49 +269,72 @@ async fn index_subdir( .progress_chars("##-"); pb.set_style(sty); - let tasks = packages_to_add - .iter() - .map(|filename| { - tokio::spawn({ - let op = op.clone(); - let filename = filename.clone(); - let pb = pb.clone(); - let semaphore = semaphore.clone(); - { - async move { - let _permit = semaphore - .acquire() - .await - .expect("Semaphore was unexpectedly closed"); - pb.set_message(format!( - "Indexing {} {}", - subdir.as_str(), - console::style(filename.clone()).dim() - )); - let file_path = format!("{subdir}/{filename}"); - let buffer = op.read(&file_path).await?; - let reader = buffer.reader(); - // We already know it's not None - let archive_type = ArchiveType::try_from(&filename).unwrap(); - let record = match archive_type { - ArchiveType::TarBz2 => package_record_from_tar_bz2_reader(reader), - ArchiveType::Conda => package_record_from_conda_reader(reader), - }?; - pb.inc(1); - Ok::<(String, PackageRecord), std::io::Error>((filename.clone(), record)) - } + let mut tasks = FuturesUnordered::new(); + for filename in packages_to_add.iter() { + let task = { + let op = op.clone(); + let filename = filename.clone(); + let pb = pb.clone(); + let semaphore = semaphore.clone(); + { + async move { + let _permit = semaphore + .acquire() + .await + .expect("Semaphore was unexpectedly closed"); + pb.set_message(format!( + "Indexing {} {}", + subdir.as_str(), + console::style(filename.clone()).dim() + )); + let file_path = format!("{subdir}/{filename}"); + let buffer = op.read(&file_path).await?; + let reader = buffer.reader(); + // We already know it's not None + let archive_type = ArchiveType::try_from(&filename).unwrap(); + let record = match archive_type { + ArchiveType::TarBz2 => package_record_from_tar_bz2_reader(reader), + ArchiveType::Conda => package_record_from_conda_reader(reader), + }?; + pb.inc(1); + // todo: make this future ok/err instead of results + Ok::<(String, PackageRecord), std::io::Error>((filename.clone(), record)) } - }) - }) - .collect::>(); - let results = try_join_all(tasks).await?; - let results = results.into_iter().collect::>>(); - if let Err(err) = results { - pb.set_message("Failed"); - return Err(err.into()); + } + }; + tasks.push(tokio::spawn(task)); + } + let mut results = Vec::new(); + while let Some(join_result) = tasks.next().await { + match join_result { + Ok(Ok(result)) => results.push(result), + Ok(Err(e)) => { + tasks.clear(); + tracing::error!("Failed to process package: {}", e); + pb.abandon_with_message(format!( + "{} {}", + console::style("Failed to index").red(), + console::style(subdir.as_str()).dim() + )); + return Err(e.into()); + } + Err(join_err) => { + tasks.clear(); + tracing::error!("Task panicked: {}", join_err); + pb.abandon_with_message(format!( + "{} {}", + console::style("Failed to index").red(), + console::style(subdir.as_str()).dim() + )); + return Err(anyhow::anyhow!("Task panicked: {}", join_err)); + } + } } - let results = results.unwrap(); - pb.finish_with_message(format!("Finished {}", subdir.as_str())); + pb.finish_with_message(format!( + "{} {}", + console::style("Finished").green(), + subdir.as_str() + )); tracing::info!( "Successfully added {} packages to subdir {}.", @@ -373,6 +394,7 @@ async fn index_subdir( pub async fn index_fs( channel: impl Into, target_platform: Option, + repodata_patch: Option, force: bool, max_parallel: usize, multi_progress: Option, @@ -385,7 +407,7 @@ pub async fn index_fs( force, max_parallel, multi_progress, - Some("conda-forge-repodata-patches-20250228.14.29.06-hd8ed1ab_1.conda"), + repodata_patch, ) .await } @@ -401,6 +423,7 @@ pub async fn index_s3( secret_access_key: Option, session_token: Option, target_platform: Option, + repodata_patch: Option, force: bool, max_parallel: usize, multi_progress: Option, @@ -443,7 +466,7 @@ pub async fn index_s3( force, max_parallel, multi_progress, - Some("conda-forge-repodata-patches-20250228.14.29.06-hd8ed1ab_1.conda"), + repodata_patch, ) .await } @@ -466,7 +489,7 @@ pub async fn index( force: bool, max_parallel: usize, multi_progress: Option, - repodata_patch: Option<&str>, + repodata_patch: Option, ) -> anyhow::Result<()> { let builder = config.into_builder(); @@ -510,10 +533,19 @@ pub async fn index( } let repodata_patch = if let Some(path) = repodata_patch { + match ArchiveType::try_from(path.clone()) { + Some(ArchiveType::Conda) => {} + Some(ArchiveType::TarBz2) | None => { + return Err(anyhow::anyhow!( + "Only .conda packages are supported for repodata patches. Got: {}", + path + )) + } + } let repodata_patch_path = format!("noarch/{path}"); let repodata_patch_bytes = op.read(&repodata_patch_path).await?.to_bytes(); let reader = Cursor::new(repodata_patch_bytes); - let repodata_patch = repodata_patch_from_package_stream(reader)?; + let repodata_patch = repodata_patch_from_conda_package_stream(reader)?; Some(repodata_patch) } else { None @@ -522,22 +554,35 @@ pub async fn index( let semaphore = Semaphore::new(max_parallel); let semaphore = Arc::new(semaphore); - let tasks = subdirs - .iter() - .map(|subdir| { - tokio::spawn(index_subdir( - *subdir, - op.clone(), - force, - repodata_patch - .as_ref() - .and_then(|p| p.subdirs.get(&subdir.to_string()).cloned()), - multi_progress.clone(), - semaphore.clone(), - )) - }) - .collect::>(); - let results = try_join_all(tasks).await?; - results.into_iter().collect::>>()?; + let mut tasks = FuturesUnordered::new(); + for subdir in subdirs.iter() { + let task = index_subdir( + *subdir, + op.clone(), + force, + repodata_patch + .as_ref() + .and_then(|p| p.subdirs.get(&subdir.to_string()).cloned()), + multi_progress.clone(), + semaphore.clone(), + ); + tasks.push(tokio::spawn(task)); + } + + while let Some(join_result) = tasks.next().await { + match join_result { + Ok(Ok(_)) => {} + Ok(Err(e)) => { + tracing::error!("Failed to process subdir: {}", e); + tasks.clear(); + return Err(e); + } + Err(join_err) => { + tracing::error!("Task panicked: {}", join_err); + tasks.clear(); + return Err(anyhow::anyhow!("Task panicked: {}", join_err)); + } + } + } Ok(()) } diff --git a/crates/rattler_index/src/main.rs b/crates/rattler_index/src/main.rs index c72fc1d47..c8c08754c 100644 --- a/crates/rattler_index/src/main.rs +++ b/crates/rattler_index/src/main.rs @@ -32,13 +32,18 @@ struct Cli { /// The maximum number of packages to process in-memory simultaneously. /// This is necessary to limit memory usage when indexing large channels. - #[arg(long, default_value = "128", global = true)] + #[arg(long, default_value = "32", global = true)] max_parallel: usize, /// A specific platform to index. /// Defaults to all platforms available in the channel. #[arg(long, global = true)] target_platform: Option, + + /// The name of the conda package (expected to be in the `noarch` subdir) that should be used for repodata patching. + /// For more information, see `https://prefix.dev/blog/repodata_patching`. + #[arg(long, global = true)] + repodata_patch: Option, } /// The subcommands for the `rattler-index` CLI. @@ -106,6 +111,7 @@ async fn main() -> anyhow::Result<()> { index_fs( channel, cli.target_platform, + cli.repodata_patch, cli.force, cli.max_parallel, Some(multi_progress), @@ -130,6 +136,7 @@ async fn main() -> anyhow::Result<()> { secret_access_key, session_token, cli.target_platform, + cli.repodata_patch, cli.force, cli.max_parallel, Some(multi_progress), From a1d4ac0990b0074afaab9cb12b687cc3826ff68c Mon Sep 17 00:00:00 2001 From: Pavel Zwerschke Date: Wed, 5 Mar 2025 02:01:13 +0100 Subject: [PATCH 05/10] finish --- crates/rattler_index/src/lib.rs | 43 +++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/crates/rattler_index/src/lib.rs b/crates/rattler_index/src/lib.rs index f9f84b39e..08ed36d06 100644 --- a/crates/rattler_index/src/lib.rs +++ b/crates/rattler_index/src/lib.rs @@ -2,7 +2,7 @@ //! files #![deny(missing_docs)] -use anyhow::Result; +use anyhow::{Context, Result}; use bytes::buf::Buf; use fs_err::{self as fs}; use futures::{stream::FuturesUnordered, StreamExt}; @@ -76,30 +76,38 @@ pub fn package_record_from_index_json( fn repodata_patch_from_conda_package_stream<'a>( package: impl Read + Seek + 'a, -) -> std::io::Result { +) -> anyhow::Result { let mut subdirs = FxHashMap::default(); - let mut content_reader = stream_conda_content(package).unwrap(); - let entries = content_reader.entries().unwrap(); + let mut content_reader = stream_conda_content(package)?; + let entries = content_reader.entries()?; for entry in entries { - let mut entry = entry.unwrap(); + let mut entry = entry?; if !entry.header().entry_type().is_file() { - todo!(); + return Err(anyhow::anyhow!( + "Expected repodata patch package to be a file" + )); } let mut buf = Vec::new(); - entry.read_to_end(&mut buf).unwrap(); - let path = entry.path().unwrap(); + entry.read_to_end(&mut buf)?; + let path = entry.path()?; let components = path.components().collect::>(); - let subdir = if components.len() == 2 { - if components[1].as_os_str() != "patch_instructions.json" { - todo!(); - } - components[0].as_os_str().to_string_lossy().to_string() - } else { - todo!(); - }; + let subdir = + if components.len() == 2 && components[1].as_os_str() == "patch_instructions.json" { + let subdir_str = components[0] + .as_os_str() + .to_str() + .context("Could not convert OsStr to str")?; + let _ = Platform::from_str(subdir_str)?; + subdir_str.to_string() + } else { + return Err(anyhow::anyhow!( + "Expected files of form /patch_instructions.json, but found {}", + path.display() + )); + }; - let instructions: PatchInstructions = serde_json::from_slice(&buf).unwrap(); + let instructions: PatchInstructions = serde_json::from_slice(&buf)?; subdirs.insert(subdir, instructions); } @@ -297,7 +305,6 @@ async fn index_subdir( ArchiveType::Conda => package_record_from_conda_reader(reader), }?; pb.inc(1); - // todo: make this future ok/err instead of results Ok::<(String, PackageRecord), std::io::Error>((filename.clone(), record)) } } From b71a227a076aecd0b73c837f5dc0e16c7b5d6f38 Mon Sep 17 00:00:00 2001 From: Pavel Zwerschke Date: Wed, 5 Mar 2025 02:01:46 +0100 Subject: [PATCH 06/10] fix --- crates/rattler_conda_types/src/repo_data/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/rattler_conda_types/src/repo_data/mod.rs b/crates/rattler_conda_types/src/repo_data/mod.rs index 6e30641f5..4d27d0da2 100644 --- a/crates/rattler_conda_types/src/repo_data/mod.rs +++ b/crates/rattler_conda_types/src/repo_data/mod.rs @@ -470,7 +470,7 @@ fn determine_subdir( Arch::X86_64 => "64", _ => arch.as_str(), }; - Ok(format!("{}-{}", platform, arch_str)) + Ok(format!("{platform}-{arch_str}")) } Err(_) => Err(ConvertSubdirError::NoKnownCombination { platform, arch }), } From 7bbb1a36a58068f3586bc650a2fcbf12f2313c1c Mon Sep 17 00:00:00 2001 From: Pavel Zwerschke Date: Wed, 5 Mar 2025 02:09:49 +0100 Subject: [PATCH 07/10] fix --- crates/rattler_index/src/lib.rs | 7 ++++--- py-rattler/rattler/index/index.py | 4 ++++ py-rattler/src/index.rs | 8 ++++++-- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/crates/rattler_index/src/lib.rs b/crates/rattler_index/src/lib.rs index 08ed36d06..8be363301 100644 --- a/crates/rattler_index/src/lib.rs +++ b/crates/rattler_index/src/lib.rs @@ -398,6 +398,7 @@ async fn index_subdir( } /// Create a new `repodata.json` for all packages in the channel at the given directory. +#[allow(clippy::too_many_arguments)] pub async fn index_fs( channel: impl Into, target_platform: Option, @@ -411,10 +412,10 @@ pub async fn index_fs( index( target_platform, config, + repodata_patch, force, max_parallel, multi_progress, - repodata_patch, ) .await } @@ -470,10 +471,10 @@ pub async fn index_s3( index( target_platform, s3_config, + repodata_patch, force, max_parallel, multi_progress, - repodata_patch, ) .await } @@ -493,10 +494,10 @@ pub async fn index_s3( pub async fn index( target_platform: Option, config: T, + repodata_patch: Option, force: bool, max_parallel: usize, multi_progress: Option, - repodata_patch: Option, ) -> anyhow::Result<()> { let builder = config.into_builder(); diff --git a/py-rattler/rattler/index/index.py b/py-rattler/rattler/index/index.py index f8c716508..c1767ad85 100644 --- a/py-rattler/rattler/index/index.py +++ b/py-rattler/rattler/index/index.py @@ -10,6 +10,7 @@ async def index_fs( channel_directory: os.PathLike[str], target_platform: Optional[Platform] = None, + repodata_patch: Optional[str] = None, force: bool = False, max_parallel: int = 128, ) -> None: @@ -30,6 +31,7 @@ async def index_fs( await py_index_fs( channel_directory, target_platform._inner if target_platform else target_platform, + repodata_patch, force, max_parallel, ) @@ -44,6 +46,7 @@ async def index_s3( secret_access_key: Optional[str] = None, session_token: Optional[str] = None, target_platform: Optional[Platform] = None, + repodata_patch: Optional[str] = None, force: bool = False, max_parallel: int = 128, ) -> None: @@ -76,6 +79,7 @@ async def index_s3( secret_access_key, session_token, target_platform._inner if target_platform else target_platform, + repodata_patch, force, max_parallel, ) diff --git a/py-rattler/src/index.rs b/py-rattler/src/index.rs index c7b221a0e..72d585576 100644 --- a/py-rattler/src/index.rs +++ b/py-rattler/src/index.rs @@ -9,11 +9,12 @@ use std::path::PathBuf; use crate::{error::PyRattlerError, platform::PyPlatform}; #[pyfunction] -#[pyo3(signature = (channel_directory, target_platform=None, force=false, max_parallel=128))] +#[pyo3(signature = (channel_directory, target_platform=None, repodata_patch=None, force=false, max_parallel=32))] pub fn py_index_fs( py: Python<'_>, channel_directory: PathBuf, target_platform: Option, + repodata_patch: Option, force: bool, max_parallel: usize, ) -> PyResult> { @@ -22,6 +23,7 @@ pub fn py_index_fs( index_fs( channel_directory, target_platform, + repodata_patch, force, max_parallel, None, @@ -33,7 +35,7 @@ pub fn py_index_fs( #[pyfunction] #[allow(clippy::too_many_arguments)] -#[pyo3(signature = (channel_url, region, endpoint_url, force_path_style, access_key_id=None,secret_access_key=None, session_token=None, target_platform=None, force=false, max_parallel=128))] +#[pyo3(signature = (channel_url, region, endpoint_url, force_path_style, access_key_id=None,secret_access_key=None, session_token=None, target_platform=None, repodata_patch=None, force=false, max_parallel=32))] pub fn py_index_s3( py: Python<'_>, channel_url: String, @@ -44,6 +46,7 @@ pub fn py_index_s3( secret_access_key: Option, session_token: Option, target_platform: Option, + repodata_patch: Option, force: bool, max_parallel: usize, ) -> PyResult> { @@ -60,6 +63,7 @@ pub fn py_index_s3( secret_access_key, session_token, target_platform, + repodata_patch, force, max_parallel, None, From 5964509533d657d1927318520343244b6e18113a Mon Sep 17 00:00:00 2001 From: Pavel Zwerschke Date: Wed, 5 Mar 2025 02:10:57 +0100 Subject: [PATCH 08/10] fix --- py-rattler/tests/unit/test_index.py | 1 + 1 file changed, 1 insertion(+) diff --git a/py-rattler/tests/unit/test_index.py b/py-rattler/tests/unit/test_index.py index 829f9da22..90769a103 100644 --- a/py-rattler/tests/unit/test_index.py +++ b/py-rattler/tests/unit/test_index.py @@ -135,6 +135,7 @@ async def test_index_s3( force_path_style=True, access_key_id=s3_config.access_key_id, secret_access_key=s3_config.secret_access_key, + repodata_patch=None, force=True, ) From d896ab4fc414d811ef4783670adceb1259ab3896 Mon Sep 17 00:00:00 2001 From: Pavel Zwerschke Date: Wed, 5 Mar 2025 02:16:11 +0100 Subject: [PATCH 09/10] fix --- crates/rattler_index/tests/test_index.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/crates/rattler_index/tests/test_index.rs b/crates/rattler_index/tests/test_index.rs index 5b088ff3b..fe515c814 100644 --- a/crates/rattler_index/tests/test_index.rs +++ b/crates/rattler_index/tests/test_index.rs @@ -58,7 +58,15 @@ async fn test_index() { ) .unwrap(); - let res = index_fs(temp_dir.path(), Some(Platform::Win64), true, 100, None).await; + let res = index_fs( + temp_dir.path(), + Some(Platform::Win64), + None, + true, + 100, + None, + ) + .await; assert!(res.is_ok()); let repodata_path = temp_dir.path().join(subdir_path).join("repodata.json"); @@ -98,7 +106,7 @@ async fn test_index_empty_directory_creates_noarch_repodata() { let noarch_path = temp_dir.path().join("noarch"); let repodata_path = noarch_path.join("repodata.json"); - let res = index_fs(temp_dir.path(), None, true, 100, None).await; + let res = index_fs(temp_dir.path(), None, None, true, 100, None).await; assert!(res.is_ok()); assert!(noarch_path.is_dir()); From 4b716f90faecb8f569e92c4b9db16c0e180d947f Mon Sep 17 00:00:00 2001 From: Pavel Zwerschke Date: Wed, 5 Mar 2025 09:57:28 +0100 Subject: [PATCH 10/10] fix docstring --- py-rattler/rattler/index/index.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/py-rattler/rattler/index/index.py b/py-rattler/rattler/index/index.py index c1767ad85..712ce4610 100644 --- a/py-rattler/rattler/index/index.py +++ b/py-rattler/rattler/index/index.py @@ -25,6 +25,7 @@ async def index_fs( channel_directory: A `os.PathLike[str]` that is the directory containing subdirectories of dependencies to index. target_platform(optional): A `Platform` to index dependencies for. + repodata_patch(optional): The name of the conda package (expected to be in the `noarch` subdir) that should be used for repodata patching. force: Whether to forcefully re-index all subdirs. max_parallel: The maximum number of packages to process in-memory simultaneously. """ @@ -67,6 +68,7 @@ async def index_s3( secret_access_key(optional): The secret access key to use for authentication. session_token(optional): The session token to use for authentication. target_platform(optional): A `Platform` to index dependencies for. + repodata_patch(optional): The name of the conda package (expected to be in the `noarch` subdir) that should be used for repodata patching. force: Whether to forcefully re-index all subdirs. max_parallel: The maximum number of packages to process in-memory simultaneously. """