From 797fcbd26ad240a89e319349a30cd298e312cd20 Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Mon, 13 Dec 2021 20:43:13 -0500 Subject: [PATCH] Support for split layers Closes: https://github.com/ostreedev/ostree-rs-ext/issues/69 This is initial basic support for splitting files (objects) from a commit into separate container image layers, and reassembling those layers into a commit on the client. We retain our present logic around e.g. GPG signature verification. There's a new `chunking.rs` file which has logic to automatically factor out things like the kernel/initramfs and large files. In order to fetch these images client side, we now heavily intermix/cross the previous code for fetching non-ostree layers. --- lib/Cargo.toml | 3 +- lib/src/chunking.rs | 505 ++++++++++++++++++ lib/src/cli.rs | 24 +- lib/src/container/deploy.rs | 9 +- lib/src/container/encapsulate.rs | 129 ++++- lib/src/container/mod.rs | 2 + lib/src/container/store.rs | 249 ++++++--- lib/src/container/unencapsulate.rs | 116 +--- .../fedora-coreos-contentmeta.json.gz | Bin 0 -> 10233 bytes lib/src/lib.rs | 1 + lib/src/tar/export.rs | 70 +++ lib/src/tar/import.rs | 143 ++++- lib/tests/it/main.rs | 84 +-- 13 files changed, 1073 insertions(+), 262 deletions(-) create mode 100644 lib/src/chunking.rs create mode 100644 lib/src/fixtures/fedora-coreos-contentmeta.json.gz diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 067b6779..755d3968 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -10,7 +10,7 @@ version = "0.6.5" [dependencies] anyhow = "1.0" -containers-image-proxy = "0.4.0" +containers-image-proxy = { features = ["proxy_v0_2_3"], version = "0.4.0" } async-compression = { version = "0.3", features = ["gzip", "tokio"] } bitflags = "1" @@ -59,4 +59,3 @@ features = ["dox"] [features] dox = ["ostree/dox"] internal-testing-api = ["sh-inline", "indoc"] -proxy_v0_2_3 = ["containers-image-proxy/proxy_v0_2_3"] diff --git a/lib/src/chunking.rs b/lib/src/chunking.rs new file mode 100644 index 00000000..7a128bae --- /dev/null +++ b/lib/src/chunking.rs @@ -0,0 +1,505 @@ +//! Split an OSTree commit into separate chunks + +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use std::borrow::{Borrow, Cow}; +use std::collections::{BTreeMap, BTreeSet, HashMap}; +use std::convert::TryInto; +use std::fmt::Write; +use std::num::NonZeroU32; +use std::rc::Rc; + +use crate::objectsource::{ContentID, ObjectMeta, ObjectMetaMap, ObjectSourceMeta}; +use crate::objgv::*; +use anyhow::{anyhow, Result}; +use camino::Utf8PathBuf; +use gvariant::aligned_bytes::TryAsAligned; +use gvariant::{Marker, Structure}; +use ostree::{gio, glib}; +use serde::{Deserialize, Serialize}; + +/// Maximum number of layers (chunks) we will use. +// We take half the limit of 128. +// https://github.com/ostreedev/ostree-rs-ext/issues/69 +pub(crate) const MAX_CHUNKS: u32 = 64; + +type RcStr = Rc; + +#[derive(Debug, Default)] +pub(crate) struct Chunk { + pub(crate) name: String, + pub(crate) content: BTreeMap)>, + pub(crate) size: u64, +} + +#[derive(Debug)] +pub(crate) enum Meta { + DirTree(RcStr), + DirMeta(RcStr), +} + +impl Meta { + pub(crate) fn objtype(&self) -> ostree::ObjectType { + match self { + Meta::DirTree(_) => ostree::ObjectType::DirTree, + Meta::DirMeta(_) => ostree::ObjectType::DirMeta, + } + } + + pub(crate) fn checksum(&self) -> &str { + match self { + Meta::DirTree(v) => v, + Meta::DirMeta(v) => v, + } + } +} + +#[derive(Debug, Deserialize, Serialize)] +/// Object metadata, but with additional size data +pub struct ObjectSourceMetaSized { + /// The original metadata + #[serde(flatten)] + meta: ObjectSourceMeta, + /// Total size of associated objects + size: u64, +} + +/// Extend content source metadata with sizes. +#[derive(Debug)] +pub struct ObjectMetaSized { + /// Mapping from content object to source. + pub map: ObjectMetaMap, + /// Computed sizes of each content source + pub sizes: Vec, +} + +impl ObjectMetaSized { + /// Given object metadata and a repo, compute the size of each content source. + pub fn compute_sizes(repo: &ostree::Repo, meta: ObjectMeta) -> Result { + let cancellable = gio::NONE_CANCELLABLE; + // Destructure into component parts; we'll create the version with sizes + let map = meta.map; + let mut set = meta.set; + // Maps content id -> total size of associated objects + let mut sizes = HashMap::<&str, u64>::new(); + // Populate two mappings above, iterating over the object -> contentid mapping + for (checksum, contentid) in map.iter() { + let (_, finfo, _) = repo.load_file(checksum, cancellable)?; + let finfo = finfo.unwrap(); + let sz = sizes.entry(contentid).or_default(); + *sz += finfo.size() as u64; + } + // Combine data from sizes and the content mapping. + let sized: Result> = sizes + .into_iter() + .map(|(id, size)| -> Result { + set.take(id) + .ok_or_else(|| anyhow!("Failed to find {} in content set", id)) + .map(|meta| ObjectSourceMetaSized { meta, size }) + }) + .collect(); + let mut sizes = sized?; + sizes.sort_by(|a, b| b.size.cmp(&a.size)); + Ok(ObjectMetaSized { map, sizes }) + } +} + +/// How to split up an ostree commit into "chunks" - designed to map to container image layers. +#[derive(Debug, Default)] +pub struct Chunking { + pub(crate) metadata_size: u64, + pub(crate) commit: Box, + pub(crate) meta: Vec, + pub(crate) remainder: Chunk, + pub(crate) chunks: Vec, + + pub(crate) max: u32, + + processed_mapping: bool, + /// Number of components (e.g. packages) provided originally + pub(crate) n_provided_components: u32, + /// The above, but only ones with non-zero size + pub(crate) n_sized_components: u32, +} + +#[derive(Default)] +struct Generation { + path: Utf8PathBuf, + metadata_size: u64, + meta: Vec, + dirtree_found: BTreeSet, + dirmeta_found: BTreeSet, +} + +fn push_dirmeta(repo: &ostree::Repo, gen: &mut Generation, checksum: &str) -> Result<()> { + if gen.dirtree_found.contains(checksum) { + return Ok(()); + } + let checksum = RcStr::from(checksum); + gen.dirmeta_found.insert(RcStr::clone(&checksum)); + let child_v = repo.load_variant(ostree::ObjectType::DirMeta, checksum.borrow())?; + gen.metadata_size += child_v.data_as_bytes().as_ref().len() as u64; + gen.meta.push(Meta::DirMeta(checksum)); + Ok(()) +} + +fn push_dirtree( + repo: &ostree::Repo, + gen: &mut Generation, + checksum: &str, +) -> Result> { + if gen.dirtree_found.contains(checksum) { + return Ok(None); + } + let child_v = repo.load_variant(ostree::ObjectType::DirTree, checksum)?; + let checksum = RcStr::from(checksum); + gen.dirtree_found.insert(RcStr::clone(&checksum)); + gen.meta.push(Meta::DirTree(checksum)); + gen.metadata_size += child_v.data_as_bytes().as_ref().len() as u64; + Ok(Some(child_v)) +} + +fn generate_chunking_recurse( + repo: &ostree::Repo, + gen: &mut Generation, + chunk: &mut Chunk, + dt: &glib::Variant, +) -> Result<()> { + let dt = dt.data_as_bytes(); + let dt = dt.try_as_aligned()?; + let dt = gv_dirtree!().cast(dt); + let (files, dirs) = dt.to_tuple(); + // A reusable buffer to avoid heap allocating these + let mut hexbuf = [0u8; 64]; + for file in files { + let (name, csum) = file.to_tuple(); + let fpath = gen.path.join(name.to_str()); + hex::encode_to_slice(csum, &mut hexbuf)?; + let checksum = std::str::from_utf8(&hexbuf)?; + let (_, meta, _) = repo.load_file(checksum, gio::NONE_CANCELLABLE)?; + // SAFETY: We know this API returns this value; it only has a return nullable because the + // caller can pass NULL to skip it. + let meta = meta.unwrap(); + let size = meta.size() as u64; + let entry = chunk.content.entry(RcStr::from(checksum)).or_default(); + entry.0 = size; + let first = entry.1.is_empty(); + if first { + chunk.size += size; + } + entry.1.push(fpath); + } + for item in dirs { + let (name, contents_csum, meta_csum) = item.to_tuple(); + let name = name.to_str(); + // Extend our current path + gen.path.push(name); + hex::encode_to_slice(contents_csum, &mut hexbuf)?; + let checksum_s = std::str::from_utf8(&hexbuf)?; + if let Some(child_v) = push_dirtree(repo, gen, checksum_s)? { + generate_chunking_recurse(repo, gen, chunk, &child_v)?; + } + hex::encode_to_slice(meta_csum, &mut hexbuf)?; + let checksum_s = std::str::from_utf8(&hexbuf)?; + push_dirmeta(repo, gen, checksum_s)?; + // We did a push above, so pop must succeed. + assert!(gen.path.pop()); + } + Ok(()) +} + +impl Chunk { + fn new(name: &str) -> Self { + Chunk { + name: name.to_string(), + ..Default::default() + } + } + + fn move_obj(&mut self, dest: &mut Self, checksum: &str) -> bool { + // In most cases, we expect the object to exist in the source. However, it's + // conveneient here to simply ignore objects which were already moved into + // a chunk. + if let Some((name, (size, paths))) = self.content.remove_entry(checksum) { + let v = dest.content.insert(name, (size, paths)); + debug_assert!(v.is_none()); + self.size -= size; + dest.size += size; + true + } else { + false + } + } +} + +impl Chunking { + /// Generate an initial single chunk. + pub fn new(repo: &ostree::Repo, rev: &str) -> Result { + // Find the target commit + let rev = repo.require_rev(rev)?; + + // Load and parse the commit object + let (commit_v, _) = repo.load_commit(&rev)?; + let commit_v = commit_v.data_as_bytes(); + let commit_v = commit_v.try_as_aligned()?; + let commit = gv_commit!().cast(commit_v); + let commit = commit.to_tuple(); + + // Load it all into a single chunk + let mut gen = Generation { + path: Utf8PathBuf::from("/"), + ..Default::default() + }; + let mut chunk: Chunk = Default::default(); + + // Find the root directory tree + let contents_checksum = &hex::encode(commit.6); + let contents_v = repo.load_variant(ostree::ObjectType::DirTree, contents_checksum)?; + push_dirtree(repo, &mut gen, contents_checksum)?; + let meta_checksum = &hex::encode(commit.7); + push_dirmeta(repo, &mut gen, meta_checksum.as_str())?; + + generate_chunking_recurse(repo, &mut gen, &mut chunk, &contents_v)?; + + let chunking = Chunking { + commit: Box::from(rev.as_str()), + metadata_size: gen.metadata_size, + meta: gen.meta, + remainder: chunk, + ..Default::default() + }; + Ok(chunking) + } + + /// Generate a chunking from an object mapping. + pub fn from_mapping( + repo: &ostree::Repo, + rev: &str, + meta: ObjectMetaSized, + max_layers: Option, + ) -> Result { + let mut r = Self::new(repo, rev)?; + r.process_mapping(meta, max_layers)?; + Ok(r) + } + + fn remaining(&self) -> u32 { + self.max.saturating_sub(self.chunks.len() as u32) + } + + /// Given metadata about which objects are owned by a particular content source, + /// generate chunks that group together those objects. + #[allow(clippy::or_fun_call)] + pub fn process_mapping( + &mut self, + meta: ObjectMetaSized, + max_layers: Option, + ) -> Result<()> { + self.max = max_layers + .unwrap_or(NonZeroU32::new(MAX_CHUNKS).unwrap()) + .get(); + + let sizes = &meta.sizes; + // It doesn't make sense to handle multiple mappings + assert!(!self.processed_mapping); + self.processed_mapping = true; + let remaining = self.remaining(); + if remaining == 0 { + return Ok(()); + } + + // Reverses `contentmeta.map` i.e. contentid -> Vec + let mut rmap = HashMap::>::new(); + for (checksum, contentid) in meta.map.iter() { + rmap.entry(Rc::clone(contentid)).or_default().push(checksum); + } + + // Safety: Let's assume no one has over 4 billion components. + self.n_provided_components = meta.sizes.len().try_into().unwrap(); + self.n_sized_components = sizes + .iter() + .filter(|v| v.size > 0) + .count() + .try_into() + .unwrap(); + + // TODO: Compute bin packing in a better way + let packing = basic_packing(sizes, NonZeroU32::new(self.max).unwrap()); + + for bin in packing.into_iter() { + let first = bin[0]; + let first_name = &*first.meta.name; + let name = match bin.len() { + 0 => unreachable!(), + 1 => Cow::Borrowed(first_name), + 2..=5 => { + let r = bin.iter().map(|v| &*v.meta.name).fold( + String::from(first_name), + |mut acc, v| { + write!(acc, " and {}", v).unwrap(); + acc + }, + ); + Cow::Owned(r) + } + n => Cow::Owned(format!("{} components", n)), + }; + let mut chunk = Chunk::new(&*name); + for szmeta in bin { + for &obj in rmap.get(&szmeta.meta.identifier).unwrap() { + self.remainder.move_obj(&mut chunk, obj.as_str()); + } + } + if !chunk.content.is_empty() { + self.chunks.push(chunk); + } + } + + assert_eq!(self.remainder.content.len(), 0); + + Ok(()) + } + + pub(crate) fn take_chunks(&mut self) -> Vec { + let mut r = Vec::new(); + std::mem::swap(&mut self.chunks, &mut r); + r + } + + /// Print information about chunking to standard output. + pub fn print(&self) { + println!("Metadata: {}", glib::format_size(self.metadata_size)); + if self.n_provided_components > 0 { + println!( + "Components: provided={} sized={}", + self.n_provided_components, self.n_sized_components + ); + } + for (n, chunk) in self.chunks.iter().enumerate() { + let sz = glib::format_size(chunk.size); + println!( + "Chunk {}: \"{}\": objects:{} size:{}", + n, + chunk.name, + chunk.content.len(), + sz + ); + } + if !self.remainder.content.is_empty() { + let sz = glib::format_size(self.remainder.size); + println!( + "Remainder: \"{}\": objects:{} size:{}", + self.remainder.name, + self.remainder.content.len(), + sz + ); + } + } +} + +type ChunkedComponents<'a> = Vec<&'a ObjectSourceMetaSized>; + +fn components_size(components: &[&ObjectSourceMetaSized]) -> u64 { + components.iter().map(|k| k.size).sum() +} + +/// Compute the total size of a packing +#[cfg(test)] +fn packing_size(packing: &[ChunkedComponents]) -> u64 { + packing.iter().map(|v| components_size(&v)).sum() +} + +fn sort_packing(packing: &mut [ChunkedComponents]) { + packing.sort_by(|a, b| { + let a: u64 = components_size(a); + let b: u64 = components_size(b); + b.cmp(&a) + }); +} + +/// Given a set of components with size metadata (e.g. boxes of a certain size) +/// and a number of bins (possible container layers) to use, determine which components +/// go in which bin. This algorithm is pretty simple: +/// +/// - order by size +/// - If we have fewer components than bins, we're done +/// - Take the "tail" (all components past maximum), and group by source package +/// - If we have fewer components than bins, we're done +/// - Take the whole tail and group them toether (this is the overly simplistic part) +fn basic_packing(components: &[ObjectSourceMetaSized], bins: NonZeroU32) -> Vec { + // let total_size: u64 = components.iter().map(|v| v.size).sum(); + // let avg_size: u64 = total_size / components.len() as u64; + let mut r = Vec::new(); + // And handle the easy case of enough bins for all components + // TODO: Possibly try to split off large files? + if components.len() <= bins.get() as usize { + r.extend(components.iter().map(|v| vec![v])); + return r; + } + // Create a mutable copy + let mut components: Vec<_> = components.iter().collect(); + // Iterate over the component tail, folding by source id + let mut by_src = HashMap::<_, Vec<&ObjectSourceMetaSized>>::new(); + // Take the tail off components, then build up mapping from srcid -> Vec + for component in components.split_off(bins.get() as usize) { + by_src + .entry(&component.meta.srcid) + .or_default() + .push(component); + } + // Take all the non-tail (largest) components, and append them first + r.extend(components.into_iter().map(|v| vec![v])); + // Add the tail + r.extend(by_src.into_values()); + // And order the new list + sort_packing(&mut r); + // It's possible that merging components gave us enough space; if so + // we're done! + if r.len() <= bins.get() as usize { + return r; + } + + let last = (bins.get().checked_sub(1).unwrap()) as usize; + // The "tail" is components past our maximum. For now, we simply group all of that together as a single unit. + if let Some(tail) = r.drain(last..).reduce(|mut a, b| { + a.extend(b.into_iter()); + a + }) { + r.push(tail); + } + + assert!(r.len() <= bins.get() as usize); + r +} + +#[cfg(test)] +mod test { + use super::*; + + const FCOS_CONTENTMETA: &[u8] = include_bytes!("fixtures/fedora-coreos-contentmeta.json.gz"); + + #[test] + fn test_packing_basics() -> Result<()> { + // null cases + for v in [1u32, 7].map(|v| NonZeroU32::new(v).unwrap()) { + assert_eq!(basic_packing(&[], v).len(), 0); + } + Ok(()) + } + + #[test] + fn test_packing_fcos() -> Result<()> { + let contentmeta: Vec = + serde_json::from_reader(flate2::read::GzDecoder::new(FCOS_CONTENTMETA))?; + let total_size = contentmeta.iter().map(|v| v.size).sum::(); + + let packing = basic_packing(&contentmeta, NonZeroU32::new(MAX_CHUNKS).unwrap()); + assert!(!contentmeta.is_empty()); + // We should fit into the assigned chunk size + assert_eq!(packing.len() as u32, MAX_CHUNKS); + // And verify that the sizes match + let packed_total_size = packing_size(&packing); + assert_eq!(total_size, packed_total_size); + Ok(()) + } +} diff --git a/lib/src/cli.rs b/lib/src/cli.rs index ad61e45b..fffa2a56 100644 --- a/lib/src/cli.rs +++ b/lib/src/cli.rs @@ -17,9 +17,10 @@ use structopt::StructOpt; use tokio_stream::StreamExt; use crate::commit::container_commit; -use crate::container::store::{LayeredImageImporter, PrepareResult}; -use crate::container::{self as ostree_container, UnencapsulationProgress}; +use crate::container as ostree_container; use crate::container::{Config, ImageReference, OstreeImageReference, UnencapsulateOptions}; +use ostree_container::store::{ImageImporter, PrepareResult}; +use ostree_container::UnencapsulationProgress; /// Parse an [`OstreeImageReference`] from a CLI arguemnt. pub fn parse_imgref(s: &str) -> Result { @@ -257,7 +258,7 @@ struct ImaSignOpts { /// Options for internal testing #[derive(Debug, StructOpt)] enum TestingOpts { - // Detect the current environment + /// Detect the current environment DetectEnv, /// Execute integration tests, assuming mutable environment Run, @@ -413,7 +414,8 @@ async fn container_export( copy_meta_keys, ..Default::default() }; - let pushed = crate::container::encapsulate(repo, rev, &config, Some(opts), imgref).await?; + let pushed = + crate::container::encapsulate(repo, rev, &config, Some(opts), None, imgref).await?; println!("{}", pushed); Ok(()) } @@ -431,7 +433,7 @@ async fn container_store( imgref: &OstreeImageReference, proxyopts: ContainerProxyOpts, ) -> Result<()> { - let mut imp = LayeredImageImporter::new(repo, imgref, proxyopts.into()).await?; + let mut imp = ImageImporter::new(repo, imgref, proxyopts.into()).await?; let prep = match imp.prepare().await? { PrepareResult::AlreadyPresent(c) => { println!("No changes in {} => {}", imgref, c.merge_commit); @@ -439,17 +441,7 @@ async fn container_store( } PrepareResult::Ready(r) => r, }; - if prep.base_layer.commit.is_none() { - let size = crate::glib::format_size(prep.base_layer.size()); - println!( - "Downloading base layer: {} ({})", - prep.base_layer.digest(), - size - ); - } else { - println!("Using base: {}", prep.base_layer.digest()); - } - for layer in prep.layers.iter() { + for layer in prep.all_layers() { if layer.commit.is_some() { println!("Using layer: {}", layer.digest()); } else { diff --git a/lib/src/container/deploy.rs b/lib/src/container/deploy.rs index 39b2b688..0137ab79 100644 --- a/lib/src/container/deploy.rs +++ b/lib/src/container/deploy.rs @@ -41,12 +41,9 @@ pub async fn deploy( let cancellable = ostree::gio::NONE_CANCELLABLE; let options = options.unwrap_or_default(); let repo = &sysroot.repo().unwrap(); - let mut imp = super::store::LayeredImageImporter::new( - repo, - imgref, - options.proxy_cfg.unwrap_or_default(), - ) - .await?; + let mut imp = + super::store::ImageImporter::new(repo, imgref, options.proxy_cfg.unwrap_or_default()) + .await?; if let Some(target) = options.target_imgref { imp.set_target(target); } diff --git a/lib/src/container/encapsulate.rs b/lib/src/container/encapsulate.rs index 6a01897f..6cf5fc36 100644 --- a/lib/src/container/encapsulate.rs +++ b/lib/src/container/encapsulate.rs @@ -3,6 +3,7 @@ use super::ocidir::OciDir; use super::{ocidir, OstreeImageReference, Transport}; use super::{ImageReference, SignatureSource, OSTREE_COMMIT_LABEL}; +use crate::chunking::{Chunking, ObjectMetaSized}; use crate::container::skopeo; use crate::tar as ostree_tar; use anyhow::{anyhow, Context, Result}; @@ -12,6 +13,7 @@ use oci_spec::image as oci_image; use ostree::gio; use std::borrow::Cow; use std::collections::{BTreeMap, HashMap}; +use std::num::NonZeroU32; use std::path::Path; use std::rc::Rc; use tracing::{instrument, Level}; @@ -70,6 +72,46 @@ fn commit_meta_to_labels<'a>( Ok(()) } +/// Write an ostree commit to an OCI blob +#[context("Writing ostree root to blob")] +#[allow(clippy::too_many_arguments)] +fn export_chunked( + repo: &ostree::Repo, + ociw: &mut OciDir, + manifest: &mut oci_image::ImageManifest, + imgcfg: &mut oci_image::ImageConfiguration, + labels: &mut HashMap, + mut chunking: Chunking, + compression: Option, + description: &str, +) -> Result<()> { + let layers: Result> = chunking + .take_chunks() + .into_iter() + .enumerate() + .map(|(i, chunk)| -> Result<_> { + let mut w = ociw.create_layer(compression)?; + ostree_tar::export_chunk(repo, &chunk, &mut w) + .with_context(|| format!("Exporting chunk {}", i))?; + let w = w.into_inner()?; + Ok((w.complete()?, chunk.name)) + }) + .collect(); + for (layer, name) in layers? { + ociw.push_layer(manifest, imgcfg, layer, &name); + } + let mut w = ociw.create_layer(compression)?; + ostree_tar::export_final_chunk(repo, &chunking, &mut w)?; + let w = w.into_inner()?; + let final_layer = w.complete()?; + labels.insert( + crate::container::OSTREE_LAYER_LABEL.into(), + format!("sha256:{}", final_layer.blob.sha256), + ); + ociw.push_layer(manifest, imgcfg, final_layer, description); + Ok(()) +} + /// Generate an OCI image from a given ostree root #[context("Building oci")] fn build_oci( @@ -78,6 +120,7 @@ fn build_oci( ocidir_path: &Path, config: &Config, opts: ExportOpts, + contentmeta: Option, ) -> Result { // Explicitly error if the target exists std::fs::create_dir(ocidir_path).context("Creating OCI dir")?; @@ -109,30 +152,21 @@ fn build_oci( let mut manifest = ocidir::new_empty_manifest().build().unwrap(); + let chunking = contentmeta + .map(|meta| crate::chunking::Chunking::from_mapping(repo, commit, meta, opts.max_layers)) + .transpose()?; + if let Some(version) = commit_meta.lookup_value("version", Some(glib::VariantTy::new("s").unwrap())) { let version = version.str().unwrap(); labels.insert("version".into(), version.into()); } - labels.insert(OSTREE_COMMIT_LABEL.into(), commit.into()); for (k, v) in config.labels.iter().flat_map(|k| k.iter()) { labels.insert(k.into(), v.into()); } - // Lookup the cmd embedded in commit metadata - let cmd = commit_meta.lookup::>(ostree::COMMIT_META_CONTAINER_CMD)?; - // But support it being overridden by CLI options - - // https://github.com/rust-lang/rust-clippy/pull/7639#issuecomment-1050340564 - #[allow(clippy::unnecessary_lazy_evaluations)] - let cmd = config.cmd.as_ref().or_else(|| cmd.as_ref()); - if let Some(cmd) = cmd { - ctrcfg.set_cmd(Some(cmd.clone())); - } - - imgcfg.set_config(Some(ctrcfg)); let compression = if opts.compress { flate2::Compression::default() @@ -140,21 +174,52 @@ fn build_oci( flate2::Compression::none() }; - let rootfs_blob = export_ostree_ref(repo, commit, &mut writer, Some(compression))?; + let mut annos = HashMap::new(); + annos.insert(BLOB_OSTREE_ANNOTATION.to_string(), "true".to_string()); let description = if commit_subject.is_empty() { Cow::Owned(format!("ostree export of commit {}", commit)) } else { Cow::Borrowed(commit_subject) }; - let mut annos = HashMap::new(); - annos.insert(BLOB_OSTREE_ANNOTATION.to_string(), "true".to_string()); - writer.push_layer_annotated( - &mut manifest, - &mut imgcfg, - rootfs_blob, - Some(annos), - &description, - ); + + if let Some(chunking) = chunking { + export_chunked( + repo, + &mut writer, + &mut manifest, + &mut imgcfg, + labels, + chunking, + Some(compression), + &description, + )?; + } else { + let rootfs_blob = export_ostree_ref(repo, commit, &mut writer, Some(compression))?; + labels.insert( + crate::container::OSTREE_LAYER_LABEL.into(), + format!("sha256:{}", rootfs_blob.blob.sha256), + ); + writer.push_layer_annotated( + &mut manifest, + &mut imgcfg, + rootfs_blob, + Some(annos), + &description, + ); + } + + // Lookup the cmd embedded in commit metadata + let cmd = commit_meta.lookup::>(ostree::COMMIT_META_CONTAINER_CMD)?; + // But support it being overridden by CLI options + + // https://github.com/rust-lang/rust-clippy/pull/7639#issuecomment-1050340564 + #[allow(clippy::unnecessary_lazy_evaluations)] + let cmd = config.cmd.as_ref().or_else(|| cmd.as_ref()); + if let Some(cmd) = cmd { + ctrcfg.set_cmd(Some(cmd.clone())); + } + + imgcfg.set_config(Some(ctrcfg)); let ctrcfg = writer.write_config(imgcfg)?; manifest.set_config(ctrcfg); writer.write_manifest(manifest, oci_image::Platform::default())?; @@ -166,12 +231,13 @@ fn build_oci( } /// Helper for `build()` that avoids generics -#[instrument(skip(repo))] +#[instrument(skip(repo, contentmeta))] async fn build_impl( repo: &ostree::Repo, ostree_ref: &str, config: &Config, opts: Option, + contentmeta: Option, dest: &ImageReference, ) -> Result { let mut opts = opts.unwrap_or_default(); @@ -185,6 +251,7 @@ async fn build_impl( Path::new(dest.name.as_str()), config, opts, + contentmeta, )?; None } else { @@ -193,7 +260,14 @@ async fn build_impl( let tempdest = tempdest.to_str().unwrap(); let digestfile = tempdir.path().join("digestfile"); - let src = build_oci(repo, ostree_ref, Path::new(tempdest), config, opts)?; + let src = build_oci( + repo, + ostree_ref, + Path::new(tempdest), + config, + opts, + contentmeta, + )?; let mut cmd = skopeo::new_cmd(); tracing::event!(Level::DEBUG, "Copying {} to {}", src, dest); @@ -230,6 +304,8 @@ pub struct ExportOpts { pub compress: bool, /// A set of commit metadata keys to copy as image labels. pub copy_meta_keys: Vec, + /// Maximum number of layers to use + pub max_layers: Option, } /// Given an OSTree repository and ref, generate a container image. @@ -240,7 +316,8 @@ pub async fn encapsulate>( ostree_ref: S, config: &Config, opts: Option, + contentmeta: Option, dest: &ImageReference, ) -> Result { - build_impl(repo, ostree_ref.as_ref(), config, opts, dest).await + build_impl(repo, ostree_ref.as_ref(), config, opts, contentmeta, dest).await } diff --git a/lib/src/container/mod.rs b/lib/src/container/mod.rs index 713108f6..5a19fc31 100644 --- a/lib/src/container/mod.rs +++ b/lib/src/container/mod.rs @@ -32,6 +32,8 @@ use std::ops::Deref; /// The label injected into a container image that contains the ostree commit SHA-256. pub const OSTREE_COMMIT_LABEL: &str = "ostree.commit"; +/// The label/annotation which contains the sha256 of the final commit. +const OSTREE_LAYER_LABEL: &str = "ostree.layer"; /// Our generic catchall fatal error, expected to be converted /// to a string to output to a terminal or logs. diff --git a/lib/src/container/store.rs b/lib/src/container/store.rs index b3dd11da..31bf55a8 100644 --- a/lib/src/container/store.rs +++ b/lib/src/container/store.rs @@ -10,11 +10,12 @@ use crate::refescape; use anyhow::{anyhow, Context}; use containers_image_proxy::{ImageProxy, OpenedImage}; use fn_error_context::context; -use oci_spec::image::{self as oci_image, ImageManifest}; +use oci_spec::image::{self as oci_image, Descriptor, ImageManifest}; use ostree::prelude::{Cast, ToVariant}; use ostree::{gio, glib}; use std::collections::HashMap; use std::iter::FromIterator; +use std::sync::{Arc, Mutex}; /// Configuration for the proxy. /// @@ -84,12 +85,12 @@ impl LayeredImageState { /// Context for importing a container image. #[derive(Debug)] -pub struct LayeredImageImporter { +pub struct ImageImporter { repo: ostree::Repo, - proxy: ImageProxy, + pub(crate) proxy: ImageProxy, imgref: OstreeImageReference, target_imgref: Option, - proxy_img: OpenedImage, + pub(crate) proxy_img: OpenedImage, } /// Result of invoking [`LayeredImageImporter::prepare`]. @@ -104,7 +105,7 @@ pub enum PrepareResult { /// A container image layer with associated downloaded-or-not state. #[derive(Debug)] pub struct ManifestLayerState { - layer: oci_image::Descriptor, + pub(crate) layer: oci_image::Descriptor, /// The ostree ref name for this layer. pub ostree_ref: String, /// The ostree commit that caches this layer, if present. @@ -131,19 +132,34 @@ pub struct PreparedImport { /// The deserialized manifest. pub manifest: oci_image::ImageManifest, /// The deserialized configuration. - pub config: Option, + pub config: oci_image::ImageConfiguration, /// The previously stored manifest digest. pub previous_manifest_digest: Option, /// The previously stored image ID. pub previous_imageid: Option, - /// The required base layer. - pub base_layer: ManifestLayerState, - /// Any further layers. + /// The layers containing split objects + pub ostree_layers: Vec, + /// The layer for the ostree commit. + pub ostree_commit_layer: ManifestLayerState, + /// Any further non-ostree (derived) layers. pub layers: Vec, } +impl PreparedImport { + /// Iterate over all layers; the ostree split object layers, the commit layer, and any non-ostree layers. + pub fn all_layers(&self) -> impl Iterator { + self.ostree_layers + .iter() + .chain(std::iter::once(&self.ostree_commit_layer)) + .chain(self.layers.iter()) + } +} + // Given a manifest, compute its ostree ref name and cached ostree commit -fn query_layer(repo: &ostree::Repo, layer: oci_image::Descriptor) -> Result { +pub(crate) fn query_layer( + repo: &ostree::Repo, + layer: oci_image::Descriptor, +) -> Result { let ostree_ref = ref_for_layer(&layer)?; let commit = repo.resolve_rev(&ostree_ref, true)?.map(|s| s.to_string()); Ok(ManifestLayerState { @@ -177,7 +193,7 @@ pub fn manifest_digest_from_commit(commit: &glib::Variant) -> Result { Ok(manifest_data_from_commitmeta(commit_meta)?.1) } -impl LayeredImageImporter { +impl ImageImporter { /// Create a new importer. pub async fn new( repo: &ostree::Repo, @@ -189,7 +205,7 @@ impl LayeredImageImporter { let proxy = ImageProxy::new_with_config(config).await?; let proxy_img = proxy.open_image(&imgref.imgref.to_string()).await?; let repo = repo.clone(); - Ok(LayeredImageImporter { + Ok(ImageImporter { repo, proxy, proxy_img, @@ -202,15 +218,19 @@ impl LayeredImageImporter { pub fn set_target(&mut self, target: &OstreeImageReference) { self.target_imgref = Some(target.clone()) } + /// Determine if there is a new manifest, and if so return its digest. + pub async fn prepare(&mut self) -> Result { + self.prepare_internal(false).await + } /// Determine if there is a new manifest, and if so return its digest. #[context("Fetching manifest")] - pub async fn prepare(&mut self) -> Result { + pub(crate) async fn prepare_internal(&mut self, verify_layers: bool) -> Result { match &self.imgref.sigverify { SignatureSource::ContainerPolicy if skopeo::container_policy_is_default_insecure()? => { return Err(anyhow!("containers-policy.json specifies a default of `insecureAcceptAnything`; refusing usage")); } - SignatureSource::OstreeRemote(_) => { + SignatureSource::OstreeRemote(_) if verify_layers => { return Err(anyhow!( "Cannot currently verify layered containers via ostree remote" )); @@ -246,25 +266,39 @@ impl LayeredImageImporter { (None, None) }; - #[cfg(feature = "proxy_v0_2_3")] - let config = { - let config_bytes = self.proxy.fetch_config(&self.proxy_img).await?; - let config: oci_image::ImageConfiguration = - serde_json::from_slice(&config_bytes).context("Parsing image configuration")?; - Some(config) - }; - #[cfg(not(feature = "proxy_v0_2_3"))] - let config = None; - - let mut layers = manifest.layers().iter().cloned(); - // We require a base layer. - let base_layer = layers.next().ok_or_else(|| anyhow!("No layers found"))?; - let base_layer = query_layer(&self.repo, base_layer)?; - - let layers: Result> = layers - .map(|layer| -> Result<_> { query_layer(&self.repo, layer) }) - .collect(); - let layers = layers?; + let config_bytes = self.proxy.fetch_config(&self.proxy_img).await?; + let config: oci_image::ImageConfiguration = + serde_json::from_slice(&config_bytes).context("Parsing image configuration")?; + + let label = crate::container::OSTREE_LAYER_LABEL; + let config_labels = config.config().as_ref().and_then(|c| c.labels().as_ref()); + let commit_layer_digest = config_labels + .and_then(|labels| labels.get(label)) + .ok_or_else(|| { + anyhow!( + "Missing annotation {} (not an ostree-exported container?)", + label + ) + })?; + let mut component_layers = Vec::new(); + let mut commit_layer = None; + let mut remaining_layers = Vec::new(); + let query = |l: &Descriptor| query_layer(&self.repo, l.clone()); + for layer in manifest.layers() { + if layer.digest() == commit_layer_digest { + commit_layer = Some(query(layer)?); + } else if commit_layer.is_none() { + component_layers.push(query(layer)?); + } else { + remaining_layers.push(query(layer)?); + } + } + let commit_layer = commit_layer.ok_or_else(|| { + anyhow!( + "Image does not contain ostree-exported layer {}", + commit_layer_digest + ) + })?; let imp = PreparedImport { manifest, @@ -272,43 +306,132 @@ impl LayeredImageImporter { config, previous_manifest_digest, previous_imageid, - base_layer, - layers, + ostree_layers: component_layers, + ostree_commit_layer: commit_layer, + layers: remaining_layers, }; Ok(PrepareResult::Ready(Box::new(imp))) } - /// Import a layered container image - pub async fn import(self, import: Box) -> Result { - let mut proxy = self.proxy; - let target_imgref = self.target_imgref.as_ref().unwrap_or(&self.imgref); + /// Extract the base ostree commit. + pub(crate) async fn unencapsulate_base( + &mut self, + import: &mut store::PreparedImport, + options: Option, + write_refs: bool, + ) -> Result<()> { + tracing::debug!("Fetching base"); + if matches!(self.imgref.sigverify, SignatureSource::ContainerPolicy) + && skopeo::container_policy_is_default_insecure()? + { + return Err(anyhow!("containers-policy.json specifies a default of `insecureAcceptAnything`; refusing usage")); + } + let options = options.unwrap_or_default(); + let remote = match &self.imgref.sigverify { + SignatureSource::OstreeRemote(remote) => Some(remote.clone()), + SignatureSource::ContainerPolicy | SignatureSource::ContainerPolicyAllowInsecure => { + None + } + }; - // First download the base image (if necessary) - we need the SELinux policy - // there to label all following layers. - let base_layer = import.base_layer; - let base_commit = if let Some(c) = base_layer.commit { - c - } else { - let base_commit = super::unencapsulate_from_manifest_impl( - &self.repo, - &mut proxy, - target_imgref, + let progress = options.progress.map(|v| Arc::new(Mutex::new(v))); + for layer in import.ostree_layers.iter_mut() { + if layer.commit.is_some() { + continue; + } + let (blob, driver) = + fetch_layer_decompress(&mut self.proxy, &self.proxy_img, &layer.layer).await?; + let blob = super::unencapsulate::ProgressReader { + reader: blob, + progress: progress.as_ref().map(Arc::clone), + }; + let repo = self.repo.clone(); + let target_ref = layer.ostree_ref.clone(); + let import_task = + crate::tokio_util::spawn_blocking_cancellable_flatten(move |cancellable| { + let txn = repo.auto_transaction(Some(cancellable))?; + let mut importer = crate::tar::Importer::new_for_object_set(&repo); + let blob = tokio_util::io::SyncIoBridge::new(blob); + let mut archive = tar::Archive::new(blob); + importer.import_objects(&mut archive, Some(cancellable))?; + let commit = if write_refs { + let commit = importer.finish_import_object_set()?; + repo.transaction_set_ref(None, &target_ref, Some(commit.as_str())); + tracing::debug!("Wrote {} => {}", target_ref, commit); + Some(commit) + } else { + None + }; + txn.commit(Some(cancellable))?; + Ok::<_, anyhow::Error>(commit) + }); + let commit = super::unencapsulate::join_fetch(import_task, driver).await?; + layer.commit = commit; + } + if import.ostree_commit_layer.commit.is_none() { + let (blob, driver) = fetch_layer_decompress( + &mut self.proxy, &self.proxy_img, - &import.manifest, - None, - true, + &import.ostree_commit_layer.layer, ) .await?; - // Write the ostree ref for that single layer; TODO - // handle this as part of the overall transaction. - self.repo.set_ref_immediate( - None, - base_layer.ostree_ref.as_str(), - Some(base_commit.as_str()), - gio::NONE_CANCELLABLE, - )?; - base_commit + let blob = ProgressReader { + reader: blob, + progress: progress.as_ref().map(Arc::clone), + }; + let repo = self.repo.clone(); + let target_ref = import.ostree_commit_layer.ostree_ref.clone(); + let import_task = + crate::tokio_util::spawn_blocking_cancellable_flatten(move |cancellable| { + let txn = repo.auto_transaction(Some(cancellable))?; + let mut importer = crate::tar::Importer::new_for_commit(&repo, remote); + let blob = tokio_util::io::SyncIoBridge::new(blob); + let mut archive = tar::Archive::new(blob); + importer.import_commit(&mut archive, Some(cancellable))?; + let commit = importer.finish_import_commit(); + if write_refs { + repo.transaction_set_ref(None, &target_ref, Some(commit.as_str())); + tracing::debug!("Wrote {} => {}", target_ref, commit); + } + repo.mark_commit_partial(&commit, false)?; + txn.commit(Some(cancellable))?; + Ok::<_, anyhow::Error>(commit) + }); + let commit = super::unencapsulate::join_fetch(import_task, driver).await?; + import.ostree_commit_layer.commit = Some(commit); }; + Ok(()) + } + + /// Retrieve an inner ostree commit. + /// + /// This does not write cached references for each blob, and errors out if + /// the image has any non-ostree layers. + pub async fn unencapsulate( + mut self, + mut import: Box, + options: Option, + ) -> Result { + if !import.layers.is_empty() { + anyhow::bail!("Image has {} non-ostree layers", import.layers.len()); + } + self.unencapsulate_base(&mut import, options, false).await?; + let ostree_commit = import.ostree_commit_layer.commit.unwrap(); + let image_digest = import.manifest_digest; + Ok(Import { + ostree_commit, + image_digest, + }) + } + + /// Import a layered container image + pub async fn import(mut self, mut import: Box) -> Result { + // First download all layers for the base image (if necessary) - we need the SELinux policy + // there to label all following layers. + self.unencapsulate_base(&mut import, None, true).await?; + let mut proxy = self.proxy; + let target_imgref = self.target_imgref.as_ref().unwrap_or(&self.imgref); + let base_commit = import.ostree_commit_layer.commit.clone().unwrap(); let ostree_ref = ref_for_image(&target_imgref.imgref)?; @@ -331,9 +454,9 @@ impl LayeredImageImporter { base: Some(base_commit.clone()), selinux: true, }; - let w = + let r = crate::tar::write_tar(&self.repo, blob, layer.ostree_ref.as_str(), Some(opts)); - let r = super::unencapsulate::join_fetch(w, driver) + let r = super::unencapsulate::join_fetch(r, driver) .await .with_context(|| format!("Parsing layer blob {}", layer.digest()))?; layer_commits.push(r.commit); diff --git a/lib/src/container/unencapsulate.rs b/lib/src/container/unencapsulate.rs index 0f728b7a..bb3db5bb 100644 --- a/lib/src/container/unencapsulate.rs +++ b/lib/src/container/unencapsulate.rs @@ -32,13 +32,13 @@ // which is exactly what is exported by the [`crate::tar::export`] process. use super::*; -use anyhow::{anyhow, Context}; use containers_image_proxy::{ImageProxy, OpenedImage}; use fn_error_context::context; use futures_util::Future; use oci_spec::image as oci_image; +use std::sync::{Arc, Mutex}; use tokio::io::{AsyncBufRead, AsyncRead}; -use tracing::{event, instrument, Level}; +use tracing::instrument; /// The result of an import operation #[derive(Copy, Clone, Debug, Default)] @@ -52,11 +52,11 @@ type Progress = tokio::sync::watch::Sender; /// A read wrapper that updates the download progress. #[pin_project::pin_project] #[derive(Debug)] -struct ProgressReader { +pub(crate) struct ProgressReader { #[pin] - reader: T, + pub(crate) reader: T, #[pin] - progress: Option, + pub(crate) progress: Option>>, } impl AsyncRead for ProgressReader { @@ -70,6 +70,7 @@ impl AsyncRead for ProgressReader { match this.reader.poll_read(cx, buf) { v @ std::task::Poll::Ready(Ok(_)) => { if let Some(progress) = this.progress.as_ref().get_ref() { + let progress = progress.lock().unwrap(); let state = { let mut state = *progress.borrow(); let newlen = buf.filled().len(); @@ -116,20 +117,6 @@ pub struct Import { pub image_digest: String, } -fn require_one_layer_blob(manifest: &oci_image::ImageManifest) -> Result<&oci_image::Descriptor> { - let n = manifest.layers().len(); - if let Some(layer) = manifest.layers().get(0) { - if n > 1 { - Err(anyhow!("Expected 1 layer, found {}", n)) - } else { - Ok(layer) - } - } else { - // Validated by find_layer_blobids() - unreachable!() - } -} - /// Use this to process potential errors from a worker and a driver. /// This is really a brutal hack around the fact that an error can occur /// on either our side or in the proxy. But if an error occurs on our @@ -180,18 +167,17 @@ pub async fn unencapsulate( imgref: &OstreeImageReference, options: Option, ) -> Result { - let mut proxy = ImageProxy::new().await?; - let oi = &proxy.open_image(&imgref.imgref.to_string()).await?; - let (image_digest, raw_manifest) = proxy.fetch_manifest(oi).await?; - let manifest = serde_json::from_slice(&raw_manifest)?; - let ostree_commit = - unencapsulate_from_manifest_impl(repo, &mut proxy, imgref, oi, &manifest, options, false) - .await?; - proxy.close_image(oi).await?; - Ok(Import { - ostree_commit, - image_digest, - }) + let mut importer = super::store::ImageImporter::new(repo, imgref, Default::default()).await?; + let prep = match importer.prepare().await? { + store::PrepareResult::AlreadyPresent(r) => { + return Ok(Import { + ostree_commit: r.base_commit, + image_digest: r.manifest_digest, + }); + } + store::PrepareResult::Ready(r) => r, + }; + importer.unencapsulate(prep, options).await } /// Create a decompressor for this MIME type, given a stream of input. @@ -225,71 +211,3 @@ pub(crate) async fn fetch_layer_decompress<'a>( let blob = new_async_decompressor(layer.media_type(), blob)?; Ok((blob, driver)) } - -pub(crate) async fn unencapsulate_from_manifest_impl( - repo: &ostree::Repo, - proxy: &mut ImageProxy, - imgref: &OstreeImageReference, - oi: &containers_image_proxy::OpenedImage, - manifest: &oci_spec::image::ImageManifest, - options: Option, - ignore_layered: bool, -) -> Result { - if matches!(imgref.sigverify, SignatureSource::ContainerPolicy) - && skopeo::container_policy_is_default_insecure()? - { - return Err(anyhow!("containers-policy.json specifies a default of `insecureAcceptAnything`; refusing usage")); - } - let options = options.unwrap_or_default(); - let layer = if ignore_layered { - manifest - .layers() - .get(0) - .ok_or_else(|| anyhow!("No layers in image"))? - } else { - require_one_layer_blob(manifest)? - }; - event!( - Level::DEBUG, - "target blob digest:{} size: {}", - layer.digest().as_str(), - layer.size() - ); - let (blob, driver) = fetch_layer_decompress(proxy, oi, layer).await?; - let blob = ProgressReader { - reader: blob, - progress: options.progress, - }; - let mut taropts: crate::tar::TarImportOptions = Default::default(); - match &imgref.sigverify { - SignatureSource::OstreeRemote(remote) => taropts.remote = Some(remote.clone()), - SignatureSource::ContainerPolicy | SignatureSource::ContainerPolicyAllowInsecure => {} - } - let import = crate::tar::import_tar(repo, blob, Some(taropts)); - let ostree_commit = join_fetch(import, driver) - .await - .with_context(|| format!("Parsing blob {}", layer.digest()))?; - - event!(Level::DEBUG, "created commit {}", ostree_commit); - Ok(ostree_commit) -} - -/// Fetch a container image using an in-memory manifest and import its embedded OSTree commit. -#[context("Importing {}", imgref)] -#[instrument(skip(repo, options, manifest))] -pub async fn unencapsulate_from_manifest( - repo: &ostree::Repo, - imgref: &OstreeImageReference, - manifest: &oci_spec::image::ImageManifest, - options: Option, -) -> Result { - let mut proxy = ImageProxy::new().await?; - let oi = &proxy.open_image(&imgref.imgref.to_string()).await?; - let r = - unencapsulate_from_manifest_impl(repo, &mut proxy, imgref, oi, manifest, options, false) - .await?; - proxy.close_image(oi).await?; - // FIXME write ostree commit after proxy finalization - proxy.finalize().await?; - Ok(r) -} diff --git a/lib/src/fixtures/fedora-coreos-contentmeta.json.gz b/lib/src/fixtures/fedora-coreos-contentmeta.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..a1276a3f34046a9ecd83fbc96c1c3f7fee818f6e GIT binary patch literal 10233 zcmVyqlYc7R{y)c%#!Ii_^A ztbT{&1@dFcfT2ANhOtfWODcKxNCry=Eoq5qcB& z`fqRF4sCb&QS93O_`U9%f)Qr^A-bSUuXYlgf}%0Eise?uNT%b%nX*-N3E>?PMFrKJdDKc3bBL~?=Z8oqUIlF zz6umQi=8D;!HD2Q5-&JoOzEl$1aG@`?Caxh_{YC}G=0|`{vlV?{8O24BTJe7-T#26 z#>_wDsX@^15ljnRDyHT9t5ijWDk~XZNnJC3KlLANw=d3zi5e4o!_21v$iNVzn;TFVwhX1Fo!&KR&pmO)4J3w zG=f8YT-U|s(3<~Nm|Zgz>`mAwCX2|LePV>0>yzG*fl?b)hz8$aHh^@>A z(%4GCuvtpW-6NQLR5I_{395w(?eX5skaf2diQ9lA&1%=Ki#$;Sy%qzdP-zjPb( z0O^Xxw|tUu#?(T02qGQT-bZ7?Xj2ZX&Y#2B9Jd7%dZl8zJ`K4w2vNvRFTL8NjB3VJ zsEy3<;^sxz*dVTuaB$(5z$CUF{hpyzNWqB;#J!B|p}590X05CEqQt>j+$f1H&xMmt zDn=l2?Y=z?g>fav`fzA^qdklQjBfQ@+%MYUsv8o6uwUV=0VR|v0)$96?`?L9^ISrV zevNBrWo8sG40cxRlO4?b9FNBIR4dL3o|yOy+J_7?e#c=RSn1a{G0IE=@z1=?FWcrb zPi-W~7wHZyjI!h5viQ$yy!0P+XCjZ@+S2YgIV_ynP&Xp?lcVO}@G$HFM}7?Rmd-d4 zCYlbzoqcyX@9pRDgrQ3@&d>o6vYsV(u6ZS8sc2~DcI~0LdMk1zNTFc?5o#83!j=TK zaD8N32`RM-*TTNP81p`!PG(QgH?|^WK_d=W4(cXzV(%DCHz@b0D=tB+Y`=Pa`eF1W zS!uoEv6YH)vy?Fq5!JY~PE>04m<&bTeZQP-;!7FFGqNaCh{O?!DKu-h#|}1;DN#7EA1aIRh25kWJbhD$4&ol zw)rLD6{F!C4#TjW-43=YjrxeC1W?c!d`sRT$aHeA;UzQMMTM%U-i=MazVw}0Lq;LT z@~{|ow_hSK_Uwtv3rJd)oP~b(@Lu=Lwm6z`#xdTVY~mr7#K>aZ%qC!DDQN1{y(CH# zAlhD!{cb4E{b@fX)5>@>mO;9GA4QRQ)|7dHiBU{I3Zogv=ErCh9;0SozzQJ6I1Kzt`f`uZI0f~*P z=X2Zb4{p+9RWOTviAyDnbIO2xVZ(OAb%oKDNT@KBo286_X;B$beqy;)N&peoWoz@L zCK5VFU6mx`Q~>}X;w@&9YvcB-P;;HmO*agOs~1nFq!m5H)&L9`Afkzi0R^?dCCM~N{ge&&?h|U{VXv?w( z8T@P305ww%sD*9w*>0NRSf9@(dNcnU+QF8li)}sD#?WiCSt3j0?+l%P%M)YKh2-|A ziOUE9eD|^Lr^8FcVWmD)qy--Td>P1*I$N;YG9r%DMU6Y#tD-^6i5L zCMrr8V17@h(WZBd>%1zXb~g##1!W+Lc?mv)$%LpR&|46cDeU){5UQ`AEsCH6e@}UY z2_`ERE|QP6jRyytzYs>ZRIpA)DAypd-rf^P@`$LVe4MIqw+(y#$JHoVS32tQrYUFS zKrBDmR47ppR83VD%hj?Sf1TjWAG*)M$Bget2a~kjIKqzOlKnz_7UggqK5EA-2AN^Y1 z;+YlIar1?8Zbw=0oJ7AL7Iuz zcm-s{Y*AjUsETLq62!8ye~)0Pxy+Q4Z^xj%N6t z!$NfG9=`3Es8pEj*z`ZoqoFV7**6jKj7J{qlFXjI1F?Q#-`htuF-}+5=dt-|6iqSI z*KC1JSFKolqBChHw#Oe(WVTD?EBTdK9W2yZ=KHs6quKe2RfUWMzG=ugi4cW&HhfXE zue8)0prXLR8olb%Jl z_#>1SS&@Y?HEsz3x}kS%x4i`vZ^9VrsC$BgzP)88@3{OX7b@)el+YAy+uiQ^!F&~+ z)imU?R1k$2ODeop6do~VS*UUU z{wFwX{PahlE1xZXIM*C-Z~FFPLylWiHUdAKB4APm6D|%1m=QAm%6$8r5!@-Q3TIyJ|_K{?qd;pZ6 zyfEUK@Bk3+W-Q%FvOF`4;jgmAHbD<)GxI$6Q@k-1F{2eb3|m+>h{FN^>76ic_7YGL z{b+tPn_E$f*~Y4(G z@V*Vzx&$`u zXV&j2QX1zgRIzhb%!J40+2-ZjlI)ApZ6?w!je?CfMom+e(;XEDU(+H*3F89jhOqtj z2X1qd=EuvpkAiXwJ7B>(2=(vo&Y)C)n9bqlE}M@x|KApIs%Wt*4|c6aGku34dRB?W z5fq|1;EQ*gOFzuHO51g3f+wTNi6oIJ&L7~49=8z?oL>MjHcUZ^tp5Hn^$SNsvIOB4 zZGfOlb|IHg6sR5p3oNo)f9FconlqppxH-2|Qz*A}UN{bk6D|x15KJzgrPO690T+5> zj9t^)f8Eq$GpTV~hn8%*;{fzgo*CjK$HySbmHQ}Hmz1cuyKS?QS&nube36T-Wu<|1 z%&*>j@xC!0{uBpKeRnaVmBrrG3{xX81LoZrBRY-N3McnCaY!AZ?rVjQ7?OBjoCeiG zLhInTt+%!w#fp9tSpg|zmJVQ$7=RP61*q|~LeaY2Tzh)6>ptqNqMXz84qRf0oXW>4 z+QwF(E*(yXPhV91&3P!gy-kR$sEW-m%DH@Z1VnAUOT$(|3gFmj*Ni?Gw)H3D zWD;5uP1(L3BWw4WF>W1a49fRhD#4-lPGfqEO6;b>0JWvxh#^KWZ__{yF%Oa~#AKR- zo6oMs_OqFMkZ7v|$}Kh^2x2J>AAZ?uAOKhJzNhkdIgIVO9^dDnrCOCmB*y(4BjH9) zKlE-C))nJxd=)Qs84oP8BFf(yN>)@>KpT>cVLomDs!gBMXvqx27Hb6JP%XU>QxKrC zHMQTMkv(Tz&KuNbUiU-+uN( zRjdNKwQNJsw3YGJq7?Ru{vxF$v@RbG5s#Pl>66OoF|+KiI<2hiiAFF_vp?^Prtc?T zUql;@aB!Ki2T4R&zSv?V;S%hK`8AB&B6-7A8k&U+5oAelwm7+NEpHe%oOiulX7Thv zf^bnofS_&*@p6gCtPRcibg<4{!WW1$OGo?>!n_rKa85BTfW(HiY#+_fV%ttF)KkU@ zj=E9sE{u)hXkE^EEsAO>!>wM`W4U zoK5BpTL}H4WRN0UK@FIf^W3-kW(&~#bY}7X%PiOhD3%T|WIZzJZ&0YX1e}mQIp)VcRE;(g@|2`OX zEml?3*@*nvt;hSn=lA+p{V0KSLD%jt{pDC2ZKiFWbN-<;jLkAo7oNpg)U9-mTS=J! z4l$mG>$x*~A6djerVzInLnc4%D798Vm7y8mZ8ET6Z}wu6sE$liPCeeRC4||)#g#0& zg3e6sx9#vT1^3abmr{sxfI=p}4k|UTNVqd>NPpPY*OMYLn!LxbWrhIY5MwB-#WQ9C zC^7WI&u(Kr&G;v=O`JkptPnETX7;*8+DuUBk{O$O*siA^oN^=L%pxcVAk58Se&Q;@ z0lU{V<9KMMv)9pZFv%=(?g5maV>dQg12=zlpT}nE2B9lTZ_BF}DN7)K=C8TAooLIuJSPP^b@=5Qbfi#>hGFWT z!>KE5Lt0^9$Pv}r7&glp;jq5Zg?co>7ZClsC_h<@JDH)(+iJ z+=l`r+RB~>JCQwo2cnsDzq#z|Q-ISv#y>U6SVZrpi~~y-_l`g&QZC;LQI){8e|AEK z_vvg+bZsV?Wftub!a_se#cDh;sE=(pZECy5)Ji1o{!c>AwS_3e>Ifenh>8n9Bkj-o zqir^}FKCE`{Yl2TJOG3xmP_iT0}nWX-KCLk)!*BwD$0rhx#ev)9P8mPv&E`tx-7}K zya@nd!Jk}6S85hUFY|f1amu`A9>Pn*ZXT zyqXb`aNzRmb3F{-?IkG{iFW!W88>kN^u(+&S?X}se*Btl)ReJ2a`f-BRF44WYcs!d z8R5XoF8Ze49_*{Is(24vl5v3m0HKk9*LpU?4ZOgtzjW45O02Rt;Z|9IV7V{vz8b|0 zT|m^HZ#R$G)?US;(xIGpUtkcMLEkWFB&E=etAAbU;e9vUx_e9rX@fQUEt>LuJ2pgA zvWlJE`dfpYA$(>uV!^3@fsY^r49j-2Gj+SI#b)T`loaVVk%j%#7~k?Y4+>b!4e^N= zu2luxEogn48Sc1YBzRiM+#5E##a<;|(xiew$tUS>N;^R|Spu*cj## zbyseIpzVWxE~y}ZVlvmsHr=nB>OXx+LV8t{DGRZN{(omaBMoZLh?}uWlg6^J7DY_t3&S77oLHz}=3cTj= z_M+J9^$dq6UM-t5!mZe^y;@cgC@z^)^l@rLN7a4;yVLj~m)|ADO08mzXSWyeX1w?) z1-`(Vljo5x`XnwBh0?Z0&HLSaXhTevf;^6R{~nd7g9(u zg!(RBR|#70Mi3-La`@mx0KHzz_^Ni=X? z^Uw@KA$8ohkNw?A^#EX-*`H5g(o7+Es;b>~>~^zv=%a`zFypoZ(kJ+9`U<61V%ZKIdx4opF?yS^Z3P#CI{pv#qB$! zfh+FpVY$OcyTu*PB=$!X6Yy;>Kdv_z3R}dD-tdZYORow7(YC?m`v)LNZ~6DR_bVf#pG04k z);H>^)ae(Tp6gAHuaChm@%IU@pe^6Jx@&1~V2Yj4LNK@GVGdoa-uWkl1KP9R93}@T znkJ6}E(`HUpm}G1O`p~pXkv`Lz97_>2B*mErzb3nBUmXG@BqP7W?xo+QZ+`#mN7!xN~W86h{K^FQNj0c=c`Wd*9PWhm5gJ7X}aDs&+-}e_u%$ zAt0p`ZEE&36Hy;-8B3{r=d>JfBtl($iXSHmL@#4g`m`NVZw;c{gFhq@K@_mZz)%aB zTAZ0iMB9f;27PzhF$rR@5ev6@Vss@nITjhpR207Uj6)dpRNL5Scv z`PFoVM~w9m7Ghl8;_n-*8k#`SPuBOX{GP@|SqOD3RtTz2N7}l;47V>j=7VH!MkftI zsh{+V&WR1m7NrJlpPP55vg(WWXWrCxFFe2A&;Y=qE;v7<`@OM;+`Zt0a?2K$;8oe) z`1Nm@uY^$g`B`Hd))=Nxw|Z$iEfcCh1Iq~Qk(fn9syO%tAM9Gmnc7CbfF07_-F>&$y#O}&x`H$Vl!Wu~ zaGi4a&Ba(Kr#6}Zbpp9VeClgUq=1?)0<7IpmU&x;BUm79pRK74%4-ls0bqv|k1;_n zHs2`-rK}PV+i>Rv5jCVT)TNN7r}8X)y2PXAcZsvgwIqfVd4GfN5D>E(;f-nPDJ6V1j z07aA1w=`tS+Wk`|=5;5PpQKQ$y!+u$6`w7;3ICEg0*7+r0!<+D3KL(O;*}7fM||$F zR<@)eiKT+w3aN-IL?H{LnYIJ@1_TpS#N-Fj-PbHRO~l;R(UA>S1J|7Zbwqq*R|WFo zHg+$!L;C7g)RqN?F;vmfoZF^na;7RtEyfqdsbw0#4VMpJDO|tTA7x~UOD?mCYi9Il z`V!i-Is$Y)QEi*5;>_}g=hzU+POE71t39EJPag}Z$1 ztr7w>XOT5l%}3Ez>~RubKQZr6Cw5$}4MtQseYRDc0mZx3mG7>n7M{KF`@(djFive) z0nQos{hGxI&|OBH0oK)`R)C1QOS8wois?rR;M}EC7C{Zm`Nw{+ma8o9GH&Gsw#09Y z)Ydwpfn)af?0pjGHyt#lRZ~NS4rc`M7h~{2SE4ahBOO-={{fi<<2+K(+4b?k(rk2 zKq@VGKOh)g34G#*!92d`fN4wjvUB>Ghd}qElqOLB8~(Yh>+YQiO9h@gL&(wOqMh@B*3qV!oZ)~#LRx`KdvrI!RfCL z_N#i9)SDU{*pP8)0E3efxFTpl4&C2Lo@5OOxTqy!Zz*faa$)!kY z`cch*+HFh1-D#1E3Ce5Nkm$0S3sGQ{fHp+Ah}Mj|n<(L0#M*8+Rjj|UcdskDPE26D z6xC2yM*>U1;HHPvE%SV8j$!Tvm!ugy6X({+;E|qDU*}}V(0Kx-U8KP>Mb=z@=#9sTqHPSPl;swxhS8Ln0+o$`RAX@{^*~M`ROqHQ>($xfAHvZPdYz~$k}M>2Af0_ zFp`#5`g1pIG3;%R>}K63GzO=A2~RKUuVXh8nI2)38!qs4GDmt2B9};*Ou~agHNHSg z6;R{<&dw}J=)<`)`Jrj$=wX{5&jRTwdn?La9Zd+-@1*IxTiYDu6_q5Wz$`+ID}*sL zUZ*WF0Fvm7Q+a5b{(9yg8yj;n5o%l_jG=;88~X)i65j3yE>*hw=~Vva2che|BRh3$ zkTDTaqcxWDZ1ZzUFs~k5K7zh=^_Ai@C7{>zJ!iM_89Fu_Q#MN5x&VgmU6pGUM=3D! z2Dm}t+xQzV;+S$7x>jmg-E~9fFU!kAD5cthn7eixhkmqnk(KS=2UamzD)WO#kzzRf z4y3w~CUgdFSZt65hSvGr&%9{+tIS%quoQcTe1ixupnKIUGe6u5sd#h0vp6 zbHv>;i2VexT&~Ug;5zFxWJLy5QwG8lmZ=PP-)WBY_IO`4(7j!+QA3FR|nVE zu(FY!U_yvTEaT>FnC?CSICoVk;^s7Xvh${@io6$w{Zk7wXJ6s$e%V)18o)Vs_rAE^ zda00!tWk4$9|G$6K_Kbw)f3gaV9ThURaKMA!f)G@VJkL^E3LULtBioI_fV53$j~q{ zx~1{w-J+r-%+^XrSF_vgA-(^`nQK-?U5WsN8U5P4I{xC;i+jJZw*PJKW@Ba4VcEBN zjTnCgxG0*sXJyU*fw87Xwt7vHNg}UY6B`~;f^_}N{0T%E;oD&P9zNg!tnXKr_tj{d z^UK(nAn1aQ>mJ!ThF2UfJ(!Ss#zXR5R<~afH-ltpP=rn@u%RQRo9{qn(i9A~l3q<{;W3 zCDR}Zy{pP>f8ba79?Q{W{yR}8R^niN|ol4ml zGB6j-84+!9O9pv)VW!XI0<;0?GZHNkh6W0Fkvqoj;f$gL3a$%Djc}Ffv zpcYcMw&|P0{p95`pVllsF-}QP1L*E#rhW#Pd`uLu)~qi-hU9Z&)*BA0bA5SNWm&{A z^^IFt+MqLNPODHX%5ztoKcuJF;<2}4+QET1cV}6c20-b}Xz0i=?8EE10#WqE(c>RY zlqI~ht+qX+j??_98H}b@OmjjKABbT;oy-mrz=7Q3{D(1A7^>nf7jCfStjZmuIS9U2 zSSO5Ac=&mTO`NteD%SV%py@CsK$0o{$B(SIO2-~Ie*?2%wPM;^ggEyLG&GIes84m# z%YE< zigMy~VWv*z>Vrns{qE8k+eVEG-DRX%Xc!2|V=zLtslYZ zj2}hY4}l${+YO;KF$Q4n@qr+t;aY-yUdsI9$%bF1j@&k6TpYll+Mzba3Mw-bT0Yt_ z#||l5;Dx3+K}uubLUr&`e7B22)V{!Tt7PwXUr01_`j!RK-J~nZ{mKkdsNc2}VAavd z47Ff;$ipCo+WYZuUEbKw%K>Yzjf1-^GKsUb#(Ck2fmnu^&K#PeQ+8-v97v(RvNne! z1&rnT)?b7(*yOM^{#^xZ7Jz%GCI}75+FTv^S+p&#rz7Bg%6bq&BhrQj4TC@Z+%}&g z#wdioYN52!(3~Hb@*$=tbkIGEEC?c?h*jnHc10_cgJq)ca^=pvO4oWdftdflVK_GH zFF(gO64=|+;Ou^h5~9H~xZvZ3b(d$(jb*J?Y|mv9xWG*)x842NIBCHaa2HwX6JU;w zEatBG?yEa41cM)$s7wo#Ik4QbuDD9p6#N20;y963EuKQv>>uUTJ2I{}1V2)~0V$mZ zVl0>If{sfS>=LpsHSx8|doWGrR2-X@d2LY@Vw&sICJZAlj8|)QVwJW&hX)Fa-)ikY z5(?Ctm34XXsIhm7>CZ|$&HwIxi;@It#y&Nib?i&~jSXodsL!TwQQWHgxvW(_2o>E< zjCBL<59nTyIf5)YnquI5(Lf<{B}U$yezLY@582f^vdK>r-A{({<-(fFfpulx9DWsf zFDK8~ZzmUe&qYK1e(J9CSS6Ziin^ttI|?xv=1#2A3oWp_jz}UO=qx#gw6Am?)n3&= zVBAhIauURVu5vuwC)e5!S$`R*R*}o4y1{BU2zmh_IeyXP8_!a*Uac*pfdozdiC9Sj zbqwLZS=UsBS2aRTJ?mkwMc1)&A*QHw_u^dCL$#)As{y!8U-%Hsn`-%#`NN1(5m2R6 z#E=^F%#nVMNkLPt&D>;U{CU!KNWc_F=XbH~*Z9Bu2-F7xHMhJi0>*t{w{R6>nIDkjSBq;Y0I&8q$N+}cKl&0hJPkk9zu-zfo-P-<2;>Xe9S z&rQp(Atm9L-_8|RHjLmkV1sWPh*xyak&Yykvr-Ha7@;Pu8p6;}-ej@>NwY)mm2qUw zqEpLioldY??w_=E(E?~-6nVJxMFJ)@g)cBnk0=A2dtBN7?|=RW4+K{M=>h=&shb8I literal 0 HcmV?d00001 diff --git a/lib/src/lib.rs b/lib/src/lib.rs index c0b9b8e8..23a41ac9 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -37,6 +37,7 @@ pub mod refescape; pub mod tar; pub mod tokio_util; +pub mod chunking; pub(crate) mod commit; pub mod objectsource; pub(crate) mod objgv; diff --git a/lib/src/tar/export.rs b/lib/src/tar/export.rs index 2d188b1c..646722d1 100644 --- a/lib/src/tar/export.rs +++ b/lib/src/tar/export.rs @@ -1,5 +1,7 @@ //! APIs for creating container images from OSTree commits +use crate::chunking; +use crate::chunking::Chunking; use crate::objgv::*; use anyhow::{anyhow, bail, ensure, Context, Result}; use camino::{Utf8Path, Utf8PathBuf}; @@ -9,6 +11,7 @@ use gio::prelude::*; use gvariant::aligned_bytes::TryAsAligned; use gvariant::{Marker, Structure}; use ostree::gio; +use std::borrow::Borrow; use std::borrow::Cow; use std::collections::HashSet; use std::io::BufReader; @@ -526,6 +529,73 @@ pub fn export_commit( Ok(()) } +/// Output a chunk. +pub(crate) fn export_chunk( + repo: &ostree::Repo, + chunk: &chunking::Chunk, + out: &mut tar::Builder, +) -> Result<()> { + let writer = &mut OstreeTarWriter::new(repo, out, ExportOptions::default()); + writer.write_repo_structure()?; + for (checksum, (_size, paths)) in chunk.content.iter() { + let (objpath, h) = writer.append_content(checksum.borrow())?; + for path in paths.iter() { + let path = path.strip_prefix("/").unwrap_or(path); + let h = h.clone(); + writer.append_content_hardlink(&objpath, h, path)?; + } + } + Ok(()) +} + +/// Output the last chunk in a chunking. +#[context("Exporting final chunk")] +pub(crate) fn export_final_chunk( + repo: &ostree::Repo, + chunking: &Chunking, + out: &mut tar::Builder, +) -> Result<()> { + let cancellable = gio::NONE_CANCELLABLE; + // For chunking, we default to format version 1 + #[allow(clippy::needless_update)] + let options = ExportOptions { + format_version: 1, + ..Default::default() + }; + let writer = &mut OstreeTarWriter::new(repo, out, options); + writer.write_repo_structure()?; + + let (commit_v, _) = repo.load_commit(&chunking.commit)?; + let commit_v = &commit_v; + writer.append(ostree::ObjectType::Commit, &chunking.commit, commit_v)?; + if let Some(commitmeta) = repo.read_commit_detached_metadata(&chunking.commit, cancellable)? { + writer.append( + ostree::ObjectType::CommitMeta, + &chunking.commit, + &commitmeta, + )?; + } + + // In the chunked case, the final layer has all ostree metadata objects. + for meta in &chunking.meta { + let objtype = meta.objtype(); + let checksum = meta.checksum(); + let v = repo.load_variant(objtype, checksum)?; + writer.append(objtype, checksum, &v)?; + } + + for (checksum, (_size, paths)) in chunking.remainder.content.iter() { + let (objpath, h) = writer.append_content(checksum.borrow())?; + for path in paths.iter() { + let path = path.strip_prefix("/").unwrap_or(path); + let h = h.clone(); + writer.append_content_hardlink(&objpath, h, path)?; + } + } + + Ok(()) +} + #[cfg(test)] mod tests { use super::*; diff --git a/lib/src/tar/import.rs b/lib/src/tar/import.rs index 9109ce1a..29befa1e 100644 --- a/lib/src/tar/import.rs +++ b/lib/src/tar/import.rs @@ -9,6 +9,7 @@ use gio::glib; use gio::prelude::*; use glib::Variant; use ostree::gio; +use std::collections::BTreeSet; use std::collections::HashMap; use std::convert::TryInto; use std::io::prelude::*; @@ -39,7 +40,7 @@ struct ImportStats { } /// Importer machine. -struct Importer { +pub(crate) struct Importer { repo: ostree::Repo, remote: Option, // Cache of xattrs, keyed by their content checksum. @@ -47,10 +48,18 @@ struct Importer { // Reusable buffer for xattrs references. It maps a file checksum (.0) // to an xattrs checksum (.1) in the `xattrs` cache above. next_xattrs: Option<(String, String)>, + + /// Set when we import a commit object + commit_checksum: Option, + // Reusable buffer for reads. See also https://github.com/rust-lang/rust/issues/78485 buf: Vec, stats: ImportStats, + + /// Used in the "object set" path only. + /// We need to generate a commit object which strongly references these content objects. + wrote_content_objects: Option>, } /// Validate size/type of a tar header for OSTree metadata object. @@ -151,7 +160,8 @@ fn parse_xattrs_link_target(path: &Utf8Path) -> Result { } impl Importer { - fn new(repo: &ostree::Repo, remote: Option) -> Self { + /// Create an importer which will import an OSTree commit object. + pub(crate) fn new_for_commit(repo: &ostree::Repo, remote: Option) -> Self { Self { repo: repo.clone(), remote, @@ -159,6 +169,23 @@ impl Importer { xattrs: Default::default(), next_xattrs: None, stats: Default::default(), + commit_checksum: None, + wrote_content_objects: None, + } + } + + /// Create an importer to write an "object set"; a chunk of objects which is + /// usually streamed from a separate storage system, such as an OCI container image layer. + pub(crate) fn new_for_object_set(repo: &ostree::Repo) -> Self { + Self { + repo: repo.clone(), + remote: None, + buf: vec![0u8; 16384], + xattrs: Default::default(), + next_xattrs: None, + stats: Default::default(), + commit_checksum: None, + wrote_content_objects: Some(Default::default()), } } @@ -375,13 +402,29 @@ impl Importer { match suffix { "commit" => Err(anyhow!("Found multiple commit objects")), - "file" => self.import_content_object(entry, &checksum, cancellable), + "file" => { + self.import_content_object(entry, &checksum, cancellable)?; + // Track the objects we wrote + if let Some(o) = self.wrote_content_objects.as_mut() { + if let Some(p) = o.replace(checksum) { + anyhow::bail!("Duplicate object: {}", p); + } + } + Ok(()) + } "file-xattrs" => self.process_file_xattrs(entry, checksum), "file-xattrs-link" => self.process_file_xattrs_link(entry, checksum), "xattrs" => self.process_xattr_ref(entry, checksum), kind => { let objtype = objtype_from_string(kind) .ok_or_else(|| anyhow!("Invalid object type {}", kind))?; + if self.wrote_content_objects.is_some() { + anyhow::bail!( + "Found metadata object {}.{} in object set mode", + checksum, + objtype + ); + } self.import_metadata(entry, &checksum, objtype) } } @@ -539,17 +582,46 @@ impl Importer { Ok(xattrs_checksum) } - fn import( - mut self, + fn import_objects_impl<'a>( + &mut self, + ents: impl Iterator, Utf8PathBuf)>>, + cancellable: Option<&gio::Cancellable>, + ) -> Result<()> { + for entry in ents { + let (entry, path) = entry?; + if let Ok(p) = path.strip_prefix("objects/") { + self.import_object(entry, p, cancellable)?; + } else if path.strip_prefix("xattrs/").is_ok() { + self.process_split_xattrs_content(entry)?; + } + } + Ok(()) + } + + pub(crate) fn import_objects( + &mut self, archive: &mut tar::Archive, cancellable: Option<&gio::Cancellable>, - ) -> Result { + ) -> Result<()> { + let ents = archive.entries()?.filter_map(|e| match e { + Ok(e) => Self::filter_entry(e).transpose(), + Err(e) => Some(Err(anyhow::Error::msg(e))), + }); + self.import_objects_impl(ents, cancellable) + } + + pub(crate) fn import_commit( + &mut self, + archive: &mut tar::Archive, + cancellable: Option<&gio::Cancellable>, + ) -> Result<()> { + // This can only be invoked once + assert!(self.commit_checksum.is_none()); // Create an iterator that skips over directories; we just care about the file names. let mut ents = archive.entries()?.filter_map(|e| match e { Ok(e) => Self::filter_entry(e).transpose(), Err(e) => Some(Err(anyhow::Error::msg(e))), }); - // Read the commit object. let (commit_ent, commit_path) = ents .next() @@ -642,18 +714,52 @@ impl Importer { } } } + self.commit_checksum = Some(checksum); - for entry in ents { - let (entry, path) = entry?; + self.import_objects_impl(ents, cancellable)?; - if let Ok(p) = path.strip_prefix("objects/") { - self.import_object(entry, p, cancellable)?; - } else if path.strip_prefix("xattrs/").is_ok() { - self.process_split_xattrs_content(entry)?; - } - } + Ok(()) + } + + pub(crate) fn finish_import_commit(self) -> String { + tracing::debug!("Import stats: {:?}", self.stats); + self.commit_checksum.unwrap() + } - Ok(checksum) + pub(crate) fn default_dirmeta() -> glib::Variant { + let finfo = gio::FileInfo::new(); + finfo.set_attribute_uint32("unix::uid", 0); + finfo.set_attribute_uint32("unix::gid", 0); + finfo.set_attribute_uint32("unix::mode", libc::S_IFDIR | 0o755); + // SAFETY: TODO: This is not a nullable return, fix it in ostree + ostree::create_directory_metadata(&finfo, None).unwrap() + } + + pub(crate) fn finish_import_object_set(self) -> Result { + let objset = self.wrote_content_objects.expect("Expected object set"); + tracing::debug!("Imported {} content objects", objset.len()); + let mtree = ostree::MutableTree::new(); + for checksum in objset.into_iter() { + mtree.replace_file(&checksum, &checksum)?; + } + let dirmeta = self.repo.write_metadata( + ostree::ObjectType::DirMeta, + None, + &Self::default_dirmeta(), + gio::NONE_CANCELLABLE, + )?; + mtree.set_metadata_checksum(&dirmeta.to_hex()); + let tree = self.repo.write_mtree(&mtree, gio::NONE_CANCELLABLE)?; + let commit = self.repo.write_commit_with_time( + None, + None, + None, + None, + tree.downcast_ref().unwrap(), + 0, + gio::NONE_CANCELLABLE, + )?; + Ok(commit.to_string()) } } @@ -689,8 +795,9 @@ pub async fn import_tar( crate::tokio_util::spawn_blocking_cancellable_flatten(move |cancellable| { let mut archive = tar::Archive::new(src); let txn = repo.auto_transaction(Some(cancellable))?; - let importer = Importer::new(&repo, options.remote); - let checksum = importer.import(&mut archive, Some(cancellable))?; + let mut importer = Importer::new_for_commit(&repo, options.remote); + importer.import_commit(&mut archive, Some(cancellable))?; + let checksum = importer.finish_import_commit(); txn.commit(Some(cancellable))?; repo.mark_commit_partial(&checksum, false)?; Ok::<_, anyhow::Error>(checksum) diff --git a/lib/tests/it/main.rs b/lib/tests/it/main.rs index 78a1f371..c9b310e8 100644 --- a/lib/tests/it/main.rs +++ b/lib/tests/it/main.rs @@ -1,9 +1,10 @@ use anyhow::{Context, Result}; use camino::Utf8Path; use once_cell::sync::Lazy; +use ostree_ext::chunking::ObjectMetaSized; use ostree_ext::container::store::PrepareResult; use ostree_ext::container::{ - Config, ImageReference, OstreeImageReference, SignatureSource, Transport, + Config, ExportOpts, ImageReference, OstreeImageReference, SignatureSource, Transport, }; use ostree_ext::prelude::FileExt; use ostree_ext::tar::TarImportOptions; @@ -20,7 +21,7 @@ const TEST_REGISTRY_DEFAULT: &str = "localhost:5000"; fn assert_err_contains(r: Result, s: impl AsRef) { let s = s.as_ref(); - let msg = format!("{:#}", r.err().unwrap()); + let msg = format!("{:#}", r.err().expect("Expecting an error")); if !msg.contains(s) { panic!(r#"Error message "{}" did not contain "{}""#, msg, s); } @@ -220,8 +221,6 @@ fn test_tar_export_structure() -> Result<()> { use tar::EntryType::{Directory, Regular}; let mut fixture = Fixture::new_v1()?; - // Just test that we can retrieve ownership for all objects - let _objmeta = fixture.get_object_meta()?; let src_tar = fixture.export_tar()?; let src_tar = std::io::BufReader::new(fixture.dir.open(src_tar)?); @@ -391,8 +390,7 @@ fn skopeo_inspect_config(imgref: &str) -> Result Result<()> { +async fn impl_test_container_import_export(chunked: bool) -> Result<()> { let fixture = Fixture::new_v1()?; let testrev = fixture .srcrepo() @@ -413,7 +411,14 @@ async fn test_container_import_export() -> Result<()> { ), ..Default::default() }; - let opts = ostree_ext::container::ExportOpts { + // If chunking is requested, compute object ownership and size mappings + let contentmeta = chunked + .then(|| { + let meta = fixture.get_object_meta().context("Computing object meta")?; + ObjectMetaSized::compute_sizes(fixture.srcrepo(), meta).context("Computing sizes") + }) + .transpose()?; + let opts = ExportOpts { copy_meta_keys: vec!["buildsys.checksum".to_string()], ..Default::default() }; @@ -422,6 +427,7 @@ async fn test_container_import_export() -> Result<()> { fixture.testref(), &config, Some(opts), + contentmeta, &srcoci_imgref, ) .await @@ -451,6 +457,10 @@ async fn test_container_import_export() -> Result<()> { "/usr/bin/bash" ); + let n_chunks = if chunked { 7 } else { 1 }; + assert_eq!(cfg.rootfs().diff_ids().len(), n_chunks); + assert_eq!(cfg.history().len(), n_chunks); + let srcoci_unverified = OstreeImageReference { sigverify: SignatureSource::ContainerPolicyAllowInsecure, imgref: srcoci_imgref.clone(), @@ -522,6 +532,13 @@ async fn oci_clone(src: impl AsRef, dest: impl AsRef) -> Res Ok(()) } +#[tokio::test] +async fn test_container_import_export() -> Result<()> { + impl_test_container_import_export(false).await?; + impl_test_container_import_export(true).await?; + Ok(()) +} + /// But layers work via the container::write module. #[tokio::test] async fn test_container_write_derive() -> Result<()> { @@ -535,6 +552,7 @@ async fn test_container_write_derive() -> Result<()> { ..Default::default() }, None, + None, &ImageReference { transport: Transport::OciDir, name: base_oci_path.to_string(), @@ -578,28 +596,28 @@ async fn test_container_write_derive() -> Result<()> { let images = ostree_ext::container::store::list_images(fixture.destrepo())?; assert!(images.is_empty()); - // Verify importing a derive dimage fails + // Verify importing a derived image fails let r = ostree_ext::container::unencapsulate(fixture.destrepo(), &derived_ref, None).await; - assert_err_contains(r, "Expected 1 layer, found 2"); + assert_err_contains(r, "Image has 1 non-ostree layers"); // Pull a derived image - two layers, new base plus one layer. - let mut imp = ostree_ext::container::store::LayeredImageImporter::new( + let mut imp = ostree_ext::container::store::ImageImporter::new( fixture.destrepo(), &derived_ref, Default::default(), ) .await?; - let prep = match imp.prepare().await? { + let prep = match imp.prepare().await.context("Init prep derived")? { PrepareResult::AlreadyPresent(_) => panic!("should not be already imported"), PrepareResult::Ready(r) => r, }; let expected_digest = prep.manifest_digest.clone(); - assert!(prep.base_layer.commit.is_none()); + assert!(prep.ostree_commit_layer.commit.is_none()); assert_eq!(prep.layers.len(), 1); for layer in prep.layers.iter() { assert!(layer.commit.is_none()); } - let import = imp.import(prep).await?; + let import = imp.import(prep).await.context("Init pull derived")?; // We should have exactly one image stored. let images = ostree_ext::container::store::list_images(fixture.destrepo())?; assert_eq!(images.len(), 1); @@ -613,17 +631,13 @@ async fn test_container_write_derive() -> Result<()> { assert!(digest.starts_with("sha256:")); assert_eq!(digest, expected_digest); - #[cfg(feature = "proxy_v0_2_3")] - { - let commit_meta = &imported_commit.child_value(0); - let proxy = containers_image_proxy::ImageProxy::new().await?; - let commit_meta = glib::VariantDict::new(Some(commit_meta)); - let config = commit_meta - .lookup::("ostree.container.image-config")? - .unwrap(); - let config: oci_spec::image::ImageConfiguration = serde_json::from_str(&config)?; - assert_eq!(config.os(), &oci_spec::image::Os::Linux); - } + let commit_meta = &imported_commit.child_value(0); + let commit_meta = glib::VariantDict::new(Some(commit_meta)); + let config = commit_meta + .lookup::("ostree.container.image-config")? + .unwrap(); + let config: oci_spec::image::ImageConfiguration = serde_json::from_str(&config)?; + assert_eq!(config.os(), &oci_spec::image::Os::Linux); // Parse the commit and verify we pulled the derived content. bash_in!( @@ -633,7 +647,7 @@ async fn test_container_write_derive() -> Result<()> { )?; // Import again, but there should be no changes. - let mut imp = ostree_ext::container::store::LayeredImageImporter::new( + let mut imp = ostree_ext::container::store::ImageImporter::new( fixture.destrepo(), &derived_ref, Default::default(), @@ -650,7 +664,7 @@ async fn test_container_write_derive() -> Result<()> { // Test upgrades; replace the oci-archive with new content. std::fs::remove_dir_all(derived_path)?; std::fs::rename(derived2_path, derived_path)?; - let mut imp = ostree_ext::container::store::LayeredImageImporter::new( + let mut imp = ostree_ext::container::store::ImageImporter::new( fixture.destrepo(), &derived_ref, Default::default(), @@ -661,7 +675,7 @@ async fn test_container_write_derive() -> Result<()> { PrepareResult::Ready(r) => r, }; // We *should* already have the base layer. - assert!(prep.base_layer.commit.is_some()); + assert!(prep.ostree_commit_layer.commit.is_some()); // One new layer assert_eq!(prep.layers.len(), 1); for layer in prep.layers.iter() { @@ -689,7 +703,7 @@ async fn test_container_write_derive() -> Result<()> { )?; // And there should be no changes on upgrade again. - let mut imp = ostree_ext::container::store::LayeredImageImporter::new( + let mut imp = ostree_ext::container::store::ImageImporter::new( fixture.destrepo(), &derived_ref, Default::default(), @@ -744,10 +758,16 @@ async fn test_container_import_export_registry() -> Result<()> { cmd: Some(vec!["/bin/bash".to_string()]), ..Default::default() }; - let digest = - ostree_ext::container::encapsulate(fixture.srcrepo(), testref, &config, None, &src_imgref) - .await - .context("exporting to registry")?; + let digest = ostree_ext::container::encapsulate( + fixture.srcrepo(), + testref, + &config, + None, + None, + &src_imgref, + ) + .await + .context("exporting to registry")?; let mut digested_imgref = src_imgref.clone(); digested_imgref.name = format!("{}@{}", src_imgref.name, digest);