diff --git a/src/clidef.rs b/src/clidef.rs index 512268e..22cb267 100644 --- a/src/clidef.rs +++ b/src/clidef.rs @@ -50,6 +50,13 @@ pub fn cli(version: &'static str) -> Command { .action(clap::ArgAction::SetTrue) .help("Do not remove anything, only display what will be removed") ) + .arg( + Arg::new("autodeps") + .short('a') + .long("autodeps") + .action(clap::ArgAction::SetTrue) + .help(format!("Include graph of package dependencies\n{}", "NOTE: This can increase the size, but might not always be useful".yellow())) + ) .arg( Arg::new("root") .short('r') @@ -78,6 +85,12 @@ pub fn cli(version: &'static str) -> Command { .arg( Arg::new("f_log").long("logs").action(clap::ArgAction::SetTrue).help("Leave any kind of logs") ) + .arg( + Arg::new("f_pic").long("pic").action(clap::ArgAction::SetTrue).help("Leave any graphics (pictures)") + ) + .arg( + Arg::new("f_arc").long("arc").action(clap::ArgAction::SetTrue).help("Leave any kind of archives/tarballs") + ) // Other .next_help_heading("Other") diff --git a/src/filters/defs.rs b/src/filters/defs.rs new file mode 100644 index 0000000..e6861bf --- /dev/null +++ b/src/filters/defs.rs @@ -0,0 +1,23 @@ +/* +Definitions, constants +*/ + +/// Stub doc files +pub const DOC_STUB_FILES: &[&str] = + &["AUTHORS", "COPYING", "LICENSE", "DEBUG", "DISTRIB", "DOC", "HISTORY", "README", "TERMS", "TODO"]; + +/// Docfiles +pub const DOC_F_EXT: &[&str] = &[".txt", ".doc", ".rtf", ".md", ".rtx", ".tex", ".xml"]; + +/// Docfiles portable +pub const DOC_FP_EXT: &[&str] = &[".eps", ".pdf", ".ps"]; + +/// Headers +pub const H_SRC_F_EXT: &[&str] = &[".h", ".hpp"]; + +/// Archives +pub const ARC_F_EXT: &[&str] = &[".gz", ".bz2", ".xz", ".zip", ".tar"]; + +/// Graphic files +pub const IMG_F_EXT: &[&str] = + &[".bmp", ".jpg", ".jpeg", ".png", ".gif", ".xpm", ".xbm", ".tif", ".tiff", ".pbm", ".svg", ".ico"]; diff --git a/src/filters/mod.rs b/src/filters/mod.rs index f384baf..48eda1e 100644 --- a/src/filters/mod.rs +++ b/src/filters/mod.rs @@ -1,3 +1,4 
@@ +pub mod defs; pub mod dirs; pub mod intf; pub mod resources; diff --git a/src/filters/resources.rs b/src/filters/resources.rs index 4b3f448..6c48916 100644 --- a/src/filters/resources.rs +++ b/src/filters/resources.rs @@ -1,12 +1,10 @@ +use super::{defs, intf::DataFilter}; +use crate::profile::Profile; use std::{ collections::HashSet, path::{Path, PathBuf}, }; -use crate::profile::Profile; - -use super::intf::DataFilter; - pub struct ResourcesDataFilter { data: Vec, remove_archives: bool, @@ -37,7 +35,7 @@ impl ResourcesDataFilter { let p = p.to_str().unwrap(); - for s in [".gz", ".bz2", ".xz", ".zip", ".tar"] { + for s in defs::ARC_F_EXT { if p.ends_with(s) { return true; } @@ -53,7 +51,7 @@ impl ResourcesDataFilter { } let p = p.to_str().unwrap(); - for s in [".bmp", ".jpg", ".jpeg", ".png", ".gif", ".xpm", ".tif", ".tiff", ".pbm", ".svg", ".ico"] { + for s in defs::IMG_F_EXT { if p.ends_with(s) { return true; } diff --git a/src/main.rs b/src/main.rs index 1e79f5e..04b2bf5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,11 +5,9 @@ mod procdata; mod profile; mod rootfs; mod scanner; - +use crate::profile::Profile; use clap::{ArgMatches, Command}; use colored::Colorize; - -use crate::profile::Profile; use std::{ env, path::{Path, PathBuf}, @@ -54,7 +52,9 @@ fn get_profile(mut cli: Command, params: &ArgMatches) -> Profile { .set_doc(f(params, "f_doc")) .set_i18n(f(params, "f_i18n")) .set_l10n(f(params, "f_l10n")) - .set_log(f(params, "f_log")); + .set_log(f(params, "f_log")) + .set_arch(f(params, "f_arc")) + .set_img(f(params, "f_pic")); } else if let Some(profile_path) = profile_path { log::info!("Getting profile at {profile_path}"); match Profile::new(Path::new(profile_path)) { @@ -80,6 +80,12 @@ fn get_profile(mut cli: Command, params: &ArgMatches) -> Profile { if is_f(params, "f_log") { profile.set_manpages(f(params, "f_log")); } + if is_f(params, "f_pic") { + profile.set_img(f(params, "f_pic")); + } + if is_f(params, "f_arc") { + profile.set_arch(f(params, 
"f_arc")); + } } Err(err) => { log::error!("{}", err); @@ -129,12 +135,24 @@ fn main() -> Result<(), std::io::Error> { log::error!("Mountpoint \"{}\" does not exist or is not accessible", rpth.to_str().unwrap().bright_yellow()); process::exit(exitcode::IOERR); } - if let Err(err) = - procdata::TintProcessor::new(rpth).set_profile(get_profile(cli, &params)).set_dry_run(params.get_flag("dry-run")).start() + + log::info!("Launching scanner and data processor"); + + if let Err(err) = procdata::TintProcessor::new(rpth) + .set_profile(get_profile(cli, &params)) + .set_dry_run(params.get_flag("dry-run")) + .set_autodeps(params.get_flag("autodeps")) + .start() { log::error!("{}", err); process::exit(exitcode::IOERR); } + if params.get_flag("dry-run") { + log::warn!("This was a dry-run. Changes were not applied."); + } else { + log::info!("Finished. Hopefully it even works :-)"); + } + Ok(()) } diff --git a/src/procdata.rs b/src/procdata.rs index 409a0e1..cea1ecc 100644 --- a/src/procdata.rs +++ b/src/procdata.rs @@ -1,20 +1,18 @@ -use std::fs::{self, canonicalize, remove_file, DirEntry}; -use std::{ - collections::HashSet, - io::Error, - os::unix, - path::{Path, PathBuf}, -}; - use crate::{ filters::{dirs::PathsDataFilter, intf::DataFilter, resources::ResourcesDataFilter, texts::TextDataFilter}, profile::Profile, rootfs, - scanner::{binlib::ElfScanner, debpkg::DebPackageScanner, general::Scanner}, + scanner::{binlib::ElfScanner, debpkg::DebPackageScanner, dlst::ContentFormatter, general::Scanner}, }; - use bytesize::ByteSize; use filesize::PathExt; +use std::fs::{self, canonicalize, remove_file, DirEntry}; +use std::{ + collections::HashSet, + io::Error, + os::unix, + path::{Path, PathBuf}, +}; /// Main processing of profiles or other data #[derive(Clone)] @@ -22,11 +20,12 @@ pub struct TintProcessor { profile: Profile, root: PathBuf, dry_run: bool, + autodeps: bool, } impl TintProcessor { pub fn new(root: PathBuf) -> Self { - TintProcessor { profile: Profile::default(), root, 
dry_run: true } + TintProcessor { profile: Profile::default(), root, dry_run: true, autodeps: false } } /// Set configuration from a profile @@ -35,11 +34,18 @@ impl TintProcessor { self } + /// Set dry-run flag (no actual writes on the target image) pub fn set_dry_run(&mut self, dr: bool) -> &mut Self { self.dry_run = dr; self } + /// Set flag for automatic dependency tracing + pub fn set_autodeps(&mut self, ad: bool) -> &mut Self { + self.autodeps = ad; + self + } + // Chroot to the mount point fn switch_root(&self) -> Result<(), Error> { unix::fs::chroot(self.root.to_str().unwrap())?; @@ -107,7 +113,7 @@ impl TintProcessor { for p in paths { total_size += p.size_on_disk_fast(&p.metadata().unwrap()).unwrap(); total_files += 1; - log::info!(" - {}", p.to_str().unwrap()); + log::debug!(" - {}", p.to_str().unwrap()); } println!("\nTotal files to be removed: {}, disk size freed: {}\n", total_files, ByteSize::b(total_size)); @@ -143,7 +149,8 @@ impl TintProcessor { paths.extend(ElfScanner::new().scan(Path::new(target_path).to_owned())); log::debug!("Find package dependencies for {target_path}"); - paths.extend(DebPackageScanner::new().scan(Path::new(target_path).to_owned())); + // XXX: This will re-scan again and again, if target_path belongs to the same package + paths.extend(DebPackageScanner::new(self.autodeps).scan(Path::new(target_path).to_owned())); // Add the target itself paths.insert(Path::new(target_path).to_owned()); @@ -153,7 +160,7 @@ impl TintProcessor { // and then let TextDataFilter removes what still should be removed. // The idea is to keep parts only relevant to the runtime. log::debug!("Filtering packages"); - let pscan = DebPackageScanner::new(); + let pscan = DebPackageScanner::new(false); // XXX: Maybe --autodeps=LEVEL to optionally include these too? 
for p in self.profile.get_packages() { log::debug!("Getting content of package \"{}\"", p); paths.extend(pscan.get_package_contents(p.to_string())?); @@ -195,11 +202,7 @@ impl TintProcessor { if self.dry_run { self.dry_run(p)?; - - log::info!("Preserve:"); - for x in paths { - log::info!(" + {}", x.to_str().unwrap()); - } + ContentFormatter::new(&paths).format(); } else { self.apply_changes(p)?; } diff --git a/src/scanner/debpkg.rs b/src/scanner/debpkg.rs index 760cd5e..cb0c248 100644 --- a/src/scanner/debpkg.rs +++ b/src/scanner/debpkg.rs @@ -1,4 +1,9 @@ -use crate::scanner::general::{Scanner, ScannerCommons}; +use crate::scanner::{ + general::{Scanner, ScannerCommons}, + tracedeb, + traceitf::PkgDepTrace, +}; +use colored::Colorize; use std::{ io::{Error, ErrorKind}, path::PathBuf, @@ -8,12 +13,13 @@ use std::{ /// a target belongs to. pub struct DebPackageScanner { commons: ScannerCommons, + autodeps: bool, } impl DebPackageScanner { /// Constructor - pub fn new() -> Self { - DebPackageScanner { commons: ScannerCommons::new() } + pub fn new(autodeps: bool) -> Self { + DebPackageScanner { commons: ScannerCommons::new(), autodeps } } /// Expands target taking to the account Linux /bin symlinks to /usr/bin etc. 
@@ -84,20 +90,38 @@ impl DebPackageScanner { impl Scanner for DebPackageScanner { fn scan(&mut self, pth: PathBuf) -> Vec { log::debug!("Scanning package contents for {:?}", pth.to_str()); + + let mut out: Vec = vec![]; let pkgname = self.get_package_for(pth.to_str().unwrap().to_string()); if let Ok(Some(pkgname)) = pkgname { log::debug!("{} corresponds to {}", pth.to_str().unwrap(), pkgname); - match self.get_package_contents(pkgname) { + + match self.get_package_contents(pkgname.to_owned()) { Ok(fp) => { - return fp; + out.extend(fp); } Err(err) => { - log::error!("{}", err); + log::error!("Failed getting contents of {}: {}", pkgname, err); + } + } + + if self.autodeps { + // Trace dependencies graph for the package + for p in tracedeb::DebPackageTrace::new().trace(pkgname.to_owned()) { + log::info!("Keeping dependency package: {}", p.bright_yellow()); + match self.get_package_contents(p.to_owned()) { + Ok(fp) => { + out.extend(fp); + } + Err(err) => { + log::error!("Failed getting contents of {}: {}", p, err); + } + } } } } - vec![] + out } } diff --git a/src/scanner/dlst.rs b/src/scanner/dlst.rs new file mode 100644 index 0000000..45edd64 --- /dev/null +++ b/src/scanner/dlst.rs @@ -0,0 +1,101 @@ +/* +Data lister (fancy STDOUT printer) +*/ + +use crate::filters::defs::{self}; +use bytesize::ByteSize; +use colored::Colorize; +use std::{ + os::unix::prelude::PermissionsExt, + path::{Path, PathBuf}, +}; + +/// ContentFormatter is a lister for finally gathered information, +/// that needs to be displayed on the screen for the user for review +pub struct ContentFormatter<'a> { + fs_data: &'a Vec, + last_dir: String, +} + +impl<'a> ContentFormatter<'a> { + pub(crate) fn new(fs_data: &'a Vec) -> Self { + ContentFormatter { fs_data, last_dir: "".to_string() } + } + + pub(crate) fn format(&mut self) { + let d_len = self.fs_data.len() - 1; + let mut t_size: u64 = 0; + for (pi, p) in self.fs_data.iter().enumerate() { + t_size += p.metadata().unwrap().len(); + let (dname, 
mut fname) = self.dn(p); + + if self.last_dir != dname { + self.last_dir = dname.to_owned(); + println!("\n{}", self.last_dir.bright_blue().bold()); + println!("{}", "──┬──┄┄╌╌ ╌ ╌".blue()); + } + + let mut leaf = " ├─"; + if pi == d_len || (pi < d_len && dname != self.fs_data[pi + 1].parent().unwrap().to_str().unwrap()) { + leaf = " ╰─"; + } + + if p.is_symlink() { + println!( + "{} {} {} {}", + leaf.blue(), + fname.bright_cyan().bold(), + "⮕".yellow().dimmed(), + p.read_link().unwrap().as_path().to_str().unwrap().cyan() + ); + } else if p.metadata().unwrap().permissions().mode() & 0o111 != 0 { + println!("{} {}", leaf.blue(), fname.bright_green().bold()); + } else { + if fname.ends_with(".so") || fname.contains(".so.") { + fname = fname.green().to_string(); + } else if self.is_potential_junk(&fname) { + fname = format!("{} {}", "⚠️".bright_red().bold(), fname.bright_red()); + } + + println!("{} {}", leaf.blue(), fname); + } + } + + println!("\nPreserved {} files, taking space: {}\n", d_len + 1, ByteSize::b(t_size)); + } + + fn is_potential_junk(&self, fname: &str) -> bool { + for ext in + defs::DOC_F_EXT.iter().chain(defs::ARC_F_EXT.iter()).chain(defs::H_SRC_F_EXT.iter()).chain(defs::DOC_FP_EXT.iter()) + { + if fname.ends_with(ext) { + return true; + } + } + + for sf in defs::DOC_STUB_FILES { + if fname == *sf { + return true; + } + } + + // Potentially doc stubfile that doesn't look like a known one + if fname == fname.to_uppercase() { + return true; + } + + false + } + + /// Get dir/name split, painted accordingly + fn dn(&mut self, p: &Path) -> (String, String) { + let dname = p.parent().unwrap().to_str().unwrap().to_string(); + let fname = p.file_name().unwrap().to_str().unwrap().to_string(); + + if p.is_dir() { + return (format!("{}", dname.bright_blue().bold()), "".to_string()); + } + + (dname, fname) + } +} diff --git a/src/scanner/mod.rs b/src/scanner/mod.rs index 1f257d5..d25cc80 100644 --- a/src/scanner/mod.rs +++ b/src/scanner/mod.rs @@ -1,3 +1,6 @@ 
pub mod binlib; pub mod debpkg; +pub(crate) mod dlst; pub mod general; +pub mod tracedeb; +pub mod traceitf; diff --git a/src/scanner/tracedeb.rs b/src/scanner/tracedeb.rs new file mode 100644 index 0000000..c9484b4 --- /dev/null +++ b/src/scanner/tracedeb.rs @@ -0,0 +1,62 @@ +use super::traceitf::PkgDepTrace; +use std::{collections::HashSet, process::Command}; + +pub struct DebPackageTrace { + data: HashSet, +} + +impl DebPackageTrace { + pub fn new() -> Self { + DebPackageTrace { data: HashSet::default() } + } + + /// Get list of package dependencies for the first nearby level + /// + /// NOTE: currently it is quite a bad way by hammering with apt, + /// but it works and is okay-ish for the time being. + /// This needs to be rewritten by slurping the entire /var/lib/dpkg/status + /// and then processing it at once. + fn get_dependencies(&mut self, pkg: String, start: bool) -> Vec { + if start { + self.data.clear(); + } + + let mut c = Command::new("apt"); + c.args(["depends", pkg.as_str()]); + + match c.output() { + Ok(out) => { + if let Ok(out) = String::from_utf8(out.stdout) { + for l in out.lines().map(|s| s.trim().to_string()).collect::>() { + if l.to_lowercase().starts_with("depends:") { + let l = l.split(' ').collect::>(); + if l.len() > 2 { + let pkgname = l[1].to_string(); + if !self.data.contains(&pkgname) { + self.data.insert(pkgname.to_owned()); + self.get_dependencies(pkgname, false); + } + } + } + } + } + } + Err(err) => { + log::error!("Cannot get package dependencies: {}", err); + return vec![]; + } + } + + let mut data = self.data.clone().into_iter().collect::>(); + data.sort(); + data + } +} + +impl PkgDepTrace for DebPackageTrace { + fn trace(&mut self, pkgname: String) -> Vec { + log::info!("Getting dependencies for a package {}", pkgname); + + self.get_dependencies(pkgname, true) + } +} diff --git a/src/scanner/traceitf.rs b/src/scanner/traceitf.rs new file mode 100644 index 0000000..f31b2eb --- /dev/null +++ b/src/scanner/traceitf.rs @@ -0,0 
+1,4 @@ +/// Package dependency trace +pub trait PkgDepTrace { + fn trace(&mut self, pkgname: String) -> Vec; +}