From 7a7d889a0ca20b03dc54b000131cf69cf64a9ad5 Mon Sep 17 00:00:00 2001 From: Jerboa-app Date: Sat, 27 Apr 2024 14:10:47 +0100 Subject: [PATCH 1/2] Adds Observed trait for checking an object is modified Used in content, disk files are hashed and check for subsequent modifications by both checking a system modification date (if present) and/otherwise the actual files hash --- src/content/mod.rs | 66 ++++++++++++++++++++++++++----- src/content/pages/page.rs | 4 +- src/content/resources/resource.rs | 4 +- src/filesystem/file.rs | 39 ++++++++++++++++-- src/filesystem/observed.rs | 5 +++ src/util.rs | 30 ++++++++++++-- tests/test_filesystem.rs | 2 +- tests/test_utils.rs | 65 ++++++++++++++++++++++++++++++ 8 files changed, 194 insertions(+), 21 deletions(-) create mode 100644 src/filesystem/observed.rs create mode 100644 tests/test_utils.rs diff --git a/src/content/mod.rs b/src/content/mod.rs index ada0b48..57ff087 100644 --- a/src/content/mod.rs +++ b/src/content/mod.rs @@ -1,10 +1,11 @@ use std::cmp::min; +use std::time::SystemTime; use serde::{Deserialize, Serialize}; -use crate::filesystem::file::File; -use crate::filesystem::file::{read_file_bytes, read_file_utf8, write_file_bytes, FileNotReadError}; -use crate::util::dump_bytes; +use crate::filesystem::file::{file_hash, modified, File, Observed}; +use crate::filesystem::file::{read_file_bytes, read_file_utf8, write_file_bytes, FileError}; +use crate::util::{dump_bytes, hash}; use self::mime_type::infer_mime_type; @@ -23,14 +24,28 @@ pub mod mime_type; /// /// - The body is unpopulated until [Content::load_from_file] is called /// - The body may be converted to a utf8 string using [Content::utf8_body] -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct Content { uri: String, body: Vec, content_type: String, disk_path: String, - cache_period_seconds: u16 + cache_period_seconds: u16, + hash: Vec, + last_refreshed: SystemTime +} + +impl PartialEq for Content +{ + fn eq(&self, other: &Content) -> bool + { + return self.uri == other.uri && self.body == other.body && + self.content_type == other.content_type && + self.disk_path == other.disk_path && + self.cache_period_seconds == other.cache_period_seconds && + self.hash == other.hash + } } impl File for Content @@ -51,6 +66,31 @@ impl File for Content } } +impl Observed for Content +{ + fn stale(&self) -> bool + { + match modified(&self.disk_path) + { + Some(t) => + { + if t <= self.last_refreshed + { + return false; + } + }, + None => () + } + + return file_hash(&self.disk_path) != self.hash + } + + fn refresh(&mut self) + { + let _ = self.load_from_file(); + } +} + impl Content { pub fn new(uri: &str, disk_path: &str, cache: u16) -> Content @@ -61,18 +101,26 @@ impl Content body: vec![], disk_path: disk_path.to_string(), content_type: infer_mime_type(disk_path).to_string(), - cache_period_seconds: cache + cache_period_seconds: cache, + hash: vec![], + last_refreshed: SystemTime::now() } } - pub fn load_from_file(&mut self) -> Result<(), FileNotReadError> + pub fn load_from_file(&mut self) -> Result<(), FileError> { match self.read_bytes() { - Some(data) => {self.body = data; Ok(())} + Some(data) => + { + self.body = data.clone(); + self.hash = hash(data); + self.last_refreshed = SystemTime::now(); + Ok(()) + } None => { - Err(FileNotReadError { why: format!("Could not read bytes from {}", self.disk_path)}) + Err(FileError { why: format!("Could not read bytes from {}", self.disk_path)}) } } } diff --git a/src/content/pages/page.rs b/src/content/pages/page.rs index ecae3d1..dc894b0 100644 --- a/src/content/pages/page.rs +++ b/src/content/pages/page.rs @@ -2,7 +2,7 @@ use axum::response::{IntoResponse, Response, Html}; use regex::Regex; use serde::{Deserialize, Serialize}; -use crate::{content::Content, filesystem::file::{File, FileNotReadError}}; +use crate::{content::Content, filesystem::file::{File, FileError}}; /// An HTML webpage /// @@ -48,7 +48,7 @@ impl Page } } - pub fn load_from_file(&mut self) -> Result<(), FileNotReadError> + pub fn load_from_file(&mut self) -> Result<(), FileError> { self.content.load_from_file() } diff --git a/src/content/resources/resource.rs b/src/content/resources/resource.rs index c2e8feb..8539168 100644 --- a/src/content/resources/resource.rs +++ b/src/content/resources/resource.rs @@ -1,7 +1,7 @@ use axum::response::{Html, IntoResponse, Response}; use serde::{Serialize, Deserialize}; -use crate::{content::Content, filesystem::file::FileNotReadError}; +use crate::{content::Content, filesystem::file::FileError}; /// A non-HTML resource /// @@ -43,7 +43,7 @@ impl Resource } } - pub fn load_from_file(&mut self) -> Result<(), FileNotReadError> + pub fn load_from_file(&mut self) -> Result<(), FileError> { self.content.load_from_file() } diff --git a/src/filesystem/file.rs b/src/filesystem/file.rs index 52bd453..f376037 100644 --- a/src/filesystem/file.rs +++ b/src/filesystem/file.rs @@ -1,12 +1,14 @@ -use std::{fmt, fs, io::{Read, Write}}; +use std::{fmt, fs, io::{Read, Write}, time::SystemTime}; + +use crate::util::hash; #[derive(Debug, Clone)] -pub struct FileNotReadError +pub struct FileError { pub why: String } -impl fmt::Display for FileNotReadError { +impl fmt::Display for FileError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", self.why) } @@ -20,6 +22,12 @@ pub trait File fn read_utf8(&self) -> Option; } +pub trait Observed +{ + fn stale(&self) -> bool; + fn refresh(&mut self); +} + pub fn write_file_bytes(path: &str, data: &[u8]) { let mut file = fs::File::create(path).unwrap(); @@ -68,4 +76,29 @@ pub fn read_file_bytes(path: &str) -> Option> }, Ok(_) => Some(s) } +} + +pub fn file_hash(path: &str) -> Vec +{ + match read_file_bytes(path) + { + Some(d) => hash(d), + None => vec![] + } +} + +pub fn modified(path: &str) -> Option +{ + match fs::metadata(path) + { + Ok(m) => + { + match m.modified() + { + Ok(s) => Some(s), + Err(_e) => None + } + }, + Err(_e) => None + } } \ No newline at end of file diff --git a/src/filesystem/observed.rs b/src/filesystem/observed.rs new file mode 100644 index 0000000..900a41b --- /dev/null +++ b/src/filesystem/observed.rs @@ -0,0 +1,5 @@ +pub trait Observed +{ + fn stale(&self) -> bool; + fn refresh(&mut self); +} \ No newline at end of file diff --git a/src/util.rs b/src/util.rs index 7cdc707..73c55d3 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,6 +1,7 @@ use core::fmt; use std::{fmt::Write, io::{Read, Write as ioWrite}}; use libflate::deflate::{Encoder, Decoder}; +use openssl::sha::Sha256; use regex::Regex; pub fn dump_bytes(v: &[u8]) -> String @@ -86,20 +87,34 @@ pub fn compress(bytes: &[u8]) -> Result, CompressionError> } } -pub fn decompress(bytes: Vec) -> Result +pub fn decompress(bytes: Vec) -> Result, CompressionError> { let mut decoder = Decoder::new(&bytes[..]); let mut decoded_data = Vec::new(); match decoder.read_to_end(&mut decoded_data) { - Ok(_) => (), + Ok(_) => Ok(decoded_data), Err(e) => { - return Err(CompressionError { why: format!("Error decoding data: {}", e) }) + Err(CompressionError { why: format!("Error decoding data: {}", e) }) } } - +} + +pub fn compress_string(s: &String) -> Result, CompressionError> +{ + compress(s.as_bytes()) +} + +pub fn decompress_utf8_string(compressed: Vec) -> Result +{ + let decoded_data = match decompress(compressed) + { + Ok(d) => d, + Err(e) => return Err(e) + }; + match std::str::from_utf8(&decoded_data) { Ok(s) => Ok(s.to_string()), @@ -109,3 +124,10 @@ pub fn decompress(bytes: Vec) -> Result } } } + +pub fn hash(v: Vec) -> Vec +{ + let mut sha = Sha256::new(); + sha.update(&v); + sha.finish().to_vec() +} \ No newline at end of file diff --git a/tests/test_filesystem.rs b/tests/test_filesystem.rs index f47adaa..5949da3 100644 --- a/tests/test_filesystem.rs +++ b/tests/test_filesystem.rs @@ -6,7 +6,7 @@ mod filesystem use std::fs::remove_file; - use busser::filesystem::{file::{read_file_bytes, read_file_utf8, write_file_bytes, FileNotReadError}, folder::list_dir_by}; + use busser::filesystem::{file::{read_file_bytes, read_file_utf8, write_file_bytes, FileError}, folder::list_dir_by}; use regex::Regex; diff --git a/tests/test_utils.rs b/tests/test_utils.rs new file mode 100644 index 0000000..fa59700 --- /dev/null +++ b/tests/test_utils.rs @@ -0,0 +1,65 @@ +mod common; + +#[cfg(test)] +mod util +{ + use busser::util::{hash, matches_one, read_bytes}; + + use busser::util::{compress, compress_string, decompress, decompress_utf8_string}; + + #[test] + fn test_compress_decompress() + { + let data = "this is some data".as_bytes(); + let compressed = compress(data); + assert!(compressed.is_ok()); + let decompressed = decompress(compressed.unwrap()); + assert!(decompressed.is_ok()); + assert_eq!(data, decompressed.unwrap()); + } + + #[test] + fn test_compress_decompress_strings() + { + let data = "this is some data".to_string(); + let compressed = compress_string(&data); + assert!(compressed.is_ok()); + let decompressed = decompress_utf8_string(compressed.unwrap()); + assert!(decompressed.is_ok()); + assert_eq!(data, decompressed.unwrap()); + } + + #[test] + fn test_hash() + { + let hashed = hash("00".as_bytes().to_vec()); + let expected: Vec = vec![241, 83, 67, 146, 39, 155, 221, 191, 157, 67, 221, 232, 112, 28, 181, 190, 20, 184, 47, 118, 236, 102, 7, 191, 141, 106, 213, 87, 246, 15, 48, 78]; + assert_eq!(hashed, expected); + + let hashed = hash("from openssl command line".as_bytes().to_vec()); + let expected: Vec = vec![36, 48, 61, 185, 111, 196, 129, 155, 155, 187, 39, 255, 34, 84, 74, 189, 132, 168, 13, 60, 207, 212, 76, 98, 219, 209, 139, 83, 132, 78, 50, 115]; + assert_eq!(hashed, expected); + } + + #[test] + fn test_matches_one() + { + let uri = "this/is/some/uri.txt"; + + assert!(matches_one(uri, &vec!["this".to_string()])); + assert!(matches_one(uri, &vec![r"\.txt$".to_string()])); + assert!(!matches_one(uri, &vec!["rnaomd".to_string()])); + assert!(matches_one(uri, &vec!["rnaomd".to_string(), r"\.txt$".to_string()])); + assert!(matches_one(uri, &vec!["this".to_string(), r"\.txt$".to_string()])); + assert!(matches_one(uri, &vec!["rnaomd".to_string(),"this".to_string(), r"\.txt$".to_string()])); + assert!(!matches_one(uri, &vec!["rnaomd".to_string(), "adsklfaldk".to_string(), "adskgkfld".to_string()])); + } + + #[test] + fn test_read_bytes() + { + let expected = vec![36, 48, 61, 185, 111, 196, 129, 155, 155, 187, 39, 255, 34, 84, 74, 189, 132, 168, 13, 60, 207, 212, 76, 98, 219, 209, 139, 83, 132, 78, 50, 115]; + let actual = read_bytes("24303db96fc4819b9bbb27ff22544abd84a80d3ccfd44c62dbd18b53844e3273".to_string()); + assert_eq!(actual, expected); + } +} \ No newline at end of file From be2a684be3a7083239401c2a2a8a86047320a604 Mon Sep 17 00:00:00 2001 From: Jerboa-app Date: Sun, 28 Apr 2024 08:12:04 +0100 Subject: [PATCH 2/2] Deprecate the file modified time Does not seem to work when called in close succession --- src/content/mod.rs | 21 +++++--------- src/filesystem/file.rs | 20 ++----------- tests/test_content.rs | 59 ++++++++++++++++++++++++++++++++++++++ tests/test_filesystem.rs | 25 ++++++++++++---- tests/test_page_load.rs | 3 +- tests/text_content_type.rs | 4 ++- 6 files changed, 91 insertions(+), 41 deletions(-) create mode 100644 tests/test_content.rs diff --git a/src/content/mod.rs b/src/content/mod.rs index 57ff087..a45c779 100644 --- a/src/content/mod.rs +++ b/src/content/mod.rs @@ -3,7 +3,7 @@ use std::time::SystemTime; use serde::{Deserialize, Serialize}; -use crate::filesystem::file::{file_hash, modified, File, Observed}; +use crate::filesystem::file::{file_hash, File, Observed}; use crate::filesystem::file::{read_file_bytes, read_file_utf8, write_file_bytes, FileError}; use crate::util::{dump_bytes, hash}; @@ -68,20 +68,13 @@ impl File for Content impl Observed for Content { - fn stale(&self) -> bool + fn is_stale(&self) -> bool { - match modified(&self.disk_path) - { - Some(t) => - { - if t <= self.last_refreshed - { - return false; - } - }, - None => () - } - + // this is 4x slower than using the modified date + // but the modified date fails when is_stale is called + // very soon after creation/modification, plus may + // not be guaranteed cross platform, this is. + // We can check 100,000 files in 447 millis return file_hash(&self.disk_path) != self.hash } diff --git a/src/filesystem/file.rs b/src/filesystem/file.rs index f376037..e1e8c6a 100644 --- a/src/filesystem/file.rs +++ b/src/filesystem/file.rs @@ -1,4 +1,4 @@ -use std::{fmt, fs, io::{Read, Write}, time::SystemTime}; +use std::{fmt, fs, io::{Read, Write}}; use crate::util::hash; @@ -24,7 +24,7 @@ pub trait File pub trait Observed { - fn stale(&self) -> bool; + fn is_stale(&self) -> bool; fn refresh(&mut self); } @@ -85,20 +85,4 @@ pub fn file_hash(path: &str) -> Vec Some(d) => hash(d), None => vec![] } -} - -pub fn modified(path: &str) -> Option -{ - match fs::metadata(path) - { - Ok(m) => - { - match m.modified() - { - Ok(s) => Some(s), - Err(_e) => None - } - }, - Err(_e) => None - } } \ No newline at end of file diff --git a/tests/test_content.rs b/tests/test_content.rs new file mode 100644 index 0000000..8cbe9cb --- /dev/null +++ b/tests/test_content.rs @@ -0,0 +1,59 @@ +mod common; + +#[cfg(test)] +mod test_content +{ + use std::{fs::remove_file, path::Path}; + + use busser::{content::Content, filesystem::file::{file_hash, write_file_bytes, Observed}, util::read_bytes}; + + #[test] + fn test_load_content() + { + let mut content = Content::new("tests/pages/a.html", "tests/pages/a.html", 3600); + + assert_eq!(content.get_uri(), "tests/pages/a.html".to_string()); + assert!(content.utf8_body().is_ok_and(|b| b == "".to_string())); + + assert!(content.load_from_file().is_ok()); + assert!(content.utf8_body().is_ok_and(|b| b == "this is /a".to_string())); + + let file = "test_load_content"; + let path = Path::new("file"); + if path.exists() + { + let _ = remove_file(file); + } + let mut content_missing = Content::new(file, file, 3600); + assert!(content_missing.load_from_file().is_err()); + } + + #[test] + fn test_observed_content() + { + let path = "test_observed_content"; + let test_content = "this is some test content"; + let test_content_hash = "2d5bb7c3afbe68c05bcd109d890dca28ceb0105bf529ea1111f9ef8b44b217b9".to_string(); + let modified_test_content = "this is some modified content"; + let modified_test_content_hash = "c4ea4898725c3390549d40a19a26a57730730b42050def80f1d157581e33b2db".to_string(); + + write_file_bytes(path, test_content.as_bytes()); + + let mut content = Content::new(path, path, 3600); + + assert!(content.load_from_file().is_ok()); + assert!(!content.is_stale()); + assert_eq!(file_hash(path), read_bytes(test_content_hash)); + assert!(content.utf8_body().is_ok_and(|b| b == test_content.to_string())); + write_file_bytes(path, modified_test_content.as_bytes()); + + assert!(content.is_stale()); + assert_eq!(file_hash(path), read_bytes(modified_test_content_hash)); + content.refresh(); + assert!(content.utf8_body().is_ok_and(|b| b == modified_test_content.to_string())); + + let _ = remove_file(path); + } + +} + diff --git a/tests/test_filesystem.rs b/tests/test_filesystem.rs index 5949da3..53abf34 100644 --- a/tests/test_filesystem.rs +++ b/tests/test_filesystem.rs @@ -4,9 +4,9 @@ mod common; mod filesystem { - use std::fs::remove_file; + use std::{fs::remove_file, path::Path}; - use busser::filesystem::{file::{read_file_bytes, read_file_utf8, write_file_bytes, FileError}, folder::list_dir_by}; + use busser::filesystem::{file::{read_file_bytes, read_file_utf8, write_file_bytes}, folder::list_dir_by}; use regex::Regex; @@ -15,7 +15,14 @@ mod filesystem { let expected = "this is /a".as_bytes(); let actual = read_file_bytes("tests/pages/a.html").unwrap(); - assert_eq!(actual, expected) + assert_eq!(actual, expected); + + let path = Path::new("test_file_error"); + if path.exists() + { + let _ = remove_file(path); + } + assert!(read_file_bytes(path.to_str().unwrap()).is_none()); } #[test] @@ -23,7 +30,14 @@ mod filesystem { let expected = "this is /a"; let actual = read_file_utf8("tests/pages/a.html").unwrap(); - assert_eq!(actual, expected) + assert_eq!(actual, expected); + + let path = Path::new("test_file_error"); + if path.exists() + { + let _ = remove_file(path); + } + assert!(read_file_utf8(path.to_str().unwrap()).is_none()); } #[test] @@ -36,7 +50,7 @@ mod filesystem let actual = read_file_utf8("test_write_bytes").unwrap(); assert_eq!(actual, expected); - remove_file("test_write_bytes"); + let _ = remove_file("test_write_bytes"); } #[test] @@ -48,6 +62,5 @@ mod filesystem assert!(actual.contains(&"tests/pages/data/jpg.jpg".to_string())); assert!(actual.contains(&"tests/pages/data/png.jpg".to_string())); assert_eq!(actual.len(), 2); - } } \ No newline at end of file diff --git a/tests/test_page_load.rs b/tests/test_page_load.rs index 7bff64c..4d3eef0 100644 --- a/tests/test_page_load.rs +++ b/tests/test_page_load.rs @@ -13,7 +13,7 @@ mod test_page_load let pages = get_pages(Some("tests/pages"), None); assert_eq!(pages.len(), 3); - + let paths = HashMap::from( [ ("tests/pages/a.html", "this is /a"), @@ -31,7 +31,6 @@ mod test_page_load assert_eq!(actual_body, expected_body) } - } } \ No newline at end of file diff --git a/tests/text_content_type.rs b/tests/text_content_type.rs index 0e44517..afbd120 100644 --- a/tests/text_content_type.rs +++ b/tests/text_content_type.rs @@ -5,7 +5,7 @@ mod test_resource_load { use std::collections::HashMap; - use busser::content::resources::{get_resources, resource::{self, Resource}}; + use busser::content::resources::{get_resources, resource::Resource}; #[test] fn test_content_types() @@ -22,6 +22,8 @@ mod test_resource_load ("tests/pages/data/gif.gif", "image/gif"), ("tests/pages/data/ico.ico", "image/x-icon"), ("tests/pages/data/jpg.jpg", "image/jpeg"), + ("tests/pages/data/mp4.mp4", "video/mp4"), + ("tests/pages/data/mpeg.mpeg", "video/mpeg"), ("tests/pages/data/js.js", "text/javascript"), ("tests/pages/data/mp4.gif", "image/gif"), ("tests/pages/data/png.jpg", "image/jpeg"),