From 175a83c99c2fc464134958f19f955387b6fd46ce Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Wed, 6 Nov 2024 18:04:42 +0500 Subject: [PATCH 01/48] sqlite --- Cargo.toml | 5 + .../V100__drop_leaf_payload.sql | 0 .../{ => postgres}/V10__init_schema.sql | 0 .../V20__payload_hash_index.sql | 0 ...__drop_leaf_block_hash_fkey_constraint.sql | 0 migrations/sqlite/V10__init_schema.sql | 75 ++++ src/data_source/sql.rs | 8 +- src/data_source/storage/sql.rs | 344 +++++++++++++----- src/data_source/storage/sql/db.rs | 4 + src/data_source/storage/sql/queries.rs | 7 +- .../storage/sql/queries/availability.rs | 6 +- .../storage/sql/queries/explorer.rs | 33 +- src/data_source/storage/sql/queries/node.rs | 4 +- src/data_source/storage/sql/queries/state.rs | 182 ++++++--- src/data_source/storage/sql/transaction.rs | 211 ++++------- 15 files changed, 570 insertions(+), 309 deletions(-) rename migrations/{ => postgres}/V100__drop_leaf_payload.sql (100%) rename migrations/{ => postgres}/V10__init_schema.sql (100%) rename migrations/{ => postgres}/V20__payload_hash_index.sql (100%) rename migrations/{ => postgres}/V30__drop_leaf_block_hash_fkey_constraint.sql (100%) create mode 100644 migrations/sqlite/V10__init_schema.sql diff --git a/Cargo.toml b/Cargo.toml index 878cf7a64..e798a4bb8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,11 @@ license = "GPL-3.0-or-later" [features] default = ["file-system-data-source", "metrics-data-source", "sql-data-source"] +# Enables support for an embedded SQLite database instead of PostgreSQL. +# Ideal for lightweight nodes that benefit from pruning and merklized state storage, +# offering advantages over file system storage. +embedded-db = [] + # Enable the availability data source backed by the local file system. file-system-data-source = ["atomic_store"] diff --git a/migrations/V100__drop_leaf_payload.sql b/migrations/postgres/V100__drop_leaf_payload.sql similarity index 100% rename from migrations/V100__drop_leaf_payload.sql rename to migrations/postgres/V100__drop_leaf_payload.sql diff --git a/migrations/V10__init_schema.sql b/migrations/postgres/V10__init_schema.sql similarity index 100% rename from migrations/V10__init_schema.sql rename to migrations/postgres/V10__init_schema.sql diff --git a/migrations/V20__payload_hash_index.sql b/migrations/postgres/V20__payload_hash_index.sql similarity index 100% rename from migrations/V20__payload_hash_index.sql rename to migrations/postgres/V20__payload_hash_index.sql diff --git a/migrations/V30__drop_leaf_block_hash_fkey_constraint.sql b/migrations/postgres/V30__drop_leaf_block_hash_fkey_constraint.sql similarity index 100% rename from migrations/V30__drop_leaf_block_hash_fkey_constraint.sql rename to migrations/postgres/V30__drop_leaf_block_hash_fkey_constraint.sql diff --git a/migrations/sqlite/V10__init_schema.sql b/migrations/sqlite/V10__init_schema.sql new file mode 100644 index 000000000..99197141a --- /dev/null +++ b/migrations/sqlite/V10__init_schema.sql @@ -0,0 +1,75 @@ +CREATE TABLE header +( + height BIGINT PRIMARY KEY, + hash TEXT NOT NULL UNIQUE, + payload_hash TEXT NOT NULL, + timestamp BIGINT NOT NULL, + + -- For convenience, we store the entire application-specific header type as JSON. Just like + -- `leaf.leaf` and `leaf.qc`, this allows us to easily reconstruct the entire header using + -- `serde_json`, and to run queries and create indexes on application-specific header fields + -- without having a specific column for those fields. 
In many cases, this will enable new + -- application-specific API endpoints to be implemented without altering the schema (beyond + -- possibly adding an index for performance reasons). + data JSONB NOT NULL +); + +CREATE INDEX header_timestamp_idx ON header (timestamp); + +CREATE TABLE payload +( + height BIGINT PRIMARY KEY REFERENCES header (height) ON DELETE CASCADE, + size INTEGER, + data BLOB +); + +CREATE TABLE vid +( + height BIGINT PRIMARY KEY REFERENCES header (height) ON DELETE CASCADE, + common BLOB NOT NULL, + share BLOB +); + +CREATE TABLE leaf +( + height BIGINT PRIMARY KEY REFERENCES header (height) ON DELETE CASCADE, + hash TEXT NOT NULL UNIQUE, + block_hash TEXT NOT NULL, + + -- For convenience, we store the entire leaf and justifying QC as JSON blobs. There is a bit of + -- redundancy here with the indexed fields above, but it makes it easy to reconstruct the entire + -- leaf without depending on the specific fields of the application-specific leaf type. We + -- choose JSON over a binary format, even though it has a larger storage footprint, because + -- Postgres actually has decent JSON support: we don't have to worry about escaping non-ASCII + -- characters in inputs, and we can even do queries on the JSON and add indices over sub-objects + -- of the JSON blobs. + leaf JSONB NOT NULL, + qc JSONB NOT NULL +); + +CREATE TABLE `transaction` +( + hash TEXT NOT NULL, + -- Block containing this transaction. + block_height BIGINT NOT NULL REFERENCES header(height) ON DELETE CASCADE, + -- Position within the block. Transaction indices are an application-specific type, so we store + -- it as a serialized blob. We use JSON instead of a binary format so that the application can + -- make use of the transaction index in its own SQL queries. + idx JSONB NOT NULL, + PRIMARY KEY (block_height, idx) +); +-- This index is not unique, because nothing stops HotShot from sequencing duplicate transactions. +CREATE INDEX transaction_hash ON `transaction` (hash); + +CREATE TABLE pruned_height ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + -- The height of the last pruned block. + last_height BIGINT NOT NULL +); + +CREATE TABLE last_merklized_state_height ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + height BIGINT NOT NULL +); + +CREATE INDEX header_payload_hash_idx ON header (payload_hash); \ No newline at end of file diff --git a/src/data_source/sql.rs b/src/data_source/sql.rs index 44ef0739b..4798667ec 100644 --- a/src/data_source/sql.rs +++ b/src/data_source/sql.rs @@ -26,10 +26,16 @@ pub use anyhow::Error; use hotshot_types::traits::node_implementation::NodeType; pub use refinery::Migration; -pub use sql::{Config, Transaction}; +pub use sql::Transaction; pub type Builder = fetching::Builder; +#[cfg(feature = "embedded-db")] +pub type Config = sql::Config; + +#[cfg(not(feature = "embedded-db"))] +pub type Config = sql::Config; + impl Config { /// Connect to the database with this config. 
pub async fn connect>( diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index 0e4faddd9..e1bcbb3c1 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -31,9 +31,10 @@ use log::LevelFilter; use sqlx::{ pool::{Pool, PoolOptions}, postgres::{PgConnectOptions, PgSslMode}, - ConnectOptions, Row, + sqlite::{SqliteAutoVacuum, SqliteConnectOptions}, + ConnectOptions, Connection, Postgres, Row, }; -use std::{cmp::min, fmt::Debug, str::FromStr, time::Duration}; +use std::{cmp::min, fmt::Debug, path::PathBuf, str::FromStr, time::Duration}; pub extern crate sqlx; pub use sqlx::{Database, Sqlite}; @@ -117,7 +118,13 @@ macro_rules! include_migrations { /// The migrations requied to build the default schema for this version of [`SqlStorage`]. pub fn default_migrations() -> Vec { - let mut migrations = include_migrations!("$CARGO_MANIFEST_DIR/migrations").collect::>(); + #[cfg(not(feature = "embedded-db"))] + let mut migrations = + include_migrations!("$CARGO_MANIFEST_DIR/migrations/postgres").collect::>(); + + #[cfg(feature = "embedded-db")] + let mut migrations = + include_migrations!("$CARGO_MANIFEST_DIR/migrations/sqlite").collect::>(); // Check version uniqueness and sort by version. validate_migrations(&mut migrations).expect("default migrations are invalid"); @@ -184,12 +191,16 @@ fn add_custom_migrations( .map(|pair| pair.reduce(|_, custom| custom)) } -/// Postgres client config. -#[derive(Clone, Debug)] -pub struct Config { - db_opt: PgConnectOptions, +pub struct Config +where + DB: Database, +{ + db_opt: ::Options, pool_opt: PoolOptions, + #[cfg(not(feature = "embedded-db"))] schema: String, + #[cfg(feature = "embedded-db")] + path: PathBuf, reset: bool, migrations: Vec, no_migrations: bool, @@ -197,16 +208,46 @@ pub struct Config { archive: bool, } -impl Default for Config { +#[cfg(not(feature = "embedded-db"))] +impl Default for Config { fn default() -> Self { PgConnectOptions::default() + .username("postgres") + .password("password") .host("localhost") .port(5432) .into() } } -impl From for Config { +#[cfg(feature = "embedded-db")] +impl Default for Config { + fn default() -> Self { + SqliteConnectOptions::default() + .auto_vacuum(SqliteAutoVacuum::Incremental) + .into() + } +} + +#[cfg(feature = "embedded-db")] +impl From for Config { + fn from(db_opt: SqliteConnectOptions) -> Self { + let db_path = db_opt.get_filename().to_path_buf(); + Self { + db_opt, + pool_opt: PoolOptions::default(), + path: db_path, + reset: false, + migrations: vec![], + no_migrations: false, + pruner_cfg: None, + archive: false, + } + } +} + +#[cfg(not(feature = "embedded-db"))] +impl From for Config { fn from(db_opt: PgConnectOptions) -> Self { Self { db_opt, @@ -221,7 +262,8 @@ impl From for Config { } } -impl FromStr for Config { +#[cfg(not(feature = "embedded-db"))] +impl FromStr for Config { type Err = ::Err; fn from_str(s: &str) -> Result { @@ -229,7 +271,25 @@ impl FromStr for Config { } } -impl Config { +#[cfg(feature = "embedded-db")] +impl FromStr for Config { + type Err = ::Err; + + fn from_str(s: &str) -> Result { + Ok(SqliteConnectOptions::from_str(s)?.into()) + } +} + +#[cfg(feature = "embedded-db")] +impl Config { + pub fn busy_timeout(mut self, timeout: Duration) -> Self { + self.db_opt = self.db_opt.busy_timeout(timeout); + self + } +} + +#[cfg(not(feature = "embedded-db"))] +impl Config { /// Set the hostname of the database server. /// /// The default is `localhost`. 
@@ -281,7 +341,8 @@ impl Config { self.schema = schema.into(); self } - +} +impl Config { /// Reset the schema on connection. /// /// When this [`Config`] is used to [`connect`](Self::connect) a @@ -401,30 +462,41 @@ pub struct Pruner { impl SqlStorage { /// Connect to a remote database. - pub async fn connect(mut config: Config) -> Result { + pub async fn connect(mut config: Config) -> Result { + let pool = config.pool_opt; + + #[cfg(not(feature = "embedded-db"))] let schema = config.schema.clone(); - let pool = config - .pool_opt - .after_connect(move |conn, _| { - let schema = schema.clone(); - async move { - query(&format!("SET search_path TO {schema}")) - .execute(conn) - .await?; - Ok(()) - } - .boxed() - }) - .connect_with(config.db_opt) - .await?; + #[cfg(not(feature = "embedded-db"))] + let pool = pool.after_connect(move |conn, _| { + let schema = config.schema.clone(); + async move { + query(&format!("SET search_path TO {schema}")) + .execute(conn) + .await?; + Ok(()) + } + .boxed() + }); + + #[cfg(feature = "embedded-db")] + if config.reset { + std::fs::remove_file(config.path)?; + } + + let pool = pool.connect(config.db_opt.to_url_lossy().as_ref()).await?; // Create or connect to the schema for this query service. let mut conn = pool.acquire().await?; + + #[cfg(not(feature = "embedded-db"))] if config.reset { query(&format!("DROP SCHEMA IF EXISTS {} CASCADE", config.schema)) .execute(conn.as_mut()) .await?; } + + #[cfg(not(feature = "embedded-db"))] query(&format!("CREATE SCHEMA IF NOT EXISTS {}", config.schema)) .execute(conn.as_mut()) .await?; @@ -545,10 +617,32 @@ impl PruneStorage for SqlStorage { async fn get_disk_usage(&self) -> anyhow::Result { let mut tx = self.read().await?; - let row = tx - .fetch_one("SELECT pg_database_size(current_database())") - .await?; + + #[cfg(not(feature = "embedded-db"))] + let query = "SELECT pg_database_size(current_database())"; + + #[cfg(feature = "embedded-db")] + let query = " + SELECT + SUM(total_bytes) + FROM ( + SELECT + name, + (page_count * page_size) AS total_bytes + FROM + pragma_page_size + JOIN + pragma_page_count ON 1 = 1 + JOIN + sqlite_master ON type = 'table' + WHERE + name NOT LIKE 'sqlite_%' + );"; + + let row = tx.fetch_one(query).await?; let size: i64 = row.get(0); + + #[cfg(feature = "embedded-db")] Ok(size as u64) } @@ -672,38 +766,70 @@ impl VersionedDataSource for SqlStorage { // These tests run the `postgres` Docker image, which doesn't work on Windows. 
#[cfg(all(any(test, feature = "testing"), not(target_os = "windows")))] pub mod testing { + use crate::data_source::storage::sql::sqlx::ConnectOptions; use async_compatibility_layer::art::async_timeout; use async_std::net::TcpStream; + use rand::Rng; + use refinery::Migration; + use sqlx::sqlite::SqliteConnectOptions; use std::{ - env, + env, fs, + path::{Path, PathBuf}, process::{Command, Stdio}, - str, + str::{self, FromStr}, time::Duration, }; use portpicker::pick_unused_port; - use refinery::Migration; - use super::Config; + use super::{Config, Db}; use crate::testing::sleep; #[derive(Debug)] pub struct TmpDb { + #[cfg(not(feature = "embedded-db"))] host: String, + #[cfg(not(feature = "embedded-db"))] port: u16, + #[cfg(not(feature = "embedded-db"))] container_id: String, + #[cfg(feature = "embedded-db")] + db_path: PathBuf, + #[cfg(not(feature = "embedded-db"))] persistent: bool, } impl TmpDb { + #[cfg(feature = "embedded-db")] + pub fn init_sqlite_db() -> Self { + let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tmp"); + + // Create the tmp directory if it doesn't exist + if !dir.exists() { + fs::create_dir(&dir).expect("Failed to create tmp directory"); + } + + Self { + db_path: dir.join(Self::gen_db_name()), + } + } pub async fn init() -> Self { - Self::init_inner(false).await + #[cfg(feature = "embedded-db")] + return Self::init_sqlite_db(); + + #[cfg(not(feature = "embedded-db"))] + Self::init_postgres(false).await } pub async fn persistent() -> Self { - Self::init_inner(true).await + #[cfg(feature = "embedded-db")] + return Self::init_sqlite_db(); + + #[cfg(not(feature = "embedded-db"))] + Self::init_postgres(true).await } - async fn init_inner(persistent: bool) -> Self { + #[cfg(not(feature = "embedded-db"))] + async fn init_postgres(persistent: bool) -> Self { let docker_hostname = env::var("DOCKER_HOSTNAME"); // This picks an unused port on the current system. 
If docker is // configured to run on a different host then this may not find a @@ -743,28 +869,45 @@ pub mod testing { db } + #[cfg(not(feature = "embedded-db"))] pub fn host(&self) -> String { self.host.clone() } + #[cfg(not(feature = "embedded-db"))] pub fn port(&self) -> u16 { self.port } - pub fn config(&self) -> Config { - Config::default() + pub fn config(&self) -> Config { + #[cfg(feature = "embedded-db")] + let mut cfg: Config = { + let db_path = self.db_path.to_string_lossy(); + let path = format!("sqlite:{db_path}"); + SqliteConnectOptions::from_str(&path) + .expect("invalid db path") + .create_if_missing(true) + .into() + }; + + #[cfg(not(feature = "embedded-db"))] + let mut cfg = Config::default() .user("postgres") .password("password") .host(self.host()) - .port(self.port()) - .migrations(vec![Migration::unapplied( - "V11__create_test_merkle_tree_table.sql", - &TestMerkleTreeMigration::create("test_tree"), - ) - .unwrap()]) + .port(self.port()); + + cfg = cfg.migrations(vec![Migration::unapplied( + "V11__create_test_merkle_tree_table.sql", + &TestMerkleTreeMigration::create("test_tree"), + ) + .unwrap()]); + + cfg } - pub fn stop(&mut self) { + #[cfg(not(feature = "embedded-db"))] + pub fn stop_postgres(&mut self) { tracing::info!(container = self.container_id, "stopping postgres"); let output = Command::new("docker") .args(["stop", self.container_id.as_str()]) @@ -778,7 +921,8 @@ pub mod testing { ); } - pub async fn start(&mut self) { + #[cfg(not(feature = "embedded-db"))] + pub async fn start_postgres(&mut self) { tracing::info!(container = self.container_id, "resuming postgres"); let output = Command::new("docker") .args(["start", self.container_id.as_str()]) @@ -794,6 +938,7 @@ pub mod testing { self.wait_for_ready().await; } + #[cfg(not(feature = "embedded-db"))] async fn wait_for_ready(&self) { let timeout = Duration::from_secs( env::var("SQL_TMP_DB_CONNECT_TIMEOUT") @@ -856,11 +1001,21 @@ pub mod testing { ); } } + + #[cfg(feature = "embedded-db")] + pub fn gen_db_name() -> String { + let mut rng = rand::thread_rng(); + let random: String = (0..10) + .map(|_| rng.sample(rand::distributions::Alphanumeric) as char) + .collect(); + format!("tmp-db-{random}.db") + } } + #[cfg(not(feature = "embedded-db"))] impl Drop for TmpDb { fn drop(&mut self) { - self.stop(); + self.stop_postgres(); if self.persistent { let output = Command::new("docker") .args(["container", "rm", self.container_id.as_str()]) @@ -876,33 +1031,49 @@ pub mod testing { } } + #[cfg(feature = "embedded-db")] + impl Drop for TmpDb { + fn drop(&mut self) { + fs::remove_file(self.db_path.clone()).unwrap(); + } + } + pub struct TestMerkleTreeMigration; impl TestMerkleTreeMigration { fn create(name: &str) -> String { + #[cfg(feature = "embedded-db")] + let binary_data_type = "JSONB"; + #[cfg(not(feature = "embedded-db"))] + let binary_data_type = "BYTEA"; + + #[cfg(feature = "embedded-db")] + let hash_pk_type = "INTEGER PRIMARY KEY AUTOINCREMENT"; + #[cfg(not(feature = "embedded-db"))] + let hash_pk_type = "SERIAL PRIMARY KEY"; + format!( "CREATE TABLE IF NOT EXISTS hash ( - id SERIAL PRIMARY KEY, - value BYTEA NOT NULL UNIQUE + id {hash_pk_type}, + value {binary_data_type} NOT NULL UNIQUE ); - - + ALTER TABLE header ADD column test_merkle_tree_root text GENERATED ALWAYS as (data->>'test_merkle_tree_root') STORED; CREATE TABLE {name} ( - path integer[] NOT NULL, + path JSONB NOT NULL, created BIGINT NOT NULL, - hash_id INT NOT NULL REFERENCES hash (id), - children INT[], - children_bitvec BIT(8), - index JSONB, 
- entry JSONB + hash_id INT NOT NULL, + children JSONB, + children_bitvec JSONB, + idx JSONB, + entry JSONB, + PRIMARY KEY (path, created) ); - ALTER TABLE {name} ADD CONSTRAINT {name}_pk PRIMARY KEY (path, created); CREATE INDEX {name}_created ON {name} (created);" ) } @@ -931,21 +1102,21 @@ mod test { setup_test(); let db = TmpDb::init().await; - let port = db.port(); - let host = &db.host(); + let db_opt = db.config().db_opt; - let connect = |migrations: bool, custom_migrations| async move { - let mut cfg = Config::default() - .user("postgres") - .password("password") - .host(host) - .port(port) - .migrations(custom_migrations); - if !migrations { - cfg = cfg.no_migrations(); + let connect = |migrations: bool, custom_migrations| { + let db_opt = db_opt.clone(); + async move { + { + let cfg: Config = db_opt.into(); + let mut cfg = cfg.migrations(custom_migrations); + if !migrations { + cfg = cfg.no_migrations(); + } + let client = SqlStorage::connect(cfg).await?; + Ok::<_, Error>(client) + } } - let client = SqlStorage::connect(cfg).await?; - Ok::<_, Error>(client) }; // Connecting with migrations disabled should fail if the database is not already up to date @@ -967,7 +1138,8 @@ mod test { "ALTER TABLE test ADD COLUMN data INTEGER;", ) .unwrap(), - Migration::unapplied("V102__create_test_table.sql", "CREATE TABLE test ();").unwrap(), + Migration::unapplied("V102__create_test_table.sql", "CREATE TABLE test (x int);") + .unwrap(), ]; connect(true, migrations.clone()).await.unwrap(); @@ -981,6 +1153,7 @@ mod test { } #[test] + #[cfg(not(feature = "embedded-db"))] fn test_config_from_str() { let cfg = Config::from_str("postgresql://user:password@host:8080").unwrap(); assert_eq!(cfg.db_opt.get_username(), "user"); @@ -988,6 +1161,13 @@ mod test { assert_eq!(cfg.db_opt.get_port(), 8080); } + #[test] + #[cfg(feature = "embedded-db")] + fn test_config_from_str() { + let cfg = Config::from_str("sqlite://data.db").unwrap(); + assert_eq!(cfg.db_opt.get_filename().to_string_lossy(), "data.db"); + } + async fn vacuum(storage: &SqlStorage) { storage .pool @@ -1004,14 +1184,7 @@ mod test { setup_test(); let db = TmpDb::init().await; - let port = db.port(); - let host = &db.host(); - - let cfg = Config::default() - .user("postgres") - .password("password") - .host(host) - .port(port); + let cfg = db.config(); let mut storage = SqlStorage::connect(cfg).await.unwrap(); let mut leaf = LeafQueryData::::genesis::( @@ -1179,14 +1352,7 @@ mod test { setup_test(); let db = TmpDb::init().await; - let port = db.port(); - let host = &db.host(); - - let cfg = Config::default() - .user("postgres") - .password("password") - .host(host) - .port(port); + let cfg = db.config(); let storage = SqlStorage::connect(cfg).await.unwrap(); assert!(storage diff --git a/src/data_source/storage/sql/db.rs b/src/data_source/storage/sql/db.rs index 94a868d63..042ea7e0e 100644 --- a/src/data_source/storage/sql/db.rs +++ b/src/data_source/storage/sql/db.rs @@ -37,4 +37,8 @@ /// {} /// ``` /// etc. 
+ +#[cfg(feature = "embedded-db")] +pub type Db = sqlx::Sqlite; +#[cfg(not(feature = "embedded-db"))] pub type Db = sqlx::Postgres; diff --git a/src/data_source/storage/sql/queries.rs b/src/data_source/storage/sql/queries.rs index 5b5561f9d..4191ade6e 100644 --- a/src/data_source/storage/sql/queries.rs +++ b/src/data_source/storage/sql/queries.rs @@ -94,7 +94,12 @@ impl<'q> QueryBuilder<'q> { self.arguments.add(arg).map_err(|err| QueryError::Error { message: format!("{err:#}"), })?; - Ok(format!("${}", self.arguments.len())) + + if cfg!(feature = "embedded-db") { + Ok("?".to_string()) + } else { + Ok(format!("${}", self.arguments.len())) + } } /// Finalize the query with a constructed SQL statement. diff --git a/src/data_source/storage/sql/queries/availability.rs b/src/data_source/storage/sql/queries/availability.rs index 53fd59622..11f651123 100644 --- a/src/data_source/storage/sql/queries/availability.rs +++ b/src/data_source/storage/sql/queries/availability.rs @@ -221,12 +221,12 @@ where // ORDER BY ASC ensures that if there are duplicate transactions, we return the first // one. let sql = format!( - "SELECT {BLOCK_COLUMNS}, t.index AS tx_index + "SELECT {BLOCK_COLUMNS}, t.idx AS tx_index FROM header AS h JOIN payload AS p ON h.height = p.height - JOIN transaction AS t ON t.block_height = h.height + JOIN \"transaction\" AS t ON t.block_height = h.height WHERE t.hash = {hash_param} - ORDER BY (t.block_height, t.index) ASC + ORDER BY t.block_height ASC, t.idx ASC LIMIT 1" ); let row = query.query(&sql).fetch_one(self.as_mut()).await?; diff --git a/src/data_source/storage/sql/queries/explorer.rs b/src/data_source/storage/sql/queries/explorer.rs index 4da826abc..b2cd3ee7f 100644 --- a/src/data_source/storage/sql/queries/explorer.rs +++ b/src/data_source/storage/sql/queries/explorer.rs @@ -217,14 +217,14 @@ where // returned results based on. 
let transaction_target_query = match target { TransactionIdentifier::Latest => query( - "SELECT t.block_height AS height, t.index AS index FROM transaction AS t ORDER BY (t.block_height, t.index) DESC LIMIT 1", + "SELECT t.block_height AS height, t.idx AS \"index\" FROM \"transaction\" AS t ORDER BY t.block_height DESC, t.idx DESC LIMIT 1", ), TransactionIdentifier::HeightAndOffset(height, _) => query( - "SELECT t.block_height AS height, t.index AS index FROM transaction AS t WHERE t.block_height = $1 ORDER BY (t.block_height, t.index) DESC LIMIT 1", + "SELECT t.block_height AS height, t.idx AS \"index\" FROM \"transaction\" AS t WHERE t.block_height = $1 ORDER BY t.block_height DESC, t.idx DESC LIMIT 1", ) .bind(*height as i64), TransactionIdentifier::Hash(hash) => query( - "SELECT t.block_height AS height, t.index AS index FROM transaction AS t WHERE t.hash = $1 ORDER BY (t.block_height, t.index) DESC LIMIT 1", + "SELECT t.block_height AS height, t.idx AS \"index\" FROM \"transaction\" AS t WHERE t.hash = $1 ORDER BY t.block_height DESC, t.idx DESC LIMIT 1", ) .bind(hash.to_string()), }; @@ -238,7 +238,8 @@ where }; let block_height = transaction_target.get::("height") as usize; - let transaction_index = transaction_target.get::>, _>("index"); + let transaction_index = + transaction_target.get_unchecked::>, _>("index"); let offset = if let TransactionIdentifier::HeightAndOffset(_, offset) = target { *offset } else { @@ -262,9 +263,9 @@ where JOIN payload AS p ON h.height = p.height WHERE h.height IN ( SELECT t.block_height - FROM transaction AS t - WHERE (t.block_height, t.index) <= ({}, {}) - ORDER BY (t.block_height, t.index) DESC + FROM \"transaction\" AS t + WHERE (t.block_height, t.idx) <= ({}, {}) + ORDER BY t.block_height DESC, t.idx DESC LIMIT {} ) ORDER BY h.height DESC", @@ -338,7 +339,7 @@ where JOIN payload AS p ON h.height = p.height WHERE h.height = ( SELECT MAX(t1.block_height) - FROM transaction AS t1 + FROM \"transaction\" AS t1 ) ORDER BY h.height DESC" ), @@ -348,9 +349,9 @@ where JOIN payload AS p ON h.height = p.height WHERE h.height = ( SELECT t1.block_height - FROM transaction AS t1 + FROM \"transaction\" AS t1 WHERE t1.block_height = {} - ORDER BY (t1.block_height, t1.index) + ORDER BY t1.block_height, t1.idx OFFSET {} LIMIT 1 ) @@ -364,9 +365,9 @@ where JOIN payload AS p ON h.height = p.height WHERE h.height = ( SELECT t1.block_height - FROM transaction AS t1 + FROM \"transaction\" AS t1 WHERE t1.hash = {} - ORDER BY (t1.block_height, t1.index) DESC + ORDER BY t1.block_height DESC, t1.idx DESC LIMIT 1 ) ORDER BY h.height DESC", @@ -414,7 +415,7 @@ where h.timestamp AS timestamp, h.timestamp - lead(timestamp) OVER (ORDER BY h.height DESC) AS time, p.size AS size, - (SELECT COUNT(*) AS transactions FROM transaction AS t WHERE t.block_height = h.height) as transactions + (SELECT COUNT(*) AS transactions FROM \"transaction\" AS t WHERE t.block_height = h.height) as transactions FROM header AS h JOIN payload AS p ON p.height = h.height @@ -462,7 +463,7 @@ where let row = query( "SELECT (SELECT MAX(height) + 1 FROM header) AS blocks, - (SELECT COUNT(*) FROM transaction) AS transactions", + (SELECT COUNT(*) FROM \"transaction\") AS transactions", ) .fetch_one(self.as_mut()) .await?; @@ -486,12 +487,14 @@ where let latest_block: BlockDetail = self.get_block_detail(BlockIdentifier::Latest).await?; + let latest_blocks: Vec> = self .get_block_summaries(GetBlockSummariesRequest(BlockRange { target: BlockIdentifier::Latest, num_blocks: NonZeroUsize::new(10).unwrap(), })) 
.await?; + let latest_transactions: Vec> = self .get_transaction_summaries(GetTransactionSummariesRequest { range: TransactionRange { @@ -549,7 +552,7 @@ where "SELECT {BLOCK_COLUMNS} FROM header AS h JOIN payload AS p ON h.height = p.height - JOIN transaction AS t ON h.height = t.block_height + JOIN \"transaction\" AS t ON h.height = t.block_height WHERE t.hash = $1 ORDER BY h.height DESC LIMIT 5" diff --git a/src/data_source/storage/sql/queries/node.rs b/src/data_source/storage/sql/queries/node.rs index 286f02e80..8dc46cfba 100644 --- a/src/data_source/storage/sql/queries/node.rs +++ b/src/data_source/storage/sql/queries/node.rs @@ -51,7 +51,7 @@ where } async fn count_transactions(&mut self) -> QueryResult { - let (count,) = query_as::<(i64,)>("SELECT count(*) FROM transaction") + let (count,) = query_as::<(i64,)>("SELECT count(*) FROM \"transaction\"") .fetch_one(self.as_mut()) .await?; Ok(count as usize) @@ -117,7 +117,7 @@ where (SELECT count(*) AS null_payloads FROM payload WHERE data IS NULL) AS p, (SELECT count(*) AS total_vid FROM vid) AS v, (SELECT count(*) AS null_vid FROM vid WHERE share IS NULL) AS vn, - coalesce((SELECT last_height FROM pruned_height ORDER BY id DESC LIMIT 1)) as pruned_height + (SELECT(SELECT last_height FROM pruned_height ORDER BY id DESC LIMIT 1) as pruned_height) "; let row = query(sql) .fetch_optional(self.as_mut()) diff --git a/src/data_source/storage/sql/queries/state.rs b/src/data_source/storage/sql/queries/state.rs index dc8e3234a..a938ea065 100644 --- a/src/data_source/storage/sql/queries/state.rs +++ b/src/data_source/storage/sql/queries/state.rs @@ -16,6 +16,8 @@ use super::{ super::transaction::{query_as, Transaction, TransactionMode, Write}, DecodeError, QueryBuilder, }; +use crate::data_source::storage::sql::build_where_in; +use crate::data_source::storage::sql::sqlx::Row; use crate::{ data_source::storage::{MerklizedStateHeightStorage, MerklizedStateStorage}, merklized_state::{MerklizedState, Snapshot}, @@ -30,7 +32,8 @@ use jf_merkle_tree::{ prelude::{MerkleNode, MerkleProof}, DigestAlgorithm, MerkleCommitment, ToTraversalPath, }; -use sqlx::{types::BitVec, FromRow}; +use sqlx::{types::BitVec, Decode, FromRow}; +use sqlx::{types::JsonValue, ColumnIndex}; use std::collections::{HashMap, HashSet, VecDeque}; #[async_trait] @@ -58,36 +61,35 @@ where // Order by pos DESC is to return nodes from the leaf to the root let (query, sql) = build_get_path_query(state_type, traversal_path.clone(), created)?; - let nodes = query - .query_as::(&sql) - .fetch_all(self.as_mut()) - .await?; + let rows = query.query(&sql).fetch_all(self.as_mut()).await?; + + let nodes: Vec = rows.into_iter().map(|r| (&r).into()).collect(); // insert all the hash ids to a hashset which is used to query later // HashSet is used to avoid duplicates let mut hash_ids = HashSet::new(); - nodes.iter().for_each(|n| { - hash_ids.insert(n.hash_id); - if let Some(children) = &n.children { + for node in nodes.iter() { + hash_ids.insert(node.hash_id); + if let Some(children) = &node.children { + let children: Vec = serde_json::from_value(children.clone()).unwrap(); hash_ids.extend(children); } - }); + } // Find all the hash values and create a hashmap // Hashmap will be used to get the hash value of the nodes children and the node itself. 
- let hashes: HashMap> = - query_as("SELECT id, value FROM hash WHERE id = ANY( $1)") - .bind(hash_ids.into_iter().collect::>()) - .fetch(self.as_mut()) - .try_collect() - .await?; - + let (query, sql) = build_where_in("id", "SELECT id, value FROM hash", hash_ids)?; + let hashes: HashMap> = query + .query_as(&sql) + .fetch(self.as_mut()) + .try_collect() + .await?; let mut proof_path = VecDeque::with_capacity(State::tree_height()); for Node { hash_id, children, children_bitvec, - index, + idx, entry, .. } in nodes.iter() @@ -96,10 +98,14 @@ where let value = hashes.get(hash_id).ok_or(QueryError::Error { message: format!("node's value references non-existent hash {hash_id}"), })?; - match (children, children_bitvec, index, entry) { + + match (children, children_bitvec, idx, entry) { // If the row has children then its a branch (Some(children), Some(children_bitvec), None, None) => { + let children: Vec = serde_json::from_value(children.clone()).unwrap(); let mut children = children.iter(); + let children_bitvec: BitVec = + BitVec::from_bytes(children_bitvec.clone().as_slice()); // Reconstruct the Children MerkleNodes from storage. // Children bit_vec is used to create forgotten or empty node @@ -321,21 +327,47 @@ pub(crate) fn build_hash_batch_insert( // Represents a row in a state table #[derive(Debug, Default, Clone, FromRow)] pub(crate) struct Node { - pub(crate) path: Vec, + pub(crate) path: JsonValue, pub(crate) created: i64, pub(crate) hash_id: i32, - pub(crate) children: Option>, - pub(crate) children_bitvec: Option, - pub(crate) index: Option, - pub(crate) entry: Option, + pub(crate) children: Option, + pub(crate) children_bitvec: Option>, + pub(crate) idx: Option, + pub(crate) entry: Option, +} + +impl<'a, R> From<&'a R> for Node +where + R: Row, + JsonValue: 'a + Decode<'a, ::Database>, + Option: Decode<'a, ::Database>, + Vec: Decode<'a, ::Database>, + i64: Decode<'a, ::Database>, + i32: Decode<'a, ::Database>, + + &'a str: ColumnIndex, +{ + fn from(row: &'a R) -> Self { + Self { + path: row.get_unchecked("path"), + created: row.get_unchecked("created"), + hash_id: row.get_unchecked("hash_id"), + children: row.get_unchecked("children"), + children_bitvec: row.get_unchecked("children_bitvec"), + idx: row.get_unchecked("idx"), + entry: row.get_unchecked("entry"), + } + } } impl Node { pub(crate) async fn upsert( name: &str, - nodes: impl IntoIterator, + nodes: impl IntoIterator + std::fmt::Debug, tx: &mut Transaction, ) -> anyhow::Result<()> { + println!("nodes {nodes:?} \n\n"); + tx.upsert( name, [ @@ -344,7 +376,7 @@ impl Node { "hash_id", "children", "children_bitvec", - "index", + "idx", "entry", ], ["path", "created"], @@ -355,7 +387,7 @@ impl Node { n.hash_id, n.children.clone(), n.children_bitvec.clone(), - n.index.clone(), + n.idx.clone(), n.entry.clone(), ) }), @@ -371,15 +403,18 @@ fn build_get_path_query<'q>( ) -> QueryResult<(QueryBuilder<'q>, String)> { let mut query = QueryBuilder::default(); let mut traversal_path = traversal_path.into_iter().map(|x| x as i32); - let created = query.bind(created)?; // We iterate through the path vector skipping the first element after each iteration let len = traversal_path.len(); let mut sub_queries = Vec::new(); for _ in 0..=len { - let node_path = query.bind(traversal_path.clone().rev().collect::>())?; + let path = traversal_path.clone().rev().collect::>(); + let path: serde_json::Value = path.into(); + let node_path = query.bind(path)?; + let created = query.bind(created)?; + let sub_query = format!( - "(SELECT * FROM {table} 
WHERE path = {node_path} AND created <= {created} ORDER BY created DESC LIMIT 1)", + "SELECT * FROM (SELECT * FROM {table} WHERE path = {node_path} AND created <= {created} ORDER BY created DESC LIMIT 1)", ); sub_queries.push(sub_query); @@ -387,7 +422,15 @@ fn build_get_path_query<'q>( } let mut sql: String = sub_queries.join(" UNION "); - sql.push_str("ORDER BY path DESC"); + + sql = format!("SELECT * FROM ({sql}) as t "); + + if cfg!(feature = "embedded-db") { + sql.push_str("ORDER BY length(t.path) DESC"); + } else { + sql.push_str("ORDER BY t.path DESC"); + } + Ok((query, sql)) } @@ -442,7 +485,7 @@ mod test { [( block_height as i64, format!("randomHash{i}"), - "t", + "t".to_string(), 0, test_data, )], @@ -505,7 +548,13 @@ mod test { "header", ["height", "hash", "payload_hash", "timestamp", "data"], ["height"], - [(2i64, "randomstring", "t", 0, test_data)], + [( + 2i64, + "randomstring".to_string(), + "t".to_string(), + 0, + test_data, + )], ) .await .unwrap(); @@ -538,14 +587,10 @@ mod test { // Find all the nodes of Index 0 in table let mut tx = storage.read().await.unwrap(); let rows = query("SELECT * from test_tree where path = $1 ORDER BY created") - .bind(node_path) + .bind(serde_json::to_value(node_path).unwrap()) .fetch(tx.as_mut()); - let nodes: Vec<_> = rows - .map(|res| Node::from_row(&res.unwrap())) - .try_collect() - .await - .unwrap(); + let nodes: Vec = rows.map(|res| (&res.unwrap()).into()).collect().await; // There should be only 2 versions of this node assert!(nodes.len() == 2, "incorrect number of nodes"); assert_eq!(nodes[0].created, 1, "wrong block height"); @@ -600,7 +645,13 @@ mod test { "header", ["height", "hash", "payload_hash", "timestamp", "data"], ["height"], - [(block_height as i64, "randomString", "t", 0, test_data)], + [( + block_height as i64, + "randomString".to_string(), + "t".to_string(), + 0, + test_data, + )], ) .await .unwrap(); @@ -665,8 +716,8 @@ mod test { ["height"], [( 2i64, - "randomString2", - "t", + "randomString2".to_string(), + "t".to_string(), 0, serde_json::json!({ MockMerkleTree::header_state_commitment_field() : serde_json::to_value(test_tree.commitment()).unwrap()}), )], @@ -730,7 +781,7 @@ mod test { [( i as i64, format!("hash{i}"), - "t", + "t".to_string(), 0, serde_json::json!({ MockMerkleTree::header_state_commitment_field() : serde_json::to_value(test_tree.commitment()).unwrap()}) )], @@ -796,7 +847,13 @@ mod test { "header", ["height", "hash", "payload_hash", "timestamp", "data"], ["height"], - [(block_height as i64, "randomString", "t", 0, test_data)], + [( + block_height as i64, + "randomString".to_string(), + "t".to_string(), + 0, + test_data, + )], ) .await .unwrap(); @@ -862,7 +919,7 @@ mod test { [( block_height as i64, format!("rarndomString{i}"), - "t", + "t".to_string(), 0, test_data, )], @@ -924,7 +981,13 @@ mod test { "header", ["height", "hash", "payload_hash", "timestamp", "data"], ["height"], - [(block_height as i64, "randomStringgg", "t", 0, test_data)], + [( + block_height as i64, + "randomStringgg".to_string(), + "t".to_string(), + 0, + test_data, + )], ) .await .unwrap(); @@ -955,7 +1018,13 @@ mod test { "header", ["height", "hash", "payload_hash", "timestamp", "data"], ["height"], - [(2i64, "randomHashString", "t", 0, test_data)], + [( + 2i64, + "randomHashString".to_string(), + "t".to_string(), + 0, + test_data, + )], ) .await .unwrap(); @@ -975,12 +1044,12 @@ mod test { .rev() .map(|n| *n as i32) .collect::>(); - tx.execute_one( + tx.execute( query(&format!( "DELETE FROM {} WHERE created = 2 and path = 
$1", MockMerkleTree::state_type() )) - .bind(node_path), + .bind(serde_json::to_value(node_path).unwrap()), ) .await .expect("failed to delete internal node"); @@ -1076,7 +1145,7 @@ mod test { [( block_height as i64, format!("hash{block_height}"), - "hash", + "hash".to_string(), 0i64, serde_json::json!({ MockMerkleTree::header_state_commitment_field() : serde_json::to_value(tree.commitment()).unwrap()}), )], @@ -1199,8 +1268,8 @@ mod test { ["height"], [( 0i64, - "hash", - "hash", + "hash".to_string(), + "hash".to_string(), 0, serde_json::json!({ MockMerkleTree::header_state_commitment_field() : serde_json::to_value(test_tree.commitment()).unwrap()}), )], @@ -1247,12 +1316,13 @@ mod test { // Now delete the leaf node for the last entry we inserted, corrupting the database. let index = serde_json::to_value(tree_size - 1).unwrap(); let mut tx = storage.write().await.unwrap(); - tx.execute_one_with_retries( - &format!( - "DELETE FROM {} WHERE index = $1", + + tx.execute( + query(&format!( + "DELETE FROM {} WHERE idx = $1", MockMerkleTree::state_type() - ), - (index,), + )) + .bind(serde_json::to_value(index).unwrap()), ) .await .unwrap(); diff --git a/src/data_source/storage/sql/transaction.rs b/src/data_source/storage/sql/transaction.rs index 2c4838f1b..d62289ddf 100644 --- a/src/data_source/storage/sql/transaction.rs +++ b/src/data_source/storage/sql/transaction.rs @@ -20,6 +20,7 @@ use super::{ queries::{ + self, state::{build_hash_batch_insert, Node}, DecodeError, }, @@ -35,7 +36,7 @@ use crate::{ }, merklized_state::{MerklizedState, UpdateStateData}, types::HeightIndexed, - Header, Payload, QueryError, VidShare, + Header, Payload, QueryError, QueryResult, VidShare, }; use anyhow::{bail, ensure, Context}; use ark_serialize::CanonicalSerialize; @@ -52,7 +53,10 @@ use hotshot_types::traits::{ }; use itertools::Itertools; use jf_merkle_tree::prelude::{MerkleNode, MerkleProof}; -use sqlx::{pool::Pool, types::BitVec, Encode, Execute, FromRow, Type}; +use sqlx::{ + pool::Pool, query_builder::Separated, types::BitVec, Encode, Execute, FromRow, QueryBuilder, + Type, +}; use std::{ collections::{HashMap, HashSet}, marker::PhantomData, @@ -93,7 +97,9 @@ pub trait TransactionMode: Send + Sync { } impl TransactionMode for Write { + #[allow(unused_variables)] async fn begin(conn: &mut ::Connection) -> anyhow::Result<()> { + #[cfg(not(feature = "embedded-db"))] conn.execute("SET TRANSACTION ISOLATION LEVEL SERIALIZABLE") .await?; Ok(()) @@ -105,7 +111,9 @@ impl TransactionMode for Write { } impl TransactionMode for Read { + #[allow(unused_variables)] async fn begin(conn: &mut ::Connection) -> anyhow::Result<()> { + #[cfg(not(feature = "embedded-db"))] conn.execute("SET TRANSACTION ISOLATION LEVEL SERIALIZABLE, READ ONLY, DEFERRABLE") .await?; Ok(()) @@ -227,9 +235,12 @@ impl update::Transaction for Transaction { /// as long as the parameters outlive the duration of the query (the `'p: 'q`) bound on the /// [`bind`](Self::bind) function. pub trait Params<'p> { - fn bind<'q>(self, q: Query<'q>) -> Query<'q> + fn bind<'q, 'r>( + self, + q: &'q mut Separated<'r, 'p, Db, &'static str>, + ) -> &'q mut Separated<'r, 'p, Db, &'static str> where - 'p: 'q; + 'p: 'r; } /// A collection of parameters with a statically known length. @@ -241,19 +252,19 @@ pub trait FixedLengthParams<'p, const N: usize>: Params<'p> {} macro_rules! 
impl_tuple_params { ($n:literal, ($($t:ident,)+)) => { - impl<'p, $($t),+> Params<'p> for ($($t,)+) + impl<'p, $($t),+> Params<'p> for ($($t,)+) where $( - $t: 'p + for<'q> Encode<'q, Db> + Type - ),+ { - fn bind<'q>(self, q: Query<'q>) -> Query<'q> - where - 'p: 'q + $t: 'p + Encode<'p, Db> + Type + ),+{ + fn bind<'q, 'r>(self, q: &'q mut Separated<'r, 'p, Db, &'static str>) -> &'q mut Separated<'r, 'p, Db, &'static str> + where 'p: 'r, { #[allow(non_snake_case)] let ($($t,)+) = self; q $( - .bind($t) + .push_bind($t) )+ + } } @@ -274,117 +285,31 @@ impl_tuple_params!(6, (T1, T2, T3, T4, T5, T6,)); impl_tuple_params!(7, (T1, T2, T3, T4, T5, T6, T7,)); impl_tuple_params!(8, (T1, T2, T3, T4, T5, T6, T7, T8,)); -impl<'p, T> Params<'p> for Vec +pub fn build_where_in<'a, I>( + column: &'a str, + query: &'a str, + values: I, +) -> QueryResult<(queries::QueryBuilder<'a>, String)> where - T: Params<'p>, + I: IntoIterator, + I::Item: 'a + Encode<'a, Db> + Type + std::fmt::Display, { - fn bind<'q>(self, mut q: Query<'q>) -> Query<'q> - where - 'p: 'q, - { - for params in self { - q = params.bind(q); - } - q - } + let mut builder = queries::QueryBuilder::default(); + let params = values + .into_iter() + .map(|v| Ok(format!("{} ", builder.bind(v)?))) + .collect::>>()?; + + let sql = format!( + "{query} where {column} IN ({}) ", + params.into_iter().join(",") + ); + + Ok((builder, sql)) } /// Low-level, general database queries and mutation. impl Transaction { - /// Execute a statement that is expected to modify exactly one row. - /// - /// Returns an error if the database is not modified. - pub async fn execute_one<'q, E>(&mut self, statement: E) -> anyhow::Result<()> - where - E: 'q + Execute<'q, Db>, - { - let nrows = self.execute_many(statement).await?; - if nrows > 1 { - // If more than one row is affected, we don't return an error, because clearly - // _something_ happened and modified the database. So we don't necessarily want the - // caller to retry. But we do log an error, because it seems the query did something - // different than the caller intended. - tracing::error!("statement modified more rows ({nrows}) than expected (1)"); - } - Ok(()) - } - - /// Execute a statement that is expected to modify exactly one row. - /// - /// Returns an error if the database is not modified. Retries several times before failing. - pub async fn execute_one_with_retries<'q>( - &mut self, - statement: &'q str, - params: impl Params<'q> + Clone, - ) -> anyhow::Result<()> { - let interval = Duration::from_secs(1); - let mut retries = 5; - - while let Err(err) = self - .execute_one(params.clone().bind(query(statement))) - .await - { - tracing::error!( - %statement, - "error in statement execution ({retries} tries remaining): {err}" - ); - if retries == 0 { - return Err(err); - } - retries -= 1; - sleep(interval).await; - } - - Ok(()) - } - - /// Execute a statement that is expected to modify at least one row. - /// - /// Returns an error if the database is not modified. - pub async fn execute_many<'q, E>(&mut self, statement: E) -> anyhow::Result - where - E: 'q + Execute<'q, Db>, - { - let nrows = self.execute(statement).await?.rows_affected(); - ensure!(nrows > 0, "statement failed: 0 rows affected"); - Ok(nrows) - } - - /// Execute a statement that is expected to modify at least one row. - /// - /// Returns an error if the database is not modified. Retries several times before failing. 
- pub async fn execute_many_with_retries<'q, 'p>( - &mut self, - statement: &'q str, - params: impl Params<'p> + Clone, - ) -> anyhow::Result - where - 'p: 'q, - { - let interval = Duration::from_secs(1); - let mut retries = 5; - - loop { - match self - .execute_many(params.clone().bind(query(statement))) - .await - { - Ok(nrows) => return Ok(nrows), - Err(err) => { - tracing::error!( - %statement, - "error in statement execution ({retries} tries remaining): {err}" - ); - if retries == 0 { - return Err(err); - } - retries -= 1; - sleep(interval).await; - } - } - } - } - pub async fn upsert<'p, const N: usize, R>( &mut self, table: &str, @@ -400,35 +325,31 @@ impl Transaction { .iter() .map(|col| format!("{col} = excluded.{col}")) .join(","); - let columns = columns.into_iter().join(","); + let columns_str = columns + .into_iter() + .map(|col| format!("\"{col}\"")) + .join(","); let pk = pk.into_iter().join(","); - let mut values = vec![]; - let mut params = vec![]; + let mut query_builder = + QueryBuilder::new(format!("INSERT INTO \"{table}\" ({columns_str})")); let mut num_rows = 0; - for (row, entries) in rows.into_iter().enumerate() { - let start = row * N; - let end = (row + 1) * N; - let row_params = (start..end).map(|i| format!("${}", i + 1)).join(","); - values.push(format!("({row_params})")); - params.push(entries); + query_builder.push_values(rows, |mut b, row| { num_rows += 1; - } + row.bind(&mut b); + }); if num_rows == 0 { tracing::warn!("trying to upsert 0 rows, this has no effect"); return Ok(()); } - tracing::debug!("upserting {num_rows} rows"); - - let values = values.into_iter().join(","); - let stmt = format!( - "INSERT INTO {table} ({columns}) - VALUES {values} - ON CONFLICT ({pk}) DO UPDATE SET {set_columns}" - ); - let rows_modified = self.execute_many_with_retries(&stmt, params).await?; + query_builder.push(format!(" ON CONFLICT ({pk}) DO UPDATE SET {set_columns}")); + + let res = self.execute(query_builder.build()).await?; + let stmt = query_builder.sql(); + let rows_modified = res.rows_affected() as usize; + if rows_modified != num_rows { tracing::error!( stmt, @@ -519,11 +440,16 @@ where // The header and payload tables should already have been initialized when we inserted the // corresponding leaf. All we have to do is add the payload itself and its size. 
let payload = block.payload.encode(); + self.upsert( "payload", ["height", "data", "size"], ["height"], - [(block.height() as i64, payload.as_ref(), block.size() as i32)], + [( + block.height() as i64, + payload.as_ref().to_vec(), + block.size() as i32, + )], ) .await?; @@ -537,8 +463,8 @@ where if !rows.is_empty() { self.upsert( "transaction", - ["hash", "block_height", "index"], - ["block_height", "index"], + ["hash", "block_height", "idx"], + ["block_height", "idx"], rows, ) .await?; @@ -625,7 +551,7 @@ impl, const ARITY: usize> nodes.push(( Node { path: node_path, - index: Some(index), + idx: Some(index), ..Default::default() }, None, @@ -653,7 +579,7 @@ impl, const ARITY: usize> nodes.push(( Node { path, - index: Some(index), + idx: Some(index), entry: Some(entry), ..Default::default() }, @@ -697,11 +623,12 @@ impl, const ARITY: usize> // insert internal node let path = traversal_path.clone().rev().collect(); + let bit_vec_bytes = children_bitvec.to_bytes(); nodes.push(( Node { path, children: None, - children_bitvec: Some(children_bitvec), + children_bitvec: Some(bit_vec_bytes), ..Default::default() }, Some(children_values.clone()), @@ -745,7 +672,7 @@ impl, const ARITY: usize> message: "Missing child hash".to_string(), })?; - node.children = Some(children_hashes); + node.children = Some(children_hashes.into()); } } Node::upsert(name, nodes.into_iter().map(|(n, _, _)| n), self).await?; From c557e63d8bffd8a230a1c94dfdd606aa7ecbc561 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Wed, 6 Nov 2024 18:07:05 +0500 Subject: [PATCH 02/48] gitignore --- .gitignore | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 35f4835cb..b00a88f44 100644 --- a/.gitignore +++ b/.gitignore @@ -8,4 +8,7 @@ lcov.info /vsc -/.vscode \ No newline at end of file +/.vscode + +# for sqlite databases created during the tests +/tmp \ No newline at end of file From 8cfc5563229f5a2631860cb1277a209a3b003028 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Wed, 6 Nov 2024 18:09:31 +0500 Subject: [PATCH 03/48] rename transaction to transactions --- migrations/sqlite/V10__init_schema.sql | 4 ++-- .../storage/sql/queries/availability.rs | 2 +- .../storage/sql/queries/explorer.rs | 20 +++++++++---------- src/data_source/storage/sql/queries/node.rs | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/migrations/sqlite/V10__init_schema.sql b/migrations/sqlite/V10__init_schema.sql index 99197141a..863ecf4f6 100644 --- a/migrations/sqlite/V10__init_schema.sql +++ b/migrations/sqlite/V10__init_schema.sql @@ -47,7 +47,7 @@ CREATE TABLE leaf qc JSONB NOT NULL ); -CREATE TABLE `transaction` +CREATE TABLE transactions ( hash TEXT NOT NULL, -- Block containing this transaction. @@ -59,7 +59,7 @@ CREATE TABLE `transaction` PRIMARY KEY (block_height, idx) ); -- This index is not unique, because nothing stops HotShot from sequencing duplicate transactions. 
-CREATE INDEX transaction_hash ON `transaction` (hash); +CREATE INDEX transaction_hash ON transactions (hash); CREATE TABLE pruned_height ( id INTEGER PRIMARY KEY AUTOINCREMENT, diff --git a/src/data_source/storage/sql/queries/availability.rs b/src/data_source/storage/sql/queries/availability.rs index 11f651123..40972634d 100644 --- a/src/data_source/storage/sql/queries/availability.rs +++ b/src/data_source/storage/sql/queries/availability.rs @@ -224,7 +224,7 @@ where "SELECT {BLOCK_COLUMNS}, t.idx AS tx_index FROM header AS h JOIN payload AS p ON h.height = p.height - JOIN \"transaction\" AS t ON t.block_height = h.height + JOIN transactions AS t ON t.block_height = h.height WHERE t.hash = {hash_param} ORDER BY t.block_height ASC, t.idx ASC LIMIT 1" diff --git a/src/data_source/storage/sql/queries/explorer.rs b/src/data_source/storage/sql/queries/explorer.rs index b2cd3ee7f..9cad5dd7a 100644 --- a/src/data_source/storage/sql/queries/explorer.rs +++ b/src/data_source/storage/sql/queries/explorer.rs @@ -217,14 +217,14 @@ where // returned results based on. let transaction_target_query = match target { TransactionIdentifier::Latest => query( - "SELECT t.block_height AS height, t.idx AS \"index\" FROM \"transaction\" AS t ORDER BY t.block_height DESC, t.idx DESC LIMIT 1", + "SELECT t.block_height AS height, t.idx AS \"index\" FROM transactions AS t ORDER BY t.block_height DESC, t.idx DESC LIMIT 1", ), TransactionIdentifier::HeightAndOffset(height, _) => query( - "SELECT t.block_height AS height, t.idx AS \"index\" FROM \"transaction\" AS t WHERE t.block_height = $1 ORDER BY t.block_height DESC, t.idx DESC LIMIT 1", + "SELECT t.block_height AS height, t.idx AS \"index\" FROM transactions AS t WHERE t.block_height = $1 ORDER BY t.block_height DESC, t.idx DESC LIMIT 1", ) .bind(*height as i64), TransactionIdentifier::Hash(hash) => query( - "SELECT t.block_height AS height, t.idx AS \"index\" FROM \"transaction\" AS t WHERE t.hash = $1 ORDER BY t.block_height DESC, t.idx DESC LIMIT 1", + "SELECT t.block_height AS height, t.idx AS \"index\" FROM transactions AS t WHERE t.hash = $1 ORDER BY t.block_height DESC, t.idx DESC LIMIT 1", ) .bind(hash.to_string()), }; @@ -263,7 +263,7 @@ where JOIN payload AS p ON h.height = p.height WHERE h.height IN ( SELECT t.block_height - FROM \"transaction\" AS t + FROM transactions AS t WHERE (t.block_height, t.idx) <= ({}, {}) ORDER BY t.block_height DESC, t.idx DESC LIMIT {} @@ -339,7 +339,7 @@ where JOIN payload AS p ON h.height = p.height WHERE h.height = ( SELECT MAX(t1.block_height) - FROM \"transaction\" AS t1 + FROM transactions AS t1 ) ORDER BY h.height DESC" ), @@ -349,7 +349,7 @@ where JOIN payload AS p ON h.height = p.height WHERE h.height = ( SELECT t1.block_height - FROM \"transaction\" AS t1 + FROM transactions AS t1 WHERE t1.block_height = {} ORDER BY t1.block_height, t1.idx OFFSET {} @@ -365,7 +365,7 @@ where JOIN payload AS p ON h.height = p.height WHERE h.height = ( SELECT t1.block_height - FROM \"transaction\" AS t1 + FROM transactions AS t1 WHERE t1.hash = {} ORDER BY t1.block_height DESC, t1.idx DESC LIMIT 1 @@ -415,7 +415,7 @@ where h.timestamp AS timestamp, h.timestamp - lead(timestamp) OVER (ORDER BY h.height DESC) AS time, p.size AS size, - (SELECT COUNT(*) AS transactions FROM \"transaction\" AS t WHERE t.block_height = h.height) as transactions + (SELECT COUNT(*) AS transactions FROM transactions AS t WHERE t.block_height = h.height) as transactions FROM header AS h JOIN payload AS p ON p.height = h.height @@ -463,7 +463,7 @@ where let 
row = query( "SELECT (SELECT MAX(height) + 1 FROM header) AS blocks, - (SELECT COUNT(*) FROM \"transaction\") AS transactions", + (SELECT COUNT(*) FROM transactions) AS transactions", ) .fetch_one(self.as_mut()) .await?; @@ -552,7 +552,7 @@ where "SELECT {BLOCK_COLUMNS} FROM header AS h JOIN payload AS p ON h.height = p.height - JOIN \"transaction\" AS t ON h.height = t.block_height + JOIN transactions AS t ON h.height = t.block_height WHERE t.hash = $1 ORDER BY h.height DESC LIMIT 5" diff --git a/src/data_source/storage/sql/queries/node.rs b/src/data_source/storage/sql/queries/node.rs index 8dc46cfba..82759bf5b 100644 --- a/src/data_source/storage/sql/queries/node.rs +++ b/src/data_source/storage/sql/queries/node.rs @@ -51,7 +51,7 @@ where } async fn count_transactions(&mut self) -> QueryResult { - let (count,) = query_as::<(i64,)>("SELECT count(*) FROM \"transaction\"") + let (count,) = query_as::<(i64,)>("SELECT count(*) FROM transactions") .fetch_one(self.as_mut()) .await?; Ok(count as usize) From 5203dff91dba718e30aa153297772cb202c294bd Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Wed, 6 Nov 2024 20:16:57 +0500 Subject: [PATCH 04/48] fix simple server --- examples/simple-server.rs | 23 ++++++---- src/data_source/storage/sql.rs | 50 +++++++++++++++------- src/data_source/storage/sql/transaction.rs | 8 ++-- 3 files changed, 51 insertions(+), 30 deletions(-) diff --git a/examples/simple-server.rs b/examples/simple-server.rs index 746de8bb9..0ca9b8974 100644 --- a/examples/simple-server.rs +++ b/examples/simple-server.rs @@ -85,15 +85,20 @@ async fn init_db() -> Db { } #[cfg(not(target_os = "windows"))] -async fn init_data_source(db: &Db) -> DataSource { - data_source::sql::Config::default() - .user("postgres") - .password("password") - .host(db.host()) - .port(db.port()) - .connect(Default::default()) - .await - .unwrap() +async fn init_data_source(#[allow(unused_variables)] db: &Db) -> DataSource { + let mut cfg = data_source::sql::Config::default(); + + #[cfg(not(feature = "embedded-db"))] + { + cfg = cfg.host(db.host()).port(db.port()); + } + + #[cfg(feature = "embedded-db")] + { + cfg = cfg.db_path(db.path()); + } + + cfg.connect(Default::default()).await.unwrap() } #[cfg(target_os = "windows")] diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index e1bcbb3c1..4cbe8517b 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -24,18 +24,24 @@ use crate::{ }; use async_trait::async_trait; use chrono::Utc; -use futures::future::FutureExt; + use hotshot_types::traits::metrics::Metrics; use itertools::Itertools; use log::LevelFilter; + +#[cfg(not(feature = "embedded-db"))] +use futures::future::FutureExt; +#[cfg(not(feature = "embedded-db"))] +use sqlx::postgres::{ + Postgres, {PgConnectOptions, PgSslMode}, +}; +#[cfg(feature = "embedded-db")] +use sqlx::sqlite::SqliteConnectOptions; use sqlx::{ pool::{Pool, PoolOptions}, - postgres::{PgConnectOptions, PgSslMode}, - sqlite::{SqliteAutoVacuum, SqliteConnectOptions}, - ConnectOptions, Connection, Postgres, Row, + ConnectOptions, Connection, Row, }; -use std::{cmp::min, fmt::Debug, path::PathBuf, str::FromStr, time::Duration}; - +use std::{cmp::min, fmt::Debug, str::FromStr, time::Duration}; pub extern crate sqlx; pub use sqlx::{Database, Sqlite}; @@ -200,7 +206,7 @@ where #[cfg(not(feature = "embedded-db"))] schema: String, #[cfg(feature = "embedded-db")] - path: PathBuf, + db_path: std::path::PathBuf, reset: bool, migrations: Vec, no_migrations: bool, @@ -224,7 +230,8 @@ impl 
Default for Config { impl Default for Config { fn default() -> Self { SqliteConnectOptions::default() - .auto_vacuum(SqliteAutoVacuum::Incremental) + .busy_timeout(Duration::from_secs(30)) + .auto_vacuum(sqlx::sqlite::SqliteAutoVacuum::Incremental) .into() } } @@ -236,7 +243,7 @@ impl From for Config { Self { db_opt, pool_opt: PoolOptions::default(), - path: db_path, + db_path, reset: false, migrations: vec![], no_migrations: false, @@ -286,6 +293,11 @@ impl Config { self.db_opt = self.db_opt.busy_timeout(timeout); self } + + pub fn db_path(mut self, path: std::path::PathBuf) -> Self { + self.db_path = path; + self + } } #[cfg(not(feature = "embedded-db"))] @@ -342,6 +354,7 @@ impl Config { self } } + impl Config { /// Reset the schema on connection. /// @@ -481,7 +494,7 @@ impl SqlStorage { #[cfg(feature = "embedded-db")] if config.reset { - std::fs::remove_file(config.path)?; + std::fs::remove_file(config.db_path)?; } let pool = pool.connect(config.db_opt.to_url_lossy().as_ref()).await?; @@ -491,13 +504,13 @@ impl SqlStorage { #[cfg(not(feature = "embedded-db"))] if config.reset { - query(&format!("DROP SCHEMA IF EXISTS {} CASCADE", config.schema)) + query(&format!("DROP SCHEMA IF EXISTS {} CASCADE", schema)) .execute(conn.as_mut()) .await?; } #[cfg(not(feature = "embedded-db"))] - query(&format!("CREATE SCHEMA IF NOT EXISTS {}", config.schema)) + query(&format!("CREATE SCHEMA IF NOT EXISTS {}", schema)) .execute(conn.as_mut()) .await?; @@ -642,7 +655,6 @@ impl PruneStorage for SqlStorage { let row = tx.fetch_one(query).await?; let size: i64 = row.get(0); - #[cfg(feature = "embedded-db")] Ok(size as u64) } @@ -766,6 +778,7 @@ impl VersionedDataSource for SqlStorage { // These tests run the `postgres` Docker image, which doesn't work on Windows. #[cfg(all(any(test, feature = "testing"), not(target_os = "windows")))] pub mod testing { + #![allow(unused_imports)] use crate::data_source::storage::sql::sqlx::ConnectOptions; use async_compatibility_layer::art::async_timeout; use async_std::net::TcpStream; @@ -774,7 +787,7 @@ pub mod testing { use sqlx::sqlite::SqliteConnectOptions; use std::{ env, fs, - path::{Path, PathBuf}, + path::Path, process::{Command, Stdio}, str::{self, FromStr}, time::Duration, @@ -794,14 +807,14 @@ pub mod testing { #[cfg(not(feature = "embedded-db"))] container_id: String, #[cfg(feature = "embedded-db")] - db_path: PathBuf, + db_path: std::path::PathBuf, #[cfg(not(feature = "embedded-db"))] persistent: bool, } impl TmpDb { #[cfg(feature = "embedded-db")] pub fn init_sqlite_db() -> Self { - let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tmp"); + let dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tmp"); // Create the tmp directory if it doesn't exist if !dir.exists() { @@ -879,6 +892,11 @@ pub mod testing { self.port } + #[cfg(feature = "embedded-db")] + pub fn path(&self) -> std::path::PathBuf { + self.db_path.clone() + } + pub fn config(&self) -> Config { #[cfg(feature = "embedded-db")] let mut cfg: Config = { diff --git a/src/data_source/storage/sql/transaction.rs b/src/data_source/storage/sql/transaction.rs index d62289ddf..a18a9166b 100644 --- a/src/data_source/storage/sql/transaction.rs +++ b/src/data_source/storage/sql/transaction.rs @@ -38,9 +38,8 @@ use crate::{ types::HeightIndexed, Header, Payload, QueryError, QueryResult, VidShare, }; -use anyhow::{bail, ensure, Context}; +use anyhow::{bail, Context}; use ark_serialize::CanonicalSerialize; -use async_std::task::sleep; use async_trait::async_trait; use committable::Committable; 
use derive_more::{Deref, DerefMut}; @@ -54,13 +53,12 @@ use hotshot_types::traits::{ use itertools::Itertools; use jf_merkle_tree::prelude::{MerkleNode, MerkleProof}; use sqlx::{ - pool::Pool, query_builder::Separated, types::BitVec, Encode, Execute, FromRow, QueryBuilder, - Type, + pool::Pool, query_builder::Separated, types::BitVec, Encode, FromRow, QueryBuilder, Type, }; use std::{ collections::{HashMap, HashSet}, marker::PhantomData, - time::{Duration, Instant}, + time::Instant, }; pub use sqlx::Executor; From d5b8ddaf165297e148344b436b9fd67d605ff235 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Wed, 6 Nov 2024 20:40:46 +0500 Subject: [PATCH 05/48] fix test --- src/data_source/storage/sql.rs | 4 ++-- src/data_source/storage/sql/transaction.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index 4cbe8517b..576369815 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -1061,7 +1061,7 @@ pub mod testing { impl TestMerkleTreeMigration { fn create(name: &str) -> String { #[cfg(feature = "embedded-db")] - let binary_data_type = "JSONB"; + let binary_data_type = "BLOB"; #[cfg(not(feature = "embedded-db"))] let binary_data_type = "BYTEA"; @@ -1087,7 +1087,7 @@ pub mod testing { created BIGINT NOT NULL, hash_id INT NOT NULL, children JSONB, - children_bitvec JSONB, + children_bitvec {binary_data_type}, idx JSONB, entry JSONB, PRIMARY KEY (path, created) diff --git a/src/data_source/storage/sql/transaction.rs b/src/data_source/storage/sql/transaction.rs index a18a9166b..c9f932a07 100644 --- a/src/data_source/storage/sql/transaction.rs +++ b/src/data_source/storage/sql/transaction.rs @@ -460,7 +460,7 @@ where } if !rows.is_empty() { self.upsert( - "transaction", + "transactions", ["hash", "block_height", "idx"], ["block_height", "idx"], rows, From e8742cf2c03ea40d51870f39539fb24a0fa697d8 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Wed, 6 Nov 2024 20:41:00 +0500 Subject: [PATCH 06/48] test with sqlite in ci --- .github/workflows/build.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 103e9343d..c92835243 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -48,6 +48,12 @@ jobs: cargo test --workspace --release --all-features --no-run cargo test --workspace --release --all-features --verbose -- --test-threads 2 timeout-minutes: 60 + + - name: Test with embedded db + run: | + cargo test --workspace --release --features "embedded-db, no-storage" --no-run + cargo test --workspace --release --features "embedded-db, no-storage" --verbose -- --test-threads 2 + timeout-minutes: 60 - name: Generate Documentation run: | From f27c8d791074223842f2c69a79e8f79749ebb0c3 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Thu, 7 Nov 2024 02:15:58 +0500 Subject: [PATCH 07/48] fix docs --- src/data_source/sql.rs | 21 +++++++++++++++++---- src/data_source/storage/sql.rs | 29 ++++++++++++++++++++++------- 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/src/data_source/sql.rs b/src/data_source/sql.rs index 4798667ec..04eee49cd 100644 --- a/src/data_source/sql.rs +++ b/src/data_source/sql.rs @@ -84,9 +84,11 @@ impl Config { /// /// ## Initialization /// -/// When creating a [`SqlDataSource`], the caller can use [`Config`] to specify the host, user, and -/// database to connect to. As such, [`SqlDataSource`] is not very opinionated about how the -/// Postgres instance is set up. 
The administrator must simply ensure that there is a database +/// When creating a PostgreSQL [`SqlDataSource`], the caller can use [`Config`] to specify the host, user, and +/// database for the connection. If the `embedded-db` feature is enabled, the caller can instead specify the +/// file path for an SQLite database. +/// As such, [`SqlDataSource`] is not very opinionated about how the +/// database instance is set up. The administrator must simply ensure that there is a database /// dedicated to the [`SqlDataSource`] and a user with appropriate permissions (all on `SCHEMA` and /// all on `DATABASE`) over that database. /// @@ -102,10 +104,13 @@ impl Config { /// GRANT ALL ON DATABASE hotshot_query_service TO hotshot_user WITH GRANT OPTION; /// ``` /// -/// One could then connect to this database with the following [`Config`]: +/// For SQLite, simply provide the file path, and the file will be created if it does not already exist. +/// +/// One could then connect to this database with the following [`Config`] for postgres: /// /// ``` /// # use hotshot_query_service::data_source::sql::Config; +/// #[cfg(not(feature= "embedded-db"))] /// Config::default() /// .host("postgres.database.hostname") /// .database("hotshot_query_service") @@ -113,7 +118,15 @@ impl Config { /// .password("password") /// # ; /// ``` +/// Or, if the `embedded-db` feature is enabled, configure it as follows for SQLite: /// +/// ``` +/// # use hotshot_query_service::data_source::sql::Config; +/// #[cfg(feature= "embedded-db")] +/// Config::default() +/// .db_path("temp.db".into()) +/// # ; +/// ``` /// ## Resetting /// /// In general, resetting the database when necessary is left up to the administrator. However, for diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index 576369815..88502d6f3 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -63,7 +63,7 @@ pub use transaction::*; use self::{migrate::Migrator, transaction::PoolMetrics}; -/// Embed migrations from the given directory into the current binary. +/// Embed migrations from the given directory into the current binary for PostgreSQL or SQLite. /// /// The macro invocation `include_migrations!(path)` evaluates to an expression of type `impl /// Iterator`. Each migration must be a text file which is an immediate child of @@ -78,15 +78,30 @@ use self::{migrate::Migrator, transaction::PoolMetrics}; /// /// As an example, this is the invocation used to load the default migrations from the /// `hotshot-query-service` crate. The migrations are located in a directory called `migrations` at -/// the root of the crate. +/// - PostgreSQL migrations are in `/migrations/postgres`. +/// - SQLite migrations are in `/migrations/sqlite`. 
/// /// ``` /// # use hotshot_query_service::data_source::sql::{include_migrations, Migration}; -/// let mut migrations: Vec = -/// include_migrations!("$CARGO_MANIFEST_DIR/migrations").collect(); -/// migrations.sort(); -/// assert_eq!(migrations[0].version(), 10); -/// assert_eq!(migrations[0].name(), "init_schema"); +/// // For PostgreSQL +/// #[cfg(not(feature = "embedded-db"))] +/// { +/// let mut postgres_migrations: Vec = +/// include_migrations!("$CARGO_MANIFEST_DIR/migrations/postgres").collect(); +/// postgres_migrations.sort(); +/// assert_eq!(postgres_migrations[0].version(), 10); +/// assert_eq!(postgres_migrations[0].name(), "init_schema"); +/// } +/// +/// // For SQLite +/// #[cfg(feature = "embedded-db")] +/// { +/// let mut sqlite_migrations: Vec = +/// include_migrations!("$CARGO_MANIFEST_DIR/migrations/sqlite").collect(); +/// sqlite_migrations.sort(); +/// assert_eq!(sqlite_migrations[0].version(), 10); +/// assert_eq!(sqlite_migrations[0].name(), "init_schema"); +/// } /// ``` /// /// Note that a similar macro is available from Refinery: From a5eeb6d80ba40bcbaa9c9cb0f41bfc6b0e763f17 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Thu, 7 Nov 2024 06:08:06 +0500 Subject: [PATCH 08/48] fix postgres migration --- .../postgres/V200__rename_transaction_table.sql | 5 +++++ src/data_source/storage/sql.rs | 4 ++-- src/data_source/storage/sql/queries/state.rs | 17 +++++++++++------ src/data_source/storage/sql/transaction.rs | 6 ++++++ 4 files changed, 24 insertions(+), 8 deletions(-) create mode 100644 migrations/postgres/V200__rename_transaction_table.sql diff --git a/migrations/postgres/V200__rename_transaction_table.sql b/migrations/postgres/V200__rename_transaction_table.sql new file mode 100644 index 000000000..06fac6533 --- /dev/null +++ b/migrations/postgres/V200__rename_transaction_table.sql @@ -0,0 +1,5 @@ +ALTER TABLE transaction + RENAME TO transactions; + +ALTER TABLE transactions + RENAME COLUMN index TO idx; \ No newline at end of file diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index 88502d6f3..91d5f205a 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -1167,11 +1167,11 @@ mod test { // The SQL commands used here will fail if not run in order. let migrations = vec![ Migration::unapplied( - "V103__create_test_table.sql", + "V203__create_test_table.sql", "ALTER TABLE test ADD COLUMN data INTEGER;", ) .unwrap(), - Migration::unapplied("V102__create_test_table.sql", "CREATE TABLE test (x int);") + Migration::unapplied("V202__create_test_table.sql", "CREATE TABLE test (x int);") .unwrap(), ]; connect(true, migrations.clone()).await.unwrap(); diff --git a/src/data_source/storage/sql/queries/state.rs b/src/data_source/storage/sql/queries/state.rs index a938ea065..99346e7f8 100644 --- a/src/data_source/storage/sql/queries/state.rs +++ b/src/data_source/storage/sql/queries/state.rs @@ -78,12 +78,17 @@ where // Find all the hash values and create a hashmap // Hashmap will be used to get the hash value of the nodes children and the node itself. - let (query, sql) = build_where_in("id", "SELECT id, value FROM hash", hash_ids)?; - let hashes: HashMap> = query - .query_as(&sql) - .fetch(self.as_mut()) - .try_collect() - .await?; + let hashes = if !hash_ids.is_empty() { + let (query, sql) = build_where_in("id", "SELECT id, value FROM hash", hash_ids)?; + query + .query_as(&sql) + .fetch(self.as_mut()) + .try_collect::>>() + .await? 
+ } else { + HashMap::new() + }; + let mut proof_path = VecDeque::with_capacity(State::tree_height()); for Node { hash_id, diff --git a/src/data_source/storage/sql/transaction.rs b/src/data_source/storage/sql/transaction.rs index c9f932a07..15ca97bc5 100644 --- a/src/data_source/storage/sql/transaction.rs +++ b/src/data_source/storage/sql/transaction.rs @@ -298,6 +298,12 @@ where .map(|v| Ok(format!("{} ", builder.bind(v)?))) .collect::>>()?; + if params.is_empty() { + return Err(QueryError::Error { + message: "failed to build WHERE IN query. No parameter found ".to_string(), + }); + } + let sql = format!( "{query} where {column} IN ({}) ", params.into_iter().join(",") From 23c7f0be198b97a3007faa52ed093d991c3f11d6 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Thu, 7 Nov 2024 18:36:34 +0500 Subject: [PATCH 09/48] set num of nodes to 6 --- src/testing/consensus.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/testing/consensus.rs b/src/testing/consensus.rs index cdb29bffa..e981b0802 100644 --- a/src/testing/consensus.rs +++ b/src/testing/consensus.rs @@ -66,7 +66,7 @@ pub struct MockNetwork { pub type MockDataSource = FileSystemDataSource; pub type MockSqlDataSource = SqlDataSource; -pub const NUM_NODES: usize = 2; +pub const NUM_NODES: usize = 6; impl MockNetwork { pub async fn init() -> Self { From 3b0c92b02d9880369e487f76617a6df793630af3 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Thu, 7 Nov 2024 18:59:31 +0500 Subject: [PATCH 10/48] add testing feature --- .github/workflows/build.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c92835243..d5900f64e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -42,7 +42,7 @@ jobs: # don't compile with all combinations of features. 
- name: Clippy run: cargo clippy --workspace --all-features --all-targets -- -D warnings - + - name: Test run: | cargo test --workspace --release --all-features --no-run @@ -51,10 +51,10 @@ jobs: - name: Test with embedded db run: | - cargo test --workspace --release --features "embedded-db, no-storage" --no-run - cargo test --workspace --release --features "embedded-db, no-storage" --verbose -- --test-threads 2 + cargo test --workspace --release --features "embedded-db, no-storage, testing" --no-run + cargo test --workspace --release --features "embedded-db, no-storage, testing" --verbose -- --test-threads 2 timeout-minutes: 60 - + - name: Generate Documentation run: | cargo doc --no-deps --lib --release --all-features From 831aee81df8901ffdf29cc81bf517e95e37fc846 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Thu, 7 Nov 2024 19:13:58 +0500 Subject: [PATCH 11/48] remove unwrap --- src/data_source/storage/sql/queries/state.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/data_source/storage/sql/queries/state.rs b/src/data_source/storage/sql/queries/state.rs index 99346e7f8..9fec4334b 100644 --- a/src/data_source/storage/sql/queries/state.rs +++ b/src/data_source/storage/sql/queries/state.rs @@ -71,7 +71,10 @@ where for node in nodes.iter() { hash_ids.insert(node.hash_id); if let Some(children) = &node.children { - let children: Vec = serde_json::from_value(children.clone()).unwrap(); + let children: Vec = + serde_json::from_value(children.clone()).map_err(|e| QueryError::Error { + message: format!("Error deserializing 'children' into Vec: {e}"), + })?; hash_ids.extend(children); } } @@ -107,7 +110,14 @@ where match (children, children_bitvec, idx, entry) { // If the row has children then its a branch (Some(children), Some(children_bitvec), None, None) => { - let children: Vec = serde_json::from_value(children.clone()).unwrap(); + let children: Vec = + serde_json::from_value(children.clone()).map_err(|e| { + QueryError::Error { + message: format!( + "Error deserializing 'children' into Vec: {e}" + ), + } + })?; let mut children = children.iter(); let children_bitvec: BitVec = BitVec::from_bytes(children_bitvec.clone().as_slice()); From 4b3903c01c260416b08da6ab99fcd0658b4a7411 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Thu, 7 Nov 2024 19:30:49 +0500 Subject: [PATCH 12/48] remove fmt bound --- src/data_source/storage/sql/queries/state.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/data_source/storage/sql/queries/state.rs b/src/data_source/storage/sql/queries/state.rs index 9fec4334b..1c550d8ca 100644 --- a/src/data_source/storage/sql/queries/state.rs +++ b/src/data_source/storage/sql/queries/state.rs @@ -378,11 +378,9 @@ where impl Node { pub(crate) async fn upsert( name: &str, - nodes: impl IntoIterator + std::fmt::Debug, + nodes: impl IntoIterator, tx: &mut Transaction, ) -> anyhow::Result<()> { - println!("nodes {nodes:?} \n\n"); - tx.upsert( name, [ From f94c49126fb7535f351c15f73058e1dfa9cabcc2 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Thu, 7 Nov 2024 19:31:53 +0500 Subject: [PATCH 13/48] remove display bound --- src/data_source/storage/sql/transaction.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data_source/storage/sql/transaction.rs b/src/data_source/storage/sql/transaction.rs index 15ca97bc5..7af3bb69e 100644 --- a/src/data_source/storage/sql/transaction.rs +++ b/src/data_source/storage/sql/transaction.rs @@ -290,7 +290,7 @@ pub fn build_where_in<'a, I>( ) -> 
QueryResult<(queries::QueryBuilder<'a>, String)> where I: IntoIterator, - I::Item: 'a + Encode<'a, Db> + Type + std::fmt::Display, + I::Item: 'a + Encode<'a, Db> + Type, { let mut builder = queries::QueryBuilder::default(); let params = values From fd381b7d38a7668fab10694ba45a2b0d61965186 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Thu, 7 Nov 2024 19:42:29 +0500 Subject: [PATCH 14/48] fix build ci --- .github/workflows/build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d5900f64e..02804d423 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -51,8 +51,8 @@ jobs: - name: Test with embedded db run: | - cargo test --workspace --release --features "embedded-db, no-storage, testing" --no-run - cargo test --workspace --release --features "embedded-db, no-storage, testing" --verbose -- --test-threads 2 + cargo test --workspace --release --features "no-storage, testing" --no-run + cargo test --workspace --release --features "no-storage, testing" --verbose -- --test-threads 2 timeout-minutes: 60 - name: Generate Documentation From e804d830650d10d82ea9cc673f05d5f75646b3bf Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Thu, 7 Nov 2024 20:21:08 +0500 Subject: [PATCH 15/48] fix build ci --- .github/workflows/build.yml | 4 ++-- examples/simple-server.rs | 2 +- src/testing/consensus.rs | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 02804d423..7cfadf0e2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -43,13 +43,13 @@ jobs: - name: Clippy run: cargo clippy --workspace --all-features --all-targets -- -D warnings - - name: Test + - name: Test with embedded-db run: | cargo test --workspace --release --all-features --no-run cargo test --workspace --release --all-features --verbose -- --test-threads 2 timeout-minutes: 60 - - name: Test with embedded db + - name: Test with postgres run: | cargo test --workspace --release --features "no-storage, testing" --no-run cargo test --workspace --release --features "no-storage, testing" --verbose -- --test-threads 2 diff --git a/examples/simple-server.rs b/examples/simple-server.rs index 0ca9b8974..5a848b7b1 100644 --- a/examples/simple-server.rs +++ b/examples/simple-server.rs @@ -52,7 +52,7 @@ use std::{num::NonZeroUsize, str::FromStr, time::Duration}; use url::Url; use vbs::version::StaticVersionType; -const NUM_NODES: usize = 2; +const NUM_NODES: usize = 5; #[derive(Parser)] struct Options { diff --git a/src/testing/consensus.rs b/src/testing/consensus.rs index e981b0802..85475e841 100644 --- a/src/testing/consensus.rs +++ b/src/testing/consensus.rs @@ -66,7 +66,7 @@ pub struct MockNetwork { pub type MockDataSource = FileSystemDataSource; pub type MockSqlDataSource = SqlDataSource; -pub const NUM_NODES: usize = 6; +pub const NUM_NODES: usize = 5; impl MockNetwork { pub async fn init() -> Self { From 7ceb5b89306fa5d0a15afa054789e2e1a27736d4 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Sat, 9 Nov 2024 02:49:34 +0500 Subject: [PATCH 16/48] address PR comments --- src/data_source/sql.rs | 6 +- src/data_source/storage/sql.rs | 70 ++++++++----------- src/data_source/storage/sql/db.rs | 20 +++--- src/data_source/storage/sql/queries.rs | 8 +-- .../storage/sql/queries/availability.rs | 16 ++--- .../storage/sql/queries/explorer.rs | 2 +- src/data_source/storage/sql/queries/node.rs | 2 +- src/data_source/storage/sql/queries/state.rs | 5 
+- src/data_source/storage/sql/transaction.rs | 2 +- 9 files changed, 58 insertions(+), 73 deletions(-) diff --git a/src/data_source/sql.rs b/src/data_source/sql.rs index 04eee49cd..ce17ae42c 100644 --- a/src/data_source/sql.rs +++ b/src/data_source/sql.rs @@ -30,11 +30,7 @@ pub use sql::Transaction; pub type Builder = fetching::Builder; -#[cfg(feature = "embedded-db")] -pub type Config = sql::Config; - -#[cfg(not(feature = "embedded-db"))] -pub type Config = sql::Config; +pub type Config = sql::Config; impl Config { /// Connect to the database with this config. diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index 91d5f205a..11b6871df 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -39,7 +39,7 @@ use sqlx::postgres::{ use sqlx::sqlite::SqliteConnectOptions; use sqlx::{ pool::{Pool, PoolOptions}, - ConnectOptions, Connection, Row, + ConnectOptions, Row, }; use std::{cmp::min, fmt::Debug, str::FromStr, time::Duration}; pub extern crate sqlx; @@ -85,23 +85,16 @@ use self::{migrate::Migrator, transaction::PoolMetrics}; /// # use hotshot_query_service::data_source::sql::{include_migrations, Migration}; /// // For PostgreSQL /// #[cfg(not(feature = "embedded-db"))] -/// { -/// let mut postgres_migrations: Vec = +/// let mut migrations: Vec = /// include_migrations!("$CARGO_MANIFEST_DIR/migrations/postgres").collect(); -/// postgres_migrations.sort(); -/// assert_eq!(postgres_migrations[0].version(), 10); -/// assert_eq!(postgres_migrations[0].name(), "init_schema"); -/// } -/// /// // For SQLite /// #[cfg(feature = "embedded-db")] -/// { -/// let mut sqlite_migrations: Vec = +/// let mut migrations: Vec = /// include_migrations!("$CARGO_MANIFEST_DIR/migrations/sqlite").collect(); +/// /// sqlite_migrations.sort(); /// assert_eq!(sqlite_migrations[0].version(), 10); /// assert_eq!(sqlite_migrations[0].name(), "init_schema"); -/// } /// ``` /// /// Note that a similar macro is available from Refinery: @@ -212,16 +205,14 @@ fn add_custom_migrations( .map(|pair| pair.reduce(|_, custom| custom)) } -pub struct Config -where - DB: Database, -{ - db_opt: ::Options, +pub struct Config { + #[cfg(feature = "embedded-db")] + db_opt: SqliteConnectOptions, + #[cfg(not(feature = "embedded-db"))] + db_opt: PostgresConnectOptions, pool_opt: PoolOptions, #[cfg(not(feature = "embedded-db"))] schema: String, - #[cfg(feature = "embedded-db")] - db_path: std::path::PathBuf, reset: bool, migrations: Vec, no_migrations: bool, @@ -242,23 +233,22 @@ impl Default for Config { } #[cfg(feature = "embedded-db")] -impl Default for Config { +impl Default for Config { fn default() -> Self { SqliteConnectOptions::default() .busy_timeout(Duration::from_secs(30)) .auto_vacuum(sqlx::sqlite::SqliteAutoVacuum::Incremental) + .create_if_missing(true) .into() } } #[cfg(feature = "embedded-db")] -impl From for Config { +impl From for Config { fn from(db_opt: SqliteConnectOptions) -> Self { - let db_path = db_opt.get_filename().to_path_buf(); Self { db_opt, pool_opt: PoolOptions::default(), - db_path, reset: false, migrations: vec![], no_migrations: false, @@ -294,7 +284,7 @@ impl FromStr for Config { } #[cfg(feature = "embedded-db")] -impl FromStr for Config { +impl FromStr for Config { type Err = ::Err; fn from_str(s: &str) -> Result { @@ -303,20 +293,20 @@ impl FromStr for Config { } #[cfg(feature = "embedded-db")] -impl Config { +impl Config { pub fn busy_timeout(mut self, timeout: Duration) -> Self { self.db_opt = self.db_opt.busy_timeout(timeout); self } pub fn 
db_path(mut self, path: std::path::PathBuf) -> Self { - self.db_path = path; + self.db_opt = self.db_opt.filename(path); self } } #[cfg(not(feature = "embedded-db"))] -impl Config { +impl Config { /// Set the hostname of the database server. /// /// The default is `localhost`. @@ -370,7 +360,7 @@ impl Config { } } -impl Config { +impl Config { /// Reset the schema on connection. /// /// When this [`Config`] is used to [`connect`](Self::connect) a @@ -490,7 +480,7 @@ pub struct Pruner { impl SqlStorage { /// Connect to a remote database. - pub async fn connect(mut config: Config) -> Result { + pub async fn connect(mut config: Config) -> Result { let pool = config.pool_opt; #[cfg(not(feature = "embedded-db"))] @@ -509,10 +499,10 @@ impl SqlStorage { #[cfg(feature = "embedded-db")] if config.reset { - std::fs::remove_file(config.db_path)?; + std::fs::remove_file(config.db_opt.get_filename())?; } - let pool = pool.connect(config.db_opt.to_url_lossy().as_ref()).await?; + let pool = pool.connect_with(config.db_opt).await?; // Create or connect to the schema for this query service. let mut conn = pool.acquire().await?; @@ -823,12 +813,11 @@ pub mod testing { container_id: String, #[cfg(feature = "embedded-db")] db_path: std::path::PathBuf, - #[cfg(not(feature = "embedded-db"))] persistent: bool, } impl TmpDb { #[cfg(feature = "embedded-db")] - pub fn init_sqlite_db() -> Self { + fn init_sqlite_db(persistent: bool) -> Self { let dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tmp"); // Create the tmp directory if it doesn't exist @@ -838,11 +827,12 @@ pub mod testing { Self { db_path: dir.join(Self::gen_db_name()), + persistent, } } pub async fn init() -> Self { #[cfg(feature = "embedded-db")] - return Self::init_sqlite_db(); + return Self::init_sqlite_db(false); #[cfg(not(feature = "embedded-db"))] Self::init_postgres(false).await @@ -850,7 +840,7 @@ pub mod testing { pub async fn persistent() -> Self { #[cfg(feature = "embedded-db")] - return Self::init_sqlite_db(); + return Self::init_sqlite_db(true); #[cfg(not(feature = "embedded-db"))] Self::init_postgres(true).await @@ -912,9 +902,9 @@ pub mod testing { self.db_path.clone() } - pub fn config(&self) -> Config { + pub fn config(&self) -> Config { #[cfg(feature = "embedded-db")] - let mut cfg: Config = { + let mut cfg: Config = { let db_path = self.db_path.to_string_lossy(); let path = format!("sqlite:{db_path}"); SqliteConnectOptions::from_str(&path) @@ -1049,7 +1039,7 @@ pub mod testing { impl Drop for TmpDb { fn drop(&mut self) { self.stop_postgres(); - if self.persistent { + if !self.persistent { let output = Command::new("docker") .args(["container", "rm", self.container_id.as_str()]) .output() @@ -1067,7 +1057,9 @@ pub mod testing { #[cfg(feature = "embedded-db")] impl Drop for TmpDb { fn drop(&mut self) { - fs::remove_file(self.db_path.clone()).unwrap(); + if !self.persistent { + fs::remove_file(self.db_path.clone()).unwrap(); + } } } @@ -1141,7 +1133,7 @@ mod test { let db_opt = db_opt.clone(); async move { { - let cfg: Config = db_opt.into(); + let cfg: Config = db_opt.into(); let mut cfg = cfg.migrations(custom_migrations); if !migrations { cfg = cfg.no_migrations(); diff --git a/src/data_source/storage/sql/db.rs b/src/data_source/storage/sql/db.rs index 042ea7e0e..037ba6412 100644 --- a/src/data_source/storage/sql/db.rs +++ b/src/data_source/storage/sql/db.rs @@ -10,24 +10,25 @@ // You should have received a copy of the GNU General Public License along with this program. If not, // see . 
-/// The concrete database backing a SQL data source. +/// The underlying database type for a SQL data source. /// -/// Currently only Postgres is supported. In the future we can support SQLite as well by making this -/// an enum with variants for each (we'll then need to create enums and trait implementations for -/// all the associated types as well; it will be messy). +/// Currently, only PostgreSQL and SQLite are supported, with selection based on the "embedded-db" feature flag. +/// - When the "embedded-db" feature is enabled, SQLite is used. +/// - When it’s disabled, PostgreSQL is used. /// -/// The reason for taking this approach over sqlx's `Any` database is that we can support SQL types +/// ### Design Choice +/// The reason for taking this approach over sqlx's Any database is that we can support SQL types /// which are implemented for the two backends we care about (Postgres and SQLite) but not for _any_ /// SQL database, such as MySQL. Crucially, JSON types fall in this category. /// /// The reason for taking this approach rather than writing all of our code to be generic over the -/// `Database` implementation is that `sqlx` does not have the necessary trait bounds on all of the -/// associated types (e.g. `Database::Connection` does not implement `Executor` for all possible -/// databases, the `Executor` impl lives on each concrete connection type) and Rust does not provide +/// Database implementation is that sqlx does not have the necessary trait bounds on all of the +/// associated types (e.g. Database::Connection does not implement Executor for all possible +/// databases, the Executor impl lives on each concrete connection type) and Rust does not provide /// a good way of encapsulating a collection of trait bounds on associated types. Thus, our function /// signatures become untenably messy with bounds like /// -/// ``` +/// ```rust /// # use sqlx::{Database, Encode, Executor, IntoArguments, Type}; /// fn foo() /// where @@ -36,7 +37,6 @@ /// for<'a> i64: Type + Encode<'a, DB>, /// {} /// ``` -/// etc. #[cfg(feature = "embedded-db")] pub type Db = sqlx::Sqlite; diff --git a/src/data_source/storage/sql/queries.rs b/src/data_source/storage/sql/queries.rs index 4191ade6e..dc99bf33a 100644 --- a/src/data_source/storage/sql/queries.rs +++ b/src/data_source/storage/sql/queries.rs @@ -95,11 +95,7 @@ impl<'q> QueryBuilder<'q> { message: format!("{err:#}"), })?; - if cfg!(feature = "embedded-db") { - Ok("?".to_string()) - } else { - Ok(format!("${}", self.arguments.len())) - } + Ok(format!("${}", self.arguments.len())) } /// Finalize the query with a constructed SQL statement. 
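A minimal sketch of why the hunk above can drop the SQLite-only `?` placeholder and always emit `$N` parameters: sqlx accepts `$N`-style placeholders for SQLite as well as Postgres, so one query string serves both backends. The `header_hash` helper below is illustrative only and not part of this patch; it assumes the crate's `header` table with its `hash` column.

```rust
// Sketch only: the same SQL string, with a `$1` placeholder, can be prepared
// against either backend, so no per-backend placeholder formatting is needed.
use sqlx::Row;

#[cfg(feature = "embedded-db")]
type Conn = sqlx::SqliteConnection;
#[cfg(not(feature = "embedded-db"))]
type Conn = sqlx::PgConnection;

async fn header_hash(conn: &mut Conn, height: i64) -> sqlx::Result<String> {
    let row = sqlx::query("SELECT hash FROM header WHERE height = $1")
        .bind(height)
        .fetch_one(conn)
        .await?;
    row.try_get("hash")
}
```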
@@ -308,7 +304,7 @@ impl Transaction { "SELECT {HEADER_COLUMNS} FROM header AS h WHERE {where_clause} - ORDER BY h.height ASC + ORDER BY h.height LIMIT 1" ); let row = query.query(&sql).fetch_one(self.as_mut()).await?; diff --git a/src/data_source/storage/sql/queries/availability.rs b/src/data_source/storage/sql/queries/availability.rs index 40972634d..2b3298606 100644 --- a/src/data_source/storage/sql/queries/availability.rs +++ b/src/data_source/storage/sql/queries/availability.rs @@ -66,7 +66,7 @@ where FROM header AS h JOIN payload AS p ON h.height = p.height WHERE {where_clause} - ORDER BY h.height ASC + ORDER BY h.height LIMIT 1" ); let row = query.query(&sql).fetch_one(self.as_mut()).await?; @@ -88,7 +88,7 @@ where FROM header AS h JOIN payload AS p ON h.height = p.height WHERE {where_clause} - ORDER BY h.height ASC + ORDER BY h.height LIMIT 1" ); let row = query.query(&sql).fetch_one(self.as_mut()).await?; @@ -109,7 +109,7 @@ where FROM header AS h JOIN vid AS v ON h.height = v.height WHERE {where_clause} - ORDER BY h.height ASC + ORDER BY h.height LIMIT 1" ); let row = query.query(&sql).fetch_one(self.as_mut()).await?; @@ -126,7 +126,7 @@ where { let mut query = QueryBuilder::default(); let where_clause = query.bounds_to_where_clause(range, "height")?; - let sql = format!("SELECT {LEAF_COLUMNS} FROM leaf {where_clause} ORDER BY height ASC"); + let sql = format!("SELECT {LEAF_COLUMNS} FROM leaf {where_clause} ORDER BY height"); Ok(query .query(&sql) .fetch(self.as_mut()) @@ -150,7 +150,7 @@ where FROM header AS h JOIN payload AS p ON h.height = p.height {where_clause} - ORDER BY h.height ASC" + ORDER BY h.height" ); Ok(query .query(&sql) @@ -175,7 +175,7 @@ where FROM header AS h JOIN payload AS p ON h.height = p.height {where_clause} - ORDER BY h.height ASC" + ORDER BY h.height" ); Ok(query .query(&sql) @@ -200,7 +200,7 @@ where FROM header AS h JOIN vid AS v ON h.height = v.height {where_clause} - ORDER BY h.height ASC" + ORDER BY h.height" ); Ok(query .query(&sql) @@ -226,7 +226,7 @@ where JOIN payload AS p ON h.height = p.height JOIN transactions AS t ON t.block_height = h.height WHERE t.hash = {hash_param} - ORDER BY t.block_height ASC, t.idx ASC + ORDER BY t.block_height, t.idx LIMIT 1" ); let row = query.query(&sql).fetch_one(self.as_mut()).await?; diff --git a/src/data_source/storage/sql/queries/explorer.rs b/src/data_source/storage/sql/queries/explorer.rs index 9cad5dd7a..be9e8dd6a 100644 --- a/src/data_source/storage/sql/queries/explorer.rs +++ b/src/data_source/storage/sql/queries/explorer.rs @@ -421,7 +421,7 @@ where p.height = h.height WHERE h.height IN (SELECT height FROM header ORDER BY height DESC LIMIT 50) - ORDER BY h.height ASC + ORDER BY h.height ", ).fetch(self.as_mut()); diff --git a/src/data_source/storage/sql/queries/node.rs b/src/data_source/storage/sql/queries/node.rs index 82759bf5b..77af42cb4 100644 --- a/src/data_source/storage/sql/queries/node.rs +++ b/src/data_source/storage/sql/queries/node.rs @@ -76,7 +76,7 @@ where "SELECT v.share AS share FROM vid AS v JOIN header AS h ON v.height = h.height WHERE {where_clause} - ORDER BY h.height ASC + ORDER BY h.height LIMIT 1" ); let (share_data,) = query diff --git a/src/data_source/storage/sql/queries/state.rs b/src/data_source/storage/sql/queries/state.rs index 1c550d8ca..a2c39b9ec 100644 --- a/src/data_source/storage/sql/queries/state.rs +++ b/src/data_source/storage/sql/queries/state.rs @@ -82,7 +82,7 @@ where // Find all the hash values and create a hashmap // Hashmap will be used to get the hash value 
of the nodes children and the node itself. let hashes = if !hash_ids.is_empty() { - let (query, sql) = build_where_in("id", "SELECT id, value FROM hash", hash_ids)?; + let (query, sql) = build_where_in("SELECT id, value FROM hash", "id", hash_ids)?; query .query_as(&sql) .fetch(self.as_mut()) @@ -441,7 +441,8 @@ fn build_get_path_query<'q>( if cfg!(feature = "embedded-db") { sql.push_str("ORDER BY length(t.path) DESC"); } else { - sql.push_str("ORDER BY t.path DESC"); + // array_length() takes in array and the array dimension which is 1 in this case + sql.push_str("ORDER BY array_length(t.path, 1) DESC"); } Ok((query, sql)) diff --git a/src/data_source/storage/sql/transaction.rs b/src/data_source/storage/sql/transaction.rs index 7af3bb69e..d0a0bb5c8 100644 --- a/src/data_source/storage/sql/transaction.rs +++ b/src/data_source/storage/sql/transaction.rs @@ -284,8 +284,8 @@ impl_tuple_params!(7, (T1, T2, T3, T4, T5, T6, T7,)); impl_tuple_params!(8, (T1, T2, T3, T4, T5, T6, T7, T8,)); pub fn build_where_in<'a, I>( - column: &'a str, query: &'a str, + column: &'a str, values: I, ) -> QueryResult<(queries::QueryBuilder<'a>, String)> where From 66e2c259b3dc6c66c8fe9ac73f476f4316e68da6 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Sat, 9 Nov 2024 02:55:13 +0500 Subject: [PATCH 17/48] fix config for postgres --- src/data_source/storage/sql.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index 11b6871df..230460eb1 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -32,9 +32,7 @@ use log::LevelFilter; #[cfg(not(feature = "embedded-db"))] use futures::future::FutureExt; #[cfg(not(feature = "embedded-db"))] -use sqlx::postgres::{ - Postgres, {PgConnectOptions, PgSslMode}, -}; +use sqlx::postgres::{PgConnectOptions, PgSslMode}; #[cfg(feature = "embedded-db")] use sqlx::sqlite::SqliteConnectOptions; use sqlx::{ @@ -209,7 +207,7 @@ pub struct Config { #[cfg(feature = "embedded-db")] db_opt: SqliteConnectOptions, #[cfg(not(feature = "embedded-db"))] - db_opt: PostgresConnectOptions, + db_opt: PgConnectOptions, pool_opt: PoolOptions, #[cfg(not(feature = "embedded-db"))] schema: String, @@ -221,7 +219,7 @@ pub struct Config { } #[cfg(not(feature = "embedded-db"))] -impl Default for Config { +impl Default for Config { fn default() -> Self { PgConnectOptions::default() .username("postgres") @@ -259,7 +257,7 @@ impl From for Config { } #[cfg(not(feature = "embedded-db"))] -impl From for Config { +impl From for Config { fn from(db_opt: PgConnectOptions) -> Self { Self { db_opt, @@ -275,7 +273,7 @@ impl From for Config { } #[cfg(not(feature = "embedded-db"))] -impl FromStr for Config { +impl FromStr for Config { type Err = ::Err; fn from_str(s: &str) -> Result { From 79902fe80f10e7e6af3868721ca4ad521bd6965b Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Sat, 9 Nov 2024 03:01:25 +0500 Subject: [PATCH 18/48] separate job for tests --- .github/workflows/build.yml | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7cfadf0e2..4dee54f4a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -43,26 +43,50 @@ jobs: - name: Clippy run: cargo clippy --workspace --all-features --all-targets -- -D warnings + test-sqlite: + runs-on: ubuntu-latest + needs: [build] + steps: + - uses: actions/checkout@v4 + name: Checkout Repository + - name: Test with embedded-db 
run: | cargo test --workspace --release --all-features --no-run cargo test --workspace --release --all-features --verbose -- --test-threads 2 timeout-minutes: 60 + test-postgres: + runs-on: ubuntu-latest + needs: [build] + steps: + - uses: actions/checkout@v4 + name: Checkout Repository + - name: Test with postgres run: | cargo test --workspace --release --features "no-storage, testing" --no-run cargo test --workspace --release --features "no-storage, testing" --verbose -- --test-threads 2 timeout-minutes: 60 + generate-docs: + runs-on: ubuntu-latest + needs: [build, test-sqlite, test-postgres] + steps: + - uses: actions/checkout@v4 + name: Checkout Repository + - name: Generate Documentation run: | cargo doc --no-deps --lib --release --all-features echo '' > target/doc/index.html - - name: Deploy Documentation - uses: peaceiris/actions-gh-pages@v4 - if: ${{ github.ref == 'refs/heads/main' }} + deploy-docs: + runs-on: ubuntu-latest + needs: [generate-docs] + if: ${{ github.ref == 'refs/heads/main' }} + steps: + - uses: peaceiris/actions-gh-pages@v4 with: github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: ./target/doc From c0f8b747e4f94e609ecbe4c6fb903e7bcd2fb00d Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Sat, 9 Nov 2024 03:06:18 +0500 Subject: [PATCH 19/48] update ci --- .github/workflows/build.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4dee54f4a..c786fe09e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -50,6 +50,9 @@ jobs: - uses: actions/checkout@v4 name: Checkout Repository + - uses: Swatinem/rust-cache@v2 + name: Enable Rust Caching + - name: Test with embedded-db run: | cargo test --workspace --release --all-features --no-run @@ -63,6 +66,9 @@ jobs: - uses: actions/checkout@v4 name: Checkout Repository + - uses: Swatinem/rust-cache@v2 + name: Enable Rust Caching + - name: Test with postgres run: | cargo test --workspace --release --features "no-storage, testing" --no-run @@ -71,11 +77,14 @@ jobs: generate-docs: runs-on: ubuntu-latest - needs: [build, test-sqlite, test-postgres] + needs: [build] steps: - uses: actions/checkout@v4 name: Checkout Repository + - uses: Swatinem/rust-cache@v2 + name: Enable Rust Caching + - name: Generate Documentation run: | cargo doc --no-deps --lib --release --all-features From b73ff86e50da81a7dc80e946df37dee69ff0c002 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Sat, 9 Nov 2024 03:17:22 +0500 Subject: [PATCH 20/48] fix build ci --- .github/workflows/build.yml | 40 ++++++++++++---------------------- src/data_source/storage/sql.rs | 4 +--- 2 files changed, 15 insertions(+), 29 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c786fe09e..7a68e87ff 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -43,8 +43,19 @@ jobs: - name: Clippy run: cargo clippy --workspace --all-features --all-targets -- -D warnings + - name: Generate Documentation + run: | + cargo doc --no-deps --lib --release --all-features + echo '' > target/doc/index.html + - name: Deploy Documentation + uses: peaceiris/actions-gh-pages@v4 + if: ${{ github.ref == 'refs/heads/main' }} + test-sqlite: runs-on: ubuntu-latest + env: + RUSTFLAGS: '--cfg async_executor_impl="async-std" --cfg async_channel_impl="async-std"' + RUST_LOG: info needs: [build] steps: - uses: actions/checkout@v4 @@ -61,6 +72,9 @@ jobs: test-postgres: runs-on: ubuntu-latest + env: + RUSTFLAGS: '--cfg 
async_executor_impl="async-std" --cfg async_channel_impl="async-std"' + RUST_LOG: info needs: [build] steps: - uses: actions/checkout@v4 @@ -74,29 +88,3 @@ jobs: cargo test --workspace --release --features "no-storage, testing" --no-run cargo test --workspace --release --features "no-storage, testing" --verbose -- --test-threads 2 timeout-minutes: 60 - - generate-docs: - runs-on: ubuntu-latest - needs: [build] - steps: - - uses: actions/checkout@v4 - name: Checkout Repository - - - uses: Swatinem/rust-cache@v2 - name: Enable Rust Caching - - - name: Generate Documentation - run: | - cargo doc --no-deps --lib --release --all-features - echo '' > target/doc/index.html - - deploy-docs: - runs-on: ubuntu-latest - needs: [generate-docs] - if: ${{ github.ref == 'refs/heads/main' }} - steps: - - uses: peaceiris/actions-gh-pages@v4 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - publish_dir: ./target/doc - cname: tide-disco.docs.espressosys.com diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index 230460eb1..230920838 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -860,9 +860,7 @@ pub mod testing { .arg("-d") .args(["-p", &format!("{port}:5432")]) .args(["-e", "POSTGRES_PASSWORD=password"]); - if !persistent { - cmd.arg("--rm"); - } + let output = cmd.arg("postgres").output().unwrap(); let stdout = str::from_utf8(&output.stdout).unwrap(); let stderr = str::from_utf8(&output.stderr).unwrap(); From b44905b23119e044a43591b2bc7b4961cc08468a Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Sat, 9 Nov 2024 03:24:37 +0500 Subject: [PATCH 21/48] shared_key --- .github/workflows/build.yml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7a68e87ff..18ce8c95e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,6 +28,8 @@ jobs: name: Checkout Repository - uses: Swatinem/rust-cache@v2 + with: + shared-key: "all_features" name: Enable Rust Caching - name: Build @@ -62,9 +64,11 @@ jobs: name: Checkout Repository - uses: Swatinem/rust-cache@v2 + with: + shared-key: "all_features" name: Enable Rust Caching - - name: Test with embedded-db + - name: Test run: | cargo test --workspace --release --all-features --no-run cargo test --workspace --release --all-features --verbose -- --test-threads 2 @@ -83,7 +87,7 @@ jobs: - uses: Swatinem/rust-cache@v2 name: Enable Rust Caching - - name: Test with postgres + - name: Test run: | cargo test --workspace --release --features "no-storage, testing" --no-run cargo test --workspace --release --features "no-storage, testing" --verbose -- --test-threads 2 From 81be6c39c72f1adf761871d411fae7f0c8bd19d6 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Sat, 9 Nov 2024 03:39:02 +0500 Subject: [PATCH 22/48] remove shared_key --- .github/workflows/build.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 18ce8c95e..312db8b95 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,8 +28,6 @@ jobs: name: Checkout Repository - uses: Swatinem/rust-cache@v2 - with: - shared-key: "all_features" name: Enable Rust Caching - name: Build @@ -64,8 +62,6 @@ jobs: name: Checkout Repository - uses: Swatinem/rust-cache@v2 - with: - shared-key: "all_features" name: Enable Rust Caching - name: Test From c10371794d01b94012d8121537f9d642c5854260 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Sat, 9 Nov 2024 03:52:12 +0500 
Subject: [PATCH 23/48] remove only if container exists --- src/data_source/storage/sql.rs | 31 +++++++++++++++----- src/data_source/storage/sql/queries/state.rs | 4 +-- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index 230920838..ef4788275 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -203,6 +203,7 @@ fn add_custom_migrations( .map(|pair| pair.reduce(|_, custom| custom)) } +#[derive(Clone)] pub struct Config { #[cfg(feature = "embedded-db")] db_opt: SqliteConnectOptions, @@ -861,6 +862,10 @@ pub mod testing { .args(["-p", &format!("{port}:5432")]) .args(["-e", "POSTGRES_PASSWORD=password"]); + if !persistent { + cmd.arg("--rm"); + } + let output = cmd.arg("postgres").output().unwrap(); let stdout = str::from_utf8(&output.stdout).unwrap(); let stderr = str::from_utf8(&output.stderr).unwrap(); @@ -1036,16 +1041,26 @@ pub mod testing { fn drop(&mut self) { self.stop_postgres(); if !self.persistent { - let output = Command::new("docker") - .args(["container", "rm", self.container_id.as_str()]) + // Check if container exists + let check_output = Command::new("docker") + .args(["container", "inspect", self.container_id.as_str()]) .output() .unwrap(); - assert!( - output.status.success(), - "error removing postgres docker {}: {}", - self.container_id, - str::from_utf8(&output.stderr).unwrap() - ); + + // If the container exists (inspect command succeeded) + if check_output.status.success() { + let remove_output = Command::new("docker") + .args(["container", "rm", self.container_id.as_str()]) + .output() + .unwrap(); + + assert!( + remove_output.status.success(), + "error removing postgres docker {}: {}", + self.container_id, + str::from_utf8(&remove_output.stderr).unwrap() + ); + }; } } } diff --git a/src/data_source/storage/sql/queries/state.rs b/src/data_source/storage/sql/queries/state.rs index a2c39b9ec..41a24e7e5 100644 --- a/src/data_source/storage/sql/queries/state.rs +++ b/src/data_source/storage/sql/queries/state.rs @@ -32,7 +32,7 @@ use jf_merkle_tree::{ prelude::{MerkleNode, MerkleProof}, DigestAlgorithm, MerkleCommitment, ToTraversalPath, }; -use sqlx::{types::BitVec, Decode, FromRow}; +use sqlx::{types::BitVec, Decode}; use sqlx::{types::JsonValue, ColumnIndex}; use std::collections::{HashMap, HashSet, VecDeque}; @@ -340,7 +340,7 @@ pub(crate) fn build_hash_batch_insert( } // Represents a row in a state table -#[derive(Debug, Default, Clone, FromRow)] +#[derive(Debug, Default, Clone)] pub(crate) struct Node { pub(crate) path: JsonValue, pub(crate) created: i64, From 2d2b39d50d53119bd24d54f8d0b96aaa12cf36fc Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Sat, 9 Nov 2024 04:35:26 +0500 Subject: [PATCH 24/48] use tempfile --- src/data_source/storage/sql.rs | 51 +++++++++----------- src/data_source/storage/sql/queries/state.rs | 5 +- 2 files changed, 27 insertions(+), 29 deletions(-) diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index ef4788275..30394c7be 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -90,9 +90,9 @@ use self::{migrate::Migrator, transaction::PoolMetrics}; /// let mut migrations: Vec = /// include_migrations!("$CARGO_MANIFEST_DIR/migrations/sqlite").collect(); /// -/// sqlite_migrations.sort(); -/// assert_eq!(sqlite_migrations[0].version(), 10); -/// assert_eq!(sqlite_migrations[0].name(), "init_schema"); +/// migrations.sort(); +/// assert_eq!(migrations[0].version(), 10); +/// 
assert_eq!(migrations[0].name(), "init_schema"); /// ``` /// /// Note that a similar macro is available from Refinery: @@ -812,20 +812,29 @@ pub mod testing { container_id: String, #[cfg(feature = "embedded-db")] db_path: std::path::PathBuf, + #[allow(dead_code)] persistent: bool, } impl TmpDb { #[cfg(feature = "embedded-db")] fn init_sqlite_db(persistent: bool) -> Self { - let dir = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tmp"); + let file = tempfile::Builder::new() + .prefix("sqlite-") + .suffix(".db") + .tempfile() + .unwrap(); + + if persistent { + let (_, db_path) = file.keep().unwrap(); - // Create the tmp directory if it doesn't exist - if !dir.exists() { - fs::create_dir(&dir).expect("Failed to create tmp directory"); + return Self { + db_path, + persistent, + }; } Self { - db_path: dir.join(Self::gen_db_name()), + db_path: file.into_temp_path().to_path_buf(), persistent, } } @@ -1065,15 +1074,6 @@ pub mod testing { } } - #[cfg(feature = "embedded-db")] - impl Drop for TmpDb { - fn drop(&mut self) { - if !self.persistent { - fs::remove_file(self.db_path.clone()).unwrap(); - } - } - } - pub struct TestMerkleTreeMigration; impl TestMerkleTreeMigration { @@ -1138,20 +1138,17 @@ mod test { setup_test(); let db = TmpDb::init().await; - let db_opt = db.config().db_opt; + let cfg = db.config(); let connect = |migrations: bool, custom_migrations| { - let db_opt = db_opt.clone(); + let cfg = cfg.clone(); async move { - { - let cfg: Config = db_opt.into(); - let mut cfg = cfg.migrations(custom_migrations); - if !migrations { - cfg = cfg.no_migrations(); - } - let client = SqlStorage::connect(cfg).await?; - Ok::<_, Error>(client) + let mut cfg = cfg.migrations(custom_migrations); + if !migrations { + cfg = cfg.no_migrations(); } + let client = SqlStorage::connect(cfg).await?; + Ok::<_, Error>(client) } }; diff --git a/src/data_source/storage/sql/queries/state.rs b/src/data_source/storage/sql/queries/state.rs index 41a24e7e5..8d5ca1476 100644 --- a/src/data_source/storage/sql/queries/state.rs +++ b/src/data_source/storage/sql/queries/state.rs @@ -438,11 +438,12 @@ fn build_get_path_query<'q>( sql = format!("SELECT * FROM ({sql}) as t "); + // PostgreSQL already orders JSON arrays by length, so no additional function is needed + // For SQLite, `length()` is used to sort by length. 
if cfg!(feature = "embedded-db") { sql.push_str("ORDER BY length(t.path) DESC"); } else { - // array_length() takes in array and the array dimension which is 1 in this case - sql.push_str("ORDER BY array_length(t.path, 1) DESC"); + sql.push_str("ORDER BY t.path DESC"); } Ok((query, sql)) From 8563d5d932e02c87aa96e1fa1549555f9b593fb2 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Sat, 9 Nov 2024 04:39:59 +0500 Subject: [PATCH 25/48] revert deploy docs ci changes --- .github/workflows/build.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 312db8b95..ee276683e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -47,10 +47,14 @@ jobs: run: | cargo doc --no-deps --lib --release --all-features echo '' > target/doc/index.html + - name: Deploy Documentation uses: peaceiris/actions-gh-pages@v4 if: ${{ github.ref == 'refs/heads/main' }} - + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: ./target/doc + cname: tide-disco.docs.espressosys.com test-sqlite: runs-on: ubuntu-latest env: From f88c6bc6011ebb630da2ad9ae4685a68d0be6d28 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Sat, 9 Nov 2024 05:03:18 +0500 Subject: [PATCH 26/48] do not delete container in destructor && check lint for postgres --- .github/workflows/build.yml | 10 ++++++++-- src/data_source/storage/sql.rs | 22 ---------------------- 2 files changed, 8 insertions(+), 24 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index ee276683e..fb54b0ce3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -14,6 +14,12 @@ on: jobs: build: runs-on: ubuntu-latest + strategy: + matrix: + features: + - "--all-features" + - "--features no-storage" + - "--features \"no-storage, embedded-db\"" env: RUSTFLAGS: '--cfg async_executor_impl="async-std" --cfg async_channel_impl="async-std"' RUST_LOG: info @@ -40,8 +46,8 @@ jobs: # Run Clippy on all targets. The lint workflow doesn't run Clippy on tests, because the tests # don't compile with all combinations of features. 
- - name: Clippy - run: cargo clippy --workspace --all-features --all-targets -- -D warnings + - name: Clippy all-features + run: cargo clippy --workspace ${{ matrix.features }} --all-targets -- -D warnings - name: Generate Documentation run: | diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index 30394c7be..c1b42dd4c 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -1049,28 +1049,6 @@ pub mod testing { impl Drop for TmpDb { fn drop(&mut self) { self.stop_postgres(); - if !self.persistent { - // Check if container exists - let check_output = Command::new("docker") - .args(["container", "inspect", self.container_id.as_str()]) - .output() - .unwrap(); - - // If the container exists (inspect command succeeded) - if check_output.status.success() { - let remove_output = Command::new("docker") - .args(["container", "rm", self.container_id.as_str()]) - .output() - .unwrap(); - - assert!( - remove_output.status.success(), - "error removing postgres docker {}: {}", - self.container_id, - str::from_utf8(&remove_output.stderr).unwrap() - ); - }; - } } } From cd49fa8649f061025b305f574e00ff3f6b4d96e9 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Sat, 9 Nov 2024 05:21:07 +0500 Subject: [PATCH 27/48] cancel workflow --- .github/workflows/build.yml | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fb54b0ce3..36263d0b4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -11,25 +11,17 @@ on: - release-* workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: build: runs-on: ubuntu-latest - strategy: - matrix: - features: - - "--all-features" - - "--features no-storage" - - "--features \"no-storage, embedded-db\"" env: RUSTFLAGS: '--cfg async_executor_impl="async-std" --cfg async_channel_impl="async-std"' RUST_LOG: info steps: - - uses: styfle/cancel-workflow-action@0.12.1 - name: Cancel Outdated Builds - with: - all_but_latest: true - access_token: ${{ github.token }} - - uses: actions/checkout@v4 name: Checkout Repository @@ -46,8 +38,11 @@ jobs: # Run Clippy on all targets. The lint workflow doesn't run Clippy on tests, because the tests # don't compile with all combinations of features. - - name: Clippy all-features - run: cargo clippy --workspace ${{ matrix.features }} --all-targets -- -D warnings + - name: Clippy(all-features) + run: cargo clippy --workspace --all-features --all-targets -- -D + + - name: Clippy(no-storage) + run: cargo clippy --workspace --features no-storage --all-targets -- -D warnings - name: Generate Documentation run: | From 06e7a2490961b7f38ebbae9771fb781e407836ac Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Sat, 9 Nov 2024 05:25:56 +0500 Subject: [PATCH 28/48] fix clippy ci --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 36263d0b4..b1f49c8a7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -39,7 +39,7 @@ jobs: # Run Clippy on all targets. The lint workflow doesn't run Clippy on tests, because the tests # don't compile with all combinations of features. 
- name: Clippy(all-features) - run: cargo clippy --workspace --all-features --all-targets -- -D + run: cargo clippy --workspace --all-features --all-targets -- -D warnings - name: Clippy(no-storage) run: cargo clippy --workspace --features no-storage --all-targets -- -D warnings From 706b77f8c3c49c796f7170f05cdc25529a1aa674 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Sat, 9 Nov 2024 06:08:41 +0500 Subject: [PATCH 29/48] remove dead code --- src/data_source/storage/sql.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index c1b42dd4c..95d43d076 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -1034,15 +1034,6 @@ pub mod testing { ); } } - - #[cfg(feature = "embedded-db")] - pub fn gen_db_name() -> String { - let mut rng = rand::thread_rng(); - let random: String = (0..10) - .map(|_| rng.sample(rand::distributions::Alphanumeric) as char) - .collect(); - format!("tmp-db-{random}.db") - } } #[cfg(not(feature = "embedded-db"))] From 441c9f7810f6b1c945c0656ade280d515c7920fe Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Sat, 9 Nov 2024 06:53:39 +0500 Subject: [PATCH 30/48] address comments --- .github/workflows/build.yml | 2 -- examples/simple-server.rs | 2 +- src/data_source/storage/sql.rs | 21 +++++++++++---------- src/testing/consensus.rs | 2 +- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b1f49c8a7..55a14d9f1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -61,7 +61,6 @@ jobs: env: RUSTFLAGS: '--cfg async_executor_impl="async-std" --cfg async_channel_impl="async-std"' RUST_LOG: info - needs: [build] steps: - uses: actions/checkout@v4 name: Checkout Repository @@ -80,7 +79,6 @@ jobs: env: RUSTFLAGS: '--cfg async_executor_impl="async-std" --cfg async_channel_impl="async-std"' RUST_LOG: info - needs: [build] steps: - uses: actions/checkout@v4 name: Checkout Repository diff --git a/examples/simple-server.rs b/examples/simple-server.rs index 5a848b7b1..0ca9b8974 100644 --- a/examples/simple-server.rs +++ b/examples/simple-server.rs @@ -52,7 +52,7 @@ use std::{num::NonZeroUsize, str::FromStr, time::Duration}; use url::Url; use vbs::version::StaticVersionType; -const NUM_NODES: usize = 5; +const NUM_NODES: usize = 2; #[derive(Parser)] struct Options { diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index 95d43d076..d435ae607 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -812,7 +812,6 @@ pub mod testing { container_id: String, #[cfg(feature = "embedded-db")] db_path: std::path::PathBuf, - #[allow(dead_code)] persistent: bool, } impl TmpDb { @@ -824,17 +823,10 @@ pub mod testing { .tempfile() .unwrap(); - if persistent { - let (_, db_path) = file.keep().unwrap(); - - return Self { - db_path, - persistent, - }; - } + let (_, db_path) = file.keep().unwrap(); Self { - db_path: file.into_temp_path().to_path_buf(), + db_path, persistent, } } @@ -1043,6 +1035,15 @@ pub mod testing { } } + #[cfg(feature = "embedded-db")] + impl Drop for TmpDb { + fn drop(&mut self) { + if !self.persistent { + std::fs::remove_file(self.db_path.clone()).unwrap(); + } + } + } + pub struct TestMerkleTreeMigration; impl TestMerkleTreeMigration { diff --git a/src/testing/consensus.rs b/src/testing/consensus.rs index 85475e841..cdb29bffa 100644 --- a/src/testing/consensus.rs +++ b/src/testing/consensus.rs @@ -66,7 +66,7 @@ pub struct 
MockNetwork { pub type MockDataSource = FileSystemDataSource; pub type MockSqlDataSource = SqlDataSource; -pub const NUM_NODES: usize = 5; +pub const NUM_NODES: usize = 2; impl MockNetwork { pub async fn init() -> Self { From 1d063a382ea612879e0f5f72e922aa483fbcadd3 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Sat, 9 Nov 2024 06:56:05 +0500 Subject: [PATCH 31/48] ... --- src/data_source/storage/sql.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index d435ae607..bde3b6fea 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -812,6 +812,7 @@ pub mod testing { container_id: String, #[cfg(feature = "embedded-db")] db_path: std::path::PathBuf, + #[allow(dead_code)] persistent: bool, } impl TmpDb { From cfafb2433a546bf290a7f56197a72bc73b2a0e50 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Thu, 14 Nov 2024 07:25:30 +0500 Subject: [PATCH 32/48] use bitstring for postgres --- src/data_source/storage/sql.rs | 7 ++- src/data_source/storage/sql/queries/state.rs | 66 +++++++++++++------- src/data_source/storage/sql/transaction.rs | 13 ++-- 3 files changed, 55 insertions(+), 31 deletions(-) diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index bde3b6fea..2adad52f2 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -1049,6 +1049,11 @@ pub mod testing { impl TestMerkleTreeMigration { fn create(name: &str) -> String { + #[cfg(feature = "embedded-db")] + let bitvec_type = "TEXT"; + #[cfg(not(feature = "embedded-db"))] + let bitvec_type = "BIT(8)"; + #[cfg(feature = "embedded-db")] let binary_data_type = "BLOB"; #[cfg(not(feature = "embedded-db"))] @@ -1076,7 +1081,7 @@ pub mod testing { created BIGINT NOT NULL, hash_id INT NOT NULL, children JSONB, - children_bitvec {binary_data_type}, + children_bitvec {bitvec_type}, idx JSONB, entry JSONB, PRIMARY KEY (path, created) diff --git a/src/data_source/storage/sql/queries/state.rs b/src/data_source/storage/sql/queries/state.rs index 8d5ca1476..a68bd0911 100644 --- a/src/data_source/storage/sql/queries/state.rs +++ b/src/data_source/storage/sql/queries/state.rs @@ -32,8 +32,8 @@ use jf_merkle_tree::{ prelude::{MerkleNode, MerkleProof}, DigestAlgorithm, MerkleCommitment, ToTraversalPath, }; -use sqlx::{types::BitVec, Decode}; -use sqlx::{types::JsonValue, ColumnIndex}; +use sqlx::types::BitVec; +use sqlx::types::JsonValue; use std::collections::{HashMap, HashSet, VecDeque}; #[async_trait] @@ -59,11 +59,10 @@ where // Get all the nodes in the path to the index. // Order by pos DESC is to return nodes from the leaf to the root - let (query, sql) = build_get_path_query(state_type, traversal_path.clone(), created)?; let rows = query.query(&sql).fetch_all(self.as_mut()).await?; - let nodes: Vec = rows.into_iter().map(|r| (&r).into()).collect(); + let nodes: Vec = rows.into_iter().map(|r| r.into()).collect(); // insert all the hash ids to a hashset which is used to query later // HashSet is used to avoid duplicates @@ -119,8 +118,6 @@ where } })?; let mut children = children.iter(); - let children_bitvec: BitVec = - BitVec::from_bytes(children_bitvec.clone().as_slice()); // Reconstruct the Children MerkleNodes from storage. // Children bit_vec is used to create forgotten or empty node @@ -314,7 +311,9 @@ impl Transaction { }; // Make sure the requested snapshot is up to date. 
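An aside on the bit-vector representation this patch introduces: on Postgres the children bitmap remains a `BIT(8)` column that sqlx decodes straight into `sqlx::types::BitVec`, while on SQLite it is stored as a string of `'0'`/`'1'` characters and converted by hand. A minimal round-trip sketch of that textual encoding, assuming only the `bit-vec` crate (which sqlx re-exports as `sqlx::types::BitVec`); it is an illustration, not code from the patch:

```rust
// Round-trip sketch for the '0'/'1' text encoding the SQLite backend uses for
// `children_bitvec`.
use bit_vec::BitVec;

/// One character per child slot: '1' if the child is present, '0' otherwise.
fn encode(bits: &BitVec) -> String {
    bits.iter().map(|b| if b { '1' } else { '0' }).collect()
}

/// Inverse of `encode`; any character other than '1' is read as "absent".
fn decode(text: &str) -> BitVec {
    text.chars().map(|c| c == '1').collect()
}

fn main() {
    let mut bits = BitVec::from_elem(8, false);
    bits.set(0, true);
    bits.set(3, true);

    let text = encode(&bits);
    assert_eq!(text, "10010000");
    assert_eq!(decode(&text), bits);
}
```

Postgres needs no such conversion because sqlx (with its `bit-vec` feature) can decode `BIT`/`VARBIT` columns into `BitVec` directly, which is why the Postgres `From<PgRow>` impl below stays a plain field copy.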
+ let height = self.get_last_state_height().await?; + if height < (created as usize) { return Err(QueryError::NotFound); } @@ -346,23 +345,33 @@ pub(crate) struct Node { pub(crate) created: i64, pub(crate) hash_id: i32, pub(crate) children: Option, - pub(crate) children_bitvec: Option>, + pub(crate) children_bitvec: Option, pub(crate) idx: Option, pub(crate) entry: Option, } -impl<'a, R> From<&'a R> for Node -where - R: Row, - JsonValue: 'a + Decode<'a, ::Database>, - Option: Decode<'a, ::Database>, - Vec: Decode<'a, ::Database>, - i64: Decode<'a, ::Database>, - i32: Decode<'a, ::Database>, - - &'a str: ColumnIndex, -{ - fn from(row: &'a R) -> Self { +#[cfg(feature = "embedded-db")] +impl From for Node { + fn from(row: sqlx::sqlite::SqliteRow) -> Self { + let bit_string: Option = row.get_unchecked("children_bitvec"); + let children_bitvec: Option = + bit_string.map(|b| b.chars().map(|c| c == '1').collect()); + + Self { + path: row.get_unchecked("path"), + created: row.get_unchecked("created"), + hash_id: row.get_unchecked("hash_id"), + children: row.get_unchecked("children"), + children_bitvec, + idx: row.get_unchecked("idx"), + entry: row.get_unchecked("entry"), + } + } +} + +#[cfg(not(feature = "embedded-db"))] +impl From for Node { + fn from(row: sqlx::postgres::PgRow) -> Self { Self { path: row.get_unchecked("path"), created: row.get_unchecked("created"), @@ -394,12 +403,21 @@ impl Node { ], ["path", "created"], nodes.into_iter().map(|n| { + #[cfg(feature = "embedded-db")] + let children_bitvec: Option = n + .children_bitvec + .clone() + .map(|b| b.iter().map(|bit| if bit { '1' } else { '0' }).collect()); + + #[cfg(not(feature = "embedded-db"))] + let children_bitvec = n.children_bitvec.clone(); + ( n.path.clone(), n.created, n.hash_id, n.children.clone(), - n.children_bitvec.clone(), + children_bitvec, n.idx.clone(), n.entry.clone(), ) @@ -420,14 +438,16 @@ fn build_get_path_query<'q>( // We iterate through the path vector skipping the first element after each iteration let len = traversal_path.len(); let mut sub_queries = Vec::new(); + + query.bind(created)?; + for _ in 0..=len { let path = traversal_path.clone().rev().collect::>(); let path: serde_json::Value = path.into(); let node_path = query.bind(path)?; - let created = query.bind(created)?; let sub_query = format!( - "SELECT * FROM (SELECT * FROM {table} WHERE path = {node_path} AND created <= {created} ORDER BY created DESC LIMIT 1)", + "SELECT * FROM (SELECT * FROM {table} WHERE path = {node_path} AND created <= $1 ORDER BY created DESC LIMIT 1)", ); sub_queries.push(sub_query); @@ -605,7 +625,7 @@ mod test { .bind(serde_json::to_value(node_path).unwrap()) .fetch(tx.as_mut()); - let nodes: Vec = rows.map(|res| (&res.unwrap()).into()).collect().await; + let nodes: Vec = rows.map(|res| res.unwrap().into()).collect().await; // There should be only 2 versions of this node assert!(nodes.len() == 2, "incorrect number of nodes"); assert_eq!(nodes[0].created, 1, "wrong block height"); diff --git a/src/data_source/storage/sql/transaction.rs b/src/data_source/storage/sql/transaction.rs index d0a0bb5c8..80add2346 100644 --- a/src/data_source/storage/sql/transaction.rs +++ b/src/data_source/storage/sql/transaction.rs @@ -52,17 +52,15 @@ use hotshot_types::traits::{ }; use itertools::Itertools; use jf_merkle_tree::prelude::{MerkleNode, MerkleProof}; -use sqlx::{ - pool::Pool, query_builder::Separated, types::BitVec, Encode, FromRow, QueryBuilder, Type, -}; +use sqlx::types::BitVec; +pub use sqlx::Executor; +use sqlx::{pool::Pool, 
query_builder::Separated, Encode, FromRow, QueryBuilder, Type}; use std::{ collections::{HashMap, HashSet}, marker::PhantomData, time::Instant, }; -pub use sqlx::Executor; - pub type Query<'q> = sqlx::query::Query<'q, Db, ::Arguments<'q>>; pub type QueryAs<'q, T> = sqlx::query::QueryAs<'q, Db, T, ::Arguments<'q>>; @@ -627,12 +625,11 @@ impl, const ARITY: usize> // insert internal node let path = traversal_path.clone().rev().collect(); - let bit_vec_bytes = children_bitvec.to_bytes(); nodes.push(( Node { path, children: None, - children_bitvec: Some(bit_vec_bytes), + children_bitvec: Some(children_bitvec), ..Default::default() }, Some(children_values.clone()), @@ -679,7 +676,9 @@ impl, const ARITY: usize> node.children = Some(children_hashes.into()); } } + Node::upsert(name, nodes.into_iter().map(|(n, _, _)| n), self).await?; + Ok(()) } } From 43b065097ba48daa66714deb33a1f2d83d1ec6fd Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Fri, 15 Nov 2024 00:17:52 +0500 Subject: [PATCH 33/48] lint --- src/data_source/storage/sql/transaction.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/data_source/storage/sql/transaction.rs b/src/data_source/storage/sql/transaction.rs index 571d6bc7f..80add2346 100644 --- a/src/data_source/storage/sql/transaction.rs +++ b/src/data_source/storage/sql/transaction.rs @@ -60,7 +60,6 @@ use std::{ marker::PhantomData, time::Instant, }; -use tokio::time::sleep; pub type Query<'q> = sqlx::query::Query<'q, Db, ::Arguments<'q>>; pub type QueryAs<'q, T> = sqlx::query::QueryAs<'q, Db, T, ::Arguments<'q>>; From 8889ea92e9437af221ecc25165183b6502b18057 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Fri, 15 Nov 2024 00:33:19 +0500 Subject: [PATCH 34/48] lint --- src/data_source/storage/sql.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index 4a82c0559..b967e9456 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -782,11 +782,12 @@ impl VersionedDataSource for SqlStorage { // These tests run the `postgres` Docker image, which doesn't work on Windows. 
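For background on the embedded-db test setup used elsewhere in this series: a `sqlite:{path}` connection string plus `create_if_missing(true)` is all sqlx needs to open, and lazily create, a file-backed database. A self-contained sketch, assuming sqlx is built with its `sqlite` and `runtime-tokio` features; the path and pool size are illustrative only:

```rust
use std::str::FromStr;

use sqlx::sqlite::{SqliteConnectOptions, SqlitePoolOptions};

#[tokio::main]
async fn main() -> Result<(), sqlx::Error> {
    // Same shape as the test configuration: a file path plus create-if-missing.
    let opts = SqliteConnectOptions::from_str("sqlite:/tmp/example-query-service.db")?
        .create_if_missing(true);

    let pool = SqlitePoolOptions::new()
        .max_connections(5)
        .connect_with(opts)
        .await?;

    // Smoke test: SQLite exposes table-valued pragmas that can be queried like tables.
    let (page_size,): (i64,) = sqlx::query_as("SELECT * FROM pragma_page_size")
        .fetch_one(&pool)
        .await?;
    println!("page size: {page_size} bytes");

    Ok(())
}
```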
#[cfg(all(any(test, feature = "testing"), not(target_os = "windows")))] pub mod testing { + #![allow(unused_imports)] use refinery::Migration; use std::{ env, process::{Command, Stdio}, - str::{self}, + str::{self, FromStr}, time::Duration, }; use tokio::net::TcpStream; @@ -905,7 +906,7 @@ pub mod testing { let mut cfg: Config = { let db_path = self.db_path.to_string_lossy(); let path = format!("sqlite:{db_path}"); - SqliteConnectOptions::from_str(&path) + sqlx::sqlite::SqliteConnectOptions::from_str(&path) .expect("invalid db path") .create_if_missing(true) .into() From 50c4a2fc27525bb0604f6821cd48faa8203fbce7 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Fri, 15 Nov 2024 00:50:46 +0500 Subject: [PATCH 35/48] use nextest --- .github/workflows/build.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 48ccf429e..bbdb3ba5d 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -67,7 +67,6 @@ jobs: test-sqlite: runs-on: ubuntu-latest env: - RUSTFLAGS: '--cfg async_executor_impl="async-std" --cfg async_channel_impl="async-std"' RUST_LOG: info steps: - uses: actions/checkout@v4 @@ -88,7 +87,6 @@ jobs: test-postgres: runs-on: ubuntu-latest env: - RUSTFLAGS: '--cfg async_executor_impl="async-std" --cfg async_channel_impl="async-std"' RUST_LOG: info steps: - uses: actions/checkout@v4 @@ -99,6 +97,5 @@ jobs: - name: Test run: | - cargo test --workspace --release --features "no-storage, testing" --no-run - cargo test --workspace --release --features "no-storage, testing" --verbose -- --test-threads 2 + cargo nextest run --workspace --release --features "no-storage, testing" timeout-minutes: 60 From 09a1723a442286cb7d71b929304cb9c1d6f60280 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Fri, 15 Nov 2024 00:52:20 +0500 Subject: [PATCH 36/48] fix --- .github/workflows/build.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index bbdb3ba5d..c68ca7434 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -39,15 +39,6 @@ jobs: # don't compile with all combinations of features. 
- name: Clippy(all-features) run: cargo clippy --workspace --all-features --all-targets -- -D warnings - - # Install nextest - - name: Install Nextest - run: cargo install cargo-nextest - - - name: Test - run: | - cargo nextest run --workspace --release --all-features - timeout-minutes: 60 - name: Clippy(no-storage) run: cargo clippy --workspace --features no-storage --all-targets -- -D warnings From a395b9128639bde897bf5c85f2fe8a5ad0177cd6 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Fri, 15 Nov 2024 01:00:18 +0500 Subject: [PATCH 37/48] install nextest --- .github/workflows/build.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c68ca7434..35dbdc049 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -86,6 +86,10 @@ jobs: - uses: Swatinem/rust-cache@v2 name: Enable Rust Caching + # Install nextest + - name: Install Nextest + run: cargo install cargo-nextest + - name: Test run: | cargo nextest run --workspace --release --features "no-storage, testing" From 8fe6980c510d51c2ffbb919963926c8ac9322e37 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Mon, 18 Nov 2024 03:14:04 +0500 Subject: [PATCH 38/48] reuse pool --- src/data_source/storage/sql.rs | 39 ++++++++++++++++++---- src/data_source/storage/sql/transaction.rs | 4 ++- 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index b967e9456..53326adb4 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -217,6 +217,7 @@ pub struct Config { no_migrations: bool, pruner_cfg: Option, archive: bool, + pool: Option>, } #[cfg(not(feature = "embedded-db"))] @@ -235,6 +236,8 @@ impl Default for Config { impl Default for Config { fn default() -> Self { SqliteConnectOptions::default() + .journal_mode(sqlx::sqlite::SqliteJournalMode::Wal) + .synchronous(sqlx::sqlite::SqliteSynchronous::Normal) .busy_timeout(Duration::from_secs(30)) .auto_vacuum(sqlx::sqlite::SqliteAutoVacuum::Incremental) .create_if_missing(true) @@ -253,6 +256,7 @@ impl From for Config { no_migrations: false, pruner_cfg: None, archive: false, + pool: None, } } } @@ -269,6 +273,7 @@ impl From for Config { no_migrations: false, pruner_cfg: None, archive: false, + pool: None, } } } @@ -360,6 +365,13 @@ impl Config { } impl Config { + /// Sets the database connection pool + /// This allows reusing an existing connection pool when building a new `SqlStorage` instance. + pub fn pool(mut self, pool: Pool) -> Self { + self.pool = Some(pool); + self + } + /// Reset the schema on connection. /// /// When this [`Config`] is used to [`connect`](Self::connect) a @@ -462,7 +474,7 @@ impl Config { } /// Storage for the APIs provided in this crate, backed by a remote PostgreSQL database. -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct SqlStorage { pool: Pool, metrics: PrometheusMetrics, @@ -478,9 +490,25 @@ pub struct Pruner { } impl SqlStorage { + pub fn pool(&self) -> Pool { + self.pool.clone() + } /// Connect to a remote database. 
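The pool-reuse hook this patch adds (`Config::pool` together with the new `SqlStorage::pool` getter) is never exercised in the diff itself. A hypothetical sketch of the intended usage, written against the crate's own types with error handling simplified; `open_shared` and the two configs are illustrative, not part of the patch:

```rust
// Build a second storage handle on top of an existing connection pool. With
// `Config::pool` set, `connect` returns early and skips migrations entirely.
async fn open_shared(cfg_a: Config, cfg_b: Config) -> anyhow::Result<(SqlStorage, SqlStorage)> {
    // First handle connects normally: opens the pool and applies migrations.
    let primary = SqlStorage::connect(cfg_a).await?;

    // Second handle reuses the primary's pool instead of opening new connections.
    let secondary = SqlStorage::connect(cfg_b.pool(primary.pool())).await?;

    Ok((primary, secondary))
}
```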
pub async fn connect(mut config: Config) -> Result { - let pool = config.pool_opt; + let metrics = PrometheusMetrics::default(); + let pool_metrics = PoolMetrics::new(&*metrics.subgroup("sql".into())); + let pool_opt = config.pool_opt; + let pruner_cfg = config.pruner_cfg; + + // Re use the same pool and return + if let Some(pool) = config.pool { + return Ok(Self { + metrics, + pool_metrics, + pool, + pruner_cfg, + }); + } #[cfg(not(feature = "embedded-db"))] let schema = config.schema.clone(); @@ -501,7 +529,7 @@ impl SqlStorage { std::fs::remove_file(config.db_opt.get_filename())?; } - let pool = pool.connect_with(config.db_opt).await?; + let pool = pool_opt.connect_with(config.db_opt).await?; // Create or connect to the schema for this query service. let mut conn = pool.acquire().await?; @@ -560,12 +588,11 @@ impl SqlStorage { .await?; } - let metrics = PrometheusMetrics::default(); Ok(Self { pool, - pool_metrics: PoolMetrics::new(&*metrics.subgroup("sql".into())), + pool_metrics, metrics, - pruner_cfg: config.pruner_cfg, + pruner_cfg, }) } } diff --git a/src/data_source/storage/sql/transaction.rs b/src/data_source/storage/sql/transaction.rs index 80add2346..8fb605fb6 100644 --- a/src/data_source/storage/sql/transaction.rs +++ b/src/data_source/storage/sql/transaction.rs @@ -98,6 +98,7 @@ impl TransactionMode for Write { #[cfg(not(feature = "embedded-db"))] conn.execute("SET TRANSACTION ISOLATION LEVEL SERIALIZABLE") .await?; + Ok(()) } @@ -112,6 +113,7 @@ impl TransactionMode for Read { #[cfg(not(feature = "embedded-db"))] conn.execute("SET TRANSACTION ISOLATION LEVEL SERIALIZABLE, READ ONLY, DEFERRABLE") .await?; + Ok(()) } @@ -334,7 +336,7 @@ impl Transaction { let pk = pk.into_iter().join(","); let mut query_builder = - QueryBuilder::new(format!("INSERT INTO \"{table}\" ({columns_str})")); + QueryBuilder::new(format!("INSERT INTO \"{table}\" ({columns_str}) ")); let mut num_rows = 0; query_builder.push_values(rows, |mut b, row| { From 78aaaa7197360285ec7d3da1bb47e4aa32b1cb36 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Mon, 18 Nov 2024 03:15:35 +0500 Subject: [PATCH 39/48] fix comment --- src/data_source/storage/sql.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index 53326adb4..555e5ea11 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -500,7 +500,7 @@ impl SqlStorage { let pool_opt = config.pool_opt; let pruner_cfg = config.pruner_cfg; - // Re use the same pool and return + // re-use the same pool if present and return early if let Some(pool) = config.pool { return Ok(Self { metrics, From d94f16eeec6cbdfa5d4077ee729ffba58fdc2d2b Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Mon, 18 Nov 2024 04:37:37 +0500 Subject: [PATCH 40/48] add migration --- .../V300__create_aggregates_table.sql} | 0 migrations/sqlite/V20__create_aggregates_table.sql | 5 +++++ 2 files changed, 5 insertions(+) rename migrations/{V200__create_aggregates_table.sql => postgres/V300__create_aggregates_table.sql} (100%) create mode 100644 migrations/sqlite/V20__create_aggregates_table.sql diff --git a/migrations/V200__create_aggregates_table.sql b/migrations/postgres/V300__create_aggregates_table.sql similarity index 100% rename from migrations/V200__create_aggregates_table.sql rename to migrations/postgres/V300__create_aggregates_table.sql diff --git a/migrations/sqlite/V20__create_aggregates_table.sql b/migrations/sqlite/V20__create_aggregates_table.sql new file mode 100644 index 
000000000..edca9e468 --- /dev/null +++ b/migrations/sqlite/V20__create_aggregates_table.sql @@ -0,0 +1,5 @@ +CREATE TABLE aggregate ( + height BIGINT PRIMARY KEY REFERENCES header (height) ON DELETE CASCADE, + num_transactions BIGINT NOT NULL, + payload_size BIGINT NOT NULL +); From a7cd0dea96ed12d6a3ed7404ff78e55b6b5e195f Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Mon, 18 Nov 2024 05:01:08 +0500 Subject: [PATCH 41/48] fix test_migrations --- migrations/sqlite/V20__create_aggregates_table.sql | 2 +- src/data_source/storage/sql.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/migrations/sqlite/V20__create_aggregates_table.sql b/migrations/sqlite/V20__create_aggregates_table.sql index edca9e468..b81e5a8e0 100644 --- a/migrations/sqlite/V20__create_aggregates_table.sql +++ b/migrations/sqlite/V20__create_aggregates_table.sql @@ -2,4 +2,4 @@ CREATE TABLE aggregate ( height BIGINT PRIMARY KEY REFERENCES header (height) ON DELETE CASCADE, num_transactions BIGINT NOT NULL, payload_size BIGINT NOT NULL -); +); \ No newline at end of file diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index 99ac8ee8a..1a51b1c7a 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -1170,7 +1170,7 @@ mod test { "ALTER TABLE test ADD COLUMN data INTEGER;", ) .unwrap(), - Migration::unapplied("V998__create_test_table.sql", "CREATE TABLE test ();").unwrap(), + Migration::unapplied("V998__create_test_table.sql", "CREATE TABLE test (x bigint);").unwrap(), ]; connect(true, migrations.clone()).await.unwrap(); From db5d90b7732404b753f0d9caa0f01392a9e4ea15 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Mon, 18 Nov 2024 05:31:44 +0500 Subject: [PATCH 42/48] try2 --- src/data_source/storage/sql.rs | 6 +++++- src/fetching/provider/query_service.rs | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index 1a51b1c7a..fed6d9086 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -1170,7 +1170,11 @@ mod test { "ALTER TABLE test ADD COLUMN data INTEGER;", ) .unwrap(), - Migration::unapplied("V998__create_test_table.sql", "CREATE TABLE test (x bigint);").unwrap(), + Migration::unapplied( + "V998__create_test_table.sql", + "CREATE TABLE test (x bigint);", + ) + .unwrap(), ]; connect(true, migrations.clone()).await.unwrap(); diff --git a/src/fetching/provider/query_service.rs b/src/fetching/provider/query_service.rs index 489aa3950..6587bb992 100644 --- a/src/fetching/provider/query_service.rs +++ b/src/fetching/provider/query_service.rs @@ -1129,6 +1129,8 @@ mod test { tracing::info!("retrieve from storage"); let fetch = data_source.get_leaf(1).await; assert_eq!(leaves[0], fetch.try_resolve().ok().unwrap()); + + drop(db); } #[tokio::test(flavor = "multi_thread")] From d60cec43d9f34d72c144b9deeb0f6f9ea248268e Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Tue, 19 Nov 2024 02:54:51 +0500 Subject: [PATCH 43/48] fix query --- ...0__init_schema.sql => V100__init_schema.sql} | 0 ...le.sql => V200__create_aggregates_table.sql} | 0 src/data_source/storage/sql.rs | 17 +---------------- src/data_source/storage/sql/db.rs | 6 ++++++ 4 files changed, 7 insertions(+), 16 deletions(-) rename migrations/sqlite/{V10__init_schema.sql => V100__init_schema.sql} (100%) rename migrations/sqlite/{V20__create_aggregates_table.sql => V200__create_aggregates_table.sql} (100%) diff --git a/migrations/sqlite/V10__init_schema.sql 
b/migrations/sqlite/V100__init_schema.sql similarity index 100% rename from migrations/sqlite/V10__init_schema.sql rename to migrations/sqlite/V100__init_schema.sql diff --git a/migrations/sqlite/V20__create_aggregates_table.sql b/migrations/sqlite/V200__create_aggregates_table.sql similarity index 100% rename from migrations/sqlite/V20__create_aggregates_table.sql rename to migrations/sqlite/V200__create_aggregates_table.sql diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index fed6d9086..dc4c1de05 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -237,7 +237,6 @@ impl Default for Config { fn default() -> Self { SqliteConnectOptions::default() .journal_mode(sqlx::sqlite::SqliteJournalMode::Wal) - .synchronous(sqlx::sqlite::SqliteSynchronous::Normal) .busy_timeout(Duration::from_secs(30)) .auto_vacuum(sqlx::sqlite::SqliteAutoVacuum::Incremental) .create_if_missing(true) @@ -667,21 +666,7 @@ impl PruneStorage for SqlStorage { #[cfg(feature = "embedded-db")] let query = " - SELECT - SUM(total_bytes) - FROM ( - SELECT - name, - (page_count * page_size) AS total_bytes - FROM - pragma_page_size - JOIN - pragma_page_count ON 1 = 1 - JOIN - sqlite_master ON type = 'table' - WHERE - name NOT LIKE 'sqlite_%' - );"; + SELECT( (SELECT page_count FROM pragma_page_count) * (SELECT * FROM pragma_page_size)) AS total_bytes"; let row = tx.fetch_one(query).await?; let size: i64 = row.get(0); diff --git a/src/data_source/storage/sql/db.rs b/src/data_source/storage/sql/db.rs index 037ba6412..f220227cc 100644 --- a/src/data_source/storage/sql/db.rs +++ b/src/data_source/storage/sql/db.rs @@ -42,3 +42,9 @@ pub type Db = sqlx::Sqlite; #[cfg(not(feature = "embedded-db"))] pub type Db = sqlx::Postgres; + +#[cfg(not(feature = "embedded-db"))] +pub const MAX_FN: &str = "GREATEST"; + +#[cfg(feature = "embedded-db")] +pub const MAX_FN: &str = "MAX"; From 83535f65e010682ce698ffcf9e3a75e698c7bbe9 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Tue, 19 Nov 2024 03:49:12 +0500 Subject: [PATCH 44/48] fix migration for sqlite --- src/data_source/storage/sql.rs | 39 +++++++++++++++++----------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index 2009f6418..24512eb11 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -932,7 +932,7 @@ pub mod testing { .port(self.port()); cfg = cfg.migrations(vec![Migration::unapplied( - "V11__create_test_merkle_tree_table.sql", + "V101__create_test_merkle_tree_table.sql", &TestMerkleTreeMigration::create("test_tree"), ) .unwrap()]); @@ -1057,31 +1057,32 @@ pub mod testing { impl TestMerkleTreeMigration { fn create(name: &str) -> String { - #[cfg(feature = "embedded-db")] - let bitvec_type = "TEXT"; - #[cfg(not(feature = "embedded-db"))] - let bitvec_type = "BIT(8)"; - - #[cfg(feature = "embedded-db")] - let binary_data_type = "BLOB"; - #[cfg(not(feature = "embedded-db"))] - let binary_data_type = "BYTEA"; - - #[cfg(feature = "embedded-db")] - let hash_pk_type = "INTEGER PRIMARY KEY AUTOINCREMENT"; - #[cfg(not(feature = "embedded-db"))] - let hash_pk_type = "SERIAL PRIMARY KEY"; + let (bit_vec, binary, hash_pk, root_stored_column) = if cfg!(feature = "embedded-db") { + ( + "TEXT", + "BLOB", + "INTEGER PRIMARY KEY AUTOINCREMENT", + " (json_extract(data, '$.fields.fee_merkle_tree_root'))", + ) + } else { + ( + "BIT(8)", + "BYTEA", + "SERIAL PRIMARY KEY", + "(data->>'test_merkle_tree_root')", + ) + }; format!( 
"CREATE TABLE IF NOT EXISTS hash ( - id {hash_pk_type}, - value {binary_data_type} NOT NULL UNIQUE + id {hash_pk}, + value {binary} NOT NULL UNIQUE ); ALTER TABLE header ADD column test_merkle_tree_root text - GENERATED ALWAYS as (data->>'test_merkle_tree_root') STORED; + GENERATED ALWAYS as {root_stored_column} STORED CREATE TABLE {name} ( @@ -1089,7 +1090,7 @@ pub mod testing { created BIGINT NOT NULL, hash_id INT NOT NULL, children JSONB, - children_bitvec {bitvec_type}, + children_bitvec {bit_vec}, idx JSONB, entry JSONB, PRIMARY KEY (path, created) From 925ea98260e4ac3455997cce8c8e3218f02982ac Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Tue, 19 Nov 2024 03:50:29 +0500 Subject: [PATCH 45/48] fix migration for sqlite --- src/data_source/storage/sql.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index 24512eb11..43cdd91a7 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -1062,7 +1062,7 @@ pub mod testing { "TEXT", "BLOB", "INTEGER PRIMARY KEY AUTOINCREMENT", - " (json_extract(data, '$.fields.fee_merkle_tree_root'))", + " (json_extract(data, '$.test_merkle_tree_root'))", ) } else { ( From fcd7933c598c4f220d887f660cf692047c074f00 Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Tue, 19 Nov 2024 03:59:51 +0500 Subject: [PATCH 46/48] fix test tree migration --- src/data_source/storage/sql.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index 43cdd91a7..350f33580 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -1082,7 +1082,7 @@ pub mod testing { ALTER TABLE header ADD column test_merkle_tree_root text - GENERATED ALWAYS as {root_stored_column} STORED + GENERATED ALWAYS as {root_stored_column} STORED; CREATE TABLE {name} ( From 9860c41f7fc015534e96f634ce8c07f313be682a Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Wed, 20 Nov 2024 04:48:11 +0500 Subject: [PATCH 47/48] fix MAX_FN --- src/data_source/storage/sql/db.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/data_source/storage/sql/db.rs b/src/data_source/storage/sql/db.rs index 98c3552e9..5b3c86aa7 100644 --- a/src/data_source/storage/sql/db.rs +++ b/src/data_source/storage/sql/db.rs @@ -45,12 +45,12 @@ pub type Db = sqlx::Postgres; #[cfg(feature = "embedded-db")] pub mod syntax_helpers { - pub const MAX_FN: &str = "GREATEST"; + pub const MAX_FN: &str = "MAX"; pub const BINARY_TYPE: &str = "BLOB"; } #[cfg(not(feature = "embedded-db"))] pub mod syntax_helpers { - pub const MAX_FN: &str = "MAX"; + pub const MAX_FN: &str = "GREATEST"; pub const BINARY_TYPE: &str = "BYTEA"; } From d4e59307ebdf94dfd6eaddd2919b76241e95c9bb Mon Sep 17 00:00:00 2001 From: Abdul Basit Date: Thu, 21 Nov 2024 02:35:53 +0500 Subject: [PATCH 48/48] close connection when running migrations --- src/data_source/storage/sql.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/data_source/storage/sql.rs b/src/data_source/storage/sql.rs index 350f33580..c38275db6 100644 --- a/src/data_source/storage/sql.rs +++ b/src/data_source/storage/sql.rs @@ -587,6 +587,8 @@ impl SqlStorage { .await?; } + conn.close().await?; + Ok(Self { pool, pool_metrics,