Skip to content

Commit

Permalink
[Data Service] Implement simple upstream transaction filtering (#13699)
Browse files Browse the repository at this point in the history
There are two types of transaction filtering we will support in the future:
1. Per stream configuration: The downstream declares what txns they want to receive.
2. Global configuration: At the data service level we refuse to include full txns for all streams.

This PR implements the second of these, using @CapCap's work here:
aptos-labs/aptos-indexer-processors#398.

Rather than not sending txns at all if they match the blocklist filters, we just omit the writesets and events. Not sending the txns entirely would cause issues with processors, which today assume that they will receive all txns.
  • Loading branch information
banool authored Jun 21, 2024
1 parent 43d709c commit 153133b
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 63 deletions.
13 changes: 7 additions & 6 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ aptos-indexer-grpc-utils = { workspace = true }
aptos-metrics-core = { workspace = true }
aptos-moving-average = { workspace = true }
aptos-protos = { workspace = true }
aptos-transaction-filter = { workspace = true }
async-trait = { workspace = true }
clap = { workspace = true }
futures = { workspace = true }
Expand Down
32 changes: 20 additions & 12 deletions ecosystem/indexer-grpc/indexer-grpc-data-service/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ use aptos_protos::{
transaction::v1::FILE_DESCRIPTOR_SET as TRANSACTION_V1_TESTING_FILE_DESCRIPTOR_SET,
util::timestamp::FILE_DESCRIPTOR_SET as UTIL_TIMESTAMP_FILE_DESCRIPTOR_SET,
};
use aptos_transaction_filter::BooleanTransactionFilter;
use serde::{Deserialize, Serialize};
use std::{collections::HashSet, net::SocketAddr, sync::Arc};
use std::{net::SocketAddr, sync::Arc};
use tonic::{codec::CompressionEncoding, transport::Server};

pub const SERVER_NAME: &str = "idxdatasvc";
Expand Down Expand Up @@ -69,9 +70,18 @@ pub struct IndexerGrpcDataServiceConfig {
pub enable_cache_compression: bool,
#[serde(default)]
pub in_memory_cache_config: InMemoryCacheConfig,
/// Sender addresses to ignore. Transactions from these addresses will not be indexed.
#[serde(default = "IndexerGrpcDataServiceConfig::default_sender_addresses_to_ignore")]
pub sender_addresses_to_ignore: Vec<String>,
/// Any transaction that matches this filter will be stripped. This means we remove
/// the payload, signature, events, and writesets from it before sending it
/// downstream. This should only be used in an emergency situation, e.g. when txns
/// related to a certain module are too large and are causing issues for the data
/// service. Learn more here:
///
/// https://www.notion.so/aptoslabs/Runbook-c006a37259394ac2ba904d6b54d180fa?pvs=4#171c210964ec42a89574fc80154f9e85
///
/// Generally you will want to start with this with an OR, and then list out
/// separate filters that describe each type of txn we want to strip.
#[serde(default = "IndexerGrpcDataServiceConfig::default_txns_to_strip_filter")]
pub txns_to_strip_filter: BooleanTransactionFilter,
}

impl IndexerGrpcDataServiceConfig {
Expand All @@ -84,7 +94,7 @@ impl IndexerGrpcDataServiceConfig {
redis_read_replica_address: RedisUrl,
enable_cache_compression: bool,
in_memory_cache_config: InMemoryCacheConfig,
sender_addresses_to_ignore: Vec<String>,
txns_to_strip_filter: BooleanTransactionFilter,
) -> Self {
Self {
data_service_grpc_tls_config,
Expand All @@ -97,7 +107,7 @@ impl IndexerGrpcDataServiceConfig {
redis_read_replica_address,
enable_cache_compression,
in_memory_cache_config,
sender_addresses_to_ignore,
txns_to_strip_filter,
}
}

Expand All @@ -109,8 +119,9 @@ impl IndexerGrpcDataServiceConfig {
false
}

pub const fn default_sender_addresses_to_ignore() -> Vec<String> {
vec![]
pub fn default_txns_to_strip_filter() -> BooleanTransactionFilter {
// This filter matches no txns.
BooleanTransactionFilter::new_or(vec![])
}
}

Expand Down Expand Up @@ -170,10 +181,7 @@ impl RunnableConfig for IndexerGrpcDataServiceConfig {
self.redis_read_replica_address.clone(),
self.file_store_config.clone(),
self.data_service_response_channel_size,
self.sender_addresses_to_ignore
.clone()
.into_iter()
.collect::<HashSet<_>>(),
self.txns_to_strip_filter.clone(),
cache_storage_format,
Arc::new(in_memory_cache),
)?;
Expand Down
52 changes: 49 additions & 3 deletions ecosystem/indexer-grpc/indexer-grpc-data-service/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,58 @@ pub static SHORT_CONNECTION_COUNT: Lazy<IntCounterVec> = Lazy::new(|| {
.unwrap()
});

/// Count of bytes transfered to the client. This only represents the bytes prepared and ready
/// to send to the client. It does not represent the bytes actually sent to the client.
/// Count of bytes transfered to the client. This only represents the bytes prepared and
/// ready to send to the client. This only t It does not represent the bytes actually
/// sent to the client.
///
/// This is pre stripping, so it may include bytes for transactions that were later
/// stripped. See BYTES_READY_TO_TRANSFER_FROM_SERVER_AFTER_STRIPPING for post
/// stirpping.
pub static BYTES_READY_TO_TRANSFER_FROM_SERVER: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"indexer_grpc_data_service_bytes_ready_to_transfer_from_server",
"Count of bytes ready to transfer to the client",
"Count of bytes ready to transfer to the client (pre stripping)",
&[
"identifier_type",
"identifier",
"email",
"application_name",
"processor"
],
)
.unwrap()
});

/// Count of bytes transfered to the client. This only represents the bytes prepared and
/// ready to send to the client. This only t It does not represent the bytes actually
/// sent to the client.
///
/// This is post stripping, meaning some transactions may have been stripped (removing
/// things such as events, writesets, payload, signature). Compare this with
/// BYTES_READY_TO_TRANSFER_FROM_SERVER to see how many bytes were stripped.
pub static BYTES_READY_TO_TRANSFER_FROM_SERVER_AFTER_STRIPPING: Lazy<IntCounterVec> =
Lazy::new(|| {
register_int_counter_vec!(
"indexer_grpc_data_service_bytes_ready_to_transfer_from_server_after_stripping",
"Count of bytes ready to transfer to the client (post stripping)",
&[
"identifier_type",
"identifier",
"email",
"application_name",
"processor"
],
)
.unwrap()
});

/// The number of transactions that had data (such as events, writesets, payload,
/// signature) stripped from them due to the `txns_to_strip_filter`. See
/// `strip_transactions` for more.
pub static NUM_TRANSACTIONS_STRIPPED: Lazy<IntCounterVec> = Lazy::new(|| {
register_int_counter_vec!(
"indexer_grpc_data_service_num_transactions_stripped",
"Number of transactions that had data (such as events, writesets, payload, signature) stripped from them",
&[
"identifier_type",
"identifier",
Expand Down
Loading

0 comments on commit 153133b

Please sign in to comment.