diff --git a/Makefile b/Makefile index 202d7930..3e8c7df3 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,7 @@ all: clean build test clean: ./gradlew clean + rm config.rst metrics.rst checkstyle: ./gradlew checkstyleMain checkstyleTest checkstyleIntegrationTest @@ -43,9 +44,14 @@ storage/azure/build/distributions/azure-$(VERSION).tgz: ./gradlew build :storage:azure:distTar -x test -x integrationTest -x e2e:test .PHONY: docs -docs: +docs: config.rst metrics.rst + +config.rst: ./gradlew :docs:genConfigDocs +metrics.rst: + ./gradlew :docs:genMetricsDocs + test: build ./gradlew test -x e2e:test diff --git a/core/src/main/java/io/aiven/kafka/tieredstorage/fetch/cache/ChunkCache.java b/core/src/main/java/io/aiven/kafka/tieredstorage/fetch/cache/ChunkCache.java index 9dafcf46..8c2af76b 100644 --- a/core/src/main/java/io/aiven/kafka/tieredstorage/fetch/cache/ChunkCache.java +++ b/core/src/main/java/io/aiven/kafka/tieredstorage/fetch/cache/ChunkCache.java @@ -47,8 +47,8 @@ import com.github.benmanes.caffeine.cache.Weigher; public abstract class ChunkCache implements ChunkManager, Configurable { - private static final String METRIC_GROUP = "chunk-cache-metrics"; - private static final String THREAD_POOL_METRIC_GROUP = "chunk-cache-thread-pool-metrics"; + public static final String METRIC_GROUP = "chunk-cache-metrics"; + public static final String THREAD_POOL_METRIC_GROUP = "chunk-cache-thread-pool-metrics"; private final ChunkManager chunkManager; private ExecutorService executor; diff --git a/core/src/main/java/io/aiven/kafka/tieredstorage/fetch/index/MemorySegmentIndexesCache.java b/core/src/main/java/io/aiven/kafka/tieredstorage/fetch/index/MemorySegmentIndexesCache.java index 5bf4f64a..deb35067 100644 --- a/core/src/main/java/io/aiven/kafka/tieredstorage/fetch/index/MemorySegmentIndexesCache.java +++ b/core/src/main/java/io/aiven/kafka/tieredstorage/fetch/index/MemorySegmentIndexesCache.java @@ -50,8 +50,8 @@ public class MemorySegmentIndexesCache implements SegmentIndexesCache { private static final Logger log = LoggerFactory.getLogger(MemorySegmentIndexesCache.class); private static final long DEFAULT_MAX_SIZE_BYTES = 10 * 1024 * 1024; - private static final String METRIC_GROUP = "segment-indexes-cache-metrics"; - private static final String THREAD_POOL_METRIC_GROUP = "segment-indexes-cache-thread-pool-metrics"; + public static final String METRIC_GROUP = "segment-indexes-cache-metrics"; + public static final String THREAD_POOL_METRIC_GROUP = "segment-indexes-cache-thread-pool-metrics"; private final CaffeineStatsCounter statsCounter = new CaffeineStatsCounter(METRIC_GROUP); diff --git a/core/src/main/java/io/aiven/kafka/tieredstorage/fetch/manifest/MemorySegmentManifestCache.java b/core/src/main/java/io/aiven/kafka/tieredstorage/fetch/manifest/MemorySegmentManifestCache.java index cadf2d80..e4ea3f9f 100644 --- a/core/src/main/java/io/aiven/kafka/tieredstorage/fetch/manifest/MemorySegmentManifestCache.java +++ b/core/src/main/java/io/aiven/kafka/tieredstorage/fetch/manifest/MemorySegmentManifestCache.java @@ -46,8 +46,8 @@ public class MemorySegmentManifestCache implements SegmentManifestCache { private static final Logger log = LoggerFactory.getLogger(MemorySegmentManifestCache.class); - private static final String METRIC_GROUP = "segment-manifest-cache-metrics"; - private static final String THREAD_POOL_METRIC_GROUP = "segment-manifest-cache-thread-pool-metrics"; + public static final String METRIC_GROUP = "segment-manifest-cache-metrics"; + public static final String THREAD_POOL_METRIC_GROUP = "segment-manifest-cache-thread-pool-metrics"; private static final long DEFAULT_MAX_SIZE = 1000L; private static final long DEFAULT_RETENTION_MS = 3_600_000; diff --git a/docs/build.gradle b/docs/build.gradle index 7ae589ca..e2e3d1e4 100644 --- a/docs/build.gradle +++ b/docs/build.gradle @@ -31,3 +31,9 @@ tasks.register('genConfigDocs', JavaExec) { mainClass = 'io.aiven.kafka.tieredstorage.misc.ConfigDocs' standardOutput = new File("config.rst").newOutputStream() } + +tasks.register('genMetricsDocs', JavaExec) { + classpath = sourceSets.main.runtimeClasspath + mainClass = 'io.aiven.kafka.tieredstorage.misc.MetricDocs' + standardOutput = new File("metrics.rst").newOutputStream() +} diff --git a/docs/src/main/java/io/aiven/kafka/tieredstorage/misc/MetricDocs.java b/docs/src/main/java/io/aiven/kafka/tieredstorage/misc/MetricDocs.java new file mode 100644 index 00000000..1dcd8152 --- /dev/null +++ b/docs/src/main/java/io/aiven/kafka/tieredstorage/misc/MetricDocs.java @@ -0,0 +1,189 @@ +/* + * Copyright 2024 Aiven Oy + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.aiven.kafka.tieredstorage.misc; + +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.TreeMap; + +import org.apache.kafka.common.MetricName; +import org.apache.kafka.common.MetricNameTemplate; +import org.apache.kafka.common.metrics.Metrics; +import org.apache.kafka.common.utils.Sanitizer; + +import io.aiven.kafka.tieredstorage.fetch.cache.ChunkCache; +import io.aiven.kafka.tieredstorage.fetch.index.MemorySegmentIndexesCache; +import io.aiven.kafka.tieredstorage.fetch.manifest.MemorySegmentManifestCache; +import io.aiven.kafka.tieredstorage.metrics.CaffeineMetricsRegistry; +import io.aiven.kafka.tieredstorage.metrics.MetricsRegistry; +import io.aiven.kafka.tieredstorage.metrics.ThreadPoolMonitorMetricsRegistry; + +public class MetricDocs { + public static void main(final String[] args) { + printSectionTitle("Core components metrics"); + System.out.println(); + printSubsectionTitle("RemoteStorageManager metrics"); + System.out.println(); + System.out.println(toRstTable("test", new MetricsRegistry().all())); + + System.out.println(); + printSubsectionTitle("SegmentManifestCache metrics"); + System.out.println(); + System.out.println(toRstTable( + "test", + new CaffeineMetricsRegistry(MemorySegmentManifestCache.METRIC_GROUP).all())); + System.out.println(); + System.out.println(toRstTable( + "test", + new ThreadPoolMonitorMetricsRegistry(MemorySegmentManifestCache.THREAD_POOL_METRIC_GROUP).all())); + + System.out.println(); + printSubsectionTitle("SegmentIndexesCache metrics"); + System.out.println(toRstTable( + "test", + new CaffeineMetricsRegistry(MemorySegmentIndexesCache.METRIC_GROUP).all())); + System.out.println(toRstTable( + "test", + new ThreadPoolMonitorMetricsRegistry(MemorySegmentIndexesCache.THREAD_POOL_METRIC_GROUP).all())); + System.out.println(); + printSubsectionTitle("ChunkCache metrics"); + System.out.println(); + System.out.println(toRstTable( + "test", + new CaffeineMetricsRegistry(ChunkCache.METRIC_GROUP).all())); + System.out.println(); + System.out.println(toRstTable( + "test", + new ThreadPoolMonitorMetricsRegistry(ChunkCache.THREAD_POOL_METRIC_GROUP).all())); + + System.out.println(); + printSectionTitle("Storage Backend metrics"); + System.out.println(); + printSubsectionTitle("AzureBlobStorage metrics"); + System.out.println(); + System.out.println(toRstTable( + "test", + new io.aiven.kafka.tieredstorage.storage.azure.MetricRegistry().all())); + System.out.println(); + printSubsectionTitle("GcsStorage metrics"); + System.out.println(); + System.out.println(toRstTable( + "test", + new io.aiven.kafka.tieredstorage.storage.gcs.MetricRegistry().all())); + System.out.println(); + printSubsectionTitle("S3Storage metrics"); + System.out.println(); + System.out.println(toRstTable( + "test", + new io.aiven.kafka.tieredstorage.storage.s3.MetricRegistry().all())); + } + + public static String toRstTable(final String domain, final Iterable allMetrics) { + final Map> beansAndAttributes = new TreeMap<>(); + + try (final Metrics metrics = new Metrics()) { + for (final MetricNameTemplate template : allMetrics) { + final Map tags = new LinkedHashMap<>(); + for (final String s : template.tags()) { + tags.put(s, "{" + s + "}"); + } + + final MetricName metricName = metrics.metricName( + template.name(), + template.group(), + template.description(), + tags + ); + final String beanName = getMBeanName(domain, metricName); + beansAndAttributes.computeIfAbsent(beanName, k -> new TreeMap<>()); + final Map attrAndDesc = beansAndAttributes.get(beanName); + if (!attrAndDesc.containsKey(template.name())) { + attrAndDesc.put(template.name(), template.description()); + } else { + throw new IllegalArgumentException( + "mBean '" + beanName + + "' attribute '" + + template.name() + + "' is defined twice." + ); + } + } + } + + final StringBuilder b = new StringBuilder(); + + for (final Map.Entry> e : beansAndAttributes.entrySet()) { + // Add mBean name as a section title + b.append(e.getKey()).append("\n"); + b.append("=".repeat(e.getKey().length())).append("\n\n"); + + // Determine the maximum lengths for each column + final int maxAttrLength = Math.max("Attribute name".length(), + e.getValue().keySet().stream().mapToInt(String::length).max().orElse(0)); + final int maxDescLength = Math.max("Description".length(), + e.getValue().values().stream().mapToInt(String::length).max().orElse(0)); + + // Create the table header + final String headerFormat = "%-" + maxAttrLength + "s %-" + maxDescLength + "s\n"; + final String separatorLine = "=" + "=".repeat(maxAttrLength) + " " + "=".repeat(maxDescLength) + "\n"; + + b.append(separatorLine); + b.append(String.format(headerFormat, "Attribute name", "Description")); + b.append(separatorLine); + + // Add table rows + for (final Map.Entry e2 : e.getValue().entrySet()) { + b.append(String.format(headerFormat, e2.getKey(), e2.getValue())); + } + + // Close the table + b.append(separatorLine); + b.append("\n"); // Add an empty line between tables + } + + return b.toString(); + } + + static String getMBeanName(final String prefix, final MetricName metricName) { + final StringBuilder beanName = new StringBuilder(); + beanName.append(prefix); + beanName.append(":type="); + beanName.append(metricName.group()); + for (final Map.Entry entry : metricName.tags().entrySet()) { + if (entry.getKey().length() <= 0 || entry.getValue().length() <= 0) { + continue; + } + beanName.append(","); + beanName.append(entry.getKey()); + beanName.append("="); + beanName.append(Sanitizer.jmxSanitize(entry.getValue())); + } + return beanName.toString(); + } + + static void printSectionTitle(final String title) { + System.out.println("=================\n" + + title + "\n" + + "================="); + } + + static void printSubsectionTitle(final String title) { + System.out.println("-----------------\n" + + title + "\n" + + "-----------------"); + } +} diff --git a/metrics.rst b/metrics.rst new file mode 100644 index 00000000..87375c09 --- /dev/null +++ b/metrics.rst @@ -0,0 +1,367 @@ +================= +Core components metrics +================= + +----------------- +RemoteStorageManager metrics +----------------- + +test:type=remote-storage-manager-metrics +======================================== + +==================================== =========== +Attribute name Description +==================================== =========== +object-upload-bytes-rate +object-upload-bytes-total +object-upload-rate +object-upload-total +segment-copy-time-avg +segment-copy-time-max +segment-delete-bytes-total +segment-delete-errors-rate +segment-delete-errors-total +segment-delete-rate +segment-delete-time-avg +segment-delete-time-max +segment-delete-total +segment-fetch-requested-bytes-rate +segment-fetch-requested-bytes-total +==================================== =========== + +test:type=remote-storage-manager-metrics,object-type="{object-type}" +==================================================================== + +========================== =========== +Attribute name Description +========================== =========== +object-upload-bytes-rate +object-upload-bytes-total +object-upload-rate +object-upload-total +========================== =========== + +test:type=remote-storage-manager-metrics,topic="{topic}" +======================================================== + +==================================== =========== +Attribute name Description +==================================== =========== +object-upload-bytes-rate +object-upload-bytes-total +object-upload-rate +object-upload-total +segment-copy-time-avg +segment-copy-time-max +segment-delete-bytes-total +segment-delete-errors-rate +segment-delete-errors-total +segment-delete-rate +segment-delete-time-avg +segment-delete-time-max +segment-delete-total +segment-fetch-requested-bytes-rate +segment-fetch-requested-bytes-total +==================================== =========== + +test:type=remote-storage-manager-metrics,topic="{topic}",object-type="{object-type}" +==================================================================================== + +========================== =========== +Attribute name Description +========================== =========== +object-upload-bytes-rate +object-upload-bytes-total +object-upload-rate +object-upload-total +========================== =========== + +test:type=remote-storage-manager-metrics,topic="{topic}",partition="{partition}" +================================================================================ + +==================================== =========== +Attribute name Description +==================================== =========== +object-upload-bytes-rate +object-upload-bytes-total +object-upload-rate +object-upload-total +segment-copy-time-avg +segment-copy-time-max +segment-delete-bytes-total +segment-delete-errors-rate +segment-delete-errors-total +segment-delete-rate +segment-delete-time-avg +segment-delete-time-max +segment-delete-total +segment-fetch-requested-bytes-rate +segment-fetch-requested-bytes-total +==================================== =========== + +test:type=remote-storage-manager-metrics,topic="{topic}",partition="{partition}",object-type="{object-type}" +============================================================================================================ + +========================== =========== +Attribute name Description +========================== =========== +object-upload-bytes-rate +object-upload-bytes-total +object-upload-rate +object-upload-total +========================== =========== + + + +----------------- +SegmentManifestCache metrics +----------------- + +test:type=segment-manifest-cache-metrics +======================================== + +============================== =========== +Attribute name Description +============================== =========== +cache-eviction-total +cache-eviction-weight-total +cache-hits-total +cache-load-failure-time-total +cache-load-failure-total +cache-load-success-time-total +cache-load-success-total +cache-misses-total +cache-size-total +============================== =========== + +test:type=segment-manifest-cache-metrics,cause="{cause}" +======================================================== + +============================ =========== +Attribute name Description +============================ =========== +cache-eviction-total +cache-eviction-weight-total +============================ =========== + + + +test:type=segment-manifest-cache-thread-pool-metrics +==================================================== + +=========================== =========== +Attribute name Description +=========================== =========== +active-thread-count-total +parallelism-total +pool-size-total +queued-task-count-total +running-thread-count-total +steal-task-count-total +=========================== =========== + + + +----------------- +SegmentIndexesCache metrics +----------------- +test:type=segment-indexes-cache-metrics +======================================= + +============================== =========== +Attribute name Description +============================== =========== +cache-eviction-total +cache-eviction-weight-total +cache-hits-total +cache-load-failure-time-total +cache-load-failure-total +cache-load-success-time-total +cache-load-success-total +cache-misses-total +cache-size-total +============================== =========== + +test:type=segment-indexes-cache-metrics,cause="{cause}" +======================================================= + +============================ =========== +Attribute name Description +============================ =========== +cache-eviction-total +cache-eviction-weight-total +============================ =========== + + +test:type=segment-indexes-cache-thread-pool-metrics +=================================================== + +=========================== =========== +Attribute name Description +=========================== =========== +active-thread-count-total +parallelism-total +pool-size-total +queued-task-count-total +running-thread-count-total +steal-task-count-total +=========================== =========== + + + +----------------- +ChunkCache metrics +----------------- + +test:type=chunk-cache-metrics +============================= + +============================== =========== +Attribute name Description +============================== =========== +cache-eviction-total +cache-eviction-weight-total +cache-hits-total +cache-load-failure-time-total +cache-load-failure-total +cache-load-success-time-total +cache-load-success-total +cache-misses-total +cache-size-total +============================== =========== + +test:type=chunk-cache-metrics,cause="{cause}" +============================================= + +============================ =========== +Attribute name Description +============================ =========== +cache-eviction-total +cache-eviction-weight-total +============================ =========== + + + +test:type=chunk-cache-thread-pool-metrics +========================================= + +=========================== =========== +Attribute name Description +=========================== =========== +active-thread-count-total +parallelism-total +pool-size-total +queued-task-count-total +running-thread-count-total +steal-task-count-total +=========================== =========== + + + +================= +Storage Backend metrics +================= + +----------------- +AzureBlobStorage metrics +----------------- + +test:type=azure-blob-storage-client-metrics +=========================================== + +======================== =========== +Attribute name Description +======================== =========== +blob-delete-rate +blob-delete-total +blob-get-rate +blob-get-total +blob-upload-rate +blob-upload-total +block-list-upload-rate +block-list-upload-total +block-upload-rate +block-upload-total +======================== =========== + + + +----------------- +GcsStorage metrics +----------------- + +test:type=gcs-client-metrics +============================ + +================================ =========== +Attribute name Description +================================ =========== +object-delete-rate +object-delete-total +object-get-rate +object-get-total +object-metadata-get-rate +object-metadata-get-total +resumable-chunk-upload-rate +resumable-chunk-upload-total +resumable-upload-initiate-rate +resumable-upload-initiate-total +================================ =========== + + + +----------------- +S3Storage metrics +----------------- + +test:type=s3-client-metrics +=========================== + +========================================= =========== +Attribute name Description +========================================= =========== +abort-multipart-upload-requests-rate +abort-multipart-upload-requests-total +abort-multipart-upload-time-avg +abort-multipart-upload-time-max +complete-multipart-upload-requests-rate +complete-multipart-upload-requests-total +complete-multipart-upload-time-avg +complete-multipart-upload-time-max +configured-timeout-errors-rate +configured-timeout-errors-total +create-multipart-upload-requests-rate +create-multipart-upload-requests-total +create-multipart-upload-time-avg +create-multipart-upload-time-max +delete-object-requests-rate +delete-object-requests-total +delete-object-time-avg +delete-object-time-max +delete-objects-requests-rate +delete-objects-requests-total +delete-objects-time-avg +delete-objects-time-max +get-object-requests-rate +get-object-requests-total +get-object-time-avg +get-object-time-max +io-errors-rate +io-errors-total +other-errors-rate +other-errors-total +put-object-requests-rate +put-object-requests-total +put-object-time-avg +put-object-time-max +server-errors-rate +server-errors-total +throttling-errors-rate +throttling-errors-total +upload-part-requests-rate +upload-part-requests-total +upload-part-time-avg +upload-part-time-max +========================================= =========== + +