From 7eb8f380cd73294ab466c0c02516ae801f1ff2f6 Mon Sep 17 00:00:00 2001 From: Suraiya Hameed <22776421+Suraiya-Hameed@users.noreply.github.com> Date: Tue, 30 Jan 2024 08:52:18 -0800 Subject: [PATCH] chore: add labels to metrics --- docs/book/src/topics/metrics.md | 88 +++++++++---------- pkg/rotation/reconciler.go | 7 +- pkg/rotation/stats_reporter.go | 27 ++++-- .../mocks/stats_reporter_mock.go | 36 +++++++- pkg/secrets-store/nodeserver.go | 8 +- pkg/secrets-store/nodeserver_test.go | 74 +++++++++++----- pkg/secrets-store/stats_reporter.go | 32 ++++--- 7 files changed, 177 insertions(+), 95 deletions(-) diff --git a/docs/book/src/topics/metrics.md b/docs/book/src/topics/metrics.md index a3a2f67c6..a65f7bced 100644 --- a/docs/book/src/topics/metrics.md +++ b/docs/book/src/topics/metrics.md @@ -6,17 +6,15 @@ Prometheus is the only exporter that's currently supported with the driver. ## List of metrics provided by the driver -| Metric | Description | Tags | -| ------------------------------- | ------------------------------------------------------------------------- | --------------------------------------------------------------------------------- | -| total_node_publish | Total number of successful volume mount requests | `os_type=`
`provider=` | -| total_node_unpublish | Total number of successful volume unmount requests | `os_type=` | -| total_node_publish_error | Total number of errors with volume mount requests | `os_type=`
`provider=`
`error_type=` | -| total_node_unpublish_error | Total number of errors with volume unmount requests | `os_type=` | -| total_sync_k8s_secret | Total number of k8s secrets synced | `os_type=`
`provider=` | -| sync_k8s_secret_duration_sec | Distribution of how long it took to sync k8s secret | `os_type=` | -| total_rotation_reconcile | Total number of rotation reconciles | `os_type=`
`rotated=` | -| total_rotation_reconcile_error | Total number of rotation reconciles with error | `os_type=`
`rotated=`
`error_type=` | -| rotation_reconcile_duration_sec | Distribution of how long it took to rotate secrets-store content for pods | `os_type=` | +| Metric | Description | Tags | +|---------------------------------|---------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| node_publish_total | Total number of successful volume mount requests | `os_type=`
`provider=`
`pod_name=`
`pod_namespace=`
`secret_provider_class=` | +| node_unpublish_total | Total number of successful volume unmount requests | `os_type=` | +| node_publish_error_total | Total number of errors with volume mount requests | `os_type=`
`provider=`
`error_type=`
`pod_name=`
`pod_namespace=`
`secret_provider_class=` | +| node_unpublish_error_total | Total number of errors with volume unmount requests | `os_type=` | +| rotation_reconcile_total | Total number of rotation reconciles | `os_type=`
`rotated=`
`pod_name=`
`pod_namespace=`
`secret_provider_class=` | +| rotation_reconcile_error_total | Total number of rotation reconciles with error | `os_type=`
`rotated=`
`error_type=`
`pod_name=`
`pod_namespace=`
`secret_provider_class=` | +| rotation_reconcile_duration_sec | Distribution of how long it took to rotate secrets-store content for pods | `os_type=`
`pod_name=`
`pod_namespace=`
`secret_provider_class=` | Metrics are served from port 8095, but this port is not exposed outside the pod by default. Use kubectl port-forward to access the metrics over localhost: @@ -28,36 +26,38 @@ curl localhost:8095/metrics ### Sample Metrics output ```shell -# HELP sync_k8s_secret_duration_sec Distribution of how long it took to sync k8s secret -# TYPE sync_k8s_secret_duration_sec histogram -sync_k8s_secret_duration_sec_bucket{os_type="linux",le="0.1"} 0 -sync_k8s_secret_duration_sec_bucket{os_type="linux",le="0.2"} 0 -sync_k8s_secret_duration_sec_bucket{os_type="linux",le="0.3"} 0 -sync_k8s_secret_duration_sec_bucket{os_type="linux",le="0.4"} 1 -sync_k8s_secret_duration_sec_bucket{os_type="linux",le="0.5"} 1 -sync_k8s_secret_duration_sec_bucket{os_type="linux",le="1"} 1 -sync_k8s_secret_duration_sec_bucket{os_type="linux",le="1.5"} 1 -sync_k8s_secret_duration_sec_bucket{os_type="linux",le="2"} 1 -sync_k8s_secret_duration_sec_bucket{os_type="linux",le="2.5"} 1 -sync_k8s_secret_duration_sec_bucket{os_type="linux",le="3"} 1 -sync_k8s_secret_duration_sec_bucket{os_type="linux",le="5"} 1 -sync_k8s_secret_duration_sec_bucket{os_type="linux",le="10"} 1 -sync_k8s_secret_duration_sec_bucket{os_type="linux",le="15"} 1 -sync_k8s_secret_duration_sec_bucket{os_type="linux",le="30"} 1 -sync_k8s_secret_duration_sec_bucket{os_type="linux",le="+Inf"} 1 -sync_k8s_secret_duration_sec_sum{os_type="linux"} 0.3115892 -sync_k8s_secret_duration_sec_count{os_type="linux"} 1 -# HELP total_node_publish Total number of node publish calls -# TYPE total_node_publish counter -total_node_publish{os_type="linux",provider="azure"} 1 -# HELP total_node_publish_error Total number of node publish calls with error -# TYPE total_node_publish_error counter -total_node_publish_error{error_type="ProviderBinaryNotFound",os_type="linux",provider="azure"} 2 -total_node_publish_error{error_type="SecretProviderClassNotFound",os_type="linux",provider=""} 4 -# HELP total_node_unpublish Total number of node unpublish calls -# TYPE total_node_unpublish counter -total_node_unpublish{os_type="linux"} 1 -# HELP total_sync_k8s_secret Total number of k8s secrets synced -# TYPE total_sync_k8s_secret counter -total_sync_k8s_secret{os_type="linux",provider="azure"} 1 -``` +# HELP rotation_reconcile_duration_sec Distribution of how long it took to rotate secrets-store content for pods +# TYPE rotation_reconcile_duration_sec histogram +rotation_reconcile_duration_sec_bucket{os_type="linux",le="0.1"} 0 +rotation_reconcile_duration_sec_bucket{os_type="linux",le="0.2"} 0 +rotation_reconcile_duration_sec_bucket{os_type="linux",le="0.3"} 0 +rotation_reconcile_duration_sec_bucket{os_type="linux",le="0.4"} 1 +rotation_reconcile_duration_sec_bucket{os_type="linux",le="0.5"} 1 +rotation_reconcile_duration_sec_bucket{os_type="linux",le="1"} 1 +rotation_reconcile_duration_sec_bucket{os_type="linux",le="1.5"} 1 +rotation_reconcile_duration_sec_bucket{os_type="linux",le="2"} 1 +rotation_reconcile_duration_sec_bucket{os_type="linux",le="2.5"} 1 +rotation_reconcile_duration_sec_bucket{os_type="linux",le="3"} 1 +rotation_reconcile_duration_sec_bucket{os_type="linux",le="5"} 1 +rotation_reconcile_duration_sec_bucket{os_type="linux",le="10"} 1 +rotation_reconcile_duration_sec_bucket{os_type="linux",le="15"} 1 +rotation_reconcile_duration_sec_bucket{os_type="linux",le="30"} 1 +rotation_reconcile_duration_sec_bucket{os_type="linux",le="+Inf"} 1 +rotation_reconcile_duration_sec_sum{os_type="linux",} 0.3115892 +rotation_reconcile_duration_sec_count{os_type="linux"} 1 +# HELP rotation_reconcile_total Total number of rotation reconciles +# TYPE rotation_reconcile_total counter +rotation_reconcile_total{os_type="linux",pod_name="csi-test-app-wcsxk",pod_namespace="csi-test-secret-ns",provider="azure",rotated="false",secret_provider_class="csi-test-spc"} 1 +# HELP rotation_reconcile_error_total Total number of rotation reconciles with error +# TYPE rotation_reconcile_error_total counter +rotation_reconcile_error_total{error_type="GRPCProviderError",os_type="linux",pod_name="csi-test-app-wcsxk",pod_namespace="csi-test-secret-ns",provider="azure",rotated="false",secret_provider_class="csi-test-spc"} 12 +# HELP node_publish_total Total number of node publish calls +# TYPE node_publish_total counter +node_publish_total{os_type="linux",pod_name="csi-test-app-wcsxk",pod_namespace="csi-test-secret-ns",provider="azure",secret_provider_class="csi-test-spc"} 1 +# HELP node_publish_error_total Total number of node publish calls with error +# TYPE node_publish_error_total counter +node_publish_error_total{error_type="ProviderBinaryNotFound",os_type="linux",pod_name="csi-test-app-wcsxk",pod_namespace="csi-test-secret-ns",provider="azure",secret_provider_class="csi-test-spc"} 7 +# HELP node_unpublish_total Total number of node unpublish calls +# TYPE node_unpublish_total counter +node_unpublish_total{os_type="linux"} 1 +``` \ No newline at end of file diff --git a/pkg/rotation/reconciler.go b/pkg/rotation/reconciler.go index e82d12f79..8d3b5396d 100644 --- a/pkg/rotation/reconciler.go +++ b/pkg/rotation/reconciler.go @@ -251,13 +251,16 @@ func (r *Reconciler) reconcile(ctx context.Context, spcps *secretsstorev1.Secret // after the provider mount request is complete var requiresUpdate bool var providerName string + podName := spcps.Status.PodName + podNamespace := spcps.Namespace + secretProviderClass := spcps.Status.SecretProviderClassName defer func() { if err != nil { - r.reporter.reportRotationErrorCtMetric(ctx, providerName, errorReason, requiresUpdate) + r.reporter.reportRotationErrorCtMetric(ctx, providerName, podName, podNamespace, secretProviderClass, errorReason, requiresUpdate) return } - r.reporter.reportRotationCtMetric(ctx, providerName, requiresUpdate) + r.reporter.reportRotationCtMetric(ctx, providerName, podName, podNamespace, secretProviderClass, requiresUpdate) r.reporter.reportRotationDuration(ctx, time.Since(begin).Seconds()) }() diff --git a/pkg/rotation/stats_reporter.go b/pkg/rotation/stats_reporter.go index bc3fa8066..3d173e25f 100644 --- a/pkg/rotation/stats_reporter.go +++ b/pkg/rotation/stats_reporter.go @@ -30,11 +30,14 @@ const ( ) var ( - providerKey = "provider" - errorKey = "error_type" - osTypeKey = "os_type" - rotatedKey = "rotated" - runtimeOS = runtime.GOOS + providerKey = "provider" + errorKey = "error_type" + osTypeKey = "os_type" + rotatedKey = "rotated" + runtimeOS = runtime.GOOS + podNameKey = "pod_name" + podNamespaceKey = "pod_namespace" + spcKey = "secret_provider_class" ) type reporter struct { @@ -44,8 +47,8 @@ type reporter struct { } type StatsReporter interface { - reportRotationCtMetric(ctx context.Context, provider string, wasRotated bool) - reportRotationErrorCtMetric(ctx context.Context, provider, errType string, wasRotated bool) + reportRotationCtMetric(ctx context.Context, provider, podName, podNamespace, spc string, wasRotated bool) + reportRotationErrorCtMetric(ctx context.Context, provider, podName, podNamespace, spc, errType string, wasRotated bool) reportRotationDuration(ctx context.Context, duration float64) } @@ -67,21 +70,27 @@ func newStatsReporter() (StatsReporter, error) { return r, nil } -func (r *reporter) reportRotationCtMetric(ctx context.Context, provider string, wasRotated bool) { +func (r *reporter) reportRotationCtMetric(ctx context.Context, provider, podName, podNamespace, spc string, wasRotated bool) { opt := metric.WithAttributes( attribute.Key(providerKey).String(provider), attribute.Key(osTypeKey).String(runtimeOS), attribute.Key(rotatedKey).Bool(wasRotated), + attribute.Key(podNameKey).String(podName), + attribute.Key(podNamespaceKey).String(podNamespace), + attribute.Key(spcKey).String(spc), ) r.rotationReconcileTotal.Add(ctx, 1, opt) } -func (r *reporter) reportRotationErrorCtMetric(ctx context.Context, provider, errType string, wasRotated bool) { +func (r *reporter) reportRotationErrorCtMetric(ctx context.Context, provider, podName, podNamespace, spc, errType string, wasRotated bool) { opt := metric.WithAttributes( attribute.Key(providerKey).String(provider), attribute.Key(errorKey).String(errType), attribute.Key(osTypeKey).String(runtimeOS), attribute.Key(rotatedKey).Bool(wasRotated), + attribute.Key(podNameKey).String(podName), + attribute.Key(podNamespaceKey).String(podNamespace), + attribute.Key(spcKey).String(spc), ) r.rotationReconcileErrorTotal.Add(ctx, 1, opt) } diff --git a/pkg/secrets-store/mocks/stats_reporter_mock.go b/pkg/secrets-store/mocks/stats_reporter_mock.go index 669674adb..5af4afaba 100644 --- a/pkg/secrets-store/mocks/stats_reporter_mock.go +++ b/pkg/secrets-store/mocks/stats_reporter_mock.go @@ -18,6 +18,14 @@ package mocks // import sigs.k8s.io/secrets-store-csi-driver/pkg/secrets-store/m import "context" +type MetricDetails struct { + Provider string + PodName string + PodNamespace string + Spc string + ErrorType string +} + type FakeReporter struct { reportNodePublishCtMetricInvoked int reportNodeUnPublishCtMetricInvoked int @@ -25,29 +33,45 @@ type FakeReporter struct { reportNodeUnPublishErrorCtMetricInvoked int reportSyncK8SecretCtMetricInvoked int reportSyncK8SecretDurationInvoked int + metricDetails []MetricDetails } func NewFakeReporter() *FakeReporter { - return &FakeReporter{} + return &FakeReporter{ + metricDetails: []MetricDetails{}, + } } -func (f *FakeReporter) ReportNodePublishCtMetric(ctx context.Context, provider string) { +func (f *FakeReporter) ReportNodePublishCtMetric(ctx context.Context, provider, podName, podNamespace, spc string) { f.reportNodePublishCtMetricInvoked++ + f.metricDetails = append(f.metricDetails, MetricDetails{ + Provider: provider, + PodName: podName, + PodNamespace: podNamespace, + Spc: spc, + }) } func (f *FakeReporter) ReportNodeUnPublishCtMetric(ctx context.Context) { f.reportNodeUnPublishCtMetricInvoked++ } -func (f *FakeReporter) ReportNodePublishErrorCtMetric(ctx context.Context, provider, errType string) { +func (f *FakeReporter) ReportNodePublishErrorCtMetric(ctx context.Context, provider, podName, podNamespace, spc, errType string) { f.reportNodePublishErrorCtMetricInvoked++ + f.metricDetails = append(f.metricDetails, MetricDetails{ + Provider: provider, + PodName: podName, + PodNamespace: podNamespace, + Spc: spc, + ErrorType: errType, + }) } func (f *FakeReporter) ReportNodeUnPublishErrorCtMetric(ctx context.Context) { f.reportNodeUnPublishErrorCtMetricInvoked++ } -func (f *FakeReporter) ReportSyncK8SecretCtMetric(ctx context.Context, provider string, count int) { +func (f *FakeReporter) ReportSyncK8SecretCtMetric(ctx context.Context, provider, podName, podNamespace, spc string, count int) { f.reportSyncK8SecretCtMetricInvoked++ } @@ -73,3 +97,7 @@ func (f *FakeReporter) ReportSyncK8SecretCtMetricInvoked() int { func (f *FakeReporter) ReportSyncK8SecretDurationInvoked() int { return f.reportSyncK8SecretDurationInvoked } + +func (f *FakeReporter) GetMetricDetails() []MetricDetails { + return f.metricDetails +} diff --git a/pkg/secrets-store/nodeserver.go b/pkg/secrets-store/nodeserver.go index e2af8dcd0..88558d0c9 100644 --- a/pkg/secrets-store/nodeserver.go +++ b/pkg/secrets-store/nodeserver.go @@ -73,7 +73,7 @@ func (ns *nodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis startTime := time.Now() var parameters map[string]string var providerName string - var podName, podNamespace, podUID, serviceAccountName string + var podName, podNamespace, podUID, serviceAccountName, secretProviderClass string var targetPath string var mounted bool errorReason := internalerrors.FailedToMount @@ -89,10 +89,10 @@ func (ns *nodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis klog.ErrorS(unmountErr, "failed to unmounting target path") } } - ns.reporter.ReportNodePublishErrorCtMetric(ctx, providerName, errorReason) + ns.reporter.ReportNodePublishErrorCtMetric(ctx, providerName, podName, podNamespace, secretProviderClass, errorReason) return } - ns.reporter.ReportNodePublishCtMetric(ctx, providerName) + ns.reporter.ReportNodePublishCtMetric(ctx, providerName, podName, podNamespace, secretProviderClass) }() // Check arguments @@ -115,7 +115,7 @@ func (ns *nodeServer) NodePublishVolume(ctx context.Context, req *csi.NodePublis mountFlags := req.GetVolumeCapability().GetMount().GetMountFlags() secrets := req.GetSecrets() - secretProviderClass := attrib[secretProviderClassField] + secretProviderClass = attrib[secretProviderClassField] providerName = attrib["providerName"] podName = attrib[CSIPodName] podNamespace = attrib[CSIPodNamespace] diff --git a/pkg/secrets-store/nodeserver_test.go b/pkg/secrets-store/nodeserver_test.go index cef12021f..a647ef0cd 100644 --- a/pkg/secrets-store/nodeserver_test.go +++ b/pkg/secrets-store/nodeserver_test.go @@ -20,9 +20,12 @@ import ( "context" "os" "path/filepath" + "reflect" "testing" "time" + internalerrors "sigs.k8s.io/secrets-store-csi-driver/pkg/errors" + secretsstorev1 "sigs.k8s.io/secrets-store-csi-driver/apis/v1" "sigs.k8s.io/secrets-store-csi-driver/pkg/k8s" "sigs.k8s.io/secrets-store-csi-driver/pkg/secrets-store/mocks" @@ -65,18 +68,21 @@ func TestNodePublishVolume_Errors(t *testing.T) { nodePublishVolReq *csi.NodePublishVolumeRequest initObjects []client.Object want codes.Code + metricDetails []mocks.MetricDetails }{ { name: "volume capabilities nil", nodePublishVolReq: &csi.NodePublishVolumeRequest{}, want: codes.InvalidArgument, + metricDetails: []mocks.MetricDetails{{ErrorType: internalerrors.FailedToMount}, {ErrorType: internalerrors.FailedToMount}}, }, { name: "volume id is empty", nodePublishVolReq: &csi.NodePublishVolumeRequest{ VolumeCapability: &csi.VolumeCapability{}, }, - want: codes.InvalidArgument, + want: codes.InvalidArgument, + metricDetails: []mocks.MetricDetails{{ErrorType: internalerrors.FailedToMount}, {ErrorType: internalerrors.FailedToMount}}, }, { name: "target path is empty", @@ -84,7 +90,8 @@ func TestNodePublishVolume_Errors(t *testing.T) { VolumeCapability: &csi.VolumeCapability{}, VolumeId: "testvolid1", }, - want: codes.InvalidArgument, + want: codes.InvalidArgument, + metricDetails: []mocks.MetricDetails{{ErrorType: internalerrors.FailedToMount}, {ErrorType: internalerrors.FailedToMount}}, }, { name: "volume context is not set", @@ -93,7 +100,8 @@ func TestNodePublishVolume_Errors(t *testing.T) { VolumeId: "testvolid1", TargetPath: targetPath(t), }, - want: codes.InvalidArgument, + want: codes.InvalidArgument, + metricDetails: []mocks.MetricDetails{{ErrorType: internalerrors.FailedToMount}, {ErrorType: internalerrors.FailedToMount}}, }, { name: "secret provider class not found", @@ -103,7 +111,8 @@ func TestNodePublishVolume_Errors(t *testing.T) { TargetPath: targetPath(t), VolumeContext: map[string]string{"secretProviderClass": "provider1"}, }, - want: codes.Unknown, + want: codes.Unknown, + metricDetails: []mocks.MetricDetails{{Spc: "provider1", ErrorType: internalerrors.SecretProviderClassNotFound}, {Spc: "provider1", ErrorType: internalerrors.SecretProviderClassNotFound}}, }, { name: "spc missing", @@ -126,7 +135,8 @@ func TestNodePublishVolume_Errors(t *testing.T) { }, }, }, - want: codes.Unknown, + want: codes.Unknown, + metricDetails: []mocks.MetricDetails{{Provider: "provider1", PodName: "pod1", PodNamespace: "default", Spc: "provider1", ErrorType: internalerrors.SecretProviderClassNotFound}, {Provider: "provider1", PodName: "pod1", PodNamespace: "default", Spc: "provider1", ErrorType: internalerrors.SecretProviderClassNotFound}}, }, { name: "provider not set in secret provider class", @@ -144,7 +154,8 @@ func TestNodePublishVolume_Errors(t *testing.T) { }, }, }, - want: codes.Unknown, + want: codes.Unknown, + metricDetails: []mocks.MetricDetails{{PodName: "pod1", PodNamespace: "default", Spc: "provider1", ErrorType: internalerrors.FailedToMount}, {PodName: "pod1", PodNamespace: "default", Spc: "provider1", ErrorType: internalerrors.FailedToMount}}, }, { name: "parameters not set in secret provider class", @@ -165,7 +176,8 @@ func TestNodePublishVolume_Errors(t *testing.T) { }, }, }, - want: codes.Unknown, + want: codes.Unknown, + metricDetails: []mocks.MetricDetails{{Provider: "provider1", PodName: "pod1", PodNamespace: "default", Spc: "provider1", ErrorType: internalerrors.FailedToMount}, {Provider: "provider1", PodName: "pod1", PodNamespace: "default", Spc: "provider1", ErrorType: internalerrors.FailedToMount}}, }, { name: "read only is not set to true", @@ -187,7 +199,8 @@ func TestNodePublishVolume_Errors(t *testing.T) { }, }, }, - want: codes.InvalidArgument, + want: codes.InvalidArgument, + metricDetails: []mocks.MetricDetails{{Provider: "provider1", PodName: "pod1", PodNamespace: "default", Spc: "provider1", ErrorType: internalerrors.FailedToMount}, {Provider: "provider1", PodName: "pod1", PodNamespace: "default", Spc: "provider1", ErrorType: internalerrors.FailedToMount}}, }, { name: "provider not installed", @@ -215,7 +228,8 @@ func TestNodePublishVolume_Errors(t *testing.T) { }, }, }, - want: codes.Unknown, + want: codes.Unknown, + metricDetails: []mocks.MetricDetails{{Provider: "provider_not_installed", PodName: "pod1", PodNamespace: "default", Spc: "provider1"}, {Provider: "provider_not_installed", PodName: "pod1", PodNamespace: "default", Spc: "provider1"}}, }, } @@ -236,7 +250,7 @@ func TestNodePublishVolume_Errors(t *testing.T) { } for attempts := 2; attempts > 0; attempts-- { - // How many times 'total_node_publish' and 'total_node_publish_error' counters have been incremented so far. + // How many times 'node_publish_total' and 'node_publish_total_error' counters have been incremented so far. c, cErr := r.ReportNodePublishCtMetricInvoked(), r.ReportNodePublishErrorCtMetricInvoked() _, err = ns.NodePublishVolume(context.TODO(), test.nodePublishVolReq) @@ -246,10 +260,14 @@ func TestNodePublishVolume_Errors(t *testing.T) { // Check that the correct counter has been incremented. if err != nil && r.ReportNodePublishErrorCtMetricInvoked() <= cErr { - t.Fatalf("expected 'total_node_publish_error' counter to be incremented, but it was not") + t.Fatalf("expected 'node_publish_total_error' counter to be incremented, but it was not") } if err == nil && r.ReportNodePublishCtMetricInvoked() <= c { - t.Fatalf("expected 'total_node_publish' counter to be incremented, but it was not") + t.Fatalf("expected 'node_publish_total' counter to be incremented, but it was not") + } + metricDetails := r.GetMetricDetails() + if !reflect.DeepEqual(metricDetails[2-attempts], test.metricDetails[2-attempts]) { + t.Fatalf("expected publish details to be %+v, but it was %+v", test.metricDetails[2-attempts], metricDetails[2-attempts]) } // if theres not an error we should check the mount w @@ -270,6 +288,7 @@ func TestNodePublishVolume(t *testing.T) { name string nodePublishVolReq *csi.NodePublishVolumeRequest initObjects []client.Object + metricDetails []mocks.MetricDetails }{ { name: "volume mount", @@ -297,6 +316,7 @@ func TestNodePublishVolume(t *testing.T) { }, }, }, + metricDetails: []mocks.MetricDetails{{Provider: "provider1", PodName: "pod1", PodNamespace: "default", Spc: "provider1"}, {PodName: "pod1", PodNamespace: "default", Spc: "provider1"}}, }, { name: "volume mount with refresh token", @@ -327,6 +347,7 @@ func TestNodePublishVolume(t *testing.T) { }, }, }, + metricDetails: []mocks.MetricDetails{{Provider: "provider1", PodName: "pod1", PodNamespace: "default", Spc: "provider1"}, {Provider: "provider1", PodName: "pod1", PodNamespace: "default", Spc: "provider1"}}, }, } @@ -347,20 +368,29 @@ func TestNodePublishVolume(t *testing.T) { } for attempts := 2; attempts > 0; attempts-- { - // How many times 'total_node_publish' and 'total_node_publish_error' counters have been incremented so far. + // How many times 'node_publish_total' and 'node_publish_total_error' counters have been incremented so far. c, cErr := r.ReportNodePublishCtMetricInvoked(), r.ReportNodePublishErrorCtMetricInvoked() - _, err = ns.NodePublishVolume(context.TODO(), test.nodePublishVolReq) if code := status.Code(err); code != codes.OK { t.Errorf("expected RPC status code: %v, got: %v\n", codes.OK, err) } + metricDetails := r.GetMetricDetails() // Check that the correct counter has been incremented. if err != nil && r.ReportNodePublishErrorCtMetricInvoked() <= cErr { - t.Fatalf("expected 'total_node_publish_error' counter to be incremented, but it was not") + t.Fatalf("expected 'node_publish_total_error' counter to be incremented, but it was not") } if err == nil && r.ReportNodePublishCtMetricInvoked() <= c { - t.Fatalf("expected 'total_node_publish' counter to be incremented, but it was not") + t.Fatalf("expected 'node_publish_total' counter to be incremented, but it was not") + } + if !reflect.DeepEqual(metricDetails[2-attempts], test.metricDetails[2-attempts]) { + t.Fatalf("expected publish details to be %+v, but it was %+v", test.metricDetails[2-attempts], metricDetails[2-attempts]) + } + if !reflect.DeepEqual(metricDetails[2-attempts], test.metricDetails[2-attempts]) { + t.Fatalf("expected publish details to be %+v, but it was %+v", test.metricDetails[2-attempts], metricDetails[2-attempts]) + } + if !reflect.DeepEqual(metricDetails[2-attempts], test.metricDetails[2-attempts]) { + t.Fatalf("expected publish details to be %+v, but it was %+v", test.metricDetails[2-attempts], metricDetails[2-attempts]) } // if theres not an error we should check the mount w @@ -407,7 +437,7 @@ func TestNodeUnpublishVolume(t *testing.T) { // Repeat the request multiple times to ensure it consistently returns OK, // even if it has already been unmounted. for attempts := 2; attempts > 0; attempts-- { - // How many times 'total_node_unpublish' and 'total_node_unpublish_error' counters have been incremented so far + // How many times 'node_unpublish_total' and 'node_unpublish_total_error' counters have been incremented so far c, cErr := r.ReportNodeUnPublishCtMetricInvoked(), r.ReportNodeUnPublishErrorCtMetricInvoked() _, err := ns.NodeUnpublishVolume(context.TODO(), req) @@ -417,10 +447,10 @@ func TestNodeUnpublishVolume(t *testing.T) { // Check that the correct counter has been incremented if err != nil && r.ReportNodeUnPublishErrorCtMetricInvoked() <= cErr { - t.Fatalf("expected 'total_node_unpublish_error' counter to be incremented, but it was not") + t.Fatalf("expected 'node_unpublish_total_error' counter to be incremented, but it was not") } if err == nil && r.ReportNodeUnPublishCtMetricInvoked() <= c { - t.Fatalf("expected 'total_node_unpublish' counter to be incremented, but it was not") + t.Fatalf("expected 'node_unpublish_total' counter to be incremented, but it was not") } // Ensure that the mounts were unmounted. @@ -469,7 +499,7 @@ func TestNodeUnpublishVolume_Error(t *testing.T) { } for attempts := 2; attempts > 0; attempts-- { - // How many times 'total_node_unpublish' and 'total_node_unpublish_error' counters have been incremented so far + // How many times 'node_unpublish_total' and 'node_unpublish_total_error' counters have been incremented so far c, cErr := r.ReportNodeUnPublishCtMetricInvoked(), r.ReportNodeUnPublishErrorCtMetricInvoked() _, err := ns.NodeUnpublishVolume(context.TODO(), test.nodeUnpublishVolReq) @@ -479,10 +509,10 @@ func TestNodeUnpublishVolume_Error(t *testing.T) { // Check that the correct counter has been incremented if err != nil && r.ReportNodeUnPublishErrorCtMetricInvoked() <= cErr { - t.Fatalf("expected 'total_node_unpublish_error' counter to be incremented, but it was not") + t.Fatalf("expected 'node_unpublish_total_error' counter to be incremented, but it was not") } if err == nil && r.ReportNodeUnPublishCtMetricInvoked() <= c { - t.Fatalf("expected 'total_node_unpublish' counter to be incremented, but it was not") + t.Fatalf("expected 'node_unpublish_total' counter to be incremented, but it was not") } } }) diff --git a/pkg/secrets-store/stats_reporter.go b/pkg/secrets-store/stats_reporter.go index 07447eb88..57c7b8629 100644 --- a/pkg/secrets-store/stats_reporter.go +++ b/pkg/secrets-store/stats_reporter.go @@ -30,10 +30,13 @@ const ( ) var ( - providerKey = "provider" - errorKey = "error_type" - osTypeKey = "os_type" - runtimeOS = runtime.GOOS + providerKey = "provider" + errorKey = "error_type" + osTypeKey = "os_type" + runtimeOS = runtime.GOOS + podNameKey = "pod_name" + podNamespaceKey = "pod_namespace" + spcKey = "secret_provider_class" ) type reporter struct { @@ -46,11 +49,11 @@ type reporter struct { } type StatsReporter interface { - ReportNodePublishCtMetric(ctx context.Context, provider string) + ReportNodePublishCtMetric(ctx context.Context, provider, podName, podNamespace, spc string) ReportNodeUnPublishCtMetric(ctx context.Context) - ReportNodePublishErrorCtMetric(ctx context.Context, provider, errType string) + ReportNodePublishErrorCtMetric(ctx context.Context, provider, podName, podNamespace, spc, errType string) ReportNodeUnPublishErrorCtMetric(ctx context.Context) - ReportSyncK8SecretCtMetric(ctx context.Context, provider string, count int) + ReportSyncK8SecretCtMetric(ctx context.Context, provider, podName, podNamespace, spc string, count int) ReportSyncK8SecretDuration(ctx context.Context, duration float64) } @@ -81,10 +84,13 @@ func NewStatsReporter() (StatsReporter, error) { return r, nil } -func (r *reporter) ReportNodePublishCtMetric(ctx context.Context, provider string) { +func (r *reporter) ReportNodePublishCtMetric(ctx context.Context, provider, podName, podNamespace, spc string) { opt := metric.WithAttributes( attribute.Key(providerKey).String(provider), attribute.Key(osTypeKey).String(runtimeOS), + attribute.Key(podNameKey).String(podName), + attribute.Key(podNamespaceKey).String(podNamespace), + attribute.Key(spcKey).String(spc), ) r.nodePublishTotal.Add(ctx, 1, opt) } @@ -96,11 +102,14 @@ func (r *reporter) ReportNodeUnPublishCtMetric(ctx context.Context) { r.nodeUnPublishTotal.Add(ctx, 1, opt) } -func (r *reporter) ReportNodePublishErrorCtMetric(ctx context.Context, provider, errType string) { +func (r *reporter) ReportNodePublishErrorCtMetric(ctx context.Context, provider, podName, podNamespace, spc, errType string) { opt := metric.WithAttributes( attribute.Key(providerKey).String(provider), attribute.Key(errorKey).String(errType), attribute.Key(osTypeKey).String(runtimeOS), + attribute.Key(podNameKey).String(podName), + attribute.Key(podNamespaceKey).String(podNamespace), + attribute.Key(spcKey).String(spc), ) r.nodePublishErrorTotal.Add(ctx, 1, opt) } @@ -112,10 +121,13 @@ func (r *reporter) ReportNodeUnPublishErrorCtMetric(ctx context.Context) { r.nodeUnPublishErrorTotal.Add(ctx, 1, opt) } -func (r *reporter) ReportSyncK8SecretCtMetric(ctx context.Context, provider string, count int) { +func (r *reporter) ReportSyncK8SecretCtMetric(ctx context.Context, provider, podName, podNamespace, spc string, count int) { opt := metric.WithAttributes( attribute.Key(providerKey).String(provider), attribute.Key(osTypeKey).String(runtimeOS), + attribute.Key(podNameKey).String(podName), + attribute.Key(podNamespaceKey).String(podNamespace), + attribute.Key(spcKey).String(spc), ) r.syncK8sSecretTotal.Add(ctx, int64(count), opt) }