opendatahub-io · jiridanek · Dec 17, 2024 · Jan 10, 2025 · Jan 10, 2025 · Jan 11, 2025
diff --git a/components/odh-notebook-controller/controllers/matchers_test.go b/components/odh-notebook-controller/controllers/matchers_test.go
@@ -0,0 +1,394 @@
+package controllers
+
+import (
+	"fmt"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"log"
+	"reflect"
+	"regexp"
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	"github.com/onsi/gomega/types"
+	"github.com/stretchr/testify/assert"
+
+	routev1 "github.com/openshift/api/route/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+)
+
+// See https://onsi.github.io/gomega/#adding-your-own-matchers
+// for docs on creating custom Gomega matchers.
+// There are some matcher examples at https://github.com/onsi/gomega/wiki
+
+// BeMatchingK8sResource is a custom Gomega matcher that compares using `comparator` function and reports differences using
+// [cmp.Diff]. It attempts to minimize the diff to only include those entries that cause `comparator` to fail.
+//
+// Use this to replace assertions such as
+// > Expect(CompareNotebookRoutes(*route, expectedRoute)).Should(BeTrueBecause(cmp.Diff(*route, expectedRoute)))
+// with
+// > Expect(*route).To(BeMatchingK8sResource(expectedRoute, CompareNotebookRoutes))
+//
+// The failure message diff tells you how to modify the [expected] so that is matches [actual].
+//
+// # Motivation
+//
+// When writing controller, we often compare current with expected, and if there is mismatch, we submit update.
+// Usually we only care about some specific fields being mismatched, therefore we have a comparator function that decides equality.
+// When writing tests, we ended up reusing these equality functions for assertions.
+// The problem is that the functions return only true or false and not the reason for reporting a mismatch.
+// This matcher can identify the fields that cause mismatch and reports them with a nice diff.
+//
+// # Alternatives
+//
+// The alternate approaches that can be found online seem to focus on specifying the comparer in such a way that it can be
+// introspected and the mismatch report can be then generated by inspecting the comparer. This implementation there
+// treats the comparer as a black box and requires no changes to the existing code.
+// - https://github.com/JiaYongfei/respect, establish a template struct that says what must match, the "default" or undefined fields don't count
+// - https://onsi.github.io/gomega/#codegstructcode-testing-complex-data-types, creates an assertion function by building up a structural template
+//
+// Example output
+//
+//	Expected
+//	    v1.Route{TypeMeta:v1.TypeMeta{Kind:"", APIVersion:""}, ObjectMeta:v1.ObjectMeta{Name:"test-notebook", ...
+//	to compare identical to
+//	    v1.Route{TypeMeta:v1.TypeMeta{Kind:"", APIVersion:""}, ObjectMeta:v1.ObjectMeta{Name:"test-notebook", ...
+//	but it differs in
+//	  v1.Route{
+//	    TypeMeta:   {},
+//	    ObjectMeta: {Name: "test-notebook", Namespace: "default", UID: "35eb6144-527a-46f6-8d25-defc99f50b82", ResourceVersion: "216", ...},
+//	    Spec: v1.RouteSpec{
+//	            Host:      "",
+//	            Subdomain: "",
+//	            Path:      "",
+//	            To: v1.RouteTargetReference{
+//	-                   Kind:   "Service",
+//	+                   Kind:   "No Such Service",
+//	                    Name:   "test-notebook",
+//	                    Weight: &100,
+//	            },
+//	            AlternateBackends: nil,
+//	            Port:              &{TargetPort: {Type: 1, StrVal: "http-test-notebook"}},
+//	            ... // 2 identical fields
+//	    },
+//	    Status: {Ingress: {}},
+//	  }
+//
+// NOTE: The diff minimization functionality is best-effort. It is designed to never under-approximate, but over-approximation is possible.
+// NOTE2: Using [gcustom.MakeMatcher] is not possible because it does not conveniently allow running [cmp.Diff] at the time of failure message generation.
+func BeMatchingK8sResource[T any, PT interface {
+	deepCopyer[T]
+	*T
+}](expected T, comparator func(T, T) bool) types.GomegaMatcher {
+	return &beMatchingK8sResource[T, PT]{
+		expected:   expected,
+		comparator: comparator,
+	}
+}
+
+// beMatchingK8sResource is a parameterized (=generic) type.
+// See https://stackoverflow.com/questions/71444847/go-with-generics-type-t-is-pointer-to-type-parameter-not-type-parameter
+type beMatchingK8sResource[T any, PT interface {
+	*T
+	deepCopyer[T]
+}] struct {
+	expected   T
+	comparator func(r1 T, r2 T) bool
+}
+
+var _ types.GomegaMatcher = &beMatchingK8sResource[routev1.Route, *routev1.Route]{}
+
+func (m *beMatchingK8sResource[T, PT]) Match(actual interface{}) (success bool, err error) {
+	actualT, ok := actual.(T)
+	if !ok {
+		return false, fmt.Errorf("BeMatchingK8sResource matcher expects two objects of the same type")
+	}
+
+	return m.comparator(m.expected, actualT), nil
+}
+
+func (m *beMatchingK8sResource[T, PT]) FailureMessage(actual interface{}) (message string) {
+	//m.expected
+	diff := m.computeMinimizedDiff(actual.(T))
+	return fmt.Sprintf("Expected\n\t%#v\nto compare identical to\n\t%#v\nbut it differs in\n%s", actual, m.expected, diff)
+}
+
+func (m *beMatchingK8sResource[T, PT]) NegatedFailureMessage(actual interface{}) (message string) {
+	diff := m.computeMinimizedDiff(actual.(T))
+	return fmt.Sprintf("Expected\n\t%#v\nto not compare identical to\n\t%#v\nit differs in\n%s", actual, m.expected, diff)
+}
+
+// diffReporter is the basis of a custom [cmp.Reporter] that records differences detected during comparison.
+// See example_reporter_test.go in the go-cmp package where this comes from.
+// https://github.com/google/go-cmp/blob/master/cmp/example_reporter_test.go
+//
+// Originally, go-cmp was created when the hidden time struct field for monotonic timestamps was added. https://github.com/google/go-cmp/issues/326#issuecomment-1465122016
+// Most issues are closed with suggestion to DIY through a custom [cmp.Reporter] https://github.com/google/go-cmp/issues/323
+// or to parse the generated diff as in https://github.com/google/go-cmp/issues/230
+//
+// The package does not give any guarantees how the diff is actually formatted //
+// > Throughout cmp's history, it has made a number of significant changes to the exact output of Diff.
+// > The closest equivalent of Chunk is the Reporter feature.
+// > Even there, we document that "within a slice, the exact set of inserted, removed, or modified elements is unspecified and may change in future implementations."
+//
+// Alternatives:
+//   - https://github.com/kylelemons/godebug/blob/master/diff/diff_test.go
+//   - https://pkg.go.dev/github.com/davecgh/go-spew/spew
+type diffReporter struct {
+	// path to the current node when visiting the structure fields
+	path cmp.Path
+
+	// OnDiff is a handler that allows us to work with the currently stored [path].
+	// It is called whenever we are visiting leaf values that are reported as unequal.
+	OnDiff func(path cmp.Path)
+}
+
+func (r *diffReporter) PushStep(ps cmp.PathStep) {
+	// NOTE (from docs): The PathStep itself is only valid until the step is popped.
+	r.path = append(r.path, ps)
+}
+func (r *diffReporter) Report(rs cmp.Result) {
+	if rs.Equal() {
+		return
+	}
+	r.OnDiff(r.path)
+}
+func (r *diffReporter) PopStep() {
+	r.path = r.path[:len(r.path)-1]
+}
+
+// deepCopyer contains the deep-copy methods that Kubernetes types all have.
+// This is used to create deep copies of the compared structs for the purposes of
+// isolating what specific differences cause them to not be equal.
+//
+// Doing deep copies in the general case can be tricky. We are lucky that K8s types have
+// a deep copy method already. The general case solutions have to worry about
+// - pointers (two different fields may contain the same pointer; will the pointers be the same in the copy?)
+// - cycles (can it copy a recursive data structure?)
+// - unexported fields (go reflection can read (but not set) them; regular assignment does set them)
+//
+// References
+// - https://www.redhat.com/en/blog/kubernetes-deep-dive-code-generation-customresources
+// - https://book.kubebuilder.io/reference/markers/object#objectdeepcopy
+// - https://github.com/brunoga/deep
+// - https://github.com/tiendc/go-deepcopy
+// - https://github.com/qdm12/reprint
+type deepCopyer[T any] interface {
+	DeepCopyInto(out *T)
+	DeepCopy() *T
+	DeepCopyObject() runtime.Object
+}
+
+// computeMinimizedDiff uses [diffReporter] to get all differences and for each it checks whether it
+// causes the two structs to compare as unequal or not. Outputs a [cmp.Diff]-style diff message.
+//
+// The implementation works as follows:
+//
+// For every leaf node reported by [diffReportr], we construct a deep copy of the original actual struct with just that leaf value changed to its expected counterpart.
+// Then we run the comparator function between the deep copy and the real actual.
+// If the comparator reports the two are different, we apply this difference onto a yet another deep copy where we accumulate all such differences.
+// In This way we process all the differences.
+// At the end we run the full cmp.Diff between the original actual struct and the accumulated-differences struct, and that will be the diff.
+//
+// Therefore, the final diff tells us how actual needs to change to compare equal with expected.
+//
+// This implementation is extremely inefficient and somewhat finicky, but it produces the most correct result.
+// Greatest limitation is that it assumes that individual differences between objects can be correctly judged in isolation.
+//
+// This does not use https://pkg.go.dev/k8s.io/apimachinery/pkg/api/equality, which is intentional.
+func (m *beMatchingK8sResource[T, PT]) computeMinimizedDiff(actual T) (message string) {
+	debug := false
+
+	// https://go.dev/blog/defer-panic-and-recover
+	defer func() {
+		// in case things later go wrong with diff minimization
+		if r := recover(); r != nil {
+			// return the un-minimized diff so that we have at least something
+			message = cmp.Diff(actual, m.expected) + "\n" +
+				"while computing the diff, there was a crash\n\t" +
+				fmt.Sprint(r)
+		}
+	}()
+
+	// accumulator is a struct to which we apply all the diffs that influence comparison result
+	accumulator := PT(&actual).DeepCopy()
+
+	// values in the path are only valid until the diffReporter pops up, so we need to work with it
+	// as diffReporter is running; a callback is one convenient way to structure the code around this
+	reporter := diffReporter{OnDiff: func(diffPath cmp.Path) {
+		clone := PT(&actual).DeepCopy()
+
+		// reflect.ValueOf(&v).Elem() is used to get a modifiable value
+		current := reflect.ValueOf(clone).Elem()
+		pointer := reflect.ValueOf(accumulator).Elem()
+
+		// the root of the path is operation-less, skip path[0]
+		var i int
+		for i = 1; i < len(diffPath); i++ {
+			vx, vy := diffPath[i].Values()
+			// if either of the two values (expected, actual) is invalid, break off
+			if !vx.IsValid() || !vy.IsValid() {
+				// it may happen that more than one OnDiff will break off at the same place
+				// but that does not matter for correctness, it will just be a bit inefficient
+				break
+			}
+
+			switch step := diffPath[i].(type) {
+			case cmp.StructField:
+				current = current.Field(step.Index())
+				pointer = pointer.Field(step.Index())
+			case cmp.SliceIndex:
+				current = current.Index(step.Key())
+				pointer = pointer.Index(step.Key())
+			case cmp.MapIndex:
+				current = current.MapIndex(step.Key())
+				pointer = pointer.MapIndex(step.Key())
+			case cmp.Indirect:
+				current = reflect.Indirect(current)
+				pointer = reflect.Indirect(pointer)
+			case cmp.TypeAssertion:
+				// this is imo a noop, we don't care about checking type assertions
+			case cmp.Transform:
+				// it cannot happen in this codebase, so it's best left unhandled
+				panic("unhandled situation: we can't go deeper and have to end the traversal here")
+			default:
+				panic("unreachable")
+			}
+		}
+
+		// after the for loop diffPath[i] either has invalid value or is index out of range, so i-1 does it
+		vx, vy := diffPath[i-1].Values()
+		if debug {
+			log.Printf("%#v:\n\t-: %+v\n\t+: %+v\n", diffPath, vx, vy)
+		}
+
+		current.Set(vx)
+
+		// Check whether our comparator function's result is influenced by this field's value
+		if !m.comparator(actual, *clone) {
+			// If yes, store that in the accumulator object from which we later compute a final diff
+			pointer.Set(vx)
+		}
+	}}
+	cmp.Equal(m.expected, actual, cmp.Reporter(&reporter))
+
+	// Compute the minimized diff
+	return cmp.Diff(actual, *accumulator)
+}
+
+const (
+	MatcherPanickedMessage = "while computing the diff, there was a crash"
+)
+
+func Test_Reflection_CanSet(t *testing.T) {
+	someRoute := routev1.Route{
+		Spec: routev1.RouteSpec{
+			Host: "someRouteHost",
+			Path: "someRoutePath",
+		},
+	}
+	// reflection needs to go through a pointer, otherwise [reflect.Value.CanSet()] returns false
+	field := reflect.ValueOf(&someRoute).Elem().FieldByName("Spec").FieldByName("Host")
+	assert.Equal(t, "someRouteHost", field.String())
+	assert.True(t, field.CanSet())
+	field.Set(reflect.ValueOf("someOtherRouteHost"))
+	assert.Equal(t, "someOtherRouteHost", someRoute.Spec.Host)
+}
+
+// Basic happy case. The diff does not include the Path change because the
+// comparator function is not sensitive to it.
+func Test_BeMatchingK8sResource_DiffRoute(t *testing.T) {
+	someRoute := routev1.Route{
+		Spec: routev1.RouteSpec{
+			Host: "someRouteHost",
+			Path: "someRoutePath",
+		},
+	}
+	someOtherRoute := routev1.Route{
+		Spec: routev1.RouteSpec{
+			Host: "someOtherRouteHost",
+			Path: "someOtherRoutePath",
+		},
+	}
+	matcher := BeMatchingK8sResource(someOtherRoute, func(r1 routev1.Route, r2 routev1.Route) bool { return r1.Spec.Host == r2.Spec.Host })
+	res, err := matcher.Match(someRoute)
+	assert.NoError(t, err)
+	assert.False(t, res)
+
+	// go-cmp uses non-breaking spaces at random in the diff output msg, lol
+	assert.Regexp(t, regexp.MustCompile(`^[\pZ\pC]+$`), ` 	 `)
+
+	// go-cmp diff output is not guaranteed to remain unchanged, so this may need revisiting
+	msg := matcher.FailureMessage(someRoute)
+	assert.Regexp(t, regexp.MustCompile(`-[\pZ\pC]+Host:\s+"someRouteHost",`), msg)
+	assert.Regexp(t, regexp.MustCompile(`\+[\pZ\pC]+Host:\s+"someOtherRouteHost",`), msg)
+
+	assert.NotRegexp(t, regexp.MustCompile(`-[\pZ\pC]+Path:\s+"someRoutePath",`), msg)
+	assert.NotRegexp(t, regexp.MustCompile(`\+[\pZ\pC]+Path:\s+"someOtherRoutePath",`), msg)
+}
+
+// Structs contain different keys, so the comparison has to happen
+// on a map as a whole, because the second struct lacks the key from first.
+// This is something I forgot to handle originally.
+func Test_BeMatchingK8sResource_MismatchedStructs(t *testing.T) {
+	someRoute := routev1.Route{
+		ObjectMeta: metav1.ObjectMeta{
+			Name: "someRouteName",
+			Labels: map[string]string{
+				"foo": "bar",
+			},
+		},
+	}
+	someOtherRoute := routev1.Route{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:   "someOtherRouteName",
+			Labels: nil,
+		},
+	}
+	matcher := BeMatchingK8sResource(someOtherRoute, func(r1 routev1.Route, r2 routev1.Route) bool {
+		return r1.ObjectMeta.Name == r2.ObjectMeta.Name
+	})
+	res, err := matcher.Match(someRoute)
+	assert.NoError(t, err)
+	assert.False(t, res)
+
+	msg := matcher.FailureMessage(someRoute)
+	assert.NotContains(t, msg, MatcherPanickedMessage)
+}
+
+// Checks for a situation where comparator function panics
+// when we try to use it to generate minimal diff.
+func Test_BeMatchingK8sResource_CrashingMatcher(t *testing.T) {
+	someRoute := routev1.Route{
+		Spec: routev1.RouteSpec{
+			Host: "someRouteHost",
+			Path: "someRoutePath",
+		},
+	}
+	someOtherRoute := routev1.Route{
+		Spec: routev1.RouteSpec{
+			Host: "someOtherRouteHost",
+			Path: "someOtherRoutePath",
+		},
+	}
+	shouldPanic := false
+	matcher := BeMatchingK8sResource(someOtherRoute, func(r1 routev1.Route, r2 routev1.Route) bool {
+		if shouldPanic {
+			panic("je nanic")
+		}
+		return r1.Spec.Host == r2.Spec.Host
+	})
+	res, err := matcher.Match(someRoute)
+	assert.NoError(t, err)
+	assert.False(t, res)
+
+	shouldPanic = true
+	msg := matcher.FailureMessage(someRoute)
+	assert.Contains(t, msg, MatcherPanickedMessage)
+	assert.Contains(t, msg, "je nanic")
+
+	assert.Regexp(t, regexp.MustCompile(`-[\pZ\pC]+Host:\s+"someRouteHost",`), msg)
+	assert.Regexp(t, regexp.MustCompile(`\+[\pZ\pC]+Host:\s+"someOtherRouteHost",`), msg)
+
+	assert.Regexp(t, regexp.MustCompile(`-[\pZ\pC]+Path:\s+"someRoutePath",`), msg)
+	assert.Regexp(t, regexp.MustCompile(`\+[\pZ\pC]+Path:\s+"someOtherRoutePath",`), msg)
+}