Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RHOAIENG-17634: chore(odh-nbc/tests): implement diff minimization for k8s objects diffs #503

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
394 changes: 394 additions & 0 deletions components/odh-notebook-controller/controllers/matchers_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,394 @@
package controllers

import (
"fmt"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"log"
"reflect"
"regexp"
"testing"

"github.com/google/go-cmp/cmp"
"github.com/onsi/gomega/types"
"github.com/stretchr/testify/assert"

routev1 "github.com/openshift/api/route/v1"
"k8s.io/apimachinery/pkg/runtime"
)

// See https://onsi.github.io/gomega/#adding-your-own-matchers
// for docs on creating custom Gomega matchers.
// There are some matcher examples at https://github.com/onsi/gomega/wiki

// BeMatchingK8sResource is a custom Gomega matcher that compares using `comparator` function and reports differences using
// [cmp.Diff]. It attempts to minimize the diff to only include those entries that cause `comparator` to fail.
//
// Use this to replace assertions such as
// > Expect(CompareNotebookRoutes(*route, expectedRoute)).Should(BeTrueBecause(cmp.Diff(*route, expectedRoute)))
// with
// > Expect(*route).To(BeMatchingK8sResource(expectedRoute, CompareNotebookRoutes))
//
// The failure message diff tells you how to modify the [expected] so that is matches [actual].
//
// # Motivation
//
// When writing controller, we often compare current with expected, and if there is mismatch, we submit update.
// Usually we only care about some specific fields being mismatched, therefore we have a comparator function that decides equality.
// When writing tests, we ended up reusing these equality functions for assertions.
// The problem is that the functions return only true or false and not the reason for reporting a mismatch.
// This matcher can identify the fields that cause mismatch and reports them with a nice diff.
//
// # Alternatives
//
// The alternate approaches that can be found online seem to focus on specifying the comparer in such a way that it can be
// introspected and the mismatch report can be then generated by inspecting the comparer. This implementation there
// treats the comparer as a black box and requires no changes to the existing code.
// - https://github.com/JiaYongfei/respect, establish a template struct that says what must match, the "default" or undefined fields don't count
// - https://onsi.github.io/gomega/#codegstructcode-testing-complex-data-types, creates an assertion function by building up a structural template
//
// Example output
//
// Expected
// v1.Route{TypeMeta:v1.TypeMeta{Kind:"", APIVersion:""}, ObjectMeta:v1.ObjectMeta{Name:"test-notebook", ...
// to compare identical to
// v1.Route{TypeMeta:v1.TypeMeta{Kind:"", APIVersion:""}, ObjectMeta:v1.ObjectMeta{Name:"test-notebook", ...
// but it differs in
// v1.Route{
// TypeMeta: {},
// ObjectMeta: {Name: "test-notebook", Namespace: "default", UID: "35eb6144-527a-46f6-8d25-defc99f50b82", ResourceVersion: "216", ...},
// Spec: v1.RouteSpec{
// Host: "",
// Subdomain: "",
// Path: "",
// To: v1.RouteTargetReference{
// - Kind: "Service",
// + Kind: "No Such Service",
// Name: "test-notebook",
// Weight: &100,
// },
// AlternateBackends: nil,
// Port: &{TargetPort: {Type: 1, StrVal: "http-test-notebook"}},
// ... // 2 identical fields
// },
// Status: {Ingress: {}},
// }
//
// NOTE: The diff minimization functionality is best-effort. It is designed to never under-approximate, but over-approximation is possible.
// NOTE2: Using [gcustom.MakeMatcher] is not possible because it does not conveniently allow running [cmp.Diff] at the time of failure message generation.
func BeMatchingK8sResource[T any, PT interface {
deepCopyer[T]
*T
}](expected T, comparator func(T, T) bool) types.GomegaMatcher {
return &beMatchingK8sResource[T, PT]{
expected: expected,
comparator: comparator,
}
}

// beMatchingK8sResource is a parameterized (=generic) type.
// See https://stackoverflow.com/questions/71444847/go-with-generics-type-t-is-pointer-to-type-parameter-not-type-parameter
type beMatchingK8sResource[T any, PT interface {
*T
deepCopyer[T]
}] struct {
expected T
comparator func(r1 T, r2 T) bool
}

var _ types.GomegaMatcher = &beMatchingK8sResource[routev1.Route, *routev1.Route]{}

func (m *beMatchingK8sResource[T, PT]) Match(actual interface{}) (success bool, err error) {
actualT, ok := actual.(T)
if !ok {
return false, fmt.Errorf("BeMatchingK8sResource matcher expects two objects of the same type")
}

return m.comparator(m.expected, actualT), nil
}

func (m *beMatchingK8sResource[T, PT]) FailureMessage(actual interface{}) (message string) {
//m.expected
diff := m.computeMinimizedDiff(actual.(T))
return fmt.Sprintf("Expected\n\t%#v\nto compare identical to\n\t%#v\nbut it differs in\n%s", actual, m.expected, diff)
}

func (m *beMatchingK8sResource[T, PT]) NegatedFailureMessage(actual interface{}) (message string) {
diff := m.computeMinimizedDiff(actual.(T))
return fmt.Sprintf("Expected\n\t%#v\nto not compare identical to\n\t%#v\nit differs in\n%s", actual, m.expected, diff)
}

// diffReporter is the basis of a custom [cmp.Reporter] that records differences detected during comparison.
// See example_reporter_test.go in the go-cmp package where this comes from.
// https://github.com/google/go-cmp/blob/master/cmp/example_reporter_test.go
//
// Originally, go-cmp was created when the hidden time struct field for monotonic timestamps was added. https://github.com/google/go-cmp/issues/326#issuecomment-1465122016
// Most issues are closed with suggestion to DIY through a custom [cmp.Reporter] https://github.com/google/go-cmp/issues/323
// or to parse the generated diff as in https://github.com/google/go-cmp/issues/230
//
// The package does not give any guarantees how the diff is actually formatted //
// > Throughout cmp's history, it has made a number of significant changes to the exact output of Diff.
// > The closest equivalent of Chunk is the Reporter feature.
// > Even there, we document that "within a slice, the exact set of inserted, removed, or modified elements is unspecified and may change in future implementations."
//
// Alternatives:
// - https://github.com/kylelemons/godebug/blob/master/diff/diff_test.go
// - https://pkg.go.dev/github.com/davecgh/go-spew/spew
type diffReporter struct {
// path to the current node when visiting the structure fields
path cmp.Path

// OnDiff is a handler that allows us to work with the currently stored [path].
// It is called whenever we are visiting leaf values that are reported as unequal.
OnDiff func(path cmp.Path)
}

func (r *diffReporter) PushStep(ps cmp.PathStep) {
// NOTE (from docs): The PathStep itself is only valid until the step is popped.
r.path = append(r.path, ps)
}
func (r *diffReporter) Report(rs cmp.Result) {
if rs.Equal() {
return
}
r.OnDiff(r.path)
}
func (r *diffReporter) PopStep() {
r.path = r.path[:len(r.path)-1]
}

// deepCopyer contains the deep-copy methods that Kubernetes types all have.
// This is used to create deep copies of the compared structs for the purposes of
// isolating what specific differences cause them to not be equal.
//
// Doing deep copies in the general case can be tricky. We are lucky that K8s types have
// a deep copy method already. The general case solutions have to worry about
// - pointers (two different fields may contain the same pointer; will the pointers be the same in the copy?)
// - cycles (can it copy a recursive data structure?)
// - unexported fields (go reflection can read (but not set) them; regular assignment does set them)
//
// References
// - https://www.redhat.com/en/blog/kubernetes-deep-dive-code-generation-customresources
// - https://book.kubebuilder.io/reference/markers/object#objectdeepcopy
// - https://github.com/brunoga/deep
// - https://github.com/tiendc/go-deepcopy
// - https://github.com/qdm12/reprint
type deepCopyer[T any] interface {
DeepCopyInto(out *T)
DeepCopy() *T
DeepCopyObject() runtime.Object
}

// computeMinimizedDiff uses [diffReporter] to get all differences and for each it checks whether it
// causes the two structs to compare as unequal or not. Outputs a [cmp.Diff]-style diff message.
//
// The implementation works as follows:
//
// For every leaf node reported by [diffReportr], we construct a deep copy of the original actual struct with just that leaf value changed to its expected counterpart.
// Then we run the comparator function between the deep copy and the real actual.
// If the comparator reports the two are different, we apply this difference onto a yet another deep copy where we accumulate all such differences.
// In This way we process all the differences.
// At the end we run the full cmp.Diff between the original actual struct and the accumulated-differences struct, and that will be the diff.
//
// Therefore, the final diff tells us how actual needs to change to compare equal with expected.
//
// This implementation is extremely inefficient and somewhat finicky, but it produces the most correct result.
// Greatest limitation is that it assumes that individual differences between objects can be correctly judged in isolation.
//
// This does not use https://pkg.go.dev/k8s.io/apimachinery/pkg/api/equality, which is intentional.
func (m *beMatchingK8sResource[T, PT]) computeMinimizedDiff(actual T) (message string) {
debug := false

// https://go.dev/blog/defer-panic-and-recover
defer func() {
// in case things later go wrong with diff minimization
if r := recover(); r != nil {
// return the un-minimized diff so that we have at least something
message = cmp.Diff(actual, m.expected) + "\n" +
"while computing the diff, there was a crash\n\t" +
fmt.Sprint(r)
}
}()

// accumulator is a struct to which we apply all the diffs that influence comparison result
accumulator := PT(&actual).DeepCopy()

// values in the path are only valid until the diffReporter pops up, so we need to work with it
// as diffReporter is running; a callback is one convenient way to structure the code around this
reporter := diffReporter{OnDiff: func(diffPath cmp.Path) {
clone := PT(&actual).DeepCopy()

// reflect.ValueOf(&v).Elem() is used to get a modifiable value
current := reflect.ValueOf(clone).Elem()
pointer := reflect.ValueOf(accumulator).Elem()

// the root of the path is operation-less, skip path[0]
var i int
for i = 1; i < len(diffPath); i++ {
vx, vy := diffPath[i].Values()
// if either of the two values (expected, actual) is invalid, break off
if !vx.IsValid() || !vy.IsValid() {
// it may happen that more than one OnDiff will break off at the same place
// but that does not matter for correctness, it will just be a bit inefficient
break
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, in this case - this diff isn't stored to the final diff - is it intentional? I'm not sure what are possible cases for these being invalid, so I'm not sure.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it is, line 264, current.Set(vx), will run just like if we got at the end of the path.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(one of) these will be invalid if the structs are not matched up, say both have a map, and one of them has a key and the other one does not; so the vy will be the value in that key and vx will be invalid because that key is missing and there's no value for that; there's a self test for this

// Structs contain different keys, so the comparison has to happen
// on a map as a whole, because the second struct lacks the key from first.
// This is something I forgot to handle originally.
func Test_BeMatchingK8sResource_MismatchedStructs(t *testing.T) {
	someRoute := routev1.Route{
		ObjectMeta: metav1.ObjectMeta{
			Name: "someRouteName",
			Labels: map[string]string{
				"foo": "bar",
			},
		},
	}
	someOtherRoute := routev1.Route{
		ObjectMeta: metav1.ObjectMeta{
			Name:   "someOtherRouteName",
			Labels: nil,
		},
	}
...

}

switch step := diffPath[i].(type) {
case cmp.StructField:
current = current.Field(step.Index())
pointer = pointer.Field(step.Index())
case cmp.SliceIndex:
current = current.Index(step.Key())
pointer = pointer.Index(step.Key())
case cmp.MapIndex:
current = current.MapIndex(step.Key())
pointer = pointer.MapIndex(step.Key())
case cmp.Indirect:
current = reflect.Indirect(current)
pointer = reflect.Indirect(pointer)
case cmp.TypeAssertion:
// this is imo a noop, we don't care about checking type assertions
case cmp.Transform:
// it cannot happen in this codebase, so it's best left unhandled
panic("unhandled situation: we can't go deeper and have to end the traversal here")
default:
panic("unreachable")
}
}

// after the for loop diffPath[i] either has invalid value or is index out of range, so i-1 does it
vx, vy := diffPath[i-1].Values()
if debug {
log.Printf("%#v:\n\t-: %+v\n\t+: %+v\n", diffPath, vx, vy)
}

current.Set(vx)

// Check whether our comparator function's result is influenced by this field's value
if !m.comparator(actual, *clone) {
// If yes, store that in the accumulator object from which we later compute a final diff
pointer.Set(vx)
}
}}
cmp.Equal(m.expected, actual, cmp.Reporter(&reporter))

// Compute the minimized diff
return cmp.Diff(actual, *accumulator)
}

const (
MatcherPanickedMessage = "while computing the diff, there was a crash"
)

func Test_Reflection_CanSet(t *testing.T) {
someRoute := routev1.Route{
Spec: routev1.RouteSpec{
Host: "someRouteHost",
Path: "someRoutePath",
},
}
// reflection needs to go through a pointer, otherwise [reflect.Value.CanSet()] returns false
field := reflect.ValueOf(&someRoute).Elem().FieldByName("Spec").FieldByName("Host")
assert.Equal(t, "someRouteHost", field.String())
assert.True(t, field.CanSet())
field.Set(reflect.ValueOf("someOtherRouteHost"))
assert.Equal(t, "someOtherRouteHost", someRoute.Spec.Host)
}

// Basic happy case. The diff does not include the Path change because the
// comparator function is not sensitive to it.
func Test_BeMatchingK8sResource_DiffRoute(t *testing.T) {
someRoute := routev1.Route{
Spec: routev1.RouteSpec{
Host: "someRouteHost",
Path: "someRoutePath",
},
}
someOtherRoute := routev1.Route{
Spec: routev1.RouteSpec{
Host: "someOtherRouteHost",
Path: "someOtherRoutePath",
},
}
matcher := BeMatchingK8sResource(someOtherRoute, func(r1 routev1.Route, r2 routev1.Route) bool { return r1.Spec.Host == r2.Spec.Host })
res, err := matcher.Match(someRoute)
assert.NoError(t, err)
assert.False(t, res)

// go-cmp uses non-breaking spaces at random in the diff output msg, lol
assert.Regexp(t, regexp.MustCompile(`^[\pZ\pC]+$`), `  `)

// go-cmp diff output is not guaranteed to remain unchanged, so this may need revisiting
msg := matcher.FailureMessage(someRoute)
assert.Regexp(t, regexp.MustCompile(`-[\pZ\pC]+Host:\s+"someRouteHost",`), msg)
assert.Regexp(t, regexp.MustCompile(`\+[\pZ\pC]+Host:\s+"someOtherRouteHost",`), msg)

assert.NotRegexp(t, regexp.MustCompile(`-[\pZ\pC]+Path:\s+"someRoutePath",`), msg)
assert.NotRegexp(t, regexp.MustCompile(`\+[\pZ\pC]+Path:\s+"someOtherRoutePath",`), msg)
}

// Structs contain different keys, so the comparison has to happen
// on a map as a whole, because the second struct lacks the key from first.
// This is something I forgot to handle originally.
func Test_BeMatchingK8sResource_MismatchedStructs(t *testing.T) {
someRoute := routev1.Route{
ObjectMeta: metav1.ObjectMeta{
Name: "someRouteName",
Labels: map[string]string{
"foo": "bar",
},
},
}
someOtherRoute := routev1.Route{
ObjectMeta: metav1.ObjectMeta{
Name: "someOtherRouteName",
Labels: nil,
},
}
matcher := BeMatchingK8sResource(someOtherRoute, func(r1 routev1.Route, r2 routev1.Route) bool {
return r1.ObjectMeta.Name == r2.ObjectMeta.Name
})
res, err := matcher.Match(someRoute)
assert.NoError(t, err)
assert.False(t, res)

msg := matcher.FailureMessage(someRoute)
assert.NotContains(t, msg, MatcherPanickedMessage)
}

// Checks for a situation where comparator function panics
// when we try to use it to generate minimal diff.
func Test_BeMatchingK8sResource_CrashingMatcher(t *testing.T) {
someRoute := routev1.Route{
Spec: routev1.RouteSpec{
Host: "someRouteHost",
Path: "someRoutePath",
},
}
someOtherRoute := routev1.Route{
Spec: routev1.RouteSpec{
Host: "someOtherRouteHost",
Path: "someOtherRoutePath",
},
}
shouldPanic := false
matcher := BeMatchingK8sResource(someOtherRoute, func(r1 routev1.Route, r2 routev1.Route) bool {
if shouldPanic {
panic("je nanic")
}
return r1.Spec.Host == r2.Spec.Host
})
res, err := matcher.Match(someRoute)
assert.NoError(t, err)
assert.False(t, res)

shouldPanic = true
msg := matcher.FailureMessage(someRoute)
assert.Contains(t, msg, MatcherPanickedMessage)
assert.Contains(t, msg, "je nanic")

assert.Regexp(t, regexp.MustCompile(`-[\pZ\pC]+Host:\s+"someRouteHost",`), msg)
assert.Regexp(t, regexp.MustCompile(`\+[\pZ\pC]+Host:\s+"someOtherRouteHost",`), msg)

assert.Regexp(t, regexp.MustCompile(`-[\pZ\pC]+Path:\s+"someRoutePath",`), msg)
assert.Regexp(t, regexp.MustCompile(`\+[\pZ\pC]+Path:\s+"someOtherRoutePath",`), msg)
}
Loading
Loading