diff --git a/cache/bytes_limit_decorator.go b/cache/bytes_limit_decorator.go new file mode 100644 index 000000000..4473d605f --- /dev/null +++ b/cache/bytes_limit_decorator.go @@ -0,0 +1,71 @@ +package cache + +import ( + "container/list" + "errors" + + "github.com/cosmos/iavl/common" +) + +type lruCacheWithBytesLimit struct { + lruCache + bytesLimit int + // This is called an estimate because we calculate the number of bytes used based on the + // knowledge of how the underlying structs, slices and strings are represented in Go. + // Since Go is a garbage collected language, there is no good way to retrieve the exact number + // of bytes used by both the stack and the heap at any particular moment. As a result, this + // is named an estimate to reflect that it is not a precise number. + curBytesEstimate int +} + +var _ Cache = (*lruCacheWithBytesLimit)(nil) + +func NewWithBytesLimit(bytesLimit int) Cache { + return &lruCacheWithBytesLimit{ + lruCache: lruCache{ + dict: make(map[string]*list.Element), + ll: list.New(), + }, + bytesLimit: bytesLimit, + } +} + +func (c *lruCacheWithBytesLimit) GetType() Type { + return LRU_bytes_limit +} + +func (c *lruCacheWithBytesLimit) isOverLimit() bool { + return c.curBytesEstimate > c.bytesLimit +} + +func (c *lruCacheWithBytesLimit) add(node Node) { + c.curBytesEstimate += (node.GetFullSize() + getCacheElemMetadataSize()) + c.lruCache.add(node) +} + +func (c *lruCacheWithBytesLimit) remove(e *list.Element) Node { + removed := c.lruCache.remove(e) + c.curBytesEstimate -= (removed.GetFullSize() + getCacheElemMetadataSize()) + return removed +} + +// getCacheElemMetadataSize returns how much space the structures +// that hold a cache element utilize in memory. +// With the current design, a list.Element is created that consists of 4 pointers. 
+// In addition, a pointer to the element is stored in the Go map and has string as a key +func getCacheElemMetadataSize() int { + return common.GetStringSizeBytes() + // cache dict key + common.UintSizeBytes + // pointer to the element in dict + common.Uint64Size*4 // 4 pointers within list.Element +} + +// getCacheCurrentBytes returns the current bytes +// estimate of the cache if the cache is lruCacheWithBytesLimit. +// If not, returns 0 and error. +func getCacheCurrentBytes(c Cache) (int, error) { + withBytesLimit, ok := c.(*lruCacheWithBytesLimit) + if ok { + return withBytesLimit.curBytesEstimate, nil + } + return 0, errors.New("cannot get bytes limit, not lruCacheWithBytesLimit") +} diff --git a/cache/bytes_limit_decorator_test.go b/cache/bytes_limit_decorator_test.go new file mode 100644 index 000000000..2fc71c299 --- /dev/null +++ b/cache/bytes_limit_decorator_test.go @@ -0,0 +1,248 @@ +package cache_test + +import ( + "testing" + + "github.com/cosmos/iavl/cache" + "github.com/stretchr/testify/require" +) + +func Test_BytesLimit_Add(t *testing.T) { + testcases := map[string]func() testcase{ + "add 1 node with size of exactly limit - added": func() testcase { + const nodeIdx = 0 + + return testcase{ + cacheLimit: testNodes[nodeIdx].GetFullSize() + cache.GetCacheElemMetadataSize(), + cacheOps: []cacheOp{ + { + testNodexIdx: 0, + expectedResult: noneRemoved, + expectedBytesLimit: testNodes[nodeIdx].GetFullSize() + cache.GetCacheElemMetadataSize(), + }, + }, + expectedNodeIndexes: []int{nodeIdx}, + } + }, + "add 2 nodes with latter exceeding limit - added, old removed": func() testcase { + const nodeIdx = 0 + + return testcase{ + cacheLimit: testNodes[nodeIdx].GetFullSize() + cache.GetCacheElemMetadataSize(), + cacheOps: []cacheOp{ + { + testNodexIdx: nodeIdx, + expectedResult: noneRemoved, + expectedBytesLimit: testNodes[nodeIdx].GetFullSize() + cache.GetCacheElemMetadataSize(), + }, + { + testNodexIdx: nodeIdx + 1, + expectedResult: nodeIdx, + 
expectedBytesLimit: testNodes[nodeIdx+1].GetFullSize() + cache.GetCacheElemMetadataSize(), + }, + }, + expectedNodeIndexes: []int{nodeIdx + 1}, + } + }, + "add 2 nodes under limit": func() testcase { + const nodeIdx = 0 + + return testcase{ + cacheLimit: testNodes[nodeIdx].GetFullSize() + testNodes[nodeIdx+3].GetFullSize() + 2*cache.GetCacheElemMetadataSize(), + cacheOps: []cacheOp{ + { + testNodexIdx: nodeIdx, + expectedResult: noneRemoved, + expectedBytesLimit: testNodes[nodeIdx].GetFullSize() + cache.GetCacheElemMetadataSize(), + }, + { + testNodexIdx: nodeIdx + 3, + expectedResult: noneRemoved, + expectedBytesLimit: testNodes[nodeIdx].GetFullSize() + testNodes[nodeIdx+3].GetFullSize() + 2*cache.GetCacheElemMetadataSize(), + }, + }, + expectedNodeIndexes: []int{nodeIdx, nodeIdx + 3}, + } + }, + "add 3 nodes and 4th requiring the removal of first three due to being too large": func() testcase { + const nodeIdx = 0 + + return testcase{ + cacheLimit: testNodes[nodeIdx].GetFullSize() + + testNodes[nodeIdx+1].GetFullSize() + + testNodes[nodeIdx+2].GetFullSize() + + 3*cache.GetCacheElemMetadataSize(), + cacheOps: []cacheOp{ + { + testNodexIdx: nodeIdx, + expectedResult: noneRemoved, + expectedBytesLimit: testNodes[nodeIdx].GetFullSize() + cache.GetCacheElemMetadataSize(), + }, + { + testNodexIdx: nodeIdx + 1, + expectedResult: noneRemoved, + expectedBytesLimit: testNodes[nodeIdx].GetFullSize() + testNodes[nodeIdx+1].GetFullSize() + 2*cache.GetCacheElemMetadataSize(), + }, + { + testNodexIdx: nodeIdx + 2, + expectedResult: noneRemoved, + expectedBytesLimit: testNodes[nodeIdx].GetFullSize() + testNodes[nodeIdx+1].GetFullSize() + testNodes[nodeIdx+2].GetFullSize() + 3*cache.GetCacheElemMetadataSize(), + }, + { + testNodexIdx: nodeIdx + 3, + expectedResult: allButLastRemoved, + expectedBytesLimit: testNodes[nodeIdx+2].GetFullSize() + testNodes[nodeIdx+3].GetFullSize() + 2*cache.GetCacheElemMetadataSize(), + }, + }, + expectedNodeIndexes: []int{nodeIdx + 2, nodeIdx + 3}, + 
} + }, + } + + for name, getTestcaseFn := range testcases { + t.Run(name, func(t *testing.T) { + tc := getTestcaseFn() + bytesLimitCache := cache.NewWithBytesLimit(tc.cacheLimit) + testAdd(t, bytesLimitCache, tc) + }) + } +} + +func Test_BytesLimitCache_Remove(t *testing.T) { + testcases := map[string]func() testcase{ + "remove non-existent key, cache limit 0 - nil returned": func() testcase { + const ( + nodeIdx = 0 + cacheLimit = 0 + ) + + return testcase{ + cacheLimit: cacheLimit, + cacheOps: []cacheOp{ + { + testNodexIdx: nodeIdx, + expectedResult: noneRemoved, + }, + }, + } + }, + "remove non-existent key - nil returned": func() testcase { + const ( + nodeIdx = 0 + ) + + var ( + cacheLimit = testNodes[nodeIdx].GetFullSize() + cache.GetCacheElemMetadataSize() + ) + + return testcase{ + setup: func(c cache.Cache) { + require.Empty(t, cache.MockAdd(c, testNodes[nodeIdx+1])) + require.Equal(t, 1, c.Len()) + }, + cacheLimit: cacheLimit, + cacheOps: []cacheOp{ + { + testNodexIdx: nodeIdx, + expectedResult: noneRemoved, + }, + }, + expectedNodeIndexes: []int{1}, + } + }, + "remove existent key - removed": func() testcase { + const ( + nodeIdx = 0 + ) + + var ( + cacheLimit = testNodes[nodeIdx].GetFullSize() + cache.GetCacheElemMetadataSize() + ) + + return testcase{ + setup: func(c cache.Cache) { + require.Empty(t, cache.MockAdd(c, testNodes[nodeIdx])) + require.Equal(t, 1, c.Len()) + }, + cacheLimit: cacheLimit, + cacheOps: []cacheOp{ + { + testNodexIdx: nodeIdx, + expectedResult: nodeIdx, + }, + }, + } + }, + "remove twice, cache limit 1 - removed first time, then nil": func() testcase { + const ( + nodeIdx = 0 + ) + + var ( + cacheLimit = testNodes[nodeIdx].GetFullSize() + cache.GetCacheElemMetadataSize() + ) + + return testcase{ + setup: func(c cache.Cache) { + require.Empty(t, cache.MockAdd(c, testNodes[nodeIdx])) + require.Equal(t, 1, c.Len()) + }, + cacheLimit: cacheLimit, + cacheOps: []cacheOp{ + { + testNodexIdx: nodeIdx, + expectedResult: nodeIdx, + }, + { 
+ testNodexIdx: nodeIdx, + expectedResult: noneRemoved, + }, + }, + } + }, + "remove all, cache limit 3": func() testcase { + const ( + nodeIdx = 0 + ) + + var ( + cacheLimit = testNodes[nodeIdx].GetFullSize() + + testNodes[nodeIdx+1].GetFullSize() + + testNodes[nodeIdx+2].GetFullSize() + + 3*cache.GetCacheElemMetadataSize() + ) + + return testcase{ + setup: func(c cache.Cache) { + require.Empty(t, cache.MockAdd(c, testNodes[nodeIdx])) + require.Empty(t, cache.MockAdd(c, testNodes[nodeIdx+1])) + require.Empty(t, cache.MockAdd(c, testNodes[nodeIdx+2])) + require.Equal(t, 3, c.Len()) + }, + cacheLimit: cacheLimit, + cacheOps: []cacheOp{ + { + testNodexIdx: nodeIdx + 2, + expectedResult: nodeIdx + 2, + }, + { + testNodexIdx: nodeIdx, + expectedResult: nodeIdx, + }, + { + testNodexIdx: nodeIdx + 1, + expectedResult: nodeIdx + 1, + }, + }, + } + }, + } + + for name, getTestcaseFn := range testcases { + t.Run(name, func(t *testing.T) { + tc := getTestcaseFn() + bytesLimitCache := cache.NewWithBytesLimit(tc.cacheLimit) + testRemove(t, bytesLimitCache, tc) + }) + } +} diff --git a/cache/cache.go b/cache/cache.go index 82dfed20b..640c26dd4 100644 --- a/cache/cache.go +++ b/cache/cache.go @@ -1,96 +1,72 @@ package cache -import ( - "container/list" -) +import "container/list" // Node represents a node eligible for caching. type Node interface { + // GetKey returns a node's key GetKey() []byte + + // GetFullSize returns the number of bytes a node occupies in memory. + GetFullSize() int } +// Type represents cache type. All types +// are defined below. +type Type int + +const ( + LRU Type = 0 + LRU_node_limit Type = 1 + LRU_bytes_limit Type = 2 +) + // Cache is an in-memory structure to persist nodes for quick access. type Cache interface { - // Adds node to cache. If full and had to remove the oldest element, - // returns the oldest, otherwise nil. - Add(node Node) Node - - // Returns Node for the key, if exists. 
nil otherwise. Get(key []byte) Node + // GetType returns cache's type. + GetType() Type + // Has returns true if node with key exists in cache, false otherwise. Has(key []byte) bool - // Remove removes node with key from cache. The removed node is returned. - // if not in cache, return nil. - Remove(key []byte) Node - // Len returns the cache length. Len() int -} -// lruCache is an LRU cache implementation. -type lruCache struct { - dict map[string]*list.Element // FastNode cache. - cacheLimit int // FastNode cache size limit in elements. - ll *list.List // LRU queue of cache elements. Used for deletion. -} + // add adds node to cache. add(node Node) -var _ Cache = (*lruCache)(nil) - -func New(cacheLimit int) Cache { - return &lruCache{ - dict: make(map[string]*list.Element), - cacheLimit: cacheLimit, - ll: list.New(), - } -} + // get returns list element corresponding to the key get(key []byte) *list.Element -func (c *lruCache) Add(node Node) Node { - if e, exists := c.dict[string(node.GetKey())]; exists { - c.ll.MoveToFront(e) - old := e.Value - e.Value = node - return old.(Node) - } - - elem := c.ll.PushFront(node) - c.dict[string(node.GetKey())] = elem + // remove removes node with key from cache. The removed node is returned. + // if not in cache, return nil. + remove(e *list.Element) Node - if c.ll.Len() > c.cacheLimit { - oldest := c.ll.Back() + // isOverLimit returns true if cache limit has been reached, false otherwise. + isOverLimit() bool - return c.remove(oldest) - } - return nil + // getOldest returns the oldest cache element. + getOldest() *list.Element } -func (nc *lruCache) Get(key []byte) Node { - if ele, hit := nc.dict[string(key)]; hit { - nc.ll.MoveToFront(ele) - return ele.Value.(Node) - } - return nil -} - -func (c *lruCache) Has(key []byte) bool { - _, exists := c.dict[string(key)] - return exists -} +// Add adds node to cache. 
If adding a node exceeds the cache limit, +// removes old elements from cache up until the limit is not exceeded anymore. +func Add(cache Cache, node Node) { + cache.add(node) -func (nc *lruCache) Len() int { - return nc.ll.Len() + for cache.isOverLimit() { + cache.remove(cache.getOldest()) + } } -func (c *lruCache) Remove(key []byte) Node { - if elem, exists := c.dict[string(key)]; exists { - return c.remove(elem) +// Remove removes node with key from cache if exists and returns the +// removed node, nil otherwise. +func Remove(cache Cache, key []byte) Node { + if elem := cache.get(key); elem != nil { + return cache.remove(elem) } return nil } - -func (c *lruCache) remove(e *list.Element) Node { - removed := c.ll.Remove(e).(Node) - delete(c.dict, string(removed.GetKey())) - return removed -} diff --git a/cache/cache_bench_test.go b/cache/cache_bench_test.go index 14c32c671..d1187b677 100644 --- a/cache/cache_bench_test.go +++ b/cache/cache_bench_test.go @@ -1,18 +1,22 @@ package cache_test import ( + "crypto/rand" "testing" "github.com/cosmos/iavl/cache" "github.com/cosmos/iavl/common" + "github.com/stretchr/testify/require" ) -func BenchmarkAdd(b *testing.B) { +type benchTestcase struct { + cacheLimit int + keySize int +} + +func Benchmark_NodeLimitCache_Add(b *testing.B) { b.ReportAllocs() - testcases := map[string]struct { - cacheLimit int - keySize int - }{ + testcases := map[string]benchTestcase{ "small - limit: 10K, key size - 10b": { cacheLimit: 10000, keySize: 10, @@ -27,11 +31,15 @@ func BenchmarkAdd(b *testing.B) { }, } + benchmarkAdd(b, testcases) +} + +func benchmarkAdd(b *testing.B, testcases map[string]benchTestcase) { for name, tc := range testcases { - cache := cache.New(tc.cacheLimit) + c := cache.NewWithNodeLimit(tc.cacheLimit) b.Run(name, func(b *testing.B) { for i := 0; i < b.N; i++ { - _ = cache.Add(&testNode{ + cache.Add(c, &testNode{ key: randBytes(tc.keySize), }) } @@ -39,28 +47,84 @@ func BenchmarkAdd(b 
*testing.B) { +func Benchmark_NodeLimitCache_Remove(b *testing.B) { + c := cache.NewWithNodeLimit(1000) + benchmarkRemove(b, c) +} + +func Benchmark_BytesLimitCache_Add(b *testing.B) { + b.ReportAllocs() + testcases := map[string]benchTestcase{ + "small - limit: 50MB, key size - 10b": { + cacheLimit: 50 * 1024 * 1024, + keySize: 10, + }, + "med - limit: 100MB, key size 20b": { + cacheLimit: 100 * 1024 * 1024, + keySize: 20, + }, + "large - limit 500MB: , key size 30b": { + cacheLimit: 500 * 1024 * 1024, + keySize: 30, + }, + } + + benchmarkAdd(b, testcases) +} +func Benchmark_BytesLimitCache_Remove(b *testing.B) { + c := cache.NewWithBytesLimit(50 * 1024 * 1024) + benchmarkRemove(b, c) +} + +// benchmarkRemove is meant to be run manually +// This is done because we want to avoid removing non-existent keys +// As a result, -benchtime flag should be below keysToPopulate variable. +// To run, comment out b.Skip() and execute: +// go test -run=^$ -bench ^Benchmark_BytesLimitCache_Remove$ github.com/cosmos/iavl/cache -benchtime=1000000x +func benchmarkRemove(b *testing.B, c cache.Cache) { + b.Skip() + b.ReportAllocs() b.StopTimer() - cache := cache.New(1000) existentKeyMirror := [][]byte{} // Populate cache - for i := 0; i < 50; i++ { - key := randBytes(1000) + const keysToPopulate = 1000000 + + for i := 0; i < keysToPopulate; i++ { + key := randBytes(20) existentKeyMirror = append(existentKeyMirror, key) - cache.Add(&testNode{ + cache.Add(c, &testNode{ key: key, }) } r := common.NewRand() + removedKeys := make(map[string]struct{}, 0) + for i := 0; i < b.N; i++ { - key := existentKeyMirror[r.Intn(len(existentKeyMirror))] + var key []byte + for len(removedKeys) != keysToPopulate { + key = existentKeyMirror[r.Intn(len(existentKeyMirror))] + if _, exists := removedKeys[string(key)]; !exists { + break + } + } + b.ResetTimer() - _ = cache.Remove(key) + removed := cache.Remove(c, key) + removedKeys[string(key)] = struct{}{} + require.NotNil(b, removed) + } } + +func randBytes(length 
int) []byte { + key := make([]byte, length) + // math.rand.Read always returns err=nil + // we do not need cryptographic randomness for this test: + //nolint:gosec + rand.Read(key) + return key +} diff --git a/cache/cache_test.go b/cache/cache_test.go deleted file mode 100644 index 4f0b40ab6..000000000 --- a/cache/cache_test.go +++ /dev/null @@ -1,311 +0,0 @@ -package cache_test - -import ( - "crypto/rand" - "fmt" - "testing" - - "github.com/cosmos/iavl/cache" - "github.com/stretchr/testify/require" -) - -// expectedResult represents the expected result of each add/remove operation. -// It can be noneRemoved or the index of the removed node in testNodes -type expectedResult int - -const ( - noneRemoved expectedResult = -1 - // The rest represent the index of the removed node -) - -// testNode is the node used for testing cache implementation -type testNode struct { - key []byte -} - -type cacheOp struct { - testNodexIdx int - expectedResult expectedResult -} - -type testcase struct { - setup func(cache.Cache) - cacheLimit int - cacheOps []cacheOp - expectedNodeIndexes []int // contents of the cache once test case completes represent by indexes in testNodes -} - -func (tn *testNode) GetKey() []byte { - return tn.key -} - -const ( - testKey = "key" -) - -var _ cache.Node = (*testNode)(nil) - -var ( - testNodes = []cache.Node{ - &testNode{ - key: []byte(fmt.Sprintf("%s%d", testKey, 1)), - }, - &testNode{ - key: []byte(fmt.Sprintf("%s%d", testKey, 2)), - }, - &testNode{ - key: []byte(fmt.Sprintf("%s%d", testKey, 3)), - }, - } -) - -func Test_Cache_Add(t *testing.T) { - testcases := map[string]testcase{ - "add 1 node with 1 limit - added": { - cacheLimit: 1, - cacheOps: []cacheOp{ - { - testNodexIdx: 0, - expectedResult: noneRemoved, - }, - }, - expectedNodeIndexes: []int{0}, - }, - "add 1 node twice, cache limit 2 - only one added": { - cacheLimit: 2, - cacheOps: []cacheOp{ - { - testNodexIdx: 0, - expectedResult: noneRemoved, - }, - { - testNodexIdx: 0, - 
expectedResult: 0, - }, - }, - expectedNodeIndexes: []int{0}, - }, - "add 1 node with 0 limit - not added and return itself": { - cacheLimit: 0, - cacheOps: []cacheOp{ - { - testNodexIdx: 0, - expectedResult: 0, - }, - }, - }, - "add 3 nodes with 1 limit - first 2 removed": { - cacheLimit: 1, - cacheOps: []cacheOp{ - { - testNodexIdx: 0, - expectedResult: noneRemoved, - }, - { - testNodexIdx: 1, - expectedResult: 0, - }, - { - testNodexIdx: 2, - expectedResult: 1, - }, - }, - expectedNodeIndexes: []int{2}, - }, - "add 3 nodes with 2 limit - first removed": { - cacheLimit: 2, - cacheOps: []cacheOp{ - { - testNodexIdx: 0, - expectedResult: noneRemoved, - }, - { - testNodexIdx: 1, - expectedResult: noneRemoved, - }, - { - testNodexIdx: 2, - expectedResult: 0, - }, - }, - expectedNodeIndexes: []int{1, 2}, - }, - "add 3 nodes with 10 limit - non removed": { - cacheLimit: 10, - cacheOps: []cacheOp{ - { - testNodexIdx: 0, - expectedResult: noneRemoved, - }, - { - testNodexIdx: 1, - expectedResult: noneRemoved, - }, - { - testNodexIdx: 2, - expectedResult: noneRemoved, - }, - }, - expectedNodeIndexes: []int{0, 1, 2}, - }, - } - - for name, tc := range testcases { - t.Run(name, func(t *testing.T) { - cache := cache.New(tc.cacheLimit) - - expectedCurSize := 0 - - for _, op := range tc.cacheOps { - - actualResult := cache.Add(testNodes[op.testNodexIdx]) - - expectedResult := op.expectedResult - - if expectedResult == noneRemoved { - require.Nil(t, actualResult) - expectedCurSize++ - } else { - require.NotNil(t, actualResult) - - // Here, op.expectedResult represents the index of the removed node in tc.cacheOps - require.Equal(t, testNodes[int(op.expectedResult)], actualResult) - } - require.Equal(t, expectedCurSize, cache.Len()) - } - - validateCacheContentsAfterTest(t, tc, cache) - }) - } -} - -func Test_Cache_Remove(t *testing.T) { - testcases := map[string]testcase{ - "remove non-existent key, cache limit 0 - nil returned": { - cacheLimit: 0, - cacheOps: []cacheOp{ - { - 
testNodexIdx: 0, - expectedResult: noneRemoved, - }, - }, - }, - "remove non-existent key, cache limit 1 - nil returned": { - setup: func(c cache.Cache) { - require.Nil(t, c.Add(testNodes[1])) - require.Equal(t, 1, c.Len()) - }, - cacheLimit: 1, - cacheOps: []cacheOp{ - { - testNodexIdx: 0, - expectedResult: noneRemoved, - }, - }, - expectedNodeIndexes: []int{1}, - }, - "remove existent key, cache limit 1 - removed": { - setup: func(c cache.Cache) { - require.Nil(t, c.Add(testNodes[0])) - require.Equal(t, 1, c.Len()) - }, - cacheLimit: 1, - cacheOps: []cacheOp{ - { - testNodexIdx: 0, - expectedResult: 0, - }, - }, - }, - "remove twice, cache limit 1 - removed first time, then nil": { - setup: func(c cache.Cache) { - require.Nil(t, c.Add(testNodes[0])) - require.Equal(t, 1, c.Len()) - }, - cacheLimit: 1, - cacheOps: []cacheOp{ - { - testNodexIdx: 0, - expectedResult: 0, - }, - { - testNodexIdx: 0, - expectedResult: noneRemoved, - }, - }, - }, - "remove all, cache limit 3": { - setup: func(c cache.Cache) { - require.Nil(t, c.Add(testNodes[0])) - require.Nil(t, c.Add(testNodes[1])) - require.Nil(t, c.Add(testNodes[2])) - require.Equal(t, 3, c.Len()) - }, - cacheLimit: 3, - cacheOps: []cacheOp{ - { - testNodexIdx: 2, - expectedResult: 2, - }, - { - testNodexIdx: 0, - expectedResult: 0, - }, - { - testNodexIdx: 1, - expectedResult: 1, - }, - }, - }, - } - - for name, tc := range testcases { - t.Run(name, func(t *testing.T) { - cache := cache.New(tc.cacheLimit) - - if tc.setup != nil { - tc.setup(cache) - } - - expectedCurSize := cache.Len() - - for _, op := range tc.cacheOps { - - actualResult := cache.Remove(testNodes[op.testNodexIdx].GetKey()) - - expectedResult := op.expectedResult - - if expectedResult == noneRemoved { - require.Nil(t, actualResult) - } else { - expectedCurSize-- - require.NotNil(t, actualResult) - - // Here, op.expectedResult represents the index of the removed node in tc.cacheOps - require.Equal(t, testNodes[int(op.expectedResult)], actualResult) 
- } - require.Equal(t, expectedCurSize, cache.Len()) - } - - validateCacheContentsAfterTest(t, tc, cache) - }) - } -} - -func validateCacheContentsAfterTest(t *testing.T, tc testcase, cache cache.Cache) { - require.Equal(t, len(tc.expectedNodeIndexes), cache.Len()) - for _, idx := range tc.expectedNodeIndexes { - expectedNode := testNodes[idx] - require.True(t, cache.Has(expectedNode.GetKey())) - require.Equal(t, expectedNode, cache.Get(expectedNode.GetKey())) - } -} - -func randBytes(length int) []byte { - key := make([]byte, length) - // math.rand.Read always returns err=nil - // we do not need cryptographic randomness for this test: - //nolint:gosec - rand.Read(key) - return key -} diff --git a/cache/common_test.go b/cache/common_test.go new file mode 100644 index 000000000..1647f1e9d --- /dev/null +++ b/cache/common_test.go @@ -0,0 +1,144 @@ +package cache_test + +import ( + "fmt" + "testing" + + "github.com/cosmos/iavl/cache" + "github.com/cosmos/iavl/common" + "github.com/stretchr/testify/require" +) + +// expectedResult represents the expected result of each add/remove operation. 
+// It can be noneRemoved or the index of the removed node in testNodes +type expectedResult int + +const ( + updated expectedResult = -3 + allButLastRemoved expectedResult = -2 + noneRemoved expectedResult = -1 + // The rest represent the index of the removed node +) + +// testNode is the node used for testing cache implementation +type testNode struct { + key []byte +} + +type cacheOp struct { + testNodexIdx int + expectedResult expectedResult + expectedBytesLimit int // used for testing lruCacheWithBytesLimit +} + +type testcase struct { + setup func(cache.Cache) + cacheLimit int + cacheOps []cacheOp + expectedNodeIndexes []int // contents of the cache once test case completes represent by indexes in testNodes +} + +func (tn *testNode) GetKey() []byte { + return tn.key +} + +func (tn *testNode) GetFullSize() int { + return len(tn.key) + common.GetSliceSizeBytes() +} + +const ( + testKey = "key" +) + +var _ cache.Node = (*testNode)(nil) + +var ( + testNodes = []cache.Node{ + &testNode{ + key: []byte(fmt.Sprintf("%s%d", testKey, 1)), + }, + &testNode{ + key: []byte(fmt.Sprintf("%s%d", testKey, 2)), + }, + &testNode{ + key: []byte(fmt.Sprintf("%s%d", testKey, 3)), + }, + &testNode{ + key: []byte(fmt.Sprintf("%s%d%s%d%s%d", testKey, 4, testKey, 4, testKey, 4)), + }, + } +) + +func testAdd(t *testing.T, testCache cache.Cache, tc testcase) { + expectedCurSize := 0 + + for opIdx, op := range tc.cacheOps { + + actualResult := cache.MockAdd(testCache, testNodes[op.testNodexIdx]) + + expectedResult := op.expectedResult + + switch expectedResult { + case noneRemoved: + expectedCurSize++ + fallthrough + case updated: + require.Empty(t, actualResult) + case allButLastRemoved: + require.NotNil(t, actualResult) + expectedCurSize = 2 + require.True(t, testCache.Has(testNodes[op.testNodexIdx].GetKey())) + require.Contains(t, actualResult, testNodes[tc.cacheOps[opIdx-2].testNodexIdx]) + default: + require.NotNil(t, actualResult) + // Here, op.expectedResult represents the index 
of the removed node in tc.cacheOps + require.Contains(t, actualResult, testNodes[int(op.expectedResult)]) + } + require.Equal(t, expectedCurSize, testCache.Len()) + + if testCache.GetType() == cache.LRU_bytes_limit { + currentBytes, err := cache.GetCacheCurrentBytes(testCache) + require.NoError(t, err) + require.Equal(t, op.expectedBytesLimit, currentBytes) + } + } + + validateCacheContentsAfterTest(t, tc, testCache) +} + +func testRemove(t *testing.T, testCache cache.Cache, tc testcase) { + if tc.setup != nil { + tc.setup(testCache) + } + + expectedCurSize := testCache.Len() + + for _, op := range tc.cacheOps { + + actualResult := cache.Remove(testCache, testNodes[op.testNodexIdx].GetKey()) + + expectedResult := op.expectedResult + + if expectedResult == noneRemoved { + require.Nil(t, actualResult) + } else { + expectedCurSize-- + require.NotNil(t, actualResult) + + // Here, op.expectedResult represents the index of the removed node in tc.cacheOps + require.Equal(t, testNodes[int(op.expectedResult)], actualResult) + } + require.Equal(t, expectedCurSize, testCache.Len()) + } + + validateCacheContentsAfterTest(t, tc, testCache) +} + +func validateCacheContentsAfterTest(t *testing.T, tc testcase, cache cache.Cache) { + require.Equal(t, len(tc.expectedNodeIndexes), cache.Len()) + for _, idx := range tc.expectedNodeIndexes { + expectedNode := testNodes[idx] + require.True(t, cache.Has(expectedNode.GetKey())) + require.Equal(t, expectedNode, cache.Get(expectedNode.GetKey())) + } +} diff --git a/cache/export_test.go b/cache/export_test.go new file mode 100644 index 000000000..1ba70b6a4 --- /dev/null +++ b/cache/export_test.go @@ -0,0 +1,17 @@ +package cache + +var ( + GetCacheElemMetadataSize = getCacheElemMetadataSize + GetCacheCurrentBytes = getCacheCurrentBytes +) + +// Used for testing, returns removed Nodes after adding current +func MockAdd(c Cache, node Node) []Node { + c.add(node) + + removed := make([]Node, 0) + for c.isOverLimit() { + removed = append(removed, 
c.remove(c.getOldest())) + } + return removed +} diff --git a/cache/lru_cache.go b/cache/lru_cache.go new file mode 100644 index 000000000..57330f2e2 --- /dev/null +++ b/cache/lru_cache.go @@ -0,0 +1,67 @@ +package cache + +import ( + "container/list" +) + +// lruCache is an abstract LRU cache implementation with no limits. +type lruCache struct { + dict map[string]*list.Element // FastNode cache. + ll *list.List // LRU queue of cache elements. Used for deletion. +} + +var _ Cache = (*lruCache)(nil) + +func (c *lruCache) add(node Node) { + if e, exists := c.dict[string(node.GetKey())]; exists { + c.ll.MoveToFront(e) + e.Value = node + return + } + + elem := c.ll.PushFront(node) + c.dict[string(node.GetKey())] = elem +} + +func (c *lruCache) Get(key []byte) Node { + if elem := c.get(key); elem != nil { + return elem.Value.(Node) + } + return nil +} + +func (c *lruCache) GetType() Type { + return LRU +} + +func (c *lruCache) Has(key []byte) bool { + _, exists := c.dict[string(key)] + return exists +} + +func (nc *lruCache) Len() int { + return nc.ll.Len() +} + +func (c *lruCache) get(key []byte) *list.Element { + elem, exists := c.dict[string(key)] + if exists { + c.ll.MoveToFront(elem) + return elem + } + return nil +} + +func (c *lruCache) remove(e *list.Element) Node { + removed := c.ll.Remove(e).(Node) + delete(c.dict, string(removed.GetKey())) + return removed +} + +func (c *lruCache) getOldest() *list.Element { + return c.ll.Back() +} + +func (c *lruCache) isOverLimit() bool { + return false +} diff --git a/cache/node_limit_decorator.go b/cache/node_limit_decorator.go new file mode 100644 index 000000000..a70656833 --- /dev/null +++ b/cache/node_limit_decorator.go @@ -0,0 +1,28 @@ +package cache + +import "container/list" + +type lruCacheWithNodeLimit struct { + lruCache + nodeLimit int +} + +var _ Cache = (*lruCacheWithNodeLimit)(nil) + +func NewWithNodeLimit(nodeLimit int) Cache { + return &lruCacheWithNodeLimit{ + lruCache: lruCache{ + dict: 
make(map[string]*list.Element), + ll: list.New(), + }, + nodeLimit: nodeLimit, + } +} + +func (c *lruCacheWithNodeLimit) GetType() Type { + return LRU_node_limit +} + +func (c *lruCacheWithNodeLimit) isOverLimit() bool { + return c.lruCache.ll.Len() > c.nodeLimit +} diff --git a/cache/node_limit_decorator_test.go b/cache/node_limit_decorator_test.go new file mode 100644 index 000000000..f13f5d716 --- /dev/null +++ b/cache/node_limit_decorator_test.go @@ -0,0 +1,208 @@ +package cache_test + +import ( + "testing" + + "github.com/cosmos/iavl/cache" + "github.com/stretchr/testify/require" +) + +func Test_NodeLimitCache_Add(t *testing.T) { + testcases := map[string]testcase{ + "add 1 node with 1 limit - added": { + cacheLimit: 1, + cacheOps: []cacheOp{ + { + testNodexIdx: 0, + expectedResult: noneRemoved, + }, + }, + expectedNodeIndexes: []int{0}, + }, + "add 1 node twice, cache limit 2 - only one added": { + cacheLimit: 2, + cacheOps: []cacheOp{ + { + testNodexIdx: 0, + expectedResult: noneRemoved, + }, + { + testNodexIdx: 0, + expectedResult: updated, + }, + }, + expectedNodeIndexes: []int{0}, + }, + "add 1 node with 0 limit - not added and return itself": { + cacheLimit: 0, + cacheOps: []cacheOp{ + { + testNodexIdx: 0, + expectedResult: 0, + }, + }, + }, + "add 3 nodes with 1 limit - first 2 removed": { + cacheLimit: 1, + cacheOps: []cacheOp{ + { + testNodexIdx: 0, + expectedResult: noneRemoved, + }, + { + testNodexIdx: 1, + expectedResult: 0, + }, + { + testNodexIdx: 2, + expectedResult: 1, + }, + }, + expectedNodeIndexes: []int{2}, + }, + "add 3 nodes with 2 limit - first removed": { + cacheLimit: 2, + cacheOps: []cacheOp{ + { + testNodexIdx: 0, + expectedResult: noneRemoved, + }, + { + testNodexIdx: 1, + expectedResult: noneRemoved, + }, + { + testNodexIdx: 2, + expectedResult: 0, + }, + }, + expectedNodeIndexes: []int{1, 2}, + }, + "add 3 nodes with 10 limit - non removed": { + cacheLimit: 10, + cacheOps: []cacheOp{ + { + testNodexIdx: 0, + expectedResult: 
noneRemoved, + }, + { + testNodexIdx: 1, + expectedResult: noneRemoved, + }, + { + testNodexIdx: 2, + expectedResult: noneRemoved, + }, + }, + expectedNodeIndexes: []int{0, 1, 2}, + }, + } + + for name, tc := range testcases { + t.Run(name, func(t *testing.T) { + nodeLimitCache := cache.NewWithNodeLimit(tc.cacheLimit) + testAdd(t, nodeLimitCache, tc) + }) + } +} + +func Test_NodeLimitCache_Remove(t *testing.T) { + testcases := map[string]func() testcase{ + "remove non-existent key, cache limit 0 - nil returned": func() testcase { + + return testcase{ + cacheLimit: 0, + cacheOps: []cacheOp{ + { + testNodexIdx: 0, + expectedResult: noneRemoved, + }, + }, + } + }, + "remove non-existent key, cache limit 1 - nil returned": func() testcase { + return testcase{ + setup: func(c cache.Cache) { + require.Empty(t, cache.MockAdd(c, testNodes[1])) + require.Equal(t, 1, c.Len()) + }, + cacheLimit: 1, + cacheOps: []cacheOp{ + { + testNodexIdx: 0, + expectedResult: noneRemoved, + }, + }, + expectedNodeIndexes: []int{1}, + } + }, + "remove existent key, cache limit 1 - removed": func() testcase { + + return testcase{ + setup: func(c cache.Cache) { + require.Empty(t, cache.MockAdd(c, testNodes[0])) + require.Equal(t, 1, c.Len()) + }, + cacheLimit: 1, + cacheOps: []cacheOp{ + { + testNodexIdx: 0, + expectedResult: 0, + }, + }, + } + }, + "remove twice, cache limit 1 - removed first time, then nil": func() testcase { + return testcase{ + setup: func(c cache.Cache) { + require.Empty(t, cache.MockAdd(c, testNodes[0])) + require.Equal(t, 1, c.Len()) + }, + cacheLimit: 1, + cacheOps: []cacheOp{ + { + testNodexIdx: 0, + expectedResult: 0, + }, + { + testNodexIdx: 0, + expectedResult: noneRemoved, + }, + }, + } + }, + "remove all, cache limit 3": func() testcase { + return testcase{ + setup: func(c cache.Cache) { + require.Empty(t, cache.MockAdd(c, testNodes[0])) + require.Empty(t, cache.MockAdd(c, testNodes[1])) + require.Empty(t, cache.MockAdd(c, testNodes[2])) + require.Equal(t, 3, 
c.Len()) + }, + cacheLimit: 3, + cacheOps: []cacheOp{ + { + testNodexIdx: 2, + expectedResult: 2, + }, + { + testNodexIdx: 0, + expectedResult: 0, + }, + { + testNodexIdx: 1, + expectedResult: 1, + }, + }, + } + }, + } + + for name, getTestcaseFn := range testcases { + t.Run(name, func(t *testing.T) { + tc := getTestcaseFn() + nodeLimitCache := cache.NewWithNodeLimit(tc.cacheLimit) + testRemove(t, nodeLimitCache, tc) + }) + } +} diff --git a/common/size.go b/common/size.go new file mode 100644 index 000000000..541aae6aa --- /dev/null +++ b/common/size.go @@ -0,0 +1,23 @@ +package common + +import "math/bits" + +const ( + UintSizeBytes = bits.UintSize / 8 // 4 or 8 + + Uint64Size = 8 +) + +// GetSliceSizeBytes returns how much space a Go slice utilizes in memory. +// Slices have integers for length and capacity. Additionally, they contain +// a pointer to the underlying array buffer. +func GetSliceSizeBytes() int { + return 3 * UintSizeBytes +} + +// GetStringSizeBytes returns how much space a Go string utilizes in memory. +// Strings have an integer for length and a pointer to the underlying array buffer. +// Contrary to slices, strings do not contain capacity as they are immutable. +func GetStringSizeBytes() int { + return 2 * UintSizeBytes +} diff --git a/fast_node.go b/fast_node.go index 7dd3b0933..7e4b84a23 100644 --- a/fast_node.go +++ b/fast_node.go @@ -4,6 +4,7 @@ import ( "io" "github.com/cosmos/iavl/cache" + "github.com/cosmos/iavl/common" "github.com/pkg/errors" ) @@ -49,10 +50,25 @@ func DeserializeFastNode(key []byte, buf []byte) (*FastNode, error) { return fastNode, nil } +// GetKey returns a node's key +// Implements cache.Node interface. func (fn *FastNode) GetKey() []byte { return fn.key } +// GetFullSize returns the number of bytes a node occupies in memory. +// Implements cache.Node interface. It is needed to enable cache's bytes limit decorator.
+// +// Here, we estimate the following: +// key []byte - number of bytes in the slice + the underlying slice's structure +// versionLastUpdatedAt int64 - size of the 64 bit integer +// value []byte - number of bytes in the slice + the underlying slice's structure +func (fn *FastNode) GetFullSize() int { + return len(fn.key) + common.GetSliceSizeBytes() + + common.Uint64Size + + len(fn.value) + common.GetSliceSizeBytes() +} + func (node *FastNode) encodedSize() int { n := encodeVarintSize(node.versionLastUpdatedAt) + encodeBytesSize(node.value) return n diff --git a/fast_node_test.go b/fast_node_test.go index b6e1ffd98..f5f8abc71 100644 --- a/fast_node_test.go +++ b/fast_node_test.go @@ -5,6 +5,7 @@ import ( "encoding/hex" "testing" + "github.com/cosmos/iavl/common" "github.com/stretchr/testify/require" ) @@ -56,3 +57,27 @@ func TestFastNode_encode_decode(t *testing.T) { }) } } + +func TestFastNode_GetFullSize(t *testing.T) { + testcases := map[string]struct { + node *FastNode + expectedSize int + }{ + "empty": { + &FastNode{}, + common.UintSizeBytes*6 + common.Uint64Size, + }, + "with data": {&FastNode{ + key: []byte{0x4}, + versionLastUpdatedAt: 1, + value: []byte{0x2, 0x3}, + }, common.UintSizeBytes*6 + common.Uint64Size + 1 + 2}, + } + for name, tc := range testcases { + tc := tc + t.Run(name, func(t *testing.T) { + size := tc.node.GetFullSize() + require.Equal(t, tc.expectedSize, size) + }) + } +} diff --git a/node.go b/node.go index 8dbac4679..c948fb585 100644 --- a/node.go +++ b/node.go @@ -108,10 +108,20 @@ func MakeNode(buf []byte) (*Node, error) { return node, nil } +// GetKey returns a node's key +// Implements cache.Node interface. func (n *Node) GetKey() []byte { return n.hash } +// GetFullSize returns the number of bytes a node occupies in memory. +// Implements cache.Node interface. It is needed to enable cache's bytes limit decorator. +// Currently, node cache does not utilize the bytes limit decorator. 
As a result, the implementation +// is not provided yet. +func (n *Node) GetFullSize() int { + panic("not implemented") +} + // String returns a string representation of the node. func (node *Node) String() string { hashstr := "" diff --git a/nodedb.go b/nodedb.go index 1261b219e..68e7e1085 100644 --- a/nodedb.go +++ b/nodedb.go @@ -29,7 +29,7 @@ const ( // Using semantic versioning: https://semver.org/ defaultStorageVersionValue = "1.0.0" fastStorageVersionValue = "1.1.0" - fastNodeCacheLimit = 100000 + fastNodeCacheLimitBytes = 100 * 1024 * 1024 ) var ( @@ -96,8 +96,8 @@ func newNodeDB(db dbm.DB, cacheSize int, opts *Options) *nodeDB { batch: db.NewBatch(), opts: *opts, latestVersion: 0, // initially invalid - nodeCache: cache.New(cacheSize), - fastNodeCache: cache.New(fastNodeCacheLimit), + nodeCache: cache.NewWithNodeLimit(cacheSize), + fastNodeCache: cache.NewWithBytesLimit(fastNodeCacheLimitBytes), versionReaders: make(map[int64]uint32, 8), storageVersion: string(storeVersion), } @@ -134,7 +134,7 @@ func (ndb *nodeDB) GetNode(hash []byte) *Node { node.hash = hash node.persisted = true - ndb.nodeCache.Add(node) + cache.Add(ndb.nodeCache, node) return node } @@ -169,7 +169,7 @@ func (ndb *nodeDB) GetFastNode(key []byte) (*FastNode, error) { return nil, fmt.Errorf("error reading FastNode. bytes: %x, error: %w", buf, err) } - ndb.fastNodeCache.Add(fastNode) + cache.Add(ndb.fastNodeCache, fastNode) return fastNode, nil } @@ -198,7 +198,7 @@ func (ndb *nodeDB) SaveNode(node *Node) { } debug("BATCH SAVE %X %p\n", node.hash, node) node.persisted = true - ndb.nodeCache.Add(node) + cache.Add(ndb.nodeCache, node) } // SaveNode saves a FastNode to disk and add to cache. @@ -284,7 +284,7 @@ func (ndb *nodeDB) saveFastNodeUnlocked(node *FastNode, shouldAddToCache bool) e return fmt.Errorf("error while writing key/val to nodedb batch. 
Err: %w", err) } if shouldAddToCache { - ndb.fastNodeCache.Add(node) + cache.Add(ndb.fastNodeCache, node) } return nil } @@ -421,7 +421,7 @@ func (ndb *nodeDB) DeleteVersionsFrom(version int64) error { if err = ndb.batch.Delete(ndb.nodeKey(hash)); err != nil { return err } - ndb.nodeCache.Remove(hash) + cache.Remove(ndb.nodeCache, hash) } else if toVersion >= version-1 { if err := ndb.batch.Delete(key); err != nil { return err @@ -459,7 +459,7 @@ func (ndb *nodeDB) DeleteVersionsFrom(version int64) error { if err = ndb.batch.Delete(keyWithPrefix); err != nil { return err } - ndb.fastNodeCache.Remove(key) + cache.Remove(ndb.fastNodeCache, key) } return nil }) @@ -510,7 +510,7 @@ func (ndb *nodeDB) DeleteVersionsRange(fromVersion, toVersion int64) error { if err := ndb.batch.Delete(ndb.nodeKey(hash)); err != nil { panic(err) } - ndb.nodeCache.Remove(hash) + cache.Remove(ndb.nodeCache, hash) } else { ndb.saveOrphan(hash, from, predecessor) } @@ -541,7 +541,7 @@ func (ndb *nodeDB) DeleteFastNode(key []byte) error { if err := ndb.batch.Delete(ndb.fastNodeKey(key)); err != nil { return err } - ndb.fastNodeCache.Remove(key) + cache.Remove(ndb.fastNodeCache, key) return nil } @@ -569,7 +569,7 @@ func (ndb *nodeDB) deleteNodesFrom(version int64, hash []byte) error { return err } - ndb.nodeCache.Remove(hash) + cache.Remove(ndb.nodeCache, hash) } return nil @@ -630,7 +630,7 @@ func (ndb *nodeDB) deleteOrphans(version int64) error { if err := ndb.batch.Delete(ndb.nodeKey(hash)); err != nil { return err } - ndb.nodeCache.Remove(hash) + cache.Remove(ndb.nodeCache, hash) } else { debug("MOVE predecessor:%v fromVersion:%v toVersion:%v %X\n", predecessor, fromVersion, toVersion, hash) ndb.saveOrphan(hash, fromVersion, predecessor)