Skip to content

Commit

Permalink
0.1.1 (#27)
Browse files Browse the repository at this point in the history
* Cuckoo Filter
    * enforce unique inserts
    * import / export 
    * self expanding cuckoo filter implementation
  • Loading branch information
barrust authored Oct 4, 2017
1 parent 6a00ac1 commit 5cc4b98
Show file tree
Hide file tree
Showing 10 changed files with 304 additions and 90 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# PyProbables Changelog

### Version 0.1.1:
* CuckooFilter
* Import / Export functionality
* Enforce single insertion per key
* Auto expand on insertion failure OR when called to do so (settable)

### Version 0.1.0:
* Cuckoo Filter
* Added basic Cuckoo Filter code
Expand Down
2 changes: 1 addition & 1 deletion probables/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
__maintainer__ = 'Tyler Barrus'
__email__ = 'barrust@gmail.com'
__license__ = 'MIT'
__version__ = '0.1.0'
__version__ = '0.1.1'
__credits__ = []
__url__ = 'https://github.com/barrust/pyprobables'
__bugtrack_url__ = 'https://github.com/barrust/pyprobables/issues'
Expand Down
19 changes: 9 additions & 10 deletions probables/blooms/countingbloom.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from __future__ import (unicode_literals, absolute_import, print_function,
division)
from . basebloom import (BaseBloom)
from .. constants import (UINT32_T_MAX, UINT64_T_MAX)

MISMATCH_MSG = ('The parameter second must be of type CountingBloomFilter')

Expand Down Expand Up @@ -44,8 +45,6 @@ def __init__(self, est_elements=None, false_positive_rate=None,
false_positive_rate,
filepath, hex_string,
hash_function)
self.__uint32_t_max = 2**32 - 1
self.__uint64_t_max = 2**64 - 1

def __str__(self):
''' correctly handle python 3 vs python2 encoding if necessary '''
Expand Down Expand Up @@ -110,20 +109,20 @@ def add_alt(self, hashes, num_els=1):
Returns:
int: Maximum number of insertions
'''
res = self.__uint32_t_max
res = UINT32_T_MAX
for i in list(range(0, self.number_hashes)):
k = int(hashes[i]) % self.number_bits
j = self._get_element(k)
tmp = j + num_els
if tmp <= self.__uint32_t_max:
if tmp <= UINT32_T_MAX:
self._bloom[k] = self._get_set_element(j + num_els)
else:
self._bloom[k] = self.__uint32_t_max
self._bloom[k] = UINT32_T_MAX
if self._bloom[k] < res:
res = self._bloom[k]
self.elements_added += num_els
if self.elements_added > self.__uint64_t_max:
self.elements_added = self.__uint64_t_max
if self.elements_added > UINT64_T_MAX:
self.elements_added = UINT64_T_MAX
return res

def check(self, key):
Expand All @@ -147,7 +146,7 @@ def check_alt(self, hashes):
Returns:
int: Maximum number of insertions
'''
res = self.__uint32_t_max
res = UINT32_T_MAX
for i in list(range(0, self.number_hashes)):
k = int(hashes[i]) % self.number_bits
tmp = self._get_element(k)
Expand Down Expand Up @@ -179,8 +178,8 @@ def remove_alt(self, hashes, num_els=1):
int: Maximum number of insertions after the removal
'''
tmp = self.check_alt(hashes)
if tmp == self.__uint32_t_max: # cannot remove if we have hit the max
return self.__uint32_t_max
if tmp == UINT32_T_MAX: # cannot remove if we have hit the max
return UINT32_T_MAX
elif tmp == 0:
return 0

Expand Down
7 changes: 7 additions & 0 deletions probables/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
''' Numeric limits shared across the project: the bounds of signed and
    unsigned 32-bit and 64-bit integers '''
# signed 32-bit bounds
INT32_T_MIN = -(1 << 31)
INT32_T_MAX = (1 << 31) - 1
# signed 64-bit bounds
INT64_T_MIN = -(1 << 63)
INT64_T_MAX = (1 << 63) - 1
# unsigned upper bounds (the lower bound is always 0)
UINT32_T_MAX = (1 << 32) - 1
UINT64_T_MAX = (1 << 64) - 1
22 changes: 9 additions & 13 deletions probables/countminsketch/countminsketch.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from .. exceptions import (InitializationError, NotSupportedError)
from .. hashes import (default_fnv_1a)
from .. utilities import (is_valid_file)
from .. constants import (INT32_T_MIN, INT32_T_MAX, INT64_T_MIN, INT64_T_MAX)


class CountMinSketch(object):
Expand Down Expand Up @@ -50,11 +51,6 @@ def __init__(self, width=None, depth=None, confidence=None,
self.__error_rate = 0.0
self.__elements_added = 0
self.__query_method = self.__min_query
# for python2 and python3 support
self.__int32_t_min = -2147483648
self.__int32_t_max = 2147483647
self.__int64_t_min = -9223372036854775808
self.__int64_t_max = 9223372036854775807

if is_valid_file(filepath):
self.__load(filepath)
Expand Down Expand Up @@ -216,13 +212,13 @@ def add_alt(self, hashes, num_els=1):
for i, val in enumerate(hashes):
t_bin = (val % self.__width) + (i * self.__width)
self._bins[t_bin] += num_els
if self._bins[t_bin] > self.__int32_t_max:
self._bins[t_bin] = self.__int32_t_max
if self._bins[t_bin] > INT32_T_MAX:
self._bins[t_bin] = INT32_T_MAX
res.append(self._bins[t_bin])
self.__elements_added += num_els

if self.__elements_added > self.__int64_t_max:
self.__elements_added = self.__int64_t_max
if self.__elements_added > INT64_T_MAX:
self.__elements_added = INT64_T_MAX
return self.__query_method(sorted(res))

def remove(self, key, num_els=1):
Expand Down Expand Up @@ -252,12 +248,12 @@ def remove_alt(self, hashes, num_els=1):
for i, val in enumerate(hashes):
t_bin = (val % self.__width) + (i * self.__width)
self._bins[t_bin] -= num_els
if self._bins[t_bin] < self.__int32_t_min:
self._bins[t_bin] = self.__int32_t_min
if self._bins[t_bin] < INT32_T_MIN:
self._bins[t_bin] = INT32_T_MIN
res.append(self._bins[t_bin])
self.__elements_added -= num_els
if self.__elements_added < self.__int64_t_min:
self.__elements_added = self.__int64_t_min
if self.__elements_added < INT64_T_MIN:
self.__elements_added = INT64_T_MIN

return self.__query_method(sorted(res))

Expand Down
Loading

0 comments on commit 5cc4b98

Please sign in to comment.