-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfast_levenshtein.py
62 lines (46 loc) · 1.69 KB
/
fast_levenshtein.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
class TrieNode:
def __init__(self, end: str = None):
self.children = dict()
self.end = end
def __setitem__(self, key, val):
self.children[key] = val
def __getitem__(self, key):
return self.children.get(key, None)
class LevenshTrie:
def __init__(self, words: list):
self.root = TrieNode()
for word in words:
self.insert(word)
def insert(self, word: list):
aux = self.root
for char in word:
if aux[char] is None:
aux[char] = TrieNode()
aux = aux[char]
aux.end = word
def search(self, word: str, maxDist: int):
ans = []
stack = [(child, letter, list(range(len(word) + 1)))
for letter, child in self.root.children.items()]
while stack:
node, letter, parent_row = stack.pop()
result_row = self._levenshtein(parent_row, letter, word)
current_dist = result_row[-1]
if current_dist <= maxDist and node.end is not None:
ans.append((node.end, current_dist))
if min(result_row) <= maxDist:
for letter, child in node.children.items():
stack.append((child,
letter,
result_row
))
return ans
def _levenshtein(self, row, letter, word):
result_row = [row[0] + 1]
for col in range(1, len(row)):
result_row.append(min(
row[col] + 1,
result_row[-1] + 1,
row[col-1] + (0 if word[col-1] == letter else 1)
))
return result_row