-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstoragebackend.py
84 lines (71 loc) · 2.52 KB
/
storagebackend.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import math
from abc import ABC, abstractmethod
from typing import Optional

from document import Document
class StorageBackend(ABC):
    """
    Abstract interface for a document store with keyword lookup and duplicate detection.

    Concrete backends implement the abstract methods. This base class only supplies the
    TF-IDF scoring helpers and a generic fallback for finding the duplicates of one document.
    """

    # ``Document`` is referenced in annotations only; the annotations are quoted so they are
    # not evaluated when the class body is executed.

    @abstractmethod
    def get_total_document_count(self) -> int:
        """
        Returns the total number of documents, as used by the inverse document frequency.
        :return: Number of documents
        """

    def _get_inverse_document_frequncy(self, docs_with_term) -> float:
        """
        Computes the inverse document frequency ``log(N / n)`` of a term.
        The method name keeps its original misspelling ("frequncy") so existing callers keep working.
        :param docs_with_term: Sized collection of the documents containing the term (``n`` is its length).
        :return: ``log(N / n)`` with ``N`` taken from ``get_total_document_count()``, or ``0.0`` when no
            document contains the term or the total count is not positive.
        """
        if not docs_with_term:
            return 0.0
        total = self.get_total_document_count()
        if total <= 0:
            # math.log() raises ValueError for non-positive input; treat an empty corpus like
            # the "term not found" case instead of crashing.
            return 0.0
        return math.log(total / len(docs_with_term))

    @staticmethod
    def _get_relevance(doc: "Document", term: str, idf: float) -> float:
        """
        Computes the TF-IDF relevance of a document for a term.
        :param doc: Document whose term frequency is queried via ``get_term_frequency``.
        :param term: The term in question
        :param idf: Inverse document frequency of the term
        :return: Term frequency of ``term`` in ``doc`` multiplied by ``idf``
        """
        return doc.get_term_frequency(term) * idf

    @abstractmethod
    def close(self):
        """
        Frees all hardware resources that this backend uses.
        :return: None
        """

    @abstractmethod
    def store(self, docs) -> bool:
        """
        Stores multiple documents into this backend that can be retrieved later.
        :param docs: A collection of documents that will be stored.
        :return: True if documents were stored successfully, False otherwise.
        """

    @abstractmethod
    def get(self, keyword: str):
        """
        Returns any documents that contain the given keyword.
        :param keyword: The keyword in question
        :return: Collection of documents
        """

    @abstractmethod
    def get_by_path(self, path: str) -> Optional["Document"]:
        """
        Returns the document associated with a specific file path.
        If there is no Document already stored for that path this will return None.
        :param path: the path of the document
        :return: Document for the path, or None
        """

    @abstractmethod
    def get_duplicates(self):
        """
        Returns all duplicates that are in this backend.
        The returned map has document hash codes as keys and a Collection of all documents
        with that hash code as values.
        :return: Map of all duplicates found
        """

    def get_duplicates_of(self, doc: "Document"):
        """
        Returns all duplicates of the given document.
        It is HIGHLY recommended to override this method for each backend since the default
        implementation gathers all duplicates of the whole backend just to look up one hash.
        If there are no duplicates for the document the returned Collection will be empty.
        :param doc: Document to find duplicates of
        :return: Collection of documents
        """
        duplicates = self.get_duplicates()
        # Single lookup with an empty default; also avoids shadowing the builtin ``hash``.
        return duplicates.get(doc.get_hash(), [])

    @abstractmethod
    def remove(self, doc: "Document") -> bool:
        """
        Removes the given document from this backend.
        :param doc: Document to remove
        :return: bool; presumably True when the document was removed, mirroring ``store``
            (not specified by the interface - confirm against the concrete backends).
        """