Skip to content

Commit

Permalink
Update Python version requirement in README.md, add type hints
Browse files Browse the repository at this point in the history
  • Loading branch information
LyubomirT committed Dec 1, 2023
1 parent a1f5b34 commit 078322d
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 33 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<div align="center">
<img src="https://img.shields.io/badge/Version-0.3.5-gold.svg" alt="version">
<img src="https://img.shields.io/badge/License-BSD%203--Clause-blue.svg" alt="license">
<img src="https://img.shields.io/badge/Python-3.2+-green.svg" alt="python">
<img src="https://img.shields.io/badge/Python-3.6+-green.svg" alt="python">
<img src="https://img.shields.io/badge/Platform-Linux%20%7C%20Windows%20%7C%20macOS-lightgrey.svg" alt="platform">
<!-- No dependencies -->
<img src="https://img.shields.io/badge/Dependencies-none-red.svg" alt="dependencies">
Expand All @@ -29,7 +29,7 @@ LESP is a lightweight, efficient spelling proofreader written in Python. It's de

## Installation 📥

Simply clone the repository and run the `demo.py` file to check it out. You don't need to install any additional libraries, so this is like plug-and-play. Just note that anything below Python 3.2 won't run this since old versions don't support `concurrent.futures`, which is used to speed up the process.
Simply clone the repository and run the `demo.py` file to check it out. You don't need to install any additional libraries, so this is like plug-and-play. Just note that anything below Python 3.6 won't run this: the code uses variable annotations and f-strings, both introduced in Python 3.6, in addition to `concurrent.futures` (available since Python 3.2), which is used to speed up the process.

PyPI package coming soon, so stay tuned for a more convenient way to install LESP!

Expand Down
67 changes: 36 additions & 31 deletions lesp/autocorrect.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
from typing import List, Optional, Union

import concurrent.futures
import os

class Proofreader:
def __init__(self, wordlist_path="lesp-wordlist.txt"):
self.wordlist_path = wordlist_path
def __init__(self, wordlist_path: str = "lesp-wordlist.txt") -> None:
self.wordlist_path: str = wordlist_path
self.load_wordlist()

def load_wordlist(self):
def load_wordlist(self) -> None:
try:
with open(self.wordlist_path, "r") as f:
self.wordlist = f.read().strip().split("\n")
self.wordlist: List[str] = f.read().strip().split("\n")
# Remove duplicate words in the wordlist
self.wordlist = list(set(self.wordlist))
# Remove leading and trailing whitespaces from each word
Expand All @@ -20,10 +22,10 @@ def load_wordlist(self):
raise FileNotFoundError(f"{self.wordlist_path} not found!")

@staticmethod
def get_similarity_score(word1, word2):
len1 = len(word1)
len2 = len(word2)
matrix = [[0 for j in range(len2 + 1)] for i in range(len1 + 1)]
def get_similarity_score(word1: str, word2: str) -> float:
len1: int = len(word1)
len2: int = len(word2)
matrix: List[List[int]] = [[0 for j in range(len2 + 1)] for i in range(len1 + 1)]

for i in range(len1 + 1):
matrix[i][0] = i
Expand All @@ -32,26 +34,29 @@ def get_similarity_score(word1, word2):

for i in range(1, len1 + 1):
for j in range(1, len2 + 1):
cost = 0 if word1[i - 1] == word2[j - 1] else 1
cost: int = 0 if word1[i - 1] == word2[j - 1] else 1
matrix[i][j] = min(matrix[i - 1][j] + 1, matrix[i][j - 1] + 1, matrix[i - 1][j - 1] + cost)

score = 1 - matrix[len1][len2] / max(len1, len2)
score: float = 1 - matrix[len1][len2] / max(len1, len2)
return score

@staticmethod
def get_similar_worker(args):
def get_similar_worker(args: tuple) -> List[str]:
word: str
similarity_rate: float
wordlist_chunk: List[str]
word, similarity_rate, wordlist_chunk = args
similar_words = []
similar_words: List[str] = []
for w in wordlist_chunk:
score = Proofreader.get_similarity_score(word, w)
score: float = Proofreader.get_similarity_score(word, w)
if score >= similarity_rate:
similar_words.append(w)
return similar_words

def is_correct(self, word):
def is_correct(self, word: str) -> bool:
return word.lower() in self.wordlist

def get_similar(self, word, similarity_rate, chunks=4, upto=3):
def get_similar(self, word: str, similarity_rate: float, chunks: int = 4, upto: int = 3) -> Optional[List[str]]:
if upto < 1:
raise ValueError("Can only return 1 or more similar words.")
if chunks < 1:
Expand All @@ -60,13 +65,13 @@ def get_similar(self, word, similarity_rate, chunks=4, upto=3):
raise ValueError("Similarity rate must be between 0 and 1.")

word = word.lower()
similar_words = []
chunk_size = len(self.wordlist) // chunks
similar_words: List[str] = []
chunk_size: int = len(self.wordlist) // chunks

chunks = [(word, similarity_rate, self.wordlist[i:i + chunk_size]) for i in range(0, len(self.wordlist), chunk_size)]
chunks: List[tuple] = [(word, similarity_rate, self.wordlist[i:i + chunk_size]) for i in range(0, len(self.wordlist), chunk_size)]

with concurrent.futures.ThreadPoolExecutor() as executor:
results = list(executor.map(Proofreader.get_similar_worker, chunks))
results: List[List[str]] = list(executor.map(Proofreader.get_similar_worker, chunks))

for similar_word_list in results:
similar_words.extend(similar_word_list)
Expand All @@ -79,19 +84,19 @@ def get_similar(self, word, similarity_rate, chunks=4, upto=3):
# Return only upto similar words
return similar_words[:upto]

def backup(self, path="wordlist_backup"):
def backup(self, path: str = "wordlist_backup") -> None:
if os.path.isdir(path):
raise ValueError("Path specified is a directory!")
with open(path, "w") as f:
f.write("\n".join(self.wordlist))

def restore(self, overwrite_current, path="wordlist_backup"):
def restore(self, overwrite_current: bool, path: str = "wordlist_backup") -> None:
try:
if not os.path.isfile(path):
raise FileNotFoundError("Backup file not found!")

with open(path, "r") as f:
wordlist_ = f.read().split("\n")
wordlist_: List[str] = f.read().split("\n")

if not all(word.isalpha() for word in wordlist_):
raise ValueError("Invalid backup file format. Words must contain only alphabetic characters.")
Expand All @@ -104,7 +109,7 @@ def restore(self, overwrite_current, path="wordlist_backup"):
except Exception as e:
raise ValueError(f"Error during restore: {str(e)}")

def extend_wordlist(self, word):
def extend_wordlist(self, word: Union[str, List[str], tuple]) -> None:
if isinstance(word, str):
if word.isalpha():
self.wordlist.append(word.lower())
Expand All @@ -119,7 +124,7 @@ def extend_wordlist(self, word):
else:
raise TypeError("Invalid input type. Please provide a string, list, or tuple of alphabetic words.")

def remove_from_wordlist(self, word):
def remove_from_wordlist(self, word: Union[str, List[str], tuple]) -> None:
if isinstance(word, str):
if word.isalpha():
if word in self.wordlist:
Expand All @@ -141,12 +146,12 @@ def remove_from_wordlist(self, word):
raise TypeError("Invalid input type. Please provide a string, list, or tuple of alphabetic words.")

@staticmethod
def stack(source, destination):
def stack(source: str, destination: str) -> None:
try:
with open(source, "r") as f:
source_words = f.read().split("\n")
source_words: List[str] = f.read().split("\n")
with open(destination, "r") as f:
destination_words = f.read().split("\n")
destination_words: List[str] = f.read().split("\n")

if any(len(word.split()) > 1 for word in source_words):
raise ValueError("Invalid source file format. Each word must be on a separate line.")
Expand All @@ -168,12 +173,12 @@ def stack(source, destination):
raise ValueError(f"Error during stacking: {str(e)}")

@staticmethod
def merge_delete(source, destination):
def merge_delete(source: str, destination: str) -> None:
try:
with open(source, "r") as f:
source_words = f.read().split("\n")
source_words: List[str] = f.read().split("\n")
with open(destination, "r") as f:
destination_words = f.read().split("\n")
destination_words: List[str] = f.read().split("\n")

if any(len(word.split()) > 1 for word in source_words):
raise ValueError("Invalid source file format. Each word must be on a separate line.")
Expand All @@ -196,4 +201,4 @@ def merge_delete(source, destination):
except FileNotFoundError as e:
raise FileNotFoundError(f"File not found: {str(e)}")
except Exception as e:
raise ValueError(f"Error during merge delete: {str(e)}")
raise ValueError(f"Error during merge delete: {str(e)}")

0 comments on commit 078322d

Please sign in to comment.