From b36f8ae69830ef90cb633250eab9a97311185a90 Mon Sep 17 00:00:00 2001 From: Raphael Sourty Date: Sun, 2 Jun 2024 21:52:24 +0200 Subject: [PATCH] reduce bm25 memory usage --- pyproject.toml | 2 +- python/lenlp/sparse/bm25_vectorizer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 55fa785..a714a14 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "lenlp" -version = "1.1.0" +version = "1.1.1" description = "Natural Language Processing library for Python from Rust." authors = [ diff --git a/python/lenlp/sparse/bm25_vectorizer.py b/python/lenlp/sparse/bm25_vectorizer.py index e67492d..e7a1507 100644 --- a/python/lenlp/sparse/bm25_vectorizer.py +++ b/python/lenlp/sparse/bm25_vectorizer.py @@ -109,7 +109,7 @@ def _transform(self, matrix: csr_matrix) -> csr_matrix: ) ) - denominator = matrix.copy().tocsc() + denominator = matrix.tocsc() denominator.data += np.take(a=regularization, indices=denominator.indices) matrix.data = ( (matrix.data * (self.k1 + 1)) / denominator.tocsr().data