Skip to content

Commit

Permalink
Merge pull request #62 from x-tabdeveloping/s3_reweighting
Browse files Browse the repository at this point in the history
S3 reweighting
  • Loading branch information
x-tabdeveloping authored Aug 12, 2024
2 parents 52fddac + 2075c91 commit a3b3241
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 16 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ line-length=79

[tool.poetry]
name = "turftopic"
-version = "0.5.1"
+version = "0.5.2"
description = "Topic modeling with contextual representations from sentence transformers."
authors = ["Márton Kardos <power.up1163@gmail.com>"]
license = "MIT"
Expand Down
23 changes: 8 additions & 15 deletions turftopic/models/decomp.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
from sklearn.base import TransformerMixin
from sklearn.decomposition import FastICA
from sklearn.feature_extraction.text import CountVectorizer
-from sklearn.metrics.pairwise import euclidean_distances
-from sklearn.preprocessing import scale
+from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances

from turftopic.base import ContextualModel, Encoder
from turftopic.vectorizer import default_vectorizer
Expand Down Expand Up @@ -111,20 +110,14 @@ def fit_transform(
return doc_topic

     def reweight_strong(self):
-        """Reweights words so that only the strongest components for a word
-        has a value.
+        """Reweights words based on their angle in ICA-space to the axis
+        base vectors.
         """
-        n_topics, n_vocab = self.components_.shape
-        mean_component = np.mean(self.components_, axis=1)
-        for i_vocab in range(n_vocab):
-            word_rep = self.components_[:, i_vocab]
-            min_topic = np.argmin(word_rep)
-            max_topic = np.argmax(word_rep)
-            for i_topic in range(n_topics):
-                if i_topic not in (min_topic, max_topic):
-                    self.components_[i_topic, i_vocab] = mean_component[
-                        i_topic
-                    ]
+        word_vectors = self.components_.T
+        n_topics = self.components_.shape[0]
+        axis_vectors = np.eye(n_topics)
+        cosine_components = cosine_similarity(axis_vectors, word_vectors)
+        self.components_ = cosine_components
         return self

def transform(
Expand Down

0 comments on commit a3b3241

Please sign in to comment.