-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathTextSemanticSimilarity.py
63 lines (43 loc) · 1.32 KB
/
TextSemanticSimilarity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from SOCPMI import SemanticSimilarity
from Cleaner import clean
from Auxiliary import Commonwords, DisplayMatrixform
from SimilarityCount import SimilarityCount
from SyntacticSimilarity import SyntacticSimilarity
# Demo script: scores how similar two sentences are by blending a syntactic
# and a semantic word-to-word similarity (helpers imported at the top of the
# file) and folding the best word matches into a single sentence-level value.
s1 = "Leo is a good actor"
s2 = "Leo is a very talented artist"

# Removing punctuation and stopwords, storing the important words in a list.
sent1 = clean(s1)
sent2 = clean(s2)

# Lengths are taken BEFORE the common words are stripped out below; the final
# normalisation uses these full cleaned-sentence lengths.
m = len(sent1)
n = len(sent2)

# Get the list of common words and the two word lists with those words
# removed. NOTE: s1/s2 are rebound here from the raw strings to word lists.
common, s1, s2 = Commonwords(sent1, sent2)

# Make sure s1 is never the longer of the two lists.
if len(s2) < len(s1):
    s1, s2 = s2, s1

# Syntactic similarity for every (word of s1, word of s2) pair.
synmat = {i: {j: SyntacticSimilarity(i, j) for j in s2} for i in s1}

# Parameters forwarded to SemanticSimilarity.
a = 2  # NOTE(review): meaning not visible from this file -- confirm in SOCPMI
delta = 0.7  # Constant depends on the size of the training corpus
gamma = 3  # Defines the weight given to semantic measure. Should be greater than 1.

# Semantic similarity for every (word of s1, word of s2) pair.
semmat = {
    i: {j: SemanticSimilarity(i, j, a, delta, gamma) for j in s2}
    for i in s1
}

DisplayMatrixform(synmat, s1, s2)
DisplayMatrixform(semmat, s1, s2)

# Combined matrix: fixed-weight blend of the two similarity measures.
SYNTACTIC_WEIGHT = 0.3
SEMANTIC_WEIGHT = 0.7
primat = {
    i: {
        j: SYNTACTIC_WEIGHT * synmat[i][j] + SEMANTIC_WEIGHT * semmat[i][j]
        for j in s2
    }
    for i in s1
}
DisplayMatrixform(primat, s1, s2)

# out1/out2 are handed to SimilarityCount and not read again in this script
# (presumably filled in by the callee -- verify against SimilarityCount).
out1 = []
out2 = []
rhosum = SimilarityCount(primat, s1, s2, out1, out2)
print("Sumattion of maximum terms: ", rhosum)

# Normalise by the cleaned-sentence lengths. If either sentence cleaned down
# to nothing, m*n is 0 and the formula is undefined; report 0.0 instead of
# crashing with ZeroDivisionError.
if m == 0 or n == 0:
    sentencesimilarity = 0.0
else:
    sentencesimilarity = (len(common) + rhosum) * (m + n) / (2 * m * n)
print("Sentence Similarity Measure: ", sentencesimilarity)