0% found this document useful (0 votes)
42 views3 pages

Imports used by the code: nltk.tokenize, nltk.corpus, nltk.stem, itertools, numpy.

This document contains code to perform sentence similarity comparison between two sentences. It tokenizes the sentences, removes stopwords, lemmatizes the words, calculates the WordNet similarity between each pair of words using WUP similarity, takes the maximum similarity value and calculates the average similarity score between 0-1 to classify the sentence pairs as similar, somewhat similar or not similar.

Uploaded by

femi
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
42 views3 pages

Imports used by the code: nltk.tokenize, nltk.corpus, nltk.stem, itertools, numpy.

This document contains code to perform sentence similarity comparison between two sentences. It tokenizes the sentences, removes stopwords, lemmatizes the words, calculates the WordNet similarity between each pair of words using WUP similarity, takes the maximum similarity value and calculates the average similarity score between 0-1 to classify the sentence pairs as similar, somewhat similar or not similar.

Uploaded by

femi
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 3

"""Sentence similarity via WordNet.

Tokenizes two sentences, removes stopwords and punctuation, lemmatizes the
remaining words, scores every cross-sentence word pair with Wu-Palmer (WUP)
synset similarity, and averages each word's best match into a 0-1 index used
to classify the pair as Similar / Somewhat Similar / Not Similar.
"""

from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords, wordnet
from nltk.stem import WordNetLemmatizer
from itertools import product
import numpy

##--------------- Sample sentence pairs (swap in any pair to test) ---------------##
# str1 = "Abhishek is a good boy."
# str2 = "Abhishek is not a bad boy."
# str1 = "Cat is drinking water."
# str2 = "Lions eat flesh."
# str1 = "He loves to play football."
# str2 = "Football is his favourite sport."
# str1 = "Many consider Maradona as the best player in soccer history."
# str2 = "Maradona is one of the best soccer player."
# str1 = "Ballmer has been vocal in the past warning that Linux is a threat to Microsoft."
# str2 = "In the memo, Ballmer reiterated the open-source threat to Microsoft."
# str1 = "The boy is fetching water from the well."
# str2 = "The lion is running in the forest."
# str1 = "A school is a place where kids go to study."
# str2 = "School is an institution for children who want to study."
# str1 = "The world knows it has lost a heroic champion of justice and freedom."
# str2 = "The earth recognizes the loss of a valiant champion of independence and justice."
# str1 = "A cemetery is a place where dead people's bodies or their ashes are buried."
# str2 = "A graveyard is an area of land ,sometimes near a church, where dead people are buried."

str1 = "I was given a card by her in the garden."
str2 = "In the garden, she gave me a card."

##--------------- Stopwords ---------------##
# BUG FIX: NLTK stopword corpora are keyed by lowercase language names —
# stopwords.words("English") raises an error; it must be "english".
stop_words = set(stopwords.words("english"))

##--------------- WordNet lemmatizer ---------------##
lemmatizer = WordNetLemmatizer()


def preprocess(sentence):
    """Return lemmatized, stopword-free, alphanumeric tokens of *sentence*."""
    return [
        lemmatizer.lemmatize(token)
        for token in word_tokenize(sentence)
        if token not in stop_words and token.isalnum()
    ]


def best_wup_similarity(word1, word2):
    """Maximum WUP similarity over every synset pair of the two words.

    Returns None when no synset pair yields a score (e.g. a word is not
    in WordNet, or wup_similarity is None for every pairing).
    """
    scores = []
    for sense1, sense2 in product(wordnet.synsets(word1), wordnet.synsets(word2)):
        score = wordnet.wup_similarity(sense1, sense2)
        # wup_similarity returns None for incomparable synsets; skip those.
        if score is not None:
            scores.append(score)
    return max(scores) if scores else None


##--------------- Tokenizing, stopword removal & lemmatizing s1/s2 ---------------##
lemm_sentence1 = preprocess(str1)
print(lemm_sentence1)

lemm_sentence2 = preprocess(str2)
print(lemm_sentence2)

##--------------- Similarity check for each word in s1 & s2 ---------------##
# For each word of sentence 1, keep its best similarity against any word of
# sentence 2; the sentence score is the mean of these per-word maxima.
final = []
for word1 in lemm_sentence1:
    per_word = [
        best for best in (best_wup_similarity(word1, word2) for word2 in lemm_sentence2)
        if best is not None
    ]
    if per_word:
        final.append(max(per_word))

##--------------- Classification output ---------------##
# Guard the empty case: numpy.mean([]) returns NaN with a RuntimeWarning,
# which would make the classification below meaningless.
similarity_index = round(float(numpy.mean(final)), 2) if final else 0.0
print("Sentence 1: ", str1)
print("Sentence 2: ", str2)
print("Similarity index value : ", similarity_index)

if similarity_index > 0.8:
    print("Similar")
elif similarity_index >= 0.6:
    print("Somewhat Similar")
else:
    print("Not Similar")

You might also like