NLP Lab Manual
NLP Lab Manual
a)Tokenization
import nltk
from nltk.tokenize import sent_tokenize, word_tokenize

# Fetch the tokenizer data before any tokenizer is called.
nltk.download('punkt_tab')

# Sample sentence; the acronym for natural language processing is "NLP".
text = "Natural language processing (NLP) is a field"
print(sent_tokenize(text))  # the text split into sentences
print(word_tokenize(text))  # the text split into word/punctuation tokens
output:
import nltk
from nltk.corpus import stopwords

# Make sure the stop-word corpus is available locally before reading it.
nltk.download('stopwords')

# Print NLTK's English stop-word list.
english_stop_words = stopwords.words('english')
print(english_stop_words)
output:
['a', 'about', 'above', 'after', 'again', 'against', 'ain', 'all', 'am', 'an', 'and', 'any', 'are', 'aren', "aren't", 'as', 'at', 'be',
'because', 'been', 'before', 'being', 'below', 'between', 'both', 'but', 'by', 'can', 'couldn', "couldn't", 'd', 'did',
'didn', "didn't", 'do', 'does', 'doesn', "doesn't", 'doing', 'don', "don't", 'down', 'during', 'each', 'few', 'for',
'from', 'further', 'had', 'hadn', "hadn't", 'has', 'hasn', "hasn't", 'have', 'haven', "haven't", 'having', 'he', "he'd",
"he'll", 'her', 'here', 'hers', 'herself', "he's", 'him', 'himself', 'his', 'how', 'i', "i'd", 'if', "i'll", "i'm", 'in', 'into', 'is',
'isn', "isn't", 'it', "it'd", "it'll", "it's", 'its', 'itself', "i've", 'just', 'll', 'm', 'ma', 'me', 'mightn', "mightn't", 'more',
'most', 'mustn', "mustn't", 'my', 'myself', 'needn', "needn't", 'no', 'nor', 'not', 'now', 'o', 'of', 'off', 'on', 'once',
'only', 'or', 'other', 'our', 'ours', 'ourselves', 'out', 'over', 'own', 're', 's', 'same', 'shan', "shan't", 'she', "she'd",
"she'll", "she's", 'should', 'shouldn', "shouldn't", "should've", 'so', 'some', 'such', 't', 'than', 'that', "that'll",
'the', 'their', 'theirs', 'them', 'themselves', 'then', 'there', 'these', 'they', "they'd", "they'll", "they're", "they've",
'this', 'those', 'through', 'to', 'too', 'under', 'until', 'up', 've', 'very', 'was', 'wasn', "wasn't", 'we', "we'd", "we'll",
"we're", 'were', 'weren', "weren't", "we've", 'what', 'when', 'where', 'which', 'while', 'who', 'whom', 'why',
'will', 'with', 'won', "won't", 'wouldn', "wouldn't", 'y', 'you', "you'd", "you'll", 'your', "you're", 'yours',
'yourself', 'yourselves', "you've"]
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
def preprocess_text(text):
    """Tokenize *text* and drop English stop words.

    Args:
        text: The raw string to process.

    Returns:
        A list of the tokens produced by ``word_tokenize`` whose lowercase
        form is not in NLTK's English stop-word list. Original casing and
        punctuation tokens are kept.
    """
    tokens = word_tokenize(text)
    # A set gives O(1) membership tests while filtering.
    stop_words = set(stopwords.words('english'))
    return [word for word in tokens if word.lower() not in stop_words]
def main():
    """Run the stop-word removal demo on a fixed sentence and print the result."""
    # Adjacent literals are concatenated, keeping the sentence on one
    # logical line without exceeding the line-length limit.
    text = ("NLTK is a leading platform for building Python programs to "
            "work with human language data.")
    preprocessed_text = preprocess_text(text)
    print("Original Text:")
    print(text)
    print("\nTokenized Text:")
    print(preprocessed_text)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
output:
Original Text:
NLTK is a leading platform for building Python programs to work with human
language data.
Tokenized Text:
['NLTK', 'leading', 'platform', 'building', 'Python', 'programs', 'work', 'human',
'language', 'data', '.']
EXPERIMENT 2
2Q) Write a Python program to implement the Porter stemmer algorithm for
stemming.
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
def preprocess_text(text):
    """Tokenize *text* and remove English stop words.

    Args:
        text: The raw string to process.

    Returns:
        A list of the tokens produced by ``word_tokenize`` whose lowercase
        form is not in NLTK's English stop-word list. Original casing and
        punctuation tokens are kept.
    """
    # Tokenization
    tokens = word_tokenize(text)
    # Removing stop words (a set gives O(1) membership tests)
    stop_words = set(stopwords.words('english'))
    return [word for word in tokens if word.lower() not in stop_words]
def apply_stemming(tokens):
    """Stem every token with the Porter stemmer.

    Args:
        tokens: An iterable of word strings.

    Returns:
        A list with the Porter stem of each token, in the same order.
    """
    porter = PorterStemmer()
    return [porter.stem(token) for token in tokens]
def main():
    """Run the stop-word removal and stemming demo on a fixed sentence."""
    # Adjacent literals are concatenated, keeping the sentence on one
    # logical line without exceeding the line-length limit.
    text = ("NLTK is a leading platform for building Python programs to "
            "work with human language data.")
    preprocessed_text = preprocess_text(text)
    stemmed_text = apply_stemming(preprocessed_text)
    print("Original Text:")
    print(text)
    print("\nTokenized Text:")
    print(preprocessed_text)
    print("\nStemmed Text:")
    print(stemmed_text)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
output:
Original Text:
NLTK is a leading platform for building Python programs to work with
human language data.
Tokenized Text:
['NLTK', 'leading', 'platform', 'building', 'Python', 'programs', 'work',
'human', 'language', 'data', '.']
Stemmed Text:
['nltk', 'lead', 'platform', 'build', 'python', 'program', 'work', 'human',
'languag', 'data', '.']
EXPERIMENT 3
3Q) Write an NLTK Python program that performs word analysis and generation.
import nltk
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer
import random
def word_analysis(word):
# Get synsets (word senses) from WordNet
synsets = wordnet.synsets(word)
================================================ RESTART:
C:/Users/admin/nlp3.py
================================================
[nltk_data] Downloading package wordnet to
[nltk_data] C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data] Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data] C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data] Package averaged_perceptron_tagger is already up-to-
[nltk_data] date!
Word: bank
Word Analysis:
Sense 1: sloping land (especially the slope beside a body of water)
Examples: ['they pulled the canoe up on the bank', 'he sat on the bank of the river
and watched the currents']
Sense 2: a financial institution that accepts deposits and channels the money into
lending activities
Examples: ['he cashed a check at the bank', 'that bank holds the mortgage on my
home']
Sense 5: a supply or stock held in reserve for future use (especially in emergencies)
Examples: []
Sense 6: the funds held by a gambling house or the dealer in some gambling games
Examples: ['he tried to break the bank at Monte Carlo']
Sense 7: a slope in the turn of a road or track; the outside is higher than the inside
in order to reduce the effects of centrifugal force
Examples: []
Sense 8: a container (usually with a slot in the top) for keeping money at home
Examples: ['the coin bank was empty']
Sense 10: a flight maneuver; aircraft tips laterally about its longitudinal axis
(especially in turning)
Examples: ['the plane went into a steep bank']
Related Word:
give
EXPERIMENT 4
4Q) Create a sample list of at least 5 words with ambiguous senses and write a Python
program to implement WSD (word sense disambiguation).
import nltk
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer
def get_word_senses(word):
# Get synsets (word senses) from WordNet
synsets = wordnet.synsets(word)
return similarity
output:
======================== RESTART: C:/Users/admin/nlp.py
========================
[nltk_data] Downloading package wordnet to
[nltk_data] C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data] Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data] C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data] Package averaged_perceptron_tagger is already up-to-
[nltk_data] date!
Word: bank
Context: I went to the bank to deposit my paycheck.
Best sense: cover with ashes so to control the rate of burning
Word: bat
Context: The bat flew through the dark cave.
Best sense: use a bat
Word: cloud
Context: The company uses cloud computing to store its data.
Best sense: billow up in the form of a cloud
Word: spring
Context: The spring season is my favorite time of year.
Best sense: the season of growth
Word: saw
Context: I saw the movie last night.
Best sense: cut with a saw
EXPERIMENT 5
5Q) Install the NLTK toolkit and perform stemming.
Install the NLTK toolkit
Here are the steps to install the NLTK toolkit in Python:
## Step 1: Install NLTK using pip
1. Open your terminal or command prompt.
2. Type the following command: pip install nltk
3. Press Enter to run the command.
That's it! You should now have NLTK installed and be ready to start working with
natural language processing tasks.
perform stemming:
import nltk
from nltk.stem import PorterStemmer, WordNetLemmatizer
# Initialize stemmers
porter_stemmer = PorterStemmer()
# NOTE(review): the lemmatizer is created here but never called in this
# listing, although the recorded output shows a "WordNet Lemmatize" line.
# The lemmatization step appears to be missing from the manual; confirm.
wordnet_lemmatizer = WordNetLemmatizer()


def perform_stemming(word):
    """Print the Porter stem of *word*.

    Args:
        word: The word to stem.

    Returns:
        None. The stem is written to standard output.
    """
    # Perform stemming using Porter Stemmer
    porter_stem = porter_stemmer.stem(word)
    print(f"Porter Stem: {porter_stem}")


# Test stemming
word = "orbiting"
print(f"Original Word: {word}")
perform_stemming(word)
output:
================================================ RESTART:
C:/Users/admin/nlp5.py
================================================
[nltk_data] Downloading package wordnet to
[nltk_data] C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data] Package wordnet is already up-to-date!
Original Word: orbiting
Porter Stem: orbit
WordNet Lemmatize: orbiting
EXPERIMENT 6
6Q) Create a sample list of at least 10 words with POS tagging and find the POS for
any given word.
Here's a list of 10 words with their corresponding Part-of-Speech (POS) tags:
Word POS Tag
--------------------------------
Run VB (Verb)
Dog NN (Noun)
Happy JJ (Adjective)
Quickly RB (Adverb)
Eat VB (Verb)
Big JJ (Adjective)
Car NN (Noun)
Slowly RB (Adverb)
Think VB (Verb)
Beautiful JJ (Adjective)
Here's a Python code using NLTK to perform POS tagging:
import nltk
def perform_pos_tagging(sentence):
# Tokenize sentence
tokens = nltk.word_tokenize(sentence)
return tagged
EXPERIMENT 7
7Q) Write a Python program to
a) Perform morphological analysis using the NLTK library
b) Generate n-grams using the NLTK n-grams library
c) Implement n-gram smoothing
def perform_morphological_analysis(word):
# Initialize lemmatizer
lemmatizer = WordNetLemmatizer()
# Perform lemmatization
if wordnet_tag:
lemma = lemmatizer.lemmatize(word, pos=wordnet_tag)
else:
lemma = word
output:
=============================================== RESTART:
C:/Users/admin/nlp7a.py
===============================================
[nltk_data] Downloading package wordnet to
[nltk_data] C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data] Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data] C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data] Package averaged_perceptron_tagger is already up-to-
[nltk_data] date!
Word: running
Part-of-speech tag: VBG
Lemma: run
# Generate n-grams
n_grams = list(ngrams(tokens, n))
return n_grams
# Test n-gram generation: build the n-grams of a sample sentence and print
# one per line.
text = "This is a sample text for generating n-grams."
n = 3
n_grams = generate_ngrams(text, n)
print(f"{n}-grams:")
for n_gram in n_grams:
    print(n_gram)
output:
============================================== RESTART:
C:/Users/admin/nlp7b,py.py
==============================================
[nltk_data] Downloading package punkt to
[nltk_data] C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data] Package punkt is already up-to-date!
3-grams:
('This', 'is', 'a')
('is', 'a', 'sample')
('a', 'sample', 'text')
('sample', 'text', 'for')
('text', 'for', 'generating')
('for', 'generating', 'n-grams')
('generating', 'n-grams', '.')
# Generate n-grams
n_grams = list(ngrams(tokens, n))
return n_gram_probabilities
# Compute the raw n-gram probabilities, then their smoothed counterpart.
n_gram_probabilities = calculate_ngram_probabilities(text, n)
smoothed_n_gram_probabilities = smooth_ngram_probabilities(
    n_gram_probabilities, alpha)
# Only the unsmoothed probabilities are printed; the smoothed values are
# computed above but not displayed.
print("N-gram Probabilities:")
for n_gram, prob in n_gram_probabilities.items():
    print(f"{n_gram}: {prob}")
output:
================================================ RESTART:
C:/Users/admin/nlp7c.py
===============================================
[nltk_data] Downloading package punkt to
[nltk_data] C:\Users\admin\AppData\Roaming\nltk_data...
[nltk_data] Package punkt is already up-to-date!
N-gram Probabilities:
('This', 'is'): 0.06666666666666667
('is', 'a'): 0.06666666666666667
('a', 'sample'): 0.13333333333333333
('sample', 'text'): 0.06666666666666667
('text', 'for'): 0.06666666666666667
('for', 'generating'): 0.06666666666666667
('generating', 'n-grams'): 0.06666666666666667
('n-grams', '.'): 0.06666666666666667
('.', 'This'): 0.06666666666666667
('This', 'text'): 0.06666666666666667
('text', 'is'): 0.06666666666666667
('is', 'just'): 0.06666666666666667
('just', 'a'): 0.06666666666666667
('sample', '.'): 0.06666666666666667
EXPERIMENT 8
8) Using the NLTK package, convert an audio file to text and a text file to an audio file.
Program to Convert audio file to text:
## Audio to Text
#This program uses the speech_recognition library to convert an audio file to text.
import speech_recognition as sr
from nltk.tokenize import word_tokenize
def audio_to_text(audio_file):
# Create a speech recognition object
r = sr.Recognizer()
Tokenized Text:
['Hello', ',', 'how', 'are', 'you', '?']
## Text to Audio
#This program uses the gTTS library to convert a text file to an audio file.
Output:
Tokenized Text:
['Hello', ',', 'world', '!', 'This', 'is', 'an', 'example', 'text', '.']
Note: