Lab

The document consists of multiple labs demonstrating the use of various natural language processing techniques using libraries like Gensim and Transformers. Labs cover tasks such as loading pre-trained word vectors, finding similar words, visualizing word embeddings, enriching prompts, generating text, sentiment analysis, summarization, and fetching institution details from Wikipedia. Each lab includes code snippets and functionalities aimed at exploring and applying NLP models and techniques.

Lab 1

import gensim.downloader as gen

def load_model():
    print("Loading pre-trained word vectors...")
    m = gen.load("glove-wiki-gigaword-100")
    print("Model loaded successfully!")
    return m

def similar_word(model, word1, word2, word3):
    # Vector arithmetic: word1 - word2 + word3
    try:
        print(f"\nExploring relationships: {word1} - {word2} + {word3}")
        result = model.most_similar(positive=[word1, word3], negative=[word2])
        print("Most similar words:")
        for word, score in result:
            print(f"{word}: {score:.4f}")
    except KeyError as e:
        print(f"Word '{e.args[0]}' not found in the vocabulary.")

def main():
    model = load_model()
    while True:
        print("\nEnter three words (or type 'exit' to quit):")
        word1 = input("Word 1: ").strip().lower()
        if word1 == 'exit':
            break
        word2 = input("Word 2: ").strip().lower()
        word3 = input("Word 3: ").strip().lower()
        similar_word(model, word1, word2, word3)

main()
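As a quick non-interactive check (an illustrative sketch reusing the two functions above, not part of the lab itself), the classic vector-arithmetic analogy can be run directly:

# Illustrative: "king" - "man" + "woman" should rank "queen" highly
demo_model = load_model()
similar_word(demo_model, "king", "man", "woman")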

Lab 2

import gensim.downloader as api
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import numpy as np

def load_model():
    print("Loading pre-trained word vectors...")
    model = api.load("glove-wiki-gigaword-100")
    print("Model loaded successfully!")
    return model

def visual(model, words, method='pca'):
    # Filter out-of-vocabulary words first so labels stay aligned with vectors
    words = [word for word in words if word in model]
    vectors = np.array([model[word] for word in words])
    reducer = PCA(n_components=2) if method == 'pca' else TSNE(n_components=2, perplexity=5, random_state=42)
    reduced_vectors = reducer.fit_transform(vectors)
    plt.figure(figsize=(10, 6))
    for word, coord in zip(words, reduced_vectors):
        plt.scatter(coord[0], coord[1])
        plt.text(coord[0] + 0.05, coord[1] + 0.05, word, fontsize=12)
    plt.title(f"Word Embeddings Visualization using {method.upper()}")
    plt.show()

def similar_words(model, word, top_n=5):
    try:
        results = model.most_similar(word, topn=top_n)
        print(f"Top {top_n} words similar to '{word}':")
        for similar_word, similarity in results:
            print(f"{similar_word}: {similarity:.4f}")
    except KeyError:
        print(f"Word '{word}' not found in the vocabulary.")

def main():
    model = load_model()
    domain_words = ["computer", "technology", "internet", "software", "hardware", "AI", "machine", "data", "network"]
    visual(model, domain_words, method='pca')
    while True:
        word = input("Enter a word to find similar words (or 'exit' to quit): ").strip().lower()
        if word == 'exit':
            break
        similar_words(model, word)

main()
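For comparison, the same word list can be projected with t-SNE instead of PCA by reusing the `visual` helper above (an illustrative sketch; the perplexity of 5 set inside `visual` must stay below the number of plotted words):

# Illustrative: t-SNE projection of the same nine domain words
m = load_model()
words = ["computer", "technology", "internet", "software", "hardware", "AI", "machine", "data", "network"]
visual(m, words, method='tsne')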

Lab 3

from gensim.models import Word2Vec
import nltk
from nltk.tokenize import word_tokenize

nltk.download('punkt')  # tokenizer data required by word_tokenize

legal_corpus = [
    "The plaintiff filed a lawsuit against the defendant for breach of contract.",
    "The court ruled in favor of the defendant due to lack of evidence.",
    "A new legal precedent was set in the case of intellectual property rights.",
    "The judge dismissed the case citing jurisdictional issues.",
    "The attorney argued that the contract was legally binding.",
]

legal_sentences = [word_tokenize(sentence.lower()) for sentence in legal_corpus]

model = Word2Vec(sentences=legal_sentences, vector_size=100, window=5, min_count=1, workers=4)

model.save("legal_word2vec.model")
model = Word2Vec.load("legal_word2vec.model")

word = "court"
if word in model.wv:
    print(f"Words most similar to '{word}':")
    for similar_word, similarity in model.wv.most_similar(word):
        print(f"{similar_word}: {similarity:.4f}")
else:
    print(f"'{word}' not found in vocabulary.")

Lab 4

import gensim.downloader as api
from transformers import pipeline

embedding_model = api.load("glove-wiki-gigaword-100")

original_prompt = "Describe the beautiful landscapes during sunset."

def enrich_prompt(prompt, embedding_model, n=3):
    words = prompt.split()
    enriched_prompt = []
    for word in words:
        word_lower = word.lower()
        if word_lower in embedding_model:
            similar_words = embedding_model.most_similar(word_lower, topn=n)
            similar_list = [w[0] for w in similar_words]
            enriched_prompt.append(" ".join(similar_list))
        else:
            # Words with attached punctuation (e.g. "sunset.") miss the vocabulary
            # lookup and pass through unchanged
            enriched_prompt.append(word)
    return " ".join(enriched_prompt)

enriched_prompt = enrich_prompt(original_prompt, embedding_model)

generator = pipeline("text-generation", model="gpt2")

original_response = generator(original_prompt, max_length=60, num_return_sequences=1)[0]['generated_text']
enriched_response = generator(enriched_prompt, max_length=60, num_return_sequences=1)[0]['generated_text']

print("Original Prompt:\n", original_prompt)
print("\nGPT-2 Response:\n", original_response)
print("\n" + "="*80)
print("\nEnriched Prompt:\n", enriched_prompt)
print("\nGPT-2 Response:\n", enriched_response)

Lab 5

import gensim.downloader as api

print("Loading pre-trained Word2Vec embeddings...")
word_vectors = api.load("word2vec-google-news-300")

def get_similar_words(seed_word, top_n=5):
    try:
        similar = word_vectors.most_similar(seed_word, topn=top_n)
        return [word for word, _ in similar]
    except KeyError:
        print(f"'{seed_word}' is not in the vocabulary. Please try another word.")
        return []

def construct_paragraph(seed_word, similar_words):
    if len(similar_words) < 5:
        return "Not enough similar words to create a story."
    sentences = [
        f"Once upon a time, there was a {seed_word}.",
        f"The {seed_word} was known for its connection to {similar_words[0]} and {similar_words[1]}.",
        f"One day, the {seed_word} encountered a {similar_words[2]} and they became great friends.",
        f"Together, they explored the world of {similar_words[3]} and discovered the wonders of {similar_words[4]}.",
        f"In the end, the {seed_word} realized that life is full of surprises and adventures."
    ]
    return " ".join(sentences)

def main():
    seed_word = input("Enter a seed word: ").strip().lower()
    similar = get_similar_words(seed_word)
    if not similar:
        return
    print(f"\nSimilar words to '{seed_word}': {', '.join(similar)}")
    paragraph = construct_paragraph(seed_word, similar)
    print("\nHere's a short paragraph/story:\n")
    print(paragraph)

if __name__ == "__main__":
    main()
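Note that word2vec-google-news-300 is a large download (roughly 1.6 GB) on first use. A non-interactive run of the two helpers above might look like this (illustrative seed word):

# Illustrative: generate a story for a fixed seed word
words = get_similar_words("ocean")
if words:
    print(construct_paragraph("ocean", words))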

Lab 6

from transformers import pipeline

sentiment_analyzer = pipeline(
    task="sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english"
)

def analyze_sentiment(text):
    result = sentiment_analyzer(text)
    return result

def main():
    sentences = [
        "I love this product! It's amazing.",
        "The service was terrible and I'm very disappointed.",
        "The movie was okay, not great but not bad either.",
        "This is the best day of my life!",
        "I feel so frustrated with this situation."
    ]
    for sentence in sentences:
        sentiment_result = analyze_sentiment(sentence)
        print(f"Sentence: {sentence}")
        print(f"Sentiment: {sentiment_result[0]['label']} (confidence: {sentiment_result[0]['score']:.4f})")
        print("-" * 60)

main()
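The pipeline also accepts a list of strings in a single call, which avoids looping sentence by sentence (a minimal sketch against the analyzer defined above, with made-up example texts):

# Illustrative: batch inference over several texts at once
results = sentiment_analyzer(["Great value for money.", "Never buying this again."])
for r in results:
    print(f"{r['label']}: {r['score']:.4f}")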
Lab 7

from transformers import pipeline

model_name = "facebook/bart-large-cnn"
summarizer = pipeline("summarization", model=model_name)

def summarize_text(text, max_length=130, min_length=30, do_sample=False):
    summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=do_sample)
    return summary[0]['summary_text']

if __name__ == "__main__":
    passage = """
    The Hugging Face Transformers library provides an easy-to-use interface for working
    with pre-trained models for various NLP tasks, including summarization.
    Summarization is the task of reducing a long text into a shorter version while
    preserving the key information.
    This can be particularly useful for quickly understanding the main points of lengthy
    documents, articles, or reports.
    The library supports several pre-trained models that can be used out-of-the-box for
    summarization tasks.
    """

    summarized_text = summarize_text(passage)
    print("Original Text:\n", passage)
    print("\nSummarized Text:\n", summarized_text)

Lab 9

!pip install wikipedia-api

from pydantic import BaseModel
import wikipediaapi

class InstitutionDetails(BaseModel):
    name: str
    founder: str
    founded_year: str
    branches: str
    employees: str
    summary: str

def fetch_institution_details(institution_name: str) -> InstitutionDetails:
    wiki_wiki = wikipediaapi.Wikipedia(
        user_agent="MyWikipediaScraper/1.0 (contact: [email protected])",
        language="en"
    )
    page = wiki_wiki.page(institution_name)
    if not page.exists():
        raise ValueError("Institution page does not exist on Wikipedia")

    # First four sentences of the page summary
    summary_sentences = page.summary.split(". ")
    summary = ". ".join(summary_sentences[:4]) + "."

    founder = "Not Available"
    founded_year = "Not Available"
    branches = "Not Available"
    employees = "Not Available"

    # Scan section titles for the fields of interest
    for section in page.sections:
        title = section.title.lower()
        text = section.text.strip()
        if "founder" in title:
            founder = text.split(". ")[0]
        if "history" in title or "founded" in title:
            for line in text.split("\n"):
                if "founded" in line.lower():
                    founded_year = line.strip()
                    break
        if "branches" in title:
            branches = text.split(". ")[0]
        if "employees" in title:
            employees = text.split(". ")[0]

    return InstitutionDetails(
        name=institution_name,
        founder=founder,
        founded_year=founded_year,
        branches=branches,
        employees=employees,
        summary=summary
    )

if __name__ == "__main__":
    institution_name = input("Enter Institution Name: ")
    try:
        details = fetch_institution_details(institution_name)
        print(details.model_dump_json(indent=4))
    except ValueError as e:
        print(str(e))
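Since InstitutionDetails is a Pydantic v2 model, the parsed fields are also available as a plain dict rather than a JSON string (a minimal sketch; the page title below is a hypothetical example):

# Illustrative: access individual fields instead of the full JSON dump
details = fetch_institution_details("Stanford University")  # hypothetical page title
print(details.model_dump()["summary"])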
