Lab

The document consists of multiple labs demonstrating the use of various natural language processing techniques using libraries like Gensim and Transformers. Labs cover tasks such as loading pre-trained word vectors, finding similar words, visualizing word embeddings, enriching prompts, generating text, sentiment analysis, summarization, and fetching institution details from Wikipedia. Each lab includes code snippets and functionalities aimed at exploring and applying NLP models and techniques.

Lab 1

import gensim.downloader as gen

def load_model():
    print("Loading pre-trained word vectors...")
    m = gen.load("glove-wiki-gigaword-100")
    print("Model loaded successfully!")
    return m

def similar_word(model, word1, word2, word3):
    # Vector arithmetic: word1 - word2 + word3
    try:
        print(f"\nExploring relationships: {word1} - {word2} + {word3}")
        result = model.most_similar(positive=[word1, word3], negative=[word2])
        print("Most similar words:")
        for word, score in result:
            print(f"{word}: {score:.4f}")
    except KeyError as e:
        print(f"Word '{e.args[0]}' not found in the vocabulary.")

def main():
    model = load_model()
    while True:
        print("\nEnter three words (or type 'exit' to quit):")
        word1 = input("Word 1: ").strip().lower()
        if word1 == 'exit':
            break
        word2 = input("Word 2: ").strip().lower()
        word3 = input("Word 3: ").strip().lower()
        similar_word(model, word1, word2, word3)

main()
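As a quick non-interactive check (an illustrative sketch reusing the two functions above, not part of the lab itself), the classic vector-arithmetic analogy can be run directly:

# Illustrative: "king" - "man" + "woman" should rank "queen" highly
demo_model = load_model()
similar_word(demo_model, "king", "man", "woman")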

Lab 2

import gensim.downloader as api
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import numpy as np

def load_model():
    print("Loading pre-trained word vectors...")
    model = api.load("glove-wiki-gigaword-100")
    print("Model loaded successfully!")
    return model

def visual(model, words, method='pca'):
    # Filter out-of-vocabulary words first so labels stay aligned with vectors
    words = [word for word in words if word in model]
    vectors = np.array([model[word] for word in words])
    reducer = PCA(n_components=2) if method == 'pca' else TSNE(n_components=2, perplexity=5, random_state=42)
    reduced_vectors = reducer.fit_transform(vectors)
    plt.figure(figsize=(10, 6))
    for word, coord in zip(words, reduced_vectors):
        plt.scatter(coord[0], coord[1])
        plt.text(coord[0] + 0.05, coord[1] + 0.05, word, fontsize=12)
    plt.title(f"Word Embeddings Visualization using {method.upper()}")
    plt.show()

def similar_words(model, word, top_n=5):
    try:
        results = model.most_similar(word, topn=top_n)
        print(f"Top {top_n} words similar to '{word}':")
        for similar_word, similarity in results:
            print(f"{similar_word}: {similarity:.4f}")
    except KeyError:
        print(f"Word '{word}' not found in the vocabulary.")

def main():
    model = load_model()
    domain_words = ["computer", "technology", "internet", "software", "hardware", "AI", "machine", "data", "network"]
    visual(model, domain_words, method='pca')
    while True:
        word = input("Enter a word to find similar words (or 'exit' to quit): ").strip().lower()
        if word == 'exit':
            break
        similar_words(model, word)

main()
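For comparison, the same word list can be projected with t-SNE instead of PCA by reusing the `visual` helper above (an illustrative sketch; the perplexity of 5 set inside `visual` must stay below the number of plotted words):

# Illustrative: t-SNE projection of the same nine domain words
m = load_model()
words = ["computer", "technology", "internet", "software", "hardware", "AI", "machine", "data", "network"]
visual(m, words, method='tsne')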

Lab 3

from gensim.models import Word2Vec
import nltk
from nltk.tokenize import word_tokenize

nltk.download('punkt')  # tokenizer data required by word_tokenize

legal_corpus = [
    "The plaintiff filed a lawsuit against the defendant for breach of contract.",
    "The court ruled in favor of the defendant due to lack of evidence.",
    "A new legal precedent was set in the case of intellectual property rights.",
    "The judge dismissed the case citing jurisdictional issues.",
    "The attorney argued that the contract was legally binding.",
]

legal_sentences = [word_tokenize(sentence.lower()) for sentence in legal_corpus]

model = Word2Vec(sentences=legal_sentences, vector_size=100, window=5, min_count=1, workers=4)

model.save("legal_word2vec.model")
model = Word2Vec.load("legal_word2vec.model")

word = "court"
if word in model.wv:
    print(f"Words most similar to '{word}':")
    for similar_word, similarity in model.wv.most_similar(word):
        print(f"{similar_word}: {similarity:.4f}")
else:
    print(f"'{word}' not found in vocabulary.")

Lab 4

import gensim.downloader as api
from transformers import pipeline

embedding_model = api.load("glove-wiki-gigaword-100")

original_prompt = "Describe the beautiful landscapes during sunset."

def enrich_prompt(prompt, embedding_model, n=3):
    words = prompt.split()
    enriched_prompt = []
    for word in words:
        word_lower = word.lower()
        if word_lower in embedding_model:
            similar_words = embedding_model.most_similar(word_lower, topn=n)
            similar_list = [w[0] for w in similar_words]
            enriched_prompt.append(" ".join(similar_list))
        else:
            # Words with attached punctuation (e.g. "sunset.") miss the vocabulary
            # lookup and pass through unchanged
            enriched_prompt.append(word)
    return " ".join(enriched_prompt)

enriched_prompt = enrich_prompt(original_prompt, embedding_model)

generator = pipeline("text-generation", model="gpt2")

original_response = generator(original_prompt, max_length=60, num_return_sequences=1)[0]['generated_text']
enriched_response = generator(enriched_prompt, max_length=60, num_return_sequences=1)[0]['generated_text']

print("Original Prompt:\n", original_prompt)
print("\nGPT-2 Response:\n", original_response)
print("\n" + "="*80)
print("\nEnriched Prompt:\n", enriched_prompt)
print("\nGPT-2 Response:\n", enriched_response)

Lab 5

import gensim.downloader as api

print("Loading pre-trained Word2Vec embeddings...")
word_vectors = api.load("word2vec-google-news-300")

def get_similar_words(seed_word, top_n=5):
    try:
        similar = word_vectors.most_similar(seed_word, topn=top_n)
        return [word for word, _ in similar]
    except KeyError:
        print(f"'{seed_word}' is not in the vocabulary. Please try another word.")
        return []

def construct_paragraph(seed_word, similar_words):
    if len(similar_words) < 5:
        return "Not enough similar words to create a story."
    sentences = [
        f"Once upon a time, there was a {seed_word}.",
        f"The {seed_word} was known for its connection to {similar_words[0]} and {similar_words[1]}.",
        f"One day, the {seed_word} encountered a {similar_words[2]} and they became great friends.",
        f"Together, they explored the world of {similar_words[3]} and discovered the wonders of {similar_words[4]}.",
        f"In the end, the {seed_word} realized that life is full of surprises and adventures."
    ]
    return " ".join(sentences)

def main():
    seed_word = input("Enter a seed word: ").strip().lower()
    similar = get_similar_words(seed_word)
    if not similar:
        return
    print(f"\nSimilar words to '{seed_word}': {', '.join(similar)}")
    paragraph = construct_paragraph(seed_word, similar)
    print("\nHere's a short paragraph/story:\n")
    print(paragraph)

if __name__ == "__main__":
    main()
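Note that word2vec-google-news-300 is a large download (roughly 1.6 GB) on first use. A non-interactive run of the two helpers above might look like this (illustrative seed word):

# Illustrative: generate a story for a fixed seed word
words = get_similar_words("ocean")
if words:
    print(construct_paragraph("ocean", words))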

Lab 6

from transformers import pipeline

sentiment_analyzer = pipeline(
    task="sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english"
)

def analyze_sentiment(text):
    result = sentiment_analyzer(text)
    return result

def main():
    sentences = [
        "I love this product! It's amazing.",
        "The service was terrible and I'm very disappointed.",
        "The movie was okay, not great but not bad either.",
        "This is the best day of my life!",
        "I feel so frustrated with this situation."
    ]
    for sentence in sentences:
        sentiment_result = analyze_sentiment(sentence)
        print(f"Sentence: {sentence}")
        print(f"Sentiment: {sentiment_result[0]['label']} (confidence: {sentiment_result[0]['score']:.4f})")
        print("-" * 60)

main()
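The pipeline also accepts a list of strings in a single call, which avoids looping sentence by sentence (a minimal sketch against the analyzer defined above, with made-up example texts):

# Illustrative: batch inference over several texts at once
results = sentiment_analyzer(["Great value for money.", "Never buying this again."])
for r in results:
    print(f"{r['label']}: {r['score']:.4f}")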
Lab 7

from transformers import pipeline

model_name = "facebook/bart-large-cnn"
summarizer = pipeline("summarization", model=model_name)

def summarize_text(text, max_length=130, min_length=30, do_sample=False):
    summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=do_sample)
    return summary[0]['summary_text']

if __name__ == "__main__":
    passage = """
    The Hugging Face Transformers library provides an easy-to-use interface for working
    with pre-trained models for various NLP tasks, including summarization.
    Summarization is the task of reducing a long text into a shorter version while
    preserving the key information.
    This can be particularly useful for quickly understanding the main points of lengthy
    documents, articles, or reports.
    The library supports several pre-trained models that can be used out-of-the-box for
    summarization tasks.
    """

    summarized_text = summarize_text(passage)
    print("Original Text:\n", passage)
    print("\nSummarized Text:\n", summarized_text)

Lab 9

!pip install wikipedia-api

from pydantic import BaseModel
import wikipediaapi

class InstitutionDetails(BaseModel):
    name: str
    founder: str
    founded_year: str
    branches: str
    employees: str
    summary: str

def fetch_institution_details(institution_name: str) -> InstitutionDetails:
    wiki_wiki = wikipediaapi.Wikipedia(
        user_agent="MyWikipediaScraper/1.0 (contact: [email protected])",
        language="en"
    )
    page = wiki_wiki.page(institution_name)
    if not page.exists():
        raise ValueError("Institution page does not exist on Wikipedia")

    # First four sentences of the page summary
    summary_sentences = page.summary.split(". ")
    summary = ". ".join(summary_sentences[:4]) + "."

    founder = "Not Available"
    founded_year = "Not Available"
    branches = "Not Available"
    employees = "Not Available"

    # Scan section titles for the fields of interest
    for section in page.sections:
        title = section.title.lower()
        text = section.text.strip()
        if "founder" in title:
            founder = text.split(". ")[0]
        if "history" in title or "founded" in title:
            for line in text.split("\n"):
                if "founded" in line.lower():
                    founded_year = line.strip()
                    break
        if "branches" in title:
            branches = text.split(". ")[0]
        if "employees" in title:
            employees = text.split(". ")[0]

    return InstitutionDetails(
        name=institution_name,
        founder=founder,
        founded_year=founded_year,
        branches=branches,
        employees=employees,
        summary=summary
    )

if __name__ == "__main__":
    institution_name = input("Enter Institution Name: ")
    try:
        details = fetch_institution_details(institution_name)
        print(details.model_dump_json(indent=4))
    except ValueError as e:
        print(str(e))
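Since InstitutionDetails is a Pydantic v2 model, the parsed fields are also available as a plain dict rather than a JSON string (a minimal sketch; the page title below is a hypothetical example):

# Illustrative: access individual fields instead of the full JSON dump
details = fetch_institution_details("Stanford University")  # hypothetical page title
print(details.model_dump()["summary"])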
