E-Recruiting and shortlisting using Candidate Resume with NLP and Machine Learning
- Sneha T
templates/index.html
<!DOCTYPE html>
<html>
<head>
<title>Resume Analyzer</title>
<link rel="stylesheet" href="{{ url_for('static', filename='styles.css') }}" id="theme-style">
<style>
/* Colours used while <body> carries the `dark-mode` class, which
   toggleDarkMode() adds and removes. */
body.dark-mode {
    background-color: #1a1a1a;
    color: #ffffff;
}
</style>
<script>
/**
 * Switch the page between light and dark mode.
 *
 * The theme is driven purely by the `dark-mode` class on <body>: the
 * stylesheet defines both `body:not(.dark-mode)` and `body.dark-mode`
 * rules, so no stylesheet swap is needed. No event listeners are
 * registered, so repeated calls cannot accumulate handlers.
 */
function toggleDarkMode() {
    const body = document.body;
    const toggle = document.getElementById('dark-mode-toggle');

    if (toggle) {
        // Mirror the checkbox instead of blindly flipping, so the class and
        // the control cannot drift apart (e.g. when the browser restores a
        // checked box after a reload).
        body.classList.toggle('dark-mode', toggle.checked);
    } else {
        // No checkbox on the page: fall back to a plain flip.
        body.classList.toggle('dark-mode');
    }
}
</script>
</head>
<body>
<style>
/* Page background image. Resolved through url_for so the request goes to
   Flask's static endpoint rather than to /img1.jpg.
   NOTE(review): assumes img1.jpg is stored in the static/ folder - confirm. */
body {
    background-image: url("{{ url_for('static', filename='img1.jpg') }}");
}
</style>
<!-- Page header: image followed by the title, both centred.
     NOTE(review): alt text assumes the image is the application logo - confirm. -->
<div style="text-align: center;">
    <img src="https://ideogram.ai/api/images/direct/qtam5-HIR62mza3EqF_FPQ.jpg"
         width="150" height="150" alt="Resume Analyzer logo">
    <h1>Resume Analyzer</h1>
</div>
<!-- Dark-mode switch: the checkbox holds the state and calls toggleDarkMode()
     on change; the slider element is styled by #dark-mode-toggle-slider. -->
<label id="dark-mode-toggle-label" for="dark-mode-toggle">
    <input type="checkbox" id="dark-mode-toggle" onchange="toggleDarkMode()"
           aria-label="Toggle dark mode">
    <!-- <span> rather than <div>: a <label> may only contain phrasing content. -->
    <span id="dark-mode-toggle-slider"></span>
</label>
{# Upload form: posts the job description and one or more PDF resumes to the
   index view as multipart/form-data. #}
<form action="{{ url_for('index') }}" method="post" enctype="multipart/form-data">
    {# Each control carries an id so its <label for="..."> actually binds to it. #}
    <label for="job_description">Job Description:</label>
    <textarea id="job_description" name="job_description" rows="5" cols="40" required></textarea>
    <br>
    <label for="resume_files">Upload Resumes (PDF):</label>
    <input type="file" id="resume_files" name="resume_files" accept=".pdf" multiple required>
    <br>
    <input type="submit" value="Analyze Resumes">
</form>
<br>
{# Ranked results: rendered only when the view passed a non-empty `results`. #}
{% if results %}
<h2>Ranked Resumes:</h2>
<table>
    <tr>
        <th>Rank</th>
        <th>Name</th>
        <th>Email</th>
        <th>Similarity in %</th>
    </tr>
    {# Each result is a (names, emails, similarity) tuple; names and emails are
       lists that may be empty, in which case "N/A" is shown (same placeholder
       the CSV export uses). #}
    {% for result in results %}
    <tr>
        <td>{{ loop.index }}</td>
        <td>{{ result[0][0] if result[0] else 'N/A' }}</td>
        <td>{{ result[1][0] if result[1] else 'N/A' }}</td>
        <td>{{ '%.2f'|format(result[2]) }}</td>
    </tr>
    {% endfor %}
</table>
<br>
<a href="{{ url_for('download_csv') }}" download="ranked_resumes.csv"
   class="download-link">
    Download CSV
</a>
{% endif %}
</body>
</html>
static/styles.css
/* Reset some default styles: strip browser margins and padding from the
   elements this page uses. */
body,
h1,
h2,
p,
table,
th,
td {
    margin: 0;
    padding: 0;
}
/* Light mode styles: applied while <body> does not carry the dark-mode class. */
body:not(.dark-mode) {
    background-color: #ffffff;
    color: #000000;
}

/* Dark mode styles: applied while <body> carries the dark-mode class. */
body.dark-mode {
    background-color: #1a1a1a;
    color: #ffffff;
}
/* Dark mode toggle styles */

/* Outer box of the switch; positions the slider placed inside it. */
#dark-mode-toggle-label {
    position: relative;
    display: inline-block;
    width: 60px;
    height: 34px;
}

/* Track of the switch: fills the whole label. */
#dark-mode-toggle-slider {
    position: absolute;
    cursor: pointer;
    top: 0;
    left: 0;
    right: 0;
    bottom: 0;
    background-color: #ccc;
    border-radius: 34px;
    transition: 0.4s;
}

/* Knob of the switch. */
#dark-mode-toggle-slider:before {
    position: absolute;
    content: "";
    height: 26px;
    width: 26px;
    left: 4px;
    bottom: 4px;
    background-color: #ffffff;
    border-radius: 50%;
    transition: 0.4s;
}

/* Checked state: colour the track ... */
#dark-mode-toggle:checked+#dark-mode-toggle-slider {
    background-color: #2196F3;
}

/* ... and slide the knob to the right. */
#dark-mode-toggle:checked+#dark-mode-toggle-slider:before {
    transform: translateX(26px);
}

/* Label color for dark mode */
body.dark-mode label {
    color: #000000;
}
/* Basic styling */
body {
    font-family: Arial, sans-serif;
    background-color: #f2f2f2;
    color: #333;
    padding: 20px;
    /* Animate the colour change when the theme class is toggled. */
    transition: background-color 0.4s, color 0.4s;
}

h1 {
    margin-bottom: 20px;
}
/* Form styling */
form {
    background-color: #fff;
    padding: 20px;
    border-radius: 8px;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
}

/* Stack labels and controls vertically, one per line. */
label,
input[type="file"],
textarea,
input[type="submit"] {
    display: block;
    margin-bottom: 10px;
}

input[type="file"] {
    margin-top: 5px;
}

textarea {
    width: 100%;
    padding: 1px;
    border: 1px solid #ccc;
    border-radius: 4px;
    resize: vertical;
}

input[type="submit"] {
    background-color: #007bff;
    color: #fff;
    border: none;
    padding: 10px 15px;
    border-radius: 4px;
    cursor: pointer;
}
/* Table styling */
table {
    border-collapse: collapse;
    width: 100%;
    margin-top: 20px;
}

th,
td {
    padding: 8px;
    text-align: left;
    border-bottom: 1px solid #ddd;
}

th {
    background-color: #f2f2f2;
}
/* Responsive layout: on viewports up to 768px wide the form spans the full
   width with slightly tighter padding. */
@media (max-width: 768px) {
    form {
        width: 100%;
        padding: 15px;
    }
}
App.py
from flask import Flask, render_template, request
import spacy
import PyPDF2
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re
import csv
import os
# Flask application object; the route decorators below register views on it.
app = Flask(__name__)
# Load spaCy NER model
# NOTE(review): `nlp` is not referenced by any function shown in this file
# (entity extraction below is regex based) - confirm whether it is still needed.
nlp = spacy.load("en_core_web_sm")
# Initialize results variable
# Module-level ranking list that download_csv() iterates when building the CSV.
results = []
# Extract text from PDFs
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at *pdf_path*.

    Pages are joined with a newline so that the last word of one page and
    the first word of the next are not fused into a single token (which
    would distort both the e-mail regex and the TF-IDF vocabulary).

    Args:
        pdf_path: Filesystem path of the PDF file to read.

    Returns:
        The page texts joined by newlines; pages that yield no text
        contribute an empty string.
    """
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # `or ""` guards against a page whose extraction yields nothing;
        # the join runs inside the `with` so the file is still open.
        return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)
# Extract e-mail addresses and the candidate name with regular expressions
def extract_entities(text):
    """Extract e-mail addresses and a leading "First Last" name from *text*.

    Args:
        text: Plain text of a resume.

    Returns:
        A tuple ``(emails, names)``. ``emails`` lists every address found,
        in order of appearance. ``names`` is a one-element list holding
        ``"First Last"`` when the text starts (after optional whitespace)
        with two capitalised words, otherwise an empty list.
    """
    # Match the address itself rather than any run of non-space characters
    # around "@", so surrounding punctuation such as "(", "," or a trailing
    # "." is not captured as part of the e-mail.
    emails = re.findall(
        r'[A-Za-z0-9._%+-]+@[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)+', text
    )
    # Extracted PDF text frequently begins with blank lines or spaces;
    # tolerate them instead of requiring the name at offset 0.
    name_match = re.match(r'\s*([A-Z][a-z]+)\s+([A-Z][a-z]+)', text)
    names = [" ".join(name_match.groups())] if name_match else []
    return emails, names
@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the upload form and, on POST, rank the uploaded resumes.

    On POST the job description and every uploaded PDF are read, each
    resume is scored against the job description with TF-IDF cosine
    similarity (scaled to a percentage), and the resumes are sorted from
    best to worst match. The ranking is also stored in the module-level
    ``results`` list so that ``download_csv`` can export it.

    Returns:
        The rendered ``index.html`` template; ``results`` is the ranked
        list of ``(names, emails, similarity)`` tuples, empty on GET.
    """
    # download_csv() reads the module-level list. Without this declaration
    # the assignment below would only bind a local name and the exported
    # CSV would always be empty.
    # NOTE: this state is shared by all clients of the process.
    global results

    ranked_resumes = []
    if request.method == 'POST':
        job_description = request.form['job_description']
        resume_files = request.files.getlist('resume_files')

        # Create a directory for uploads if it doesn't exist
        os.makedirs("uploads", exist_ok=True)

        # Process uploaded resumes
        processed_resumes = []
        for resume_file in resume_files:
            # The filename is client-controlled: keep only its last path
            # component so a crafted name cannot write outside "uploads".
            safe_name = os.path.basename(resume_file.filename or "")
            if safe_name in ("", ".", ".."):
                continue  # empty file slot or unusable name

            # Save the uploaded file
            resume_path = os.path.join("uploads", safe_name)
            resume_file.save(resume_path)

            # Process the saved file
            resume_text = extract_text_from_pdf(resume_path)
            emails, names = extract_entities(resume_text)
            processed_resumes.append((names, emails, resume_text))

        # TF-IDF vectorizer: the vocabulary is fitted on the job description
        # only, so resumes are scored on the terms the job description uses.
        tfidf_vectorizer = TfidfVectorizer()
        job_desc_vector = tfidf_vectorizer.fit_transform([job_description])

        # Rank resumes based on similarity
        for names, emails, resume_text in processed_resumes:
            resume_vector = tfidf_vectorizer.transform([resume_text])
            similarity = cosine_similarity(job_desc_vector, resume_vector)[0][0] * 100
            ranked_resumes.append((names, emails, similarity))

        # Sort resumes by similarity score
        ranked_resumes.sort(key=lambda x: x[2], reverse=True)
        results = ranked_resumes

    return render_template('index.html', results=ranked_resumes)
from flask import send_file
@app.route('/download_csv')
def download_csv():
    """Send the ranking stored in ``results`` as a CSV attachment.

    The CSV is built in memory with ``csv.writer`` so that names or e-mail
    strings containing commas, quotes or newlines are quoted correctly,
    and no file is written to disk (the earlier on-disk file was written
    relative to the working directory but read back from the module
    directory, and was shared between concurrent requests).

    Returns:
        A Flask response carrying ``ranked_resumes.csv`` with the columns
        Rank, Name, Email, Similarity. Missing names or e-mails are
        written as ``"N/A"``.
    """
    import io

    text_buffer = io.StringIO()
    # "\n" keeps the line endings the endpoint has always produced.
    writer = csv.writer(text_buffer, lineterminator="\n")
    writer.writerow(["Rank", "Name", "Email", "Similarity"])
    for rank, (names, emails, similarity) in enumerate(results, start=1):
        name = names[0] if names else "N/A"
        email = emails[0] if emails else "N/A"
        writer.writerow([rank, name, email, similarity])

    # send_file needs a binary stream when given a file-like object.
    payload = io.BytesIO(text_buffer.getvalue().encode("utf-8"))
    return send_file(payload, mimetype="text/csv", as_attachment=True,
                     download_name="ranked_resumes.csv")
if __name__ == '__main__':
    # Debug mode enables the interactive debugger, which lets anyone who can
    # reach the server execute code on it. Keep it opt-in: set FLASK_DEBUG=1
    # (or "true") in the environment for local development.
    app.run(debug=os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true"))
ResumeRanker.py
import spacy
import PyPDF2
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re
import csv
# Output file for the ranked results.
csv_filename = "ranked_resumes.csv"
nlp = spacy.load("en_core_web_sm")
# Job description the resumes are scored against. Written as adjacent
# literals inside parentheses so the long string stays a single valid
# literal instead of being broken across physical lines.
job_description = (
    "NLP Specialist: Develop and implement NLP algorithms. Proficiency in "
    "Python, NLP libraries, and ML frameworks required."
)
# Resume PDFs to rank, as paths relative to the working directory.
resume_paths = ["resume1.pdf", "resume2.pdf", "resume3.pdf"]
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of the PDF at *pdf_path*.

    Page texts are separated by a newline so words at a page break are not
    merged into one token.

    Args:
        pdf_path: Filesystem path of the PDF file to read.

    Returns:
        The page texts joined by newlines; a page that yields no text
        contributes an empty string.
    """
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Collect inside the `with` block: pages are read from the open file.
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]
    return "\n".join(page_texts)
def extract_entities(text):
    """Find e-mail addresses and a leading "First Last" name in *text*.

    Args:
        text: Plain text of a resume.

    Returns:
        ``(emails, names)``: ``emails`` is the list of addresses found, in
        order; ``names`` is ``["First Last"]`` when the text begins (after
        optional whitespace) with two capitalised words, else ``[]``.
    """
    # A structured address pattern avoids dragging along punctuation that
    # merely touches the address, e.g. "(a@b.com)," or a sentence-final ".".
    email_pattern = r'[A-Za-z0-9._%+-]+@[A-Za-z0-9-]+(?:\.[A-Za-z0-9-]+)+'
    emails = re.findall(email_pattern, text)

    # Allow leading blank lines/spaces, which extracted PDF text often has.
    leading_name = re.match(r'\s*([A-Z][a-z]+)\s+([A-Z][a-z]+)', text)
    if leading_name is None:
        return emails, []
    return emails, [" ".join(leading_name.groups())]
# Fit the TF-IDF vocabulary on the job description; resumes are then
# projected onto that vocabulary and compared by cosine similarity.
tfidf_vectorizer = TfidfVectorizer()
job_desc_vector = tfidf_vectorizer.fit_transform([job_description])

# Score every resume as (names, emails, similarity).
ranked_resumes = []
for resume_path in resume_paths:
    resume_text = extract_text_from_pdf(resume_path)
    emails, names = extract_entities(resume_text)
    resume_vector = tfidf_vectorizer.transform([resume_text])
    similarity = cosine_similarity(job_desc_vector, resume_vector)[0][0]
    ranked_resumes.append((names, emails, similarity))

# Best match first.
ranked_resumes.sort(key=lambda x: x[2], reverse=True)

# Report the ranking on stdout.
for rank, (names, emails, similarity) in enumerate(ranked_resumes, start=1):
    print(f"Rank {rank}: Names: {names}, Emails: {emails}, Similarity: {similarity:.2f}")

# Export the ranking. The encoding is fixed to UTF-8 so that non-ASCII
# characters picked up from a resume cannot fail on platforms whose default
# encoding is narrower.
with open(csv_filename, "w", newline="", encoding="utf-8") as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(["Rank", "Name", "Email", "Similarity"])
    for rank, (names, emails, similarity) in enumerate(ranked_resumes, start=1):
        name = names[0] if names else "N/A"
        email = emails[0] if emails else "N/A"
        csv_writer.writerow([rank, name, email, similarity])