Step 6: Scoring System
Now that we've analyzed our resume data in various ways, it's time to develop a scoring system. This system will assign each resume a score from 0 to 10 based on its relevance to specific job titles.
What is a Resume Scoring System?
A resume scoring system evaluates how well a candidate's qualifications match the requirements for a specific job. It helps recruiters:
- Objectively compare candidates
- Quickly identify the most promising applicants
- Ensure all resumes are evaluated using the same criteria
- Save time in the initial screening process
Designing Our Scoring Algorithm
Our scoring algorithm will consider multiple factors, each contributing a capped share of the 10 available points:
- Skill Match (up to 4 points): How many of the required and preferred skills does the candidate have?
- Experience (up to 3 points): How many years of relevant experience does the candidate have?
- Education (up to 1.5 points): Does the candidate meet the required education level?
- Contextual Relevance (up to 1 point): Does the resume read as relevant to the role? (For simplicity, we check whether the job title or a similar title appears in the text.)
- Keyword Density (up to 0.5 points): How frequently do job-related terms appear relative to the resume's length?
Let's implement this step by step:
import numpy as np
import pandas as pd
# Define a function to score resumes based on job requirements
def score_resume(resume_row, job_title, required_skills, preferred_skills, min_experience, education_level):
score = 0 # Start with a score of 0
max_score = 10 # Maximum possible score
# 1. Skill Match (up to 4 points)
candidate_skills = resume_row['skills']
# Calculate required skills match
req_skill_matches = sum(1 for skill in required_skills if skill in candidate_skills)
req_skill_score = (req_skill_matches / len(required_skills)) * 3 if required_skills else 0
# Calculate preferred skills match
pref_skill_matches = sum(1 for skill in preferred_skills if skill in candidate_skills)
pref_skill_score = (pref_skill_matches / len(preferred_skills)) * 1 if preferred_skills else 0
skill_score = req_skill_score + pref_skill_score
score += min(skill_score, 4) # Cap at 4 points
# 2. Experience (up to 3 points)
years_experience = resume_row['years_experience']
if years_experience >= min_experience:
experience_score = min(3, (years_experience - min_experience) * 0.5 + 2)
else:
experience_score = max(0, 2 * years_experience / min_experience)
score += experience_score
# 3. Education (up to 1.5 points)
education_levels = {
'high school': 1,
'associate': 2,
'bachelors': 3,
'masters': 4,
'phd': 5
}
required_edu_level = education_levels.get(education_level.lower(), 0)
candidate_edu = resume_row['education']
if candidate_edu:
candidate_edu_level = max(education_levels.get(edu.lower(), 0) for edu in candidate_edu)
if candidate_edu_level >= required_edu_level:
edu_score = 1.5
else:
edu_score = 1.0 * candidate_edu_level / required_edu_level
else:
edu_score = 0
score += edu_score
# 4. Contextual Relevance (up to 1 point)
    # Ideally this would use the skill importance we calculated earlier;
    # for simplicity, we just check whether the job title (or a similar title) appears in the resume
if job_title.lower() in resume_row['Resume'].lower():
score += 0.5
# Check if any similar job titles appear
similar_titles = get_similar_job_titles(job_title)
if any(title.lower() in resume_row['Resume'].lower() for title in similar_titles):
score += 0.5
# 5. Keyword Density (up to 0.5 points)
# Calculate the density of job-related keywords
job_keywords = set(job_title.lower().split() + [skill.lower() for skill in required_skills])
resume_text = resume_row['Resume'].lower()
word_count = len(resume_text.split())
keyword_count = sum(resume_text.count(keyword) for keyword in job_keywords)
keyword_density = keyword_count / word_count if word_count > 0 else 0
score += min(0.5, keyword_density * 50) # Scale and cap at 0.5
# Return the final score, rounded to 1 decimal place
return round(min(max_score, score), 1)
# Helper function to get similar job titles
def get_similar_job_titles(job_title):
# This would ideally use a job title taxonomy or word embeddings
# For simplicity, we'll use a small hardcoded dictionary
similar_titles = {
'data scientist': ['machine learning engineer', 'ai specialist', 'data analyst', 'statistician'],
'software engineer': ['developer', 'programmer', 'software developer', 'coder'],
'project manager': ['program manager', 'product manager', 'scrum master'],
'marketing specialist': ['marketing coordinator', 'marketing assistant', 'digital marketer']
}
for key, values in similar_titles.items():
if job_title.lower() in key or key in job_title.lower():
return values + [key]
return []
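Before scoring the whole dataset, it's worth a quick sanity check of score_resume on a single made-up resume row. The field names below match the columns produced in our earlier steps ('skills', 'years_experience', 'education', 'Resume'); the values themselves are invented purely for illustration.
# Quick sanity check on one hypothetical resume row (values are illustrative only)
sample_row = pd.Series({
    'skills': ['Python', 'SQL', 'Machine Learning', 'Statistics'],
    'years_experience': 3,
    'education': ['Masters'],
    'Resume': 'Experienced data scientist skilled in Python, SQL and machine learning.'
})
sample_score = score_resume(
    sample_row,
    job_title='Data Scientist',
    required_skills=['Python', 'Machine Learning', 'SQL', 'Data Analysis'],
    preferred_skills=['Deep Learning', 'Statistics'],
    min_experience=2,
    education_level='masters'
)
print(f"Sample score: {sample_score}")  # a value between 0 and 10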
Testing Our Scoring System
Let's test our scoring system with a few job titles:
# Define some job requirements
job_requirements = {
'Data Scientist': {
'required_skills': ['Python', 'Machine Learning', 'SQL', 'Data Analysis'],
'preferred_skills': ['Deep Learning', 'AI', 'Statistics', 'Data Visualization'],
'min_experience': 2,
'education_level': 'masters'
},
'Software Engineer': {
'required_skills': ['Java', 'JavaScript', 'SQL', 'HTML', 'CSS'],
'preferred_skills': ['React', 'Node.js', 'Docker', 'AWS'],
'min_experience': 3,
'education_level': 'bachelors'
},
'Marketing Specialist': {
'required_skills': ['Communication', 'Social Media', 'Content Creation'],
'preferred_skills': ['Adobe Creative Suite', 'SEO', 'Google Analytics'],
'min_experience': 1,
'education_level': 'bachelors'
}
}
# Score all resumes for each job title
for job_title, requirements in job_requirements.items():
column_name = f'score_{job_title.replace(" ", "_")}'
resume_data[column_name] = resume_data.apply(
lambda row: score_resume(
row,
job_title,
requirements['required_skills'],
requirements['preferred_skills'],
requirements['min_experience'],
requirements['education_level']
),
axis=1
)
# Display the top 5 candidates for each job title
for job_title in job_requirements.keys():
column_name = f'score_{job_title.replace(" ", "_")}'
print(f"\nTop 5 candidates for {job_title}:")
top_candidates = resume_data.sort_values(by=column_name, ascending=False).head(5)
for idx, row in top_candidates.iterrows():
print(f"Candidate {idx}: Score {row[column_name]}, Skills: {', '.join(row['skills'])}")
Visualizing the Scores
Let's create some visualizations to better understand our scoring results:
import matplotlib.pyplot as plt
import seaborn as sns
import os
# Make sure the output folder for the saved figures exists
os.makedirs('results', exist_ok=True)
# Set up the figure
plt.figure(figsize=(15, 10))
# 1. Score distribution for each job title
for i, job_title in enumerate(job_requirements.keys()):
column_name = f'score_{job_title.replace(" ", "_")}'
plt.subplot(2, 2, i+1)
sns.histplot(resume_data[column_name], bins=20, kde=True)
plt.title(f'Score Distribution for {job_title}')
plt.xlabel('Score')
plt.ylabel('Number of Resumes')
# 2. Correlation between scores and features
plt.subplot(2, 2, 4)
score_columns = [f'score_{job_title.replace(" ", "_")}' for job_title in job_requirements.keys()]
correlation_data = resume_data[['years_experience'] + score_columns]
sns.heatmap(correlation_data.corr(), annot=True, cmap='coolwarm')
plt.title('Correlation between Experience and Scores')
plt.tight_layout()
plt.savefig('results/score_analysis.png')
plt.close()
# 3. Compare scores across job categories
plt.figure(figsize=(12, 8))
job_categories = resume_data['Category'].unique()[:5] # Take first 5 categories
category_data = []
for category in job_categories:
category_resumes = resume_data[resume_data['Category'] == category]
for job_title in job_requirements.keys():
column_name = f'score_{job_title.replace(" ", "_")}'
avg_score = category_resumes[column_name].mean()
category_data.append({
'Category': category,
'Job Title': job_title,
'Average Score': avg_score
})
category_df = pd.DataFrame(category_data)
pivot_df = category_df.pivot(index='Category', columns='Job Title', values='Average Score')
sns.heatmap(pivot_df, annot=True, cmap='YlGnBu')
plt.title('Average Scores by Resume Category and Job Title')
plt.tight_layout()
plt.savefig('results/category_score_comparison.png')
plt.close()
Implementing a Weighted Scoring System
Different companies might value different aspects of a candidate's profile. Let's create a more flexible scoring system with customizable weights:
def weighted_score_resume(resume_row, job_title, requirements, weights):
"""
Score a resume with customizable weights for different factors
Parameters:
- resume_row: Row from the resume DataFrame
- job_title: Title of the job
- requirements: Dictionary with required_skills, preferred_skills, min_experience, education_level
- weights: Dictionary with weights for skills, experience, education, relevance, keywords
Returns:
- Weighted score from 0-10
"""
# Extract requirements
required_skills = requirements['required_skills']
preferred_skills = requirements.get('preferred_skills', [])
min_experience = requirements['min_experience']
education_level = requirements['education_level']
# Extract weights (with defaults)
skill_weight = weights.get('skills', 0.4)
exp_weight = weights.get('experience', 0.3)
edu_weight = weights.get('education', 0.15)
relevance_weight = weights.get('relevance', 0.1)
keyword_weight = weights.get('keywords', 0.05)
# Calculate individual scores (normalized to 0-1)
# 1. Skill Match
candidate_skills = resume_row['skills']
req_skill_matches = sum(1 for skill in required_skills if skill in candidate_skills)
req_skill_score = (req_skill_matches / len(required_skills)) if required_skills else 0
pref_skill_matches = sum(1 for skill in preferred_skills if skill in candidate_skills)
pref_skill_score = (pref_skill_matches / len(preferred_skills)) * 0.5 if preferred_skills else 0
skill_score = min(1.0, req_skill_score + pref_skill_score)
# 2. Experience
years_experience = resume_row['years_experience']
if years_experience >= min_experience:
experience_score = min(1.0, (years_experience - min_experience) / 5 + 0.7)
else:
experience_score = max(0, 0.7 * years_experience / min_experience)
# 3. Education
education_levels = {
'high school': 1,
'associate': 2,
'bachelors': 3,
'masters': 4,
'phd': 5
}
required_edu_level = education_levels.get(education_level.lower(), 0)
candidate_edu = resume_row['education']
if candidate_edu:
candidate_edu_level = max(education_levels.get(edu.lower(), 0) for edu in candidate_edu)
        # Avoid division by zero if the required education level is unrecognized
        edu_score = min(1.0, candidate_edu_level / required_edu_level) if required_edu_level else 1.0
else:
edu_score = 0
# 4. Contextual Relevance
relevance_score = 0
if job_title.lower() in resume_row['Resume'].lower():
relevance_score += 0.5
similar_titles = get_similar_job_titles(job_title)
if any(title.lower() in resume_row['Resume'].lower() for title in similar_titles):
relevance_score += 0.5
relevance_score = min(1.0, relevance_score)
# 5. Keyword Density
job_keywords = set(job_title.lower().split() + [skill.lower() for skill in required_skills])
resume_text = resume_row['Resume'].lower()
word_count = len(resume_text.split())
keyword_count = sum(resume_text.count(keyword) for keyword in job_keywords)
keyword_density = keyword_count / word_count if word_count > 0 else 0
keyword_score = min(1.0, keyword_density * 10)
# Calculate weighted score
weighted_score = (
skill_score * skill_weight +
experience_score * exp_weight +
edu_score * edu_weight +
relevance_score * relevance_weight +
keyword_score * keyword_weight
) * 10 # Scale to 0-10
return round(weighted_score, 1)
# Test with different weight profiles
weight_profiles = {
'Balanced': {
'skills': 0.4,
'experience': 0.3,
'education': 0.15,
'relevance': 0.1,
'keywords': 0.05
},
'Skill-focused': {
'skills': 0.6,
'experience': 0.2,
'education': 0.1,
'relevance': 0.05,
'keywords': 0.05
},
'Experience-focused': {
'skills': 0.3,
'experience': 0.5,
'education': 0.1,
'relevance': 0.05,
'keywords': 0.05
}
}
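# Optional sanity check: each profile's weights should sum to 1.0 so that the
# weighted score stays on the 0-10 scale (this assumes the five factor keys above)
for profile_name, weights in weight_profiles.items():
    total_weight = sum(weights.values())
    assert abs(total_weight - 1.0) < 1e-9, f"{profile_name} weights sum to {total_weight}, not 1.0"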
# Apply weighted scoring for a specific job
job_title = 'Data Scientist'
requirements = job_requirements[job_title]
for profile_name, weights in weight_profiles.items():
column_name = f'score_{profile_name}'
resume_data[column_name] = resume_data.apply(
lambda row: weighted_score_resume(row, job_title, requirements, weights),
axis=1
)
# Compare the results
print(f"\nTop 3 candidates for {job_title} with different weight profiles:")
for profile_name in weight_profiles.keys():
column_name = f'score_{profile_name}'
print(f"\n{profile_name} profile:")
top_candidates = resume_data.sort_values(by=column_name, ascending=False).head(3)
for idx, row in top_candidates.iterrows():
print(f"Candidate {idx}: Score {row[column_name]}, Experience: {row['years_experience']} years, Skills: {', '.join(row['skills'][:3])}...")
Saving Our Scoring Model
Let's save our scoring model so we can use it later:
import pickle
import os
# Make sure the output folder for the model exists
os.makedirs('models', exist_ok=True)
# Create a dictionary with our scoring functions and requirements.
# Note: pickle stores plain functions by reference (module and function name),
# so these functions must be defined or importable when the model is loaded
scoring_model = {
'score_resume': score_resume,
'weighted_score_resume': weighted_score_resume,
'get_similar_job_titles': get_similar_job_titles,
'job_requirements': job_requirements,
'weight_profiles': weight_profiles
}
# Save the model
with open('models/scoring_model.pkl', 'wb') as f:
pickle.dump(scoring_model, f)
print("Scoring model saved to models/scoring_model.pkl")
The scoring system is the heart of our Resume Parser AI. It combines all the information we've extracted and analyzed to provide a meaningful evaluation of each candidate's fit for a specific job.
Next Steps
Now that we've developed a scoring system for our Resume Parser AI, we need to test and optimize our model to ensure it's working effectively. In the next step, we'll evaluate the performance of our model and make adjustments to improve its accuracy.