forked from OmkarPathak/pyresparser
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrank_candidate.py
99 lines (81 loc) · 3.11 KB
/
rank_candidate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from multiprocessing import cpu_count, Pool
from typing import Set
from pyresparser.utils import extract_skills
import pandas as pd
import spacy
def get_candidate_score(
job_skill_count: int,
job_skills: Set[str],
candidate_skills: Set[str]
) -> float:
"""
This function compares the candidate skills with job description skills
and rates the candidate based on the fraction of common skills
:param job_skill_count: Number of job skills
:param job_skills: Set of job skills
:param candidate_skills: Set of candidate skills
:return: Candidate score
"""
common_skills = job_skills.intersection(candidate_skills)
candidate_score = float(len(common_skills)) / job_skill_count
return candidate_score * 100
def get_candidate_score_wrapper(args: tuple) -> float:
"""
A wrapper function to de-structure the tuple of arguments
for multiprocessing.Pool.map()
:param args: Tuple of arguments for the wrapped function
:return: Result of the wrapped function
"""
return get_candidate_score(*args)
def sort_candidates(
job_desc_text: str,
candidates_df: pd.DataFrame
) -> pd.DataFrame:
"""
This function compares the skills of a number of candidates
against the skills required of a given job description
:param job_desc_text: Job description text
:param candidates_df: DataFrame containing candidate details and skills
:return: DataFrame with candidates sorted as per their match
with the given job description
"""
# Get the list of required skills from the Job description
# and convert them to a set
nlp = spacy.load("en_core_web_sm")
doc = nlp(job_desc_text)
job_skills = set([
skill.lower() for skill in extract_skills(doc, doc.noun_chunks)
])
job_skill_count = len(job_skills)
# Get the candidate skills from the dataframe and create
# a set of skills for each candidate
candidates_skills = candidates_df["Skills"].values.tolist()
candidates_skills = [
set([
skill.strip().lower() for skill in skill_list.split(",")
])
for skill_list in candidates_skills
]
# Use multiprocessing to evaluate multiple candidates for
# a given job in parallel
num_executors = cpu_count()
processing_data = [
(job_skill_count, job_skills, person_skills)
for person_skills in candidates_skills
]
with Pool(num_executors) as process_pool:
candidates_df["Score"] = process_pool.map(get_candidate_score_wrapper, processing_data)
return candidates_df
if __name__ == '__main__':
# Read candidate details
df = pd.read_csv("resumes.csv", usecols=["Email", "Skills"])
try:
with open("sample_job_description.txt", "r") as fp:
job_description = fp.read()
except FileNotFoundError:
job_description = None
if job_description:
ranked_df = sort_candidates(job_description, df)
# Sort candidates in descending order of score
ranked_df.sort_values(by="Score", ascending=False, inplace=True)
ranked_df.to_csv("ranked.csv", index=False)