Skip to content

Commit

Permalink
Merge pull request #175 from MITLibraries/HRQB-49-historical-libhr-data
Browse files Browse the repository at this point in the history
HRQB 49 - Include historical LibHR data
  • Loading branch information
ghukill authored Sep 18, 2024
2 parents 5a35f77 + 6364f99 commit 63de91c
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 38 deletions.
3 changes: 3 additions & 0 deletions hrqb/tasks/employee_appointments.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ def get_dataframe(self) -> pd.DataFrame:
libhr_df = self.named_inputs["ExtractQBLibHREmployeeAppointments"].read()
depts_df = self.named_inputs["ExtractQBDepartments"].read()

# filter libhr data to active appointments, with position IDs
libhr_df = libhr_df[(libhr_df["Active"]) & ~(libhr_df["Position ID"].isna())]

# normalize position id to string and pad zeros
libhr_df["Position ID"] = libhr_df["Position ID"].apply(
lambda x: str(int(x)).zfill(8)
Expand Down
28 changes: 26 additions & 2 deletions hrqb/tasks/libhr_employee_appointments.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
"""hrqb.tasks.libhr_employee_appointments"""

import luigi # type: ignore[import-untyped]
import numpy as np
import pandas as pd

from hrqb.base.task import (
PandasPickleTask,
QuickbaseUpsertTask,
)
from hrqb.utils import md5_hash_from_values
from hrqb.utils.quickbase import QBClient


Expand All @@ -23,7 +25,16 @@ class ExtractLibHREmployeeAppointments(PandasPickleTask):
csv_filepath = luigi.Parameter()

def get_dataframe(self) -> pd.DataFrame:
"""Read the LibHR CSV at ``csv_filepath`` and map 'Active' booleans to "Yes"/"No"."""
# NOTE(review): this is a diff view; the bare `return pd.read_csv(...)` below
# appears to be the removed, pre-change implementation -- confirm against the file.
return pd.read_csv(self.csv_filepath)
# read CSV file
libhr_df = pd.read_csv(self.csv_filepath)

# convert 'Active' column to Quickbase Yes/No checkbox value
# np.False_ and np.True_ values are the result of Excel --> CSV --> pandas
# values other than True/False are not listed in the mapping passed to replace()
libhr_df["Active"] = libhr_df["Active"].replace(
{np.True_: "Yes", np.False_: "No"}
)

return libhr_df


class ExtractQBDepartments(PandasPickleTask):
Expand Down Expand Up @@ -67,13 +78,26 @@ def get_dataframe(self) -> pd.DataFrame:
how="left",
)

# mint a unique, deterministic value for the merge "Key" field
libhr_df["Key"] = libhr_df.apply(
lambda row: md5_hash_from_values(
[
str(row["MIT ID"]),
str(row["HC ID"]),
]
),
axis=1,
)

fields = {
"MIT ID": "Related Employee MIT ID",
"Supervisor ID": "Related Supervisor MIT ID",
"Cost Object": "Cost Object",
"HC ID": "HC ID",
"Position ID": "Position ID",
"Related Department ID": "Related Department ID",
"Active": "Active",
"Key": "Key",
}
return libhr_df[fields.keys()].rename(columns=fields)

Expand All @@ -86,7 +110,7 @@ class LoadLibHREmployeeAppointments(QuickbaseUpsertTask):
@property
def merge_field(self) -> str | None:
"""Explicitly merge on the unique, deterministic "Key" field."""
return "Position ID"
return "Key"

def requires(self) -> list[luigi.Task]: # pragma: nocover
return [
Expand Down
1 change: 1 addition & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,7 @@ def task_extract_qb_libhr_complete(all_tasks_pipeline_name):
"Position ID": 987654321,
"Cost Object": 7777777,
"Related Department ID": 42.0,
"Active": True,
}
]
)
Expand Down
8 changes: 4 additions & 4 deletions tests/fixtures/libhr_static_data.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
MIT ID,HC ID,Full Name,Position,Position ID,Employee Type,Supervisor ID,Supervisor Name,Cost Object,Department
123456789,L-001,"Doe, John",Science Librarian,888888888,Admin Staff,444444444,"Smith, Fancy",555555555,DDC
987654321,L-100,"Doe, Jane",Data Engineer,999999999,Admin Staff,444444444,"Smith, Fancy",555555555,ITS
987654321,L-100,"Doe, Jane",DevOps Engineer,999999991,Admin Staff,444444444,"Smith, Fancy",555555555,BAD_ACRO
MIT ID,HC ID,Full Name,Position,Position ID,Employee Type,Supervisor ID,Supervisor Name,Cost Object,Department,Active
123456789,L-001,"Doe, John",Science Librarian,888888888,Admin Staff,444444444,"Smith, Fancy",555555555,DDC,True
987654321,L-100,"Doe, Jane",Data Engineer,999999999,Admin Staff,444444444,"Smith, Fancy",555555555,ITS,True
987654321,L-101x,"Doe, Jane",DevOps Engineer,999999991,Admin Staff,444444444,"Smith, Fancy",555555555,BAD_ACRO,False
47 changes: 15 additions & 32 deletions tests/tasks/test_libhr_employee_appointments.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# ruff: noqa: PD901, PLR2004

import numpy as np
import pandas as pd


Expand All @@ -18,39 +17,23 @@ def test_transform_libhr_employee_appointments_merge_departments(
task_transform_libhr_employee_appointments,
):
new_df = task_transform_libhr_employee_appointments.get_dataframe()
assert new_df.equals(
pd.DataFrame(
[
{
"Related Employee MIT ID": 123456789,
"Related Supervisor MIT ID": 444444444,
"Cost Object": 555555555,
"HC ID": "L-001",
"Position ID": 888888888,
"Related Department ID": 35.0,
},
{
"Related Employee MIT ID": 987654321,
"Related Supervisor MIT ID": 444444444,
"Cost Object": 555555555,
"HC ID": "L-100",
"Position ID": 999999999,
"Related Department ID": 40.0,
},
{
"Related Employee MIT ID": 987654321,
"Related Supervisor MIT ID": 444444444,
"Cost Object": 555555555,
"HC ID": "L-100",
"Position ID": 999999991,
"Related Department ID": np.nan,
},
]
)
)
assert new_df.iloc[0]["Related Department ID"] == 35.0
assert new_df.iloc[1]["Related Department ID"] == 40.0
assert pd.isna(new_df.iloc[2]["Related Department ID"]) # no match in merge, so NULL


def test_load_libhr_employee_appointments_merge_field_set(
task_load_libhr_employee_appointments,
):
"""Check the merge field configured on the LibHR load task."""
# NOTE(review): diff view -- the "Position ID" assertion appears to be the
# removed line and the "Key" assertion its replacement; confirm against the file.
assert task_load_libhr_employee_appointments.merge_field == "Position ID"
assert task_load_libhr_employee_appointments.merge_field == "Key"


def test_transform_libhr_employee_appointments_merge_field_key_values(
task_transform_libhr_employee_appointments,
):
"""Pin the exact "Key" column values returned by the transform task, in row order."""
new_df = task_transform_libhr_employee_appointments.get_dataframe()
# hard-coded expected hashes; presumably derived from the fixture rows'
# MIT ID and HC ID values, so they must be regenerated if that data changes
assert list(new_df["Key"]) == [
"81cf06bfd65aa1f7019750c57a79be99",
"6e07102ee39ec1f22c63231d090bd4dd",
"af08a24eeb35fae63fa76e755537b949",
]

0 comments on commit 63de91c

Please sign in to comment.