-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
HRQB 40 - Fix merges without volatile 'HR Appointment Key' #115
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,10 +7,12 @@ | |
import pandas as pd | ||
|
||
from hrqb.base.task import ( | ||
HRQBTask, | ||
PandasPickleTask, | ||
QuickbaseUpsertTask, | ||
SQLQueryExtractTask, | ||
) | ||
from hrqb.tasks.employee_appointments import TransformEmployeeAppointments | ||
from hrqb.utils import ( | ||
convert_oracle_bools_to_qb_bools, | ||
md5_hash_from_values, | ||
|
@@ -44,17 +46,28 @@ def get_dataframe(self) -> pd.DataFrame: | |
dw_leaves_df = self.named_inputs["ExtractDWEmployeeLeave"].read() | ||
qb_emp_appts_df = self.named_inputs["ExtractQBEmployeeAppointments"].read() | ||
|
||
qb_emp_appts_df = qb_emp_appts_df[["HR Appointment Key", "Record ID#"]].rename( | ||
# join Employee Appointments from QB to get QB Record ID | ||
dw_leaves_df = normalize_dataframe_dates( | ||
dw_leaves_df, | ||
["appt_begin_date", "appt_end_date", "absence_date"], | ||
) | ||
dw_leaves_df["emp_appt_merge_key"] = dw_leaves_df.apply( | ||
lambda row: TransformEmployeeAppointments.generate_merge_key( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [Non-blocking] Hmm, is there a reason why There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The reasoning here is kind of twofold. The logic for generating a merge field key on Related -- at least not yet -- the other tasks that generate a merge key, no other tasks need to know how they do it. If that changes, then I think a natural refactor would be to drop it into an externally available If this pattern continues where tasks need to be able to recreate the merge key for other tasks, it might be worth abstracting that out to the base tasks, and then each task just identifies what row data is used for it. But I don't think we're quite there yet. This could be the end of this need.... or not! |
||
row.mit_id, | ||
row.position_id, | ||
row.appt_begin_date, | ||
row.appt_end_date, | ||
), | ||
axis=1, | ||
) | ||
qb_emp_appts_df = qb_emp_appts_df[["Key", "Record ID#"]].rename( | ||
columns={ | ||
"HR Appointment Key": "hr_appt_key", | ||
"Key": "emp_appt_merge_key", | ||
"Record ID#": "related_employee_appointment_id", | ||
} | ||
) | ||
leaves_df = dw_leaves_df.merge(qb_emp_appts_df, how="left", on="hr_appt_key") | ||
|
||
leaves_df = normalize_dataframe_dates( | ||
leaves_df, | ||
["appt_begin_date", "appt_end_date", "absence_date"], | ||
leaves_df = dw_leaves_df.merge( | ||
qb_emp_appts_df, how="left", on="emp_appt_merge_key" | ||
) | ||
|
||
# WIP TODO: determine what data points from combination employee leave and | ||
|
@@ -92,6 +105,18 @@ def get_dataframe(self) -> pd.DataFrame: | |
} | ||
return leaves_df[fields.keys()].rename(columns=fields) | ||
|
||
@HRQBTask.integrity_check | ||
def all_rows_have_employee_appointments(self, output_df: pd.DataFrame) -> None: | ||
missing_appointment_count = len( | ||
output_df[output_df["Related Employee Appointment"].isna()] | ||
) | ||
if missing_appointment_count > 0: | ||
message = ( | ||
f"{missing_appointment_count} rows are missing an Employee " | ||
f"Appointment for task '{self.name}'" | ||
) | ||
raise ValueError(message) | ||
|
||
|
||
class LoadEmployeeLeave(QuickbaseUpsertTask): | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -749,8 +749,10 @@ def task_extract_dw_employee_salary_history_complete(all_tasks_pipeline_name): | |
"mit_id": "123456789", | ||
"job_id": "123456789", | ||
"position_id": "987654321", | ||
"start_date": Timestamp("2010-07-01 00:00:00"), | ||
"end_date": datetime.datetime(2011, 12, 1, 0, 0), | ||
"appt_begin_date": Timestamp("2019-01-01 00:00:00"), | ||
"appt_end_date": datetime.datetime(2022, 12, 1, 0, 0), | ||
"start_date": Timestamp("2019-07-01 00:00:00"), | ||
"end_date": datetime.datetime(2022, 12, 1, 0, 0), | ||
"hr_personnel_action_type_key": "CS01", | ||
"hr_personnel_action": "Annual Salary Review", | ||
"hr_action_reason": "Review Increase", | ||
|
@@ -779,14 +781,14 @@ def task_shared_extract_qb_employee_appointments_complete(all_tasks_pipeline_nam | |
[ | ||
{ | ||
"Record ID#": 12000, | ||
"HR Appointment Key": 123.0, | ||
"Position ID": "987654321", | ||
"Begin Date": "2019-01-01", | ||
"End Date": "2022-12-01", | ||
"MIT ID": "123456789", | ||
"Related Employee Type": "Admin Staff", | ||
"Union Name": "Le Union", | ||
"Exempt / NE": "E", | ||
"Key": "868ce513323c5391ae8afaa0ceb70c69", | ||
} | ||
] | ||
) | ||
|
@@ -837,10 +839,11 @@ def task_extract_dw_employee_leave_complete(all_tasks_pipeline_name): | |
[ | ||
{ | ||
"mit_id": "123456789", | ||
"appt_begin_date": Timestamp("2010-01-01 00:00:00"), | ||
"appt_end_date": datetime.datetime(2011, 12, 1, 0, 0), | ||
"appt_begin_date": Timestamp("2019-01-01 00:00:00"), | ||
"appt_end_date": datetime.datetime(2022, 12, 1, 0, 0), | ||
"position_id": "987654321", | ||
"hr_appt_key": 123, | ||
"absence_date": Timestamp("2010-07-01 00:00:00"), | ||
"absence_date": Timestamp("2019-07-01 00:00:00"), | ||
Comment on lines
-840
to
+846
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This update, while not strictly required here, was just to better align mocked data in fixtures. |
||
"absence_type": "Vacation", | ||
"absence_type_code": "VACA", | ||
"actual_absence_hours": 8.0, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I like that this function gives us more information on which fields (and their data types) are selected as merge keys!