From 20c00f43dea3c5ac3e64b93ced85b060e5f497f5 Mon Sep 17 00:00:00 2001 From: Graham Hukill Date: Mon, 17 Jun 2024 13:48:54 -0400 Subject: [PATCH 1/3] QBClient able to delete records Why these changes are being introduced: For development, it is sometimes required that some or all records from a QB table are deleted. This is strangely not trivial in QB. Paginated deletes in the GUI are slow and time-consuming, and the API requires a somewhat cryptic query payload. A testable, normalized way of deleting records is helpful. How this addresses that need: * QBClient gets two new methods, delete_records() and delete_all_table_records() Side effects of this change: * None Relevant ticket(s): * None --- hrqb/utils/quickbase.py | 26 +++++++++++++++++++ tests/conftest.py | 19 ++++++++++++++ .../qb_api_responses/deleteRecords.json | 3 +++ tests/test_qbclient_client.py | 16 ++++++++++++ 4 files changed, 64 insertions(+) create mode 100644 tests/fixtures/qb_api_responses/deleteRecords.json diff --git a/hrqb/utils/quickbase.py b/hrqb/utils/quickbase.py index d42f9e2..1bdc9c6 100644 --- a/hrqb/utils/quickbase.py +++ b/hrqb/utils/quickbase.py @@ -247,3 +247,29 @@ def get_table_as_df( records, columns=table_fields_df.label, ) + + def delete_records(self, table_id: str, where_clause: str) -> dict: + """Delete records from a table given a where clause. + + https://developer.quickbase.com/operation/deleteRecords + """ + return self.make_request( + requests.delete, + "records", + cache=False, + json={ + "from": table_id, + "where": where_clause, + }, + ) + + def delete_all_table_records(self, table_id: str) -> dict: + """Delete all records from a Table. + + This is accomplished by retrieving the table metadata, reading its key field ID + ('keyFieldId'), and then deleting all records where that field is greater than 0 + (this is the suggested method for truncating a QB table). 
+ """ + table = self.get_table(table_id) + key_field_id = table["keyFieldId"] + return self.delete_records(table_id, f"{{{key_field_id}.GT.0}}") diff --git a/tests/conftest.py b/tests/conftest.py index 71204e2..a55e4fd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -356,6 +356,25 @@ def mocked_qb_api_getTable(qbclient, global_requests_mock, mocked_table_id): return api_response +@pytest.fixture +def mocked_delete_payload(mocked_table_id): + return {"from": mocked_table_id, "where": "{3.GT.0}"} + + +@pytest.fixture +def mocked_qb_api_delete_records(qbclient, mocked_delete_payload, global_requests_mock): + url = f"{qbclient.api_base}/records" + with open("tests/fixtures/qb_api_responses/deleteRecords.json") as f: + api_response = json.load(f) + global_requests_mock.register_uri( + "DELETE", + url, + additional_matcher=lambda req: req.json() == mocked_delete_payload, + json=api_response, + ) + return api_response + + @pytest.fixture def qbclient_with_mocked_table_fields(qbclient, mocked_query_table_fields): with mock.patch.object(type(qbclient), "get_table_fields") as mocked_table_fields: diff --git a/tests/fixtures/qb_api_responses/deleteRecords.json b/tests/fixtures/qb_api_responses/deleteRecords.json new file mode 100644 index 0000000..d014df1 --- /dev/null +++ b/tests/fixtures/qb_api_responses/deleteRecords.json @@ -0,0 +1,3 @@ +{ + "numberDeleted": 1 +} \ No newline at end of file diff --git a/tests/test_qbclient_client.py b/tests/test_qbclient_client.py index 2043112..e92db79 100644 --- a/tests/test_qbclient_client.py +++ b/tests/test_qbclient_client.py @@ -212,3 +212,19 @@ def test_qbclient_parse_upsert_results_response_success(qbclient, mocked_qb_api_ def test_qbclient_parse_upsert_results_response_error_return_none(qbclient): assert qbclient.parse_upsert_results({"msg": "bad API response"}) is None + + +def test_qbclient_delete_records_success( + qbclient, mocked_delete_payload, mocked_qb_api_delete_records +): + table_id = 
mocked_delete_payload["from"] + query = mocked_delete_payload["where"] + assert qbclient.delete_records(table_id, query) == mocked_qb_api_delete_records + + +def test_qbclient_delete_all_table_records_success( + qbclient, mocked_table_id, mocked_qb_api_delete_records +): + assert ( + qbclient.delete_all_table_records(mocked_table_id) == mocked_qb_api_delete_records + ) From 0103251d263ff1f60085a6e4d078b7a40be3094a Mon Sep 17 00:00:00 2001 From: Graham Hukill Date: Mon, 17 Jun 2024 15:16:29 -0400 Subject: [PATCH 2/3] Add ETL tasks for Performance Reviews Why these changes are being introduced: Performance Reviews in Quickbase are based on Employee Appointments. They should be dynamically generated in the types of 3 month, 6 month, and annual reviews. This HRQBClient is responsible for making the performance review record, but NOT the outcome of the review; this is manually filled in by HR. How this addresses that need: * Adds new ETL tasks for Performance Reviews and Years (lookup table) Side effects of this change: * Performance Reviews loaded Relevant ticket(s): * https://mitlibraries.atlassian.net/browse/HRQB-35 --- hrqb/tasks/performance_reviews.py | 178 ++++++++++++++++++++++++ hrqb/tasks/pipelines.py | 2 + hrqb/tasks/years.py | 43 ++++++ hrqb/utils/__init__.py | 26 ++++ tests/conftest.py | 38 +++++ tests/tasks/test_lookup_tables.py | 8 ++ tests/tasks/test_performance_reviews.py | 148 ++++++++++++++++++++ tests/test_utils.py | 45 ++++++ 8 files changed, 488 insertions(+) create mode 100644 hrqb/tasks/performance_reviews.py create mode 100644 hrqb/tasks/years.py create mode 100644 tests/tasks/test_performance_reviews.py diff --git a/hrqb/tasks/performance_reviews.py b/hrqb/tasks/performance_reviews.py new file mode 100644 index 0000000..5fd6c25 --- /dev/null +++ b/hrqb/tasks/performance_reviews.py @@ -0,0 +1,178 @@ +"""hrqb.tasks.performance_reviews""" + +import datetime + +import luigi # type: ignore[import-untyped] +import pandas as pd +from 
dateutil.relativedelta import relativedelta # type: ignore[import-untyped] + +from hrqb.base.task import PandasPickleTask, QuickbaseUpsertTask +from hrqb.utils import ( + convert_dataframe_columns_to_dates, + md5_hash_from_values, + normalize_dataframe_dates, + today_date, +) + + +class TransformPerformanceReviews(PandasPickleTask): + stage = luigi.Parameter("Transform") + + def requires(self) -> list[luigi.Task]: # pragma: nocover + from hrqb.tasks.shared import ExtractQBEmployeeAppointments + + return [ExtractQBEmployeeAppointments(pipeline=self.pipeline)] + + def get_dataframe(self) -> pd.DataFrame: + """Build dataframe of performance reviews from employee appointments.""" + emp_appts_df = self._get_employee_appointments() + + # loop through all appointments and create dataframe of performance reviews + reviews: list[dict] = [] + for _, emp_appt_row in emp_appts_df.iterrows(): + reviews.append(self._get_three_month_review(emp_appt_row)) + reviews.append(self._get_six_month_review(emp_appt_row)) + reviews.extend(self._get_annual_reviews(emp_appt_row)) + perf_revs_df = pd.DataFrame(reviews) + + perf_revs_df = normalize_dataframe_dates( + perf_revs_df, + [ + "review_date", + "period_start_date", + "period_end_date", + ], + ) + + # mint a unique, deterministic value for the merge "Key" field + perf_revs_df["key"] = perf_revs_df.apply( + lambda row: md5_hash_from_values( + [ + row.mit_id, + row.review_type, + row.review_year, + ] + ), + axis=1, + ) + + fields = { + "mit_id": "MIT ID", + "employee_appointment_id": "Related Employee Appointment", + "review_type": "Review Type", + "period_start_date": "Period Covered Start Date", + "period_end_date": "Period Covered End Date", + "review_date": "Date of Review", + "review_year": "Related Year", + "key": "Key", + } + return perf_revs_df[fields.keys()].rename(columns=fields) + + def _get_employee_appointments(self) -> pd.DataFrame: + """Get employee appointments from Quickbase.""" + emp_appts_df = 
self.named_inputs["ExtractQBEmployeeAppointments"].read() + emp_appt_fields = { + "MIT ID": "mit_id", + "Record ID#": "employee_appointment_id", + "Begin Date": "appointment_begin_date", + "End Date": "appointment_end_date", + "Related Employee Type": "employee_type", + "Union Name": "union_name", + "Exempt / NE": "exempt", + } + emp_appts_df = emp_appts_df.rename(columns=emp_appt_fields)[ + emp_appt_fields.values() + ] + return convert_dataframe_columns_to_dates( + emp_appts_df, ["appointment_begin_date", "appointment_end_date"] + ) + + def _get_three_month_review(self, emp_appt_row: pd.Series) -> dict: + review_date = emp_appt_row.appointment_begin_date + relativedelta(months=+3) + return { + "mit_id": emp_appt_row.mit_id, + "employee_appointment_id": emp_appt_row.employee_appointment_id, + "review_type": "3 Month Review", + "review_date": review_date, + "period_start_date": emp_appt_row.appointment_begin_date, + "period_end_date": review_date, + "review_year": str(review_date.year), + } + + def _get_six_month_review(self, emp_appt_row: pd.Series) -> dict: + review_date = emp_appt_row.appointment_begin_date + relativedelta(months=+6) + return { + "mit_id": emp_appt_row.mit_id, + "employee_appointment_id": emp_appt_row.employee_appointment_id, + "review_type": "6 Month Review", + "review_date": review_date, + "period_start_date": emp_appt_row.appointment_begin_date, + "period_end_date": review_date, + "review_year": str(review_date.year), + } + + def _get_annual_reviews(self, emp_appt_row: pd.Series) -> list[dict]: + """Get annual performance reviews for an appointment. + + This method begins with the appointment start year, with a minimum of 2019, then + adds performance reviews through current year + 1. + + If an annual performance review would fall inside of a 3 or 6 month review, it is + not included. + + NOTE: as of 6/17/2024, HR is in the process of re-evaluating annual review + timeframes. 
The cadence and review dates set below are placeholders until + that is finalized. + """ + start_year = max([emp_appt_row.appointment_begin_date.year, 2019]) + end_year = today_date().year + 2 + + review_month = 7 if emp_appt_row.exempt else 8 + + reviews = [] + for year in range(start_year, end_year): + review_end_date = datetime.datetime( + year, review_month, 1, tzinfo=datetime.UTC + ) + review_start_date = review_end_date - relativedelta(years=1) + + # skip annual reviews that end on or before the 6 month review date + six_month_review_date = self._get_six_month_review(emp_appt_row)[ + "review_date" + ] + if review_end_date <= six_month_review_date: + continue + + reviews.append( + { + "mit_id": emp_appt_row.mit_id, + "employee_appointment_id": emp_appt_row.employee_appointment_id, + "review_type": "Annual", + "period_start_date": review_start_date, + "period_end_date": review_end_date, + "review_date": review_end_date, + "review_year": str(year), + } + ) + return reviews + + +class LoadPerformanceReviews(QuickbaseUpsertTask): + table_name = luigi.Parameter("Performance Reviews") + stage = luigi.Parameter("Load") + + def requires(self) -> list[luigi.Task]: # pragma: nocover + from hrqb.tasks.years import LoadYears + + return [ + LoadYears(pipeline=self.pipeline), + TransformPerformanceReviews(pipeline=self.pipeline), + ] + + @property + def merge_field(self) -> str | None: + return "Key" + + @property + def input_task_to_load(self) -> str | None: + return "TransformPerformanceReviews" diff --git a/hrqb/tasks/pipelines.py b/hrqb/tasks/pipelines.py index a5fba16..c8cc12b 100644 --- a/hrqb/tasks/pipelines.py +++ b/hrqb/tasks/pipelines.py @@ -15,11 +15,13 @@ def requires(self) -> Iterator[luigi.Task]: # pragma: no cover from hrqb.tasks.employee_leave import LoadEmployeeLeave from hrqb.tasks.employee_salary_history import LoadEmployeeSalaryHistory from hrqb.tasks.employees import LoadEmployees + from hrqb.tasks.performance_reviews import LoadPerformanceReviews yield 
LoadEmployees(pipeline=self.pipeline_name) yield LoadEmployeeAppointments(pipeline=self.pipeline_name) yield LoadEmployeeSalaryHistory(pipeline=self.pipeline_name) yield LoadEmployeeLeave(pipeline=self.pipeline_name) + yield LoadPerformanceReviews(pipeline=self.pipeline_name) class UpdateLibHRData(HRQBPipelineTask): diff --git a/hrqb/tasks/years.py b/hrqb/tasks/years.py new file mode 100644 index 0000000..b5073c5 --- /dev/null +++ b/hrqb/tasks/years.py @@ -0,0 +1,43 @@ +"""hrqb.tasks.years""" + +import luigi # type: ignore[import-untyped] +import pandas as pd + +from hrqb.base.task import PandasPickleTask, QuickbaseUpsertTask +from hrqb.utils import today_date + + +class TransformYears(PandasPickleTask): + stage = luigi.Parameter("Transform") + + def requires(self) -> list[luigi.Task]: # pragma: nocover + from hrqb.tasks.performance_reviews import TransformPerformanceReviews + + return [TransformPerformanceReviews(pipeline=self.pipeline)] + + def get_dataframe(self) -> pd.DataFrame: + perf_revs_df = self.single_input_dataframe + perf_revs_df = perf_revs_df.rename(columns={"Related Year": "year"}) + + years_df = perf_revs_df.drop_duplicates("year").copy() + years_df["year"] = years_df["year"].astype(int) + years_df["active"] = years_df["year"] == today_date().year + years_df["year"] = years_df["year"].astype(str) + + fields = { + "year": "Year", + "active": "Active Year", + } + return years_df[fields.keys()].rename(columns=fields) + + +class LoadYears(QuickbaseUpsertTask): + table_name = luigi.Parameter("Years") + stage = luigi.Parameter("Load") + + @property + def merge_field(self) -> str | None: + return "Year" # pragma: nocover + + def requires(self) -> list[luigi.Task]: # pragma: nocover + return [TransformYears(pipeline=self.pipeline)] diff --git a/hrqb/utils/__init__.py b/hrqb/utils/__init__.py index 88b70c7..445ac9c 100644 --- a/hrqb/utils/__init__.py +++ b/hrqb/utils/__init__.py @@ -3,6 +3,7 @@ import datetime import hashlib import logging +from typing 
import Any import click import pandas as pd @@ -31,6 +32,31 @@ def normalize_date(date: str | datetime.datetime) -> str | None: return None +def convert_dataframe_columns_to_dates( + df: pd.DataFrame, columns: list[str] +) -> pd.DataFrame: + """Convert select columns from a dataframe to datetime objects. + + This more manual approach avoids a pandas error with pd.to_datetime() when the date + exceeds 2262-04-11. Normally this would not be a problem, but employee appointments + that are ongoing receive a datetime of 2999-12-31. See: https://pandas.pydata.org/ + pandas-docs/stable/user_guide/timeseries.html#timestamp-limitations. + """ + + def convert_to_date( + value: Any, # noqa: ANN401 + ) -> datetime.datetime | pd.Timestamp | None: + if isinstance(value, str): + return date_parser(value).replace(tzinfo=datetime.UTC) + if isinstance(value, datetime.datetime | pd.Timestamp): + return value.replace(tzinfo=datetime.UTC) + return None + + for column in columns: + df[column] = df[column].apply(lambda x: convert_to_date(x)) + return df + + def normalize_dataframe_dates(df: pd.DataFrame, columns: list[str]) -> pd.DataFrame: df[columns] = df[columns].map(normalize_date) return df diff --git a/tests/conftest.py b/tests/conftest.py index a55e4fd..a537445 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -782,6 +782,10 @@ def task_shared_extract_qb_employee_appointments_complete(all_tasks_pipeline_nam "Position ID": "987654321", "Begin Date": "2010-01-01", "End Date": "2011-12-01", + "MIT ID": "123456789", + "Related Employee Type": "Admin Staff", + "Union Name": "Le Union", + "Exempt / NE": "E", } ] ) @@ -880,3 +884,37 @@ def task_transform_employee_leave_types_complete( task = TransformEmployeeLeaveTypes(pipeline=all_tasks_pipeline_name) task.run() return task + + +@pytest.fixture +def task_transform_performance_reviews_complete( + all_tasks_pipeline_name, + task_shared_extract_qb_employee_appointments_complete, +): + from hrqb.tasks.performance_reviews import 
TransformPerformanceReviews + + task = TransformPerformanceReviews(pipeline=all_tasks_pipeline_name) + task.run() + return task + + +@pytest.fixture +def task_load_performance_reviews_complete( + all_tasks_pipeline_name, + task_transform_performance_reviews_complete, +): + from hrqb.tasks.performance_reviews import LoadPerformanceReviews + + return LoadPerformanceReviews(pipeline=all_tasks_pipeline_name) + + +@pytest.fixture +def task_transform_years_complete( + all_tasks_pipeline_name, + task_transform_performance_reviews_complete, +): + from hrqb.tasks.years import TransformYears + + task = TransformYears(pipeline=all_tasks_pipeline_name) + task.run() + return task diff --git a/tests/tasks/test_lookup_tables.py b/tests/tasks/test_lookup_tables.py index 415f9d5..81c1e25 100644 --- a/tests/tasks/test_lookup_tables.py +++ b/tests/tasks/test_lookup_tables.py @@ -34,3 +34,11 @@ def test_task_transform_employee_leave_types_required_fields( assert {"Leave Type", "Paid Leave", "Accrue Seniority"} == set( task_transform_employee_leave_types_complete.get_dataframe().columns ) + + +def test_task_transform_years_required_fields( + task_transform_years_complete, +): + assert {"Year", "Active Year"} == set( + task_transform_years_complete.get_dataframe().columns + ) diff --git a/tests/tasks/test_performance_reviews.py b/tests/tasks/test_performance_reviews.py new file mode 100644 index 0000000..15492b6 --- /dev/null +++ b/tests/tasks/test_performance_reviews.py @@ -0,0 +1,148 @@ +# ruff: noqa: PLR2004, PD901, SLF001, D205, D212 +import datetime + +import pandas as pd + +from hrqb.utils import md5_hash_from_values + + +def test_task_transform_performance_reviews_get_employee_appointments_required_fields( + task_transform_performance_reviews_complete, +): + emp_appts_df = ( + task_transform_performance_reviews_complete._get_employee_appointments() + ) + assert { + "mit_id", + "employee_appointment_id", + "appointment_begin_date", + "appointment_end_date", + "employee_type", + 
"union_name", + "exempt", + } == set(emp_appts_df.columns) + + +def test_task_transform_performance_reviews_get_employee_appointments_datetime_converts( + task_transform_performance_reviews_complete, +): + row = task_transform_performance_reviews_complete._get_employee_appointments().iloc[0] + for column in ["appointment_begin_date", "appointment_end_date"]: + assert isinstance( + row[column], + datetime.datetime, + ) + + +def test_task_transform_performance_reviews_get_three_month_review( + task_transform_performance_reviews_complete, +): + row = task_transform_performance_reviews_complete._get_employee_appointments().iloc[0] + review_date = datetime.datetime(2010, 4, 1, tzinfo=datetime.UTC) + assert task_transform_performance_reviews_complete._get_three_month_review(row) == { + "mit_id": "123456789", + "employee_appointment_id": 12000, + "review_type": "3 Month Review", + "review_date": pd.Timestamp(review_date), + "period_start_date": row.appointment_begin_date, + "period_end_date": pd.Timestamp(review_date), + "review_year": str(review_date.year), + } + + +def test_task_transform_performance_reviews_get_six_month_review( + task_transform_performance_reviews_complete, +): + row = task_transform_performance_reviews_complete._get_employee_appointments().iloc[0] + review_date = datetime.datetime(2010, 7, 1, tzinfo=datetime.UTC) + assert task_transform_performance_reviews_complete._get_six_month_review(row) == { + "mit_id": "123456789", + "employee_appointment_id": 12000, + "review_type": "6 Month Review", + "review_date": pd.Timestamp(review_date), + "period_start_date": row.appointment_begin_date, + "period_end_date": pd.Timestamp(review_date), + "review_year": str(review_date.year), + } + + +def test_task_transform_performance_reviews_get_annual_reviews( + task_transform_performance_reviews_complete, +): + row = task_transform_performance_reviews_complete._get_employee_appointments().iloc[0] + ann_revs_df = pd.DataFrame( + 
task_transform_performance_reviews_complete._get_annual_reviews(row) + ) + assert len(ann_revs_df) == 7 + assert list(ann_revs_df.review_year) == [ + "2019", + "2020", + "2021", + "2022", + "2023", + "2024", + "2025", + ] + assert list(ann_revs_df.period_start_date) == [ + pd.Timestamp("2018-07-01 00:00:00+0000", tz="UTC"), + pd.Timestamp("2019-07-01 00:00:00+0000", tz="UTC"), + pd.Timestamp("2020-07-01 00:00:00+0000", tz="UTC"), + pd.Timestamp("2021-07-01 00:00:00+0000", tz="UTC"), + pd.Timestamp("2022-07-01 00:00:00+0000", tz="UTC"), + pd.Timestamp("2023-07-01 00:00:00+0000", tz="UTC"), + pd.Timestamp("2024-07-01 00:00:00+0000", tz="UTC"), + ] + assert list(ann_revs_df.period_end_date) == [ + pd.Timestamp("2019-07-01 00:00:00+0000", tz="UTC"), + pd.Timestamp("2020-07-01 00:00:00+0000", tz="UTC"), + pd.Timestamp("2021-07-01 00:00:00+0000", tz="UTC"), + pd.Timestamp("2022-07-01 00:00:00+0000", tz="UTC"), + pd.Timestamp("2023-07-01 00:00:00+0000", tz="UTC"), + pd.Timestamp("2024-07-01 00:00:00+0000", tz="UTC"), + pd.Timestamp("2025-07-01 00:00:00+0000", tz="UTC"), + ] + + +def test_task_transform_performance_reviews_get_annual_reviews_skip_six_month_overlap( + task_transform_performance_reviews_complete, +): + """ + Note in the test above that the number of annual reviews is 7, and that there is a + review from 2018-2019. By setting this appointment begin date as 2019-03-01, this + means there should not be a 2018-2019 review, as the 6-month review will be after the + beginning of the 2019-2020 review cycle. 
+ """ + row = task_transform_performance_reviews_complete._get_employee_appointments().iloc[0] + row.appointment_begin_date = datetime.datetime(2019, 3, 1, tzinfo=datetime.UTC) + ann_revs_df = pd.DataFrame( + task_transform_performance_reviews_complete._get_annual_reviews(row) + ) + + assert len(ann_revs_df) == 6 + assert ( + pd.Timestamp("2018-07-01 00:00:00+0000", tz="UTC") + not in ann_revs_df.period_start_date + ) + + +def test_task_transform_performance_reviews_key_expected_from_input_data( + task_transform_performance_reviews_complete, +): + row = task_transform_performance_reviews_complete.get_dataframe().iloc[0] + assert row["Key"] == md5_hash_from_values( + [ + row["MIT ID"], + row["Review Type"], + row["Related Year"], + ] + ) + + +def test_task_load_employee_salary_history_explicit_properties( + task_load_performance_reviews_complete, +): + assert task_load_performance_reviews_complete.merge_field == "Key" + assert ( + task_load_performance_reviews_complete.input_task_to_load + == "TransformPerformanceReviews" + ) diff --git a/tests/test_utils.py b/tests/test_utils.py index 4f23418..64019e6 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -5,10 +5,12 @@ import numpy as np import pandas as pd import pytest +from dateutil.parser import ParserError # type: ignore[import-untyped] from freezegun import freeze_time from hrqb.utils import ( click_argument_to_dict, + convert_dataframe_columns_to_dates, convert_oracle_bools_to_qb_bools, md5_hash_from_values, normalize_dataframe_dates, @@ -138,3 +140,46 @@ def test_md5_hash_from_values_raise_error_for_non_string_value(): datetime.datetime(2000, 1, 1, tzinfo=datetime.UTC), ] ) + + +def test_convert_dataframe_columns_to_dates_conversion_success(): + df = pd.DataFrame( + [ + { + "foo": "2000-01-01", + "bar": "horse", + }, + { + "foo": datetime.datetime(2020, 6, 1, tzinfo=datetime.UTC), + "bar": "zebra", + }, + { + "foo": 42, + "bar": "giraffe", + }, + ] + ) + assert convert_dataframe_columns_to_dates(df, 
["foo"]).equals( + pd.DataFrame( + [ + { + "foo": datetime.datetime(2000, 1, 1, tzinfo=datetime.UTC), + "bar": "horse", + }, + { + "foo": datetime.datetime(2020, 6, 1, tzinfo=datetime.UTC), + "bar": "zebra", + }, + { + "foo": None, + "bar": "giraffe", + }, + ] + ) + ) + + +def test_convert_dataframe_columns_to_dates_bad_date_raise_error(): + df = pd.DataFrame([{"foo": "I CANNOT BE PARSED", "bar": "horse"}]) + with pytest.raises(ParserError, match="Unknown string format: I CANNOT BE PARSED"): + assert convert_dataframe_columns_to_dates(df, ["foo"]) From b264bc71dcf30cd53a3d3930b99e3d8be9ec8db5 Mon Sep 17 00:00:00 2001 From: Graham Hukill Date: Mon, 17 Jun 2024 16:40:35 -0400 Subject: [PATCH 3/3] more explicit type hinting --- hrqb/utils/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hrqb/utils/__init__.py b/hrqb/utils/__init__.py index 445ac9c..e0eddf6 100644 --- a/hrqb/utils/__init__.py +++ b/hrqb/utils/__init__.py @@ -3,7 +3,6 @@ import datetime import hashlib import logging -from typing import Any import click import pandas as pd @@ -44,7 +43,7 @@ def convert_dataframe_columns_to_dates( """ def convert_to_date( - value: Any, # noqa: ANN401 + value: str | datetime.datetime | pd.Timestamp, ) -> datetime.datetime | pd.Timestamp | None: if isinstance(value, str): return date_parser(value).replace(tzinfo=datetime.UTC)