From a7592c437f2622fe5da47ef82b3ef0b142f1da66 Mon Sep 17 00:00:00 2001 From: Graham Hukill Date: Tue, 30 Apr 2024 16:32:52 -0400 Subject: [PATCH 1/6] Setup CLI main group and ping command --- hrqb/cli.py | 24 +++++++++++++++++------- tests/test_cli.py | 22 +++++++++++----------- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/hrqb/cli.py b/hrqb/cli.py index 81d72b2..ca942d3 100644 --- a/hrqb/cli.py +++ b/hrqb/cli.py @@ -9,20 +9,30 @@ logger = logging.getLogger(__name__) -@click.command() +@click.group(context_settings={"help_option_names": ["-h", "--help"]}) @click.option( - "-v", "--verbose", is_flag=True, help="Pass to log at debug level instead of info" + "-v", + "--verbose", + is_flag=True, + help="Pass to log at debug level instead of info.", ) -def main(*, verbose: bool) -> None: - start_time = perf_counter() +@click.pass_context +def main(ctx: click.Context, verbose: bool) -> None: # noqa: FBT001 + ctx.ensure_object(dict) + ctx.obj["START_TIME"] = perf_counter() root_logger = logging.getLogger() logger.info(configure_logger(root_logger, verbose=verbose)) logger.info(configure_sentry()) logger.info("Running process") - # Do things here! 
- elapsed_time = perf_counter() - start_time +@main.command() +@click.pass_context +def ping(ctx: click.Context) -> None: + logger.debug("pong") logger.info( - "Total time to complete process: %s", str(timedelta(seconds=elapsed_time)) + "Total elapsed: %s", + str( + timedelta(seconds=perf_counter() - ctx.obj["START_TIME"]), + ), ) diff --git a/tests/test_cli.py b/tests/test_cli.py index 8ba957f..0f39638 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,17 +1,17 @@ from hrqb.cli import main +OKAY_RESULT_CODE = 0 +MISSING_CLICK_ARG_RESULT_CODE = 2 -def test_cli_no_options(caplog, runner): + +def test_cli_no_subcommand(runner): result = runner.invoke(main) - assert result.exit_code == 0 - assert "Logger 'root' configured with level=INFO" in caplog.text - assert "Running process" in caplog.text - assert "Total time to complete process" in caplog.text + assert result.exit_code == OKAY_RESULT_CODE -def test_cli_all_options(caplog, runner): - result = runner.invoke(main, ["--verbose"]) - assert result.exit_code == 0 - assert "Logger 'root' configured with level=DEBUG" in caplog.text - assert "Running process" in caplog.text - assert "Total time to complete process" in caplog.text +def test_cli_verbose_ping(caplog, runner): + caplog.set_level("DEBUG") + args = ["--verbose", "ping"] + result = runner.invoke(main, args) + assert result.exit_code == OKAY_RESULT_CODE + assert "pong" in caplog.text From bdd4f72be425d602c950d155400c4fa1bec50f2e Mon Sep 17 00:00:00 2001 From: Graham Hukill Date: Tue, 30 Apr 2024 16:36:31 -0400 Subject: [PATCH 2/6] Fix lint-apply command --- Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 10f0f14..9bc6dd3 100644 --- a/Makefile +++ b/Makefile @@ -44,8 +44,7 @@ safety: # check for security vulnerabilities and verify Pipfile.lock is up-to-da pipenv check pipenv verify -lint-apply: # apply changes with 'black' and resolve 'fixable errors' with 'ruff' - black-apply ruff-apply 
+lint-apply: black-apply ruff-apply # apply changes with 'black' and resolve 'fixable errors' with 'ruff' black-apply: # apply changes with 'black' pipenv run black . From 1688e124339967ee9fe18ae155b4c8e3d740f6a2 Mon Sep 17 00:00:00 2001 From: Graham Hukill Date: Tue, 30 Apr 2024 16:38:41 -0400 Subject: [PATCH 3/6] Base luigi Task and Targets for HRQB Why these changes are being introduced: This application will be oriented around Extract, Transform, and Load luigi Tasks. Each of these Tasks will likely either pickle pandas objects to disk, or upsert data to Quickbase. The base Task and Targets here will be used throughout the actual ETL luigi tasks defined. How this addresses that need: * HRQBLocalTarget extends luigi.LocalTarget to have a table name property * PandasPickleTarget extends HRQBLocalTarget to read/write pandas objects to disk * QuickbaseTableTarget extends HRQBLocalTarget to read/write JSON records of Quickbase API writes to disk * HRQBTask extends luigi.Task to provide some convenience methods used by many Tasks * PandasPickleTask extends HRQBTask to provide PandasPickleTarget defaults * QuickbaseTableUpsert extends HRQBTask to provide QuickbaseTableTarget defaults * HRQBPipelineTask is a special luigi wrapper class meant to trigger other tasks but not have output. Will be used for defining a pipeline (set of tasks). 
* extract, transform, and load files scaffolded for future Tasks Side effects of this change: * None Relevant ticket(s): * https://mitlibraries.atlassian.net/browse/HRQB-10 --- Pipfile | 2 + Pipfile.lock | 276 ++++++++++++++++++++++++++++------------ hrqb/base/__init__.py | 0 hrqb/base/target.py | 40 ++++++ hrqb/base/task.py | 70 ++++++++++ hrqb/luigi.toml | 0 hrqb/pipelines.py | 15 +++ hrqb/tasks/__init__.py | 0 hrqb/tasks/extract.py | 3 + hrqb/tasks/load.py | 3 + hrqb/tasks/transform.py | 3 + hrqb/utils/__init__.py | 7 + hrqb/utils/db.py | 0 pyproject.toml | 1 + 14 files changed, 336 insertions(+), 84 deletions(-) create mode 100644 hrqb/base/__init__.py create mode 100644 hrqb/base/target.py create mode 100644 hrqb/base/task.py create mode 100644 hrqb/luigi.toml create mode 100644 hrqb/pipelines.py create mode 100644 hrqb/tasks/__init__.py create mode 100644 hrqb/tasks/extract.py create mode 100644 hrqb/tasks/load.py create mode 100644 hrqb/tasks/transform.py create mode 100644 hrqb/utils/__init__.py create mode 100644 hrqb/utils/db.py diff --git a/Pipfile b/Pipfile index 5eda0cc..2ab5d07 100644 --- a/Pipfile +++ b/Pipfile @@ -8,6 +8,8 @@ click = "*" sentry-sdk = "*" oracledb = "*" luigi = "*" +pandas = "*" +pandas-stubs = "*" [dev-packages] black = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 8ee8395..71b6876 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "4d33054d3ac8eca88368040c31c32c58b5640b184b62062d55d898a18d869b09" + "sha256": "e94095672a2ac97f7593e1d809fb0945400e553fb00af2de964a9c70f082da3d" }, "pipfile-spec": 6, "requires": { @@ -130,11 +130,11 @@ }, "docutils": { "hashes": [ - "sha256:14c8d34a55b46c88f9f714adb29cefbdd69fb82f3fef825e59c5faab935390d8", - "sha256:65249d8a5345bc95e0f40f280ba63c98eb24de35c6c8f5b662e3e8948adea83f" + "sha256:3a6b18732edf182daa3cd12775bbb338cf5691468f91eeeb109deff6ebfa986f", + "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2" ], "markers": 
"python_version >= '3.9'", - "version": "==0.21.1" + "version": "==0.21.2" }, "lockfile": { "hashes": [ @@ -150,6 +150,48 @@ "index": "pypi", "version": "==3.5.0" }, + "numpy": { + "hashes": [ + "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b", + "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818", + "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20", + "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0", + "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010", + "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a", + "sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea", + "sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c", + "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71", + "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110", + "sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be", + "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a", + "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a", + "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5", + "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed", + "sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd", + "sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c", + "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e", + "sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0", + "sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c", + "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a", + "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b", + "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0", + 
"sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6", + "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2", + "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a", + "sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30", + "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218", + "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5", + "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07", + "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2", + "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4", + "sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764", + "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef", + "sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3", + "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f" + ], + "markers": "python_version == '3.11'", + "version": "==1.26.4" + }, "oracledb": { "hashes": [ "sha256:08aa313b801dda950918168d3962ba59a617adce143e0c2bf1ee9b847695faaa", @@ -187,6 +229,49 @@ "index": "pypi", "version": "==2.1.2" }, + "pandas": { + "hashes": [ + "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863", + "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2", + "sha256:0cace394b6ea70c01ca1595f839cf193df35d1575986e484ad35c4aeae7266c1", + "sha256:1cb51fe389360f3b5a4d57dbd2848a5f033350336ca3b340d1c53a1fad33bcad", + "sha256:2925720037f06e89af896c70bca73459d7e6a4be96f9de79e2d440bd499fe0db", + "sha256:3e374f59e440d4ab45ca2fffde54b81ac3834cf5ae2cdfa69c90bc03bde04d76", + "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51", + "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32", + "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08", + 
"sha256:58b84b91b0b9f4bafac2a0ac55002280c094dfc6402402332c0913a59654ab2b", + "sha256:640cef9aa381b60e296db324337a554aeeb883ead99dc8f6c18e81a93942f5f4", + "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921", + "sha256:696039430f7a562b74fa45f540aca068ea85fa34c244d0deee539cb6d70aa288", + "sha256:6d2123dc9ad6a814bcdea0f099885276b31b24f7edf40f6cdbc0912672e22eee", + "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0", + "sha256:873d13d177501a28b2756375d59816c365e42ed8417b41665f346289adc68d24", + "sha256:8e5a0b00e1e56a842f922e7fae8ae4077aee4af0acb5ae3622bd4b4c30aedf99", + "sha256:8e90497254aacacbc4ea6ae5e7a8cd75629d6ad2b30025a4a8b09aa4faf55151", + "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd", + "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce", + "sha256:92fd6b027924a7e178ac202cfbe25e53368db90d56872d20ffae94b96c7acc57", + "sha256:9dfde2a0ddef507a631dc9dc4af6a9489d5e2e740e226ad426a05cabfbd7c8ef", + "sha256:9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54", + "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a", + "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238", + "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23", + "sha256:ddf818e4e6c7c6f4f7c8a12709696d193976b591cc7dc50588d3d1a6b5dc8772", + "sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce", + "sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad" + ], + "index": "pypi", + "version": "==2.2.2" + }, + "pandas-stubs": { + "hashes": [ + "sha256:0126a26451a37cb893ea62357ca87ba3d181bd999ec8ba2ca5602e20207d6682", + "sha256:236a4f812fb6b1922e9607ff09e427f6d8540c421c9e5a40e3e4ddf7adac7f05" + ], + "index": "pypi", + "version": "==2.2.1.240316" + }, "pycparser": { "hashes": [ "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6", @@ -211,13 +296,20 @@ "markers": "python_version >= '2.7' and 
python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.9.0.post0" }, + "pytz": { + "hashes": [ + "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812", + "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319" + ], + "version": "==2024.1" + }, "sentry-sdk": { "hashes": [ - "sha256:1ce29e30240cc289a027011103a8c83885b15ef2f316a60bcc7c5300afa144f1", - "sha256:509aa9678c0512344ca886281766c2e538682f8acfa50fd8d405f8c417ad0625" + "sha256:b54c54a2160f509cf2757260d0cf3885b608c6192c2555a3857e3a4d0f84bdb3", + "sha256:c278e0f523f6f0ee69dc43ad26dcdb1202dffe5ac326ae31472e012d941bee21" ], "index": "pypi", - "version": "==1.45.0" + "version": "==2.0.1" }, "setuptools": { "hashes": [ @@ -260,12 +352,28 @@ "markers": "python_version >= '3.8'", "version": "==6.4" }, + "types-pytz": { + "hashes": [ + "sha256:6810c8a1f68f21fdf0f4f374a432487c77645a0ac0b31de4bf4690cf21ad3981", + "sha256:8335d443310e2db7b74e007414e74c4f53b67452c0cb0d228ca359ccfba59659" + ], + "markers": "python_version >= '3.8'", + "version": "==2024.1.0.20240417" + }, + "tzdata": { + "hashes": [ + "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd", + "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252" + ], + "markers": "python_version >= '2'", + "version": "==2024.1" + }, "urllib3": { "hashes": [ "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d", "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19" ], - "markers": "python_version >= '3.6'", + "markers": "python_version >= '3.8'", "version": "==2.2.1" } }, @@ -279,31 +387,31 @@ }, "black": { "hashes": [ - "sha256:1bb9ca06e556a09f7f7177bc7cb604e5ed2d2df1e9119e4f7d2f1f7071c32e5d", - "sha256:21f9407063ec71c5580b8ad975653c66508d6a9f57bd008bb8691d273705adcd", - "sha256:4396ca365a4310beef84d446ca5016f671b10f07abdba3e4e4304218d2c71d33", - "sha256:44d99dfdf37a2a00a6f7a8dcbd19edf361d056ee51093b2445de7ca09adac965", - 
"sha256:5cd5b4f76056cecce3e69b0d4c228326d2595f506797f40b9233424e2524c070", - "sha256:64578cf99b6b46a6301bc28bdb89f9d6f9b592b1c5837818a177c98525dbe397", - "sha256:64e60a7edd71fd542a10a9643bf369bfd2644de95ec71e86790b063aa02ff745", - "sha256:652e55bb722ca026299eb74e53880ee2315b181dfdd44dca98e43448620ddec1", - "sha256:6644f97a7ef6f401a150cca551a1ff97e03c25d8519ee0bbc9b0058772882665", - "sha256:6ad001a9ddd9b8dfd1b434d566be39b1cd502802c8d38bbb1ba612afda2ef436", - "sha256:71d998b73c957444fb7c52096c3843875f4b6b47a54972598741fe9a7f737fcb", - "sha256:74eb9b5420e26b42c00a3ff470dc0cd144b80a766128b1771d07643165e08d0e", - "sha256:75a2d0b4f5eb81f7eebc31f788f9830a6ce10a68c91fbe0fade34fff7a2836e6", - "sha256:7852b05d02b5b9a8c893ab95863ef8986e4dda29af80bbbda94d7aee1abf8702", - "sha256:7f2966b9b2b3b7104fca9d75b2ee856fe3fdd7ed9e47c753a4bb1a675f2caab8", - "sha256:8e5537f456a22cf5cfcb2707803431d2feeb82ab3748ade280d6ccd0b40ed2e8", - "sha256:d4e71cdebdc8efeb6deaf5f2deb28325f8614d48426bed118ecc2dcaefb9ebf3", - "sha256:dae79397f367ac8d7adb6c779813328f6d690943f64b32983e896bcccd18cbad", - "sha256:e3a3a092b8b756c643fe45f4624dbd5a389f770a4ac294cf4d0fce6af86addaf", - "sha256:eb949f56a63c5e134dfdca12091e98ffb5fd446293ebae123d10fc1abad00b9e", - "sha256:f07b69fda20578367eaebbd670ff8fc653ab181e1ff95d84497f9fa20e7d0641", - "sha256:f95cece33329dc4aa3b0e1a771c41075812e46cf3d6e3f1dfe3d91ff09826ed2" + "sha256:257d724c2c9b1660f353b36c802ccece186a30accc7742c176d29c146df6e474", + "sha256:37aae07b029fa0174d39daf02748b379399b909652a806e5708199bd93899da1", + "sha256:415e686e87dbbe6f4cd5ef0fbf764af7b89f9057b97c908742b6008cc554b9c0", + "sha256:48a85f2cb5e6799a9ef05347b476cce6c182d6c71ee36925a6c194d074336ef8", + "sha256:7768a0dbf16a39aa5e9a3ded568bb545c8c2727396d063bbaf847df05b08cd96", + "sha256:7e122b1c4fb252fd85df3ca93578732b4749d9be076593076ef4d07a0233c3e1", + "sha256:88c57dc656038f1ab9f92b3eb5335ee9b021412feaa46330d5eba4e51fe49b04", + "sha256:8e537d281831ad0e71007dcdcbe50a71470b978c453fa41ce77186bbe0ed6021", 
+ "sha256:98e123f1d5cfd42f886624d84464f7756f60ff6eab89ae845210631714f6db94", + "sha256:accf49e151c8ed2c0cdc528691838afd217c50412534e876a19270fea1e28e2d", + "sha256:b1530ae42e9d6d5b670a34db49a94115a64596bc77710b1d05e9801e62ca0a7c", + "sha256:b9176b9832e84308818a99a561e90aa479e73c523b3f77afd07913380ae2eab7", + "sha256:bdde6f877a18f24844e381d45e9947a49e97933573ac9d4345399be37621e26c", + "sha256:be8bef99eb46d5021bf053114442914baeb3649a89dc5f3a555c88737e5e98fc", + "sha256:bf10f7310db693bb62692609b397e8d67257c55f949abde4c67f9cc574492cc7", + "sha256:c872b53057f000085da66a19c55d68f6f8ddcac2642392ad3a355878406fbd4d", + "sha256:d36ed1124bb81b32f8614555b34cc4259c3fbc7eec17870e8ff8ded335b58d8c", + "sha256:da33a1a5e49c4122ccdfd56cd021ff1ebc4a1ec4e2d01594fef9b6f267a9e741", + "sha256:dd1b5a14e417189db4c7b64a6540f31730713d173f0b63e55fabd52d61d8fdce", + "sha256:e151054aa00bad1f4e1f04919542885f89f5f7d086b8a59e5000e6c616896ffb", + "sha256:eaea3008c281f1038edb473c1aa8ed8143a5535ff18f978a318f10302b254063", + "sha256:ef703f83fc32e131e9bcc0a5094cfe85599e7109f896fe8bc96cc402f3eb4b6e" ], "index": "pypi", - "version": "==24.4.0" + "version": "==24.4.2" }, "certifi": { "hashes": [ @@ -552,11 +660,11 @@ }, "ipython": { "hashes": [ - "sha256:07232af52a5ba146dc3372c7bf52a0f890a23edf38d77caef8d53f9cdc2584c1", - "sha256:7468edaf4f6de3e1b912e57f66c241e6fd3c7099f2ec2136e239e142e800274d" + "sha256:010db3f8a728a578bb641fdd06c063b9fb8e96a9464c63aec6310fbcb5e80501", + "sha256:d7bf2f6c4314984e3e02393213bab8703cf163ede39672ce5918c51fe253a2a3" ], "index": "pypi", - "version": "==8.23.0" + "version": "==8.24.0" }, "jedi": { "hashes": [ @@ -576,36 +684,36 @@ }, "mypy": { "hashes": [ - "sha256:0235391f1c6f6ce487b23b9dbd1327b4ec33bb93934aa986efe8a9563d9349e6", - "sha256:190da1ee69b427d7efa8aa0d5e5ccd67a4fb04038c380237a0d96829cb157913", - "sha256:2418488264eb41f69cc64a69a745fad4a8f86649af4b1041a4c64ee61fc61129", - "sha256:3a3c007ff3ee90f69cf0a15cbcdf0995749569b86b6d2f327af01fd1b8aee9dc", - 
"sha256:3cc5da0127e6a478cddd906068496a97a7618a21ce9b54bde5bf7e539c7af974", - "sha256:48533cdd345c3c2e5ef48ba3b0d3880b257b423e7995dada04248725c6f77374", - "sha256:49c87c15aed320de9b438ae7b00c1ac91cd393c1b854c2ce538e2a72d55df150", - "sha256:4d3dbd346cfec7cb98e6cbb6e0f3c23618af826316188d587d1c1bc34f0ede03", - "sha256:571741dc4194b4f82d344b15e8837e8c5fcc462d66d076748142327626a1b6e9", - "sha256:587ce887f75dd9700252a3abbc9c97bbe165a4a630597845c61279cf32dfbf02", - "sha256:5d741d3fc7c4da608764073089e5f58ef6352bedc223ff58f2f038c2c4698a89", - "sha256:5e6061f44f2313b94f920e91b204ec600982961e07a17e0f6cd83371cb23f5c2", - "sha256:61758fabd58ce4b0720ae1e2fea5cfd4431591d6d590b197775329264f86311d", - "sha256:653265f9a2784db65bfca694d1edd23093ce49740b2244cde583aeb134c008f3", - "sha256:68edad3dc7d70f2f17ae4c6c1b9471a56138ca22722487eebacfd1eb5321d612", - "sha256:81a10926e5473c5fc3da8abb04119a1f5811a236dc3a38d92015cb1e6ba4cb9e", - "sha256:85ca5fcc24f0b4aeedc1d02f93707bccc04733f21d41c88334c5482219b1ccb3", - "sha256:a260627a570559181a9ea5de61ac6297aa5af202f06fd7ab093ce74e7181e43e", - "sha256:aceb1db093b04db5cd390821464504111b8ec3e351eb85afd1433490163d60cd", - "sha256:b685154e22e4e9199fc95f298661deea28aaede5ae16ccc8cbb1045e716b3e04", - "sha256:d357423fa57a489e8c47b7c85dfb96698caba13d66e086b412298a1a0ea3b0ed", - "sha256:d4d5ddc13421ba3e2e082a6c2d74c2ddb3979c39b582dacd53dd5d9431237185", - "sha256:e49499be624dead83927e70c756970a0bc8240e9f769389cdf5714b0784ca6bf", - "sha256:e54396d70be04b34f31d2edf3362c1edd023246c82f1730bbf8768c28db5361b", - "sha256:f88566144752999351725ac623471661c9d1cd8caa0134ff98cceeea181789f4", - "sha256:f8a67616990062232ee4c3952f41c779afac41405806042a8126fe96e098419f", - "sha256:fe28657de3bfec596bbeef01cb219833ad9d38dd5393fc649f4b366840baefe6" + "sha256:075cbf81f3e134eadaf247de187bd604748171d6b79736fa9b6c9685b4083061", + "sha256:12b6bfc1b1a66095ab413160a6e520e1dc076a28f3e22f7fb25ba3b000b4ef99", + "sha256:1ec404a7cbe9fc0e92cb0e67f55ce0c025014e26d33e54d9e506a0f2d07fe5de", 
+ "sha256:28d0e038361b45f099cc086d9dd99c15ff14d0188f44ac883010e172ce86c38a", + "sha256:2b0695d605ddcd3eb2f736cd8b4e388288c21e7de85001e9f85df9187f2b50f9", + "sha256:3236a4c8f535a0631f85f5fcdffba71c7feeef76a6002fcba7c1a8e57c8be1ec", + "sha256:3be66771aa5c97602f382230165b856c231d1277c511c9a8dd058be4784472e1", + "sha256:3d087fcbec056c4ee34974da493a826ce316947485cef3901f511848e687c131", + "sha256:3f298531bca95ff615b6e9f2fc0333aae27fa48052903a0ac90215021cdcfa4f", + "sha256:4a2b5cdbb5dd35aa08ea9114436e0d79aceb2f38e32c21684dcf8e24e1e92821", + "sha256:4cf18f9d0efa1b16478c4c129eabec36148032575391095f73cae2e722fcf9d5", + "sha256:8b2cbaca148d0754a54d44121b5825ae71868c7592a53b7292eeb0f3fdae95ee", + "sha256:8f55583b12156c399dce2df7d16f8a5095291354f1e839c252ec6c0611e86e2e", + "sha256:92f93b21c0fe73dc00abf91022234c79d793318b8a96faac147cd579c1671746", + "sha256:9e36fb078cce9904c7989b9693e41cb9711e0600139ce3970c6ef814b6ebc2b2", + "sha256:9fd50226364cd2737351c79807775136b0abe084433b55b2e29181a4c3c878c0", + "sha256:a781f6ad4bab20eef8b65174a57e5203f4be627b46291f4589879bf4e257b97b", + "sha256:a87dbfa85971e8d59c9cc1fcf534efe664d8949e4c0b6b44e8ca548e746a8d53", + "sha256:b808e12113505b97d9023b0b5e0c0705a90571c6feefc6f215c1df9381256e30", + "sha256:bc6ac273b23c6b82da3bb25f4136c4fd42665f17f2cd850771cb600bdd2ebeda", + "sha256:cd777b780312ddb135bceb9bc8722a73ec95e042f911cc279e2ec3c667076051", + "sha256:da1cbf08fb3b851ab3b9523a884c232774008267b1f83371ace57f412fe308c2", + "sha256:e22e1527dc3d4aa94311d246b59e47f6455b8729f4968765ac1eacf9a4760bc7", + "sha256:f8c083976eb530019175aabadb60921e73b4f45736760826aa1689dda8208aee", + "sha256:f90cff89eea89273727d8783fef5d4a934be2fdca11b47def50cf5d311aff727", + "sha256:fa7ef5244615a2523b56c034becde4e9e3f9b034854c93639adb667ec9ec2976", + "sha256:fcfc70599efde5c67862a07a1aaf50e55bce629ace26bb19dc17cece5dd31ca4" ], "index": "pypi", - "version": "==1.9.0" + "version": "==1.10.0" }, "mypy-extensions": { "hashes": [ @@ -711,11 +819,11 @@ }, "pytest": { "hashes": [ 
- "sha256:2a8386cfc11fa9d2c50ee7b2a57e7d898ef90470a7a34c4b949ff59662bb78b7", - "sha256:ac978141a75948948817d360297b7aae0fcb9d6ff6bc9ec6d514b85d5a65c044" + "sha256:1733f0620f6cda4095bbf0d9ff8022486e91892245bb9e7d5542c018f612f233", + "sha256:d507d4482197eac0ba2bae2e9babf0672eb333017bcedaa5fb1a3d42c1174b3f" ], "index": "pypi", - "version": "==8.1.1" + "version": "==8.2.0" }, "pyyaml": { "hashes": [ @@ -784,26 +892,26 @@ }, "ruff": { "hashes": [ - "sha256:0926cefb57fc5fced629603fbd1a23d458b25418681d96823992ba975f050c2b", - "sha256:1c859f294f8633889e7d77de228b203eb0e9a03071b72b5989d89a0cf98ee262", - "sha256:2c6e37f2e3cd74496a74af9a4fa67b547ab3ca137688c484749189bf3a686ceb", - "sha256:2d9ef6231e3fbdc0b8c72404a1a0c46fd0dcea84efca83beb4681c318ea6a953", - "sha256:6e68d248ed688b9d69fd4d18737edcbb79c98b251bba5a2b031ce2470224bdf9", - "sha256:9485f54a7189e6f7433e0058cf8581bee45c31a25cd69009d2a040d1bd4bfaef", - "sha256:a1eaf03d87e6a7cd5e661d36d8c6e874693cb9bc3049d110bc9a97b350680c43", - "sha256:b34510141e393519a47f2d7b8216fec747ea1f2c81e85f076e9f2910588d4b64", - "sha256:b90506f3d6d1f41f43f9b7b5ff845aeefabed6d2494307bc7b178360a8805252", - "sha256:b92f03b4aa9fa23e1799b40f15f8b95cdc418782a567d6c43def65e1bbb7f1cf", - "sha256:baa27d9d72a94574d250f42b7640b3bd2edc4c58ac8ac2778a8c82374bb27984", - "sha256:c7d391e5936af5c9e252743d767c564670dc3889aff460d35c518ee76e4b26d7", - "sha256:d2921ac03ce1383e360e8a95442ffb0d757a6a7ddd9a5be68561a671e0e5807e", - "sha256:d592116cdbb65f8b1b7e2a2b48297eb865f6bdc20641879aa9d7b9c11d86db79", - "sha256:eec8d185fe193ad053eda3a6be23069e0c8ba8c5d20bc5ace6e3b9e37d246d3f", - "sha256:efd703a5975ac1998c2cc5e9494e13b28f31e66c616b0a76e206de2562e0843c", - "sha256:f1ee41580bff1a651339eb3337c20c12f4037f6110a36ae4a2d864c52e5ef954" + "sha256:0e2e06459042ac841ed510196c350ba35a9b24a643e23db60d79b2db92af0c2b", + "sha256:1f32cadf44c2020e75e0c56c3408ed1d32c024766bd41aedef92aa3ca28eef68", + "sha256:22e306bf15e09af45ca812bc42fa59b628646fa7c26072555f278994890bc7ac", + 
"sha256:24016ed18db3dc9786af103ff49c03bdf408ea253f3cb9e3638f39ac9cf2d483", + "sha256:33bcc160aee2520664bc0859cfeaebc84bb7323becff3f303b8f1f2d81cb4edc", + "sha256:3afabaf7ba8e9c485a14ad8f4122feff6b2b93cc53cd4dad2fd24ae35112d5c5", + "sha256:5ec481661fb2fd88a5d6cf1f83403d388ec90f9daaa36e40e2c003de66751798", + "sha256:652e4ba553e421a6dc2a6d4868bc3b3881311702633eb3672f9f244ded8908cd", + "sha256:6a2243f8f434e487c2a010c7252150b1fdf019035130f41b77626f5655c9ca22", + "sha256:6ab165ef5d72392b4ebb85a8b0fbd321f69832a632e07a74794c0e598e7a8376", + "sha256:7891ee376770ac094da3ad40c116258a381b86c7352552788377c6eb16d784fe", + "sha256:799eb468ea6bc54b95527143a4ceaf970d5aa3613050c6cff54c85fda3fde480", + "sha256:82986bb77ad83a1719c90b9528a9dd663c9206f7c0ab69282af8223566a0c34e", + "sha256:8772130a063f3eebdf7095da00c0b9898bd1774c43b336272c3e98667d4fb8fa", + "sha256:8d14dc8953f8af7e003a485ef560bbefa5f8cc1ad994eebb5b12136049bbccc5", + "sha256:cbd1e87c71bca14792948c4ccb51ee61c3296e164019d2d484f3eaa2d360dfaf", + "sha256:ec4ba9436a51527fb6931a8839af4c36a5481f8c19e8f5e42c2f7ad3a49f5069" ], "index": "pypi", - "version": "==0.4.1" + "version": "==0.4.2" }, "setuptools": { "hashes": [ @@ -849,7 +957,7 @@ "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d", "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19" ], - "markers": "python_version >= '3.6'", + "markers": "python_version >= '3.8'", "version": "==2.2.1" }, "virtualenv": { diff --git a/hrqb/base/__init__.py b/hrqb/base/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/hrqb/base/target.py b/hrqb/base/target.py new file mode 100644 index 0000000..d736c95 --- /dev/null +++ b/hrqb/base/target.py @@ -0,0 +1,40 @@ +"""hrqb.base.target""" + +import json + +import luigi # type: ignore[import-untyped] +import pandas as pd +from luigi.format import MixedUnicodeBytes # type: ignore[import-untyped] + +PandasObject = pd.DataFrame | pd.Series + + +class HRQBLocalTarget(luigi.LocalTarget): + 
"""Target is local file with path and table name init.""" + + def __init__(self, path: str, table_name: str) -> None: + super().__init__(path, format=MixedUnicodeBytes) + self.path = path + self.table_name = table_name + + +class PandasPickleTarget(HRQBLocalTarget): + """Target is Pandas object (DataFrame or Series) pickled to disk.""" + + def read(self) -> PandasObject: + return pd.read_pickle(self.path) + + def write(self, panda_object: PandasObject) -> None: + panda_object.to_pickle(self.path) + + +class QuickbaseTableTarget(HRQBLocalTarget): + """Target is upsert to Quickbase table.""" + + def read(self) -> dict: + with open(self.path) as f: + return json.load(f) + + def write(self, data: dict, indent: bool = True) -> int: # noqa: FBT001, FBT002 + with open(self.path, "w") as f: + return f.write(json.dumps(data, indent=indent)) diff --git a/hrqb/base/task.py b/hrqb/base/task.py new file mode 100644 index 0000000..244815d --- /dev/null +++ b/hrqb/base/task.py @@ -0,0 +1,70 @@ +"""hrqb.base.task""" + +import luigi # type: ignore[import-untyped] +import pandas as pd + +from hrqb.base.target import PandasPickleTarget, QuickbaseTableTarget +from hrqb.utils import today_date + + +class HRQBTask(luigi.Task): + """Base Task class for all HRQB Tasks.""" + + path = luigi.Parameter() + table_name = luigi.Parameter() + + @property + def single_input(self) -> PandasPickleTarget | QuickbaseTableTarget: + input_count = len(self.input()) + if input_count > 1: + message = f"Expected a single input to this Task but found: {input_count}" + raise ValueError(message) + return self.input()[0] + + @property + def input_pandas_dataframe(self) -> pd.DataFrame: + input_object = self.single_input + data_object = input_object.read() + if not isinstance(data_object, pd.DataFrame): + message = f"Expected pandas Dataframe got: {type(data_object)}" + raise TypeError(message) + return data_object + + @property + def input_pandas_series(self) -> pd.Series: + input_object = self.single_input + 
data_object = input_object.read() + if not isinstance(data_object, pd.Series): + message = f"Expected pandas Series got: {type(data_object)}" + raise TypeError(message) + return data_object + + +class PandasPickleTask(HRQBTask): + """Base Task class for Tasks that write pickled pandas objects.""" + + def target(self) -> PandasPickleTarget: + return PandasPickleTarget( + path=self.path, + table_name=self.table_name, + ) + + def output(self) -> PandasPickleTarget: + return self.target() + + +class QuickbaseTableUpsert(HRQBTask): + """Base Task class for Tasks that upsert data to Quickbase tables.""" + + def target(self) -> QuickbaseTableTarget: + return QuickbaseTableTarget( + path=self.path, + table_name=self.table_name, + ) + + def output(self) -> QuickbaseTableTarget: + return self.target() + + +class HRQBPipelineTask(luigi.WrapperTask): + date = luigi.DateParameter(default=today_date()) diff --git a/hrqb/luigi.toml b/hrqb/luigi.toml new file mode 100644 index 0000000..e69de29 diff --git a/hrqb/pipelines.py b/hrqb/pipelines.py new file mode 100644 index 0000000..3e91f85 --- /dev/null +++ b/hrqb/pipelines.py @@ -0,0 +1,15 @@ +"""hrqb.pipelines""" + +import luigi # type: ignore[import-untyped] +from luigi.execution_summary import LuigiRunResult # type: ignore[import-untyped] + + +def run_pipeline(pipeline_task: luigi.WrapperTask) -> LuigiRunResult: + return luigi.build( + [pipeline_task], + local_scheduler=True, + detailed_summary=True, + ) + + +# TODO: add wrapper Pipeline tasks in this file # noqa: TD002, TD003, FIX002 diff --git a/hrqb/tasks/__init__.py b/hrqb/tasks/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/hrqb/tasks/extract.py b/hrqb/tasks/extract.py new file mode 100644 index 0000000..1d6192b --- /dev/null +++ b/hrqb/tasks/extract.py @@ -0,0 +1,3 @@ +"""hrqb.tasks.extract""" + +# TODO: add extract tasks in this file # noqa: TD002, TD003, FIX002 diff --git a/hrqb/tasks/load.py b/hrqb/tasks/load.py new file mode 100644 index 
0000000..d525600 --- /dev/null +++ b/hrqb/tasks/load.py @@ -0,0 +1,3 @@ +"""hrqb.tasks.load""" + +# TODO: add load tasks in this file # noqa: TD002, TD003, FIX002 diff --git a/hrqb/tasks/transform.py b/hrqb/tasks/transform.py new file mode 100644 index 0000000..e42b37f --- /dev/null +++ b/hrqb/tasks/transform.py @@ -0,0 +1,3 @@ +"""hrqb.tasks.transform""" + +# TODO: add transform tasks in this file # noqa: TD002, TD003, FIX002 diff --git a/hrqb/utils/__init__.py b/hrqb/utils/__init__.py new file mode 100644 index 0000000..581c514 --- /dev/null +++ b/hrqb/utils/__init__.py @@ -0,0 +1,7 @@ +"""hrqb.utils""" + +import datetime + + +def today_date() -> datetime.date: + return datetime.datetime.now(tz=datetime.UTC).date() diff --git a/hrqb/utils/db.py b/hrqb/utils/db.py new file mode 100644 index 0000000..e69de29 diff --git a/pyproject.toml b/pyproject.toml index aee38e9..9b38b80 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,7 @@ ignore = [ "PLR0912", "PLR0913", "PLR0915", + "S301", "S320", "S321", ] From 35013dea8fc408174959646a88ef94ad1430bd08 Mon Sep 17 00:00:00 2001 From: Graham Hukill Date: Wed, 1 May 2024 09:26:48 -0400 Subject: [PATCH 4/6] Setup Config class and luigi config file Why these changes are being introduced: Luigi supports a custom configuration file. Given we want to run this app primarily from the hrqb folder, this sets up an env var pattern to define where the config file is. This also implements a 'Config' class, similar in design to other apps, that exposes env vars via dot notation. 
How this addresses that need: * Creates Config class for app to use * Sets required env var LUIGI_CONFIG_PATH in Config class Side effects of this change: * None Relevant ticket(s): * https://mitlibraries.atlassian.net/browse/HRQB-10 --- README.md | 1 + hrqb/cli.py | 5 ++++- hrqb/config.py | 24 ++++++++++++++++++++++++ hrqb/luigi.cfg | 3 +++ hrqb/luigi.toml | 0 tests/conftest.py | 1 + tests/test_luigi_config.py | 7 +++++++ 7 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 hrqb/luigi.cfg delete mode 100644 hrqb/luigi.toml create mode 100644 tests/test_luigi_config.py diff --git a/README.md b/README.md index 8dacb36..a58d3d9 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ See additional diagrams and documentation in the [docs](docs) folder: ```shell SENTRY_DSN=# If set to a valid Sentry DSN, enables Sentry exception monitoring. This is not needed for local development. WORKSPACE=# Set to `dev` for local development, this will be set to `stage` and `prod` in those environments by Terraform. 
+LUIGI_CONFIG_PATH=hrqb/luigi.cfg # this env var must be set, pointing to config file in hrqb folder ``` ### Optional diff --git a/hrqb/cli.py b/hrqb/cli.py index ca942d3..9f25631 100644 --- a/hrqb/cli.py +++ b/hrqb/cli.py @@ -4,10 +4,12 @@ import click -from hrqb.config import configure_logger, configure_sentry +from hrqb.config import Config, configure_logger, configure_sentry logger = logging.getLogger(__name__) +CONFIG = Config() + @click.group(context_settings={"help_option_names": ["-h", "--help"]}) @click.option( @@ -23,6 +25,7 @@ def main(ctx: click.Context, verbose: bool) -> None: # noqa: FBT001 root_logger = logging.getLogger() logger.info(configure_logger(root_logger, verbose=verbose)) logger.info(configure_sentry()) + CONFIG.check_required_env_vars() logger.info("Running process") diff --git a/hrqb/config.py b/hrqb/config.py index ef51ee2..1960d5d 100644 --- a/hrqb/config.py +++ b/hrqb/config.py @@ -1,9 +1,33 @@ import logging import os +from typing import Any import sentry_sdk +class Config: + REQUIRED_ENV_VARS = ( + "WORKSPACE", + "SENTRY_DSN", + "LUIGI_CONFIG_PATH", + ) + OPTIONAL_ENV_VARS = ("DYLD_LIBRARY_PATH",) + + def check_required_env_vars(self) -> None: + """Method to raise exception if required env vars not set.""" + missing_vars = [var for var in self.REQUIRED_ENV_VARS if not os.getenv(var)] + if missing_vars: + message = f"Missing required environment variables: {', '.join(missing_vars)}" + raise OSError(message) + + def __getattr__(self, name: str) -> Any: # noqa: ANN401 + """Provide dot notation access to configurations and env vars on this class.""" + if name in self.REQUIRED_ENV_VARS or name in self.OPTIONAL_ENV_VARS: + return os.getenv(name) + message = f"'{name}' not a valid configuration variable" + raise AttributeError(message) + + def configure_logger(logger: logging.Logger, *, verbose: bool) -> str: if verbose: logging.basicConfig( diff --git a/hrqb/luigi.cfg b/hrqb/luigi.cfg new file mode 100644 index 0000000..bbaeb39 --- 
/dev/null +++ b/hrqb/luigi.cfg @@ -0,0 +1,3 @@ +[core] +autoload_range=true +parallel_scheduling=false \ No newline at end of file diff --git a/hrqb/luigi.toml b/hrqb/luigi.toml deleted file mode 100644 index e69de29..0000000 diff --git a/tests/conftest.py b/tests/conftest.py index 42c8c64..aaffc39 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,6 +6,7 @@ def _test_env(monkeypatch): monkeypatch.setenv("SENTRY_DSN", "None") monkeypatch.setenv("WORKSPACE", "test") + monkeypatch.setenv("LUIGI_CONFIG_PATH", "hrqb/luigi.cfg") @pytest.fixture diff --git a/tests/test_luigi_config.py b/tests/test_luigi_config.py new file mode 100644 index 0000000..070fa39 --- /dev/null +++ b/tests/test_luigi_config.py @@ -0,0 +1,7 @@ +import luigi + + +def test_luigi_config_file_env_var_sets_config_file(): + luigi_config = luigi.configuration.get_config() + assert luigi_config.enabled + assert "hrqb/luigi.cfg" in luigi_config._config_paths # noqa: SLF001 From b8d36613ac6f4472f2084eb9012a5111b46ad058 Mon Sep 17 00:00:00 2001 From: Graham Hukill Date: Wed, 1 May 2024 11:59:21 -0400 Subject: [PATCH 5/6] Tests for HRQB base Targets and Tasks Why these changes are being introduced: HRQB introduces some base luigi Targets and Tasks to work with, given the very similar and predictable work by most of the anticipated Tasks. These tests exercise some of those unique default parameters and convenience methods. 
Relevant ticket(s): * https://mitlibraries.atlassian.net/browse/HRQB-10 --- hrqb/base/__init__.py | 17 ++++++ hrqb/base/task.py | 10 ++-- tests/conftest.py | 106 ++++++++++++++++++++++++++++++++++++++ tests/test_base_target.py | 68 ++++++++++++++++++++++++ tests/test_base_task.py | 103 ++++++++++++++++++++++++++++++++++++ 5 files changed, 299 insertions(+), 5 deletions(-) create mode 100644 tests/test_base_target.py create mode 100644 tests/test_base_task.py diff --git a/hrqb/base/__init__.py b/hrqb/base/__init__.py index e69de29..25f4b59 100644 --- a/hrqb/base/__init__.py +++ b/hrqb/base/__init__.py @@ -0,0 +1,17 @@ +from hrqb.base.target import HRQBLocalTarget, PandasPickleTarget, QuickbaseTableTarget +from hrqb.base.task import ( + HRQBPipelineTask, + HRQBTask, + PandasPickleTask, + QuickbaseUpsertTask, +) + +__all__ = [ + "HRQBLocalTarget", + "PandasPickleTarget", + "QuickbaseTableTarget", + "HRQBPipelineTask", + "HRQBTask", + "PandasPickleTask", + "QuickbaseUpsertTask", +] diff --git a/hrqb/base/task.py b/hrqb/base/task.py index 244815d..8847e68 100644 --- a/hrqb/base/task.py +++ b/hrqb/base/task.py @@ -3,7 +3,7 @@ import luigi # type: ignore[import-untyped] import pandas as pd -from hrqb.base.target import PandasPickleTarget, QuickbaseTableTarget +from hrqb.base import PandasPickleTarget, QuickbaseTableTarget from hrqb.utils import today_date @@ -16,7 +16,7 @@ class HRQBTask(luigi.Task): @property def single_input(self) -> PandasPickleTarget | QuickbaseTableTarget: input_count = len(self.input()) - if input_count > 1: + if input_count != 1: message = f"Expected a single input to this Task but found: {input_count}" raise ValueError(message) return self.input()[0] @@ -26,7 +26,7 @@ def input_pandas_dataframe(self) -> pd.DataFrame: input_object = self.single_input data_object = input_object.read() if not isinstance(data_object, pd.DataFrame): - message = f"Expected pandas Dataframe got: {type(data_object)}" + message = f"Expected pandas Dataframe but got: 
{type(data_object)}" raise TypeError(message) return data_object @@ -35,7 +35,7 @@ def input_pandas_series(self) -> pd.Series: input_object = self.single_input data_object = input_object.read() if not isinstance(data_object, pd.Series): - message = f"Expected pandas Series got: {type(data_object)}" + message = f"Expected pandas Series but got: {type(data_object)}" raise TypeError(message) return data_object @@ -53,7 +53,7 @@ def output(self) -> PandasPickleTarget: return self.target() -class QuickbaseTableUpsert(HRQBTask): +class QuickbaseUpsertTask(HRQBTask): """Base Task class for Tasks that upsert data to Quickbase tables.""" def target(self) -> QuickbaseTableTarget: diff --git a/tests/conftest.py b/tests/conftest.py index aaffc39..8a77707 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,9 @@ +import pandas as pd import pytest from click.testing import CliRunner +from hrqb.base.task import PandasPickleTask, QuickbaseUpsertTask + @pytest.fixture(autouse=True) def _test_env(monkeypatch): @@ -12,3 +15,106 @@ def _test_env(monkeypatch): @pytest.fixture def runner(): return CliRunner() + + +@pytest.fixture(scope="session") +def session_temp_dir(tmp_path_factory): + return tmp_path_factory.mktemp("shared_temp_dir") + + +@pytest.fixture +def simple_pandas_dataframe(): + return pd.DataFrame([(42, "horse"), (101, "zebra")], columns=["id", "data"]) + + +@pytest.fixture +def simple_pandas_series(): + return pd.Series(["horse", "zebra"]) + + +@pytest.fixture +def quickbase_api_write_receipt(): + # example data from https://developer.quickbase.com/operation/upsert + return { + "data": [ + { + "3": {"value": 1}, + "6": {"value": "Updating this record"}, + "7": {"value": 10}, + "8": {"value": "2019-12-18T08:00:00.000Z"}, + }, + { + "3": {"value": 11}, + "6": {"value": "This is my text"}, + "7": {"value": 15}, + "8": {"value": "2019-12-19T08:00:00.000Z"}, + }, + { + "3": {"value": 12}, + "6": {"value": "This is my other text"}, + "7": {"value": 20}, + "8": 
{"value": "2019-12-20T08:00:00.000Z"}, + }, + ], + "metadata": { + "createdRecordIds": [11, 12], + "totalNumberOfRecordsProcessed": 3, + "unchangedRecordIds": [], + "updatedRecordIds": [1], + }, + } + + +@pytest.fixture +def pandas_pickle_task(tmpdir): + filepath = f"{tmpdir}/foo.pickle" + return PandasPickleTask(path=filepath, table_name="Foo") + + +@pytest.fixture +def quickbase_upsert_task(tmpdir): + filepath = f"{tmpdir}/foo.json" + return QuickbaseUpsertTask(path=filepath, table_name="Foo") + + +@pytest.fixture +def complete_first_pandas_dataframe_task( + tmpdir, pandas_pickle_task, simple_pandas_dataframe +): + pandas_pickle_task.target().write(simple_pandas_dataframe) + assert pandas_pickle_task.complete() + return pandas_pickle_task + + +@pytest.fixture +def complete_first_pandas_series_task(tmpdir, pandas_pickle_task, simple_pandas_series): + pandas_pickle_task.target().write(simple_pandas_series) + assert pandas_pickle_task.complete() + return pandas_pickle_task + + +@pytest.fixture +def incomplete_first_pandas_task(tmpdir, pandas_pickle_task): + return pandas_pickle_task + + +@pytest.fixture +def second_task_with_complete_parent_dataframe_task( + tmpdir, complete_first_pandas_dataframe_task +): + class SecondTask(PandasPickleTask): + def requires(self): + return [complete_first_pandas_dataframe_task] + + return SecondTask(path=f"{tmpdir}/bar.pickle", table_name="bar") + + +@pytest.fixture +def second_task_with_complete_parent_series_task( + tmpdir, complete_first_pandas_series_task +): + class SecondTask(PandasPickleTask): + def requires(self): + return [complete_first_pandas_series_task] + + return SecondTask(path=f"{tmpdir}/bar.pickle", table_name="bar") diff --git a/tests/test_base_target.py b/tests/test_base_target.py new file mode 100644 index 0000000..64fb167 --- /dev/null +++ b/tests/test_base_target.py @@ -0,0 +1,68 @@ +import os + +import luigi +import pandas as pd +import pytest + +from hrqb.base import HRQBLocalTarget, PandasPickleTarget, 
QuickbaseTableTarget + + +def test_hrqb_local_target_require_path_and_table_name(): + with pytest.raises( + TypeError, + match="missing 2 required positional arguments: 'path' and 'table_name'", + ): + HRQBLocalTarget() + + +def test_hrqb_local_target_init_success(tmpdir): + filepath = f"{tmpdir}/temp_text_file.txt" + target = HRQBLocalTarget(path=filepath, table_name="Foo") + assert target.path == filepath + assert target.table_name == "Foo" + assert isinstance(target, HRQBLocalTarget) + assert isinstance(target, luigi.LocalTarget) + + +def test_hrqb_local_target_write_read_file(tmpdir): + filepath = f"{tmpdir}/temp_text_file.txt" + target = HRQBLocalTarget(path=filepath, table_name="Foo") + message = b"Hello World!" + with target.open("w") as f: + f.write(message) + assert os.path.exists(filepath) + + with target.open("r") as f: + assert f.read() == message + + +def test_pandas_pickle_target_dataframe_write_read_success( + tmpdir, simple_pandas_dataframe +): + filepath = f"{tmpdir}/temp_file.pickle" + target = PandasPickleTarget(path=filepath, table_name="Foo") + target.write(simple_pandas_dataframe) + assert os.path.exists(filepath) + + assert isinstance(target.read(), pd.DataFrame) + assert target.read().equals(simple_pandas_dataframe) + + +def test_pandas_pickle_target_series_write_read_success(tmpdir, simple_pandas_series): + filepath = f"{tmpdir}/temp_file.pickle" + target = PandasPickleTarget(path=filepath, table_name="Foo") + target.write(simple_pandas_series) + assert os.path.exists(filepath) + + assert isinstance(target.read(), pd.Series) + assert target.read().equals(simple_pandas_series) + + +def test_quickbase_table_target_write_read_success(tmpdir, quickbase_api_write_receipt): + filepath = f"{tmpdir}/temp_file.json" + target = QuickbaseTableTarget(path=filepath, table_name="Foo") + target.write(quickbase_api_write_receipt) + assert os.path.exists(filepath) + + assert isinstance(target.read(), dict) + assert target.read() == 
quickbase_api_write_receipt diff --git a/tests/test_base_task.py b/tests/test_base_task.py new file mode 100644 index 0000000..cb00e60 --- /dev/null +++ b/tests/test_base_task.py @@ -0,0 +1,103 @@ +import pandas as pd +import pytest + +from hrqb.base import PandasPickleTarget, PandasPickleTask, QuickbaseTableTarget + + +def test_pandas_pickle_task_gives_pandas_pickle_target(pandas_pickle_task): + target = pandas_pickle_task.target() + assert isinstance(target, PandasPickleTarget) + assert target.path == pandas_pickle_task.path + assert target.table_name == pandas_pickle_task.table_name + + +def test_pandas_pickle_task_output_path_is_target_path(pandas_pickle_task): + assert pandas_pickle_task.output().path == pandas_pickle_task.target().path + + +def test_quickbase_upsert_task_gives_quickbase_table_target(quickbase_upsert_task): + target = quickbase_upsert_task.target() + assert isinstance(target, QuickbaseTableTarget) + assert target.path == quickbase_upsert_task.path + assert target.table_name == quickbase_upsert_task.table_name + + +def test_quickbase_upsert_task_output_path_is_target_path(quickbase_upsert_task): + assert quickbase_upsert_task.output().path == quickbase_upsert_task.target().path + + +def test_luigi_task_incomplete_when_target_not_exists(tmpdir, pandas_pickle_task): + assert not pandas_pickle_task.complete() + + +def test_luigi_task_complete_when_target_exists(tmpdir, pandas_pickle_task): + with open(pandas_pickle_task.path, "a"): + assert pandas_pickle_task.complete() + + +def test_hrqb_task_get_single_input_from_complete_parent_task_success( + tmpdir, + complete_first_pandas_dataframe_task, +): + class SecondTask(PandasPickleTask): + def requires(self): + return [complete_first_pandas_dataframe_task] + + task = SecondTask(path=f"{tmpdir}/bar.pickle", table_name="bar") + assert task.single_input.exists() + task_input_dataframe = task.single_input.read() + parent_task_output_dataframe = complete_first_pandas_dataframe_task.target().read() + 
assert task_input_dataframe.equals(parent_task_output_dataframe) + + +def test_hrqb_task_get_single_input_from_incomplete_parent_task_not_exists( + tmpdir, + incomplete_first_pandas_task, +): + class SecondTask(PandasPickleTask): + def requires(self): + return [incomplete_first_pandas_task] + + task = SecondTask(path=f"{tmpdir}/bar.pickle", table_name="bar") + assert not task.single_input.exists() + + +def test_hrqb_task_get_parent_task_target_dataframe( + second_task_with_complete_parent_dataframe_task, +): + assert isinstance( + second_task_with_complete_parent_dataframe_task.input_pandas_dataframe, + pd.DataFrame, + ) + with pytest.raises(TypeError, match="Expected pandas Series but got"): + _ = second_task_with_complete_parent_dataframe_task.input_pandas_series + + +def test_hrqb_task_get_parent_task_target_series( + second_task_with_complete_parent_series_task, +): + assert isinstance( + second_task_with_complete_parent_series_task.input_pandas_series, pd.Series + ) + with pytest.raises(TypeError, match="Expected pandas Dataframe but got"): + _ = second_task_with_complete_parent_series_task.input_pandas_dataframe + + +def test_hrqb_task_multiple_parent_tasks_single_input_raise_error( + tmpdir, + second_task_with_complete_parent_dataframe_task, + second_task_with_complete_parent_series_task, +): + class SecondTask(PandasPickleTask): + def requires(self): + # note the multiple parent Tasks here + return [ + second_task_with_complete_parent_dataframe_task, + second_task_with_complete_parent_series_task, + ] + + task = SecondTask(path=f"{tmpdir}/bar.pickle", table_name="bar") + with pytest.raises( + ValueError, match="Expected a single input to this Task but found: 2" + ): + _ = task.single_input From c4216baceba882d1ae328b44ca992760bea7c410 Mon Sep 17 00:00:00 2001 From: Graham Hukill Date: Wed, 1 May 2024 12:55:05 -0400 Subject: [PATCH 6/6] Set LUIGI_CONFIG_PATH env var in Makefile --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile 
b/Makefile index 9bc6dd3..72523a6 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,6 @@ SHELL=/bin/bash DATETIME:=$(shell date -u +%Y%m%dT%H%M%SZ) +export LUIGI_CONFIG_PATH=hrqb/luigi.cfg help: # preview Makefile commands @awk 'BEGIN { FS = ":.*#"; print "Usage: make \n\nTargets:" } \