From bc14c90c537d0cd7cfb851e1461bcb37d70b09a4 Mon Sep 17 00:00:00 2001 From: Graham Hukill Date: Tue, 26 Nov 2024 11:52:32 -0500 Subject: [PATCH] LibHR many to one for headcount id Why these changes are being introduced: It was determined that the LibHR Employee Appointments table was the best way to represent vacancies for internal Library HR headcount ids. How this addresses that need: * Added begin and end dates, allowing for multiple rows for a headcount id * Added Internal Position Title column to indicate the position title is internal to Library HR, and not from the data warehouse Side effects of this change: * LibHR table allows for multiple rows for a person and/or headcount id Relevant ticket(s): * https://mitlibraries.atlassian.net/browse/HRQB-57 --- Pipfile | 1 + Pipfile.lock | 185 ++++++++++-------- hrqb/tasks/libhr_employee_appointments.py | 73 ++++++- hrqb/tasks/pipelines.py | 15 +- tests/fixtures/libhr_static_data.csv | 10 +- .../tasks/test_libhr_employee_appointments.py | 23 ++- 6 files changed, 194 insertions(+), 113 deletions(-) diff --git a/Pipfile b/Pipfile index f3664ba..64d0eb9 100644 --- a/Pipfile +++ b/Pipfile @@ -15,6 +15,7 @@ requests = "*" types-requests = "*" sqlalchemy = "*" us = "*" +cryptography = "*" [dev-packages] black = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 88cedf8..b8e7797 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "f5d1453c2db4e24b390883065c78b6cdb8e5a6c1669f6e4170a41d669949a877" + "sha256": "0394bc4ac12b6c67aad70ff6b52e723572f4ae64b0b480be031ab678ff1092b2" }, "pipfile-spec": 6, "requires": { @@ -35,61 +35,76 @@ }, "cffi": { "hashes": [ - "sha256:0c9ef6ff37e974b73c25eecc13952c55bceed9112be2d9d938ded8e856138bcc", - "sha256:131fd094d1065b19540c3d72594260f118b231090295d8c34e19a7bbcf2e860a", - "sha256:1b8ebc27c014c59692bb2664c7d13ce7a6e9a629be20e54e7271fa696ff2b417", - "sha256:2c56b361916f390cd758a57f2e16233eb4f64bcbeee88a4881ea90fca14dc6ab", - 
"sha256:2d92b25dbf6cae33f65005baf472d2c245c050b1ce709cc4588cdcdd5495b520", - "sha256:31d13b0f99e0836b7ff893d37af07366ebc90b678b6664c955b54561fc36ef36", - "sha256:32c68ef735dbe5857c810328cb2481e24722a59a2003018885514d4c09af9743", - "sha256:3686dffb02459559c74dd3d81748269ffb0eb027c39a6fc99502de37d501faa8", - "sha256:582215a0e9adbe0e379761260553ba11c58943e4bbe9c36430c4ca6ac74b15ed", - "sha256:5b50bf3f55561dac5438f8e70bfcdfd74543fd60df5fa5f62d94e5867deca684", - "sha256:5bf44d66cdf9e893637896c7faa22298baebcd18d1ddb6d2626a6e39793a1d56", - "sha256:6602bc8dc6f3a9e02b6c22c4fc1e47aa50f8f8e6d3f78a5e16ac33ef5fefa324", - "sha256:673739cb539f8cdaa07d92d02efa93c9ccf87e345b9a0b556e3ecc666718468d", - "sha256:68678abf380b42ce21a5f2abde8efee05c114c2fdb2e9eef2efdb0257fba1235", - "sha256:68e7c44931cc171c54ccb702482e9fc723192e88d25a0e133edd7aff8fcd1f6e", - "sha256:6b3d6606d369fc1da4fd8c357d026317fbb9c9b75d36dc16e90e84c26854b088", - "sha256:748dcd1e3d3d7cd5443ef03ce8685043294ad6bd7c02a38d1bd367cfd968e000", - "sha256:7651c50c8c5ef7bdb41108b7b8c5a83013bfaa8a935590c5d74627c047a583c7", - "sha256:7b78010e7b97fef4bee1e896df8a4bbb6712b7f05b7ef630f9d1da00f6444d2e", - "sha256:7e61e3e4fa664a8588aa25c883eab612a188c725755afff6289454d6362b9673", - "sha256:80876338e19c951fdfed6198e70bc88f1c9758b94578d5a7c4c91a87af3cf31c", - "sha256:8895613bcc094d4a1b2dbe179d88d7fb4a15cee43c052e8885783fac397d91fe", - "sha256:88e2b3c14bdb32e440be531ade29d3c50a1a59cd4e51b1dd8b0865c54ea5d2e2", - "sha256:8f8e709127c6c77446a8c0a8c8bf3c8ee706a06cd44b1e827c3e6a2ee6b8c098", - "sha256:9cb4a35b3642fc5c005a6755a5d17c6c8b6bcb6981baf81cea8bfbc8903e8ba8", - "sha256:9f90389693731ff1f659e55c7d1640e2ec43ff725cc61b04b2f9c6d8d017df6a", - "sha256:a09582f178759ee8128d9270cd1344154fd473bb77d94ce0aeb2a93ebf0feaf0", - "sha256:a6a14b17d7e17fa0d207ac08642c8820f84f25ce17a442fd15e27ea18d67c59b", - "sha256:a72e8961a86d19bdb45851d8f1f08b041ea37d2bd8d4fd19903bc3083d80c896", - "sha256:abd808f9c129ba2beda4cfc53bde801e5bcf9d6e0f22f095e45327c038bfe68e", 
- "sha256:ac0f5edd2360eea2f1daa9e26a41db02dd4b0451b48f7c318e217ee092a213e9", - "sha256:b29ebffcf550f9da55bec9e02ad430c992a87e5f512cd63388abb76f1036d8d2", - "sha256:b2ca4e77f9f47c55c194982e10f058db063937845bb2b7a86c84a6cfe0aefa8b", - "sha256:b7be2d771cdba2942e13215c4e340bfd76398e9227ad10402a8767ab1865d2e6", - "sha256:b84834d0cf97e7d27dd5b7f3aca7b6e9263c56308ab9dc8aae9784abb774d404", - "sha256:b86851a328eedc692acf81fb05444bdf1891747c25af7529e39ddafaf68a4f3f", - "sha256:bcb3ef43e58665bbda2fb198698fcae6776483e0c4a631aa5647806c25e02cc0", - "sha256:c0f31130ebc2d37cdd8e44605fb5fa7ad59049298b3f745c74fa74c62fbfcfc4", - "sha256:c6a164aa47843fb1b01e941d385aab7215563bb8816d80ff3a363a9f8448a8dc", - "sha256:d8a9d3ebe49f084ad71f9269834ceccbf398253c9fac910c4fd7053ff1386936", - "sha256:db8e577c19c0fda0beb7e0d4e09e0ba74b1e4c092e0e40bfa12fe05b6f6d75ba", - "sha256:dc9b18bf40cc75f66f40a7379f6a9513244fe33c0e8aa72e2d56b0196a7ef872", - "sha256:e09f3ff613345df5e8c3667da1d918f9149bd623cd9070c983c013792a9a62eb", - "sha256:e4108df7fe9b707191e55f33efbcb2d81928e10cea45527879a4749cbe472614", - "sha256:e6024675e67af929088fda399b2094574609396b1decb609c55fa58b028a32a1", - "sha256:e70f54f1796669ef691ca07d046cd81a29cb4deb1e5f942003f401c0c4a2695d", - "sha256:e715596e683d2ce000574bae5d07bd522c781a822866c20495e52520564f0969", - "sha256:e760191dd42581e023a68b758769e2da259b5d52e3103c6060ddc02c9edb8d7b", - "sha256:ed86a35631f7bfbb28e108dd96773b9d5a6ce4811cf6ea468bb6a359b256b1e4", - "sha256:ee07e47c12890ef248766a6e55bd38ebfb2bb8edd4142d56db91b21ea68b7627", - "sha256:fa3a0128b152627161ce47201262d3140edb5a5c3da88d73a1b790a959126956", - "sha256:fcc8eb6d5902bb1cf6dc4f187ee3ea80a1eba0a89aba40a5cb20a5087d961357" + "sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8", + "sha256:0984a4925a435b1da406122d4d7968dd861c1385afe3b45ba82b750f229811e2", + "sha256:0e2b1fac190ae3ebfe37b979cc1ce69c81f4e4fe5746bb401dca63a9062cdaf1", + 
"sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15", + "sha256:1257bdabf294dceb59f5e70c64a3e2f462c30c7ad68092d01bbbfb1c16b1ba36", + "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824", + "sha256:1d599671f396c4723d016dbddb72fe8e0397082b0a77a4fab8028923bec050e8", + "sha256:28b16024becceed8c6dfbc75629e27788d8a3f9030691a1dbf9821a128b22c36", + "sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17", + "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf", + "sha256:31000ec67d4221a71bd3f67df918b1f88f676f1c3b535a7eb473255fdc0b83fc", + "sha256:386c8bf53c502fff58903061338ce4f4950cbdcb23e2902d86c0f722b786bbe3", + "sha256:3edc8d958eb099c634dace3c7e16560ae474aa3803a5df240542b305d14e14ed", + "sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702", + "sha256:46bf43160c1a35f7ec506d254e5c890f3c03648a4dbac12d624e4490a7046cd1", + "sha256:4ceb10419a9adf4460ea14cfd6bc43d08701f0835e979bf821052f1805850fe8", + "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903", + "sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6", + "sha256:610faea79c43e44c71e1ec53a554553fa22321b65fae24889706c0a84d4ad86d", + "sha256:636062ea65bd0195bc012fea9321aca499c0504409f413dc88af450b57ffd03b", + "sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e", + "sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be", + "sha256:6f17be4345073b0a7b8ea599688f692ac3ef23ce28e5df79c04de519dbc4912c", + "sha256:706510fe141c86a69c8ddc029c7910003a17353970cff3b904ff0686a5927683", + "sha256:72e72408cad3d5419375fc87d289076ee319835bdfa2caad331e377589aebba9", + "sha256:733e99bc2df47476e3848417c5a4540522f234dfd4ef3ab7fafdf555b082ec0c", + "sha256:7596d6620d3fa590f677e9ee430df2958d2d6d6de2feeae5b20e82c00b76fbf8", + "sha256:78122be759c3f8a014ce010908ae03364d00a1f81ab5c7f4a7a5120607ea56e1", + "sha256:805b4371bf7197c329fcb3ead37e710d1bca9da5d583f5073b799d5c5bd1eee4", 
+ "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655", + "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67", + "sha256:9755e4345d1ec879e3849e62222a18c7174d65a6a92d5b346b1863912168b595", + "sha256:98e3969bcff97cae1b2def8ba499ea3d6f31ddfdb7635374834cf89a1a08ecf0", + "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65", + "sha256:a1ed2dd2972641495a3ec98445e09766f077aee98a1c896dcb4ad0d303628e41", + "sha256:a24ed04c8ffd54b0729c07cee15a81d964e6fee0e3d4d342a27b020d22959dc6", + "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401", + "sha256:a9b15d491f3ad5d692e11f6b71f7857e7835eb677955c00cc0aefcd0669adaf6", + "sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3", + "sha256:b2ab587605f4ba0bf81dc0cb08a41bd1c0a5906bd59243d56bad7668a6fc6c16", + "sha256:b62ce867176a75d03a665bad002af8e6d54644fad99a3c70905c543130e39d93", + "sha256:c03e868a0b3bc35839ba98e74211ed2b05d2119be4e8a0f224fba9384f1fe02e", + "sha256:c59d6e989d07460165cc5ad3c61f9fd8f1b4796eacbd81cee78957842b834af4", + "sha256:c7eac2ef9b63c79431bc4b25f1cd649d7f061a28808cbc6c47b534bd789ef964", + "sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c", + "sha256:ca74b8dbe6e8e8263c0ffd60277de77dcee6c837a3d0881d8c1ead7268c9e576", + "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0", + "sha256:cdf5ce3acdfd1661132f2a9c19cac174758dc2352bfe37d98aa7512c6b7178b3", + "sha256:d016c76bdd850f3c626af19b0542c9677ba156e4ee4fccfdd7848803533ef662", + "sha256:d01b12eeeb4427d3110de311e1774046ad344f5b1a7403101878976ecd7a10f3", + "sha256:d63afe322132c194cf832bfec0dc69a99fb9bb6bbd550f161a49e9e855cc78ff", + "sha256:da95af8214998d77a98cc14e3a3bd00aa191526343078b530ceb0bd710fb48a5", + "sha256:dd398dbc6773384a17fe0d3e7eeb8d1a21c2200473ee6806bb5e6a8e62bb73dd", + "sha256:de2ea4b5833625383e464549fec1bc395c1bdeeb5f25c4a3a82b5a8c756ec22f", + 
"sha256:de55b766c7aa2e2a3092c51e0483d700341182f08e67c63630d5b6f200bb28e5", + "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14", + "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d", + "sha256:e221cf152cff04059d011ee126477f0d9588303eb57e88923578ace7baad17f9", + "sha256:e31ae45bc2e29f6b2abd0de1cc3b9d5205aa847cafaecb8af1476a609a2f6eb7", + "sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382", + "sha256:f1e22e8c4419538cb197e4dd60acc919d7696e5ef98ee4da4e01d3f8cfa4cc5a", + "sha256:f3a2b4222ce6b60e2e8b337bb9596923045681d71e5a082783484d845390938e", + "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", + "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4", + "sha256:f79fc4fc25f1c8698ff97788206bb3c2598949bfe0fef03d299eb1b5356ada99", + "sha256:f7f5baafcc48261359e14bcd6d9bff6d4b28d9103847c9e136694cb0501aef87", + "sha256:fc48c783f9c87e60831201f2cce7f3b2e4846bf4d8728eabe54d60700b318a0b" ], "markers": "platform_python_implementation != 'PyPy'", - "version": "==1.16.0" + "version": "==1.17.1" }, "charset-normalizer": { "hashes": [ @@ -198,41 +213,37 @@ }, "cryptography": { "hashes": [ - "sha256:013629ae70b40af70c9a7a5db40abe5d9054e6f4380e50ce769947b73bf3caad", - "sha256:2346b911eb349ab547076f47f2e035fc8ff2c02380a7cbbf8d87114fa0f1c583", - "sha256:2f66d9cd9147ee495a8374a45ca445819f8929a3efcd2e3df6428e46c3cbb10b", - "sha256:2f88d197e66c65be5e42cd72e5c18afbfae3f741742070e3019ac8f4ac57262c", - "sha256:31f721658a29331f895a5a54e7e82075554ccfb8b163a18719d342f5ffe5ecb1", - "sha256:343728aac38decfdeecf55ecab3264b015be68fc2816ca800db649607aeee648", - "sha256:5226d5d21ab681f432a9c1cf8b658c0cb02533eece706b155e5fbd8a0cdd3949", - "sha256:57080dee41209e556a9a4ce60d229244f7a66ef52750f813bfbe18959770cfba", - "sha256:5a94eccb2a81a309806027e1670a358b99b8fe8bfe9f8d329f27d72c094dde8c", - "sha256:6b7c4f03ce01afd3b76cf69a5455caa9cfa3de8c8f493e0d3ab7d20611c8dae9", - 
"sha256:7016f837e15b0a1c119d27ecd89b3515f01f90a8615ed5e9427e30d9cdbfed3d", - "sha256:81884c4d096c272f00aeb1f11cf62ccd39763581645b0812e99a91505fa48e0c", - "sha256:81d8a521705787afe7a18d5bfb47ea9d9cc068206270aad0b96a725022e18d2e", - "sha256:8d09d05439ce7baa8e9e95b07ec5b6c886f548deb7e0f69ef25f64b3bce842f2", - "sha256:961e61cefdcb06e0c6d7e3a1b22ebe8b996eb2bf50614e89384be54c48c6b63d", - "sha256:9c0c1716c8447ee7dbf08d6db2e5c41c688544c61074b54fc4564196f55c25a7", - "sha256:a0608251135d0e03111152e41f0cc2392d1e74e35703960d4190b2e0f4ca9c70", - "sha256:a0c5b2b0585b6af82d7e385f55a8bc568abff8923af147ee3c07bd8b42cda8b2", - "sha256:ad803773e9df0b92e0a817d22fd8a3675493f690b96130a5e24f1b8fabbea9c7", - "sha256:b297f90c5723d04bcc8265fc2a0f86d4ea2e0f7ab4b6994459548d3a6b992a14", - "sha256:ba4f0a211697362e89ad822e667d8d340b4d8d55fae72cdd619389fb5912eefe", - "sha256:c4783183f7cb757b73b2ae9aed6599b96338eb957233c58ca8f49a49cc32fd5e", - "sha256:c9bb2ae11bfbab395bdd072985abde58ea9860ed84e59dbc0463a5d0159f5b71", - "sha256:cafb92b2bc622cd1aa6a1dce4b93307792633f4c5fe1f46c6b97cf67073ec961", - "sha256:d45b940883a03e19e944456a558b67a41160e367a719833c53de6911cabba2b7", - "sha256:dc0fdf6787f37b1c6b08e6dfc892d9d068b5bdb671198c72072828b80bd5fe4c", - "sha256:dea567d1b0e8bc5764b9443858b673b734100c2871dc93163f58c46a97a83d28", - "sha256:dec9b018df185f08483f294cae6ccac29e7a6e0678996587363dc352dc65c842", - "sha256:e3ec3672626e1b9e55afd0df6d774ff0e953452886e06e0f1eb7eb0c832e8902", - "sha256:e599b53fd95357d92304510fb7bda8523ed1f79ca98dce2f43c115950aa78801", - "sha256:fa76fbb7596cc5839320000cdd5d0955313696d9511debab7ee7278fc8b5c84a", - "sha256:fff12c88a672ab9c9c1cf7b0c80e3ad9e2ebd9d828d955c126be4fd3e5578c9e" + "sha256:0c580952eef9bf68c4747774cde7ec1d85a6e61de97281f2dba83c7d2c806362", + "sha256:0f996e7268af62598f2fc1204afa98a3b5712313a55c4c9d434aef49cadc91d4", + "sha256:1ec0bcf7e17c0c5669d881b1cd38c4972fade441b27bda1051665faaa89bdcaa", + "sha256:281c945d0e28c92ca5e5930664c1cefd85efe80e5c0d2bc58dd63383fda29f83", 
+ "sha256:2ce6fae5bdad59577b44e4dfed356944fbf1d925269114c28be377692643b4ff", + "sha256:315b9001266a492a6ff443b61238f956b214dbec9910a081ba5b6646a055a805", + "sha256:443c4a81bb10daed9a8f334365fe52542771f25aedaf889fd323a853ce7377d6", + "sha256:4a02ded6cd4f0a5562a8887df8b3bd14e822a90f97ac5e544c162899bc467664", + "sha256:53a583b6637ab4c4e3591a15bc9db855b8d9dee9a669b550f311480acab6eb08", + "sha256:63efa177ff54aec6e1c0aefaa1a241232dcd37413835a9b674b6e3f0ae2bfd3e", + "sha256:74f57f24754fe349223792466a709f8e0c093205ff0dca557af51072ff47ab18", + "sha256:7e1ce50266f4f70bf41a2c6dc4358afadae90e2a1e5342d3c08883df1675374f", + "sha256:81ef806b1fef6b06dcebad789f988d3b37ccaee225695cf3e07648eee0fc6b73", + "sha256:846da004a5804145a5f441b8530b4bf35afbf7da70f82409f151695b127213d5", + "sha256:8ac43ae87929a5982f5948ceda07001ee5e83227fd69cf55b109144938d96984", + "sha256:9762ea51a8fc2a88b70cf2995e5675b38d93bf36bd67d91721c309df184f49bd", + "sha256:a2a431ee15799d6db9fe80c82b055bae5a752bef645bba795e8e52687c69efe3", + "sha256:bf7a1932ac4176486eab36a19ed4c0492da5d97123f1406cf15e41b05e787d2e", + "sha256:c2e6fc39c4ab499049df3bdf567f768a723a5e8464816e8f009f121a5a9f4405", + "sha256:cbeb489927bd7af4aa98d4b261af9a5bc025bd87f0e3547e11584be9e9427be2", + "sha256:d03b5621a135bffecad2c73e9f4deb1a0f977b9a8ffe6f8e002bf6c9d07b918c", + "sha256:d56e96520b1020449bbace2b78b603442e7e378a9b3bd68de65c782db1507995", + "sha256:df6b6c6d742395dd77a23ea3728ab62f98379eff8fb61be2744d4679ab678f73", + "sha256:e1be4655c7ef6e1bbe6b5d0403526601323420bcf414598955968c9ef3eb7d16", + "sha256:f18c716be16bc1fea8e95def49edf46b82fccaa88587a45f8dc0ff6ab5d8e0a7", + "sha256:f46304d6f0c6ab8e52770addfa2fc41e6629495548862279641972b6215451cd", + "sha256:f7b178f11ed3664fd0e995a47ed2b5ff0a12d893e41dd0494f406d1cf555cab7" ], + "index": "pypi", "markers": "python_version >= '3.7'", - "version": "==42.0.8" + "version": "==43.0.3" }, "docutils": { "hashes": [ diff --git a/hrqb/tasks/libhr_employee_appointments.py 
b/hrqb/tasks/libhr_employee_appointments.py index 3796431..6fbae18 100644 --- a/hrqb/tasks/libhr_employee_appointments.py +++ b/hrqb/tasks/libhr_employee_appointments.py @@ -1,14 +1,20 @@ """hrqb.tasks.libhr_employee_appointments""" +import datetime +import re + import luigi # type: ignore[import-untyped] -import numpy as np import pandas as pd from hrqb.base.task import ( PandasPickleTask, QuickbaseUpsertTask, ) -from hrqb.utils import md5_hash_from_values +from hrqb.utils import ( + convert_dataframe_columns_to_dates, + md5_hash_from_values, + normalize_dataframe_dates, +) from hrqb.utils.quickbase import QBClient @@ -18,6 +24,21 @@ class ExtractLibHREmployeeAppointments(PandasPickleTask): This task is expecting the CSV to be a local filepath. Unlike other pipelines in this client, this pipeline is rarely run, and is suitable for local, developer runs to load data. + + Expected schema of CSV file: + - MIT ID: str, MIT ID + - HC ID: str, pattern of "L-###{a|b|x}" + - Full Name: str (OPTIONAL; human eyes) + - Internal Position Title: str, free-text that is stored in LibHR table + - Position ID: str, position number used to join warehouse data + - Employee Type: str (OPTIONAL; human eyes) + - Supervisor ID: str, MIT ID + - Supervisor Name: str (OPTIONAL; human eyes) + - Cost Object: str + - Department: str, Department acronym + - Begin Date: YYYY-MM-DD str, begin date when Headcount ID (HC ID) applied + - End Date: YYYY-MM-DD str, end date when Headcount ID (HC ID) applied + - Notes: str, free-text notes """ pipeline = luigi.Parameter() @@ -25,15 +46,41 @@ class ExtractLibHREmployeeAppointments(PandasPickleTask): csv_filepath = luigi.Parameter() def get_dataframe(self) -> pd.DataFrame: - # read CSV file libhr_df = pd.read_csv(self.csv_filepath) - # convert 'Active' column to Quickbase Yes/No checkbox value - # np.False_ and np.True_ values are the result of Excel --> CSV --> pandas - libhr_df["Active"] = libhr_df["Active"].replace( - {np.True_: "Yes", np.False_:
"No"} + # convert Begin and End dates and set "Active" column + libhr_df = convert_dataframe_columns_to_dates( + libhr_df, columns=["Begin Date", "End Date"] ) + # set Active column value of Yes/No + def determine_active(end_date: datetime.datetime) -> str: + if end_date is None: + return "No" + if end_date >= datetime.datetime.now(tz=datetime.UTC): + return "Yes" + return "No" + + libhr_df["Active"] = libhr_df["End Date"].apply(determine_active) + + # normalize Headcount ID, raising exceptions if not properly formed or absent + def remove_headcount_id_suffixes(original_headcount_id: str | None) -> str | None: + if original_headcount_id is None: + message = "LibHR CSV data is missing a Headcount ID for one or more rows." + raise ValueError(message) + matched_object = re.match( + r"([T,L]-\d\d\d).*", + original_headcount_id.strip().upper(), + ) + if not matched_object: + message = f"Could not parse HC ID: {original_headcount_id}" + raise ValueError(message) + normalized_hc_id = matched_object.group(1) + return normalized_hc_id.upper() + + libhr_df["HC ID (Original)"] = libhr_df["HC ID"] + libhr_df["HC ID"] = libhr_df["HC ID"].apply(remove_headcount_id_suffixes) + return libhr_df @@ -84,20 +131,30 @@ def get_dataframe(self) -> pd.DataFrame: [ str(row["MIT ID"]), str(row["HC ID"]), + str(row["HC ID (Original)"]), + str(row["Begin Date"]), + str(row["End Date"]), ] ), axis=1, ) + libhr_df = normalize_dataframe_dates(libhr_df, ["Begin Date", "End Date"]) + fields = { "MIT ID": "Related Employee MIT ID", "Supervisor ID": "Related Supervisor MIT ID", "Cost Object": "Cost Object", "HC ID": "HC ID", + "HC ID (Original)": "HC ID (Original)", "Position ID": "Position ID", + "Internal Position Title": "Internal Position Title", "Related Department ID": "Related Department ID", "Active": "Active", "Key": "Key", + "Begin Date": "Begin Date", + "End Date": "End Date", + "Notes": "Notes", } return libhr_df[fields.keys()].rename(columns=fields) @@ -109,7 +166,7 @@ class 
LoadLibHREmployeeAppointments(QuickbaseUpsertTask): @property def merge_field(self) -> str | None: - """Explicitly merge on unique Position ID field.""" + """Explicitly merge on unique Key field.""" return "Key" def requires(self) -> list[luigi.Task]: # pragma: nocover diff --git a/hrqb/tasks/pipelines.py b/hrqb/tasks/pipelines.py index 936a21c..cf52aa5 100644 --- a/hrqb/tasks/pipelines.py +++ b/hrqb/tasks/pipelines.py @@ -30,21 +30,16 @@ class UpdateLibHRData(HRQBPipelineTask): """Pipeline to load Library HR employee appointment data from static CSV file. This pipeline loads the table 'LibHR Employee Appointments', which contains - information known only by Library HR, that we cannot get from the data warehouse, - including: - - position HC ID (Headcount ID) - - position supervisor - - position library department - - position cost object + information known only by Library HR, that we cannot get from the data warehouse. This Quickbase table is used by the 'Employee Appointments' table to fill in gaps from warehouse data alone. This pipeline is useful for initial loading and bulk changes, but this table is primarily managed directly in Quickbase by HR staff. - This pipeline requires a 'csv_filepath' parameter is defined when running, e.g. 
- pipenv run hrqb --verbose / - pipeline -p UpdateLibHRData / - --pipeline-parameters=csv_filepath= / + This pipeline requires that a 'csv_filepath' parameter be defined when running, e.g.: + pipenv run hrqb --verbose \ + pipeline -p UpdateLibHRData \ + --pipeline-parameters=csv_filepath= \ run """ diff --git a/tests/fixtures/libhr_static_data.csv b/tests/fixtures/libhr_static_data.csv index a8be8cc..f414b11 100644 --- a/tests/fixtures/libhr_static_data.csv +++ b/tests/fixtures/libhr_static_data.csv @@ -1,4 +1,6 @@ -MIT ID,HC ID,Full Name,Position,Position ID,Employee Type,Supervisor ID,Supervisor Name,Cost Object,Department,Active -123456789,L-001,"Doe, John",Science Librarian,888888888,Admin Staff,444444444,"Smith, Fancy",555555555,DDC,True -987654321,L-100,"Doe, Jane",Data Engineer,999999999,Admin Staff,444444444,"Smith, Fancy",555555555,ITS,True -987654321,L-101x,"Doe, Jane",DevOps Engineer,999999991,Admin Staff,444444444,"Smith, Fancy",555555555,BAD_ACRO,False \ No newline at end of file +MIT ID,HC ID,Full Name,Internal Position Title,Position ID,Employee Type,Supervisor ID,Supervisor Name,Cost Object,Department,Begin Date,End Date,Notes +123456789,L-001,"Doe, John",Science Librarian,888888888,Admin Staff,444444444,"Smith, Fancy",555555555,DDC,2022-02-02,2999-12-31, +987654321,L-100,"Doe, Jane",Data Engineer,999999999,Admin Staff,444444444,"Smith, Fancy",555555555,ITS,2021-01-01,2999-12-31,Represents employee Jane Doe getting a new headcount id when changing positions +987654321,L-101x,"Doe, Jane",DevOps Engineer,999999991,Admin Staff,444444444,"Smith, Fancy",555555555,BAD_ACRO,2020-01-01,2020-12-31,"Represents a headcount id that ended, and was picked up by a new employee." +987654320,L-101,"Doe, Alice",DevOps Engineer,999999991,Admin Staff,444444444,"Smith, Fancy",555555555,ITS,2021-06-01,2999-12-31,"Represents a headcount id that was picked up and is still active, but headcount id L-101 was vacant for six months."
+343434343,L-050x,"Doe, Stan",Music Librarian,777777777,,333333333,"Lee, Pauline",444444444,IDLA,2023-07-15,2024-02-01,Represents a still vacant headcount id. \ No newline at end of file diff --git a/tests/tasks/test_libhr_employee_appointments.py b/tests/tasks/test_libhr_employee_appointments.py index d441fc3..5d81d63 100644 --- a/tests/tasks/test_libhr_employee_appointments.py +++ b/tests/tasks/test_libhr_employee_appointments.py @@ -1,4 +1,5 @@ # ruff: noqa: PD901, PLR2004 +import datetime import pandas as pd @@ -8,9 +9,21 @@ def test_extract_libhr_employee_appointments_read_csv( ): df = task_extract_libhr_employee_appointments.get_dataframe() assert isinstance(df, pd.DataFrame) - assert len(df) == 3 + assert len(df) == 5 assert df.iloc[0]["MIT ID"] == 123456789 assert df.iloc[0]["Supervisor ID"] == 444444444 + assert df.iloc[0]["Begin Date"] == datetime.datetime(2022, 2, 2, tzinfo=datetime.UTC) + assert df.iloc[0]["End Date"] == datetime.datetime(2999, 12, 31, tzinfo=datetime.UTC) + assert df.iloc[0]["Active"] + + +def test_extract_libhr_employee_appointments_strips_headcount_id_suffix( + task_extract_libhr_employee_appointments, +): + df = task_extract_libhr_employee_appointments.get_dataframe() + row = df.iloc[2] + assert row["HC ID"] == "L-101" + assert row["HC ID (Original)"] == "L-101x" def test_transform_libhr_employee_appointments_merge_departments( @@ -33,7 +46,9 @@ def test_transform_libhr_employee_appointments_merge_field_key_values( ): new_df = task_transform_libhr_employee_appointments.get_dataframe() assert list(new_df["Key"]) == [ - "81cf06bfd65aa1f7019750c57a79be99", - "6e07102ee39ec1f22c63231d090bd4dd", - "af08a24eeb35fae63fa76e755537b949", + "bf150338afc4af388b4be1ec33e8b7ec", + "f71875f7a6ceb7fd41d8ac2dbb4b6aa7", + "e4dd968ab59c898e8318c57ad322518e", + "8833ed8dbb41f850377dd6cdf896c922", + "47652ff11feb5f688adc48fb38839d0a", ]