From c6c8ac2c360aa2431066679ae838e22d842a4704 Mon Sep 17 00:00:00 2001 From: abstractqqq Date: Fri, 29 Dec 2023 14:35:26 -0500 Subject: [PATCH] restructured rust code org --- Cargo.lock | 233 ++++++++++-------- Cargo.toml | 2 +- python/polars_ds/metrics.py | 17 ++ src/lib.rs | 20 +- src/{num_ext => num}/complex.rs | 0 src/{num_ext => num}/cond_entropy.rs | 0 src/{num_ext => num}/entrophies.rs | 2 +- src/{num_ext => num}/fft.rs | 0 src/{num_ext => num}/gcd_lcm.rs | 0 src/{num_ext => num}/haversine.rs | 0 src/{num_ext => num}/jaccard.rs | 0 src/{num_ext => num}/knn.rs | 0 src/{num_ext => num}/lempel_ziv.rs | 0 src/{num_ext => num}/mod.rs | 0 src/{num_ext => num}/ols.rs | 0 src/{num_ext => num}/powi.rs | 0 src/{num_ext => num}/tp_fp.rs | 0 src/{num_ext => num}/trapz.rs | 0 src/{stats_ext => stats}/chi2.rs | 2 +- src/{stats_ext => stats}/fstats.rs | 2 +- src/{stats_ext => stats}/ks.rs | 2 +- src/stats/mod.rs | 80 ++++-- src/{stats_ext => stats}/normal_test.rs | 2 +- src/{stats_ext => stats}/sample.rs | 0 src/{stats_ext => stats}/t_test.rs | 8 +- src/stats_ext/mod.rs | 64 ----- src/{stats => stats_utils}/beta.rs | 0 src/{stats => stats_utils}/gamma.rs | 0 src/stats_utils/mod.rs | 18 ++ src/{stats => stats_utils}/normal.rs | 0 src/{str_ext => str2}/aho_corasick.rs | 0 src/{str_ext => str2}/consts.rs | 0 src/{str_ext => str2}/fuzz.rs | 0 src/{str_ext => str2}/hamming.rs | 0 src/{str_ext => str2}/inflections.rs | 0 src/{str_ext => str2}/is_stopword.rs | 0 src/{str_ext => str2}/jaro.rs | 0 src/{str_ext => str2}/levenshtein.rs | 0 src/{str_ext => str2}/mod.rs | 0 src/{str_ext => str2}/osa.rs | 0 src/{str_ext => str2}/overlap.rs | 0 .../snowball/algorithms/english_stemmer.rs | 4 +- .../snowball/algorithms/mod.rs | 0 src/{str_ext => str2}/snowball/among.rs | 2 +- src/{str_ext => str2}/snowball/mod.rs | 4 +- .../snowball/snowball_env.rs | 2 +- src/{str_ext => str2}/snowball_stem.rs | 0 src/{str_ext => str2}/sorensen_dice.rs | 0 src/{str_ext => str2}/str_jaccard.rs | 0 49 files changed, 245 insertions(+), 219 deletions(-) rename src/{num_ext => num}/complex.rs (100%) rename src/{num_ext => num}/cond_entropy.rs (100%) rename src/{num_ext => num}/entrophies.rs (98%) rename src/{num_ext => num}/fft.rs (100%) rename src/{num_ext => num}/gcd_lcm.rs (100%) rename src/{num_ext => num}/haversine.rs (100%) rename src/{num_ext => num}/jaccard.rs (100%) rename src/{num_ext => num}/knn.rs (100%) rename src/{num_ext => num}/lempel_ziv.rs (100%) rename src/{num_ext => num}/mod.rs (100%) rename src/{num_ext => num}/ols.rs (100%) rename src/{num_ext => num}/powi.rs (100%) rename src/{num_ext => num}/tp_fp.rs (100%) rename src/{num_ext => num}/trapz.rs (100%) rename src/{stats_ext => stats}/chi2.rs (98%) rename src/{stats_ext => stats}/fstats.rs (99%) rename src/{stats_ext => stats}/ks.rs (98%) rename src/{stats_ext => stats}/normal_test.rs (98%) rename src/{stats_ext => stats}/sample.rs (100%) rename src/{stats_ext => stats}/t_test.rs (96%) delete mode 100644 src/stats_ext/mod.rs rename src/{stats => stats_utils}/beta.rs (100%) rename src/{stats => stats_utils}/gamma.rs (100%) create mode 100644 src/stats_utils/mod.rs rename src/{stats => stats_utils}/normal.rs (100%) rename src/{str_ext => str2}/aho_corasick.rs (100%) rename src/{str_ext => str2}/consts.rs (100%) rename src/{str_ext => str2}/fuzz.rs (100%) rename src/{str_ext => str2}/hamming.rs (100%) rename src/{str_ext => str2}/inflections.rs (100%) rename src/{str_ext => str2}/is_stopword.rs (100%) rename src/{str_ext => str2}/jaro.rs (100%) rename src/{str_ext => str2}/levenshtein.rs (100%) rename src/{str_ext => str2}/mod.rs (100%) rename src/{str_ext => str2}/osa.rs (100%) rename src/{str_ext => str2}/overlap.rs (100%) rename src/{str_ext => str2}/snowball/algorithms/english_stemmer.rs (99%) rename src/{str_ext => str2}/snowball/algorithms/mod.rs (100%) rename src/{str_ext => str2}/snowball/among.rs (78%) rename src/{str_ext => str2}/snowball/mod.rs (92%) rename src/{str_ext => str2}/snowball/snowball_env.rs (99%) rename src/{str_ext => str2}/snowball_stem.rs (100%) rename src/{str_ext => str2}/sorensen_dice.rs (100%) rename src/{str_ext => str2}/str_jaccard.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index a3c6d6ad..a208b267 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -70,29 +70,6 @@ dependencies = [ "serde", ] -[[package]] -name = "assert2" -version = "0.3.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eaf98d1183406dcb8f8b545e1f24829d75c1a9d35eec4b86309a22aa8b6d8e95" -dependencies = [ - "assert2-macros", - "is-terminal", - "yansi", -] - -[[package]] -name = "assert2-macros" -version = "0.3.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c55bdf3e6f792f8f1c750bb6886b7ca40fa5a354ddb7a4dee550b93985a9235" -dependencies = [ - "proc-macro2", - "quote", - "rustc_version 0.4.0", - "syn 1.0.109", -] - [[package]] name = "atoi" version = "2.0.0" @@ -317,9 +294,9 @@ checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" [[package]] name = "dbgf" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4491eba7b24b935558dd1948ac98f5f87428198847894580bd99c01bc30f6d24" +checksum = "23e8ca49a64a84b13276067a894854fd56c571971976e2f2d179b99fe894b2f7" [[package]] name = "dyn-clone" @@ -343,6 +320,18 @@ version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" +[[package]] +name = "enum-as-inner" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ffccbb6966c05b32ef8fbac435df276c4ae4d3dc55a8cd0eb9745e6c12f546a" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.39", +] + [[package]] name = "enum_dispatch" version = "0.3.12" @@ -356,21 +345,31 @@ dependencies = [ ] [[package]] -name = "equivalent" -version = "1.0.1" +name = "equator" +version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" +checksum = "8a5d677974df89c37e11e3bb9dff9a51901c4abc9e4130200a0e88967ca4a73a" +dependencies = [ + "equator-macro", +] [[package]] -name = "errno" -version = "0.3.5" +name = "equator-macro" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3e13f66a2f95e32a39eaa81f6b95d42878ca0e1db0c7543723dfe12557e860" +checksum = "4a576659e2372c3ac65a3b25dc68fe455d8fae1c67450f251776ec0ffb7bdab8" dependencies = [ - "libc", - "windows-sys", + "proc-macro2", + "quote", + "syn 2.0.39", ] +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + [[package]] name = "ethnum" version = "1.5.0" @@ -379,9 +378,9 @@ checksum = "b90ca2580b73ab6a1f724b76ca11ab632df820fd6040c336200d2c1df7b3c82c" [[package]] name = "faer" -version = "0.15.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31bba601a75b39da36fae2e92186f9db83cb7c63a8271cf1dc489c1de7380cbb" +checksum = "e70e56a6aaf3f56ab0b44ed10b9263dc6e1dcdb32a2a698e732327687fde7737" dependencies = [ "coe-rs", "dbgf", @@ -391,6 +390,7 @@ dependencies = [ "faer-evd", "faer-lu", "faer-qr", + "faer-sparse", "faer-svd", "matrixcompare", "ndarray", @@ -401,9 +401,9 @@ dependencies = [ [[package]] name = "faer-cholesky" -version = "0.15.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ac614eed0cbf2cc05e3245052bbb196601884d6423672210cc0adf0c47fa328" +checksum = "c9d1c7944403c66085e9b4187ca8a710c6cb1cdeb2511228fb745387b29215a9" dependencies = [ "bytemuck", "dyn-stack", @@ -418,14 +418,14 @@ dependencies = [ [[package]] name = "faer-core" -version = "0.15.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06958f8e82aefdf7564baa76f9094c36bcfdb2c1e69bd8097ff1f9e83c3c78d" +checksum = "7c11e871d9a8e09c645ce2356ec980838930e884379894a084aa946ef349f26e" dependencies = [ - "assert2", "bytemuck", "coe-rs", "dyn-stack", + "equator", "faer-entity", "gemm", "matrixcompare-core", @@ -440,9 +440,9 @@ dependencies = [ [[package]] name = "faer-entity" -version = "0.15.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5a909dcb9287b8f3e658c711ca93a4c5761262a6233dc6ac0230ba40bba9465" +checksum = "ab27f48f17fee3ea487eb5ee828892348bcc903ad7297c472953236e62229b84" dependencies = [ "bytemuck", "coe-rs", @@ -455,9 +455,9 @@ dependencies = [ [[package]] name = "faer-evd" -version = "0.15.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae1e892668c4b1ed2a8df9a2f696ee4055b1cb1366f8926177f4d64c8e56b214" +checksum = "eaa2267c1dfcf7e74e47114a4c37dc8a3ecc5e61dfdede1ce24a89e663e342c0" dependencies = [ "bytemuck", "coe-rs", @@ -474,9 +474,9 @@ dependencies = [ [[package]] name = "faer-lu" -version = "0.15.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25efab53c8d22da197966185f3d3e2d70d3d65215efec4c50268e9bcadb755b8" +checksum = "abf3c7ef8bc8d55e101c0a9b71839543dece2e9c3a796ac6c3aa9784826d6c00" dependencies = [ "bytemuck", "coe-rs", @@ -494,9 +494,9 @@ dependencies = [ [[package]] name = "faer-qr" -version = "0.15.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a04e83ae623f2d42d4f61dc555942e78bc08669330ce9010dbbfc3655a8206a0" +checksum = "816658a3b4abd8cf67a0431d7ee48e40108061dce5e466d4f0004f6f45cdd060" dependencies = [ "bytemuck", "coe-rs", @@ -510,11 +510,31 @@ dependencies = [ "reborrow", ] +[[package]] +name = "faer-sparse" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27799c41a4f14db0d37c03fb3dc784ebef0c916ab55ff7a5f61b7bbf9bb55dfc" +dependencies = [ + "bytemuck", + "coe-rs", + "dbgf", + "dyn-stack", + "faer-cholesky", + "faer-core", + "faer-entity", + "faer-lu", + "faer-qr", + "pulp", + "rayon", + "reborrow", +] + [[package]] name = "faer-svd" -version = "0.15.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0da6648b7bd79a8ca835d8d92674eff978610733ef6ba8238045c3c0e3c8151" +checksum = "402ae5bd9451228c4fef5db431321b04fc40692664d025d5c8e6ea72afb6ed92" dependencies = [ "bytemuck", "coe-rs", @@ -548,9 +568,9 @@ checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" [[package]] name = "gemm" -version = "0.16.15" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b3afa707040531a7527477fd63a81ea4f6f3d26037a2f96776e57fb843b258e" +checksum = "e97d506c68f4fb12325b52a638e7d54cc87e3593a4ded0de60218b6dfd65f645" dependencies = [ "dyn-stack", "gemm-c32", @@ -568,9 +588,9 @@ dependencies = [ [[package]] name = "gemm-c32" -version = "0.16.15" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cc3973a4c30c73f26a099113953d0c772bb17ee2e07976c0a06b8fe1f38a57d" +checksum = "0dd16f26e8f34661edc906d8c9522b59ec1655c865a98a58950d0246eeaca9da" dependencies = [ "dyn-stack", "gemm-common", @@ -583,9 +603,9 @@ dependencies = [ [[package]] name = "gemm-c64" -version = "0.16.15" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30362894b93dada374442cb2edf4512ddf19513c9bec88e06a445bcb6b22e64f" +checksum = "a8e34381bc060b47fbd25522a281799ef763cd27f43bbd1783d935774659242a" dependencies = [ "dyn-stack", "gemm-common", @@ -598,9 +618,9 @@ dependencies = [ [[package]] name = "gemm-common" -version = "0.16.15" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "988499faa80566b046b4fee2c5f15af55b5a20c1fe8486b112ebb34efa045ad6" +checksum = "22518a76339b09276f77c3166c44262e55f633712fe8a44fd0573505887feeab" dependencies = [ "bytemuck", "dyn-stack", @@ -613,13 +633,14 @@ dependencies = [ "raw-cpuid", "rayon", "seq-macro", + "sysctl", ] [[package]] name = "gemm-f16" -version = "0.16.15" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6cf2854a12371684c38d9a865063a27661812a3ff5803454c5742e8f5a388ce" +checksum = "70409bbf3ef83b38cbe4a58cd4b797c1c27902505bdd926a588ea61b6c550a84" dependencies = [ "dyn-stack", "gemm-common", @@ -635,9 +656,9 @@ dependencies = [ [[package]] name = "gemm-f32" -version = "0.16.15" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bc84003cf6d950a7c7ca714ad6db281b6cef5c7d462f5cd9ad90ea2409c7227" +checksum = "5ea3068edca27f100964157211782eba19e961aa4d0d2bdac3e1775a51aa7680" dependencies = [ "dyn-stack", "gemm-common", @@ -650,9 +671,9 @@ dependencies = [ [[package]] name = "gemm-f64" -version = "0.16.15" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35187ef101a71eed0ecd26fb4a6255b4192a12f1c5335f3a795698f2d9b6cf33" +checksum = "5fd41e8f5a60dce8d8acd852a3f4b22f8e18be957e1937731be692c037652510" dependencies = [ "dyn-stack", "gemm-common", @@ -711,12 +732,6 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" -[[package]] -name = "hermit-abi" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" - [[package]] name = "home" version = "0.5.5" @@ -780,17 +795,6 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a257582fdcde896fd96463bf2d40eefea0580021c0712a0e2b028b60b47a837a" -[[package]] -name = "is-terminal" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" -dependencies = [ - "hermit-abi", - "rustix", - "windows-sys", -] - [[package]] name = "iter-read" version = "0.3.1" @@ -882,12 +886,6 @@ dependencies = [ "libc", ] -[[package]] -name = "linux-raw-sys" -version = "0.4.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da2479e8c062e40bf0066ffa0bc823de0a9368974af99c9f6df941d2c231e03f" - [[package]] name = "lock_api" version = "0.4.11" @@ -1923,19 +1921,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "rustix" -version = "0.38.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b426b0506e5d50a7d8dafcf2e81471400deb602392c7dd110815afb4eaf02a3" -dependencies = [ - "bitflags 2.4.1", - "errno", - "libc", - "linux-raw-sys", - "windows-sys", -] - [[package]] name = "rustversion" version = "1.0.14" @@ -1948,6 +1933,15 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -2125,6 +2119,20 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sysctl" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec7dddc5f0fee506baf8b9fdb989e242f17e4b11c61dfbb0635b705217199eea" +dependencies = [ + "bitflags 2.4.1", + "byteorder", + "enum-as-inner", + "libc", + "thiserror", + "walkdir", +] + [[package]] name = "sysinfo" version = "0.29.10" @@ -2205,6 +2213,16 @@ version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +[[package]] +name = "walkdir" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -2281,6 +2299,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +dependencies = [ + "winapi", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" @@ -2368,12 +2395,6 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9828b178da53440fa9c766a3d2f73f7cf5d0ac1fe3980c1e5018d899fd19e07b" -[[package]] -name = "yansi" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" - [[package]] name = "zerocopy" version = "0.7.25" diff --git a/Cargo.toml b/Cargo.toml index 758bf355..7cb360f8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ pyo3 = {version = "*", features = ["extension-module"]} pyo3-polars = {version = "*", features = ["derive"]} polars = {version = "0.35.4", features = ["performant", "lazy", "dtype-array", "ndarray", "log", "nightly"]} num = "0.4.1" -faer = {version = "0.15", features = ["ndarray", "nightly"]} +faer = {version = "0.16", features = ["ndarray", "nightly"]} serde = {version = "*", features=["derive"]} ndarray = {version="0.15.6", features=["rayon"]} # see if we can get rid of this hashbrown = {version = "0.14.2", features=["nightly"]} diff --git a/python/polars_ds/metrics.py b/python/polars_ds/metrics.py index 874280cf..93183355 100644 --- a/python/polars_ds/metrics.py +++ b/python/polars_ds/metrics.py @@ -23,6 +23,23 @@ class MetricExt: def __init__(self, expr: pl.Expr): self._expr: pl.Expr = expr + def max_error(self, pred: pl.Expr) -> pl.Expr: + """ + Computes the max absolute error between actual and pred. + """ + x = self._expr - pred + return pl.max_horizontal(x.max(), -x.min()) + + def mean_gamma_deviance(self, pred: pl.Expr) -> pl.Expr: + """ + Computes the mean gamma deviance between actual and pred. + + Note that this will return NaNs when any value is < 0. This only makes sense when y_true + and y_pred as strictly positive. + """ + x = self._expr / pred + return 2.0 * (x.log() + x - 1).mean() + def hubor_loss(self, pred: pl.Expr, delta: float) -> pl.Expr: """ Computes huber loss between this and the other expression. This assumes diff --git a/src/lib.rs b/src/lib.rs index 30c986bb..1445b49b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,12 +1,8 @@ -mod num_ext; +mod num; +mod stats_utils; mod stats; -mod stats_ext; -mod str_ext; +mod str2; mod utils; -use polars::{ - error::{PolarsError, PolarsResult}, - series::Series, -}; use pyo3::{pymodule, types::PyModule, PyResult, Python}; #[cfg(target_os = "linux")] @@ -16,18 +12,10 @@ use jemallocator::Jemalloc; #[cfg(target_os = "linux")] static ALLOC: Jemalloc = Jemalloc; -// #[inline] -// pub fn no_null_in_inputs(inputs: &[Series], err_msg: String) -> PolarsResult<()> { -// for s in inputs { -// if s.null_count() > 0 { -// return Err(PolarsError::ComputeError(err_msg.into())); -// } -// } -// Ok(()) -// } #[pymodule] #[pyo3(name = "_polars_ds")] fn _polars_ds(_py: Python<'_>, _m: &PyModule) -> PyResult<()> { + Ok(()) } diff --git a/src/num_ext/complex.rs b/src/num/complex.rs similarity index 100% rename from src/num_ext/complex.rs rename to src/num/complex.rs diff --git a/src/num_ext/cond_entropy.rs b/src/num/cond_entropy.rs similarity index 100% rename from src/num_ext/cond_entropy.rs rename to src/num/cond_entropy.rs diff --git a/src/num_ext/entrophies.rs b/src/num/entrophies.rs similarity index 98% rename from src/num_ext/entrophies.rs rename to src/num/entrophies.rs index f67f6cee..6e152912 100644 --- a/src/num_ext/entrophies.rs +++ b/src/num/entrophies.rs @@ -1,4 +1,4 @@ -use crate::num_ext::knn::{build_standard_kdtree, query_nb_cnt, KdtreeKwargs}; +use crate::num::knn::{build_standard_kdtree, query_nb_cnt, KdtreeKwargs}; use ndarray::s; use polars::prelude::*; use pyo3_polars::derive::polars_expr; diff --git a/src/num_ext/fft.rs b/src/num/fft.rs similarity index 100% rename from src/num_ext/fft.rs rename to src/num/fft.rs diff --git a/src/num_ext/gcd_lcm.rs b/src/num/gcd_lcm.rs similarity index 100% rename from src/num_ext/gcd_lcm.rs rename to src/num/gcd_lcm.rs diff --git a/src/num_ext/haversine.rs b/src/num/haversine.rs similarity index 100% rename from src/num_ext/haversine.rs rename to src/num/haversine.rs diff --git a/src/num_ext/jaccard.rs b/src/num/jaccard.rs similarity index 100% rename from src/num_ext/jaccard.rs rename to src/num/jaccard.rs diff --git a/src/num_ext/knn.rs b/src/num/knn.rs similarity index 100% rename from src/num_ext/knn.rs rename to src/num/knn.rs diff --git a/src/num_ext/lempel_ziv.rs b/src/num/lempel_ziv.rs similarity index 100% rename from src/num_ext/lempel_ziv.rs rename to src/num/lempel_ziv.rs diff --git a/src/num_ext/mod.rs b/src/num/mod.rs similarity index 100% rename from src/num_ext/mod.rs rename to src/num/mod.rs diff --git a/src/num_ext/ols.rs b/src/num/ols.rs similarity index 100% rename from src/num_ext/ols.rs rename to src/num/ols.rs diff --git a/src/num_ext/powi.rs b/src/num/powi.rs similarity index 100% rename from src/num_ext/powi.rs rename to src/num/powi.rs diff --git a/src/num_ext/tp_fp.rs b/src/num/tp_fp.rs similarity index 100% rename from src/num_ext/tp_fp.rs rename to src/num/tp_fp.rs diff --git a/src/num_ext/trapz.rs b/src/num/trapz.rs similarity index 100% rename from src/num_ext/trapz.rs rename to src/num/trapz.rs diff --git a/src/stats_ext/chi2.rs b/src/stats/chi2.rs similarity index 98% rename from src/stats_ext/chi2.rs rename to src/stats/chi2.rs index a6df113d..8a954819 100644 --- a/src/stats_ext/chi2.rs +++ b/src/stats/chi2.rs @@ -1,5 +1,5 @@ use super::simple_stats_output; -use crate::stats::gamma; +use crate::stats_utils::gamma; use polars::prelude::*; use pyo3_polars::derive::polars_expr; diff --git a/src/stats_ext/fstats.rs b/src/stats/fstats.rs similarity index 99% rename from src/stats_ext/fstats.rs rename to src/stats/fstats.rs index 24eafee0..8096aedc 100644 --- a/src/stats_ext/fstats.rs +++ b/src/stats/fstats.rs @@ -1,6 +1,6 @@ /// Multiple F-statistics at once and F test use super::{list_float_output, simple_stats_output, StatsResult}; -use crate::stats::beta::fisher_snedecor_sf; +use crate::stats_utils::beta::fisher_snedecor_sf; use itertools::Itertools; use polars::prelude::*; use pyo3_polars::derive::polars_expr; diff --git a/src/stats_ext/ks.rs b/src/stats/ks.rs similarity index 98% rename from src/stats_ext/ks.rs rename to src/stats/ks.rs index 389b46ec..2c7306f4 100644 --- a/src/stats_ext/ks.rs +++ b/src/stats/ks.rs @@ -1,5 +1,5 @@ /// KS statistics. -use crate::stats_ext::StatsResult; +use crate::stats::StatsResult; use crate::utils::binary_search_right; use itertools::Itertools; use polars::prelude::*; diff --git a/src/stats/mod.rs b/src/stats/mod.rs index 6805d2d5..d0b50ff1 100644 --- a/src/stats/mod.rs +++ b/src/stats/mod.rs @@ -1,18 +1,64 @@ -/// This submodule is mostly taken from the project statrs. See credit section in README.md -/// The reason I do not want to add it as a dependency is that it has a nalgebra dependency for -/// multi-variate distributions, which is something that I think will not be needed in this -/// package. Another reason is that if I want to do linear algebra, I would use Faer since Faer -/// performs better and nalgebra is too much of a dependency for this package right now. -pub mod beta; -pub mod gamma; -pub mod normal; - -pub const PREC_ACC: f64 = 0.0000000000000011102230246251565; -pub const LN_PI: f64 = 1.1447298858494001741434273513530587116472948129153; -//pub const LN_SQRT_2PI: f64 = 0.91893853320467274178032973640561763986139747363778; -pub const LN_2_SQRT_E_OVER_PI: f64 = 0.6207822376352452223455184457816472122518527279025978; - -#[inline] -pub fn is_zero(x: f64) -> bool { - x.abs() < PREC_ACC +mod chi2; +mod fstats; +mod ks; +mod normal_test; +mod sample; +mod t_test; + +use polars::prelude::*; + +pub fn list_float_output(_: &[Field]) -> PolarsResult { + Ok(Field::new( + "list_float", + DataType::List(Box::new(DataType::Float64)), + )) +} + +pub fn simple_stats_output(_: &[Field]) -> PolarsResult { + let s = Field::new("statistic", DataType::Float64); + let p = Field::new("pvalue", DataType::Float64); + let v: Vec = vec![s, p]; + Ok(Field::new("", DataType::Struct(v))) +} + +struct StatsResult { + pub statistic: f64, + pub p: Option, +} + +impl StatsResult { + pub fn new(s: f64, p: f64) -> StatsResult { + StatsResult { + statistic: s, + p: Some(p), + } + } + + pub fn from_stats(s: f64) -> StatsResult { + StatsResult { + statistic: s, + p: None, + } + } + + pub fn unwrap_p_or(&self, default: f64) -> f64 { + self.p.unwrap_or(default) + } +} + +pub enum Alternative { + TwoSided, + Less, + Greater, +} + +impl From<&str> for Alternative { + fn from(s: &str) -> Alternative { + match s.to_lowercase().as_str() { + "two-sided" | "two" => Alternative::TwoSided, + "less" => Alternative::Less, + "greater" => Alternative::Greater, + _ => Alternative::TwoSided, + } + } } diff --git a/src/stats_ext/normal_test.rs b/src/stats/normal_test.rs similarity index 98% rename from src/stats_ext/normal_test.rs rename to src/stats/normal_test.rs index afbc2876..37057c26 100644 --- a/src/stats_ext/normal_test.rs +++ b/src/stats/normal_test.rs @@ -10,7 +10,7 @@ /// /// I chose this over the Shapiro Francia test because the distribution is unknown and would require Monte Carlo use super::{simple_stats_output, StatsResult}; -use crate::stats::{gamma, is_zero}; +use crate::stats_utils::{gamma, is_zero}; use polars::prelude::*; use pyo3_polars::derive::polars_expr; diff --git a/src/stats_ext/sample.rs b/src/stats/sample.rs similarity index 100% rename from src/stats_ext/sample.rs rename to src/stats/sample.rs diff --git a/src/stats_ext/t_test.rs b/src/stats/t_test.rs similarity index 96% rename from src/stats_ext/t_test.rs rename to src/stats/t_test.rs index bce55898..99d5e596 100644 --- a/src/stats_ext/t_test.rs +++ b/src/stats/t_test.rs @@ -1,6 +1,6 @@ /// Student's t test and Welch's t test. use super::{simple_stats_output, Alternative, StatsResult}; -use crate::stats::{beta, is_zero}; +use crate::{stats_utils::{beta, is_zero}, stats}; use polars::prelude::*; use pyo3_polars::derive::polars_expr; @@ -110,7 +110,7 @@ fn pl_ttest_2samp(inputs: &[Series]) -> PolarsResult { let alt = inputs[5].utf8()?; let alt = alt.get(0).unwrap(); - let alt = super::Alternative::from(alt); + let alt = stats::Alternative::from(alt); let valid = mean1.is_finite() && mean2.is_finite() && var1.is_finite() && var2.is_finite(); if !valid { @@ -147,7 +147,7 @@ fn pl_welch_t(inputs: &[Series]) -> PolarsResult { let alt = inputs[6].utf8()?; let alt = alt.get(0).unwrap(); - let alt = super::Alternative::from(alt); + let alt = stats::Alternative::from(alt); // No need to check for validity because input is sanitized. @@ -175,7 +175,7 @@ fn pl_ttest_1samp(inputs: &[Series]) -> PolarsResult { let alt = inputs[4].utf8()?; let alt = alt.get(0).unwrap(); - let alt = super::Alternative::from(alt); + let alt = stats::Alternative::from(alt); // No need to check for validity because input is sanitized. diff --git a/src/stats_ext/mod.rs b/src/stats_ext/mod.rs deleted file mode 100644 index d0b50ff1..00000000 --- a/src/stats_ext/mod.rs +++ /dev/null @@ -1,64 +0,0 @@ -mod chi2; -mod fstats; -mod ks; -mod normal_test; -mod sample; -mod t_test; - -use polars::prelude::*; - -pub fn list_float_output(_: &[Field]) -> PolarsResult { - Ok(Field::new( - "list_float", - DataType::List(Box::new(DataType::Float64)), - )) -} - -pub fn simple_stats_output(_: &[Field]) -> PolarsResult { - let s = Field::new("statistic", DataType::Float64); - let p = Field::new("pvalue", DataType::Float64); - let v: Vec = vec![s, p]; - Ok(Field::new("", DataType::Struct(v))) -} - -struct StatsResult { - pub statistic: f64, - pub p: Option, -} - -impl StatsResult { - pub fn new(s: f64, p: f64) -> StatsResult { - StatsResult { - statistic: s, - p: Some(p), - } - } - - pub fn from_stats(s: f64) -> StatsResult { - StatsResult { - statistic: s, - p: None, - } - } - - pub fn unwrap_p_or(&self, default: f64) -> f64 { - self.p.unwrap_or(default) - } -} - -pub enum Alternative { - TwoSided, - Less, - Greater, -} - -impl From<&str> for Alternative { - fn from(s: &str) -> Alternative { - match s.to_lowercase().as_str() { - "two-sided" | "two" => Alternative::TwoSided, - "less" => Alternative::Less, - "greater" => Alternative::Greater, - _ => Alternative::TwoSided, - } - } -} diff --git a/src/stats/beta.rs b/src/stats_utils/beta.rs similarity index 100% rename from src/stats/beta.rs rename to src/stats_utils/beta.rs diff --git a/src/stats/gamma.rs b/src/stats_utils/gamma.rs similarity index 100% rename from src/stats/gamma.rs rename to src/stats_utils/gamma.rs diff --git a/src/stats_utils/mod.rs b/src/stats_utils/mod.rs new file mode 100644 index 00000000..6805d2d5 --- /dev/null +++ b/src/stats_utils/mod.rs @@ -0,0 +1,18 @@ +/// This submodule is mostly taken from the project statrs. See credit section in README.md +/// The reason I do not want to add it as a dependency is that it has a nalgebra dependency for +/// multi-variate distributions, which is something that I think will not be needed in this +/// package. Another reason is that if I want to do linear algebra, I would use Faer since Faer +/// performs better and nalgebra is too much of a dependency for this package right now. +pub mod beta; +pub mod gamma; +pub mod normal; + +pub const PREC_ACC: f64 = 0.0000000000000011102230246251565; +pub const LN_PI: f64 = 1.1447298858494001741434273513530587116472948129153; +//pub const LN_SQRT_2PI: f64 = 0.91893853320467274178032973640561763986139747363778; +pub const LN_2_SQRT_E_OVER_PI: f64 = 0.6207822376352452223455184457816472122518527279025978; + +#[inline] +pub fn is_zero(x: f64) -> bool { + x.abs() < PREC_ACC +} diff --git a/src/stats/normal.rs b/src/stats_utils/normal.rs similarity index 100% rename from src/stats/normal.rs rename to src/stats_utils/normal.rs diff --git a/src/str_ext/aho_corasick.rs b/src/str2/aho_corasick.rs similarity index 100% rename from src/str_ext/aho_corasick.rs rename to src/str2/aho_corasick.rs diff --git a/src/str_ext/consts.rs b/src/str2/consts.rs similarity index 100% rename from src/str_ext/consts.rs rename to src/str2/consts.rs diff --git a/src/str_ext/fuzz.rs b/src/str2/fuzz.rs similarity index 100% rename from src/str_ext/fuzz.rs rename to src/str2/fuzz.rs diff --git a/src/str_ext/hamming.rs b/src/str2/hamming.rs similarity index 100% rename from src/str_ext/hamming.rs rename to src/str2/hamming.rs diff --git a/src/str_ext/inflections.rs b/src/str2/inflections.rs similarity index 100% rename from src/str_ext/inflections.rs rename to src/str2/inflections.rs diff --git a/src/str_ext/is_stopword.rs b/src/str2/is_stopword.rs similarity index 100% rename from src/str_ext/is_stopword.rs rename to src/str2/is_stopword.rs diff --git a/src/str_ext/jaro.rs b/src/str2/jaro.rs similarity index 100% rename from src/str_ext/jaro.rs rename to src/str2/jaro.rs diff --git a/src/str_ext/levenshtein.rs b/src/str2/levenshtein.rs similarity index 100% rename from src/str_ext/levenshtein.rs rename to src/str2/levenshtein.rs diff --git a/src/str_ext/mod.rs b/src/str2/mod.rs similarity index 100% rename from src/str_ext/mod.rs rename to src/str2/mod.rs diff --git a/src/str_ext/osa.rs b/src/str2/osa.rs similarity index 100% rename from src/str_ext/osa.rs rename to src/str2/osa.rs diff --git a/src/str_ext/overlap.rs b/src/str2/overlap.rs similarity index 100% rename from src/str_ext/overlap.rs rename to src/str2/overlap.rs diff --git a/src/str_ext/snowball/algorithms/english_stemmer.rs b/src/str2/snowball/algorithms/english_stemmer.rs similarity index 99% rename from src/str_ext/snowball/algorithms/english_stemmer.rs rename to src/str2/snowball/algorithms/english_stemmer.rs index 945791fa..acb48732 100644 --- a/src/str_ext/snowball/algorithms/english_stemmer.rs +++ b/src/str2/snowball/algorithms/english_stemmer.rs @@ -5,8 +5,8 @@ #![allow(unused_mut)] #![allow(unused_parens)] #![allow(unused_variables)] -use crate::str_ext::snowball::Among; -use crate::str_ext::snowball::SnowballEnv; +use crate::str2::snowball::Among; +use crate::str2::snowball::SnowballEnv; static A_0: &'static [Among; 3] = &[ Among("arsen", -1, -1, None), diff --git a/src/str_ext/snowball/algorithms/mod.rs b/src/str2/snowball/algorithms/mod.rs similarity index 100% rename from src/str_ext/snowball/algorithms/mod.rs rename to src/str2/snowball/algorithms/mod.rs diff --git a/src/str_ext/snowball/among.rs b/src/str2/snowball/among.rs similarity index 78% rename from src/str_ext/snowball/among.rs rename to src/str2/snowball/among.rs index 18eebf10..6a083538 100644 --- a/src/str_ext/snowball/among.rs +++ b/src/str2/snowball/among.rs @@ -1,4 +1,4 @@ -use crate::str_ext::snowball::SnowballEnv; +use crate::str2::snowball::SnowballEnv; pub struct Among( pub &'static str, diff --git a/src/str_ext/snowball/mod.rs b/src/str2/snowball/mod.rs similarity index 92% rename from src/str_ext/snowball/mod.rs rename to src/str2/snowball/mod.rs index 15e38302..8174c4df 100644 --- a/src/str_ext/snowball/mod.rs +++ b/src/str2/snowball/mod.rs @@ -21,5 +21,5 @@ mod among; mod snowball_env; // TODO: why do we need this `crate::`? -pub use crate::str_ext::snowball::among::Among; -pub use crate::str_ext::snowball::snowball_env::SnowballEnv; +pub use crate::str2::snowball::among::Among; +pub use crate::str2::snowball::snowball_env::SnowballEnv; diff --git a/src/str_ext/snowball/snowball_env.rs b/src/str2/snowball/snowball_env.rs similarity index 99% rename from src/str_ext/snowball/snowball_env.rs rename to src/str2/snowball/snowball_env.rs index a080d432..8c5a47d1 100644 --- a/src/str_ext/snowball/snowball_env.rs +++ b/src/str2/snowball/snowball_env.rs @@ -1,4 +1,4 @@ -use crate::str_ext::snowball::Among; +use crate::str2::snowball::Among; use std::borrow::Cow; #[derive(Debug, Clone)] diff --git a/src/str_ext/snowball_stem.rs b/src/str2/snowball_stem.rs similarity index 100% rename from src/str_ext/snowball_stem.rs rename to src/str2/snowball_stem.rs diff --git a/src/str_ext/sorensen_dice.rs b/src/str2/sorensen_dice.rs similarity index 100% rename from src/str_ext/sorensen_dice.rs rename to src/str2/sorensen_dice.rs diff --git a/src/str_ext/str_jaccard.rs b/src/str2/str_jaccard.rs similarity index 100% rename from src/str_ext/str_jaccard.rs rename to src/str2/str_jaccard.rs