diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 0032b46..69075af 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -57,9 +57,9 @@ jobs: - run: | julia --project=docs -e ' using Documenter: DocMeta, doctest - using TMLECLI - DocMeta.setdocmeta!(TMLECLI, :DocTestSetup, :(using TMLECLI); recursive=true) - doctest(TMLECLI)' + using TmleCLI + DocMeta.setdocmeta!(TmleCLI, :DocTestSetup, :(using TmleCLI); recursive=true) + doctest(TmleCLI)' - run: julia --project=docs docs/make.jl env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/Project.toml b/Project.toml index 4008075..ee14227 100644 --- a/Project.toml +++ b/Project.toml @@ -1,4 +1,4 @@ -name = "TMLECLI" +name = "TmleCLI" uuid = "2573d147-4098-46ba-9db2-8608d210ccac" authors = ["Olivier Labayle"] version = "0.9.0" @@ -13,7 +13,6 @@ Configurations = "5218b696-f38b-4ac9-8b61-a12ec717816d" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" EvoTrees = "f6006082-12f8-11e9-0c9c-0d5d367ab1e5" GLMNet = "8d5ece8b-de18-5317-b113-243142960cc6" -HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" MKL = "33e6dc65-8f57-5167-99aa-e5a354878fb2" @@ -24,8 +23,6 @@ MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" MLJModels = "d491faf4-2d78-11e9-2867-c94bc002c0b7" MLJXGBoostInterface = "54119dfa-1dab-4055-a167-80440f4f7a91" Mmap = "a63ad114-7e13-5084-954f-fe012c677804" -MultipleTesting = "f8716d33-7c4a-5097-896f-ce0ecbd3ef6b" -Optim = "429524aa-4258-5aef-a3af-852621145aeb" PackageCompiler = "9b87118b-4619-50d2-8e1e-99f35a4d4d9d" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" @@ -52,9 +49,8 @@ MLJLinearModels = "0.10.0" MLJModelInterface = "1.8.0" MLJModels = "0.16, 0.17" MLJXGBoostInterface = "0.3.4" -MultipleTesting = "0.6.0" -Optim = "1.7" PackageCompiler = "2.1.16" +TMLE = "0.17" Tables = "1.10.1" YAML = "0.4.9" julia = "1.10, 1" 
diff --git a/README.md b/README.md index f96ff56..db2ac1c 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ -# TMLECLI +# TmleCLI -[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://targene.github.io/TMLE-CLI.jl/stable/) -![GitHub Workflow Status (with branch)](https://img.shields.io/github/actions/workflow/status/TARGENE/TMLE-CLI.jl/CI.yml?branch=main) -![Codecov](https://img.shields.io/codecov/c/github/TARGENE/TMLE-CLI.jl/main) -![GitHub release (latest SemVer)](https://img.shields.io/github/v/release/TARGENE/TMLE-CLI.jl) +[![Stable](https://img.shields.io/badge/docs-stable-blue.svg)](https://targene.github.io/TmleCLI.jl/stable/) +![GitHub Workflow Status (with branch)](https://img.shields.io/github/actions/workflow/status/TARGENE/TmleCLI.jl/CI.yml?branch=main) +![Codecov](https://img.shields.io/codecov/c/github/TARGENE/TmleCLI.jl/main) +![GitHub release (latest SemVer)](https://img.shields.io/github/v/release/TARGENE/TmleCLI.jl) Command Line Interface for Targeted Minimum-Loss Estimation of causal effects on Tabular datasets. \ No newline at end of file diff --git a/deps/build_sysimage.jl b/deps/build_sysimage.jl index 58fd67d..4c54f88 100644 --- a/deps/build_sysimage.jl +++ b/deps/build_sysimage.jl @@ -1,6 +1,6 @@ using PackageCompiler PackageCompiler.create_sysimage( - ["TMLECLI"], + ["TmleCLI"], cpu_target="generic", sysimage_path="TMLESysimage.so", precompile_execution_file="deps/execute.jl", diff --git a/deps/execute.jl b/deps/execute.jl index f3481da..99353e4 100644 --- a/deps/execute.jl +++ b/deps/execute.jl @@ -1,7 +1,7 @@ -using TMLECLI +using TmleCLI @info "Running precompilation script." 
# Run workload -TEST_DIR = joinpath(pkgdir(TMLECLI), "test") +TEST_DIR = joinpath(pkgdir(TmleCLI), "test") push!(LOAD_PATH, TEST_DIR) include(joinpath(TEST_DIR, "runtests.jl")) \ No newline at end of file diff --git a/docker/Dockerfile b/docker/Dockerfile index c298181..427140b 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -14,9 +14,9 @@ RUN bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" # Import the project -COPY . /TMLECLI.jl +COPY . /TmleCLI.jl -WORKDIR /TMLECLI.jl +WORKDIR /TmleCLI.jl # Precompile the project RUN julia --project -e'using Pkg; Pkg.instantiate(); Pkg.resolve(); Pkg.precompile()' diff --git a/docs/make.jl b/docs/make.jl index d9d5c4b..6e938d8 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,18 +1,18 @@ using Documenter -using TMLECLI +using TmleCLI -DocMeta.setdocmeta!(TMLECLI, :DocTestSetup, :(using TMLECLI); recursive=true) +DocMeta.setdocmeta!(TmleCLI, :DocTestSetup, :(using TmleCLI); recursive=true) makedocs( authors="Olivier Labayle", - repo="https://github.com/TARGENE/TMLE-CLI.jl/blob/{commit}{path}#{line}", - sitename = "TMLE-CLI.jl", + repo="https://github.com/TARGENE/TmleCLI.jl/blob/{commit}{path}#{line}", + sitename = "TmleCLI.jl", format = Documenter.HTML(; prettyurls=get(ENV, "CI", "false") == "true", - canonical="https://TARGENE.github.io/TMLE-CLI.jl", + canonical="https://TARGENE.github.io/TmleCLI.jl", assets=String["assets/logo.ico"], ), - modules = [TMLECLI], + modules = [TmleCLI], pages=[ "Home" => "index.md", "Command Line Interface" => ["cli.md", "tmle_estimation.md", "sieve_variance.md", "make_summary.md"], @@ -25,7 +25,7 @@ makedocs( @info "Deploying docs..." 
deploydocs(; - repo="github.com/TARGENE/TMLE-CLI.jl", + repo="github.com/TARGENE/TmleCLI.jl", devbranch="main", push_preview=true ) \ No newline at end of file diff --git a/docs/src/index.md b/docs/src/index.md index eee7166..d0c3fe7 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,4 +1,4 @@ -# TMLE-CLI.jl +# TmleCLI.jl The goal of this package, is to provide a standalone executable to run large scale Targeted Minimum Loss-based Estimation ([TMLE](https://link.springer.com/book/10.1007/978-1-4419-9782-1)) on tabular datasets. To learn more about TMLE, please visit [TMLE.jl](https://targene.github.io/TMLE.jl/stable/), the companion package. diff --git a/docs/src/models.md b/docs/src/models.md index 9601509..699972b 100644 --- a/docs/src/models.md +++ b/docs/src/models.md @@ -1,7 +1,7 @@ # Models ```@meta -CurrentModule = TMLECLI +CurrentModule = TmleCLI ``` Because [TMLE.jl](https://targene.github.io/TMLE.jl/stable/) is based on top of [MLJ](https://alan-turing-institute.github.io/MLJ.jl/dev/), we can support any model respecting the MLJ interface. At the moment, we readily support all models from the following packages: @@ -12,13 +12,13 @@ Because [TMLE.jl](https://targene.github.io/TMLE.jl/stable/) is based on top of - [GLMNet](https://github.com/JuliaStats/GLMNet.jl): A Julia wrapper of the [glmnet](https://glmnet.stanford.edu/articles/glmnet.html) package. See the [GLMNet](@ref) section. - [MLJModels](https://github.com/JuliaAI/MLJModels.jl): General utilities such as the `OneHotEncoder` or `InteractionTransformer`. -Further support for more packages can be added on request, please fill an [issue](https://github.com/TARGENE/TMLE-CLI.jl/issues). +Further support for more packages can be added on request, please file an [issue](https://github.com/TARGENE/TmleCLI.jl/issues). 
Also, because the estimator file used by the TMLE CLI is a pure Julia file, it is possible to use it in order to install additional package that can be used to define additional models. -Finally, we also provide some additional models described in [Additional models provided by TMLE-CLI.jl](@ref). +Finally, we also provide some additional models described in [Additional models provided by TmleCLI.jl](@ref). -## Additional models provided by TMLE-CLI.jl +## Additional models provided by TmleCLI.jl ### GLMNet diff --git a/docs/src/resampling.md b/docs/src/resampling.md index 9d00fec..6527b39 100644 --- a/docs/src/resampling.md +++ b/docs/src/resampling.md @@ -1,7 +1,7 @@ # Resampling Strategies ```@meta -CurrentModule = TMLECLI +CurrentModule = TmleCLI ``` We also provide additional resampling strategies compliant with the `MLJ.ResamplingStrategy` interface. diff --git a/docs/src/tmle_estimation.md b/docs/src/tmle_estimation.md index c2f8adc..15342c3 100644 --- a/docs/src/tmle_estimation.md +++ b/docs/src/tmle_estimation.md @@ -124,7 +124,7 @@ Linear models typically do not involve any interaction terms. Here, to add extra ### Estimators Via A Julia File -Building an estimator via a configuration string is quite flexible and should cover most use cases. However, in some cases you may want to have full control over the estimation procedure. This is possible by instead providing a Julia configuration file describing the estimators to be used. The file should define an `ESTIMATORS` NamedTuple describing the estimators to be used, and some examples can be found [here](https://github.com/TARGENE/TMLE-CLI.jl/tree/treatment_values/estimators-configs). +Building an estimator via a configuration string is quite flexible and should cover most use cases. However, in some cases you may want to have full control over the estimation procedure. This is possible by instead providing a Julia configuration file describing the estimators to be used. 
The file should define an `ESTIMATORS` NamedTuple describing the estimators to be used, and some examples can be found [here](https://github.com/TARGENE/TmleCLI.jl/tree/treatment_values/estimators-configs). For further information, we recommend you have a look at both: @@ -155,7 +155,7 @@ In what follows, `Y` is an outcome of interest, `W` a set of confounding variabl For all the following experiments: -- The Julia script can be found at [experiments/runtime.jl](https://github.com/TARGENE/TMLE-CLI.jl/tree/main/experiments/runtime.jl). +- The Julia script can be found at [experiments/runtime.jl](https://github.com/TARGENE/TmleCLI.jl/tree/main/experiments/runtime.jl). - The various estimators used below are further described in the[estimators-configs](https://github.com/TARGENE/TMLE.jl/tree/main/estimators-configs) folder. ### Multiple treatment contrasts diff --git a/experiments/runtime.jl b/experiments/runtime.jl index 953539a..95d8e7e 100644 --- a/experiments/runtime.jl +++ b/experiments/runtime.jl @@ -1,5 +1,5 @@ using ArgParse -using TMLECLI +using TmleCLI const ESTIMATORS = [ "glm", @@ -60,7 +60,7 @@ function main(parsed_args) "chunksize" => 100, "verbosity" => parsed_args["verbosity"], ) - nparams = length(TMLECLI.parameters_from_yaml(paramfile)) + nparams = length(TmleCLI.parameters_from_yaml(paramfile)) # Time it: this will include precompilation time t_start = time() diff --git a/src/TMLECLI.jl b/src/TMLECLI.jl index f38f5f2..6b78cc8 100644 --- a/src/TMLECLI.jl +++ b/src/TMLECLI.jl @@ -1,4 +1,4 @@ -module TMLECLI +module TmleCLI if occursin("Intel", Sys.cpu_info()[1].model) using MKL @@ -20,7 +20,6 @@ using GLMNet using MLJModels using Mmap using Serialization -using MultipleTesting using Combinatorics using Tables using Random diff --git a/src/models/biallelic_snp_encoder.jl b/src/models/biallelic_snp_encoder.jl index 8851d31..d1ae8dc 100644 --- a/src/models/biallelic_snp_encoder.jl +++ b/src/models/biallelic_snp_encoder.jl @@ -59,7 +59,7 @@ function 
MLJModelInterface.transform(model::BiAllelicSNPEncoder, fitresult, X) ref_allele = fitresult[colname] newcoltype = Missing <: eltype(column) ? Union{Missing, Int} : Int newcolumn = Vector{newcoltype}(undef, size(column, 1)) - TMLECLI.count_nref!(newcolumn, column, ref_allele) + TmleCLI.count_nref!(newcolumn, column, ref_allele) else newcolumn = column end diff --git a/src/runner.jl b/src/runner.jl index 948587b..aa81777 100644 --- a/src/runner.jl +++ b/src/runner.jl @@ -125,7 +125,7 @@ function (runner::Runner)(partition) continue end # Make sure data types are appropriate for the estimand - TMLECLI.coerce_types!(runner.dataset, Ψ) + TmleCLI.coerce_types!(runner.dataset, Ψ) # Maybe update cache with new η_spec estimators_results = [] for estimator in runner.estimators diff --git a/test/cache_managers.jl b/test/cache_managers.jl index 96e1b4d..583fe64 100644 --- a/test/cache_managers.jl +++ b/test/cache_managers.jl @@ -1,21 +1,21 @@ module TestRunner -using TMLECLI +using TmleCLI using Test using TMLE @testset "Test NoCacheManager" begin - cache_manager = TMLECLI.NoCacheManager() + cache_manager = TmleCLI.NoCacheManager() cache_manager.cache["Toto"] = 1 cache_manager.cache["Tata"] = 2 - TMLECLI.release!(cache_manager, nothing) + TmleCLI.release!(cache_manager, nothing) @test cache_manager.cache == Dict() # Check this does not throw - TMLECLI.release!(cache_manager, nothing) + TmleCLI.release!(cache_manager, nothing) end @testset "Test MaxSizeCacheManager" begin - cache_manager = TMLECLI.MaxSizeCacheManager(3) + cache_manager = TmleCLI.MaxSizeCacheManager(3) Y_T₁T₂ = TMLE.ConditionalDistribution(:Y, (:T₁, :T₂, :W)) cache_manager.cache[Y_T₁T₂] = 1 T₁_W = TMLE.ConditionalDistribution(:T₁, (:W,)) @@ -29,7 +29,7 @@ end cache_manager.cache[η] = 1 cache_manager.cache[:last_fluctuation] = 1 length(cache_manager.cache) == 5 - TMLECLI.release!(cache_manager, nothing) + TmleCLI.release!(cache_manager, nothing) # CMRelevantFactors and fluctuation dropped @test 
cache_manager.cache == Dict( TMLE.ConditionalDistribution(:Y, (:T₁, :T₂, :W)) => 1, @@ -62,7 +62,7 @@ end ) ] η_counts = TMLE.nuisance_function_counts(estimands) - cache_manager = TMLECLI.ReleaseUnusableCacheManager(η_counts) + cache_manager = TmleCLI.ReleaseUnusableCacheManager(η_counts) # Estimation of the first estimand will fill the cache with the following Y_T₁T₂ = TMLE.ConditionalDistribution(:Y, (:T₁, :T₂, :W)) cache_manager.cache[Y_T₁T₂] = 1 @@ -78,7 +78,7 @@ end cache_manager.cache[:last_fluctuation] = 1 @test length(cache_manager.cache) == 5 # After estimation of the first estimand, the fluctuation and composite factor are released - TMLECLI.release!(cache_manager, estimands[1]) + TmleCLI.release!(cache_manager, estimands[1]) @test cache_manager.cache == Dict( TMLE.ConditionalDistribution(:Y, (:T₁, :T₂, :W)) => 1, TMLE.ConditionalDistribution(:T₂, (:W,)) => 1, @@ -89,7 +89,7 @@ end cache_manager.cache[η] = 1 cache_manager.cache[:last_fluctuation] = 1 # Y_T₁T₂ and T₂_W are no longer needed - TMLECLI.release!(cache_manager, estimands[2]) + TmleCLI.release!(cache_manager, estimands[2]) @test cache_manager.cache == Dict(TMLE.ConditionalDistribution(:T₁, (:W,)) => 1) # Estimation of the third estimand will fill the cache with the following @@ -102,10 +102,10 @@ end cache_manager.cache[η] = 1 cache_manager.cache[:last_fluctuation] = 1 # Y_T₁ and T₁_W are no longer needed - TMLECLI.release!(cache_manager, estimands[3]) + TmleCLI.release!(cache_manager, estimands[3]) @test cache_manager.cache == Dict() # Check this does not throw - TMLECLI.release!(cache_manager, estimands[1]) + TmleCLI.release!(cache_manager, estimands[1]) end end diff --git a/test/models/adaptive_interaction_transformer.jl b/test/models/adaptive_interaction_transformer.jl index 7846b5a..100bdbd 100644 --- a/test/models/adaptive_interaction_transformer.jl +++ b/test/models/adaptive_interaction_transformer.jl @@ -1,7 +1,7 @@ module TestRestrictedInteractionTransformer using Test -using TMLECLI 
+using TmleCLI using Random using MLJBase using Tables @@ -44,7 +44,7 @@ using Tables ] @test expected_interactions == interactions Xt = MLJBase.transform(mach, X) - feature_names = TMLECLI.feature_names(interactions) + feature_names = TmleCLI.feature_names(interactions) @test Tables.columnnames(Xt) == tuple(Tables.columnnames(X)..., feature_names...) @test Xt[Symbol("rs1234_&_sex_&_PC_2")] == X[:rs1234].*X[:sex].*X[:PC_2] @@ -72,7 +72,7 @@ using Tables ] @test expected_interactions == interactions Xt = MLJBase.transform(mach, X) - feature_names = TMLECLI.feature_names(interactions) + feature_names = TmleCLI.feature_names(interactions) @test Tables.columnnames(Xt) == tuple(Tables.columnnames(X)..., feature_names...) # Test with both primary_variables and primary_variables_patterns @@ -110,7 +110,7 @@ using Tables ] @test expected_interactions == interactions Xt = MLJBase.transform(mach, X) - feature_names = TMLECLI.feature_names(interactions) + feature_names = TmleCLI.feature_names(interactions) @test Tables.columnnames(Xt) == tuple(Tables.columnnames(X)..., feature_names...) 
# Invalid column @@ -120,7 +120,7 @@ using Tables primary_variables=[:A, :B], ) mach = machine(model, X, scitype_check_level=0) - @test_throws TMLECLI.InvalidColumnError("A") fit!(mach, verbosity=0) + @test_throws TmleCLI.InvalidColumnError("A") fit!(mach, verbosity=0) end end diff --git a/test/models/biallelic_snp_encoder.jl b/test/models/biallelic_snp_encoder.jl index c0ffb37..e47315f 100644 --- a/test/models/biallelic_snp_encoder.jl +++ b/test/models/biallelic_snp_encoder.jl @@ -1,7 +1,7 @@ module TestBiAllelelicSNPEncoder using Test -using TMLECLI +using TmleCLI using CategoricalArrays using MLJBase @@ -28,21 +28,21 @@ using MLJBase othercol = [1, 2, 3, 4] ) mach = machine(BiAllelicSNPEncoder(patterns=[r"^rs"]), X) - @test_throws TMLECLI.NonBiAllelicGenotypeError(:rs1234, "CCC") fit!(mach, verbosity=0) + @test_throws TmleCLI.NonBiAllelicGenotypeError(:rs1234, "CCC") fit!(mach, verbosity=0) X = ( rs1234 = categorical(["AC", "CC", "CCT", missing]), othercol = [1, 2, 3, 4] ) mach = machine(BiAllelicSNPEncoder(patterns=[r"^rs"]), X) - @test_throws TMLECLI.NonBiallelicSNPError(:rs1234) fit!(mach, verbosity=0) + @test_throws TmleCLI.NonBiallelicSNPError(:rs1234) fit!(mach, verbosity=0) X = ( rs1234 = ["AC", "CC", "CCT", missing], othercol = [1, 2, 3, 4] ) mach = machine(BiAllelicSNPEncoder(patterns=[r"^rs"]), X) - @test_throws TMLECLI.NonCategoricalVectorError(:rs1234) fit!(mach, verbosity=0) + @test_throws TmleCLI.NonCategoricalVectorError(:rs1234) fit!(mach, verbosity=0) end diff --git a/test/models/glmnet.jl b/test/models/glmnet.jl index 2234735..8796fb5 100644 --- a/test/models/glmnet.jl +++ b/test/models/glmnet.jl @@ -1,7 +1,7 @@ module TestGLMNet using Test -using TMLECLI +using TmleCLI using MLJ using StableRNGs @@ -10,9 +10,9 @@ using StableRNGs rng = StableRNG(123) X = rand(rng, n, 3) y = [1, 1, 1, 1, 1, 0, 0, 0, 0, 0] - folds = TMLECLI.getfolds(CV(), X, y) + folds = TmleCLI.getfolds(CV(), X, y) @test folds == [1, 1, 2, 2, 3, 3, 4, 4, 5, 6] - folds = 
TMLECLI.getfolds(StratifiedCV(nfolds=3), X, y) + folds = TmleCLI.getfolds(StratifiedCV(nfolds=3), X, y) @test folds == [1, 2, 3, 1, 2, 3, 1, 2, 3, 1] end @@ -22,7 +22,7 @@ end rng = StableRNG(123) n, p = 1000, 5 X, y = make_regression(n, p, rng=rng) - net = TMLECLI.GLMNetRegressor(resampling=CV(nfolds=3), rng=rng) + net = TmleCLI.GLMNetRegressor(resampling=CV(nfolds=3), rng=rng) mach = machine(net, X, y) pe = evaluate!(mach, measure=rmse, resampling=CV(rng=rng), verbosity=0) @test pe.measurement[1] < 0.1 @@ -30,7 +30,7 @@ end # Binary outcome rng = StableRNG(123) X, y = make_moons(n, rng=rng) - net = TMLECLI.GLMNetClassifier(rng=rng) + net = TmleCLI.GLMNetClassifier(rng=rng) mach = machine(net, X, y) pe = evaluate!(mach, measure=log_loss, resampling=JointStratifiedCV(resampling=StratifiedCV(rng=rng)), verbosity=0) @test pe.measurement[1] < 0.180 @@ -38,7 +38,7 @@ end # Multivariate outcome rng = StableRNG(123) X, y = make_blobs(n, rng=rng) - net = TMLECLI.GLMNetClassifier(resampling=StratifiedCV(nfolds=3), rng=rng) + net = TmleCLI.GLMNetClassifier(resampling=StratifiedCV(nfolds=3), rng=rng) mach = machine(net, X, y) pe = evaluate!(mach, measure=[log_loss], resampling=StratifiedCV(rng=rng), verbosity=0) @test pe.measurement[1] < 0.008 diff --git a/test/models/registry.jl b/test/models/registry.jl index 739bde0..36f25db 100644 --- a/test/models/registry.jl +++ b/test/models/registry.jl @@ -1,6 +1,6 @@ module TestRegistry -using TMLECLI +using TmleCLI using Test using TMLE using MLJBase @@ -9,7 +9,7 @@ using MLJLinearModels @testset "Test estimators_from_string: no models provided" begin # Default configuration results in GLMNets with interactions of order 2 - estimators = TMLECLI.estimators_from_string(config_string="wtmle-ose", treatment_variables=Set([:T1, :T2])) + estimators = TmleCLI.estimators_from_string(config_string="wtmle-ose", treatment_variables=Set([:T1, :T2])) ## Check estimators @test estimators.WTMLE_GLMNET_GLMNET isa TMLEE @test 
estimators.WTMLE_GLMNET_GLMNET.weighted === true @@ -41,7 +41,7 @@ end @testset "Test estimators_from_string: 1 model provided" begin # 1 model is provided and used for all nuisance functions - estimators = TMLECLI.estimators_from_string(config_string="cvtmle-cvose--tunedxgboost", treatment_variables=[]) + estimators = TmleCLI.estimators_from_string(config_string="cvtmle-cvose--tunedxgboost", treatment_variables=[]) expected_resampling = JointStratifiedCV( patterns = Regex[], resampling=StratifiedCV(nfolds=3) @@ -63,7 +63,7 @@ end @testset "Test estimators_from_string: 2 models provided" begin # 2 model is provided for nuisance functions - estimators = TMLECLI.estimators_from_string(config_string="tmle--sl--glm", treatment_variables=["Coco"]) + estimators = TmleCLI.estimators_from_string(config_string="tmle--sl--glm", treatment_variables=["Coco"]) ## Check estimators @test estimators.TMLE_SL_GLM isa TMLEE @test estimators.TMLE_SL_GLM.weighted === false diff --git a/test/outputs.jl b/test/outputs.jl index 547dbd3..0033136 100644 --- a/test/outputs.jl +++ b/test/outputs.jl @@ -1,22 +1,22 @@ module TestOutputs -using TMLECLI +using TmleCLI using Test using JSON -TESTDIR = joinpath(pkgdir(TMLECLI), "test") +TESTDIR = joinpath(pkgdir(TmleCLI), "test") include(joinpath(TESTDIR, "testutils.jl")) @testset "Test initialize" begin tmpdir = mktempdir() - outputs = TMLECLI.Outputs( + outputs = TmleCLI.Outputs( json = joinpath(tmpdir, "output.json"), jls = joinpath(tmpdir, "output.jls"), hdf5 = joinpath(tmpdir, "output.hdf5"), ) - TMLECLI.initialize(outputs) + TmleCLI.initialize(outputs) @test readlines(open(outputs.json)) == ["["] @test !isfile(outputs.jls) @@ -27,7 +27,7 @@ include(joinpath(TESTDIR, "testutils.jl")) touch(outputs.hdf5) @test isfile(outputs.jls) @test isfile(outputs.hdf5) - TMLECLI.initialize(outputs) + TmleCLI.initialize(outputs) @test readlines(open(outputs.json)) == ["["] @test !isfile(outputs.jls) @test !isfile(outputs.hdf5) @@ -43,10 +43,10 @@ end end 
tmpdir = mktempdir() filename = joinpath(tmpdir, "output_test.json") - TMLECLI.initialize_json(filename) - TMLECLI.update_json(filename, results[1:3]) - TMLECLI.update_json(filename, results[4:end]) - TMLECLI.finalize_json(filename) + TmleCLI.initialize_json(filename) + TmleCLI.update_json(filename, results[1:3]) + TmleCLI.update_json(filename, results[4:end]) + TmleCLI.finalize_json(filename) loaded_results = TMLE.read_json(filename, use_mmap=false) @test size(loaded_results) == size(results) for (result, loaded_result) in zip(results, loaded_results) diff --git a/test/resampling.jl b/test/resampling.jl index eb1eba1..9382c52 100644 --- a/test/resampling.jl +++ b/test/resampling.jl @@ -1,7 +1,7 @@ module TestResampling using Test -using TMLECLI +using TmleCLI using CategoricalArrays using MLJBase using StableRNGs @@ -11,8 +11,8 @@ using StableRNGs ##################################################################### @testset "Test AdativeResampling's methods" begin - @test TMLECLI.base_resampling(AdaptiveCV()) == CV - @test TMLECLI.base_resampling(AdaptiveStratifiedCV()) == StratifiedCV + @test TmleCLI.base_resampling(AdaptiveCV()) == CV + @test TmleCLI.base_resampling(AdaptiveStratifiedCV()) == StratifiedCV end @testset "Test AdaptiveCV" begin @@ -28,7 +28,7 @@ end # Categorical target cv = AdaptiveStratifiedCV() y = categorical(["a", "a", "a", "b", "b", "c", "c"]) - @test TMLECLI.countuniques(y) == [3, 2, 2] + @test TmleCLI.countuniques(y) == [3, 2, 2] ## neff < 30 => nfolds = 5*neff = 7 ttp = MLJBase.train_test_pairs(cv, 1:7, y) @test length(ttp) == 7 @@ -37,7 +37,7 @@ end ## neff = 2500 => 10 n = 50_500 y = categorical(vcat(repeat([true], 50_000), repeat([false], 500))) - @test TMLECLI.countuniques(y) == [50_000, 500] + @test TmleCLI.countuniques(y) == [50_000, 500] ttp = MLJBase.train_test_pairs(cv, 1:n, y) @test length(ttp)== 10 @test ttp == MLJBase.train_test_pairs(StratifiedCV(nfolds=10), 1:n, y) @@ -56,27 +56,27 @@ end ) y = categorical([1, 0, 1, 1, 0, 1, 
1]) - stratification_col = TMLECLI.initialize_aggregate(y) + stratification_col = TmleCLI.initialize_aggregate(y) @test all(stratification_col .== "") # No pattern specified, all X finite variables are considered, i.e. X1 and X2 - TMLECLI.aggregate_features!(stratification_col, nothing, X) + TmleCLI.aggregate_features!(stratification_col, nothing, X) @test stratification_col == ["_0_1", "_0_2", "_1_4", "_0_4", "_1_2", "_0_2", "_missing_missing"] # y is finite and is considered - TMLECLI.aggretate_finite_col!(stratification_col, y) + TmleCLI.aggretate_finite_col!(stratification_col, y) @test stratification_col == ["_0_1_1", "_0_2_0", "_1_4_1", "_0_4_1", "_1_2_0", "_0_2_1", "_missing_missing_1"] y = [1., 1.1, 2., 5., -4., -4., 3.2] - stratification_col = TMLECLI.initialize_aggregate(y) + stratification_col = TmleCLI.initialize_aggregate(y) # Only X1 will be matched - TMLECLI.aggregate_features!(stratification_col, [r"X1"], X) + TmleCLI.aggregate_features!(stratification_col, [r"X1"], X) @test stratification_col == ["_0", "_0", "_1", "_0", "_1", "_0", "_missing"] # y is continuous and is not considered - TMLECLI.aggretate_finite_col!(stratification_col, y) + TmleCLI.aggretate_finite_col!(stratification_col, y) @test stratification_col == ["_0", "_0", "_1", "_0", "_1", "_0", "_missing"] # Used by registry - @test TMLECLI.matches_patterns("T1", [r"^T1$", r"^T2$"]) - @test !TMLECLI.matches_patterns("T3", [r"^T1$", r"^T2$"]) + @test TmleCLI.matches_patterns("T1", [r"^T1$", r"^T2$"]) + @test !TmleCLI.matches_patterns("T3", [r"^T1$", r"^T2$"]) end @testset "Test JointStratifiedCV" begin diff --git a/test/runner.jl b/test/runner.jl index 7d9b96e..34e1e30 100644 --- a/test/runner.jl +++ b/test/runner.jl @@ -1,7 +1,7 @@ module TestsTMLE using Test -using TMLECLI +using TmleCLI using TMLE using JLD2 using CSV @@ -11,7 +11,7 @@ using JSON using MLJBase using MLJModels -PKGDIR = pkgdir(TMLECLI) +PKGDIR = pkgdir(TmleCLI) TESTDIR = joinpath(PKGDIR, "test") CONFIGDIR = 
joinpath(TESTDIR, "config") @@ -19,7 +19,7 @@ include(joinpath(TESTDIR, "testutils.jl")) @testset "Test instantiate_estimators from file" begin # From explicit file - estimators = TMLECLI.instantiate_estimators(joinpath(TESTDIR, "config", "tmle_ose_config.jl"), nothing) + estimators = TmleCLI.instantiate_estimators(joinpath(TESTDIR, "config", "tmle_ose_config.jl"), nothing) @test estimators.TMLE isa TMLE.TMLEE @test estimators.OSE isa TMLE.OSE @test estimators.TMLE.weighted === true @@ -27,7 +27,7 @@ include(joinpath(TESTDIR, "testutils.jl")) @test estimators.TMLE.models[:G_default].continuous_encoder isa MLJModels.ContinuousEncoder @test estimators.TMLE.models[:G_default].probabilistic_stack isa MLJBase.ProbabilisticStack # From already constructed estimators - estimators_new = TMLECLI.instantiate_estimators(estimators, nothing) + estimators_new = TmleCLI.instantiate_estimators(estimators, nothing) @test estimators_new === estimators end @@ -36,12 +36,12 @@ end dataset = build_dataset(;n=1000) config = statistical_estimands_only_config() - outputs = TMLECLI.Outputs( + outputs = TmleCLI.Outputs( json=joinpath(tmpdir, "output.json"), hdf5=joinpath(tmpdir, "output.hdf5"), jls=joinpath(tmpdir, "output.jls"), ) - estimators = TMLECLI.instantiate_estimators(joinpath(CONFIGDIR, "tmle_ose_config.jl"), nothing) + estimators = TmleCLI.instantiate_estimators(joinpath(CONFIGDIR, "tmle_ose_config.jl"), nothing) runner = Runner( dataset; estimands_config=config, @@ -52,7 +52,7 @@ end pvalue_threshold=1e-5 ) # Initialize outputs - TMLECLI.initialize(outputs) + TmleCLI.initialize(outputs) # Run partition = 4:6 results = runner(partition) @@ -61,9 +61,9 @@ end @test result.OSE isa TMLE.OSEstimate end # Update outputs - TMLECLI.update_outputs(runner, results) + TmleCLI.update_outputs(runner, results) # Finalize outputs - TMLECLI.finalize(runner.outputs) + TmleCLI.finalize(runner.outputs) # Test Save to JSON loaded_json_results = TMLE.read_json(outputs.json, use_mmap=false) @@ 
-108,7 +108,7 @@ end estimands_filename = joinpath(tmpdir, "configuration.json") configuration = statistical_estimands_only_config() TMLE.write_json(estimands_filename, configuration) - outputs = TMLECLI.Outputs( + outputs = TmleCLI.Outputs( json=joinpath(tmpdir, "output.json"), hdf5=joinpath(tmpdir, "output.hdf5"), ) @@ -179,7 +179,7 @@ end tmpdir = mktempdir() datafile = joinpath(tmpdir, "data.csv") write_dataset(datafile) - outputs = TMLECLI.Outputs( + outputs = TmleCLI.Outputs( json=joinpath(tmpdir, "output.json"), hdf5=joinpath(tmpdir, "output.hdf5") ) @@ -216,10 +216,10 @@ end jldopen(outputs.hdf5) do io results_from_hdf5 = io["Batch_1"] for estimator in (:OSE, :TMLE) - @test results_from_hdf5[1][estimator] isa TMLECLI.FailedEstimate + @test results_from_hdf5[1][estimator] isa TmleCLI.FailedEstimate @test results_from_hdf5[2][estimator] isa TMLE.EICEstimate for i in 3:6 - @test results_from_hdf5[i][estimator] isa TMLECLI.FailedEstimate + @test results_from_hdf5[i][estimator] isa TmleCLI.FailedEstimate @test results_from_hdf5[i][estimator].estimand isa TMLE.Estimand end end diff --git a/test/runtests.jl b/test/runtests.jl index 890f7d0..4041b13 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,7 +1,7 @@ -using TMLECLI +using TmleCLI using Test -TESTDIR = joinpath(pkgdir(TMLECLI), "test") +TESTDIR = joinpath(pkgdir(TmleCLI), "test") @time begin @test include(joinpath(TESTDIR, "outputs.jl")) diff --git a/test/sieve_variance.jl b/test/sieve_variance.jl index 4677c66..e7aed84 100644 --- a/test/sieve_variance.jl +++ b/test/sieve_variance.jl @@ -6,12 +6,12 @@ using CSV using JLD2 using TMLE using CategoricalArrays -using TMLECLI +using TmleCLI using StableRNGs using Distributions using LogExpFunctions -TESTDIR = joinpath(pkgdir(TMLECLI), "test") +TESTDIR = joinpath(pkgdir(TmleCLI), "test") include(joinpath(TESTDIR, "testutils.jl")) @@ -55,7 +55,7 @@ function build_tmle_output_file(dir, sample_ids, estimandfile, outprefix; ) datafile = joinpath(dir, 
"data.csv") write_sieve_dataset(datafile, sample_ids) - outputs = TMLECLI.Outputs( + outputs = TmleCLI.Outputs( hdf5=joinpath(dir, string(outprefix, ".hdf5")), ) tmle(datafile; @@ -108,14 +108,14 @@ function test_initial_output(output, expected_output) end @testset "Test readGRM" begin prefix = joinpath(TESTDIR, "data", "grm", "test.grm") - GRM, ids = TMLECLI.readGRM(prefix) + GRM, ids = TmleCLI.readGRM(prefix) @test eltype(ids.SAMPLE_ID) == String @test size(GRM, 1) == 18915 @test size(ids, 1) == 194 end @testset "Test build_work_list" begin - grm_ids = TMLECLI.GRMIDs(joinpath(TESTDIR, "data", "grm", "test.grm.id")) + grm_ids = TmleCLI.GRMIDs(joinpath(TESTDIR, "data", "grm", "test.grm.id")) tmpdir = mktempdir() configuration = statistical_estimands_only_config() @@ -131,7 +131,7 @@ end TMLE.write_json(estimandsfile_2, config_2) build_tmle_output_file(tmpdir, grm_ids.SAMPLE_ID, estimandsfile_2, "tmle_output_2") - results, influence_curves, n_obs = TMLECLI.build_work_list(joinpath(tmpdir, "tmle_output"), grm_ids) + results, influence_curves, n_obs = TmleCLI.build_work_list(joinpath(tmpdir, "tmle_output"), grm_ids) # Check n_obs @test n_obs == [194, 194, 194, 193, 193, 194] # Check influence curves @@ -149,7 +149,7 @@ end estimandsfile = joinpath(tmpdir, "configuration.json") TMLE.write_json(estimandsfile, configuration) build_tmle_output_file(tmpdir, grm_ids.SAMPLE_ID, estimandsfile, "tmle_output"; pvalue_threshold=pvalue_threshold) - results, influence_curves, n_obs = TMLECLI.build_work_list(joinpath(tmpdir, "tmle_output"), grm_ids) + results, influence_curves, n_obs = TmleCLI.build_work_list(joinpath(tmpdir, "tmle_output"), grm_ids) # Check n_obs @test n_obs == [194, 193, 193, 194] # Check influence curves @@ -165,11 +165,11 @@ end @testset "Test bit_distance" begin sample_grm = Float32[-0.6, -0.8, -0.25, -0.3, -0.1, 0.1, 0.7, 0.5, 0.2, 1.] 
nτs = 6 - τs = TMLECLI.default_τs(nτs, max_τ=0.75) + τs = TmleCLI.default_τs(nτs, max_τ=0.75) @test τs == Float32[0.0, 0.15, 0.3, 0.45, 0.6, 0.75] - τs = TMLECLI.default_τs(nτs) + τs = TmleCLI.default_τs(nτs) @test τs == Float32[0., 0.4, 0.8, 1.2, 1.6, 2.0] - d = TMLECLI.bit_distances(sample_grm, τs) + d = TmleCLI.bit_distances(sample_grm, τs) @test d == [0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 @@ -187,7 +187,7 @@ end 0. 0. 1. 1. 1. 0. 1. 1.] sample = 4 - var_ = TMLECLI.aggregate_variances(influence_curves, indicator, sample) + var_ = TmleCLI.aggregate_variances(influence_curves, indicator, sample) @test var_ == [24.0 189.0 40.0 225.0 48.0 333.0] @@ -198,7 +198,7 @@ end n_obs = [10, 10, 100] variances = [1. 2. 3. 4. 5. 6.] - TMLECLI.normalize!(variances, n_obs) + TmleCLI.normalize!(variances, n_obs) @test variances == [0.1 0.2 0.03 0.4 0.5 0.06] end @@ -208,7 +208,7 @@ end n_samples = 5 nτs = 5 n_obs = [3, 4, 4] - τs = TMLECLI.default_τs(nτs) + τs = TmleCLI.default_τs(nτs) # The GRM has 15 lower triangular elements grm = Float32[0.4, 0.1, 0.5, 0.2, -0.2, 0.6, 0.3, -0.6, 0.4, 0.3, 0.6, 0.3, 0.7, 0.3, 0.1] @@ -217,7 +217,7 @@ end 0.0 0. 
0.1 0.3 0.2] - variances = TMLECLI.compute_variances(influence_curves, grm, τs, n_obs) + variances = TmleCLI.compute_variances(influence_curves, grm, τs, n_obs) @test size(variances) == (nτs, n_curves) # when τ=2, all elements are used @@ -235,7 +235,7 @@ end # Check against basic_variance_implementation matrix_distance = zeros(Float32, n_samples, n_samples) for τ_id in 1:nτs - vector_distance = TMLECLI.bit_distances(grm, [τs[τ_id]]) + vector_distance = TmleCLI.bit_distances(grm, [τs[τ_id]]) distance_vector_to_matrix!(matrix_distance, vector_distance, n_samples) for curve_id in 1:n_curves influence_curve = influence_curves[curve_id, :] @@ -250,7 +250,7 @@ end @testset "Test grm_rows_bounds" begin n_samples = 5 - grm_bounds = TMLECLI.grm_rows_bounds(n_samples) + grm_bounds = TmleCLI.grm_rows_bounds(n_samples) @test grm_bounds == [1 => 1 2 => 3 4 => 6 @@ -263,14 +263,14 @@ end 1. 2. 6. 4. 5. 3. ] - stderrors = TMLECLI.corrected_stderrors(variances) + stderrors = TmleCLI.corrected_stderrors(variances) # sanity check @test stderrors == sqrt.([4., 5., 6.]) end @testset "Test SVP" begin # Generate data - grm_ids = TMLECLI.GRMIDs(joinpath(TESTDIR, "data", "grm", "test.grm.id")) + grm_ids = TmleCLI.GRMIDs(joinpath(TESTDIR, "data", "grm", "test.grm.id")) tmpdir = mktempdir() configuration = statistical_estimands_only_config() pvalue_threshold = 0.1 @@ -296,7 +296,7 @@ end io = jldopen(output) # Check τs - @test io["taus"] == TMLECLI.default_τs(10; max_τ=0.75) + @test io["taus"] == TmleCLI.default_τs(10; max_τ=0.75) # Check variances @test size(io["variances"]) == (10, 4) # Check results @@ -320,7 +320,7 @@ end @testset "Test SVP: causal and composed estimands" begin # Generate data - grm_ids = TMLECLI.GRMIDs(joinpath(TESTDIR, "data", "grm", "test.grm.id")) + grm_ids = TmleCLI.GRMIDs(joinpath(TESTDIR, "data", "grm", "test.grm.id")) tmpdir = mktempdir() configuration = causal_and_joint_estimands_config() configfile = joinpath(tmpdir, "configuration.json") diff --git 
a/test/summary.jl b/test/summary.jl index c47b86e..15eed66 100644 --- a/test/summary.jl +++ b/test/summary.jl @@ -1,13 +1,13 @@ module TestSummary -using TMLECLI +using TmleCLI using Test using CSV using DataFrames using Serialization using JLD2 -TESTDIR = joinpath(pkgdir(TMLECLI), "test") +TESTDIR = joinpath(pkgdir(TmleCLI), "test") CONFIGDIR = joinpath(TESTDIR, "config") @@ -20,7 +20,7 @@ include(joinpath(TESTDIR, "testutils.jl")) estimatorfile = "wtmle--glmnet" # First Run - tmle_output_1 = TMLECLI.Outputs(hdf5=joinpath(tmpdir, "tmle_output_1.hdf5")) + tmle_output_1 = TmleCLI.Outputs(hdf5=joinpath(tmpdir, "tmle_output_1.hdf5")) config_1 = statistical_estimands_only_config() configfile_1 = joinpath(tmpdir, "configuration_1.json") TMLE.write_json(configfile_1, config_1) @@ -32,7 +32,7 @@ include(joinpath(TESTDIR, "testutils.jl")) ) # Second Run - tmle_output_2 = TMLECLI.Outputs(hdf5=joinpath(tmpdir, "tmle_output_2.hdf5")) + tmle_output_2 = TmleCLI.Outputs(hdf5=joinpath(tmpdir, "tmle_output_2.hdf5")) config_2 = causal_and_joint_estimands_config() configfile_2 = joinpath(tmpdir, "configuration_2.json") TMLE.write_json(configfile_2, config_2) @@ -55,7 +55,7 @@ include(joinpath(TESTDIR, "testutils.jl")) ]) # Test correctness - inputs = TMLECLI.read_results_from_files([joinpath(tmpdir, "tmle_output_1.hdf5"), joinpath(tmpdir, "tmle_output_2.hdf5")]) + inputs = TmleCLI.read_results_from_files([joinpath(tmpdir, "tmle_output_1.hdf5"), joinpath(tmpdir, "tmle_output_2.hdf5")]) json_outputs = TMLE.read_json(json_output, use_mmap=false) jls_outputs = deserialize(jls_output) diff --git a/test/utils.jl b/test/utils.jl index b7a8a71..032661c 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -1,7 +1,7 @@ module TestUtils using Test -using TMLECLI +using TmleCLI using TMLE using DataFrames using CSV @@ -14,7 +14,7 @@ check_type(treatment_value, ::Type{T}) where T = @test treatment_value isa T check_type(treatment_values::NamedTuple, ::Type{T}) where T = @test treatment_values.case 
isa T && treatment_values.control isa T -TESTDIR = joinpath(pkgdir(TMLECLI), "test") +TESTDIR = joinpath(pkgdir(TmleCLI), "test") include(joinpath(TESTDIR, "testutils.jl")) @@ -22,16 +22,16 @@ include(joinpath(TESTDIR, "testutils.jl")) treatment_types = Dict(:T₁=> Union{Missing, Bool}, :T₂=> Int) Ψ = CM(;outcome = :Y, treatment_values=Dict(:T₁=>1, :T₂=>false)) - newT = TMLECLI.convert_estimand_treatment_values(Ψ, treatment_types) + newT = TmleCLI.convert_estimand_treatment_values(Ψ, treatment_types) @test newT[:T₁] === true !== 1 @test newT[:T₂] === 0 !== false Ψ = ATE(;outcome = :Y, treatment_values=Dict(:T₁ => (case=1, control=0.)), ) - newT = TMLECLI.convert_estimand_treatment_values(Ψ, treatment_types) + newT = TmleCLI.convert_estimand_treatment_values(Ψ, treatment_types) @test newT[:T₁] === (control=false, case=true) !== (control=0, case=1) Ψ = AIE(;outcome = :Y, treatment_values=Dict(:T₁ => (case=1, control=0.), :T₂ => (case=true, control=0)), ) - newT = TMLECLI.convert_estimand_treatment_values(Ψ, treatment_types) + newT = TmleCLI.convert_estimand_treatment_values(Ψ, treatment_types) @test newT[:T₁] === (control=false, case=true) !== (control=0, case=1) @test newT[:T₂] === (control=0, case=1) !== (control=false, case=true) end @@ -56,7 +56,7 @@ end treatment_values = (T1 = (case = true, control = false), T4 = (case = true, control = false),), )) ] - @test TMLECLI.treatments_from_estimands(estimands) == Set([:T1, :T2, :T3, :T4]) + @test TmleCLI.treatments_from_estimands(estimands) == Set([:T1, :T2, :T3, :T4]) end @testset "Test instantiate_config" for extension in ("yaml", "json") @@ -65,8 +65,8 @@ end eval(Meta.parse("TMLE.write_$extension"))(filename, statistical_estimands_only_config()) dataset = DataFrame(T1 = [1., 0.], T2=[true, false]) - config = TMLECLI.instantiate_config(filename) - estimands = TMLECLI.proofread_estimands(config, dataset) + config = TmleCLI.instantiate_config(filename) + estimands = TmleCLI.proofread_estimands(config, dataset) for 
estimand in estimands if haskey(estimand.treatment_values, :T1) check_type(estimand.treatment_values[:T1], Float64) @@ -81,13 +81,13 @@ end @testset "Test factorialATE" begin dataset = DataFrame(C=[1, 2, 3, 4],) - @test_throws ArgumentError TMLECLI.instantiate_estimands("factorialATE", dataset) + @test_throws ArgumentError TmleCLI.instantiate_estimands("factorialATE", dataset) dataset.T = [0, 1, missing, 2] - @test_throws ArgumentError TMLECLI.instantiate_estimands("factorialATE", dataset) + @test_throws ArgumentError TmleCLI.instantiate_estimands("factorialATE", dataset) dataset.Y = [0, 1, 2, 2] dataset.W1 = [1, 1, 1, 1] dataset.W_2 = [1, 1, 1, 1] - composedATE = TMLECLI.instantiate_estimands("factorialATE", dataset)[1] + composedATE = TmleCLI.instantiate_estimands("factorialATE", dataset)[1] @test composedATE.args == ( TMLE.StatisticalATE(:Y, (T = (case = 1, control = 0),), (T = (:W1, :W_2),), ()), TMLE.StatisticalATE(:Y, (T = (case = 2, control = 1),), (T = (:W1, :W_2),), ()) @@ -110,7 +110,7 @@ end treatment_values=(T₁=(case=1, control=0), T₂=(case="AC", control="CC")), treatment_confounders=(T₁=[:W₁, :W₂], T₂=[:W₁, :W₂]), ) - TMLECLI.coerce_types!(dataset, Ψ) + TmleCLI.coerce_types!(dataset, Ψ) @test scitype(dataset.T₁) == AbstractVector{Union{Missing, OrderedFactor{2}}} @test scitype(dataset.T₂) == AbstractVector{Union{Missing, Multiclass{3}}} @test scitype(dataset.Ycont) == AbstractVector{Union{Missing, MLJBase.Continuous}} @@ -124,7 +124,7 @@ end treatment_confounders=(T₂=[:W₂],), outcome_extra_covariates=[:C] ) - TMLECLI.coerce_types!(dataset, Ψ) + TmleCLI.coerce_types!(dataset, Ψ) @test scitype(dataset.Ybin) == AbstractVector{Union{Missing, OrderedFactor{2}}} @test scitype(dataset.C) == AbstractVector{Count} @@ -134,7 +134,7 @@ end treatment_values=(T₂=(case="AC", control="CC"), ), treatment_confounders=(T₂=[:W₂],), ) - TMLECLI.coerce_types!(dataset, Ψ) + TmleCLI.coerce_types!(dataset, Ψ) @test scitype(dataset.Ycount) == AbstractVector{Union{Missing, 
MLJBase.Continuous}} end @@ -150,7 +150,7 @@ end ), outcome_extra_covariates = [:C] ) - variables = TMLECLI.variables(Ψ) + variables = TmleCLI.variables(Ψ) @test variables == Set([:Y, :C, :T₁, :T₂, :W₁, :W₂, :W₃]) Ψ = ATE( outcome = :Y, @@ -162,7 +162,7 @@ end T₂=[:W₁, :W₂] ), ) - variables = TMLECLI.variables(Ψ) + variables = TmleCLI.variables(Ψ) @test variables == Set([:Y, :T₁, :T₂, :W₁, :W₂]) data = DataFrame( SAMPLE_ID = [1, 2, 3, 4, 5], @@ -172,20 +172,20 @@ end T₁ = [1, 2, 3, 4, 5], T₂ = [1, 2, 3, 4, missing], ) - sample_ids = TMLECLI.sample_ids_from_variables(data, variables) + sample_ids = TmleCLI.sample_ids_from_variables(data, variables) @test sample_ids == [2, 3] data.W₁ = [1, 2, missing, 4, 5] - sample_ids = TMLECLI.sample_ids_from_variables(data, variables) + sample_ids = TmleCLI.sample_ids_from_variables(data, variables) @test sample_ids == [2] # wrapped_ype col = categorical(["AC", "CC"]) - @test TMLECLI.wrapped_type(eltype(col)) == String + @test TmleCLI.wrapped_type(eltype(col)) == String col = categorical(["AC", "CC", missing]) - @test TMLECLI.wrapped_type(eltype(col)) == String + @test TmleCLI.wrapped_type(eltype(col)) == String col = [1, missing, 0.3] - @test TMLECLI.wrapped_type(eltype(col)) == Float64 + @test TmleCLI.wrapped_type(eltype(col)) == Float64 col = [1, 2, 3] - @test TMLECLI.wrapped_type(eltype(col)) == Int64 + @test TmleCLI.wrapped_type(eltype(col)) == Int64 end @@ -194,7 +194,7 @@ end T₁ = [1, 1, 0, 0], T₂ = ["AA", "AC", "CC", "CC"], ) - TMLECLI.make_categorical!(dataset, (:T₁, :T₂)) + TmleCLI.make_categorical!(dataset, (:T₁, :T₂)) @test dataset.T₁ isa CategoricalVector @test dataset.T₁.pool.ordered == false @test dataset.T₂ isa CategoricalVector @@ -205,18 +205,18 @@ end T₂ = ["AA", "AC", "CC", "CC"], C₁ = [1, 2, 3, 4], ) - TMLECLI.make_categorical!(dataset, (:T₁, :T₂), infer_ordered=true) + TmleCLI.make_categorical!(dataset, (:T₁, :T₂), infer_ordered=true) @test dataset.T₁ isa CategoricalVector @test dataset.T₁.pool.ordered == 
true @test dataset.T₂ isa CategoricalVector @test dataset.T₂.pool.ordered == false - TMLECLI.make_float!(dataset, [:C₁]) + TmleCLI.make_float!(dataset, [:C₁]) @test eltype(dataset.C₁) == Float64 # If the type is already coerced then no-operation is applied - TMLECLI.make_float(dataset.C₁) === dataset.C₁ - TMLECLI.make_categorical(dataset.T₁, true) === dataset.T₁ + TmleCLI.make_float(dataset.C₁) === dataset.C₁ + TmleCLI.make_categorical(dataset.T₁, true) === dataset.T₁ end end; diff --git a/tmle.jl b/tmle.jl index 2acd5ca..c31f8b0 100644 --- a/tmle.jl +++ b/tmle.jl @@ -1 +1 @@ -using TMLECLI; main() \ No newline at end of file +using TmleCLI; main() \ No newline at end of file