From 94194eb59ae4fac29d46d6f30c44a7e8256ffbaf Mon Sep 17 00:00:00 2001 From: Shantanu Singh Date: Mon, 13 Mar 2023 01:23:59 -0400 Subject: [PATCH 1/4] handle rep_or_group correctly --- matric/2.calculate_index.Rmd | 25 ++++++++++++++++++------- matric/3.calculate_metrics.Rmd | 2 +- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/matric/2.calculate_index.Rmd b/matric/2.calculate_index.Rmd index 917b0b5..4a87b80 100644 --- a/matric/2.calculate_index.Rmd +++ b/matric/2.calculate_index.Rmd @@ -32,7 +32,7 @@ params: - Metadata_cell_line - Metadata_pert_name - Metadata_gene_name - - Metadata_reference_or_other + - Metadata_reference_or_other --- # Setup @@ -81,16 +81,27 @@ profiles <- ```{r} log_info("Calculating index ...") -if (is.null(params$sim_params$any_different_cols_non_rep)) { - log_info("Reducing similarity calculations because `any_different_cols_non_rep` is NULL ...") - x_all_same_cols_rep <- params$sim_params$all_same_cols_rep +if (is.null(params$sim_params$any_different_cols_non_rep) & + xor( + !is.null(params$sim_params$all_same_cols_rep), + !is.null(params$sim_params$all_same_cols_group) + )) { + log_info( + "Reducing similarity calculations because `any_different_cols_non_rep` is NULL and only one of `all_same_cols_rep` and `all_same_cols_group` are specified..." + ) + if (!is.null(params$sim_params$all_same_cols_rep)) { + x_all_same_cols_rep_or_group <- params$sim_params$all_same_cols_rep + } else { + x_all_same_cols_rep_or_group <- + params$sim_params$all_same_cols_group + } x_all_same_cols_ref <- params$sim_params$all_same_cols_ref x_reference_df <- reference_df } else { log_info( - "Performing all similarity calculations because `any_different_cols_non_rep` is not NULL ..." + "Performing all similarity calculations because `any_different_cols_non_rep` is not NULL or both `all_same_cols_rep` and `all_same_cols_group` are specified...." ) - x_all_same_cols_rep <- NULL + x_all_same_cols_rep_or_group <- NULL x_all_same_cols_ref <- NULL x_reference_df <- NULL } @@ -102,7 +113,7 @@ sim_df <- population = profiles, method = NULL, lazy = TRUE, - all_same_cols_rep_or_group = x_all_same_cols_rep, + all_same_cols_rep_or_group = x_all_same_cols_rep_or_group, all_same_cols_ref = x_all_same_cols_ref, reference = x_reference_df ) diff --git a/matric/3.calculate_metrics.Rmd b/matric/3.calculate_metrics.Rmd index ec244d2..0a63b31 100644 --- a/matric/3.calculate_metrics.Rmd +++ b/matric/3.calculate_metrics.Rmd @@ -123,7 +123,7 @@ if (!is.null(attr(collated_sim, "params")$calculate_index$sim_params$any_differe sim_metrics( collated_sim, sim_type_background = "non_rep", - calculate_grouped = FALSE, + calculate_grouped = calculate_grouped, use_furrr = TRUE, calculate_pvalue = TRUE ) From 04a32b6fbcb7c09c620b3f24b9570b6902975784 Mon Sep 17 00:00:00 2001 From: Shantanu Singh Date: Mon, 13 Mar 2023 01:40:55 -0400 Subject: [PATCH 2/4] typo --- matric/2.calculate_index.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/matric/2.calculate_index.Rmd b/matric/2.calculate_index.Rmd index 4a87b80..617fcab 100644 --- a/matric/2.calculate_index.Rmd +++ b/matric/2.calculate_index.Rmd @@ -32,7 +32,7 @@ params: - Metadata_cell_line - Metadata_pert_name - Metadata_gene_name - - Metadata_reference_or_other + - Metadata_reference_or_other --- # Setup From 22b91decffe09b66e82699a2a8702af73322880b Mon Sep 17 00:00:00 2001 From: Shantanu Singh Date: Mon, 13 Mar 2023 14:48:34 -0400 Subject: [PATCH 3/4] renv --- renv.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/renv.lock b/renv.lock index 99bacaf..7891af4 100644 --- a/renv.lock +++ b/renv.lock @@ -1314,7 +1314,7 @@ "RemoteRepo": "matric", "RemoteUsername": "shntnu", "RemoteRef": "null_df", - "RemoteSha": "419ed83c194624a5a5393f6c8765fae9c2940630", + "RemoteSha": "a2adb70e437cd1f07020ada415292590220d4ff1", "Requirements": [ "R", "arrow", @@ -1336,7 +1336,7 @@ "tidyr", "yardstick" ], - "Hash": "653b4266a16595683c83c021c59f030e" + "Hash": "ab6353150e1beacd9443747b47539fcd" }, "memoise": { "Package": "memoise", From e8623b2ab1b2d17dd5cfc34600582ad54c76d394 Mon Sep 17 00:00:00 2001 From: Shantanu Singh Date: Mon, 13 Mar 2023 14:49:15 -0400 Subject: [PATCH 4/4] read level_2_1 (but don't do anything with it) --- matric/5.inspect_metrics.Rmd | 38 ++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/matric/5.inspect_metrics.Rmd b/matric/5.inspect_metrics.Rmd index 8643414..d265206 100644 --- a/matric/5.inspect_metrics.Rmd +++ b/matric/5.inspect_metrics.Rmd @@ -71,6 +71,8 @@ plot_metric <- # Read +## Level 1_0 + ```{r} metric_set <- glue("level_1_0_{type}") @@ -84,11 +86,9 @@ log_info("Reading {parquet_file} ...") level_1_0_metrics <- arrow::read_parquet(glue(parquet_file)) - -all_same_cols_rep <- attr(level_1_0_metrics, "all_same_cols_rep") ``` -After reading level_1, drop duplicates that may result from annotating level 1_0 entities +## Level 1 ```{r} metric_set <- glue("level_1_{type}") @@ -102,11 +102,41 @@ parquet_file <- log_info("Reading {parquet_file} ...") level_1_metrics <- - arrow::read_parquet(glue(parquet_file)) %>% + arrow::read_parquet(glue(parquet_file)) +``` + +After reading level_1, drop duplicates that may result from annotating level 1_0 entities + +```{r} +all_same_cols_rep <- attr(level_1_0_metrics, "all_same_cols_rep") + +level_1_metrics <- + level_1_metrics %>% select(all_of(all_same_cols_rep), matches("^sim_")) %>% distinct() ``` + +## Level 2_1 + +```{r} +metric_set <- glue("level_2_1_{type}") + +parquet_file <- + with( + params, + glue("{input_metrics_file_prefix}_{metric_set}.parquet") + ) + +if (file.exists(parquet_file)) { + log_info("Reading {parquet_file} ...") + + level_2_1_metrics <- + arrow::read_parquet(glue(parquet_file)) +} +``` + + # Plot metrics ## Average Precision