diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 92c9158c..187e14f0 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -22,8 +22,8 @@ jobs: fail-fast: false matrix: config: - - {os: windows-latest, r: 'release'} - - {os: macOS-latest, r: 'release'} + - {os: windows-latest, r: '4.2'} + - {os: macOS-10.15, r: 'release'} - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} diff --git a/DESCRIPTION b/DESCRIPTION index 008a9c17..8ae8389d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: IOHanalyzer Type: Package Title: Data Analysis Part of 'IOHprofiler' -Version: 0.1.7.4 +Version: 0.1.8.2 Maintainer: Diederick Vermetten Authors@R: c( person("Hao", "Wang", email = "h.wang@liacs.leidenuniv.nl", role = "aut", comment = c(ORCID = "0000-0002-4933-5181")), @@ -17,7 +17,7 @@ Description: The data analysis module for the Iterative Optimization Heuristics License: BSD_3_clause + file LICENSE Encoding: UTF-8 LazyData: true -URL: http://iohprofiler.liacs.nl, https://github.com/IOHprofiler/IOHAnalyzer +URL: https://iohanalyzer.liacs.nl, https://github.com/IOHprofiler/IOHAnalyzer BugReports: https://github.com/IOHprofiler/IOHAnalyzer/issues Imports: magrittr, @@ -34,9 +34,11 @@ Imports: httr, knitr, methods, - rjson + rjson, + eaf, + viridis LinkingTo: Rcpp -SystemRequirements: C++11 +SystemRequirements: C++ RoxygenNote: 7.1.2 Suggests: Rcpp, diff --git a/NAMESPACE b/NAMESPACE index 80479a4d..930e2479 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -111,7 +111,11 @@ export(fast_RT_samples) export(generate_data.AUC) export(generate_data.Aggr) export(generate_data.CDP) +export(generate_data.EAF) +export(generate_data.EAF_Difference) +export(generate_data.EAF_diff_Approximate) export(generate_data.ECDF) +export(generate_data.ECDF_From_EAF) export(generate_data.ECDF_raw) export(generate_data.Heatmaps) export(generate_data.PMF) @@ -161,6 +165,8 @@ export(glicko2_ranking) export(max_ERTs) export(mean_FVs) export(pairwise.test) +export(plot_eaf_data) +export(plot_eaf_differences) export(plot_general_data) export(read_IOH_v1plus) export(read_index_file) @@ -183,15 +189,20 @@ importFrom(data.table,copy) importFrom(data.table,data.table) importFrom(data.table,frank) importFrom(data.table,fread) +importFrom(data.table,getDTthreads) importFrom(data.table,is.data.table) importFrom(data.table,melt) importFrom(data.table,rbindlist) importFrom(data.table,setDT) +importFrom(data.table,setDTthreads) importFrom(data.table,setnames) +importFrom(data.table,setorder) importFrom(data.table,setorderv) importFrom(data.table,transpose) importFrom(dplyr,"%>%") importFrom(dplyr,mutate) +importFrom(eaf,eafdiff) +importFrom(eaf,eafs) importFrom(ggplot2,aes) importFrom(ggplot2,element_text) importFrom(ggplot2,facet_wrap) @@ -226,8 +237,12 @@ importFrom(magrittr,set_names) importFrom(magrittr,set_rownames) importFrom(methods,hasArg) importFrom(plotly,add_annotations) +importFrom(plotly,add_contour) +importFrom(plotly,add_polygons) importFrom(plotly,add_trace) importFrom(plotly,animation_opts) +importFrom(plotly,colorbar) +importFrom(plotly,hide_colorbar) importFrom(plotly,layout) importFrom(plotly,orca) importFrom(plotly,plot_ly) @@ -254,10 +269,12 @@ importFrom(stringi,stri_locate_all) importFrom(stringi,stri_rand_strings) importFrom(stringi,stri_replace) importFrom(stringi,stri_sub) +importFrom(utils,compareVersion) importFrom(utils,data) importFrom(utils,head) importFrom(utils,read.csv) importFrom(utils,tail) importFrom(utils,type.convert) importFrom(utils,write.csv) +importFrom(viridis,viridis) useDynLib(IOHanalyzer) diff --git a/R/DataSet.R b/R/DataSet.R index 82aae72f..d91a9bc2 100644 --- a/R/DataSet.R +++ b/R/DataSet.R @@ -72,17 +72,7 @@ DataSet <- function(info, verbose = F, maximization = NULL, format = IOHprofiler tdatFile <- file.path(path, paste0(datBaseName, '.tdat')) cdatFile <- file.path(path, paste0(datBaseName, '.cdat')) - # NOTE: preference on data file for the alignment by RT: cdat > tdat > dat - if (file.exists(cdatFile)) - fvFile <- cdatFile - else if (file.exists(tdatFile)) - fvFile <- tdatFile - else if (file.exists(datFile)) - fvFile <- datFile - else - stop('No datafiles found, please verify the integrity of the chosen files') - - # NOTE: preference on data file for the alignment by FV: dat > tdat > cdat + # NOTE: preference on data file from coco: dat > tdat > cdat if (file.exists(datFile)) rtFile <- datFile else if (file.exists(tdatFile)) @@ -90,6 +80,8 @@ DataSet <- function(info, verbose = F, maximization = NULL, format = IOHprofiler else if (file.exists(cdatFile)) # TODO: perhaps turn on `subsampling` here as this would take quite some time rtFile <- cdatFile + else + stop('No datafiles found, please verify the integrity of the chosen files') read_raw <- switch( format, @@ -101,7 +93,6 @@ DataSet <- function(info, verbose = F, maximization = NULL, format = IOHprofiler ) RT_raw <- read_raw(rtFile, subsampling) - FV_raw <- read_raw(fvFile, subsampling) if (is.null(maximization)) { if (verbose) @@ -109,14 +100,14 @@ DataSet <- function(info, verbose = F, maximization = NULL, format = IOHprofiler function value progression") # TODO: idxTarget should be set depending on the data format idxTarget <- 2 - cond <- unique(lapply(FV_raw, function(FV) FV[1, idxTarget] >= FV[nrow(FV), idxTarget])) + cond <- unique(lapply(RT_raw, function(FV) FV[1, idxTarget] >= FV[nrow(FV), idxTarget])) if (length(cond) > 1) stop('The detected maximization differs in multiple runs') maximization <- cond } RT <- align_running_time(RT_raw, format = format, maximization = maximization) - FV <- align_function_value(FV_raw, format = format) + FV <- align_function_value(RT_raw, format = format) PAR <- list( 'by_FV' = RT[names(RT) != 'RT'], @@ -148,9 +139,9 @@ DataSet <- function(info, verbose = F, maximization = NULL, format = IOHprofiler # TODO: clean up these if-statements: Function to set idxTarget and n_data_column? # `idxTarget` is a global variable? if (format == TWO_COL) - finalFV <- set_names(sapply(FV_raw, function(d) d[nrow(d), idxTarget - 1]), NULL) + finalFV <- set_names(sapply(RT_raw, function(d) d[nrow(d), idxTarget - 1]), NULL) else - finalFV <- set_names(sapply(FV_raw, function(d) d[nrow(d), idxTarget]), NULL) + finalFV <- set_names(sapply(RT_raw, function(d) d[nrow(d), idxTarget]), NULL) if (any(finalFV != info$finalFV) && verbose) warning('Inconsitent finalFvalue in *.info file and *.dat file') @@ -168,9 +159,9 @@ DataSet <- function(info, verbose = F, maximization = NULL, format = IOHprofiler maximization = maximization, suite = suite, ID = info$algId)) ) if (isTRUE(info$constrained) || full_sampling) { - FV_raw_mat <- matrix(nrow = nrow(FV), ncol = length(FV_raw)) - for (idx in seq(length(FV_raw))) { - FV_raw_mat[,idx] = FV_raw[[idx]][,2] + FV_raw_mat <- matrix(nrow = nrow(FV), ncol = length(RT_raw)) + for (idx in seq(length(RT_raw))) { + FV_raw_mat[,idx] = RT_raw[[idx]][,2] } temp$FV_raw_mat <- FV_raw_mat attr(temp, 'contains_full_FV') <- TRUE diff --git a/R/DataSetList.R b/R/DataSetList.R index fef97ce4..1d8804e7 100644 --- a/R/DataSetList.R +++ b/R/DataSetList.R @@ -1155,9 +1155,10 @@ generate_data.hist <- function(dsList, target, use.equal.bins = F, which = 'by_R #' @examples #' generate_data.ECDF(subset(dsl, funcId == 1), c(10, 15, 16)) generate_data.ECDF <- function(dsList, targets, scale_log = F, which = 'by_RT', use_full_range = TRUE) { - V1 <- NULL #Set local binding to remove warnings by_rt <- which == 'by_RT' + V1 <- frac <- NULL if (by_rt) { + maximization <- attr(dsList, "maximization") RT <- get_runtimes(dsList) if (!use_full_range) { if (length(unique(get_funcId(dsList))) > 1 || length(unique(get_dim(dsList))) > 1) { @@ -1188,6 +1189,8 @@ generate_data.ECDF <- function(dsList, targets, scale_log = F, which = 'by_RT', x <- unique(seq_FV(FV, length.out = 50, scale = ifelse(scale_log, 'log', 'linear'))) } + + dt <- as.data.table(rbindlist(lapply(dsList, function(df) { ID <- get_id(df) if (by_rt) { @@ -1196,30 +1199,28 @@ generate_data.ECDF <- function(dsList, targets, scale_log = F, which = 'by_RT', } else targets_ <- targets - m <- lapply(targets_, function(target) { - if (by_rt) { - data <- get_RT_sample(df, target, output = 'long')$RT - data[is.na(data)] <- Inf - } - else - data <- get_FV_sample(df, target, output = 'long')$`f(x)` - if (all(is.na(data))) - return(rep(0, length(x))) - fun <- ecdf(data) - if (is.function(fun)) fun(x) else NA - }) %>% - do.call(rbind, .) - data.frame(x = x, - mean = apply(m, 2, . %>% mean(na.rm = T)), - sd = apply(m, 2, . %>% sd(na.rm = T))) %>% - mutate(upper = mean + sd, lower = mean - sd, ID = ID) + if (by_rt) { + data <- get_FV_sample(df, x, output = 'long') + if (maximization) + data$frac <- unlist(lapply(data$`f(x)`, function(temp) mean(temp > targets_))) + else + data$frac <- unlist(lapply(data$`f(x)`, function(temp) mean(temp < targets_))) + res <- data[, .(x=runtime, mean=mean(frac), sd=sd(frac)), by='runtime'] + } else { + data <- get_RT_sample(df, x, output = 'long') + data[is.na(data)] <- Inf + data$frac <- unlist(lapply(data$RT, function(temp) mean(temp < targets_))) + res <- data[, .(x=target, mean=mean(frac), sd=sd(frac)), by='target'] + } + mutate(res, upper = mean + sd, lower = mean - sd, ID = ID) }))) dt[, mean(mean), by = .(x, ID)][, .(mean = V1, ID = ID, x = x)] } + #' Generate dataframe containing the AUC for any ECDF-curves #' #' This function generates a dataframe which can be easily plotted using the `plot_general_data`-function @@ -1236,6 +1237,7 @@ generate_data.ECDF <- function(dsList, targets, scale_log = F, which = 'by_RT', #' #' @export #' @examples +#' \dontshow{data.table::setDTthreads(1)} #' generate_data.AUC(dsl, get_ECDF_targets(dsl)) #' generate_data.AUC(NULL, NULL, dt_ecdf = generate_data.ECDF(dsl, get_ECDF_targets(dsl))) generate_data.AUC <- function(dsList, targets, scale_log = F, which = 'by_RT', dt_ecdf = NULL, @@ -1246,21 +1248,36 @@ generate_data.AUC <- function(dsList, targets, scale_log = F, which = 'by_RT', d return(NULL) dt_ecdf <- generate_data.ECDF(dsList, targets, scale_log, which) } - max_idx <- nrow(unique(dt_ecdf[,'x'])) - dt_ecdf[, idx := seq(max_idx), by = 'ID'] - dt3 = copy(dt_ecdf) - dt3[, idx := idx - 1] - dt_merged = merge(dt_ecdf, dt3, by = c('ID', 'idx')) - colnames(dt_merged) <- c("ID", "idx", "mean_pre", "x_pre", "mean_post", "x") - dt_merged[, auc_contrib := ((mean_pre + mean_post)/2)*(x - x_pre)] - if (normalize){ - dt_merged[, auc := cumsum(auc_contrib)/x, by = 'ID'] - } else { - dt_merged[, auc := cumsum(auc_contrib), by = 'ID'] - } - if (multiple_x) - return(dt_merged[, c('ID','x','auc') ]) - return(dt_merged[idx == (max_idx - 1), c('ID','x','auc') ]) + dt_ecdf <- dt_ecdf[, c('mean', 'ID', 'x')] + rt_max <- max(dt_ecdf[,'x']) + dt_merged <- rbindlist(lapply(unique(dt_ecdf$ID), function(id) { + dt_part <- dt_ecdf[ID == id, ] + max_idx <- nrow(unique(dt_part[,'x'])) + dt_part[, idx := seq(max_idx), by = 'ID'] + dt3 = copy(dt_part) + dt3[, idx := idx - 1] + dt_merged_part = merge(dt_part, dt3, by = c('ID', 'idx')) + colnames(dt_merged_part) <- c("ID", "idx", "mean_pre", "x_pre", "mean_post", "x") + if (max(dt_part[,'x']) < rt_max){ + extreme <- data.table("ID" = id, "idx" = max_idx+1, + "x_pre" = max(dt_part[,'x']), + "x" = rt_max, + "mean_pre" = max(dt_part[,'mean']), + "mean_post" = max(dt_part[,'mean'])) + max_idx <- max_idx + 1 + dt_merged_part <- rbind(dt_merged_part, extreme) + } + dt_merged_part[, auc_contrib := ((mean_pre + mean_post)/2)*(x - x_pre)] + if (normalize){ + dt_merged_part[, auc := cumsum(auc_contrib)/x, by = 'ID'] + } else { + dt_merged_part[, auc := cumsum(auc_contrib), by = 'ID'] + } + if (multiple_x) + return(dt_merged_part[, c('ID','x','auc') ]) + return(dt_merged_part[idx == (max_idx - 1), c('ID','x','auc') ]) + })) + return(dt_merged) } @@ -1464,6 +1481,7 @@ generate_data.Aggr <- function(dsList, aggr_on = 'funcId', targets = NULL, which #' #' @export #' @examples +#' \dontshow{data.table::setDTthreads(1)} #' generate_data.ECDF_raw(subset(dsl, funcId == 1), c(10, 15, 16)) generate_data.ECDF_raw <- function(dsList, targets, scale_log = F) { V1 <- NULL #Set local binding to remove warnings @@ -1639,11 +1657,8 @@ generate_data.Heatmaps <- function(dsList, which = 'by_FV', target_dt = NULL) { #' @export #' @examples #' -#' dsl #' dsl_sub <- subset(dsl, funcId == 1) -#' runtime <- 15 -#' -#' generate_data.CDP(dsl_sub, runtime, TRUE) +#' generate_data.CDP(dsl_sub, 15, TRUE, nOfBootstrapSamples = 10) generate_data.CDP <- function(dsList, runtime_or_target_value, isFixedBudget, alpha=0.05, EPSILON=1e-80, nOfBootstrapSamples=1e3) { if (!requireNamespace("RVCompare", quietly = TRUE)) { @@ -1681,7 +1696,227 @@ generate_data.CDP <- function(dsList, runtime_or_target_value, isFixedBudget, al X_B <- - X_B } - res <- RVCompare::get_Y_AB_bounds_bootstrap(X_A, X_B, ignoreMinimumLengthCheck = TRUE, alpha=alpha, EPSILON=EPSILON, nOfBootstrapSamples=1e3) + res <- RVCompare::get_Y_AB_bounds_bootstrap(X_A, X_B, + ignoreMinimumLengthCheck = TRUE, + alpha=alpha, EPSILON=EPSILON, + nOfBootstrapSamples=nOfBootstrapSamples) return(data.frame(res)) } + + +#' Generate dataframe consisting of the levelsets of the EAF +#' +#' This function generates a dataframe which can be easily plotted using the `plot_eaf_data`-function +#' +#' @param dsList The DataSetList object +#' @param n_sets The number of level sets to calculate +#' @param subsampling Level of subsampling to use for runtime-values (number of runtimes to consider). +#' Setting to 0 will make the calculations more precise at the cost of potentially much longer exectution times +#' @param scale_xlog Only has effect when `subsampling` is True. The scaling of the subsampled runtimes +#' When true, these are equally spaced in log-space, when false they are linearly spaced. +#' @param xmin Minimum runtime value +#' @param xmax Maximum runtime value +#' +#' @export +#' @examples +#' generate_data.EAF(subset(dsl, funcId == 1)) +generate_data.EAF <- function(dsList, n_sets = 11, subsampling = 100, scale_xlog = F, + xmin = "", xmax = "") { + V1 <- NULL #Set local binding to remove warnings + + if (!requireNamespace("eaf", quietly = TRUE)) { + stop("Package \"eaf\" needed for this function to work. Please install it.", + call. = FALSE) + } + + # if (length(get_id(dsList)) != 1 ) { + # stop("Multiple IDs are present in provided DataSetList. + # Please call this function for each individual ID instead.") + # } + ids <- get_id(dsList) + runtimes <- get_runtimes(dsList) + xmin <- ifelse(xmin == "", min(runtimes, na.rm = T), as.numeric(xmin)) + xmax <- ifelse(xmax == "", max(runtimes, na.rm = T), as.numeric(xmax)) + + if (subsampling > 0){ + runtimes <- seq_RT(c(xmin,xmax), from=xmin, to=xmax, length.out = subsampling, scale=ifelse(scale_xlog, 'log', 'linear')) + } else { + runtimes <- runtimes[runtimes > xmin] + runtimes <- runtimes[runtimes < xmax] + } + + temp <- lapply(ids, function(id) { + dsl <- subset(dsList, ID == id) + + + dt <- get_FV_sample(dsl, runtimes, output='long') + max_runs <- max(dt$run) + dt_temp <- dt[!is.na(`f(x)`),.(temp=max_runs*funcId+ run, `f(x)`, `runtime`)] + + if (attr(dsl, 'maximization')) { + quals <- dt_temp[, .(`runtime`, `f(x)`=-1*`f(x)`)] + } else { + quals <- dt_temp[,c('runtime', 'f(x)')] + } + runs <- dt_temp$temp + eaf <- eafs(quals, runs, percentiles = seq(0,100, length.out=n_sets)) + if (attr(dsl, 'maximization')) { + eaf[,2] = eaf[,2]*-1 + } + eaf_table <- as.data.table(eaf) + colnames(eaf_table) <- c('runtime', 'f(x)', 'percentage') + eaf_table$ID <- id + return(eaf_table)} + ) + dt <- rbindlist(temp) + return(dt) +} + +#' Generate dataframe consisting of the ECDF-equivalent based on the EAF +#' +#' This function uses EAF-data to calculate a target-independent version of the ECDF +#' +#' @param eaf_table Datatable resulting from the `generate_data.EAF` function +#' @param min_val Minimum value to use for y-space +#' @param max_val Maximum value to use for y-space +#' @param maximization Whether the data resulted from maximization or not +#' @param scale_log Whether to use logarithmic scaling in y-space before calculating the partial integral +#' @param normalize Whether to normalize the resulting integrals to [0,1] (Based on `min_val` and `max_va`) +#' @export +#' @examples +#' \dontshow{data.table::setDTthreads(1)} +#' generate_data.ECDF_From_EAF(generate_data.EAF(subset(dsl, funcId == 1)), 1, 16, maximization = TRUE) +generate_data.ECDF_From_EAF <- function(eaf_table, min_val, max_val, maximization = F, + scale_log = F, normalize = T) { + + runtimes <- sort(unique(eaf_table[,`runtime`])) + + ext_func <- ifelse(maximization, max, min) + + fvals <- sort(unique(eaf_table[,`f(x)`])) + min_val <- ifelse(min_val == "", min(fvals, na.rm = T), as.numeric(min_val)) + max_val <- ifelse(max_val == "", max(fvals, na.rm = T), as.numeric(max_val)) + + if (scale_log){ + min_val <- max(min_val, 1e-12) + max_val <- max(max_val, 1e-12) + eaf_table <- eaf_table[,.(`runtime`, `f(x)`=log10(pmax( min_val, pmin( `f(x)`, max_val))), `percentage`, `ID`)] + min_val <- log10(min_val) + max_val <- log10(max_val) + } + + ecdf_full <- rbindlist(lapply(unlist(unique(eaf_table[,'ID'])), function(id) { + ecdf <- rbindlist(lapply(runtimes, function(runtime_value) { + temp <- eaf_table[runtime <= runtime_value , .(agg_fval = ext_func(`f(x)`)), by = c('percentage')] + agg_vals <- pmax(pmin(temp$agg_fval, max_val), min_val) + + if (maximization) { + partials <- rev(c(rev(agg_vals), max_val) - c(min_val, rev(agg_vals))) + } else { + partials <- c(agg_vals, max_val) - c(min_val,agg_vals) + } + list(runtime_value, sum(partials * c(0,temp$perc/100))) + })) + if (normalize) { + ecdf[,2] = ecdf[,2]/(max_val-min_val) + } + ecdf <- as.data.table(ecdf) + colnames(ecdf) <- c('x', 'mean') + ecdf$ID <- id + ecdf + })) + return(ecdf_full) +} + + +#' Generate differences between two EAFs +#' +#' This function uses the 'eaf' package to calculate eaf differences +#' +#' @param dsList1 The first DataSetList object +#' @param dsList2 The second DataSetList object +#' @export +#' @examples +#' generate_data.EAF_Difference(dsl[1], dsl[3]) +generate_data.EAF_Difference <- function(dsList1, dsList2) { + + dt <- get_FV_sample(dsList1, get_runtimes(dsList1), output='long') + max_runs <- max(dt$run) + dt_temp = dt[,.(temp=max_runs*funcId+ run, `f(x)`, `runtime`)] + + quals <- dt_temp[,c('runtime', 'f(x)')] + runs <- dt_temp$temp - min(dt_temp$temp) + 1 #need to start at 1 + x <- cbind(quals, runs) + x <- x[!is.na(`f(x)`),.(`runtime`, `f(x)`, `runs`)] + + dt <- get_FV_sample(dsList2, get_runtimes(dsList2), output='long') + max_runs <- max(dt$run) + dt_temp = dt[,.(temp=max_runs*funcId+ run, `f(x)`, `runtime`)] + + quals <- dt_temp[,c('runtime', 'f(x)')] + runs <- dt_temp$temp - min(dt_temp$temp) + 1 #need to start at 1 + y <- cbind(quals, runs) + y <- y[!is.na(`f(x)`),.(`runtime`, `f(x)`, `runs`)] + + diff <- eafdiff(x, y, rectangles = T) + + return(diff) +} + + +#' Generate EAF-differences between each function and the remaining portfolio +#' +#' This is an approximation of ``, since the number of required polygons +#' can quickly become problematic for plotly. This function uses discretized +#' contour matrices instead, which trades off accuracy for scalability. +#' +#' @param dsList The DataSetList object, containing at least 2 IDs +#' @param xmin Minimum runtime to consider +#' @param xmax Maximum runtime to consider +#' @param ymin Minimum f(x) to consider +#' @param ymax Maximum f(x) to consider +#' @param x.log Whether to scale the y-space logarithmically +#' @param y.log Whether to scale the y-space logarithmically +#' @export +#' @examples +#' generate_data.EAF_diff_Approximate(subset(dsl, funcId == 1), 1, 16, 1, 16) +generate_data.EAF_diff_Approximate <- function(dsList, xmin, xmax, ymin, ymax, + x.log=T, y.log=T) { + + RT <- get_runtimes(dsList) + xmin <- max(as.numeric(xmin), min(RT)) + xmax <- min(as.numeric(xmax), max(RT)) + + x <- unique(seq_RT(c(xmin, xmax), length.out = 50, scale = 'log')) + + FV <- get_funvals(dsList) + ymin <- max(as.numeric(ymin), min(FV)) + ymax <- min(as.numeric(ymax), max(FV)) + y <- rev(unique(seq_FV(c(ymin,ymax), length.out = 50, scale = 'log'))) + + algs <- get_algId(dsList) + + mats <- lapply(algs, function(alg) { + ds <- subset(dsList, algId == alg) + do.call(rbind, lapply(x, function(xval) {unlist(get_RT_summary(ds, y, xval)$ps)})) + }) + + names(mats) <- algs + fv_sum = get_FV_summary(dsList, x, include_limits = T) + + matrices_diffs <- lapply(algs, function(alg) { + mat_max <- Reduce(pmax, mats[algs[algs != alg]]) + diff <- mats[alg][[1]] - mat_max + # diff[diff<0] <- 0 + rownames(diff) <- x + colnames(diff) <- y + return(t(diff)) + }) + + names(matrices_diffs) <- algs + return(matrices_diffs) +} + + + diff --git a/R/IOHanalyzer.R b/R/IOHanalyzer.R index 53393031..e27d01d4 100644 --- a/R/IOHanalyzer.R +++ b/R/IOHanalyzer.R @@ -1,15 +1,16 @@ #' @importFrom stats dt ecdf integrate median quantile sd rgeom ks.test p.adjust ave #' @importFrom grDevices col2rgb colors nclass.FD #' @importFrom graphics hist -#' @importFrom utils data head read.csv tail type.convert write.csv +#' @importFrom utils data head read.csv tail type.convert write.csv compareVersion #' @importFrom dplyr %>% mutate #' @importFrom magrittr set_names set_rownames set_colnames %<>% mod #' @importFrom colorspace sequential_hcl #' @importFrom RColorBrewer brewer.pal #' @importFrom colorRamps primary.colors #' @importFrom data.table as.data.table rbindlist data.table fread := melt is.data.table -#' @importFrom data.table setorderv frank setnames rbindlist copy transpose setDT -#' @importFrom plotly add_annotations add_trace orca plot_ly rename_ subplot layout animation_opts save_image +#' @importFrom data.table setorderv frank setnames rbindlist copy transpose setDT setorder getDTthreads setDTthreads +#' @importFrom plotly add_annotations add_trace orca plot_ly rename_ subplot layout +#' @importFrom plotly animation_opts save_image add_polygons hide_colorbar add_contour colorbar #' @importFrom ggplot2 aes geom_jitter geom_line geom_ribbon geom_violin ggplot element_text #' @importFrom ggplot2 guides scale_color_manual scale_colour_manual scale_fill_manual #' @importFrom ggplot2 scale_x_continuous scale_x_log10 facet_wrap theme_set theme_grey theme @@ -21,6 +22,8 @@ #' @importFrom knitr kable #' @importFrom methods hasArg #' @importFrom rjson fromJSON +#' @importFrom eaf eafs eafdiff +#' @importFrom viridis viridis #' @useDynLib IOHanalyzer NULL # Ugly hack, but appears to be required to appease CRAN @@ -28,7 +31,8 @@ utils::globalVariables(c(".", "algId", "run", "ERT", "RT", "group", "DIM", "Fvalue", "lower", "upper", "target", "format", "runtime", "parId", "instance", "input", "funcId", "budget", "dimension", "loss", "name", "optimizer_name", - "rescale", "maxRT", "algnames", ".SD", "function_class", "ID", "ids")) + "rescale", "maxRT", "algnames", ".SD", "function_class", + "ID", "ids", "f(x)", "percentage")) options(shiny.port = 4242) @@ -132,4 +136,4 @@ SOS <- 'SOS' #' \dontrun{ #' runServer() #' } -NULL +"_PACKAGE" diff --git a/R/plot.R b/R/plot.R index ef29c8ce..1bd2b64a 100644 --- a/R/plot.R +++ b/R/plot.R @@ -399,3 +399,27 @@ save_plotly <- function(p, file, width = NULL, height = NULL, ...) { file.rename(file.path(pwd, file), file.path(des, file)) } + +#' Helper function from 'eaf' package +#' +#' @noRd +add.extremes <- function(x, extremes, maximise) +{ + best1 <- if (maximise[1]) max else min + best2 <- if (maximise[2]) max else min + rbind(c(best1(x[,1]), extremes[2]), x, c(extremes[1], best2(x[,2]))) +} + +#' Helper function from 'eaf' package +#' +#' @noRd +points.steps <- function(x) +{ + n <- nrow(x) + if (n == 1L) return(x) + x <- rbind(x, cbind(x=x[-1L, 1L, drop=FALSE], y=x[-n, 2L, drop=FALSE])) + idx <- c(as.vector(outer(c(0L, n), 1L:(n - 1L), "+")), n) + stopifnot(length(idx) == nrow(x)) + stopifnot(!anyDuplicated(idx)) + x[idx, ] +} diff --git a/R/plotDataSetList.R b/R/plotDataSetList.R index 5c51bac2..f4b76251 100644 --- a/R/plotDataSetList.R +++ b/R/plotDataSetList.R @@ -143,6 +143,7 @@ Plot.RT.Histogram <- function(dsList, ftarget, plot_mode = 'overlay', use.equal. #' the running times of the DataSetList at the target function values #' @export #' @examples +#' \dontshow{data.table::setDTthreads(1)} #' Plot.RT.ECDF_Per_Target(subset(dsl, funcId == 1), 14) Plot.RT.ECDF_Per_Target <- function(dsList, ftargets, scale.xlog = F) UseMethod("Plot.RT.ECDF_Per_Target", dsList) @@ -160,6 +161,7 @@ Plot.RT.ECDF_Per_Target <- function(dsList, ftargets, scale.xlog = F) #' the running times of the DataSetList #' @export #' @examples +#' \dontshow{data.table::setDTthreads(1)} #' Plot.RT.ECDF_Single_Func(subset(dsl, funcId == 1)) Plot.RT.ECDF_Single_Func <- function(dsList, fstart = NULL, fstop = NULL, fstep = NULL, show.per_target = F, @@ -175,6 +177,7 @@ Plot.RT.ECDF_Single_Func <- function(dsList, fstart = NULL, fstop = NULL, #' @return A radarplot of the area under the aggregated ECDF-curve of the DataSetList #' @export #' @examples +#' \dontshow{data.table::setDTthreads(1)} #' Plot.RT.ECDF_AUC(subset(dsl, funcId == 1)) Plot.RT.ECDF_AUC <- function(dsList, fstart = NULL, fstop = NULL, fstep = NULL, @@ -222,6 +225,7 @@ Plot.FV.Histogram <- function(dsList, runtime, plot_mode='overlay', use.equal.bi #' the fucntion values of the DataSetList at the target runtimes #' @export #' @examples +#' \dontshow{data.table::setDTthreads(1)} #' Plot.FV.ECDF_Per_Target(subset(dsl, funcId == 1), 10) Plot.FV.ECDF_Per_Target <- function(dsList, runtimes, scale.xlog = F, scale.reverse = F) UseMethod("Plot.FV.ECDF_Per_Target", dsList) @@ -240,6 +244,7 @@ Plot.FV.ECDF_Per_Target <- function(dsList, runtimes, scale.xlog = F, scale.reve #' the function values of the DataSetList #' @export #' @examples +#' \dontshow{data.table::setDTthreads(1)} #' Plot.FV.ECDF_Single_Func(subset(dsl, funcId == 1)) Plot.FV.ECDF_Single_Func <- function(dsList, rt_min = NULL, rt_max = NULL, rt_step = NULL, scale.xlog = F, @@ -255,6 +260,7 @@ Plot.FV.ECDF_Single_Func <- function(dsList, rt_min = NULL, rt_max = NULL, #' @return A radarplot of the area under the aggregated ECDF-curve of the DataSetList #' @export #' @examples +#' \dontshow{data.table::setDTthreads(1)} #' Plot.FV.ECDF_AUC(subset(dsl, funcId == 1)) Plot.FV.ECDF_AUC <- function(dsList, rt_min = NULL, rt_max = NULL, rt_step = NULL) UseMethod("Plot.FV.ECDF_AUC", dsList) @@ -316,6 +322,7 @@ Plot.FV.Parameters <- function(dsList, rt_min = NULL, rt_max = NULL, #' the running times of the DataSetList #' @export #' @examples +#' \dontshow{data.table::setDTthreads(1)} #' Plot.RT.ECDF_Multi_Func(dsl) Plot.RT.ECDF_Multi_Func <- function(dsList, targets = NULL, scale.xlog = F) UseMethod("Plot.RT.ECDF_Multi_Func", dsList) @@ -1003,8 +1010,10 @@ add_transparancy <- function(colors, percentage){ #' @param violin.showpoints Wheteher or not to show individual points when making a violinplot #' @param frame_attr Which attribute of the dataframe to use for the time element of the animation #' @param symbol_attr Which attribute of the dataframe to use for the scatter symbol +#' @param line.step Whether to plot lines as a step-function (T) or as linear interpolation (F, default) #' @param ... Additional parameters for the add_trace function #' +#' #' @export plot_general_data <- function(df, x_attr = 'ID', y_attr = 'vals', type = 'violin', legend_attr = 'ID', scale.xlog = F, scale.ylog = F, @@ -1012,7 +1021,7 @@ plot_general_data <- function(df, x_attr = 'ID', y_attr = 'vals', type = 'violin y_title = NULL, plot_title = NULL, upper_attr = NULL, lower_attr = NULL, subplot_attr = NULL, show.legend = F, inf.action = 'none', violin.showpoints = F, frame_attr = 'frame', - symbol_attr = 'run_nr', subplot_shareX = F, ...) { + symbol_attr = 'run_nr', subplot_shareX = F, line.step = F, ...) { l <- x <- isinf <- y <- text <- l_orig <- frame <- NULL #Set local binding to remove warnings @@ -1238,7 +1247,8 @@ plot_general_data <- function(df, x_attr = 'ID', y_attr = 'vals', type = 'violin linetype = ~l_orig, marker = list(size = getOption('IOHanalyzer.markersize', 4)), linetypes = dashes, colors = colors, showlegend = show.legend, - text = ~text, line = list(width = getOption('IOHanalyzer.linewidth', 2)), + text = ~text, line = list(width = getOption('IOHanalyzer.linewidth', 2), + shape = ifelse(line.step, "hv", "linear")), hovertemplate = '%{text}', ... ) @@ -1274,7 +1284,8 @@ plot_general_data <- function(df, x_attr = 'ID', y_attr = 'vals', type = 'violin type = 'scatter', mode = 'lines+markers', marker = list(size = getOption('IOHanalyzer.markersize', 4)), linetype = dashstyle, colors = colors, showlegend = show.legend, name = ~l, - text = y_atr, line = list(width = getOption('IOHanalyzer.linewidth', 2)), + text = y_atr, line = list(width = getOption('IOHanalyzer.linewidth', 2), + shape = ifelse(line.step, "hv", "linear")), ... ) ) @@ -1592,3 +1603,297 @@ Plot.cumulative_difference_plot <- function(dsList, runtime_or_target_value, isF return(fig) } + +#' Create EAF-based polygon plots +#' +#' +#' +#' @param df The dataframe containing the data to plot. This should come from `generate_data.EAF` +#' @param subplot_attr Which attribute of the dataframe to use for creating subplots +#' @param subplot_shareX Whether or not to share X-axis when using subplots +#' @param scale.xlog Logarithmic scaling of x-axis +#' @param scale.ylog Logarithmic scaling of y-axis +#' @param xmin Minimum value for the x-axis +#' @param xmax Maximum value for the x-axis +#' @param ymin Minimum value for the y-axis +#' @param ymax Maximum value for the y-axis +#' @param maximization Whether the data comes from maximization or minimization +#' @param scale.reverse Decreasing or increasing x-axis +#' @param x_title Title of x-axis. Defaults to x_attr +#' @param y_title Title of x-axis. Defaults to x_attr +#' @param plot_title Title of x-axis. Defaults to no title +#' @param p A previously existing plot on which to add traces. If NULL, a new canvas is created +#' @param show.colorbar Whether or not to include a colorbar +#' @param dt_overlay Dataframe containing additional data (e.g. quantiles) to plot +#' on top of the EAF. This should have a column labeled 'runtime'. The other columsn will +#' all be plotted as function values. +#' @param ... Additional parameters for the add_trace function +#' +#' @return An EAF plot +#' @export +#' @examples +#' \dontrun{ +#' plot_eaf_data(generate_data.EAF(subset(dsl, ID==get_id(dsl)[[1]])), maximization=T) +#' } +plot_eaf_data <- function(df, maximization = F, scale.xlog = F, scale.ylog = F, + scale.reverse = F, p = NULL, x_title = NULL, + xmin = NULL, xmax = NULL, ymin = NULL, ymax = NULL, + y_title = NULL, plot_title = NULL, subplot_attr = NULL, + show.colorbar = F, subplot_shareX = F, dt_overlay = NULL, + ...) { + + l <- x <- isinf <- y <- text <- l_orig <- frame <- NULL #Set local binding to remove warnings + + #Deal with subplots + if (!is.null(subplot_attr)) { + if (!subplot_attr %in% colnames(df)) { + stop("Provided subplot-attribut is not a colname of the selected data.table.") + } + colnames(df)[colnames(df) == subplot_attr] <- "subplot_attr" + if (!is.null(dt_overlay)){ + colnames(dt_overlay)[colnames(dt_overlay) == subplot_attr] <- "subplot_attr" + } + attrs <- unique(df[, subplot_attr]) + if (length(attrs) == 0) stop("Attempting to create subplots with fewer than 2 unique values of + `subplot_attrs`-column") + if (length(attrs) == 1) return(plot_eaf_data(df, scale.xlog=scale.xlog, scale.ylog=scale.ylog, + scale.reverse=scale.reverse, p=p, x_title=x_title, + xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, + y_title = y_title, show.colorbar = show.colorbar, + subplot_attr = NULL, dt_overlay = dt_overlay,...)) + + #Get some number of rows and columns + n_cols <- 1 + ceiling(length(attrs)/10) + n_rows <- ceiling(length(attrs) / n_cols) + + p <- lapply(seq(length(attrs)), function(idx) { + attr_val <- attrs[[idx]] + df_sub <- df[subplot_attr == attr_val] + dt_overlay_sub <- dt_overlay[subplot_attr == attr_val] + disp_y <- idx %% n_cols == 1 + disp_x <- idx > (length(attrs) - n_cols) + x.title = if (disp_x) x_title else "" + y.title = if (disp_y) y_title else "" + + #Generate title for the subplots + if (stri_detect_regex(subplot_attr, "(?i)fun")) + sub_title <- paste0('F', attr_val) + else if (stri_detect_regex(subplot_attr, "(?i)dim")) + sub_title <- paste0('D', attr_val) + else + sub_title <- paste0(attr_val) + p <- NULL + p <- plot_eaf_data(df_sub, scale.xlog=scale.xlog, scale.ylog=scale.ylog, + scale.reverse=scale.reverse, p=p, x_title=x_title, + xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, + y_title = y_title, show.colorbar = F, subplot_attr = NULL, + dt_overlay = dt_overlay_sub, ...) + if (getOption("IOHanalyzer.annotation_x", 0.5) >= 0 & + getOption("IOHanalyzer.annotation_y", 1) >= 0) { + p %<>% layout( + annotations = list( + text = sub_title, + font = f2, showarrow = FALSE, + xref = "paper", yref = "paper", + x = getOption("IOHanalyzer.annotation_x", 0.5), + y = getOption("IOHanalyzer.annotation_y", 1) + ) + ) + p + } + + }) + + p <- subplot( + p, nrows = n_rows, titleX = T, titleY = T, + margin = c(getOption("IOHanalyzer.margin_horizontal", 0.02), + getOption("IOHanalyzer.margin_vertical", 0.02), + getOption("IOHanalyzer.margin_horizontal", 0.02), + getOption("IOHanalyzer.margin_vertical", 0.02)), + shareX = subplot_shareX + ) %>% + layout(title = plot_title) + return(p) + } + + xscale <- if (scale.xlog) 'log' else 'linear' + yscale <- if (scale.ylog) 'log' else 'linear' + + + + + #If new plot is needed, create one. Store in bool to decide if axis scaling is needed. + is_new_plot <- F + if (is.null(p)) { + p <- IOH_plot_ly_default(x.title = x_title, + y.title = y_title, + title = plot_title) + is_new_plot <- T + } + + eaf_sets <- df$`percentage` + uniq_eaf_sets <- unique(eaf_sets) + att_surfs <- split.data.frame(df[,.(`runtime`, `f(x)`)], + factor(eaf_sets, + levels = uniq_eaf_sets, + labels = uniq_eaf_sets)) + cols <- rev(viridis(length(att_surfs))) + if (maximization) + extreme = as.matrix(df[, .(runtime=max(`runtime`), `f(x)`=min(`f(x)`))]) + else + extreme = as.matrix(df[, .(runtime=max(`runtime`), `f(x)`=max(`f(x)`))]) + + for (i in seq_along(att_surfs)) { + poli <- add.extremes(points.steps(as.matrix(att_surfs[[i]])), as.matrix(extreme), c(F,maximization)) + poli <- rbind(poli, extreme) + + p %<>% add_polygons(poli[,'runtime'], poli[,'f(x)'], alpha=1, fillcolor=cols[i], + line=list(width=0), name=names(att_surfs)[i], showlegend=F) + } + + # Set axis ranges + xmin <- ifelse((is.null(xmin) || xmin == ""), min(df$`runtime`), as.numeric(xmin)) + xmax <- ifelse((is.null(xmax) || xmax == ""), max(df$`runtime`), as.numeric(xmax)) + if (scale.xlog) { + xmin <- log10(xmin) + xmax <- log10(xmax) + } + ymin <- ifelse((is.null(ymin) || ymin == ""), min(df$`f(x)`), as.numeric(ymin)) + ymax <- ifelse((is.null(ymax) || ymax == ""), max(df$`f(x)`), as.numeric(ymax)) + if (scale.ylog) { + ymin <- log10(ymin) + ymax <- log10(ymax) + } + yrange <- c(ymin, ymax) + if (scale.reverse) yrange <- rev(yrange) + + p %<>% layout(xaxis = list(type = xscale, tickfont = f3(), ticklen = 3, + range = c(xmin, xmax)), + yaxis = list(type = yscale, tickfont = f3(), ticklen = 3, + range = yrange)) + + if (!is.null(dt_overlay)) { + cnames <- colnames(dt_overlay) + if (!('runtime' %in% cnames)) { + warning('dt_overlay needs to contain a columns labeled `runtime` to be used.') + } else { + for (cname in cnames[!(cnames %in% c('runtime', 'subplot_attr', 'ID'))]) { + p %<>% add_trace(x=dt_overlay[['runtime']], y=dt_overlay[[cname]], type='scatter', + mode='lines', name = cname, + line = list(width = getOption('IOHanalyzer.linewidth', 2), + color = 'black', shape = 'hv') + ) + } + } + } + if (show.colorbar) { + p %<>% add_contour(z=matrix(-0.1,1.1), zmin=-0.1, zmax=1.1, colorscale='Viridis', + contours = list(coloring='fill'), reversescale = T) + p %<>% colorbar(cmin=-0, cmax=1, thickness=0.03, thicknessmode='fraction', len=1, + tickvals=c(0,0.5,1), tickmode='array', outlinewidth=1, title='Fraction') + } + + p + + return(p) +} + + + +#' Create EAF-difference contour plots +#' +#' +#' +#' @param matrices The dataframes containing the data to plot. This should come from `generate_data.EAF_diff_Approximate` +#' @param scale.xlog Logarithmic scaling of x-axis +#' @param scale.ylog Logarithmic scaling of y-axis +#' @param zero_transparant Whether values of 0 should be made transparant or not +#' @param show_negatives Whether to also show negative values or not +#' +#' @return EAF difference plots +#' @export +#' @examples +#' \dontrun{ +#' plot_eaf_differences(generate_data.EAF_diff_Approximate(subset(dsl, funcId == 1), 1, 50, 1, 16)) +#' } +plot_eaf_differences <- function(matrices, scale.xlog = T, scale.ylog = F, zero_transparant = F, + show_negatives = F) { + + + xscale <- if (scale.xlog) 'log' else 'linear' + yscale <- if (scale.ylog) 'log' else 'linear' + + + show_colorbar <- T + ids <- names(matrices) + ps <- lapply(seq(length(ids)), function(idx) { + diff <- matrices[[idx]] + if (!show_negatives) + diff[diff< 0 ] = 0 + id <- ids[[idx]] + x <- as.numeric(colnames(diff)) + y <- as.numeric(rownames(diff)) + + p <- IOH_plot_ly_default('', 'Function Evaluations', 'f(x)') + if (zero_transparant) + diff[diff == 0] = NaN + + if (all(is.na(diff))) { + p %<>% add_trace(z = 0, type = "contour", x=x,y=y, + line = list(smoothing = 0), + contours = list( + start = ifelse(show_negatives, -1, 0), + end = 1, coloring='fill', showlines=F + ), colorscale = ifelse(show_negatives, 'BuRd_r' , 'Viridis'), + reversescale=show_negatives, + name = id) + } else { + p %<>% add_trace(z = diff, type = "contour", x=x,y=y, + line = list(smoothing = 0), + contours = list( + start = ifelse(show_negatives, -1, 0), + end = 1, coloring='fill', showlines=F + ), colorscale = ifelse(show_negatives, 'BuRd_r' , 'Viridis'), + reversescale=show_negatives, + name = id) + } + p %<>% layout(yaxis = list(type = yscale, ticklen = 3)) + p %<>% layout(xaxis = list(type = xscale, ticklen = 3)) + if (show_colorbar) { + show_colorbar <<- F + } else { + p %<>% hide_colorbar() + } + + if (getOption("IOHanalyzer.annotation_x", 0.5) >= 0 & + getOption("IOHanalyzer.annotation_y", 1) >= 0) { + p %<>% layout( + annotations = list( + text = id, + font = f2, showarrow = FALSE, + xref = "paper", yref = "paper", + x = getOption("IOHanalyzer.annotation_x", 0.5), + y = getOption("IOHanalyzer.annotation_y", 1) + ) + ) + } + + # p %<>% add_trace(x=x, y=fv_sum[ , .(max = max(max)), by = runtime]$max, color='white', type = "scatter", mode = "line", showlegend=F, alpha=0.4, name='max') + # p %<>% add_trace(x=x, y=fv_sum[ , .(min = min(min)), by = runtime]$min, color='white', type = "scatter", mode = "line", showlegend=F, alpha=0.4, name='min') + p + }) + + n_cols <- 1 + ceiling(length(matrices)/10) + n_rows <- ceiling(length(matrices) / n_cols) + p <- subplot( + ps, nrows = n_rows, titleX = T, titleY = T, + margin = c(getOption("IOHanalyzer.margin_horizontal", 0.02), + getOption("IOHanalyzer.margin_vertical", 0.02), + getOption("IOHanalyzer.margin_horizontal", 0.02), + getOption("IOHanalyzer.margin_vertical", 0.02)), + shareX = T, shareY = T + ) + p + + return(p) +} diff --git a/R/stats.R b/R/stats.R index 3372e062..5d62bc07 100644 --- a/R/stats.R +++ b/R/stats.R @@ -569,13 +569,18 @@ glicko2_ranking <- function(dsl, nr_rounds = 100, which = 'by_FV', target_dt = N x_arr <- get_FV_sample(dsl_s, target) win_operator <- ifelse(attr(dsl, 'maximization'), `>`, `<`) } + if (is.null(alg_names)) alg_names <- x_arr[,3] vals = array(dim = c(n_algs,ncol(x_arr) - 4)) for (i in seq(1,n_algs)) { + ds <- subset(dsl_s, algId == alg_names[i][[1]]) + n_valid <- length(attr(ds[[1]], 'instance')) + z <- x_arr[i] y <- as.numeric(z[,5:ncol(x_arr)]) - vals[i,] = y + + + vals[i,] = sample(y[1:n_valid], length(vals[i,]), replace = TRUE) } - if (is.null(alg_names)) alg_names <- x_arr[,3] for (i in seq(1,n_algs)) { for (j in seq(i,n_algs)) { @@ -733,6 +738,7 @@ set_DSC_credentials <- function(username, password) { paste0(repo_dir, "/config.rds")) } else { + keyring::keyring_unlock(password="") keyring::key_set_with_value("DSCtool", password = password) keyring::key_set_with_value("DSCtool_name", password = username) } @@ -756,6 +762,7 @@ get_DSC_credentials <- function() { return(list(name = data$DSCusername, pwd = data$DSCpassword)) } else { + keyring::keyring_unlock(password="") return(list(name = keyring::key_get("DSCtool_name"), pwd = keyring::key_get("DSCtool"))) } @@ -994,7 +1001,9 @@ get_marg_contrib_ecdf <- function(id, perm, j, dt) { #' #' @export #' @examples -#' get_shapley_values(dsl, get_ECDF_targets(dsl)) +#' \dontshow{data.table::setDTthreads(1)} +#' dsl_sub <- subset(dsl, funcId == 1) +#' get_shapley_values(dsl_sub, get_ECDF_targets(dsl_sub), group_size = 2) get_shapley_values <- function(dsList, targets, scale.log = T, group_size = 5, max_perm_size = 10, normalize = T){ hit <- NULL #Bind to avoid notes ids_full <- get_id(dsList) diff --git a/README.md b/README.md index 1e0284f1..aaa4900e 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![metacran downloads](https://cranlogs.r-pkg.org/badges/IOHanalyzer)](https://cran.r-project.org/package=IOHanalyzer) [![CRAN_Status_Badge_version_last_release](https://www.r-pkg.org/badges/version-last-release/IOHanalyzer)](https://cran.r-project.org/package=IOHanalyzer) -[![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/licenses/BSD-3-Clause) +[![License](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://opensource.org/license/bsd-3-clause/) [![R-CMD-check](https://github.com/IOHprofiler/IOHanalyzer/workflows/R-CMD-check/badge.svg)](https://github.com/IOHprofiler/IOHanalyzer/actions) @@ -12,7 +12,7 @@ The __performance analyzer__ for **I**terative **O**ptimization **H**euristics ( * __Documentation__: [https://arxiv.org/abs/2007.03953](https://arxiv.org/abs/2007.03953) * __Wiki page__: [https://iohprofiler.github.io/IOHanalyzer/](https://iohprofiler.github.io/IOHanalyzer/) * __Bug reports__: [https://github.com/IOHprofiler/IOHAnalyzer/issues](https://github.com/IOHprofiler/IOHAnalyzer/issues) -* __Online service__: [http://iohanalyzer.liacs.nl](http://iohanalyzer.liacs.nl) *Due to server migrations, the webpage will be unavailable until June 16th* +* __Online service__: [https://iohanalyzer.liacs.nl](https://iohanalyzer.liacs.nl) * __General Contact__: [mailto:iohprofiler@liacs.leidenuniv.nl](mailto:iohprofiler@liacs.leidenuniv.nl) * __Mailing List__: [https://lists.leidenuniv.nl/mailman/listinfo/iohprofiler](https://lists.leidenuniv.nl/mailman/listinfo/iohprofiler) @@ -28,7 +28,7 @@ It _provides_: It is _built mainly on_: -* `R` packages [Shiny](https://shiny.rstudio.com/), [Plotly](https://plotly.com/) and [Rcpp](http://www.rcpp.org/). +* `R` packages [Shiny](https://shiny.posit.co/), [Plotly](https://plotly.com/) and [Rcpp](https://www.rcpp.org/). It is _available through_: @@ -38,7 +38,7 @@ It is _available through_: ## Online Service -A free server [http://iohprofiler.liacs.nl](http://iohprofiler.liacs.nl) running the stable version of __IOHanalyzer__ is hosted in [Leiden Institute of Advanced Computer Science](https://liacs.leidenuniv.nl/). You're welcome to check it out! +A free server [https://iohanalyzer.liacs.nl](http://iohprofiler.liacs.nl) running the stable version of __IOHanalyzer__ is hosted in [Leiden Institute of Advanced Computer Science](https://liacs.leidenuniv.nl/). You're welcome to check it out! ## Installation @@ -100,7 +100,7 @@ We provide a docker file for deploying __IOHanalyzer__ on the server. Please see Specific formats are required to load your benchmark data to **IOHanalyzer**. If your data sets are generated in the format of -* **COCO/BBOB** data format as regulated in [https://hal.inria.fr/inria-00362649](https://hal.inria.fr/inria-00362649), +* **COCO/BBOB** data format as regulated in [https://inria.hal.science/inria-00362649](https://inria.hal.science/inria-00362649), * **Nevergrad** data format (explained in [https://github.com/facebookresearch/nevergrad](https://github.com/facebookresearch/nevergrad)), or * **IOHprofiler** data format, which is motivated and modified from **COCO** data format, diff --git a/cran-comments.md b/cran-comments.md index 0dd20e65..56bb4fdb 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,9 +1,9 @@ ## Submission status -This is a new feature update submission, increasing to version 0.1.7 +This is a new feature update submission, increasing to version 0.1.8.1 (minor update after problems passing pre-check) -This release implements several new features, including new -visualization options, custom csv-file support and basic functionality for -constrained optimization. +This release implements several new features, primarily adding new analysis in the +form of empirical attainment functions. +This also addresses the @docType problem which we have been requested to fix by CRAN ## Test environments * Local Windows install, R 4.1.2 diff --git a/inst/shiny-server/global.R b/inst/shiny-server/global.R index 6e0b7502..99a78106 100644 --- a/inst/shiny-server/global.R +++ b/inst/shiny-server/global.R @@ -106,6 +106,7 @@ print_html <- function(s, widget_id = 'process_data_promt') # download file names: csv, image --------------------- AUC_ECDF_aggr_name <- parse(text = "paste0('AUC_ECDF_MULTI.', input$RTECDF.AUC.Table.Format)") +AUC_EAF_ECDF_aggr_name <- parse(text = "paste0('AUC_EAF.', input$EAF.AUC.Table.Format)") overview_single_name <- parse(text = "paste0('Overview-', paste0(input$Overall.Dim, 'D'), paste0('F', input$Overall.Funcid), '.', input$Overview.Single.Format)") overview_all_name <- parse(text = "paste0('Overview-All-', '.', input$Overview.All.Format)") @@ -191,6 +192,12 @@ FIG_NAME_RT_SHAPLEY <- parse(text = "paste0('RT-Shapley-', Sys.Date(), '.', inpu RT_NG_heatmap_name <- parse(text = "paste0('RT-NG_Heatmap-', Sys.Date(), '.', input$RT_NG.Heatmap.Format)") FV_NG_heatmap_name <- parse(text = "paste0('FV-NG_Heatmap-', Sys.Date(), '.', input$FV_NG.Heatmap.Format)") +FIG_NAME_EAF <- parse(text = "paste0('EAF-', Sys.Date(), '.', input$EAF.Single.Format)") +FIG_NAME_EAFCDF <- parse(text = "paste0('EAFCDF-', Sys.Date(), '.', input$EAF.CDF.Format)") +FIG_NAME_EAFDiff <- parse(text = "paste0('EAFDiff-', Sys.Date(), '.', input$EAF.Diff.Format)") +FIG_NAME_EAFMultiCDF <- parse(text = "paste0('EAF_multiCDF-', Sys.Date(), '.', input$EAF.MultiCDF.Format)") +AUC_ECDFEAF_aggr_name <- parse(text = "paste0('AUC_ECDFEAF_MULTI.', input$RTECDF.AUC.Table.Format)") + # ID of the control widget, whose current value should de always recorded and restored ---- # those control widget are switched on and off widget_id <- c('RTSummary.Statistics.Min', @@ -244,7 +251,16 @@ widget_id <- c('RTSummary.Statistics.Min', 'FCEECDF.AUC.Min', 'FCEECDF.AUC.Max', 'FCEECDF.AUC.Step', - 'FCEECDF.Single.Target') + 'FCEECDF.Single.Target', + 'EAF.Single.Min', + 'EAF.Single.Max', + 'EAF.Single.yMin', + 'EAF.Single.yMax', + 'EAF.CDF.yMin', + 'EAF.CDF.yMax', + 'EAF.Diff.yMin', + 'EAF.Diff.yMax' + ) eventExpr <- parse(text = paste0('{', paste(paste0('input$', widget_id), collapse = "\n"), '}')) diff --git a/inst/shiny-server/server/EAF_plots.R b/inst/shiny-server/server/EAF_plots.R new file mode 100644 index 00000000..9e2e96bb --- /dev/null +++ b/inst/shiny-server/server/EAF_plots.R @@ -0,0 +1,280 @@ +### EAF for single functions + +output$EAF.Single_Plot <- renderPlotly({ + req(length(DATA()) > 0) + render_EAF_Plot() +}) + +get_data_EAF <- reactive({ + input$EAF.Single.Refresh + dsList <- subset(DATA(), ID %in% input$EAF.Single.Algs) + + generate_data.EAF(dsList, n_sets = input$EAF.Single.levels, + subsampling = 50*input$EAF.Single.Subsampling, + scale_xlog = input$EAF.Single.Logx) +}) + + +render_EAF_Plot <- reactive({ + withProgress({ + dt_eaf <- get_data_EAF() + + if (input$EAF.Single.Problines) { + dsList <- subset(DATA(), ID %in% input$EAF.Single.Algs) + dt_fv <- get_FV_sample(dsList, unique(dt_eaf$runtime), output='long') + dt_lines <- dt_fv[, .(median = median(`f(x)`), per25 = quantile(`f(x)`, 0.25), + per75 = quantile(`f(x)`, 0.75), ID=`ID` ), by='runtime'] + + dt_lines <- dt_lines %>% set_colnames(c('runtime', 'median', '25%', '75%', 'ID')) + + } else { + dt_lines <- NULL + } + + plot_eaf_data(dt_eaf, attr(DATA(), 'maximization'), + scale.xlog = input$EAF.Single.Logx, scale.ylog = input$EAF.Single.Logy, + xmin = input$EAF.Single.Min, xmax = input$EAF.Single.Max, + ymin = input$EAF.Single.yMin, ymax = input$EAF.Single.yMax, + subplot_attr = 'ID', x_title = "Funciton Evaluations", + y_title = "f(x)", show.colorbar = input$EAF.Single.Colorbar, + dt_overlay = dt_lines) + }, + message = "Creating plot") +}) + +output$EAF.Single.Download <- downloadHandler( + filename = function() { + eval(FIG_NAME_EAF) + }, + content = function(file) { + save_plotly(render_EAF_Plot(), file) + }, + contentType = paste0('image/', input$EAF.Single.Format) +) + + +### EAF-based ECDF +output$EAF.CDF_Plot <- renderPlotly({ + req(length(DATA()) > 0) + render_EAFCDF_Plot() +}) + +get_data_EAFCDF <- reactive({ + dsList <- subset(DATA(), ID %in% input$EAF.CDF.Algs) + dt_eaf <- generate_data.EAF(dsList, + subsampling = 50*input$EAF.CDF.Subsampling, + scale_xlog = input$EAF.CDF.Logx) + + dt_ecdf <- rbindlist(lapply(input$EAF.CDF.Algs, function(id) { + dt_sub <- dt_eaf[ID == id, ] + temp <- generate_data.ECDF_From_EAF(dt_sub, + min_val = input$EAF.CDF.yMin, + max_val = input$EAF.CDF.yMax, scale_log=input$EAF.CDF.Logy) + temp$ID <- id + temp + })) +}) + +render_EAFCDF_Plot <- reactive({ + withProgress({ + plot_general_data(get_data_EAFCDF(), 'x', 'mean', 'line', 'ID', + scale.xlog = input$EAF.CDF.Logx, scale.ylog = F, x_title = "Function Evaluations", + y_title = "Fraction") + }, + message = "Creating plot") +}) + +output$EAF.CDF.Download <- downloadHandler( + filename = function() { + eval(FIG_NAME_EAFCDF) + }, + content = function(file) { + save_plotly(render_EAFCDF_Plot(), file) + }, + contentType = paste0('image/', input$EAF.CDF.Format) +) + +### EAF-differences + +output$EAF.Diff_Plot <- renderPlotly({ + req(length(DATA()) > 0) + render_EAFDiff_Plot() +}) + +get_data_EAFDiff <- reactive({ + dsList <- subset(DATA(), ID %in% input$EAF.Diff.Algs) + matrices <- generate_data.EAF_diff_Approximate(dsList, input$EAF.Diff.Min, input$EAF.Diff.Max, + input$EAF.Diff.yMin, input$EAF.Diff.yMax) + + matrices +}) + +render_EAFDiff_Plot <- reactive({ + withProgress({ + plot_eaf_differences(get_data_EAFDiff(), scale.xlog = input$EAF.Diff.Logx, + scale.ylog = input$EAF.Diff.Logy, zero_transparant = input$EAF.Diff.ZeroTransparant, + show_negatives = input$EAF.Diff.ShowNegatives) + }, + message = "Creating plot") +}) + +output$EAF.Diff.Download <- downloadHandler( + filename = function() { + eval(FIG_NAME_EAFDiff) + }, + content = function(file) { + save_plotly(render_EAFDiff_Plot(), file) + }, + contentType = paste0('image/', input$EAF.Diff.Format) +) + + +### ### Multi-function view ### ### + +### EAF for multiple functions + +output$EAF.Multi_Plot <- renderPlotly({ + req(length(DATA_RAW()) > 0) + render_EAF_multi_Plot() +}) + +get_data_EAF_multi <- reactive({ + input$EAF.Multi.Refresh + dsList <- subset(DATA_RAW(), ID %in% input$EAF.Multi.Algs, funcId %in% input$EAF.Multi.FuncIds, DIM == input$Overall.Dim ) + + generate_data.EAF(dsList, n_sets = input$EAF.Multi.levels, subsampling = 50*input$EAF.Multi.Subsampling, + scale_xlog = input$EAF.Multi.Logx, xmin=input$EAF.Multi.Min, xmax=input$EAF.Multi.Max) +}) + +render_EAF_multi_Plot <- reactive({ + withProgress({ + dt_eaf <- get_data_EAF_multi() + + if (input$EAF.Multi.Problines) { + dsList <- subset(DATA_RAW(), ID %in% input$EAF.Multi.Algs, funcId %in% input$EAF.Multi.FuncIds, DIM == input$Overall.Dim ) + + dt_fv <- get_FV_sample(dsList, unique(dt_eaf$runtime), output='long') + dt_lines <- dt_fv[, .(median = median(`f(x)`), per25 = quantile(`f(x)`, 0.25), + per75 = quantile(`f(x)`, 0.75), ID=`ID` ), by='runtime'] + + dt_lines <- dt_lines %>% set_colnames(c('runtime', 'median', '25%', '75%', 'ID')) + + } else { + dt_lines <- NULL + } + + plot_eaf_data(dt_eaf, attr(DATA_RAW(), 'maximization'), + scale.xlog = input$EAF.Multi.Logx, scale.ylog = input$EAF.Multi.Logy, + xmin = input$EAF.Multi.Min, xmax = input$EAF.Multi.Min, + ymin = input$EAF.Multi.yMin, ymax = input$EAF.Multi.yMax, + subplot_attr = 'ID', x_title = "Function Evaluations", + y_title = "f(x)", show.colorbar = input$EAF.Multi.Colorbar, + dt_overlay = dt_lines) + }, + message = "Creating plot") +}) + +output$EAF.Multi.Download <- downloadHandler( + filename = function() { + eval(FIG_NAME_EAF) + }, + content = function(file) { + save_plotly(render_EAF_multi_Plot(), file) + }, + contentType = paste0('image/', input$EAF.Multi.Format) +) + + +### EAF-based ECDF +output$EAF.MultiCDF_Plot <- renderPlotly({ + req(length(DATA_RAW()) > 0) + render_EAFMultiCDF_Plot() +}) + +get_data_EAFMultiCDF <- reactive({ + input$EAF.MultiCDF.Refresh + dsList <- subset(DATA_RAW(), ID %in% isolate(input$EAF.MultiCDF.Algs), + funcId %in% isolate(input$EAF.MultiCDF.FuncIds), + DIM == isolate(input$Overall.Dim)) + dt_eaf <- generate_data.EAF(dsList, subsampling = 50*isolate(input$EAF.MultiCDF.Subsampling), + scale_xlog = isolate(input$EAF.MultiCDF.Logx), + xmin = isolate(input$EAF.MultiCDF.xMin), + xmax = isolate(input$EAF.MultiCDF.xMax), n_sets = 100) + max_rt <- max(dt_eaf[,'runtime']) + dt_eMultiCDF <- rbindlist(lapply(isolate(input$EAF.MultiCDF.Algs), function(id) { + dt_sub <- dt_eaf[ID == id, ] + temp <- generate_data.ECDF_From_EAF(dt_sub, + min_val = isolate(input$EAF.MultiCDF.yMin), + max_val = isolate(input$EAF.MultiCDF.yMax), + scale_log=isolate(input$EAF.MultiCDF.Logy)) + if (max(temp[,'x']) < max_rt){ + extreme <- data.table("x" = max_rt, + "ID" = id, + "mean" = max(temp[,'mean'])) + temp <- rbind(temp, extreme) + } + temp$ID <- id + temp + })) + dt_eMultiCDF +}) + +render_EAFMultiCDF_Plot <- reactive({ + withProgress({ + plot_general_data(get_data_EAFMultiCDF(), 'x', 'mean', 'line', 'ID', + scale.xlog = isolate(input$EAF.MultiCDF.Logx), scale.ylog = F, x_title = "Function Evaluations", + y_title = "Fraction", line.step = T, show.legend = T) + }, + message = "Creating plot") +}) + +output$EAF.MultiCDF.Download <- downloadHandler( + filename = function() { + eval(FIG_NAME_EAFMultiCDF) + }, + content = function(file) { + save_plotly(render_EAFMultiCDF_Plot(), file) + }, + contentType = paste0('image/', input$EAF.MultiCDF.Format) +) + + +output$EAF.AUC.Table.Download <- downloadHandler( + filename = function() { + eval(AUC_ECDFEAF_aggr_name) + }, + content = function(file) { + save_table(auc_eaf_grid_table(), file) + } +) + +auc_eaf_grid_table <- reactive({ + dt_ecdf <- get_data_EAFMultiCDF() + df <- generate_data.AUC(NULL, NULL, dt_ecdf = dt_ecdf, normalize = input$EAF.MultiCDF.Normalize_AUC) + if (input$EAF.MultiCDF.Normalize_AUC) + df$aoc <- 1-df$auc + else + df$aoc <- df$x-df$auc + colnames(df) <- c("ID", "Runtime", "AUC", "AOC") + df +}) + +output$AUC_EAF_GRID_GENERATED <- DT::renderDataTable({ + req(length(DATA_RAW()) > 0) + auc_eaf_grid_table() +}, +editable = FALSE, +rownames = FALSE, +options = list( + pageLength = 10, + lengthMenu = c(5, 10, 25, -1), + scrollX = T, + server = T, + columnDefs = list( + list( + className = 'dt-right', targets = "_all" + ) + ) +), +filter = 'top' +) diff --git a/inst/shiny-server/server/FCEPlot.R b/inst/shiny-server/server/FCEPlot.R index 1fc4748d..838b6bbd 100644 --- a/inst/shiny-server/server/FCEPlot.R +++ b/inst/shiny-server/server/FCEPlot.R @@ -96,7 +96,7 @@ get_data_FCE_multi_func_bulk <- reactive({ if (length(get_id(data)) < 20) { #Arbitrary limit for the time being rbindlist(lapply(get_funcId(data), function(fid) { generate_data.Single_Function(subset(data, funcId == fid), scale_log = input$FCEPlot.Multi.Logx, - which = 'by_FV', start = start, stop = end) + which = 'by_FV', start = start, stop = end, include_geom_mean = T) })) } else @@ -128,10 +128,11 @@ get_data_FCEPlot_multi <- reactive({ render_FCEPlot_multi_plot <- reactive({ withProgress({ - plot_general_data(get_data_FCEPlot_multi(), x_attr = 'runtime', y_attr = 'mean', + plot_general_data(get_data_FCEPlot_multi(), x_attr = 'runtime', y_attr = input$FCEPlot.Multi.Geom_mean, subplot_attr = 'funcId', type = 'line', scale.xlog = input$FCEPlot.Multi.Logx, scale.ylog = input$FCEPlot.Multi.Logy, x_title = 'Function Evaluations', y_title = 'Best-so-far f(x)', show.legend = T, subplot_shareX = T) + }, message = "Creating plot") }) diff --git a/inst/shiny-server/server/FCE_ECDF.R b/inst/shiny-server/server/FCE_ECDF.R index ab393d5b..8b474a07 100644 --- a/inst/shiny-server/server/FCE_ECDF.R +++ b/inst/shiny-server/server/FCE_ECDF.R @@ -16,7 +16,7 @@ render_ecdf_per_target <- reactive({ x_title = "Target Value", y_title = "Proportion of runs", scale.xlog = input$FCEECDF.Single.Logx, show.legend = T, - scale.reverse = !attr(DATA()[[1]], 'maximization')) + scale.reverse = !attr(DATA()[[1]], 'maximization'), line.step = T) }, message = "Creating plot") }) @@ -71,7 +71,8 @@ render_FV_ECDF_AGGR <- reactive({ x_title = "Target Value", y_title = "Proportion of (run, target) pairs", scale.xlog = input$FCEECDF.Mult.Logx, - scale.reverse = !attr(DATA()[[1]], 'maximization'), show.legend = T) + scale.reverse = !attr(DATA()[[1]], 'maximization'), + show.legend = T, line.step = T) }, message = "Creating plot") }) diff --git a/inst/shiny-server/server/RT_ECDF.R b/inst/shiny-server/server/RT_ECDF.R index 2ef841fb..0bf5f8c0 100644 --- a/inst/shiny-server/server/RT_ECDF.R +++ b/inst/shiny-server/server/RT_ECDF.R @@ -38,7 +38,7 @@ render_RT_ECDF_MULT <- reactive({ scale.ylog = input$RTECDF.Aggr.Logy, x_title = "Function Evaluations", y_title = "Proportion of (run, target, ...) pairs", - show.legend = T) + show.legend = T, line.step = T) }, message = "Creating plot") }) @@ -194,7 +194,9 @@ output$RT_ECDF <- renderPlotly({ req(input$RTECDF.Single.Target) plot_general_data(get_data_RT_ECDF_Single(), 'x', 'mean', 'line', x_title = "Function Evaluations", - y_title = "Proportion of runs", scale.xlog = input$RTECDF.Single.Logx, show.legend = T) + y_title = "Proportion of runs", + scale.xlog = input$RTECDF.Single.Logx, show.legend = T, + line.step = T) # ftargets <- as.numeric(format_FV(input$RTECDF.Single.Target)) # data <- subset(DATA(), algId %in% input$RTECDF.Single.Algs) # Plot.RT.ECDF_Per_Target(data, ftargets, scale.xlog = input$RTECDF.Single.Logx) @@ -258,7 +260,8 @@ render_RT_ECDF_AGGR <- reactive({ plot_general_data(get_data_RT_ECDF_AGGR(), 'x', 'mean', 'line', x_title = "Function Evaluations", y_title = "Proportion of (run, target) pairs", - scale.xlog = input$RTECDF.Multi.Logx, show.legend = T) + scale.xlog = input$RTECDF.Multi.Logx, show.legend = T, + line.step = T) # Plot.RT.ECDF_Single_Func( # data, fstart, fstop, fstep, diff --git a/inst/shiny-server/server/upload.R b/inst/shiny-server/server/upload.R index c472cdf4..4fdafafd 100644 --- a/inst/shiny-server/server/upload.R +++ b/inst/shiny-server/server/upload.R @@ -458,6 +458,7 @@ observe({ DIMs <- get_dim(data) algIds <- get_algId(data) runtimes <- get_runtimes(data) + fvals <- get_funvals(data) selected_ds <- data[[1]] selected_f <- attr(selected_ds,'funcId') @@ -533,6 +534,9 @@ observe({ updateSelectInput(session, 'FV_Stats.Glicko.Funcid', choices = funcIds, selected = selected_f) updateSelectInput(session, 'FV_Stats.Glicko.Dim', choices = DIMs, selected = selected_dim) + updateSelectInput(session, 'EAF.Multi.FuncIds', choices = funcIds, selected = funcIds) + updateSelectInput(session, 'EAF.MultiCDF.FuncIds', choices = funcIds, selected = funcIds) + updateSelectInput(session, 'FV_Stats.Overview.ID', choices = IDs, selected = IDs) updateSelectInput(session, 'RTSummary.Statistics.ID', choices = IDs, selected = IDs) updateSelectInput(session, 'RTSummary.Overview.ID', choices = IDs, selected = IDs) @@ -569,6 +573,13 @@ observe({ updateSelectInput(session, 'FCEECDF.Single.Algs', choices = IDs, selected = IDs) updateSelectInput(session, 'FCEECDF.Mult.Algs', choices = IDs, selected = IDs) updateSelectInput(session, 'FCEECDF.AUC.Algs', choices = IDs, selected = IDs) + + updateSelectInput(session, 'EAF.Single.Algs', choices = IDs, selected = IDs[[1]]) + updateSelectInput(session, 'EAF.CDF.Algs', choices = IDs, selected = IDs[[1]]) + updateSelectInput(session, 'EAF.Multi.Algs', choices = IDs, selected = IDs[[1]]) + updateSelectInput(session, 'EAF.MultiCDF.Algs', choices = IDs, selected = IDs[[1]]) + updateSelectInput(session, 'EAF.Diff.Algs', choices = IDs, selected = IDs) + updateSelectInput(session, 'ParCoordPlot.Algs', choices = IDs, selected = IDs[[1]]) updateSelectInput(session, 'FV_PAR.CorrPlot.Param1', choices = c(parIds_RT_, 'f(x)'), selected = 'f(x)') if (length(parIds_RT_) == 0) @@ -586,6 +597,9 @@ observe({ updateSelectInput(session, 'Settings.ID.Variables', choices = attr_choices[!attr_choices %in% invalid_choices], selected = attr(data, 'ID_attributes')) + updateTextInput(session, 'EAF.MultiCDF.yMin', value = min(fvals, na.rm = T)) + updateTextInput(session, 'EAF.MultiCDF.yMax', value = max(fvals, na.rm = T)) + if (isTRUE(attr(data, 'constrained'))) { shinyjs::show(id = "Settings.Constrained") shinyjs::alert("The data you loaded seems to come from a constrained optimization problem. @@ -723,6 +737,16 @@ observe({ setTextInput(session, 'RT_Stats.Overview.Target', name, alternative = format_FV(stop)) setTextInput(session, 'RT.Multisample.Target', name, alternative = format_FV(median(v))) setTextInput(session, 'RT.MultiERT.Target', name, alternative = format_FV(median(v))) + + setTextInput(session, 'EAF.Single.yMin', name, alternative = format_FV(start)) + setTextInput(session, 'EAF.Single.yMax', name, alternative = format_FV(stop)) + setTextInput(session, 'EAF.CDF.yMin', name, alternative = format_FV(start)) + setTextInput(session, 'EAF.CDF.yMax', name, alternative = format_FV(stop)) + setTextInput(session, 'EAF.Diff.yMin', name, alternative = format_FV(start)) + setTextInput(session, 'EAF.Diff.yMax', name, alternative = format_FV(stop)) + setTextInput(session, 'RT.MultiERT.Target', name, alternative = format_FV(median(v))) + setTextInput(session, 'RT.MultiERT.Target', name, alternative = format_FV(median(v))) + }) # update the values for the grid of running times @@ -769,6 +793,11 @@ observe({ setTextInput(session, 'FV_PAR.Sample.Max', name, alternative = max(v)) setTextInput(session, 'FV_PAR.Sample.Step', name, alternative = step) setTextInput(session, 'FV_Stats.Overview.Target', name, alternative = max(v)) + + setTextInput(session, 'EAF.Single.Min', name, alternative = min(v)) + setTextInput(session, 'EAF.Single.Max', name, alternative = max(v)) + setTextInput(session, 'EAF.Diff.Min', name, alternative = min(v)) + setTextInput(session, 'EAF.Diff.Max', name, alternative = max(v)) #TODO: remove q and replace by single number setTextInput(session, 'FCEECDF.Single.Target', name, alternative = q[2]) }) diff --git a/inst/shiny-server/ui.R b/inst/shiny-server/ui.R index 3f0a6c6f..9e8d5a25 100644 --- a/inst/shiny-server/ui.R +++ b/inst/shiny-server/ui.R @@ -608,6 +608,46 @@ body <- dashboardBody( ) ) ), + tabItem(tabName = 'EAF', + fluidRow( + column( + width = 12, + EAF_box() + ) + ) + ), + tabItem(tabName = 'EAF_Diff', + fluidRow( + column( + width = 12, + EAF_Diff_box() + ) + ) + ), + tabItem(tabName = 'EAF_CDF', + fluidRow( + column( + width = 12, + EAF_CDF_box() + ) + ) + ), + tabItem(tabName = 'EAF_mult', + fluidRow( + column( + width = 12, + EAF_mult_box() + ) + ) + ), + tabItem(tabName = 'EAF_CDF_mult', + fluidRow( + column( + width = 12, + EAF_CDF_mult_box() + ) + ) + ), tabItem( tabName = 'Settings', diff --git a/inst/shiny-server/ui/EAF_boxes.R b/inst/shiny-server/ui/EAF_boxes.R new file mode 100644 index 00000000..06ba24c4 --- /dev/null +++ b/inst/shiny-server/ui/EAF_boxes.R @@ -0,0 +1,389 @@ +EAF_box <- function(width = 12, collapsible = T, collapsed = F) { + box( + title = HTML('

Empirical Attainment Function

'), + width = width, collapsible = collapsible, collapsed = collapsed, + solidHeader = T, status = "primary", + sidebarPanel( + width = 3, + selectInput('EAF.Single.Algs', label = 'Select which IDs to include:', + multiple = T, selected = NULL, choices = NULL) %>% shinyInput_label_embed( + custom_icon() %>% + bs_embed_popover( + title = "ID selection", content = alg_select_info, + placement = "auto" + ) + ), + HTML('

Set the ranges for the budgets and function values.

'), + textInput('EAF.Single.Min', label = RT_MIN_LABEL, value = ''), + textInput('EAF.Single.Max', label = RT_MAX_LABEL, value = ''), + + textInput('EAF.Single.yMin', label = F_MIN_LABEL, value = ''), + textInput('EAF.Single.yMax', label = F_MAX_LABEL, value = ''), + + actionButton('EAF.Single.Refresh', 'Force refresh plot'), + + checkboxInput('EAF.Single.Subsampling', + label = 'Use Subsampling for EAF calculation', + value = T) %>% shinyInput_label_embed( + custom_icon() %>% + bs_embed_popover( + title = "Subsampling", content = "When disabled, + all runtime values where improvements are present + will be used, resulting in a more accurate EAF. + This does come at the cost of longer execution time, + particularly when many runs are present.", + placement = "auto" + ) + ), + + checkboxInput('EAF.Single.Logx', + label = 'Scale x axis \\(\\log_{10}\\)', + value = T), + checkboxInput('EAF.Single.Logy', + label = 'Scale y axis \\(\\log_{10}\\)', + value = T), + checkboxInput('EAF.Single.Colorbar', + label = 'Show Colorbar', + value = F), + checkboxInput('EAF.Single.Problines', + label = 'Show Fixed Probability Lines', + value = F), + hr(), + + numericInput('EAF.Single.levels', label = 'Number of Levels in plot', value=11), + + hr(), + + selectInput('EAF.Single.Format', label = 'Select the figure format', + choices = supported_fig_format, selected = supported_fig_format[[1]]), + + downloadButton('EAF.Single.Download', label = 'Download the figure') + ), + + mainPanel( + width = 9, + column( + width = 12, align = "center", + HTML_P('The empirical attainment function (EAF) estimates the percentage + of runs that attain an arbitrary target value not later than a given + runtime. \n + For more information on the EAF, please see https://mlopez-ibanez.github.io/eaf/'), + + plotlyOutput.IOHanalyzer('EAF.Single_Plot') + ) + ) + ) +} + + +EAF_CDF_box <- function(width = 12, collapsible = T, collapsed = F) { + box( + title = HTML('

Empirical Attainment Function Partial Integral (`improved` ECDF)

'), + width = width, collapsible = collapsible, collapsed = collapsed, + solidHeader = T, status = "primary", + sidebarPanel( + width = 3, + selectInput('EAF.CDF.Algs', label = 'Select which IDs to include:', + multiple = T, selected = NULL, choices = NULL) %>% shinyInput_label_embed( + custom_icon() %>% + bs_embed_popover( + title = "ID selection", content = alg_select_info, + placement = "auto" + ) + ), + HTML('

Set the range of the budgets and targets + taken into account in the ECDF curve.

'), + + textInput('EAF.CDF.yMin', label = F_MIN_LABEL, value = ''), + textInput('EAF.CDF.yMax', label = F_MAX_LABEL, value = ''), + + checkboxInput('EAF.CDF.Logy', + label = 'Scale y space \\(\\log_{10}\\) before calculating the partial integrals', + value = T), + hr(), + checkboxInput('EAF.CDF.Subsampling', + label = 'Use Subsampling for EAF calculation', + value = T) %>% shinyInput_label_embed( + custom_icon() %>% + bs_embed_popover( + title = "Subsampling", content = "When disabled, + all runtime values where improvements are present + will be used, resulting in a more accurate EAF. + This does come at the cost of longer execution time, + particularly when many runs are present.", + placement = "auto" + ) + ), + checkboxInput('EAF.CDF.Logx', + label = 'Scale x axis \\(\\log_{10}\\)', + value = T), + + + hr(), + + selectInput('EAF.CDF.Format', label = 'Select the figure format', + choices = supported_fig_format, selected = supported_fig_format[[1]]), + + downloadButton('EAF.CDF.Download', label = 'Download the figure') + ), + + mainPanel( + width = 9, + column( + width = 12, align = "center", + HTML_P('The empirical attainment function (EAF) estimates the percentage + of runs that attain an arbitrary target value not later than a given + runtime. Taking the partial integral of the EAF results in a more + accurate version of the Empirical Cumulative Distribution Function, + since it does not rely on discritization of the targets.\n + For more information on the EAF, please see https://mlopez-ibanez.github.io/eaf/'), + + plotlyOutput.IOHanalyzer('EAF.CDF_Plot') + ) + ) + ) +} + +EAF_Diff_box <- function(width = 12, collapsible = T, collapsed = F) { + box( + title = HTML('

EAF based differences

'), + width = width, collapsible = collapsible, collapsed = collapsed, + solidHeader = T, status = "primary", + sidebarPanel( + width = 3, + selectInput('EAF.Diff.Algs', label = 'Select which IDs to include:', + multiple = T, selected = NULL, choices = NULL) %>% shinyInput_label_embed( + custom_icon() %>% + bs_embed_popover( + title = "ID selection", content = alg_select_info, + placement = "auto" + ) + ), + HTML('

Set the range of the budgets and targets + taken into account in the EDiff curve.

'), + textInput('EAF.Diff.Min', label = RT_MIN_LABEL, value = ''), + textInput('EAF.Diff.Max', label = RT_MAX_LABEL, value = ''), + + textInput('EAF.Diff.yMin', label = F_MIN_LABEL, value = ''), + textInput('EAF.Diff.yMax', label = F_MAX_LABEL, value = ''), + + checkboxInput('EAF.Diff.Logx', + label = 'Scale x axis \\(\\log_{10}\\)', + value = T), + checkboxInput('EAF.Diff.Logy', + label = 'Scale y axis \\(\\log_{10}\\)', + value = T), + + hr(), + + checkboxInput('EAF.Diff.ZeroTransparant', + label = 'Show values of 0 as transparant', + value = T), + checkboxInput('EAF.Diff.ShowNegatives', + label = 'Show negative differences on each subplot as well', + value = F), + hr(), + + selectInput('EAF.Diff.Format', label = 'Select the figure format', + choices = supported_fig_format, selected = supported_fig_format[[1]]), + + downloadButton('EAF.Diff.Download', label = 'Download the figure') + ), + + mainPanel( + width = 9, + column( + width = 12, align = "center", + HTML_P('The empirical attainment function (EAF) estimates the percentage + of runs that attain an arbitrary target value not later than a given + runtime. By taking the difference between two EAFs, we can see + areas of the (runtime, target)-space where one algorithm + dominates other algorithms.\n + For more information on the EAF, please see https://mlopez-ibanez.github.io/eaf/'), + + plotlyOutput.IOHanalyzer('EAF.Diff_Plot') + ) + ) + ) +} + +### Multi-function view +EAF_mult_box <- function(width = 12, collapsible = T, collapsed = F) { + box( + title = HTML('

Empirical Attainment Function

'), + width = width, collapsible = collapsible, collapsed = collapsed, + solidHeader = T, status = "primary", + sidebarPanel( + width = 3, + selectInput('EAF.Multi.FuncIds', label = "Functions to include:", + selected = NULL, choices = NULL, multiple = T), + selectInput('EAF.Multi.Algs', label = 'Select which IDs to include:', + multiple = T, selected = NULL, choices = NULL) %>% shinyInput_label_embed( + custom_icon() %>% + bs_embed_popover( + title = "ID selection", content = alg_select_info, + placement = "auto" + ) + ), + checkboxInput('EAF.Multi.CustomRanges', "Customize X and Y Ranges"), + conditionalPanel(condition = "input['EAF.Multi.CustomRanges']", + textInput('EAF.Multi.Min', label = RT_MIN_LABEL, value = ''), + textInput('EAF.Multi.Max', label = RT_MAX_LABEL, value = ''), + + textInput('EAF.Multi.yMin', label = F_MIN_LABEL, value = ''), + textInput('EAF.Multi.yMax', label = F_MAX_LABEL, value = '') + ), + actionButton('EAF.Multi.Refresh', 'Force refresh plot'), + + checkboxInput('EAF.Multi.Logx', + label = 'Scale x axis \\(\\log_{10}\\)', + value = T), + checkboxInput('EAF.Multi.Logy', + label = 'Scale y axis \\(\\log_{10}\\)', + value = T), + checkboxInput('EAF.Multi.Colorbar', + label = 'Show Colorbar', + value = F), + checkboxInput('EAF.Multi.Problines', + label = 'Show Fixed Probability Lines', + value = F), + hr(), + checkboxInput('EAF.Multi.Subsampling', + label = 'Use Subsampling for EAF calculation', + value = T) %>% shinyInput_label_embed( + custom_icon() %>% + bs_embed_popover( + title = "Subsampling", content = "When disabled, + all runtime values where improvements are present + will be used, resulting in a more accurate EAF. + This does come at the cost of longer execution time, + particularly when many runs are present.", + placement = "auto" + ) + ), + numericInput('EAF.Multi.levels', label = 'Number of Levels in plot', value=11), + + hr(), + + selectInput('EAF.Multi.Format', label = 'Select the figure format', + choices = supported_fig_format, selected = supported_fig_format[[1]]), + + downloadButton('EAF.Multi.Download', label = 'Download the figure') + ), + + mainPanel( + width = 9, + column( + width = 12, align = "center", + HTML_P('The empirical attainment function (EAF) estimates the percentage + of runs that attain an arbitrary target value not later than a given + runtime.\n + For more information on the EAF, please see https://mlopez-ibanez.github.io/eaf/'), + + plotlyOutput.IOHanalyzer('EAF.Multi_Plot') + ) + ) + ) +} + + +EAF_CDF_mult_box <- function(width = 12, collapsible = T, collapsed = F) { + box( + title = HTML('

Empirical Attainment Function Partial Integral (`improved` ECDF)

'), + width = width, collapsible = collapsible, collapsed = collapsed, + solidHeader = T, status = "primary", + sidebarPanel( + width = 3, + selectInput('EAF.MultiCDF.FuncIds', label = "Functions to include:", + selected = NULL, choices = NULL, multiple = T), + selectInput('EAF.MultiCDF.Algs', label = 'Select which IDs to include:', + multiple = T, selected = NULL, choices = NULL) %>% shinyInput_label_embed( + custom_icon() %>% + bs_embed_popover( + title = "ID selection", content = alg_select_info, + placement = "auto" + ) + ), + HTML('

Set the range of the budgets and targets + taken into account in the ECDF curve.

'), + checkboxInput('EAF.MultiCDF.CustomRanges', "Customize Y Range"), + conditionalPanel(condition = "input['EAF.MultiCDF.CustomRanges']", + textInput('EAF.MultiCDF.yMin', label = F_MIN_LABEL, value = ''), + textInput('EAF.MultiCDF.yMax', label = F_MAX_LABEL, value = '') + ), + checkboxInput('EAF.MultiCDF.CustomRangeX', "Customize X Range"), + conditionalPanel(condition = "input['EAF.MultiCDF.CustomRangeX']", + textInput('EAF.MultiCDF.xMin', label = RT_MIN_LABEL, value = ''), + textInput('EAF.MultiCDF.xMax', label = RT_MAX_LABEL, value = '') + ), + + checkboxInput('EAF.MultiCDF.Logy', + label = 'Scale y space \\(\\log_{10}\\) before calculating the partial integrals', + value = T), + hr(), + checkboxInput('EAF.MultiCDF.Logx', + label = 'Scale x axis \\(\\log_{10}\\)', + value = T), + + checkboxInput('EAF.MultiCDF.Subsampling', + label = 'Use Subsampling for EAF calculation', + value = T) %>% shinyInput_label_embed( + custom_icon() %>% + bs_embed_popover( + title = "Subsampling", content = "When disabled, + all runtime values where improvements are present + will be used, resulting in a more accurate EAF. + This does come at the cost of longer execution time, + particularly when many runs are present.", + placement = "auto" + ) + ), + actionButton('EAF.MultiCDF.Refresh', 'Force refresh plot'), + + hr(), + + selectInput('EAF.MultiCDF.Format', label = 'Select the figure format', + choices = supported_fig_format, selected = supported_fig_format[[1]]), + + downloadButton('EAF.MultiCDF.Download', label = 'Download the figure'), + + hr(), + + checkboxInput("EAF.MultiCDF.Normalize_AUC", "Normalize AOC / AUC values", value = T) %>% + shinyInput_label_embed( + custom_icon() %>% + bs_embed_popover( + title = "Normalization of AOC / AUC", content = "By default, the AUC values are + approximated based on the trapezium rule using the x-values as + present in the ECDF-plot. This value can then be normalized to [0,1] + by dividing by the maximum x-value (if no custom x-bounds are set, + note that this is the maximum budget across all selected algorithms!). + Please note that the AUC values generated are only comparable to other + AUC values if the used target boundaries and scaling are identical!", + placement = "auto" + ) + ), + + selectInput('EAF.AUC.Table.Format', 'Format', choices = supported_table_format, selected = supported_table_format[[1]]), + downloadButton('EAF.AUC.Table.Download', label = 'Download the AOC table') + + ), + + mainPanel( + width = 9, + column( + width = 12, align = "center", + HTML_P('The empirical attainment function (EAF) estimates the percentage + of runs that attain an arbitrary target value not later than a given + runtime. Taking the partial integral of the EAF results in a more + accurate version of the Empirical Cumulative Distribution Function, + since it does not rely on discritization of the targets.\n + For more information on the EAF, please see https://mlopez-ibanez.github.io/eaf/'), + + plotlyOutput.IOHanalyzer('EAF.MultiCDF_Plot'), + HTML_P('The approximated Area Over / Under the EAF is:'), + DT::dataTableOutput('AUC_EAF_GRID_GENERATED'), + ) + ) + ) +} diff --git a/inst/shiny-server/ui/fv_box.R b/inst/shiny-server/ui/fv_box.R index 6bc1a315..c19a78bd 100644 --- a/inst/shiny-server/ui/fv_box.R +++ b/inst/shiny-server/ui/fv_box.R @@ -103,6 +103,11 @@ fv_agg_box <- function(width = 12, height = '600px', collapsible = T, collapsed ), selectInput('FCEPlot.Multi.Funcs', label = 'Select which Functions to include:', multiple = T, selected = NULL, choices = NULL), + selectInput('FCEPlot.Multi.Geom_mean', + label = 'Select the aggregation method to use:', + multiple = F, selected = 'median', + choices = c('median', 'mean', 'geometric')), + checkboxInput('FCEPlot.Multi.Logx', label = 'Scale x axis \\(\\log_{10}\\)', value = T), diff --git a/inst/shiny-server/ui/sidebar_menu.R b/inst/shiny-server/ui/sidebar_menu.R index 656a4de2..77e9f144 100644 --- a/inst/shiny-server/ui/sidebar_menu.R +++ b/inst/shiny-server/ui/sidebar_menu.R @@ -41,7 +41,19 @@ sidebar_menu <- function() { menuSubItem('Ranking', tabName = "FCE_Statistics_aggr", icon = icon('project-diagram')) ) ), - menuItem("Position Information", tabName = "Positions", icon = icon("arrows-alt")), + menuItem("Empirical Attainment", tabName = "Other", icon = icon('file'), + menuItem("Single Function", tabName = "EAF_single", icon = icon("chart-line"), selected = F, + menuItem("Empirical Attainment", tabName = "EAF", icon = icon("project-diagram")), + menuItem("EAF Differences", tabName = "EAF_Diff", icon = icon("project-diagram")), + menuItem("EAF-based CDF", tabName = "EAF_CDF", icon = icon("chart-line")) + ), + menuItem("Multiple Functions", tabName = "EAF_aggr", icon = icon("chart-bar"), selected = F, + menuItem("Empirical Attainment", tabName = "EAF_mult", icon = icon("project-diagram")), + menuItem("EAF-based CDF", tabName = "EAF_CDF_mult", icon = icon("chart-line")) + ), + menuItem("Position Information", tabName = "Positions", icon = icon("arrows-alt")) + + ), # menuItem("Data Format", tabName = "dataformat", icon = icon("fas fa-database")), menuItem("About", tabName = "about", icon = icon("question")), menuItem("Settings", tabName = "Settings", icon = icon("cog")) diff --git a/man/IOHanalyzer.Rd b/man/IOHanalyzer.Rd index 6fb316ae..0c05e804 100644 --- a/man/IOHanalyzer.Rd +++ b/man/IOHanalyzer.Rd @@ -3,6 +3,7 @@ \docType{package} \name{IOHanalyzer} \alias{IOHanalyzer} +\alias{IOHanalyzer-package} \title{IOHanalyzer: Data Analysis Part of IOHprofiler} \description{ The data analysis module for the Iterative Optimization Heuristics Profiler (IOHprofiler). @@ -33,3 +34,23 @@ Plot.RT.Single_Func(dsList[1]) runServer() } } +\seealso{ +Useful links: +\itemize{ + \item \url{https://iohanalyzer.liacs.nl} + \item \url{https://github.com/IOHprofiler/IOHAnalyzer} + \item Report bugs at \url{https://github.com/IOHprofiler/IOHAnalyzer/issues} +} + +} +\author{ +\strong{Maintainer}: Diederick Vermetten \email{d.l.vermetten@liacs.leidenuniv.nl} (\href{https://orcid.org/0000-0003-3040-7162}{ORCID}) + +Authors: +\itemize{ + \item Hao Wang \email{h.wang@liacs.leidenuniv.nl} (\href{https://orcid.org/0000-0002-4933-5181}{ORCID}) + \item Carola Doerr \email{Carola.Doerr@mpi-inf.mpg.de} (\href{https://orcid.org/0000-0002-4981-3227}{ORCID}) + \item Thomas Bäck \email{t.h.w.baeck@liacs.leidenuniv.nl} (\href{https://orcid.org/0000-0001-6768-1478}{ORCID}) +} + +} diff --git a/man/Plot.FV.ECDF_AUC.Rd b/man/Plot.FV.ECDF_AUC.Rd index 6be02d36..c81ddf29 100644 --- a/man/Plot.FV.ECDF_AUC.Rd +++ b/man/Plot.FV.ECDF_AUC.Rd @@ -25,5 +25,6 @@ A radarplot of the area under the aggregated ECDF-curve of the DataSetList Radarplot of the area under the aggregated ECDF-curve of a DataSetList. } \examples{ +\dontshow{data.table::setDTthreads(1)} Plot.FV.ECDF_AUC(subset(dsl, funcId == 1)) } diff --git a/man/Plot.FV.ECDF_Per_Target.Rd b/man/Plot.FV.ECDF_Per_Target.Rd index 5d241161..75932e72 100644 --- a/man/Plot.FV.ECDF_Per_Target.Rd +++ b/man/Plot.FV.ECDF_Per_Target.Rd @@ -28,5 +28,6 @@ Plot the empirical cumulative distriburtion as a function of the target values o a DataSetList at certain target runtimes } \examples{ +\dontshow{data.table::setDTthreads(1)} Plot.FV.ECDF_Per_Target(subset(dsl, funcId == 1), 10) } diff --git a/man/Plot.FV.ECDF_Single_Func.Rd b/man/Plot.FV.ECDF_Single_Func.Rd index cbcb7856..09d200d8 100644 --- a/man/Plot.FV.ECDF_Single_Func.Rd +++ b/man/Plot.FV.ECDF_Single_Func.Rd @@ -50,5 +50,6 @@ Plot the aggregated empirical cumulative distriburtion as a function of the func a DataSetList. } \examples{ +\dontshow{data.table::setDTthreads(1)} Plot.FV.ECDF_Single_Func(subset(dsl, funcId == 1)) } diff --git a/man/Plot.RT.ECDF_AUC.Rd b/man/Plot.RT.ECDF_AUC.Rd index f1e553b3..6b2fe5c8 100644 --- a/man/Plot.RT.ECDF_AUC.Rd +++ b/man/Plot.RT.ECDF_AUC.Rd @@ -39,5 +39,6 @@ A radarplot of the area under the aggregated ECDF-curve of the DataSetList Radarplot of the area under the aggregated ECDF-curve of a DataSetList. } \examples{ +\dontshow{data.table::setDTthreads(1)} Plot.RT.ECDF_AUC(subset(dsl, funcId == 1)) } diff --git a/man/Plot.RT.ECDF_Multi_Func.Rd b/man/Plot.RT.ECDF_Multi_Func.Rd index bba1e776..2fabc808 100644 --- a/man/Plot.RT.ECDF_Multi_Func.Rd +++ b/man/Plot.RT.ECDF_Multi_Func.Rd @@ -27,5 +27,6 @@ Plot the aggregated empirical cumulative distriburtion as a function of the runn a DataSetList. Aggregated over multiple functions or dimensions. } \examples{ +\dontshow{data.table::setDTthreads(1)} Plot.RT.ECDF_Multi_Func(dsl) } diff --git a/man/Plot.RT.ECDF_Per_Target.Rd b/man/Plot.RT.ECDF_Per_Target.Rd index e1ef3845..1a6436d1 100644 --- a/man/Plot.RT.ECDF_Per_Target.Rd +++ b/man/Plot.RT.ECDF_Per_Target.Rd @@ -26,5 +26,6 @@ Plot the empirical cumulative distriburtion as a function of the running times o a DataSetList at certain target function values } \examples{ +\dontshow{data.table::setDTthreads(1)} Plot.RT.ECDF_Per_Target(subset(dsl, funcId == 1), 14) } diff --git a/man/Plot.RT.ECDF_Single_Func.Rd b/man/Plot.RT.ECDF_Single_Func.Rd index 57c8c9dd..e257396a 100644 --- a/man/Plot.RT.ECDF_Single_Func.Rd +++ b/man/Plot.RT.ECDF_Single_Func.Rd @@ -46,5 +46,6 @@ Plot the aggregated empirical cumulative distriburtion as a function of the runn a DataSetList. } \examples{ +\dontshow{data.table::setDTthreads(1)} Plot.RT.ECDF_Single_Func(subset(dsl, funcId == 1)) } diff --git a/man/generate_data.AUC.Rd b/man/generate_data.AUC.Rd index 0430c218..ae07b2f3 100644 --- a/man/generate_data.AUC.Rd +++ b/man/generate_data.AUC.Rd @@ -35,6 +35,7 @@ is provided together with dsList and targets} This function generates a dataframe which can be easily plotted using the `plot_general_data`-function } \examples{ +\dontshow{data.table::setDTthreads(1)} generate_data.AUC(dsl, get_ECDF_targets(dsl)) generate_data.AUC(NULL, NULL, dt_ecdf = generate_data.ECDF(dsl, get_ECDF_targets(dsl))) } diff --git a/man/generate_data.CDP.Rd b/man/generate_data.CDP.Rd index dfd8df20..85dadc3e 100644 --- a/man/generate_data.CDP.Rd +++ b/man/generate_data.CDP.Rd @@ -37,9 +37,6 @@ the `cumulative_difference_plot`. } \examples{ -dsl dsl_sub <- subset(dsl, funcId == 1) -runtime <- 15 - -generate_data.CDP(dsl_sub, runtime, TRUE) +generate_data.CDP(dsl_sub, 15, TRUE, nOfBootstrapSamples = 10) } diff --git a/man/generate_data.EAF.Rd b/man/generate_data.EAF.Rd new file mode 100644 index 00000000..e18cc493 --- /dev/null +++ b/man/generate_data.EAF.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DataSetList.R +\name{generate_data.EAF} +\alias{generate_data.EAF} +\title{Generate dataframe consisting of the levelsets of the EAF} +\usage{ +generate_data.EAF( + dsList, + n_sets = 11, + subsampling = 100, + scale_xlog = F, + xmin = "", + xmax = "" +) +} +\arguments{ +\item{dsList}{The DataSetList object} + +\item{n_sets}{The number of level sets to calculate} + +\item{subsampling}{Level of subsampling to use for runtime-values (number of runtimes to consider). +Setting to 0 will make the calculations more precise at the cost of potentially much longer exectution times} + +\item{scale_xlog}{Only has effect when `subsampling` is True. The scaling of the subsampled runtimes +When true, these are equally spaced in log-space, when false they are linearly spaced.} + +\item{xmin}{Minimum runtime value} + +\item{xmax}{Maximum runtime value} +} +\description{ +This function generates a dataframe which can be easily plotted using the `plot_eaf_data`-function +} +\examples{ +generate_data.EAF(subset(dsl, funcId == 1)) +} diff --git a/man/generate_data.EAF_Difference.Rd b/man/generate_data.EAF_Difference.Rd new file mode 100644 index 00000000..15a108d9 --- /dev/null +++ b/man/generate_data.EAF_Difference.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DataSetList.R +\name{generate_data.EAF_Difference} +\alias{generate_data.EAF_Difference} +\title{Generate differences between two EAFs} +\usage{ +generate_data.EAF_Difference(dsList1, dsList2) +} +\arguments{ +\item{dsList1}{The first DataSetList object} + +\item{dsList2}{The second DataSetList object} +} +\description{ +This function uses the 'eaf' package to calculate eaf differences +} +\examples{ +generate_data.EAF_Difference(dsl[1], dsl[3]) +} diff --git a/man/generate_data.EAF_diff_Approximate.Rd b/man/generate_data.EAF_diff_Approximate.Rd new file mode 100644 index 00000000..0d8ffb60 --- /dev/null +++ b/man/generate_data.EAF_diff_Approximate.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DataSetList.R +\name{generate_data.EAF_diff_Approximate} +\alias{generate_data.EAF_diff_Approximate} +\title{Generate EAF-differences between each function and the remaining portfolio} +\usage{ +generate_data.EAF_diff_Approximate( + dsList, + xmin, + xmax, + ymin, + ymax, + x.log = T, + y.log = T +) +} +\arguments{ +\item{dsList}{The DataSetList object, containing at least 2 IDs} + +\item{xmin}{Minimum runtime to consider} + +\item{xmax}{Maximum runtime to consider} + +\item{ymin}{Minimum f(x) to consider} + +\item{ymax}{Maximum f(x) to consider} + +\item{x.log}{Whether to scale the y-space logarithmically} + +\item{y.log}{Whether to scale the y-space logarithmically} +} +\description{ +This is an approximation of ``, since the number of required polygons +can quickly become problematic for plotly. This function uses discretized +contour matrices instead, which trades off accuracy for scalability. +} +\examples{ +generate_data.EAF_diff_Approximate(subset(dsl, funcId == 1), 1, 16, 1, 16) +} diff --git a/man/generate_data.ECDF_From_EAF.Rd b/man/generate_data.ECDF_From_EAF.Rd new file mode 100644 index 00000000..c5462aed --- /dev/null +++ b/man/generate_data.ECDF_From_EAF.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/DataSetList.R +\name{generate_data.ECDF_From_EAF} +\alias{generate_data.ECDF_From_EAF} +\title{Generate dataframe consisting of the ECDF-equivalent based on the EAF} +\usage{ +generate_data.ECDF_From_EAF( + eaf_table, + min_val, + max_val, + maximization = F, + scale_log = F, + normalize = T +) +} +\arguments{ +\item{eaf_table}{Datatable resulting from the `generate_data.EAF` function} + +\item{min_val}{Minimum value to use for y-space} + +\item{max_val}{Maximum value to use for y-space} + +\item{maximization}{Whether the data resulted from maximization or not} + +\item{scale_log}{Whether to use logarithmic scaling in y-space before calculating the partial integral} + +\item{normalize}{Whether to normalize the resulting integrals to [0,1] (Based on `min_val` and `max_va`)} +} +\description{ +This function uses EAF-data to calculate a target-independent version of the ECDF +} +\examples{ +\dontshow{data.table::setDTthreads(1)} +generate_data.ECDF_From_EAF(generate_data.EAF(subset(dsl, funcId == 1)), 1, 16, maximization = TRUE) +} diff --git a/man/generate_data.ECDF_raw.Rd b/man/generate_data.ECDF_raw.Rd index db71d0f3..7d3e0c00 100644 --- a/man/generate_data.ECDF_raw.Rd +++ b/man/generate_data.ECDF_raw.Rd @@ -18,5 +18,6 @@ a data.table, it needs columns 'target', 'DIM' and 'funcId'} This provides an unaggregated version of the function `generate_data.ECDF`. } \examples{ +\dontshow{data.table::setDTthreads(1)} generate_data.ECDF_raw(subset(dsl, funcId == 1), c(10, 15, 16)) } diff --git a/man/get_shapley_values.Rd b/man/get_shapley_values.Rd index a9cfef39..c34b0e80 100644 --- a/man/get_shapley_values.Rd +++ b/man/get_shapley_values.Rd @@ -31,5 +31,7 @@ a data.table, it needs columns 'target', 'DIM' and 'funcId'} Based on the contribution to the ECDF-curve of the VBS of the portfolio } \examples{ -get_shapley_values(dsl, get_ECDF_targets(dsl)) +\dontshow{data.table::setDTthreads(1)} +dsl_sub <- subset(dsl, funcId == 1) +get_shapley_values(dsl_sub, get_ECDF_targets(dsl_sub), group_size = 2) } diff --git a/man/plot_eaf_data.Rd b/man/plot_eaf_data.Rd new file mode 100644 index 00000000..a6ca78de --- /dev/null +++ b/man/plot_eaf_data.Rd @@ -0,0 +1,77 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotDataSetList.R +\name{plot_eaf_data} +\alias{plot_eaf_data} +\title{Create EAF-based polygon plots} +\usage{ +plot_eaf_data( + df, + maximization = F, + scale.xlog = F, + scale.ylog = F, + scale.reverse = F, + p = NULL, + x_title = NULL, + xmin = NULL, + xmax = NULL, + ymin = NULL, + ymax = NULL, + y_title = NULL, + plot_title = NULL, + subplot_attr = NULL, + show.colorbar = F, + subplot_shareX = F, + dt_overlay = NULL, + ... +) +} +\arguments{ +\item{df}{The dataframe containing the data to plot. This should come from `generate_data.EAF`} + +\item{maximization}{Whether the data comes from maximization or minimization} + +\item{scale.xlog}{Logarithmic scaling of x-axis} + +\item{scale.ylog}{Logarithmic scaling of y-axis} + +\item{scale.reverse}{Decreasing or increasing x-axis} + +\item{p}{A previously existing plot on which to add traces. If NULL, a new canvas is created} + +\item{x_title}{Title of x-axis. Defaults to x_attr} + +\item{xmin}{Minimum value for the x-axis} + +\item{xmax}{Maximum value for the x-axis} + +\item{ymin}{Minimum value for the y-axis} + +\item{ymax}{Maximum value for the y-axis} + +\item{y_title}{Title of x-axis. Defaults to x_attr} + +\item{plot_title}{Title of x-axis. Defaults to no title} + +\item{subplot_attr}{Which attribute of the dataframe to use for creating subplots} + +\item{show.colorbar}{Whether or not to include a colorbar} + +\item{subplot_shareX}{Whether or not to share X-axis when using subplots} + +\item{dt_overlay}{Dataframe containing additional data (e.g. quantiles) to plot +on top of the EAF. This should have a column labeled 'runtime'. The other columsn will +all be plotted as function values.} + +\item{...}{Additional parameters for the add_trace function} +} +\value{ +An EAF plot +} +\description{ +Create EAF-based polygon plots +} +\examples{ +\dontrun{ +plot_eaf_data(generate_data.EAF(subset(dsl, ID==get_id(dsl)[[1]])), maximization=T) +} +} diff --git a/man/plot_eaf_differences.Rd b/man/plot_eaf_differences.Rd new file mode 100644 index 00000000..1836ee4f --- /dev/null +++ b/man/plot_eaf_differences.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotDataSetList.R +\name{plot_eaf_differences} +\alias{plot_eaf_differences} +\title{Create EAF-difference contour plots} +\usage{ +plot_eaf_differences( + matrices, + scale.xlog = T, + scale.ylog = F, + zero_transparant = F, + show_negatives = F +) +} +\arguments{ +\item{matrices}{The dataframes containing the data to plot. This should come from `generate_data.EAF_diff_Approximate`} + +\item{scale.xlog}{Logarithmic scaling of x-axis} + +\item{scale.ylog}{Logarithmic scaling of y-axis} + +\item{zero_transparant}{Whether values of 0 should be made transparant or not} + +\item{show_negatives}{Whether to also show negative values or not} +} +\value{ +EAF difference plots +} +\description{ +Create EAF-difference contour plots +} +\examples{ +\dontrun{ +plot_eaf_differences(generate_data.EAF_diff_Approximate(subset(dsl, funcId == 1), 1, 50, 1, 16)) +} +} diff --git a/man/plot_general_data.Rd b/man/plot_general_data.Rd index cd4b2d96..633c4a49 100644 --- a/man/plot_general_data.Rd +++ b/man/plot_general_data.Rd @@ -26,6 +26,7 @@ plot_general_data( frame_attr = "frame", symbol_attr = "run_nr", subplot_shareX = F, + line.step = F, ... ) } @@ -76,6 +77,8 @@ Only works in combination with`upper_attr` and `type` == 'ribbon'} \item{subplot_shareX}{Whether or not to share X-axis when using subplots} +\item{line.step}{Whether to plot lines as a step-function (T) or as linear interpolation (F, default)} + \item{...}{Additional parameters for the add_trace function} } \description{ diff --git a/tests/testthat.R b/tests/testthat.R index 121c9107..b20dad73 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,4 +1,8 @@ library(testthat) library(IOHanalyzer) +library(data.table) +threads <- getDTthreads() +setDTthreads(1) test_check("IOHanalyzer") +setDTthreads(threads)