diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 3349b73c..10950739 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -46,7 +46,7 @@ jobs: run: | if [ "$RUNNER_OS" == "Linux" ]; then sudo apt-get -y install \ - libudunits2-dev libgdal-dev libgeos-dev libproj-dev libglpk-dev + libudunits2-dev libgdal-dev libgeos-dev libproj-dev libglpk-dev libglu1-mesa-dev elif [ "$RUNNER_OS" == "macOS" ]; then brew install --cask xquartz brew install pkg-config @@ -68,6 +68,8 @@ jobs: any::pdp, stan-dev/cmdstanr, any::igraph, + any::lwgeom, + any::ncmeta, any::xgboost, any::dbarts, any::mboost, diff --git a/CITATION.cff b/CITATION.cff index 761595e3..d0ba6cef 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -8,7 +8,7 @@ message: 'To cite package "ibis.iSDM" in publications use:' type: software license: CC-BY-4.0 title: 'ibis.iSDM: Modelling framework for integrated biodiversity distribution scenarios' -version: 0.0.8 +version: 0.0.9 abstract: Integrated framework of modelling the distribution of species and ecosystems in a suitability framing. This package allows the estimation of integrated species distribution models (iSDM) based on several sources of evidence and provided presence-only @@ -64,7 +64,7 @@ references: - family-names: Hesselbarth given-names: Maximilian H.K. year: '2023' - version: 0.0.5 + version: 0.0.9 - type: software title: assertthat abstract: 'assertthat: Easy Pre and Post Assertions' @@ -217,6 +217,17 @@ references: given-names: David email: dpierce@ucsd.edu year: '2023' +- type: software + title: ncmeta + abstract: 'ncmeta: Straightforward ''NetCDF'' Metadata' + notes: Imports + url: https://github.com/hypertidy/ncmeta + repository: https://CRAN.R-project.org/package=ncmeta + authors: + - family-names: Sumner + given-names: Michael + email: mdsumner@gmail.com + year: '2023' - type: software title: Matrix abstract: 'Matrix: Sparse and Dense Matrix Classes and Methods' @@ -235,17 +246,6 @@ references: given-names: Mikael orcid: https://orcid.org/0000-0002-3542-2938 year: '2023' -- type: software - title: ncmeta - abstract: 'ncmeta: Straightforward ''NetCDF'' Metadata' - notes: Imports - url: https://github.com/hypertidy/ncmeta - repository: https://CRAN.R-project.org/package=ncmeta - authors: - - family-names: Sumner - given-names: Michael - email: mdsumner@gmail.com - year: '2023' - type: software title: parallel abstract: 'R: A Language and Environment for Statistical Computing' @@ -306,6 +306,18 @@ references: orcid: https://orcid.org/0000-0001-5872-2872 year: '2023' version: '>= 1.7-10' +- type: software + title: lwgeom + abstract: 'lwgeom: Bindings to Selected ''liblwgeom'' Functions for Simple Features' + notes: Imports + url: https://github.com/r-spatial/lwgeom/ + repository: https://CRAN.R-project.org/package=lwgeom + authors: + - family-names: Pebesma + given-names: Edzer + email: edzer.pebesma@uni-muenster.de + orcid: https://orcid.org/0000-0001-8049-7069 + year: '2023' - type: software title: sf abstract: 'sf: Simple Features for R' @@ -714,6 +726,9 @@ references: - family-names: Goodrich given-names: Ben email: benjamin.goodrich@columbia.edu + - family-names: Johnson + given-names: Andrew + email: andrew.johnson@arjohnsonau.com - family-names: Weber given-names: Sebastian email: sdw.post@waebers.de diff --git a/DESCRIPTION b/DESCRIPTION index 3617185a..8a885a26 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: ibis.iSDM Type: Package Title: Modelling framework for integrated biodiversity distribution scenarios -Version: 0.0.8 +Version: 0.0.9 Authors@R: c(person(given = "Martin", family = "Jung", @@ -19,6 +19,7 @@ Description: Integrated framework of modelling the distribution of species and e Language: en-GB License: CC BY 4.0 Encoding: UTF-8 +Additional_repositories: https://inla.r-inla-download.org/R/testing Imports: assertthat (>= 0.2.0), doFuture (>= 0.12.2), @@ -30,12 +31,13 @@ Imports: graphics, methods, ncdf4, - Matrix, ncmeta, + Matrix, parallel, posterior, proto (>= 1.0.0), terra (>= 1.7-10), + lwgeom, sf (>= 1.0), stars (>= 0.5), stats, @@ -122,6 +124,7 @@ Collate: 'get_data.R' 'ibis.iSDM-package.R' 'limiting.R' + 'mask.R' 'misc.R' 'partial.R' 'plot.R' diff --git a/NAMESPACE b/NAMESPACE index 958b9437..43ff1042 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,10 @@ S3method(as.Id,character) S3method(coef,DistributionModel) S3method(effects,DistributionModel) +S3method(mask,BiodiversityDatasetCollection) +S3method(mask,BiodiversityScenario) +S3method(mask,DistributionModel) +S3method(mask,PredictorDataset) S3method(partial,DistributionModel) S3method(plot,BiodiversityDatasetCollection) S3method(plot,BiodiversityScenario) @@ -211,4 +215,10 @@ importFrom(graphics,par) importFrom(methods,as) importFrom(stats,complete.cases) importFrom(stats,effects) +importFrom(stats,mad) importFrom(stats,residuals) +importFrom(stats,sd) +importFrom(stats,terms.formula) +importFrom(stats,update.formula) +importFrom(terra,mask) +importFrom(utils,install.packages) diff --git a/NEWS.md b/NEWS.md index 89d94d8e..0a6b9365 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,15 +1,29 @@ -# ibis.iSDM 0.0.8 (current dev branch) +# ibis.iSDM 0.0.9 (current dev branch) +#### New features +* Added new vignette on available functions for data preparation #67 +* Addition of small `mask()` function that emulates the for `terra`. + +#### Minor improvements and bug fixes +* Small fix to `ensemble()` so that ensembles of future scenarios use correct standardization. +* Small fix to `threshold()` now returning threshold values correctly. +* Bug fix and error catching to `distribution()` and `ensemble_partial()`,`ensemble_spartial()` +* Further checks added to `check()` #45 +* Small fix to `alignRasters()`. +* Small fix to harmonize field_column throughout. +* Improved error messages and handling of formula's. + +# ibis.iSDM 0.0.8 #### New features * Implemented min size constraint (`add_constraint_minsize()`) #56 * Added a function for estimating partial effects of ensembles `ensemble_spartial()`. #### Minor improvements and bug fixes * Added warnings and checks for missing crs in supplied layers #65 -* Smaller bug and code hamornizations to `ensemble_partial()`, `partial()` and `spartial()`. -* Small fix to `threshold()` now returning threshold values correctly. +* Smaller bug and code harmonizations to `ensemble_partial()`, `partial()` and `spartial()`. * Smaller bug fixes to `threshold()` in `scenario()` projections. * Improved error messages in several functions. * Further documentation fixes towards CRAN submission #38 +* Allow to specify location of biodiversity point records in `threshold()`. # ibis.iSDM 0.0.7 #### New features diff --git a/R/add_biodiversity.R b/R/add_biodiversity.R index 2220ec8a..76087b87 100644 --- a/R/add_biodiversity.R +++ b/R/add_biodiversity.R @@ -71,7 +71,7 @@ NULL methods::setGeneric( "add_biodiversity_poipo", signature = methods::signature("x", "poipo"), - function(x, poipo, name = NULL, field_occurrence = "Observed", formula = NULL, family = "poisson", link = NULL, + function(x, poipo, name = NULL, field_occurrence = "observed", formula = NULL, family = "poisson", link = NULL, weight = 1, separate_intercept = TRUE, docheck = TRUE, pseudoabsence_settings = NULL, ...) {standardGeneric("add_biodiversity_poipo") }) #' @name add_biodiversity_poipo @@ -81,7 +81,7 @@ methods::setGeneric( methods::setMethod( "add_biodiversity_poipo", methods::signature(x = "BiodiversityDistribution", poipo = "sf"), - function(x, poipo, name = NULL, field_occurrence = "Observed", formula = NULL, family = "poisson", link = NULL, + function(x, poipo, name = NULL, field_occurrence = "observed", formula = NULL, family = "poisson", link = NULL, weight = 1, separate_intercept = TRUE, docheck = TRUE, pseudoabsence_settings = NULL, ...) { assertthat::assert_that(inherits(x, "BiodiversityDistribution"), inherits(poipo, "Spatial") || inherits(poipo, "sf") || inherits(poipo, "data.frame") || inherits(poipo, "tibble"), @@ -171,7 +171,7 @@ methods::setMethod( #' @param name The name of the biodiversity dataset used as internal identifier. #' @param field_occurrence A [`numeric`] or [`character`] location of #' biodiversity point records indicating presence/absence. By default set to -#' \code{"Observed"} and an error will be thrown if a [`numeric`] column with +#' \code{"observed"} and an error will be thrown if a [`numeric`] column with #' that name does not exist. #' @param formula A [`character`] or [`formula`] object to be passed. Default #' (\code{NULL}) is to use all covariates (if specified). @@ -214,7 +214,7 @@ NULL methods::setGeneric( "add_biodiversity_poipa", signature = methods::signature("x", "poipa"), - function(x, poipa, name = NULL, field_occurrence = "Observed", formula = NULL, family = "binomial", link = NULL, + function(x, poipa, name = NULL, field_occurrence = "observed", formula = NULL, family = "binomial", link = NULL, weight = 1, separate_intercept = TRUE, docheck = TRUE, ...) standardGeneric("add_biodiversity_poipa")) #' @name add_biodiversity_poipa @@ -224,7 +224,7 @@ methods::setGeneric( methods::setMethod( "add_biodiversity_poipa", methods::signature(x = "BiodiversityDistribution", poipa = "sf"), - function(x, poipa, name = NULL, field_occurrence = "Observed", formula = NULL, family = "binomial", link = NULL, + function(x, poipa, name = NULL, field_occurrence = "observed", formula = NULL, family = "binomial", link = NULL, weight = 1, separate_intercept = TRUE, docheck = TRUE, ... ) { assertthat::assert_that(inherits(x, "BiodiversityDistribution"), inherits(poipa, "Spatial") || inherits(poipa, "sf") || inherits(poipa, "data.frame") || inherits(poipa, "tibble"), @@ -362,7 +362,7 @@ NULL methods::setGeneric( "add_biodiversity_polpo", signature = methods::signature("x", "polpo"), - function(x, polpo, name = NULL, field_occurrence = "Observed", formula = NULL, family = "poisson", link = NULL, + function(x, polpo, name = NULL, field_occurrence = "observed", formula = NULL, family = "poisson", link = NULL, weight = 1, simulate = FALSE, simulate_points = 100, simulate_bias = NULL, simulate_strategy = "random", separate_intercept = TRUE, docheck = TRUE, pseudoabsence_settings = NULL, ...) standardGeneric("add_biodiversity_polpo")) @@ -373,7 +373,7 @@ methods::setGeneric( methods::setMethod( "add_biodiversity_polpo", methods::signature(x = "BiodiversityDistribution", polpo = "sf"), - function(x, polpo, name = NULL, field_occurrence = "Observed", formula = NULL,family = "poisson", link = NULL, + function(x, polpo, name = NULL, field_occurrence = "observed", formula = NULL,family = "poisson", link = NULL, weight = 1, simulate = FALSE, simulate_points = 100, simulate_bias = NULL, simulate_strategy = "random", separate_intercept = TRUE, docheck = TRUE, pseudoabsence_settings = NULL, ... ) { assertthat::assert_that(inherits(x, "BiodiversityDistribution"), @@ -559,7 +559,7 @@ NULL methods::setGeneric( "add_biodiversity_polpa", signature = methods::signature("x", "polpa"), - function(x, polpa, name = NULL, field_occurrence = "Observed", formula = NULL, family = "binomial", link = NULL, + function(x, polpa, name = NULL, field_occurrence = "observed", formula = NULL, family = "binomial", link = NULL, weight = 1, simulate = FALSE, simulate_points = 100, simulate_bias = NULL, simulate_strategy = "random", separate_intercept = TRUE, docheck = TRUE, pseudoabsence_settings = NULL, ...) standardGeneric("add_biodiversity_polpa")) @@ -570,7 +570,7 @@ methods::setGeneric( methods::setMethod( "add_biodiversity_polpa", methods::signature(x = "BiodiversityDistribution", polpa = "sf"), - function(x, polpa, name = NULL, field_occurrence = "Observed", formula = NULL, family = "binomial", link = NULL, + function(x, polpa, name = NULL, field_occurrence = "observed", formula = NULL, family = "binomial", link = NULL, weight = 1, simulate = FALSE, simulate_points = 100, simulate_bias = NULL, simulate_strategy = "random", separate_intercept = TRUE, docheck = TRUE, pseudoabsence_settings = NULL, ... ) { assertthat::assert_that(inherits(x, "BiodiversityDistribution"), diff --git a/R/add_offset.R b/R/add_offset.R index f04ca6e9..609c6112 100644 --- a/R/add_offset.R +++ b/R/add_offset.R @@ -397,7 +397,7 @@ methods::setGeneric( signature = methods::signature("x", "layer"), function(x, layer, distance_max = Inf, family = "poisson", presence_prop = 0.9, distance_clip = FALSE, distance_function = "negexp", - field_occurrence = "Observed", fraction = NULL, + field_occurrence = "observed", fraction = NULL, point = FALSE, add = TRUE) standardGeneric("add_offset_range")) #' Function for when raster is directly supplied (precomputed) @@ -467,7 +467,7 @@ methods::setMethod( methods::signature(x = "BiodiversityDistribution", layer = "sf"), function(x, layer, distance_max = Inf, family = "poisson", presence_prop = 0.9, distance_clip = FALSE, distance_function = "negexp", - field_occurrence = "Observed", fraction = NULL, point = FALSE, add = TRUE ) { + field_occurrence = "observed", fraction = NULL, point = FALSE, add = TRUE ) { assertthat::assert_that(inherits(x, "BiodiversityDistribution"), inherits(layer, 'sf'), is.null(distance_max) || is.numeric(distance_max) || is.infinite(distance_max), @@ -480,7 +480,7 @@ methods::setMethod( is.character(field_occurrence), is.logical(add) ) - # distance_max = Inf; family = "poisson"; presence_prop = 0.9; distance_clip = FALSE; distance_function = "negexp"; field_occurrence = "Observed"; fraction = NULL; add = TRUE + # distance_max = Inf; family = "poisson"; presence_prop = 0.9; distance_clip = FALSE; distance_function = "negexp"; field_occurrence = "observed"; fraction = NULL; add = TRUE # Match the type if set family <- match.arg(family, c("poisson", "binomial"), several.ok = FALSE) diff --git a/R/bdproto-biodiversitydataset.R b/R/bdproto-biodiversitydataset.R index db6f9105..8e796963 100644 --- a/R/bdproto-biodiversitydataset.R +++ b/R/bdproto-biodiversitydataset.R @@ -84,6 +84,22 @@ BiodiversityDatasetCollection <- bdproto( # Return coordinates if(utils::hasName(o,'geom')) sf::st_coordinates(o) else o[,c('x','y')] }, + # Masking function + mask = function(self, mask, inverse = FALSE){ + # Check whether prediction has been created + biob <- self$data + if(!is.Waiver(biob)){ + # If mask is SpatRaster, convert to polygons + if(!inherits(mask, 'sf')){ + mask <- terra::as.polygons(mask) |> sf::st_as_sf() + } + # Now for each element in biob, mask + for(id in names(biob)){ + biob[[id]]$mask(mask, inverse = inverse) + } + invisible() + } + }, # Remove a specific biodiversity dataset by id rm_data = function(self, id) { assertthat::assert_that(is.Id(id) || is.character(id), @@ -300,5 +316,25 @@ BiodiversityDataset <- bdproto( # Collect info statistics get_observations = function(self) { nrow(self$data) - } + }, + # Masking function + mask = function(self, mask, inverse = FALSE){ + # Check whether prediction has been created + biob <- self$data + if(!is.Waiver(biob)){ + # If mask is SpatRaster, convert to polygons + if(!inherits(mask, 'sf')){ + mask <- terra::as.polygons(mask) |> sf::st_as_sf() + } + # Now for each element in biob, mask + biob <- + suppressMessages( + suppressWarnings( + sf::st_crop(guess_sf(biob), mask) + ) + ) + self$data <- biob + invisible() + } + }, ) diff --git a/R/bdproto-biodiversitydistribution.R b/R/bdproto-biodiversitydistribution.R index fbbbba28..70577a64 100644 --- a/R/bdproto-biodiversitydistribution.R +++ b/R/bdproto-biodiversitydistribution.R @@ -348,6 +348,7 @@ BiodiversityDistribution <- bdproto( plot = function(self){ message("No generic plotting implemented!") }, + # Summary function summary = function(self){ message("No generic summary function implemented! Try print.") } diff --git a/R/bdproto-biodiversityscenario.R b/R/bdproto-biodiversityscenario.R index 7bbdd90d..e3343eb3 100644 --- a/R/bdproto-biodiversityscenario.R +++ b/R/bdproto-biodiversityscenario.R @@ -218,6 +218,14 @@ BiodiversityScenario <- bdproto( get_data = function(self, what = "scenarios"){ return(self[[what]]) }, + # Set data + set_data = function(self, x){ + # Get projected value + ff <- self$scenarios + # Set the object + ff[["scenarios"]] <- x + bdproto(NULL, self, scenarios = ff ) + }, # Plot the prediction plot = function(self, what = "suitability", which = NULL, ...){ if(is.Waiver(self$get_data())){ @@ -524,6 +532,40 @@ BiodiversityScenario <- bdproto( } return(out) }, + # Masking function + mask = function(self, mask, inverse = FALSE){ + # Check whether prediction has been created + projection <- self$get_data() + if(!is.Waiver(projection)){ + # Make valid + mask <- sf::st_make_valid(mask) + # If mask is sf, rasterize + if(!inherits(mask, 'sf')){ + mask <- terra::as.polygons(mask) |> sf::st_as_sf() + } + # Check that aligns + if(sf::st_crs(projection) != sf::st_crs(mask) ){ + mask <- mask |> sf::st_transform(crs = sf::st_crs(projection)) + } + # If there are multiple, ajoin + if(nrow(mask)>1){ + mask <- sf::st_combine(mask) |> sf::st_as_sf() + } + + # Now mask and save + projection <- + suppressMessages( + suppressWarnings( + projection[mask] + ) + ) + + # Save data + self[["scenarios"]] <- projection + + invisible() + } + }, # Get centroids get_centroid = function(self, patch = FALSE){ assertthat::assert_that( diff --git a/R/bdproto-distributionmodel.R b/R/bdproto-distributionmodel.R index dcf1fb6f..be7cd920 100644 --- a/R/bdproto-distributionmodel.R +++ b/R/bdproto-distributionmodel.R @@ -352,7 +352,7 @@ DistributionModel <- bdproto( }, # Set fit for this Model set_data = function(self, x, value) { - # Get biodiversity dataset collection + # Get projected value ff <- self$fits # Set the object ff[[x]] <- value @@ -453,6 +453,35 @@ DistributionModel <- bdproto( } return(cent) }, + # Masking function + mask = function(self, mask, inverse = FALSE){ + # Check whether prediction has been created + prediction <- self$get_data() + if(!is.Waiver(prediction)){ + # If mask is sf, rasterize + if(inherits(mask, 'sf')){ + mask <- terra::rasterize(mask, prediction) + } + # Check that mask aligns + if(!terra::compareGeom(prediction,mask)){ + mask <- terra::resample(mask, prediction, method = "near") + } + # Now mask and save + prediction <- terra::mask(prediction, mask, inverse = inverse) + + # Save data + self$fits[["prediction"]] <- prediction + + # Do the same for any thresholds eventually found + tr <- grep("threshold", self$show_rasters(), value = TRUE) + if(length(tr)){ + m <- self$get_data(x = tr) + m <- terra::mask(m, mask, inverse = inverse) + self$fits[[tr]] <- m + } + invisible() + } + }, # Save object save = function(self, fname, type = 'gtif', dt = 'FLT4S'){ assertthat::assert_that( diff --git a/R/bdproto-log.R b/R/bdproto-log.R index a158166d..3c82864f 100644 --- a/R/bdproto-log.R +++ b/R/bdproto-log.R @@ -27,7 +27,6 @@ Log <- bdproto( # Opens the connection to the output filename open = function(self){ assertthat::assert_that( - assertthat::is.writeable(dirname(self$filename)), assertthat::has_extension(self$filename,'txt') ) try({output = file( self$filename, open = 'wt', method = 'default') },silent = TRUE) diff --git a/R/bdproto-predictors.R b/R/bdproto-predictors.R index 82d7d514..32884e23 100644 --- a/R/bdproto-predictors.R +++ b/R/bdproto-predictors.R @@ -107,6 +107,28 @@ PredictorDataset <- bdproto( } invisible() }, + # Masking function + mask = function(self, mask, inverse = FALSE){ + # Check whether prediction has been created + prediction <- self$get_data(df = FALSE) + if(!is.Waiver(prediction)){ + # If mask is sf, rasterize + if(inherits(mask, 'sf')){ + mask <- terra::rasterize(mask, prediction) + } + # Check that mask aligns + if(!terra::compareGeom(prediction, mask)){ + mask <- terra::resample(mask, prediction, method = "near") + } + # Now mask and save + prediction <- terra::mask(prediction, mask, inverse = inverse) + + # Save data + self$fits[["data"]] <- prediction + + invisible() + } + }, # Add a new Predictor dataset to this collection set_data = function(self, x, value){ assertthat::assert_that(assertthat::is.string(x), diff --git a/R/bdproto-priorlist.R b/R/bdproto-priorlist.R index e05d2812..985cd1a1 100644 --- a/R/bdproto-priorlist.R +++ b/R/bdproto-priorlist.R @@ -156,14 +156,21 @@ PriorList <- bdproto( ids <- self$ids() varn <- self$varnames() out <- data.frame(id = character(), class = character(), variable = character(), - type = character(), hyper = character()) - for(id in ids){ + type = character(), hyper = character(), other = character()) + for(id in as.character(ids)){ pp <- self$collect(ids) co <- data.frame(id = as.character( id ), class = self$classes()[[id]], variable = varn[[id]], type = self$types()[id], hyper = paste0( self$get(varn[id]) ,collapse = " | ") ) + # Add other things if found + for(v in c("lims", "prop")){ + lim <- try({self$get(varn[id], what = v)},silent = TRUE) + if(!inherits(lim, "try-error")){ + co$other <- paste0(lim, collapse = " | ") + } + } out <- rbind(out, co) } assertthat::assert_that(nrow(out)>0) diff --git a/R/check.R b/R/check.R index 6d0ef54f..bdc7110b 100644 --- a/R/check.R +++ b/R/check.R @@ -71,9 +71,29 @@ methods::setMethod( # Type specific checks if(inherits(obj,"BiodiversityDistribution")){ - # Are there enough observations? - if( sum( obj$biodiversity$get_observations() ) < 200 ){ - ms$Warnings <- append(ms$Warnings, "Not enough observations found.") + # Check if necessary data is there + if(is.Waiver(obj$get_biodiversity_types())){ + ms$Warnings <- append(ms$Warnings, "No biodiversity data added.") + } else { + # Are there enough observations? + if( sum( obj$biodiversity$get_observations() ) < 100 ){ + ms$Warnings <- append(ms$Warnings, "Less that 100 observations found.") + } + + # Count number of observations and predictors + t1 <- obj$biodiversity$get_observations() |> as.numeric() + t2 <- length( obj$get_predictor_names() ) + if(!is.Waiver(t2)){ + if(t2 >= t1){ + ms$Warnings <- append(ms$Warnings, + "More predictors than observations.") + } + } + } + + # Check predictors + if(is.Waiver(obj$get_predictor_names())){ + ms$Warnings <- append(ms$Warnings, "No predictors added.") } } @@ -119,8 +139,10 @@ methods::setMethod( } } } - if(inherits(obj, "BiodiversityScenario")){ + + if(inherits(obj, "BiodiversityScenario")){ + invisible() } # Check for warnings diff --git a/R/distribution.R b/R/distribution.R index 93b9f113..09233880 100644 --- a/R/distribution.R +++ b/R/distribution.R @@ -119,8 +119,9 @@ methods::setMethod( assertthat::assert_that( inherits(background,'SpatRaster') ) # Check that provided background has a valid crs - assertthat::assert_that(!is.na(terra::crs(background,describe=TRUE)[['code']]), - msg = "Please provide a background with valid projection!") + if(!is.na(terra::crs(background,describe=TRUE)[['code']])){ + if(getOption('ibis.setupmessages')) myLog('[Setup]','red','Provide a background with a valid projection!') + } # Make an error check that units have a single units vals <- unique(background)[[1]] @@ -219,5 +220,5 @@ methods::setMethod( bdproto(NULL, BiodiversityDistribution, background = background, limits = limits - ) + ) }) diff --git a/R/engine_breg.R b/R/engine_breg.R index 2b5b09b9..833d8cdb 100644 --- a/R/engine_breg.R +++ b/R/engine_breg.R @@ -554,7 +554,7 @@ engine_breg <- function(x, if(is.null(x.var)){ x.var <- colnames(df) } else { - x.var <- match.arg(x.var, names(df), several.ok = FALSE) + x.var <- match.arg(x.var, colnames(df), several.ok = FALSE) } if(is.null(newdata)){ @@ -588,7 +588,7 @@ engine_breg <- function(x, factor(lvl[1], levels = lvl) } } else { - df_partial <- newdata |> dplyr::select(any_of(names(df))) + df_partial <- newdata |> dplyr::select(dplyr::any_of(names(df))) } # For Integrated model, take the last one diff --git a/R/engine_gdb.R b/R/engine_gdb.R index 8bc2b8ee..360e69d0 100644 --- a/R/engine_gdb.R +++ b/R/engine_gdb.R @@ -626,7 +626,7 @@ engine_gdb <- function(x, } } else { # Assume all present - dummy <- newdata |> dplyr::select(any_of(as.character(variables))) + dummy <- newdata |> dplyr::select(dplyr::any_of(as.character(variables))) } # Now predict with model diff --git a/R/engine_glmnet.R b/R/engine_glmnet.R index 94e78546..096101fa 100644 --- a/R/engine_glmnet.R +++ b/R/engine_glmnet.R @@ -164,7 +164,18 @@ engine_glmnet <- function(x, # Distribution specific procedure fam <- model$biodiversity[[1]]$family + form <- model$biodiversity[[1]]$equation + # -- # + # Respecify the predictor names if not matching + te <- formula_terms(form) + if(length(te) != nrow(model$biodiversity[[1]]$predictors_types)){ + model$biodiversity[[1]]$predictors_names <- + model$biodiversity[[1]]$predictors_names[model$biodiversity[[1]]$predictors_names %in% te] + model$biodiversity[[1]]$predictors_types <- + model$biodiversity[[1]]$predictors_types |> dplyr::filter( + predictors %in% te) + } # -- # # If a poisson family is used, weight the observations by their exposure @@ -194,12 +205,16 @@ engine_glmnet <- function(x, if(length(any_missing)>0) { presabs <- presabs[-any_missing,] # This works as they are in the same order model$biodiversity[[1]]$expect <- model$biodiversity[[1]]$expect[-any_missing] + } + df <- subset(df, stats::complete.cases(df)) + assertthat::assert_that(nrow(presabs) == nrow(df)) + + # Check that expect matches + if(length(model$biodiversity[[1]]$expect)!=nrow(df)){ # Fill the absences with 1 as multiplier. This works since absences follow the presences model$biodiversity[[1]]$expect <- c( model$biodiversity[[1]]$expect, rep(1, nrow(presabs)-length(model$biodiversity[[1]]$expect) )) } - df <- subset(df, stats::complete.cases(df)) - assertthat::assert_that(nrow(presabs) == nrow(df)) # Overwrite observation data model$biodiversity[[1]]$observations <- presabs @@ -207,7 +222,7 @@ engine_glmnet <- function(x, # Preprocessing security checks assertthat::assert_that( all( model$biodiversity[[1]]$observations[['observed']] >= 0 ), any(!is.na(presabs[['observed']])), - length(model$biodiversity[[1]]$expect)==nrow(model$biodiversity[[1]]$observations), + length(model$biodiversity[[1]]$expect) == nrow(model$biodiversity[[1]]$observations), nrow(df) == nrow(model$biodiversity[[1]]$observations) ) @@ -314,6 +329,12 @@ engine_glmnet <- function(x, full <- model$predictors w_full <- model$exposure + # Subset the predictor types to only those present + te <- formula_terms(form) + model$biodiversity[[1]]$predictors_types <- + model$biodiversity[[1]]$predictors_types |> dplyr::filter(predictors %in% te) + model$biodiversity[[1]]$predictors_names <- intersect(model$biodiversity[[1]]$predictors_names, te) + # Get offset and add it to exposure if(!is.Waiver(model$offset)){ # Add offset to full prediction and load vector @@ -343,6 +364,7 @@ engine_glmnet <- function(x, if(!is.Waiver(model$priors)){ # Reset those contained in the prior object for(v in model$priors$varnames()){ + if(!(v %in% names(p.fac))) next() p.fac[v] <- model$priors$get(v, what = "value") lowlim[v] <- model$priors$get(v, what = "lims")[1] upplim[v] <- model$priors$get(v, what = "lims")[2] @@ -567,7 +589,7 @@ engine_glmnet <- function(x, } } else { # Assume all variables are present - df2 <- newdata |> dplyr::select(any_of(names(df))) + df2 <- newdata |> dplyr::select(dplyr::any_of(names(df))) assertthat::assert_that(nrow(df2)>1, ncol(df2)>1) } diff --git a/R/engine_inlabru.R b/R/engine_inlabru.R index 7168fc02..3a6ba929 100644 --- a/R/engine_inlabru.R +++ b/R/engine_inlabru.R @@ -1098,7 +1098,7 @@ engine_inlabru <- function(x, factor(lvl[1], levels = lvl) } } else { - df_partial <- newdata |> dplyr::select(any_of(names(df))) + df_partial <- newdata |> dplyr::select(dplyr::any_of(names(df))) } ## plot the unique effect of the covariate diff --git a/R/engine_stan.R b/R/engine_stan.R index 2a39919e..4a774074 100644 --- a/R/engine_stan.R +++ b/R/engine_stan.R @@ -722,7 +722,8 @@ engine_stan <- function(x, } df_partial <- df_partial |> as.data.frame() } else { - df_partial <- newdata |> dplyr::select(any_of(model$predictors_names)) + df_partial <- newdata |> dplyr::select( + dplyr::any_of(model$predictors_names)) } # For Integrated model, follow poisson diff --git a/R/engine_xgboost.R b/R/engine_xgboost.R index 72203842..da3f71a1 100644 --- a/R/engine_xgboost.R +++ b/R/engine_xgboost.R @@ -184,7 +184,7 @@ engine_xgboost <- function(x, # Change the number of variables included if custom equation is used if(!is.Waiver(model$biodiversity[[1]]$equation)){ form <- model$biodiversity[[1]]$equation - varn <- model$biodiversity[[1]]$predictors_names[which( all.vars(form) %in% model$biodiversity[[1]]$predictors_names )] + varn <- model$biodiversity[[1]]$predictors_names[which( model$biodiversity[[1]]$predictors_names %in% formula_terms(form) )] assertthat::assert_that(length(varn)>0) # Match to existing ones and remove those not covered model$biodiversity[[1]]$predictors_names <- model$biodiversity[[1]]$predictors_names[match(varn, model$biodiversity[[1]]$predictors_names)] diff --git a/R/ensemble.R b/R/ensemble.R index 766bfdd9..a54751be 100644 --- a/R/ensemble.R +++ b/R/ensemble.R @@ -259,206 +259,211 @@ methods::setMethod( assertthat::assert_that(is.Raster(out)) return(out) - } else if(is.Raster(mods[[1]])) { - # Check that layer is present in supplied mods - if(!is.null(layer)){ - assertthat::assert_that( - all( sapply(mods, function(x) layer %in% names(x) ) ), - msg = paste("Layer", text_red(layer), "not found in supplied objects!") - ) - } else { layer <- 1 } # Take the first one - # TODO: - if(length(layer)>1) stop("Not implemented yet") - # Get prediction stacks from all mods - ll_ras <- sapply(mods, function(x) x[[layer]]) - # Ensure that the layers have the same resolution, otherwise align - if(!terra::compareGeom(ll_ras[[1]], ll_ras[[2]], stopOnError = FALSE)){ - if(getOption('ibis.setupmessages')) myLog('[Ensemble]','red','Rasters need to be aligned. Check.') - ll_ras[[2]] <- terra::resample(ll_ras[[2]], ll_ras[[1]], method = "bilinear") - } - ras <- terra::rast(ll_ras) - # Apply threshold if set. Set to 0 thus reducing the value of the ensembled - # layer. - if(!is.null(min.value)) ras[ras < min.value] <- 0 + } else if(is.Raster(mods[[1]])) { + # Check that layer is present in supplied mods + if(!is.null(layer)){ + assertthat::assert_that( + all( sapply(mods, function(x) layer %in% names(x) ) ), + msg = paste("Layer", text_red(layer), "not found in supplied objects!") + ) + } else { layer <- 1 } # Take the first one + # TODO: + if(length(layer)>1) stop("Not implemented yet") + # Get prediction stacks from all mods + ll_ras <- sapply(mods, function(x) x[[layer]]) + # Ensure that the layers have the same resolution, otherwise align + if(!terra::compareGeom(ll_ras[[1]], ll_ras[[2]], stopOnError = FALSE)){ + if(getOption('ibis.setupmessages')) myLog('[Ensemble]','red','Rasters need to be aligned. Check.') + ll_ras[[2]] <- terra::resample(ll_ras[[2]], ll_ras[[1]], method = "bilinear") + } + ras <- terra::rast(ll_ras) + # Apply threshold if set. Set to 0 thus reducing the value of the ensembled + # layer. + if(!is.null(min.value)) ras[ras < min.value] <- 0 + + # If normalize before running an ensemble if parameter set + if(normalize) ras <- predictor_transform(ras, option = "norm") + + # Now ensemble per layer entry + out <- terra::rast() + for(lyr in layer){ + + # Now create the ensemble depending on the option + if(method == 'mean'){ + new <- terra::mean( ras, na.rm = TRUE) + } else if(method == 'median'){ + new <- terra::median( ras, na.rm = TRUE) + } else if(method == 'max'){ + new <- max(ras, na.rm = TRUE) + } else if(method == 'min'){ + new <- min(ras, na.rm = TRUE) + } else if(method == 'weighted.mean'){ + new <- terra::weighted.mean( ras, w = weights, na.rm = TRUE) + } else if(method == 'threshold.frequency'){ + # Check that thresholds are available + stop("This function does not (yet) work with directly provided Raster objects.") + + } else if(method == 'min.sd'){ + # If method 'min.sd' furthermore check that there is a sd object for all + # of them + assertthat::assert_that( + all( sapply(mods, function(x) "sd" %in% names(mods) ) ), + msg = "Method \'min.sd\' needs parametrized uncertainty (sd) for all objects." + ) + # Also get SD prediction from models + ras_sd <- c( sapply(mods, function(x) x[['sd']])) + # Get the id of the layer where standard deviation is lower + min_sd <- terra::where.min(ras_sd) + new <- emptyraster(ras) + for(cl in terra::unique(min_sd)){ + new[min_sd == cl] <- ras[[cl]][min_sd == cl] + } + } else if(method == 'pca'){ + # Calculate a pca on the layers and return the first axes + new <- predictor_transform(ras, option = "pca", pca.var = 1)[[1]] + } + # Rename + names(new) <- paste0("ensemble_", lyr) + # Add attributes on the method of ensemble + attr(new, "method") <- method + if(uncertainty != "none"){ + if(uncertainty == "pca") { + stop("Currently, uncertainty = 'pca' is not implemented for SpatRaster input.") + } + # Add uncertainty + ras_uncertainty <- switch (uncertainty, + "sd" = terra::app(ras, fun = "sd", na.rm = TRUE), + "cv" = terra::app(ras, fun = "sd", na.rm = TRUE) / terra::mean(ras, na.rm = TRUE), + "range" = max(ras, na.rm = TRUE) - min(ras, na.rm = TRUE) + ) + names(ras_uncertainty) <- paste0(uncertainty, "_", lyr) + # Add attributes on the method of ensembling + attr(ras_uncertainty, "method") <- uncertainty + # Add all layers to out + suppressWarnings( out <- c(out, new, ras_uncertainty) ) + } else { + suppressWarnings( out <- c(out, new) ) + } + } + + assertthat::assert_that(is.Raster(out)) + return(out) + } else { + # Scenario objects as stars or Scenario objects + if(all(sapply(mods, function(z) inherits(z, "stars")))){ + # Check that layer is in stars + if(!assertthat::see_if(all( sapply(mods, function(z) layer %in% names(z)) ))){ + if(getOption('ibis.setupmessages')) myLog('[Ensemble]','red','Provided layer not in objects. Taking first option!') + layer <- names(mods[[1]])[1] + } + # Format to table + lmat <- do.call("rbind", mods) |> as.data.frame() + # Get dimensions + lmat_dim <- stars::st_dimensions(mods[[1]]) - # If normalize before running an ensemble if parameter set - if(normalize) ras <- predictor_transform(ras, option = "norm") + } else { + # Check that layers all have a prediction layer + assertthat::assert_that( + all( sapply(mods, function(x) !is.Waiver(x$get_data()) ) ), + msg = "All distribution models need a fitted scenario object!" + ) + # Check that layer is present in supplied mods + assertthat::assert_that( + all( sapply(mods, function(x) layer %in% names(x$get_data()) ) ), + msg = paste("Layer", text_red(layer), "not found in supplied objects!") + ) + # Get projected suitability from all mods + lmat <- stars::st_as_stars( + sapply(mods, function(x) x$get_data()[layer]) + ) |> as.data.frame() + # Get dimensions + lmat_dim <- stars::st_dimensions(mods[[1]]$get_data()) + } - # Now ensemble per layer entry - out <- terra::rast() - for(lyr in layer){ + # Normalize stars files + if(normalize){ + # Get overall means and max values + ovmin <- min(lmat[,4:ncol(lmat)],na.rm = TRUE) + ovmax <- max(lmat[,4:ncol(lmat)],na.rm = TRUE) + lmat[,4:ncol(lmat)] <- apply(lmat[,4:ncol(lmat)], # On the assumption that col 1-3 are coordinates+time + 2, function(x) { + (x - ovmin) / (ovmax - ovmin ) + }) + } # Now create the ensemble depending on the option if(method == 'mean'){ - new <- terra::mean( ras, na.rm = TRUE) + out <- apply(lmat[,4:ncol(lmat)], # On the assumption that col 1-3 are coordinates+time + 1, function(x) mean(x, na.rm = TRUE)) } else if(method == 'median'){ - new <- terra::median( ras, na.rm = TRUE) + out <- apply(lmat[,4:ncol(lmat)], # On the assumption that col 1-3 are coordinates+time + 1, function(x) stats::median(x, na.rm = TRUE)) } else if(method == 'max'){ - new <- max(ras, na.rm = TRUE) + out <- apply(lmat[,4:ncol(lmat)], # On the assumption that col 1-3 are coordinates+time + 1, function(x) max(x, na.rm = TRUE)) } else if(method == 'min'){ - new <- min(ras, na.rm = TRUE) + out <- apply(lmat[,4:ncol(lmat)], # On the assumption that col 1-3 are coordinates+time + 1, function(x) min(x, na.rm = TRUE)) } else if(method == 'weighted.mean'){ - new <- terra::weighted.mean( ras, w = weights, na.rm = TRUE) + out <- apply(lmat[,4:ncol(lmat)], # On the assumption that col 1-3 are coordinates+time + 1, function(x) weighted.mean(x, w = weights, na.rm = TRUE)) } else if(method == 'threshold.frequency'){ + out <- apply(lmat[,4:ncol(lmat)], # On the assumption that col 1-3 are coordinates+time + 1, function(x) sum(x, na.rm = TRUE) / (ncol(lmat)-3) ) # Check that thresholds are available - stop("This function does not (yet) work with directly provided Raster objects.") - } else if(method == 'min.sd'){ - # If method 'min.sd' furthermore check that there is a sd object for all - # of them - assertthat::assert_that( - all( sapply(mods, function(x) "sd" %in% names(mods) ) ), - msg = "Method \'min.sd\' needs parametrized uncertainty (sd) for all objects." - ) - # Also get SD prediction from models - ras_sd <- c( sapply(mods, function(x) x[['sd']])) - # Get the id of the layer where standard deviation is lower - min_sd <- terra::where.min(ras_sd) - new <- emptyraster(ras) - for(cl in terra::unique(min_sd)){ - new[min_sd == cl] <- ras[[cl]][min_sd == cl] - } + stop("This has not been reasonably implemented in this context.") } else if(method == 'pca'){ - # Calculate a pca on the layers and return the first axes - new <- predictor_transform(ras, option = "pca", pca.var = 1)[[1]] + stop("This has not been reasonably implemented in this context.") } + # Add dimensions to output + out <- cbind( sf::st_coordinates(mods[[1]]$get_data()[layer]), "ensemble" = out ) |> as.data.frame() + + # Convert to stars + out <- out |> stars:::st_as_stars.data.frame(dims = c(1,2,3), coords = 1:2) + # Rename dimension names + out <- out |> stars::st_set_dimensions(names = c("x", "y", "band")) # Rename - names(new) <- paste0("ensemble_", lyr) + names(out) <- paste0("ensemble_", layer) # Add attributes on the method of ensemble - attr(new, "method") <- method - if(uncertainty != "none"){ + attr(out, "method") <- method + + # --- # + if(uncertainty != 'none'){ if(uncertainty == "pca") { - stop("Currently, uncertainty = 'pca' is not implemented for SpatRaster input.") + stop("Currently, uncertainty = 'pca' is not implemented for stars input.") } # Add uncertainty - ras_uncertainty <- switch (uncertainty, - "sd" = terra::app(ras, fun = "sd", na.rm = TRUE), - "cv" = terra::app(ras, fun = "sd", na.rm = TRUE) / terra::mean(ras, na.rm = TRUE), - "range" = max(ras, na.rm = TRUE) - min(ras, na.rm = TRUE) + out_uncertainty <- switch (uncertainty, + "sd" = apply(lmat[,4:ncol(lmat)], 1, function(x) stats::sd(x, na.rm = TRUE)), + "cv" = apply(lmat[,4:ncol(lmat)], 1, function(x) stats::sd(x, na.rm = TRUE) / mean(x, na.rm = TRUE)), + "range" = apply(lmat[,4:ncol(lmat)], 1, function(x) (max(x, na.rm = TRUE) - min(x, na.rm = TRUE))) ) - names(ras_uncertainty) <- paste0(uncertainty, "_", lyr) + if(any(is.infinite(out_uncertainty))) out_uncertainty[is.infinite(out_uncertainty)] <- NA + # Add dimensions to output + out_uncertainty <- cbind( sf::st_coordinates(mods[[1]]$get_data()[layer]), "ensemble" = out_uncertainty ) |> as.data.frame() + + # Convert to stars + out_uncertainty <- out_uncertainty |> stars:::st_as_stars.data.frame(dims = c(1,2,3), coords = 1:2) + # Rename dimension names + out_uncertainty <- out_uncertainty |> stars::st_set_dimensions(names = c("x", "y", "band")) + # Rename + names(out_uncertainty) <- paste0(uncertainty, "_", layer) # Add attributes on the method of ensembling - attr(ras_uncertainty, "method") <- uncertainty - # Add all layers to out - suppressWarnings( out <- c(out, new, ras_uncertainty) ) + attr(out_uncertainty, "method") <- uncertainty + # --- # + # Combine both ensemble and uncertainty + ex <- stars:::c.stars(out, out_uncertainty) + # Correct projection is unset + if(is.na(sf::st_crs(ex))) ex <- sf::st_set_crs(ex, sf::st_crs(mods[[1]]$get_data())) } else { - suppressWarnings( out <- c(out, new) ) - } - } - - assertthat::assert_that(is.Raster(out)) - return(out) - } else { - # Scenario objects as stars or Scenario objects - if(all(sapply(mods, function(z) inherits(z, "stars")))){ - # Check that layer is in stars - if(!assertthat::see_if(all( sapply(mods, function(z) layer %in% names(z)) ))){ - if(getOption('ibis.setupmessages')) myLog('[Ensemble]','red','Provided layer not in objects. Taking first option!') - layer <- names(mods[[1]])[1] - } - # Format to table - lmat <- do.call("rbind", mods) |> as.data.frame() - # Get dimensions - lmat_dim <- stars::st_dimensions(mods[[1]]) - - } else { - # Check that layers all have a prediction layer - assertthat::assert_that( - all( sapply(mods, function(x) !is.Waiver(x$get_data()) ) ), - msg = "All distribution models need a fitted scenario object!" - ) - # Check that layer is present in supplied mods - assertthat::assert_that( - all( sapply(mods, function(x) layer %in% names(x$get_data()) ) ), - msg = paste("Layer", text_red(layer), "not found in supplied objects!") - ) - # Get projected suitability from all mods - lmat <- stars::st_as_stars( - sapply(mods, function(x) x$get_data()[layer]) - ) |> as.data.frame() - # Get dimensions - lmat_dim <- stars::st_dimensions(mods[[1]]$get_data()) - } - if(normalize){ - lmat[,4:ncol(lmat)] <- apply(lmat[,4:ncol(lmat)], # On the assumption that col 1-3 are coordinates+time - 2, function(x) { - (x - min(x, na.rm = TRUE)) / (max(x, na.rm = TRUE) - min(x, na.rm = TRUE) ) - }) - } - - # Now create the ensemble depending on the option - if(method == 'mean'){ - out <- apply(lmat[,4:ncol(lmat)], # On the assumption that col 1-3 are coordinates+time - 1, function(x) mean(x, na.rm = TRUE)) - } else if(method == 'median'){ - out <- apply(lmat[,4:ncol(lmat)], # On the assumption that col 1-3 are coordinates+time - 1, function(x) stats::median(x, na.rm = TRUE)) - } else if(method == 'max'){ - out <- apply(lmat[,4:ncol(lmat)], # On the assumption that col 1-3 are coordinates+time - 1, function(x) max(x, na.rm = TRUE)) - } else if(method == 'min'){ - out <- apply(lmat[,4:ncol(lmat)], # On the assumption that col 1-3 are coordinates+time - 1, function(x) min(x, na.rm = TRUE)) - } else if(method == 'weighted.mean'){ - out <- apply(lmat[,4:ncol(lmat)], # On the assumption that col 1-3 are coordinates+time - 1, function(x) weighted.mean(x, w = weights, na.rm = TRUE)) - } else if(method == 'threshold.frequency'){ - out <- apply(lmat[,4:ncol(lmat)], # On the assumption that col 1-3 are coordinates+time - 1, function(x) sum(x, na.rm = TRUE) / (ncol(lmat)-3) ) - # Check that thresholds are available - } else if(method == 'min.sd'){ - stop("This has not been reasonably implemented in this context.") - } else if(method == 'pca'){ - stop("This has not been reasonably implemented in this context.") - } - # Add dimensions to output - out <- cbind( sf::st_coordinates(mods[[1]]$get_data()[layer]), "ensemble" = out ) |> as.data.frame() - - # Convert to stars - out <- out |> stars:::st_as_stars.data.frame(dims = c(1,2,3), coords = 1:2) - # Rename dimension names - out <- out |> stars::st_set_dimensions(names = c("x", "y", "band")) - # Rename - names(out) <- paste0("ensemble_", layer) - # Add attributes on the method of ensemble - attr(out, "method") <- method - - # --- # - if(uncertainty != 'none'){ - if(uncertainty == "pca") { - stop("Currently, uncertainty = 'pca' is not implemented for stars input.") + # Only the output + ex <- out } - # Add uncertainty - out_uncertainty <- switch (uncertainty, - "sd" = apply(lmat[,4:ncol(lmat)], 1, function(x) stats::sd(x, na.rm = TRUE)), - "cv" = apply(lmat[,4:ncol(lmat)], 1, function(x) stats::sd(x, na.rm = TRUE) / mean(x, na.rm = TRUE)), - "range" = apply(lmat[,4:ncol(lmat)], 1, function(x) (max(x, na.rm = TRUE) - min(x, na.rm = TRUE))) - ) - if(any(is.infinite(out_uncertainty))) out_uncertainty[is.infinite(out_uncertainty)] <- NA - # Add dimensions to output - out_uncertainty <- cbind( sf::st_coordinates(mods[[1]]$get_data()[layer]), "ensemble" = out_uncertainty ) |> as.data.frame() - - # Convert to stars - out_uncertainty <- out_uncertainty |> stars:::st_as_stars.data.frame(dims = c(1,2,3), coords = 1:2) - # Rename dimension names - out_uncertainty <- out_uncertainty |> stars::st_set_dimensions(names = c("x", "y", "band")) - # Rename - names(out_uncertainty) <- paste0(uncertainty, "_", layer) - # Add attributes on the method of ensembling - attr(out_uncertainty, "method") <- uncertainty - # --- # - # Combine both ensemble and uncertainty - ex <- stars:::c.stars(out, out_uncertainty) # Correct projection is unset if(is.na(sf::st_crs(ex))) ex <- sf::st_set_crs(ex, sf::st_crs(mods[[1]]$get_data())) - } else { - # Only the output - ex <- out - } - # Correct projection is unset - if(is.na(sf::st_crs(ex))) ex <- sf::st_set_crs(ex, sf::st_crs(mods[[1]]$get_data())) - assertthat::assert_that(inherits(ex, "stars")) - return(ex) + assertthat::assert_that(inherits(ex, "stars")) + return(ex) } } ) @@ -598,13 +603,19 @@ methods::setMethod( out <- rbind(out, o) } + # Catch error in case none of them computed + if(nrow(out)==0){ + if(getOption("ibis.setupmessages")) myLog("[Inference]","red","None of the models seemed to contain the variable.") + stop("No estimates found!") + } + # Now composite the ensemble depending on the option if(method == 'mean'){ new <- aggregate(out[,layer], by = list(partial_effect = out$partial_effect), - FUN = function(x = out[[layer]]) { - return(cbind( mean = mean(x,na.rm = TRUE), - sd = stats::sd(x,na.rm = TRUE))) - }) |> as.matrix() |> as.data.frame() + FUN = function(x = out[[layer]]) { + return(cbind( mean = mean(x,na.rm = TRUE), + sd = stats::sd(x,na.rm = TRUE))) + }) |> as.matrix() |> as.data.frame() colnames(new) <- c("partial_effect", "mean", "sd") } else if(method == 'median'){ new <- aggregate(out[,layer], by = list(partial_effect = out$partial_effect), @@ -624,13 +635,13 @@ methods::setMethod( new[,1] <- sapply(new[,1], function(z) rr[which.min(abs(z - rr))] ) if(method=="mean"){ new <- new |> dplyr::group_by(partial_effect) |> - dplyr::summarise(sd = sd(mean,na.rm=TRUE), + dplyr::summarise(sd = stats::sd(mean,na.rm=TRUE), mean = mean(mean,na.rm=TRUE) - ) |> - dplyr::relocate(mean,.before = sd) + ) |> + dplyr::relocate(mean,.before = sd) } else { new <- new |> dplyr::group_by(partial_effect) |> - dplyr::summarise(mad = mad(median,na.rm=TRUE), + dplyr::summarise(mad = stats::mad(median,na.rm=TRUE), median = median(median,na.rm=TRUE) ) |> dplyr::relocate(median,.before = mad) @@ -719,7 +730,7 @@ methods::setMethod( assertthat::assert_that(all(x.var %in% names(newdata)), msg = "Variable not found in newdata!") template <- terra::rast(mods[[1]]$model$predictors[,c("x", "y")], - crs = terra::crs(mods[[1]]$model$background),type = "xyz") |> + crs = terra::crs(mods[[1]]$model$background),type = "xyz") |> emptyraster() newdata <- alignRasters(newdata, template,cl = FALSE) assertthat::assert_that(is.Raster(template), @@ -729,15 +740,11 @@ methods::setMethod( means <- terra::global(newdata, "mean", na.rm = TRUE) assertthat::assert_that(x.var %in% rownames(means)) # Set all variables except x.var to the means - nd <- newdata + nd <- newdata |> terra::as.data.frame(xy = TRUE, na.rm = FALSE) for(val in names(nd)){ - if(val %in% x.var) next() - nd[[val]] <- terra::setValues(nd[[val]], means[rownames(means)==val,1], - keeptime = TRUE, keepnames = TRUE) - nd[[val]] <- terra::mask(nd[[val]], newdata[[val]]) + if(val %in% c(x.var,"x", "y")) next() + nd[[val]][!is.na(nd[[val]])] <- means[rownames(means)==val,1] } - assertthat::assert_that(is.Raster(nd)) - nd <- terra::as.data.frame(nd, xy = TRUE, na.rm = FALSE) } # Now for each object get the partial values for the target variable @@ -764,12 +771,24 @@ methods::setMethod( # Append to output object out[[as.character(obj$id)]] <- o } - # Now construct an ensemble by calling ensemble directly - new <- ensemble(out, method = method, - normalize = normalize, - layer = layer, - min.value = min.value, - uncertainty = "none") + # Catch error in case none of them computed + if(length(out)==0){ + if(getOption("ibis.setupmessages")) myLog("[Inference]","red","None of the models seemed to contain the variable.") + stop("No estimates found!") + } + + if(length(out)==1){ + if(getOption("ibis.setupmessages")) myLog("[Inference]","yellow","Only a single model was estimated. Returning output.") + new <- out[[1]] + } else { + # Now construct an ensemble by calling ensemble directly + new <- ensemble(out, method = method, + normalize = normalize, + layer = layer, + min.value = min.value, + uncertainty = "none") + } + assertthat::assert_that( is.Raster(new),msg = "Something went wrong with the ensemble calculation!" ) diff --git a/R/ibis.iSDM-package.R b/R/ibis.iSDM-package.R index 90be9e0f..eca0abc9 100644 --- a/R/ibis.iSDM-package.R +++ b/R/ibis.iSDM-package.R @@ -4,9 +4,15 @@ ## usethis namespace: start #' @importFrom foreach %do% %dopar% #' @importFrom methods as +#' @importFrom terra mask #' @importFrom stats effects #' @importFrom stats residuals #' @importFrom stats complete.cases +#' @importFrom stats mad +#' @importFrom stats sd +#' @importFrom stats terms.formula +#' @importFrom stats update.formula +#' @importFrom utils install.packages #' @importFrom graphics par ## usethis namespace: end NULL diff --git a/R/mask.R b/R/mask.R new file mode 100644 index 00000000..5834935f --- /dev/null +++ b/R/mask.R @@ -0,0 +1,63 @@ +#' @include utils.R +NULL + +#' Mask data with an external layer +#' +#' @description This is a helper function that takes an existing object created +#' by the ibis.iSDM package and an external layer, then intersects both. It +#' currently takes either a [DistributionModel], [BiodiversityDatasetCollection], +#' [PredictorDataset] or [BiodiversityScenario] as input. +#' +#' As mask either a [`sf`] or [`SpatRaster`] object can be chosen. The mask will +#' be converted internally depending on the object. +#' +#' @param x Any object belonging to [DistributionModel], +#' [BiodiversityDatasetCollection], [PredictorDataset] or +#' [BiodiversityScenario]. +#' @param mask A [`sf`] or [`SpatRaster`] object. +#' @param inverse A [`logical`] flag whether to take inverse of the mask instead +#' (Default: \code{FALSE}). +#' +#' @seealso [terra::mask()] +#' @aliases mask +#' @examples +#' \dontrun{ +#' # Build and train a model +#' mod <- distribution(background) |> +#' add_biodiversity_poipo(species) |> +#' add_predictors(predictors) |> +#' engine_glmnet() |> +#' train() +#' +#' # Constrain the prediction by another object +#' mod <- mask(mod, speciesrange) +#' +#' } +#' @return A respective object of the input type. +#' @keywords utils +#' @name mask +NULL + +#' @rdname mask +#' @method mask DistributionModel +#' @keywords utils +#' @export +mask.DistributionModel <- function(x, mask, inverse = FALSE) x$mask(mask,inverse) + +#' @rdname mask +#' @method mask BiodiversityDatasetCollection +#' @keywords utils +#' @export +mask.BiodiversityDatasetCollection <- function(x, mask, inverse = FALSE) x$mask(mask,inverse) + +#' @rdname mask +#' @method mask PredictorDataset +#' @keywords utils +#' @export +mask.PredictorDataset <- function(x, mask, inverse = FALSE) x$mask(mask,inverse) + +#' @rdname mask +#' @method mask BiodiversityScenario +#' @keywords utils +#' @export +mask.BiodiversityScenario <- function(x, mask, inverse = FALSE) x$mask(mask,inverse) diff --git a/R/misc.R b/R/misc.R index 650a9014..628961f6 100644 --- a/R/misc.R +++ b/R/misc.R @@ -90,10 +90,14 @@ ibis_dependencies <- function(deps = getOption("ibis.dependencies"), update = TR new.packages <- deps[!(deps %in% utils::installed.packages()[, "Package"])] if(length(new.packages)>0){ if("INLA" %in% new.packages){ - suppressMessages( - utils::install.packages("INLA", repos=c(getOption("repos"), INLA="https://inla.r-inla-download.org/R/stable"), - dependencies = TRUE, quiet = TRUE) - ) + if (!requireNamespace("BiocManager", quietly = TRUE)) + utils::install.packages("BiocManager") + BiocManager::install(c("graph", "Rgraphviz"), dep=TRUE) + # Then install INLA + utils::install.packages("INLA", + repos=c(getOption("repos"), + INLA="https://inla.r-inla-download.org/R/stable"), + dep=TRUE) } suppressMessages( utils::install.packages(new.packages, dependencies = TRUE, quiet = TRUE, diff --git a/R/pseudoabsence.R b/R/pseudoabsence.R index 55c0463d..ac8f1aec 100644 --- a/R/pseudoabsence.R +++ b/R/pseudoabsence.R @@ -82,7 +82,7 @@ NULL #' # points to the resulting model #' all_my_points <- add_pseudoabsence( #' df = virtual_points, -#' field_occurrence = 'Observed', +#' field_occurrence = 'observed', #' template = background, #' settings = ass1) #' } @@ -257,7 +257,7 @@ add_pseudoabsence <- function(df, field_occurrence = "observed", template = NULL bias <- terra::resample(bias, background, method = "bilinear") } # Normalize if not already set - if(terra::global(bias, 'max', na.rm = TRUE) > 1 || terra::global(bias, 'min', na.rm = TRUE) < 0 ){ + if(terra::global(bias, 'max', na.rm = TRUE)[,1] > 1 || terra::global(bias, 'min', na.rm = TRUE)[,1] < 0 ){ bias <- predictor_transform(bias, option = "norm") } } else { bias <- NULL } @@ -276,11 +276,11 @@ add_pseudoabsence <- function(df, field_occurrence = "observed", template = NULL # Now sample from all cells not occupied if(!is.null(bias)){ # Get probability values for cells where no sampling has been conducted - prob_bias <- bias[which(bg1[]==0)] + prob_bias <- bias[which(terra::values(bg1)[,1]==0)][,1] if(any(is.na(prob_bias))) prob_bias[is.na(prob_bias)] <- 0 - abs <- sample(which(bg1[]==0), size = nrpoints, replace = TRUE, prob = prob_bias) + abs <- sample(which(terra::values(bg1)[,1]==0), size = nrpoints, replace = TRUE, prob = prob_bias) } else { - abs <- sample(which(bg1[]==0), size = nrpoints, replace = TRUE) + abs <- sample(which(terra::values(bg1)[,1]==0), size = nrpoints, replace = TRUE) } } else if(method == "buffer"){ assertthat::assert_that(is.numeric(buffer_distance),msg = "Buffer distance parameter not numeric!") @@ -301,7 +301,7 @@ add_pseudoabsence <- function(df, field_occurrence = "observed", template = NULL # Now sample from all cells not occupied if(!is.null(bias)){ # Get probability values for cells where no sampling has been conducted - prob_bias <- bias[which(bg2[]==1)] + prob_bias <- bias[which(bg2[]==1)][,1] if(any(is.na(prob_bias))) prob_bias[is.na(prob_bias)] <- 0 abs <- sample(which(bg2[]==1), size = nrpoints, replace = TRUE, prob = prob_bias) } else { @@ -317,7 +317,7 @@ add_pseudoabsence <- function(df, field_occurrence = "observed", template = NULL bg2 <- terra::mask(bg1, mask = pol, inverse = !inside) if(!is.null(bias)){ # Get probability values for cells where no sampling has been conducted - prob_bias <- bias[which(bg2[]==0)] + prob_bias <- bias[which(bg2[]==0)][,1] if(any(is.na(prob_bias))) prob_bias[is.na(prob_bias)] <- 0 abs <- sample(which(bg2[]==0), size = nrpoints, replace = TRUE, prob = prob_bias) } else { @@ -340,11 +340,11 @@ add_pseudoabsence <- function(df, field_occurrence = "observed", template = NULL bg2 <- terra::mask(bg1, mask = layer, inverse = !inside) if(!is.null(bias)){ # Get probability values for cells where no sampling has been conducted - prob_bias <- bias[which(bg2[]==0)] + prob_bias <- bias[which(terra::values(bg2)[,1]==0)][,1] if(any(is.na(prob_bias))) prob_bias[is.na(prob_bias)] <- 0 - abs <- sample(which(bg2[]==0), size = nrpoints, replace = TRUE, prob = prob_bias) + abs <- sample(which(terra::values(bg2)[,1]==0), size = nrpoints, replace = TRUE, prob = prob_bias) } else { - abs <- sample(which(bg2[]==0), size = nrpoints, replace = TRUE) + abs <- sample(which(terra::values(bg2)[,1]==0), size = nrpoints, replace = TRUE) } rm(bg2) } else if(method == "zones"){ @@ -379,11 +379,11 @@ add_pseudoabsence <- function(df, field_occurrence = "observed", template = NULL bg2 <- terra::mask(bg1, mask = zones) if(!is.null(bias)){ # Get probability values for cells where no sampling has been conducted - prob_bias <- bias[which(bg2[]==0)] + prob_bias <- bias[which(terra::values(bg2)[,1]==0)][,1] if(any(is.na(prob_bias))) prob_bias[is.na(prob_bias)] <- 0 - abs <- sample(which(bg2[]==0), size = nrpoints, replace = TRUE, prob = prob_bias) + abs <- sample(which(terra::values(bg2)[,1]==0), size = nrpoints, replace = TRUE, prob = prob_bias) } else { - abs <- sample(which(bg2[]==0), size = nrpoints, replace = TRUE) + abs <- sample(which(terra::values(bg2)[,1]==0), size = nrpoints, replace = TRUE) } rm(bg2) } else if(method == "target"){ @@ -398,11 +398,11 @@ add_pseudoabsence <- function(df, field_occurrence = "observed", template = NULL bg2 <- terra::mask(bg1, mask = layer) if(!is.null(bias)){ # Get probability values for cells where no sampling has been conducted - prob_bias <- bias[which(bg2[]==0)] + prob_bias <- bias[which(terra::values(bg2)[,1]==0)][,1] if(any(is.na(prob_bias))) prob_bias[is.na(prob_bias)] <- 0 - abs <- sample(which(bg2[]==0), size = nrpoints, replace = TRUE, prob = prob_bias) + abs <- sample(which(terra::values(bg2)[,1]==0), size = nrpoints, replace = TRUE, prob = prob_bias) } else { - abs <- sample(which(bg2[]==0), size = nrpoints, replace = TRUE) + abs <- sample(which(terra::values(bg2)[,1]==0), size = nrpoints, replace = TRUE) } rm(bg2) } else { diff --git a/R/threshold.R b/R/threshold.R index 7523324d..de6fbeff 100644 --- a/R/threshold.R +++ b/R/threshold.R @@ -27,6 +27,8 @@ NULL #' (Default: \code{NULL}). #' @param point A [`sf`] object containing observational data used for model #' training. +#' @param field_occurrence A [`character`] location of +#' biodiversity point records. #' @param format [`character`] indication of whether \code{"binary"}, #' \code{"normalize"} or \code{"percentile"} formatted thresholds are to be #' created (Default: \code{"binary"}). Also see Muscatello et al. (2021). @@ -71,7 +73,7 @@ NULL methods::setGeneric( "threshold", signature = methods::signature("obj", "method", "value"), - function(obj, method = 'mtp', value = NULL, point = NULL, format = "binary", return_threshold = FALSE, ...) standardGeneric("threshold")) + function(obj, method = 'mtp', value = NULL, point = NULL, field_occurrence = "observed", format = "binary", return_threshold = FALSE, ...) standardGeneric("threshold")) #' Generic threshold with supplied DistributionModel object #' @name threshold @@ -81,7 +83,7 @@ methods::setGeneric( methods::setMethod( "threshold", methods::signature(obj = "ANY"), - function(obj, method = 'mtp', value = NULL, point = NULL, format = "binary", return_threshold = FALSE, ...) { + function(obj, method = 'mtp', value = NULL, point = NULL, field_occurrence = "observed", format = "binary", return_threshold = FALSE, ...) { assertthat::assert_that(any( class(obj) %in% getOption('ibis.engines') ), is.character(method), is.null(value) || is.numeric(value), @@ -123,7 +125,7 @@ methods::setMethod( } point <- collect_occurrencepoints(model = model, include_absences = TRUE, - point_column = "observed", + point_column = field_occurrence, addName = TRUE, tosf = TRUE ) } else { @@ -131,27 +133,27 @@ methods::setMethod( } # If TSS or kappa is chosen, check whether there is poipa data among the sources - if((!any(point$observed==0) & method %in% c('TSS','kappa','F1score','Sensitivity','Specificity')) || length(unique(point$name)) > 1){ + if((!any(point[, field_occurrence, drop = TRUE]==0) & method %in% c('TSS','kappa','F1score','Sensitivity','Specificity')) || length(unique(point$name)) > 1){ if(getOption('ibis.setupmessages')) myLog('[Threshold]','red','Threshold method needs absence-data. Generating some now...') bg <- terra::rasterize(obj$model$background, emptyraster(obj$get_data('prediction'))) ass <- model$biodiversity[[1]]$pseudoabsence_settings if(is.null(ass)) ass <- getOption("ibis.pseudoabsence") # Get Default settings suppressMessages( abs <- add_pseudoabsence(df = point, - field_occurrence = 'observed', + field_occurrence = field_occurrence, template = bg, # Assuming that settings are comparable among objects settings = ass ) ) - abs <- subset(abs, select = c('x','y'));abs$observed <- 0 + abs <- subset(abs, select = c('x','y'));abs[, field_occurrence] <- 0 abs <- guess_sf(abs) abs$name <- 'Background point'; abs$type <- "generated" suppressWarnings( abs <- sf::st_set_crs(abs, value = sf::st_crs(obj$get_data('prediction'))) ) - point <- point |> dplyr::select(observed, geometry, dplyr::any_of(c("name", "type"))) - abs <- abs |> dplyr::select(observed, geometry, dplyr::any_of(c("name", "type"))) + point <- point |> dplyr::select(dplyr::all_of(field_occurrence), geometry, dplyr::any_of(c("name", "type"))) + abs <- abs |> dplyr::select(dplyr::all_of(field_occurrence), geometry, dplyr::any_of(c("name", "type"))) point <- dplyr::bind_rows(point,abs) rm(abs) } @@ -176,7 +178,7 @@ methods::setMethod( #' @noRd #' @keywords internal .stackthreshold <- function(obj, method = 'fixed', value = NULL, - point = NULL, format = "binary", return_threshold = FALSE, ...) { + point = NULL, field_occurrence = "observed", format = "binary", return_threshold = FALSE, ...) { assertthat::assert_that(is.Raster(obj), is.character(method), inherits(point,'sf'), @@ -223,7 +225,7 @@ methods::setMethod( methods::setMethod( "threshold", methods::signature(obj = "SpatRaster"), - function(obj, method = 'fixed', value = NULL, point = NULL, format = "binary", return_threshold = FALSE) { + function(obj, method = 'fixed', value = NULL, point = NULL, field_occurrence = "observed", format = "binary", return_threshold = FALSE) { assertthat::assert_that(is.Raster(obj), inherits(obj,'SpatRaster'), is.character(method), @@ -247,15 +249,15 @@ methods::setMethod( # Check that raster has at least a mean prediction in name if(!is.null(point)) { - assertthat::assert_that(utils::hasName(point,"observed"), - msg = "Provided point data needs to have column observed!") + assertthat::assert_that(utils::hasName(point,field_occurrence), + msg = "Provided point data needs to include specified occurrence column!") # If observed is a factor, convert to numeric - if(is.factor(point$observed)){ - point$observed <- as.numeric(as.character( point$observed )) + if(is.factor(point[, field_occurrence, drop = TRUE])){ + point[, field_occurrence] <- as.numeric(as.character(point[, field_occurrence, drop = TRUE])) } assertthat::assert_that(unique(sf::st_geometry_type(point)) %in% c('POINT','MULTIPOINT')) - assertthat::assert_that(utils::hasName(point, 'observed')) - poi_pres <- subset(point, observed > 0) # Remove any eventual absence data for a poi_pres evaluation + assertthat::assert_that(utils::hasName(point, field_occurrence)) + poi_pres <- point[point[, field_occurrence, drop = TRUE] > 0, ] # Remove any eventual absence data for a poi_pres evaluation } else poi_pres <- NULL # Loop through each raster @@ -307,17 +309,18 @@ methods::setMethod( # now. Rather have users install the package here check_package("modEvA") # Assure that point data is correctly specified - assertthat::assert_that(inherits(point, 'sf'), utils::hasName(point, 'observed')) - point$observed <- ifelse(point$observed>1, 1, point$observed) # Ensure that observed is <=1 - assertthat::assert_that(all( unique(point$observed) %in% c(0,1) )) + assertthat::assert_that(inherits(point, 'sf'), utils::hasName(point, field_occurrence)) + point[, field_occurrence] <- ifelse(point[, field_occurrence, drop = TRUE] > 1, + 1, point[, field_occurrence, drop = TRUE]) # Ensure that observed is <=1 + assertthat::assert_that(all( unique(point[, field_occurrence, drop = TRUE]) %in% c(0,1) )) # Re-extract point vals but with the full dataset pointVals <- get_rastervalue(coords = point, env = raster_thresh)[[val]] assertthat::assert_that(length(pointVals)>2) # Calculate the optimal thresholds suppressWarnings( - opt <- modEvA::optiThresh(obs = point$observed, pred = pointVals, - measures = method, + opt <- modEvA::optiThresh(obs = point[, field_occurrence, drop = TRUE], + pred = pointVals, measures = method, optimize = "each", plot = FALSE) ) if(method %in% opt$optimals.each$measure){ diff --git a/R/train.R b/R/train.R index 597f86c8..ce9efaac 100644 --- a/R/train.R +++ b/R/train.R @@ -419,6 +419,13 @@ methods::setMethod( model[['biodiversity']][[id]][['use_intercept']]<- x$biodiversity$data[[id]]$use_intercept # Separate intercept? model[['biodiversity']][[id]][['expect']] <- x$biodiversity$get_weights()[[id]] # Weights per dataset # --- # + + # Check that if a custom formula supplied + if(model[['biodiversity']][[id]][['equation']] != ""){ + te <- formula_terms(model[['biodiversity']][[id]][['equation']]) + assertthat::assert_that(all(te %in% model$predictors_names), + msg = "Predictors in custom formula not found!") + } # Rename observation column to 'observed'. Needs to be consistent for INLA # FIXME: try and not use dplyr as dependency (although it is probably loaded already) model$biodiversity[[id]]$observations <- model$biodiversity[[id]]$observations |> dplyr::rename('observed' = x$biodiversity$get_columns_occ()[[id]]) @@ -886,6 +893,17 @@ methods::setMethod( model$predictors_names <- c(model$predictors_names, names(new)) model$predictors_types <- rbind(model$predictors_types, data.frame(predictors = names(new), type = c('numeric'))) + + # Finally if custom formula found, add the variable there. + for(other_id in names(model$biodiversity)){ + if(other_id == id) next() # Skip if current id + ff <- model$biodiversity[[other_id]]$equation + if(is.formula(ff)){ + ff <- stats::update.formula(ff, paste0("~ . + ", pred_name)) + model$biodiversity[[other_id]]$equation <- ff + } # Else skip + } + } else if(method_integration == "offset"){ # Adding the prediction as offset new <- out$get_data("prediction") @@ -986,6 +1004,16 @@ methods::setMethod( model$predictors_types <- rbind(model$predictors_types, data.frame(predictors = names(new), type = c('numeric'))) + # Finally if custom formula found, add the variable there. + for(other_id in names(model$biodiversity)){ + if(other_id == id) next() # Skip if current id + ff <- model$biodiversity[[other_id]]$equation + if(is.formula(ff)){ + ff <- stats::update.formula(ff, paste0("~ . + ", pred_name)) + model$biodiversity[[other_id]]$equation <- ff + } # Else skip + } + } else if(method_integration == "offset"){ # Adding the prediction as offset new <- out$get_data("prediction") @@ -1086,6 +1114,17 @@ methods::setMethod( model$predictors_names <- c(model$predictors_names, names(new)) model$predictors_types <- rbind(model$predictors_types, data.frame(predictors = names(new), type = c('numeric'))) + + # Finally if custom formula found, add the variable there. + for(other_id in names(model$biodiversity)){ + if(other_id == id) next() # Skip if current id + ff <- model$biodiversity[[other_id]]$equation + if(is.formula(ff)){ + ff <- stats::update.formula(ff, paste0("~ . + ", pred_name)) + model$biodiversity[[other_id]]$equation <- ff + } # Else skip + } + } else if(method_integration == "offset"){ # Adding the prediction as offset new <- out$get_data("prediction")[["mean"]] @@ -1208,6 +1247,16 @@ methods::setMethod( model$predictors_types <- rbind(model$predictors_types, data.frame(predictors = names(new), type = c('numeric'))) + # Finally if custom formula found, add the variable there. + for(other_id in names(model$biodiversity)){ + if(other_id == id) next() # Skip if current id + ff <- model$biodiversity[[other_id]]$equation + if(is.formula(ff)){ + ff <- stats::update.formula(ff, paste0("~ . + ", pred_name)) + model$biodiversity[[other_id]]$equation <- ff + } # Else skip + } + } else if(method_integration == "offset"){ # Adding the prediction as offset new <- out$get_data("prediction") @@ -1300,6 +1349,16 @@ methods::setMethod( model$predictors_types <- rbind(model$predictors_types, data.frame(predictors = names(new), type = c('numeric'))) + # Finally if custom formula found, add the variable there. + for(other_id in names(model$biodiversity)){ + if(other_id == id) next() # Skip if current id + ff <- model$biodiversity[[other_id]]$equation + if(is.formula(ff)){ + ff <- stats::update.formula(ff, paste0("~ . + ", pred_name)) + model$biodiversity[[other_id]]$equation <- ff + } # Else skip + } + } else if(method_integration == "offset"){ # Adding the prediction as offset new <- out$get_data("prediction") diff --git a/R/utils-form.R b/R/utils-form.R index 3fa5d2a5..d6dda167 100644 --- a/R/utils-form.R +++ b/R/utils-form.R @@ -1,3 +1,36 @@ +#' Convert character to formula object +#' +#' @param x [`character`] text. +#' @keywords utils +#' @noRd +to_formula <- function(formula){ + # Convert to formula object + if(!is.null(formula)) { + formula = stats::as.formula(formula) + } else { + # Asign a new waiver object + formula = new_waiver() + } + return(formula) +} + +#' Get terms from a formula object +#' +#' @param formula A [`formula`] or [`character`] object. +#' @keywords internal, utils +#' @noRd +formula_terms <- function(formula){ + # Check + assertthat::assert_that( + is.character(formula) || inherits(formula, "formula") + ) + if(formula == "") stop("Default formula found!") + if(!inherits(formula, "formula")) formula <- stats::as.formula(formula) + # Get the terms from the formula + te <- attr(terms.formula(formula), "term.labels") + return(te) +} + #' Logistic (invlogit) transformation function #' @param x A [`numeric`] value. #' @keywords utils @@ -94,3 +127,189 @@ ilink <- function(x, link = "log"){ ) } +#' Create formula matrix +#' +#' Function to create list of formulas with all possible combinations of +#' variables +#' @param form An input [`formula`] object. +#' @param response A [`character`] object giving the response. (Default: +#' \code{NULL}) +#' @param type Currently implemented are \code{'inla'} (variable groups), +#' \code{'All'} (All possible combinations) or \code{'forward'}. +#' @returns A [`vector`] object with [`formula`] objects. +#' @examples \dontrun{ +#' formula_combinations(form) +#' } +#' @keywords utils +#' @noRd +formula_combinations <- function(form, response = NULL, type= 'forward'){ + assertthat::assert_that(is.formula(form), + is.character(response) || is.null(response), + tolower(type) %in% c('inla','forward','all')) + # --- # + # Response + if(is.null(response)) response <- all.vars(form)[1] + # Formula terms + te <- attr(stats::terms.formula(form),'term.label') + # Varnames + varnames <- all.vars(form) + varnames <- varnames[varnames %notin% c('spde','spatial.field','observed','Intercept')] # Exclude things not necessarily needed in there + # Variable length + fl <- length(varnames) + # --- # + assertthat::assert_that(fl>0, !is.null(response)) + + if(tolower(type) == 'inla'){ + # INLA modelling groups + # Instead of selecting variables piece by piece, consider individual groups + form_temp <- c() + val_int <- grep(pattern = 'Intercept',x = te, value = T) + val_lin <- grep(pattern = 'linear',x = te, value = T) + val_rw1 <- grep(pattern = 'rw1',x = te,value = TRUE) + # Alternative quadratic variables in case rw1 fails + if(length(val_rw1)>0){ + val_quad <- all.vars(stats::as.formula(paste('observed ~ ', paste0(val_rw1,collapse = '+'))))[-1] + } else { val_quad <- all.vars(stats::as.formula(paste('observed ~ ', paste0(val_lin,collapse = '+'))))[-1] } + val_spde <- grep(pattern = 'spde',x = te,value = TRUE) + val_ofs <- grep(pattern = 'offset',x = te,value = TRUE) + + # Construct formulas --- + # Original form + form_temp <- c(form_temp, deparse1(form)) + + # Intercept only + form_temp <- c(form_temp, + paste0(response,' ~ 0 +', paste(val_int,collapse = ' + ') )) + + # Add all linear variables as base + form_temp <- c(form_temp, + paste0(response,' ~ 0 +', paste(val_int,collapse = ' + '), + '+', paste0(varnames, collapse = ' + ') )) + + # Intercept + linear effect + if(length(val_lin)>0){ + form_temp <- c(form_temp, + paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), + '+', + paste(val_lin,collapse = ' + ')) + ) + } + # Intercept + rw1 effects (if existing) + if(length(val_rw1)>0){ + form_temp <- c(form_temp, + paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), + '+', + paste(val_rw1,collapse = ' + ')) + ) + } + # Alternative formulation using quadratic + form_temp <- c(form_temp, + paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), + '+', + paste0('I(',val_quad,'^2)',collapse = ' + ')) + ) + + # Intercept + spde + if(length(val_spde)>0){ + form_temp <- c(form_temp, + paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), + '+', + paste(val_spde,collapse = ' + ')) + ) + } + # Intercept + linear + spde + if(length(val_spde)>0 && length(val_lin)>0){ + form_temp <- c(form_temp, + paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), + '+', + paste(val_spde,collapse = ' + '),'+',paste(val_lin,collapse = ' + ')) + ) + form_temp <- c(form_temp, + paste0(response,' ~ 0 +', paste(val_int,collapse = ' + '), + '+', paste0(varnames, collapse = ' + '), + '+',paste(val_spde,collapse = ' + '))) + + } + # intercept + rw1 + spde + if(length(val_spde)>0 && length(val_rw1)>0){ + form_temp <- c(form_temp, + paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), + '+', + paste(val_spde,collapse = ' + '),'+',paste(val_rw1,collapse = ' + ')) + ) + } + if(length(val_spde)>0){ + # Quad replacement + form_temp <- c(form_temp, + paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), + '+', + paste(val_spde,collapse = ' + '),'+',paste0('I(',val_quad,'^2)',collapse = ' + ')) + ) + } + # intercept + linear + rw1 + spde + if(length(val_rw1)>0 && length(val_lin)>0 && length(val_spde)>0){ + form_temp <- c(form_temp, + paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), + '+', + paste(val_lin,collapse = ' + '),'+',paste(val_rw1,collapse = ' + '),'+',paste(val_spde,collapse = ' + ')) + ) + + } + if(length(val_spde)>0){ + # Quad replacement + form_temp <- c(form_temp, + paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), + '+', + paste(val_lin,collapse = ' + '),'+',paste0('I(',val_quad,'^2)',collapse = ' + '),'+',paste(val_spde,collapse = ' + ')) + ) + } + # intercept + linear + offset + if(length(val_lin)>0 && length(val_ofs)>0){ + form_temp <- c(form_temp, + paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), + '+', + paste(val_lin,collapse = ' + '),'+',paste(val_ofs,collapse = ' + ')) + ) + } + # intercept + linear + rw1 + offset + if(length(val_rw1)>0 && length(val_lin)>0 && length(val_ofs)>0){ + form_temp <- c(form_temp, + paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), + '+', + paste(val_lin,collapse = ' + '),'+',paste(val_rw1,collapse = ' + '),'+',paste(val_ofs,collapse = ' + ')) + ) + } + if(length(val_lin)>0 && length(val_ofs)>0){ + # Quad replacement + form_temp <- c(form_temp, + paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), + '+', + paste(val_lin,collapse = ' + '),'+', + paste0('I(',val_quad,'^2)',collapse = ' + '),'+',paste(val_ofs,collapse = ' + ')) + ) + } + + # Other types of variable selection + } else if(tolower(type) == 'forward'){ + # Forward variable addition + # Note this ignores unique combinations + form_temp <- c() + for(i in 1:fl) { + new <- paste0(response, '~ 0 + ',paste(val_int,collapse = '+'),'+', + paste(varnames[1:i],collapse = ' + ') ) + form_temp <- c(form_temp, new) + } + + } else if(tolower(type) == 'all'){ + # Construct all possible unique combinations + varnames_comb <- lapply(1:length(varnames), function(i){ + utils::combn(varnames, i) |> apply(2, list) |> unlist(recursive = F) + })|> unlist(recursive = F) + + form_temp <- lapply(varnames_comb, function(i) { + paste0(response, " ~ ", paste(val_int,collapse = '+'),'+', paste(i, collapse = " + ")) + }) + } + + return(form_temp) +} diff --git a/R/utils-gdb.R b/R/utils-gdb.R index 305917c1..bc3cd3be 100644 --- a/R/utils-gdb.R +++ b/R/utils-gdb.R @@ -100,12 +100,12 @@ built_formula_gdb <- function(model, id, x, settings){ } # Check that bols/bbs are in terms and if not add them for each variable if( settings$get("only_linear") ){ - if( length(grep("bols",attr(stats::terms(form2), "term.labels") ))==0 ){ + if( length(grep("bols",attr(stats::terms(form), "term.labels") ))==0 ){ # Assume that each variable as none, so add form <- stats::as.formula(paste0("observed ~", paste0("bols(", obj$predictors_names, ")",collapse = " + "))) } } else { - if( length(grep("bbs",attr(stats::terms(form2), "term.labels") ))==0 ){ + if( length(grep("bbs",attr(stats::terms(form), "term.labels") ))==0 ){ # Assume that each variable as none, so add form <- stats::as.formula(paste0("observed ~", paste0("bbs(", obj$predictors_names, ")",collapse = " + "))) } diff --git a/R/utils-inla.R b/R/utils-inla.R index 80a1b20e..c7a957d4 100644 --- a/R/utils-inla.R +++ b/R/utils-inla.R @@ -1186,7 +1186,7 @@ inla_make_projection_stack <- function(stk_resp, model, mesh, mesh.area, type, b #' Prediction coordinates for INLA #' -#' @param mesh A [INLA::inla.mesh] object. +#' @param mesh A \code{"INLA::inla.mesh"} object. #' @param background A [sf] object containing the background region. #' @param cov A [data.frame] or [matrix] with the covariates for the modelling. #' @param proj_stepsize A numeric indication on the prediction stepsize to be diff --git a/R/utils-predictors.R b/R/utils-predictors.R index bb907120..0c8ab472 100644 --- a/R/utils-predictors.R +++ b/R/utils-predictors.R @@ -918,7 +918,7 @@ predictors_filter_abess <- function( env, observed, method, family, tune.type = if((requireNamespace("abess", quietly = TRUE))){ # Build model - abess_fit <- abess(x = env, + abess_fit <- abess::abess(x = env, y = observed, family = family, tune.type = tune.type, @@ -931,7 +931,7 @@ predictors_filter_abess <- function( env, observed, method, family, tune.type = if(anyNA(stats::coef(abess_fit)[,1]) ) { # Refit with minimum support size - abess_fit <- abess(x = env, + abess_fit <- abess::abess(x = env, y = observed, family = family, lambda = lambda, @@ -1016,7 +1016,7 @@ predictors_filter_boruta <- function( env, obs, method, keep = NULL, # Apply boruta if((requireNamespace("Boruta", quietly = TRUE))){ - bo_test <- Boruta(env, y = observed, + bo_test <- Boruta::Boruta(env, y = observed, maxRuns = iter, # Verbosity doTrace = ifelse(verbose, 1, 0)) diff --git a/R/utils-spatial.R b/R/utils-spatial.R index c20e1054..718893d5 100644 --- a/R/utils-spatial.R +++ b/R/utils-spatial.R @@ -156,8 +156,8 @@ create_mcp <- function(biod, limits){ # Get biodiversity data obs <- collect_occurrencepoints(model = biod, - include_absences = FALSE, - tosf = TRUE) |> + include_absences = FALSE, + tosf = TRUE) |> dplyr::select("observed") # Assing unique id @@ -346,7 +346,7 @@ raster_centroid <- function(obj, patch = FALSE){ w = df$values), stats::weighted.mean(df$y, w = df$values) - ) + ) cent <- data.frame(x = cent[1], y = cent[2], weight = base::mean(df$values)) # Convert to sf coordinate @@ -441,44 +441,44 @@ rename_geometry <- function(g, name){ #' @keywords internal, utils #' @noRd guess_sf <- function(df, geom_name = 'geometry'){ - assertthat::assert_that( - inherits(df,'data.frame') || inherits(df, 'sf') || inherits(df, 'tibble') - ) - # If sf, return immediately - if(inherits(df, 'sf')) return(df) - # If there is an attribute, but for some reason the file is not sf, use that one - if(!is.null(attr(df, "sf_column"))) { - df <- sf::st_as_sf(df) - if(attr(df, "sf_column") != geom_name){ - names(df)[which(names(df) == attr(df, "sf_column"))] <- geom_name - sf::st_geometry(df) <- geom_name - } - return(df) - } - # Commonly used column names - nx = c("x","X","lon","longitude") - ny = c("y", "Y", "lat", "latitude") - ng = c("geom", "geometry", "geometry") - - # Check if geom is present - if(any( ng %in% names(df) )){ - attr(df, "sf_column") <- ng[which(ng %in% names(df))] - df <- sf::st_as_sf(df) - } - # Finally check if any commonly used coordinate name exist - if(any( nx %in% names(df))){ - df <- sf::st_as_sf(df, coords = c(nx[which(nx %in% names(df))], - ny[which(ny %in% names(df))]) - ) - } - # If at this point df is still not a sf object, then it is unlikely to be converted - assertthat::assert_that(inherits(df, 'sf'), - msg = "Point object could not be converted to an sf object.") - if(attr(df, "sf_column") != geom_name){ - names(df)[which(names(df) == attr(df, "sf_column"))] <- geom_name - sf::st_geometry(df) <- geom_name - } - return(df) + assertthat::assert_that( + inherits(df,'data.frame') || inherits(df, 'sf') || inherits(df, 'tibble') + ) + # If sf, return immediately + if(inherits(df, 'sf')) return(df) + # If there is an attribute, but for some reason the file is not sf, use that one + if(!is.null(attr(df, "sf_column"))) { + df <- sf::st_as_sf(df) + if(attr(df, "sf_column") != geom_name){ + names(df)[which(names(df) == attr(df, "sf_column"))] <- geom_name + sf::st_geometry(df) <- geom_name + } + return(df) + } + # Commonly used column names + nx = c("x","X","lon","longitude") + ny = c("y", "Y", "lat", "latitude") + ng = c("geom", "geometry", "geometry") + + # Check if geom is present + if(any( ng %in% names(df) )){ + attr(df, "sf_column") <- ng[which(ng %in% names(df))] + df <- sf::st_as_sf(df) + } + # Finally check if any commonly used coordinate name exist + if(any( nx %in% names(df))){ + df <- sf::st_as_sf(df, coords = c(nx[which(nx %in% names(df))], + ny[which(ny %in% names(df))]) + ) + } + # If at this point df is still not a sf object, then it is unlikely to be converted + assertthat::assert_that(inherits(df, 'sf'), + msg = "Point object could not be converted to an sf object.") + if(attr(df, "sf_column") != geom_name){ + names(df)[which(names(df) == attr(df, "sf_column"))] <- geom_name + sf::st_geometry(df) <- geom_name + } + return(df) } #' Kernel density estimation of coordinates @@ -662,7 +662,7 @@ extent_dimensions <- function(ex, lonlat = terra::is.lonlat(ex), output_unit = ' Raster = as.vector( terra::ext(ex) ), sf = as.vector( terra::ext(ex) ), numeric = ex - ) + ) # Rename the vector names(ex) <- c("xmin", "xmax", "ymin", "ymax") @@ -937,7 +937,7 @@ get_rastervalue <- function(coords, env, ngb_fill = TRUE, rm.na = FALSE){ is.Raster(env), is.logical(ngb_fill), is.logical(rm.na) - ) + ) # Try an extraction ex <- try({terra::extract(x = env, @@ -1411,7 +1411,7 @@ thin_observations <- function(df, background, env = NULL, method = "random", min # Get a matrix of all environmental data, also with coordinates # However first normalize all data stk <- terra::as.data.frame( - predictor_transform(env, option = "norm"), + predictor_transform(env, option = "norm"), xy = TRUE) stk$cid <- 1:nrow(stk) @@ -1448,8 +1448,15 @@ thin_observations <- function(df, background, env = NULL, method = "random", min stop("Not yet implemented!") } + # check if any points were selected to thin + if (length(sel) == 0){ + message("No points were selected during thinning.") + return(df) + } + # Return subsampled coordinates out <- df[sel,] + if(nrow(out)==0) { message("Thinning failed for some reason") return(df) diff --git a/R/utils-xgboost.R b/R/utils-xgboost.R index 56ed2088..eca5532d 100644 --- a/R/utils-xgboost.R +++ b/R/utils-xgboost.R @@ -31,7 +31,7 @@ built_formula_xgboost <- function(obj){ form <- to_formula(obj$equation) # Get all variables and check - varn <- obj$predictors_names[which( all.vars(form) %in% obj$predictors_names )] + varn <- obj$predictors_names[which( obj$predictors_names %in% formula_terms(form))] form <- to_formula( paste0("observed ~", paste0(varn, collapse = " + ")) ) assertthat::assert_that( is.formula(form), length(all.vars(form))>1 diff --git a/R/utils.R b/R/utils.R index cc9c5648..5428e1da 100644 --- a/R/utils.R +++ b/R/utils.R @@ -154,22 +154,6 @@ capitalize_text <- function(x) { sep="", collapse=" ") } -#' Convert character to formula object -#' -#' @param x [`character`] text. -#' @keywords utils -#' @noRd -to_formula <- function(formula){ - # Convert to formula object - if(!is.null(formula)) { - formula = stats::as.formula(formula) - } else { - # Asign a new waiver object - formula = new_waiver() - } - return(formula) -} - #' Guess time to Posix #' #' @description This little wrapper converts and ensures that a vector of time @@ -423,193 +407,6 @@ clamp_predictions <- function(model, pred){ return(pred) } -#' Create formula matrix -#' -#' Function to create list of formulas with all possible combinations of -#' variables -#' @param form An input [`formula`] object. -#' @param response A [`character`] object giving the response. (Default: -#' \code{NULL}) -#' @param type Currently implemented are \code{'inla'} (variable groups), -#' \code{'All'} (All possible combinations) or \code{'forward'}. -#' @returns A [`vector`] object with [`formula`] objects. -#' @examples \dontrun{ -#' formula_combinations(form) -#' } -#' @keywords utils -#' @noRd -formula_combinations <- function(form, response = NULL, type= 'forward'){ - assertthat::assert_that(is.formula(form), - is.character(response) || is.null(response), - tolower(type) %in% c('inla','forward','all')) - # --- # - # Response - if(is.null(response)) response <- all.vars(form)[1] - # Formula terms - te <- attr(stats::terms.formula(form),'term.label') - # Varnames - varnames <- all.vars(form) - varnames <- varnames[varnames %notin% c('spde','spatial.field','observed','Intercept')] # Exclude things not necessarily needed in there - # Variable length - fl <- length(varnames) - # --- # - assertthat::assert_that(fl>0, !is.null(response)) - - if(tolower(type) == 'inla'){ - # INLA modelling groups - # Instead of selecting variables piece by piece, consider individual groups - form_temp <- c() - val_int <- grep(pattern = 'Intercept',x = te, value = T) - val_lin <- grep(pattern = 'linear',x = te, value = T) - val_rw1 <- grep(pattern = 'rw1',x = te,value = TRUE) - # Alternative quadratic variables in case rw1 fails - if(length(val_rw1)>0){ - val_quad <- all.vars(stats::as.formula(paste('observed ~ ', paste0(val_rw1,collapse = '+'))))[-1] - } else { val_quad <- all.vars(stats::as.formula(paste('observed ~ ', paste0(val_lin,collapse = '+'))))[-1] } - val_spde <- grep(pattern = 'spde',x = te,value = TRUE) - val_ofs <- grep(pattern = 'offset',x = te,value = TRUE) - - # Construct formulas --- - # Original form - form_temp <- c(form_temp, deparse1(form)) - - # Intercept only - form_temp <- c(form_temp, - paste0(response,' ~ 0 +', paste(val_int,collapse = ' + ') )) - - # Add all linear variables as base - form_temp <- c(form_temp, - paste0(response,' ~ 0 +', paste(val_int,collapse = ' + '), - '+', paste0(varnames, collapse = ' + ') )) - - # Intercept + linear effect - if(length(val_lin)>0){ - form_temp <- c(form_temp, - paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), - '+', - paste(val_lin,collapse = ' + ')) - ) - } - # Intercept + rw1 effects (if existing) - if(length(val_rw1)>0){ - form_temp <- c(form_temp, - paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), - '+', - paste(val_rw1,collapse = ' + ')) - ) - } - # Alternative formulation using quadratic - form_temp <- c(form_temp, - paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), - '+', - paste0('I(',val_quad,'^2)',collapse = ' + ')) - ) - - # Intercept + spde - if(length(val_spde)>0){ - form_temp <- c(form_temp, - paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), - '+', - paste(val_spde,collapse = ' + ')) - ) - } - # Intercept + linear + spde - if(length(val_spde)>0 && length(val_lin)>0){ - form_temp <- c(form_temp, - paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), - '+', - paste(val_spde,collapse = ' + '),'+',paste(val_lin,collapse = ' + ')) - ) - form_temp <- c(form_temp, - paste0(response,' ~ 0 +', paste(val_int,collapse = ' + '), - '+', paste0(varnames, collapse = ' + '), - '+',paste(val_spde,collapse = ' + '))) - - } - # intercept + rw1 + spde - if(length(val_spde)>0 && length(val_rw1)>0){ - form_temp <- c(form_temp, - paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), - '+', - paste(val_spde,collapse = ' + '),'+',paste(val_rw1,collapse = ' + ')) - ) - } - if(length(val_spde)>0){ - # Quad replacement - form_temp <- c(form_temp, - paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), - '+', - paste(val_spde,collapse = ' + '),'+',paste0('I(',val_quad,'^2)',collapse = ' + ')) - ) - } - # intercept + linear + rw1 + spde - if(length(val_rw1)>0 && length(val_lin)>0 && length(val_spde)>0){ - form_temp <- c(form_temp, - paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), - '+', - paste(val_lin,collapse = ' + '),'+',paste(val_rw1,collapse = ' + '),'+',paste(val_spde,collapse = ' + ')) - ) - - } - if(length(val_spde)>0){ - # Quad replacement - form_temp <- c(form_temp, - paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), - '+', - paste(val_lin,collapse = ' + '),'+',paste0('I(',val_quad,'^2)',collapse = ' + '),'+',paste(val_spde,collapse = ' + ')) - ) - } - # intercept + linear + offset - if(length(val_lin)>0 && length(val_ofs)>0){ - form_temp <- c(form_temp, - paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), - '+', - paste(val_lin,collapse = ' + '),'+',paste(val_ofs,collapse = ' + ')) - ) - } - # intercept + linear + rw1 + offset - if(length(val_rw1)>0 && length(val_lin)>0 && length(val_ofs)>0){ - form_temp <- c(form_temp, - paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), - '+', - paste(val_lin,collapse = ' + '),'+',paste(val_rw1,collapse = ' + '),'+',paste(val_ofs,collapse = ' + ')) - ) - } - if(length(val_lin)>0 && length(val_ofs)>0){ - # Quad replacement - form_temp <- c(form_temp, - paste0(response,' ~ 0 + ', paste(val_int,collapse = ' + '), - '+', - paste(val_lin,collapse = ' + '),'+', - paste0('I(',val_quad,'^2)',collapse = ' + '),'+',paste(val_ofs,collapse = ' + ')) - ) - } - - # Other types of variable selection - } else if(tolower(type) == 'forward'){ - # Forward variable addition - # Note this ignores unique combinations - form_temp <- c() - for(i in 1:fl) { - new <- paste0(response, '~ 0 + ',paste(val_int,collapse = '+'),'+', - paste(varnames[1:i],collapse = ' + ') ) - form_temp <- c(form_temp, new) - } - - } else if(tolower(type) == 'all'){ - # Construct all possible unique combinations - varnames_comb <- lapply(1:length(varnames), function(i){ - utils::combn(varnames, i) |> apply(2, list) |> unlist(recursive = F) - })|> unlist(recursive = F) - - form_temp <- lapply(varnames_comb, function(i) { - paste0(response, " ~ ", paste(val_int,collapse = '+'),'+', paste(i, collapse = " + ")) - }) - } - - return(form_temp) -} - #' Outlier detection via reverse jackknife #' #' @description Implementation of a Reverse Jackknife procedure as described by @@ -753,7 +550,7 @@ aggregate_observations2grid <- function(df, template, field_occurrence = 'observ #' objects. #' @param include_absences A [`logical`] of whether absences should be included #' (Default: \code{FALSE}). -#' @param point_column [`chracter`] on the column with observed values. +#' @param point_column [`character`] on the column with observed values. #' @param addName [`logical`] Should the name of the feature be added (Default: #' \code{FALSE}). #' @param tosf [`logical`] of whether the output should be [`sf`] object diff --git a/R/write_output.R b/R/write_output.R index ea27cb59..a5ae41bd 100644 --- a/R/write_output.R +++ b/R/write_output.R @@ -18,7 +18,7 @@ #' @returns No R-output is created. A file is written to the target direction. #' @examples \dontrun{ #' x <- distribution(background) |> -#' add_biodiversity_poipo(virtual_points, field_occurrence = 'Observed', name = 'Virtual points') |> +#' add_biodiversity_poipo(virtual_points, field_occurrence = 'observed', name = 'Virtual points') |> #' add_predictors(pred_current, transform = 'scale',derivates = 'none') |> #' engine_xgboost(nrounds = 2000) |> train(varsel = FALSE, only_linear = TRUE) #' write_output(x, "testmodel.tif") @@ -294,7 +294,7 @@ writeNetCDF <- function(file, fname, #' @returns No R-output is created. A file is written to the target direction. #' @examples \dontrun{ #' x <- distribution(background) |> -#' add_biodiversity_poipo(virtual_points, field_occurrence = 'Observed', name = 'Virtual points') |> +#' add_biodiversity_poipo(virtual_points, field_occurrence = 'observed', name = 'Virtual points') |> #' add_predictors(pred_current, transform = 'scale',derivates = 'none') |> #' engine_xgboost(nrounds = 2000) |> train(varsel = FALSE, only_linear = TRUE) #' write_summary(x, "testmodel.rds") @@ -329,10 +329,6 @@ methods::setMethod( inherits(mod, "DistributionModel") || inherits(mod, "BiodiversityScenario"), msg = "Only objects created through `train()` or `project()` are supported!" ) - # Check writeable or not - assertthat::assert_that( - assertthat::is.writeable(dirname(fname)),msg = "Given input folder is not writeable!" - ) # Get file extension ext <- tolower( tools::file_ext(fname) ) @@ -340,7 +336,7 @@ methods::setMethod( ext <- match.arg(ext, choices = c("rds", "rdata"), several.ok = FALSE) fname <- paste0(tools::file_path_sans_ext(fname), ".", ext) if(file.exists(fname) && (verbose && getOption('ibis.setupmessages'))) myLog('[Output]','yellow','Overwriting existing file...') - assertthat::assert_that(assertthat::is.writeable(dirname(fname))) + assertthat::assert_that(dir.exists(dirname(fname))) # --- # # Gather the statistics and parameters from the provided file output <- list() @@ -478,7 +474,7 @@ methods::setMethod( #' @returns No R-output is created. A file is written to the target direction. #' @examples \dontrun{ #' x <- distribution(background) |> -#' add_biodiversity_poipo(virtual_points, field_occurrence = 'Observed', name = 'Virtual points') |> +#' add_biodiversity_poipo(virtual_points, field_occurrence = 'observed', name = 'Virtual points') |> #' add_predictors(pred_current, transform = 'scale',derivates = 'none') |> #' engine_xgboost(nrounds = 2000) |> train(varsel = FALSE, only_linear = TRUE) #' write_model(x, "testmodel.rds") diff --git a/R/zzz.R b/R/zzz.R index 682c924e..215e3483 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -18,7 +18,7 @@ # Option to have variable names "cleaned" by default options('ibis.cleannames' = TRUE) # Known seed - options('ibis.seed' = (rpois(1, 1000) + Sys.getpid())) + options('ibis.seed' = (stats::rpois(1, 1000) + Sys.getpid())) # Known engines options('ibis.engines' = c('GDB-Model','BART-Model', 'INLABRU-Model','BREG-Model','GLMNET-Model', diff --git a/_pkgdown.yml b/_pkgdown.yml index 5662a8a0..83a173b1 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -33,21 +33,23 @@ navbar: articles: text: Articles menu: - - text: 1. Train a model - href: articles/01_train_simple_model.html - - text: 2. Integrate data - href: articles/02_integrate_data.html - - text: 3. Biodiversity projections - href: articles/03_biodiversity_projections.html - - text: 4. Simulating mechanisms - href: articles/04_mechanistic_estimation.html - - text: 5. Engine comparison - href: articles/05_engine_comparison.html - - text: 6. Other packages - href: articles/06_package_comparison.html + - text: 1. Helper functions to prepare inputs + href: articles/01_data_preparationhelpers.html + - text: 2. Train a model + href: articles/02_train_simple_model.html + - text: 3. Integrate data + href: articles/03_integrate_data.html + - text: 4. Biodiversity projections + href: articles/04_biodiversity_projections.html + - text: 5. Simulating mechanisms + href: articles/05_mechanistic_estimation.html + - text: 6. Engine comparison + href: articles/06_engine_comparison.html + - text: 7. Other packages + href: articles/07_package_comparison.html faq: text: FAQ - href: articles/07_frequently-asked-questions.html + href: articles/08_frequently-asked-questions.html contributing: text: Contributing href: articles/contributing.html diff --git a/inst/CITATION b/inst/CITATION index 45139f9d..39463526 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -21,5 +21,5 @@ citEntry( as.person("Maximilian H.K. Hesselbarth") ), year = "2023", - version = "0.0.5", + version = "0.0.9", textVersion = "Jung, M., Hesselbarth, H.K.M. (2023). An integrated species distribution modelling framework for heterogeneous biodiversity data. R package version 0.0.5") diff --git a/man/add_biodiversity_poipa.Rd b/man/add_biodiversity_poipa.Rd index 722519b5..45de1a97 100644 --- a/man/add_biodiversity_poipa.Rd +++ b/man/add_biodiversity_poipa.Rd @@ -8,7 +8,7 @@ add_biodiversity_poipa( x, poipa, name = NULL, - field_occurrence = "Observed", + field_occurrence = "observed", formula = NULL, family = "binomial", link = NULL, @@ -30,7 +30,7 @@ occurrences.} \item{field_occurrence}{A \code{\link{numeric}} or \code{\link{character}} location of biodiversity point records indicating presence/absence. By default set to -\code{"Observed"} and an error will be thrown if a \code{\link{numeric}} column with +\code{"observed"} and an error will be thrown if a \code{\link{numeric}} column with that name does not exist.} \item{formula}{A \code{\link{character}} or \code{\link{formula}} object to be passed. Default diff --git a/man/add_biodiversity_poipo.Rd b/man/add_biodiversity_poipo.Rd index 2ca0a95a..2c733577 100644 --- a/man/add_biodiversity_poipo.Rd +++ b/man/add_biodiversity_poipo.Rd @@ -8,7 +8,7 @@ add_biodiversity_poipo( x, poipo, name = NULL, - field_occurrence = "Observed", + field_occurrence = "observed", formula = NULL, family = "poisson", link = NULL, diff --git a/man/add_biodiversity_polpa.Rd b/man/add_biodiversity_polpa.Rd index ec2a3c70..61827796 100644 --- a/man/add_biodiversity_polpa.Rd +++ b/man/add_biodiversity_polpa.Rd @@ -8,7 +8,7 @@ add_biodiversity_polpa( x, polpa, name = NULL, - field_occurrence = "Observed", + field_occurrence = "observed", formula = NULL, family = "binomial", link = NULL, diff --git a/man/add_biodiversity_polpo.Rd b/man/add_biodiversity_polpo.Rd index 2433d5b1..2c945002 100644 --- a/man/add_biodiversity_polpo.Rd +++ b/man/add_biodiversity_polpo.Rd @@ -8,7 +8,7 @@ add_biodiversity_polpo( x, polpo, name = NULL, - field_occurrence = "Observed", + field_occurrence = "observed", formula = NULL, family = "poisson", link = NULL, diff --git a/man/add_offset_range.Rd b/man/add_offset_range.Rd index a5c8c006..6830a819 100644 --- a/man/add_offset_range.Rd +++ b/man/add_offset_range.Rd @@ -12,7 +12,7 @@ add_offset_range( presence_prop = 0.9, distance_clip = FALSE, distance_function = "negexp", - field_occurrence = "Observed", + field_occurrence = "observed", fraction = NULL, point = FALSE, add = TRUE diff --git a/man/mask.Rd b/man/mask.Rd new file mode 100644 index 00000000..273bdaee --- /dev/null +++ b/man/mask.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mask.R +\name{mask} +\alias{mask} +\alias{mask.DistributionModel} +\alias{mask.BiodiversityDatasetCollection} +\alias{mask.PredictorDataset} +\alias{mask.BiodiversityScenario} +\title{Mask data with an external layer} +\usage{ +\method{mask}{DistributionModel}(x, mask, inverse = FALSE) + +\method{mask}{BiodiversityDatasetCollection}(x, mask, inverse = FALSE) + +\method{mask}{PredictorDataset}(x, mask, inverse = FALSE) + +\method{mask}{BiodiversityScenario}(x, mask, inverse = FALSE) +} +\arguments{ +\item{x}{Any object belonging to \link{DistributionModel}, +\link{BiodiversityDatasetCollection}, \link{PredictorDataset} or +\link{BiodiversityScenario}.} + +\item{mask}{A \code{\link{sf}} or \code{\link{SpatRaster}} object.} + +\item{inverse}{A \code{\link{logical}} flag whether to take inverse of the mask instead +(Default: \code{FALSE}).} +} +\value{ +A respective object of the input type. +} +\description{ +This is a helper function that takes an existing object created +by the ibis.iSDM package and an external layer, then intersects both. It +currently takes either a \link{DistributionModel}, \link{BiodiversityDatasetCollection}, +\link{PredictorDataset} or \link{BiodiversityScenario} as input. + +As mask either a \code{\link{sf}} or \code{\link{SpatRaster}} object can be chosen. The mask will +be converted internally depending on the object. +} +\examples{ +\dontrun{ +# Build and train a model +mod <- distribution(background) |> + add_biodiversity_poipo(species) |> + add_predictors(predictors) |> + engine_glmnet() |> + train() + +# Constrain the prediction by another object +mod <- mask(mod, speciesrange) + +} +} +\seealso{ +\code{\link[terra:mask]{terra::mask()}} +} +\keyword{utils} diff --git a/man/pseudoabs_settings.Rd b/man/pseudoabs_settings.Rd index 119d1bdd..670b6dc0 100644 --- a/man/pseudoabs_settings.Rd +++ b/man/pseudoabs_settings.Rd @@ -98,7 +98,7 @@ ass2 <- pseudoabs_settings(nrpoints = 0, min_ratio = 1) # points to the resulting model all_my_points <- add_pseudoabsence( df = virtual_points, - field_occurrence = 'Observed', + field_occurrence = 'observed', template = background, settings = ass1) } diff --git a/man/threshold.Rd b/man/threshold.Rd index ab6e430d..43714efe 100644 --- a/man/threshold.Rd +++ b/man/threshold.Rd @@ -9,6 +9,7 @@ threshold( method = "mtp", value = NULL, point = NULL, + field_occurrence = "observed", format = "binary", return_threshold = FALSE, ... @@ -33,6 +34,9 @@ options.} \item{point}{A \code{\link{sf}} object containing observational data used for model training.} +\item{field_occurrence}{A \code{\link{character}} location of +biodiversity point records.} + \item{format}{\code{\link{character}} indication of whether \code{"binary"}, \code{"normalize"} or \code{"percentile"} formatted thresholds are to be created (Default: \code{"binary"}). Also see Muscatello et al. (2021).} diff --git a/man/write_model.Rd b/man/write_model.Rd index de377bc7..662c2818 100644 --- a/man/write_model.Rd +++ b/man/write_model.Rd @@ -33,7 +33,7 @@ By default output files will be overwritten if already existing! \examples{ \dontrun{ x <- distribution(background) |> - add_biodiversity_poipo(virtual_points, field_occurrence = 'Observed', name = 'Virtual points') |> + add_biodiversity_poipo(virtual_points, field_occurrence = 'observed', name = 'Virtual points') |> add_predictors(pred_current, transform = 'scale',derivates = 'none') |> engine_xgboost(nrounds = 2000) |> train(varsel = FALSE, only_linear = TRUE) write_model(x, "testmodel.rds") diff --git a/man/write_output.Rd b/man/write_output.Rd index 55c4749d..56c9c088 100644 --- a/man/write_output.Rd +++ b/man/write_output.Rd @@ -45,7 +45,7 @@ By default output files will be overwritten if already existing! \examples{ \dontrun{ x <- distribution(background) |> - add_biodiversity_poipo(virtual_points, field_occurrence = 'Observed', name = 'Virtual points') |> + add_biodiversity_poipo(virtual_points, field_occurrence = 'observed', name = 'Virtual points') |> add_predictors(pred_current, transform = 'scale',derivates = 'none') |> engine_xgboost(nrounds = 2000) |> train(varsel = FALSE, only_linear = TRUE) write_output(x, "testmodel.tif") diff --git a/man/write_summary.Rd b/man/write_summary.Rd index bee45b31..c379aadb 100644 --- a/man/write_summary.Rd +++ b/man/write_summary.Rd @@ -40,7 +40,7 @@ No predictions or tabular data is saved through this function. Use \examples{ \dontrun{ x <- distribution(background) |> - add_biodiversity_poipo(virtual_points, field_occurrence = 'Observed', name = 'Virtual points') |> + add_biodiversity_poipo(virtual_points, field_occurrence = 'observed', name = 'Virtual points') |> add_predictors(pred_current, transform = 'scale',derivates = 'none') |> engine_xgboost(nrounds = 2000) |> train(varsel = FALSE, only_linear = TRUE) write_summary(x, "testmodel.rds") diff --git a/tests/testthat/test_BiodiversityDistribution.R b/tests/testthat/test_BiodiversityDistribution.R index 926a0d56..381e0b60 100644 --- a/tests/testthat/test_BiodiversityDistribution.R +++ b/tests/testthat/test_BiodiversityDistribution.R @@ -50,6 +50,12 @@ test_that('Setting up a distribution model',{ expect_true(is.Waiver(x$engine)) expect_error(train(x)) # Try to solve without solver + # Apply a mask + expect_no_error( x$biodiversity$mask(virtual_range) ) + x <- distribution(background) |> add_biodiversity_poipo(virtual_points, + field_occurrence = 'Observed', + name = 'Virtual points') + # And a range off invisible( suppressWarnings( suppressMessages(y <- x |> add_offset_range(virtual_range))) ) expect_equal(y$get_offset(),'range_distance') @@ -109,7 +115,11 @@ test_that('Setting up a distribution model',{ testthat::expect_s3_class(y, "BiodiversityDistribution") rm(y) - x <- x |> engine_inla() + suppressWarnings( x <- x |> engine_inla() ) + + # Do a check + expect_no_error(check(x)) + # Mesh is not created yet expect_s3_class(x$engine$get_data("mesh"),'Waiver') expect_equal(x$engine$name,'') diff --git a/tests/testthat/test_Scenarios.R b/tests/testthat/test_Scenarios.R index 84f78d09..d6363939 100644 --- a/tests/testthat/test_Scenarios.R +++ b/tests/testthat/test_Scenarios.R @@ -3,6 +3,7 @@ test_that('Testing functions for spatial-temporal data in stars', { skip_if_not_installed('geosphere') skip_if_not_installed('cubelyr') + skip_if_not_installed('lwgeom') suppressWarnings( requireNamespace('stars', quietly = TRUE) ) suppressWarnings( requireNamespace('cubelyr', quietly = TRUE) ) @@ -78,6 +79,7 @@ test_that('Scenarios and constraints', { skip_if_not_installed('glmnet') skip_if_not_installed('geosphere') skip_if_not_installed('cubelyr') + skip_if_not_installed('lwgeom') skip_on_travis() skip_on_cran() @@ -194,6 +196,10 @@ test_that('Scenarios and constraints', { mod <- x |> project() expect_s3_class(mod$get_data(), "stars") + # Apply a mask + mod$mask(virtual_range) + mod <- x |> project() # Project anew + # Calculate centroids expect_s3_class(mod$get_centroid(), "sf") diff --git a/tests/testthat/test_functions.R b/tests/testthat/test_functions.R index 0ee0beb9..63b73cd8 100644 --- a/tests/testthat/test_functions.R +++ b/tests/testthat/test_functions.R @@ -272,7 +272,7 @@ test_that('Test data preparation convenience functions', { # Similarity function test x <- distribution(background) |> add_predictors(predictors) expect_error( m1 <- similarity(x, method = "mess") ) - x <- x |> add_biodiversity_poipo(virtual_points) + x <- x |> add_biodiversity_poipo(virtual_points,field_occurrence = "Observed") m1 <- similarity(x, method = "mess", plot = FALSE) expect_true(is.Raster(m1)) expect_true(any(is.factor(m1))) diff --git a/tests/testthat/test_modelFits.R b/tests/testthat/test_modelFits.R index 1625294c..a9fae5dd 100644 --- a/tests/testthat/test_modelFits.R +++ b/tests/testthat/test_modelFits.R @@ -79,6 +79,10 @@ test_that('Add further tests for model fits', { o <- mod$calc_suitabilityindex() expect_s4_class(o, "SpatRaster") + # ----------- # + # Clip the projected data with an external mask + expect_no_error( mod$mask(virtual_range) ) + # ----------- # # Write model outputs tf <- base::tempfile() diff --git a/tests/testthat/test_objectinheritance.R b/tests/testthat/test_objectinheritance.R index a687eb81..6d817ab1 100644 --- a/tests/testthat/test_objectinheritance.R +++ b/tests/testthat/test_objectinheritance.R @@ -3,10 +3,11 @@ test_that('Check that distribution objects are properly inherited', { skip_if_not_installed('igraph') skip_if_not_installed('abind') - - skip_if_not_installed("cmdstanr") - skip_if(condition = tryCatch(expr = cmdstanr::cmdstan_path(), error = function(e) return(TRUE)), - message = "No cmdstan path") + skip_if_not_installed("glmnet") + skip_if_not_installed("INLA") + # skip_if_not_installed("cmdstanr") + # skip_if(condition = tryCatch(expr = cmdstanr::cmdstan_path(), error = function(e) return(TRUE)), + # message = "No cmdstan path") # Load packages suppressWarnings( requireNamespace("terra", quietly = TRUE) ) @@ -74,18 +75,22 @@ test_that('Check that distribution objects are properly inherited', { # Engine x |> engine_gdb(boosting_iterations = 500) expect_true(is.Waiver(x$engine)) - x |> engine_stan() + x |> engine_glmnet() expect_true(is.Waiver(x$engine)) # Priors - x |> add_predictors(predictors, transform = 'none',derivates = 'none',priors = priors(INLAPrior(names(predictors)[1],'normal'))) + x |> add_predictors(predictors, transform = 'none',derivates = 'none', + priors = priors(GDBPrior(names(predictors)[1],'increasing'))) expect_true(is.Waiver(x$priors)) - x |> add_latent_spatial(method = "spde", priors = priors(INLAPrior('spde','prior.range'))) + x |> add_latent_spatial(method = "spde", + priors = priors(INLAPrior('spde','prior.range'))) expect_true(is.Waiver(x$priors)) # Two different priors x |> - add_predictors(predictors, transform = 'none',derivates = 'none',priors = priors(INLAPrior(names(predictors)[1],'normal'))) |> - add_latent_spatial(method = "spde", priors = priors(INLAPrior('spde','prior.range'))) + add_predictors(predictors, transform = 'none',derivates = 'none', + priors = priors(INLAPrior(names(predictors)[1],'normal'))) |> + add_latent_spatial(method = "spde", + priors = priors(INLAPrior('spde','prior.range'))) expect_true(is.Waiver(x$priors)) # Check variable removal diff --git a/tests/testthat/test_trainOtherEngines.R b/tests/testthat/test_trainOtherEngines.R index dfe78cf1..fce212ba 100644 --- a/tests/testthat/test_trainOtherEngines.R +++ b/tests/testthat/test_trainOtherEngines.R @@ -26,12 +26,18 @@ test_that('Train a distribution model with XGboost', { add_predictors(predictors, transform = 'none',derivates = 'none') |> engine_xgboost() + # Make a check + expect_no_error( check(x) ) + # Train the model suppressWarnings( mod <- train(x, "test", inference_only = FALSE, only_linear = TRUE, varsel = "none", verbose = FALSE) ) + # Make a check + expect_no_error( check(mod) ) + # Expect summary expect_s3_class(summary(mod), "data.frame") expect_s3_class(mod$show_duration(), "difftime") diff --git a/vignettes/articles/01_data_preparationhelpers.Rmd b/vignettes/articles/01_data_preparationhelpers.Rmd new file mode 100644 index 00000000..9e6eaadf --- /dev/null +++ b/vignettes/articles/01_data_preparationhelpers.Rmd @@ -0,0 +1,352 @@ +--- +title: "Preparation of biodiversity and predictor data" +author: "Martin Jung" +date: "`r Sys.Date()`" +output: + rmarkdown::html_vignette: + toc: true +fontsize: 11pt +vignette: > + %\VignetteIndexEntry{Preparation of biodiversity and predictor data} + %\VignetteEncoding{UTF-8} + %\VignetteEngine{knitr::rmarkdown} +editor_options: + chunk_output_type: console +--- + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +# Define variables for vignette figures and code execution +h <- 5.5 +w <- 5.5 +is_check <- ("CheckExEnv" %in% search()) || any(c("_R_CHECK_TIMINGS_", + "_R_CHECK_LICENSE_") %in% names(Sys.getenv())) +knitr::opts_chunk$set(fig.align = "center", eval = !is_check) +``` + +The first step with any species distribution modelling (SDM) is to collect and prepare +the necessary input data and parameters for the modelling. Predictors or +(environomental) covariates need to be acquired, in many cases formatted, +for example through masking and reprojections, and standardized. Similarly, +Biodiversity data are usually collected through independent field survey or from +available databases such as GBIF. In particular for the `ibis.iSDM` package it +can furthermore be useful to collect any other information available for a +biodiversity feature (e.g. species, habitat, ...) in question. Such as for example +broad delineation of an expert range or parameters related to the habitat preference +of a species. + +Preparing input data for the `ibis.iSDM` package requires a modest understanding +of modern geospatial R-packages, such as `sf`, `terra` and `stars`. If such +knowledge is not existing, it is advised to consult a search engine. Particularly +highlighted is the free [Geocomputation with R](https://r.geocompx.org/) book. +Many unexpected errors or patterns when using the package can usually be tracked +down to data preparation. + +```{r Load the package, warning = FALSE, message=FALSE, eval=TRUE} +# Load the package +library(ibis.iSDM) +library(terra) +library(sf) + +# Don't print out as many messages +options("ibis.setupmessages" = FALSE) +``` + +--- + +## Preparing and altering biodiversity data + +SDM approaches require observation biodiversity data, typically in the form of +presence-only or presence-absence data, which can be available in a range of +different formats from points to polygons. + +There are a range of [existing tools](https://linkinghub.elsevier.com/retrieve/pii/S0304380022003404) +that assist modellers in preparing and cleaning input data (for instance of +biases). This vignette does intend to give an overview of the options. Rather it +highlights the few functions that have been created specifically for the +`ibis.iSDM` package and that might help in some situations. + +### Adding pseudo-absence points to presence-only data + +Although in the philosophy of the `ibis.iSDM` package it is advisable to use +presence-only models in a Poisson point process modelling framework ('poipo' modelling +functions that use background points (see [Warton and Sheperd 2010](http://projecteuclid.org/euclid.aoas/1287409378)). +Yet, a good case can also be made to instead add pseudo-absence points to existing +presence-only data. This allows the use of logistic regressions and 'poipa' methods +in `ibis.iSDM` which are generally easier to interpret (response scale from 0 to 1) and +also faster to fit as model. + + +```{r Loading biodiversity testing data, message=FALSE} +## Lets load some testing data from the package + +# Background layer +background <- terra::rast(system.file("extdata/europegrid_50km.tif",package = "ibis.iSDM", mustWork = TRUE)) +# Load virtual species points +virtual_species <- sf::st_read(system.file("extdata/input_data.gpkg",package = "ibis.iSDM", mustWork = TRUE), "points",quiet = TRUE) +# Add a range +virtual_range <- sf::st_read(system.file('extdata/input_data.gpkg', package='ibis.iSDM'), 'range', quiet = TRUE) + +``` + +Adding pseudo-absence data in the `ibis.iSDM package` works by first specifiying +a *Pseudoabsence options* object that contains parameters how many and where +pseudo-absences are to be sampled. The respective function for this is called +`pseudoabs_settings()`. Further details on the available options here (there are many) can be found in +the help file. +By default the packages uses a random sampling of absence points and the +settings for this can be queried here `ibis_options()$ibis.pseudoabsence`. + +After such options have been defined, pseudoa-absence data can be added to any +point dataset via `add_pseudoabsence()`. + +Example: + +```{r Define and add pseudo-absence data, message=FALSE} + +# Define new settings for sampling points outside the minimum convex polygon of +# the known presence data +abs <- pseudoabs_settings(background = background, + nrpoints = 1000, # Sample 1000 points + method = "mcp", # Option for minimum convex polygon + inside = FALSE # Sample exclusively outside + ) +print( abs ) # See object, check abs$data for the options + +# Now add to the point data +point1 <- add_pseudoabsence(virtual_species, + # Point to the column with the presence information + field_occurrence = 'Observed', + settings = abs) + +plot(point1['Observed']) + +# --- # +# Another option sampling inside the range, but biased by a bias layer +bias <- terra::rast(system.file("extdata/predictors/hmi_mean_50km.tif", + package = "ibis.iSDM", mustWork = TRUE)) + +abs <- pseudoabs_settings(background = background, + nrpoints = 100, # Sample 100 points + method = "range", # Define range as method + inside = TRUE, # Sample exclusively inside + layer = virtual_range, # Define the range + bias = bias # Set a bias layer + ) + +# Add again to the point data +point2 <- add_pseudoabsence(virtual_species, + # Point to the column with the presence information + field_occurrence = 'Observed', + settings = abs) + +plot(point2['Observed']) + +``` + +### Thinning observations + +Many presence-only records are often spatially highly biased with varying +observational processes resulting in quite clustered point observations. For example, +urban areas and natural sites near them are considerably more often frequented by +citizens observed wildlife than sites in remote areas. +Particular for Poisson process models that can be problematic as such models critically +assume - without accounting for it - that the observational process is homogeneous +in space. + +Thinning observations is a method to remove point observations from areas that have +been "oversampled". Critically however it does only remove points from grid cells +of a provided background where this is the case and never removes the entire grid +cell fully. It can also be beneficial for model convergence and modelling speed, +as particular for well-sampled species (e.g. the common blackbird *Turdus merula*) +there are diminishing returns of fitting a SDM with like 1 million presence-only +points instead of just 20000 well separated ones. +The `ibis.iSDM` package has its own implementation for spatial thinning, but +one can also refer to [Aiello-Lammens et al.](https://doi.org/10.1111/ecog.01132) +for an alternative implementation and rationale for thinning. + +**Thinning needs to be conducted with care as it effectively discards data!** + +```{r Thinning, message=TRUE} +## We use the data loaded in above +plot(virtual_species['Observed'], main = "Original data") + +# Random thinning. Note the messages of number of thinned points +point1 <- thin_observations(df = virtual_species, + background = background, + method = 'random', + minpoints = 1 # Retain at minimum one point per grid cell! + ) + +plot(point1['Observed'], main = "Random thinning") + +# Another way: Use environmental thinning to retain enough points +# across the niche defined by a set of covariates +covariates <- terra::rast(list.files(system.file("extdata/predictors/", package = "ibis.iSDM", mustWork = TRUE), "*.tif",full.names = TRUE)) + +point2 <- thin_observations(df = virtual_species, + background = background, + env = covariates, + method = 'environmental', + minpoints = 5 # Retain at minimum five points! + ) + +plot(point2['Observed'], main = "Environmentally stratified data") + +``` + + +--- + +## Preparing and altering predictor data + +In order to be used for species distribution modelling all predictors need to +be provided in a common extent, grain size and geographic projections. They need +to align with the provided background extent to `distribution()` and should ideally +not contain no missing data. If there are missing data, the package will check and +remove during model fitting points that fall into any grid cells with missing data. + +The `ibis.iSDM` package has a number of convenience functions to modify input +predictors. These functions rather provide nuance(s) and variation to the modelling +process, rather than preparing the input data (which needs to be undertaken using +the `terra` package). + +```{r Load Predictors, message=FALSE} +# Load some test covariates +predictors <- terra::rast(list.files(system.file("extdata/predictors/", package = "ibis.iSDM", mustWork = TRUE), "*.tif",full.names = TRUE)) + +``` + + +### Transforming predictors + +For better model convergence it usually makes sense to bring all predictors to a +common unit, for example by noramlizing or scaling them. The `ibis.iSDM` package +has a convenience function that can be applied to any `terra` 'SpatRaster' object. + +**NOTE: This functionality are also available directly in `add_predictors()` as parameter!** + +```{r Transform predictors} +# Let's take a simple layer for an example +layer <- predictors$bio19_mean_50km + +# Transform it in various way +new1 <- predictor_transform(layer, option = "norm") +new2 <- predictor_transform(layer, option = "scale") + +new <- c(layer, new1, new2) +names(new) <- c("original", "normalized", "scaled") +terra::plot( new ) + +# Another common use case is to windsorize a layer, for example by removing +# top outliers form a prediction. +# Here the values are capped to a defined percentile +new3 <- predictor_transform(layer, option = "windsor", + # Clamp the upper values to the 90% percentile + windsor_props = c(0,.9)) + +new <- c(layer, new3) +names(new) <- c("original", "windsorized") +terra::plot( new ) + +``` + +Other options for transformation are also available and are listed in the methods +file. + +### Derivates of predictors + +A simple linear SDM (e.g. `engine_glmnet()`) includes the predictors as such and +thus assumes that any increase in the response variable follows a linear relationship +with the covariate. However, reality is not always that simple and usually it can +be assumed that many relationships are highly non-linear or otherwise complex. + +A standard way to introduce non-linearities to a linear algorithm is to create +derivates of predictors, such as for example a quadratic transformation of temperature. +The `ibis.iSDM` package has a convenience function that can be applied to any +`terra` 'SpatRaster' object to create such additional derivates for a model. +Note that this creates (in some cases substantial) additional predictors. + +**NOTE: This functionality are also available directly in `add_predictors()` as parameter!** + +```{r Creating derivates of predictors} +# Let's take a simple layer for an example +layer <- predictors$ndvi_mean_50km + +# Make a quadratic transformation +new1 <- predictor_derivate(layer, option = "quadratic") + +new <- c(layer, new1) +names(new) <- c("original", "quadratic") +terra::plot( new ) + +# Create some hinge transformations +new2 <- predictor_derivate(layer, option = "hinge", + # The number is controlled by the number of knots + nknots = 4 + ) +terra::plot( new2 ) + +# What does this do precisely? +# Lets check +df <- data.frame( ndvi = terra::values(layer), + terra::values(new2)) + +plot(df$ndvi_mean_50km, df[,2], ylab = "First hinge of ndvi", xlab = "NDVI") +plot(df$ndvi_mean_50km, df[,3], ylab = "Second hinge of ndvi",xlab = "NDVI") +plot(df$ndvi_mean_50km, df[,4], ylab = "Third hinge of ndvi", xlab = "NDVI") +plot(df$ndvi_mean_50km, df[,5], ylab = "Fourth hinge of ndvi",xlab = "NDVI") + +``` + +More fine-tuned control can also be achieved by creating specific interactions +among variables, for example if one expects climate to interact with forest cover. + +```{r Derivate interaction} +# Create interacting variables + +new <- predictor_derivate(predictors,option = "interaction", + int_variables = c("bio01_mean_50km", "CLC3_312_mean_50km")) + +plot(new, main = "Interaction variable") +``` + +### Homogenize missing data among predictors + +As mentioned above, during model training covariates will be extracted for each +biodiversity observational record. Missing data will in this case be discarded. +For example if 10 predictors are considered and a single one has a missing value +in one grid cell, the grid cell is considered missing among all other predictors +as well. +The `ibis.iSDM` package has some convenience functions to easily harmonize and check +the extent of missing data in a set of predictors which can be convenient for +assessing errors during data preparation. + +```{r} +# Make a subset of all predictors to show the concept +layers <- subset(predictors, c("aspect_mean_50km", + "CLC3_312_mean_50km", + "elevation_mean_50km")) + +# All these layers have identical data coverage. +# Now add missing data in one of the layers for testing +layers$CLC3_312_mean_50km[sample(1:ncell(layers), 1000)] <- NA + +# Harmonize the predictors +new <- predictor_homogenize_na(env = layers) + +# Now all the predictors have identical coverage of NA values +terra::plot(new) +# Or assess like this +plot(!terra::noNA(new$aspect_mean_50km), main = "Missing observations") + +``` + +--- + +## Preparing and altering future scenario data + +< Upcoming :) > diff --git a/vignettes/articles/01_train_simple_model.Rmd b/vignettes/articles/02_train_simple_model.Rmd similarity index 98% rename from vignettes/articles/01_train_simple_model.Rmd rename to vignettes/articles/02_train_simple_model.Rmd index 28903aaf..e0c16155 100644 --- a/vignettes/articles/01_train_simple_model.Rmd +++ b/vignettes/articles/02_train_simple_model.Rmd @@ -91,7 +91,7 @@ predictors <- subset(predictors, c("bio01_mean_50km","bio03_mean_50km","bio19_me For our example model we are going to use 'Integrated Nested Laplace approximation (INLA)' modelling framework as available through the `INLA` and `inlabru` packages. Both have been implemented separately in the ibis.iSDM package, but especially when dealing with future scenarios the use of the `inlabru` package is advised. -Now lets build a simple model object. In this case we make use of presence-only biodiversity records (`add_biodiversity_poipo`). Any presence-only records added to an object created through `distribution()` are by default modelled as intensity $\lambda$ through an inhomogeneous Poisson point proccess model (PPM), where the Number of Individuals $N$ is integrated as relative rate of occurrence per unit area: $N_i \sim Poisson(\lambda_i|A_i)$. Here $\lambda$ can then be estimated by relating it to environmental covariates $log(\lambda_i) = \alpha + \beta(x_i)$, where $i$ is a grid cell. +Now lets build a simple model object. In this case we make use of presence-only biodiversity records (`add_biodiversity_poipo`). Any presence-only records added to an object created through `distribution()` are by default modelled as intensity $\lambda$ through an inhomogeneous Poisson point process model (PPM), where the Number of Individuals $N$ is integrated as relative rate of occurrence per unit area: $N_i \sim Poisson(\lambda_i|A_i)$. Here $\lambda$ can then be estimated by relating it to environmental covariates $log(\lambda_i) = \alpha + \beta(x_i)$, where $i$ is a grid cell. It is inhomogeneous since the $lambda$ varies over the whole sampling extent. In the context of species distribution modelling PPMs are structurally similar to the popular Maxent modelling framework (see [Renner & Warton 2013](https://onlinelibrary.wiley.com/doi/10.1111/j.1541-0420.2012.01824.x) and [Renner et al. 2015](http://doi.wiley.com/10.1111/2041-210X.12352). Critically, presence-only records can only give an indication of a biased sampling and thus sampling bias has to be taken somehow into account, either through careful data preparation, apriori thinning or model-based control by including covariates $\sigma_i$ that might explain this sampling bias. diff --git a/vignettes/articles/02_integrate_data.Rmd b/vignettes/articles/03_integrate_data.Rmd similarity index 100% rename from vignettes/articles/02_integrate_data.Rmd rename to vignettes/articles/03_integrate_data.Rmd diff --git a/vignettes/articles/03_biodiversity_projections.Rmd b/vignettes/articles/04_biodiversity_projections.Rmd similarity index 100% rename from vignettes/articles/03_biodiversity_projections.Rmd rename to vignettes/articles/04_biodiversity_projections.Rmd diff --git a/vignettes/articles/04_mechanistic_estimation.Rmd b/vignettes/articles/05_mechanistic_estimation.Rmd similarity index 100% rename from vignettes/articles/04_mechanistic_estimation.Rmd rename to vignettes/articles/05_mechanistic_estimation.Rmd diff --git a/vignettes/articles/05_engine_comparison.Rmd b/vignettes/articles/06_engine_comparison.Rmd similarity index 100% rename from vignettes/articles/05_engine_comparison.Rmd rename to vignettes/articles/06_engine_comparison.Rmd diff --git a/vignettes/articles/06_package_comparison.Rmd b/vignettes/articles/07_package_comparison.Rmd similarity index 100% rename from vignettes/articles/06_package_comparison.Rmd rename to vignettes/articles/07_package_comparison.Rmd diff --git a/vignettes/articles/07_frequently-asked-questions.Rmd b/vignettes/articles/08_frequently-asked-questions.Rmd similarity index 100% rename from vignettes/articles/07_frequently-asked-questions.Rmd rename to vignettes/articles/08_frequently-asked-questions.Rmd