Skip to content

Commit

Permalink
Merge pull request #36 from MattCowgill/master
Browse files Browse the repository at this point in the history
bring in `strayr` functions
  • Loading branch information
Will Mackey authored May 31, 2021
2 parents 2963b10 + 228e6b2 commit 9d80d34
Show file tree
Hide file tree
Showing 22 changed files with 711 additions and 56 deletions.
14 changes: 9 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
Package: abscorr
Type: Package
Title: Ready-to-use Australian common structures and classifications and tools for working with them
Version: 0.1.0
Author: person("Will", "Mackey", email = "wfmackey@gmail.com",
Version: 0.1.1
Author: c(person("Will", "Mackey", email = "wfmackey@gmail.com",
role = c("aut", "cre")),
person(given = "Matt", family = "Johnson",
role = c("aut", "cre")),
person(given = "David", family = "Diviny", email = "david.diviny@nousgroup.com.au"
role = c("aut", "cre"))
role = c("aut", "cre")),
person("Matt", "Cowgill", role = c("aut"), email = "mattcowgill@gmail.com", comment = c(ORCID = "0000-0003-0422-3300")),
person("Bryce", "Roney", role = c("ctb")))
Maintainer: Will Mackey <wfmackey@gmail.com>
License: GPL-3
Description: This package provides data and functions for working with common structures and classifications used in Australia.
Expand All @@ -21,9 +23,11 @@ Imports:
readxl,
magrittr,
stringr,
stringdist,
tidyr,
rlang
rlang,
stringdist (>= 0.9.5.1),
lubridate,
parsedate
Suggests:
testthat,
here
Expand Down
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@
export("%>%")
export(clean_anzsco)
export(clean_anzsic)
export(clean_state)
export(get_seifa)
export(get_seifa_index_sheet)
export(is_holiday)
export(strayr)
import(absmapsdata)
import(readxl)
importFrom(dplyr,across)
Expand All @@ -20,6 +23,7 @@ importFrom(dplyr,starts_with)
importFrom(lifecycle,deprecate_soft)
importFrom(magrittr,"%>%")
importFrom(purrr,map_dfr)
importFrom(purrr,map_lgl)
importFrom(rlang,.data)
importFrom(stringdist,amatch)
importFrom(stringr,str_remove_all)
Expand Down
3 changes: 2 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# abscorr 0.1.1
* Functions from `strayr` package to wrangle state names + public holidays added

# abscorr 0.1.0

* use pkgdown for documentation
* Added a `NEWS.md` file to track changes to the package
* Import SEIFA scores for various spatial geometries
10 changes: 10 additions & 0 deletions R/auholidays.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#' @title Australian Public Holidays
#' @description Tidy data set of Australian Public Holidays
#' @format A data frame with 776 rows and 3 variables:
#' \describe{
#' \item{\code{Date}}{The date of the holiday}
#' \item{\code{Name}}{The name of the holiday}
#' \item{\code{Jurisdiction}}{Pipe seperated list of jurisdictions to which the holiday applies}
#'}
#' @source \url{https://data.gov.au/data/dataset/australian-holidays-machine-readable-dataset}
"auholidays"
103 changes: 103 additions & 0 deletions R/clean_state.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#' Convert Australian state names and abbreviations into a consistent format
#'
#' @param x a (character) vector containing Australian state names or abbreviations or
#' a (numeric) vector containing state codes (1 = NSW, 2 = Vic, 3 = Qld, 4 = SA,
#' 5 = WA, 6 = Tas, 7 = NT, 8 = ACT).
#'
#' @param to what form should the state names be converted to? Options are
#' "state_name", "state_abbr" (the default), "iso", "postal", and "code".
#'
#' @param fuzzy_match logical; either TRUE (the default) which indicates that
#' approximate/fuzzy string matching should be used, or FALSE which indicates that
#' only exact matches should be used.
#'
#' @param max_dist numeric, sets the maximum acceptable distance between your
#' string and the matched string. Default is 0.4. Only relevant when fuzzy_match is TRUE.
#'
#' @param method the method used for approximate/fuzzy string matching. Default
#' is "jw", the Jaro-Winker distance; see `??stringdist-metrics` for more options.
#'
#' @param ... all arguments to `strayr` are passed to `clean_state`
#'
#' @return a character vector of state names, abbreviations, or codes.
#'
#' @details `strayr()` is a wrapper around `clean_state()` and is provided for
#' backwards compatibility. `strayr()` is soft-deprecated, but will not be removed
#' for the foreseeable future. New code should use `clean_state()`.
#'
#' @rdname clean_state
#' @examples
#'
#' x <- c("western Straya", "w. A ", "new soth wailes", "SA", "tazz")
#'
#' # Convert the above to state abbreviations
#' clean_state(x)
#'
#' # Convert the elements of `x` to state names
#'
#' clean_state(x, to = "state_name")
#'
#' # Disable fuzzy matching; you'll get NAs unless exact matches can be found
#'
#' clean_state(x, fuzzy_match = FALSE)
#'
#' # You can use clean_state in a dplyr mutate call
#'
#' x_df <- data.frame(state = x, stringsAsFactors = FALSE)
#'
#' \dontrun{x_df %>% mutate(state_abbr = clean_state(state))}
#'
#' @importFrom stringdist amatch
#' @export

clean_state <- function(x, to = "state_abbr", fuzzy_match = TRUE, max_dist = 0.4, method = "jw"){


if(!is.logical(fuzzy_match)){
stop("`fuzzy_match` argument must be either `TRUE` or `FALSE`")
}

if(!is.numeric(x)) {
x <- state_string_tidy(x)
}

if(fuzzy_match){
matched_abbr <- names(state_dict[stringdist::amatch(x, tolower(state_dict),
method = method,
matchNA = FALSE,
maxDist = max_dist)])
} else {
matched_abbr <- names(state_dict[match(x, tolower(state_dict))])
}

ret <- state_table[[to]][match(matched_abbr, state_table$state_abbr)]

ret <- as.character(ret)

ret

}

#' @rdname clean_state
#' @export
strayr <- function(...) {
.Deprecated(new = "clean_state",
msg = "The strayr() function has been renamed clean_state().")
clean_state(...)
}



state_string_tidy <- function(string){

string <- tolower(string)

string <- trimws(string, "both")

string <- ifelse(string %in% c("na", "n.a", "n.a.", "n a",
"not applicable"),
NA_character_,
string)

string
}
47 changes: 47 additions & 0 deletions R/is_holiday.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#' @title is_holiday
#' @description Identifies if a given date is a public holiday in Australia
#' @param date Date, POSIXct object or a string that can be parsed with `parsedate::parse_date`
#' @param jurisdictions Vector of state jurisdictions you wish to filter on, defaults to all of Australia
#' @return logical(`TRUE` or `FALSE`) vector
#' @examples
#' is_holiday('2020-01-01')
#' is_holiday('2019-05-27', jurisdictions=c('ACT', 'TAS'))
#' h_df <- data.frame(dates = c('2020-01-01', '2020-01-10'))
#' h_df %>%
#' dplyr::mutate(IsHoliday = is_holiday(dates))
#' @rdname is_holiday
#' @export
#' @importFrom purrr map_lgl
is_holiday <- function(date, jurisdictions = c()) {
ret <- purrr::map_lgl(date, do_is_holiday, jurisdictions = jurisdictions)

return(ret)
}

do_is_holiday <- function(date, jurisdictions = c()) {
if (is.na(date)) {
stop("`date` argument cannot be NA")
}

if (!lubridate::is.Date(date) & !lubridate::is.POSIXct(date)) {
# Attempt to coerce to date
new_date <- parsedate::parse_date(date)

if (is.na(new_date)) {
stop("`date` must be a Date, POSIXct object or a string that can be parsed as a date")
}

# Remove any time or time zone element
new_date <- lubridate::as_date(new_date)
} else {
new_date <- lubridate::as_date(date)
}

if (length(jurisdictions) == 0) {
ret <- abscorr::auholidays[abscorr::auholidays$Date == new_date, ]
} else {
ret <- abscorr::auholidays[abscorr::auholidays$Date == new_date & (abscorr::auholidays$Jurisdiction %in% jurisdictions | abscorr::auholidays$Jurisdiction == "NAT"), ]
}

return(nrow(ret) > 0)
}
Binary file modified R/sysdata.rda
Binary file not shown.
Loading

0 comments on commit 9d80d34

Please sign in to comment.