-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #36 from MattCowgill/master
bring in `strayr` functions
- Loading branch information
Showing
22 changed files
with
711 additions
and
56 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
# abscorr 0.1.1 | ||
* Functions from `strayr` package to wrangle state names + public holidays added | ||
|
||
# abscorr 0.1.0 | ||
|
||
* use pkgdown for documentation | ||
* Added a `NEWS.md` file to track changes to the package | ||
* Import SEIFA scores for various spatial geometries |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#' @title Australian Public Holidays | ||
#' @description Tidy data set of Australian Public Holidays | ||
#' @format A data frame with 776 rows and 3 variables: | ||
#' \describe{ | ||
#' \item{\code{Date}}{The date of the holiday} | ||
#' \item{\code{Name}}{The name of the holiday} | ||
#' \item{\code{Jurisdiction}}{Pipe seperated list of jurisdictions to which the holiday applies} | ||
#'} | ||
#' @source \url{https://data.gov.au/data/dataset/australian-holidays-machine-readable-dataset} | ||
"auholidays" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
#' Convert Australian state names and abbreviations into a consistent format | ||
#' | ||
#' @param x a (character) vector containing Australian state names or abbreviations or | ||
#' a (numeric) vector containing state codes (1 = NSW, 2 = Vic, 3 = Qld, 4 = SA, | ||
#' 5 = WA, 6 = Tas, 7 = NT, 8 = ACT). | ||
#' | ||
#' @param to what form should the state names be converted to? Options are | ||
#' "state_name", "state_abbr" (the default), "iso", "postal", and "code". | ||
#' | ||
#' @param fuzzy_match logical; either TRUE (the default) which indicates that | ||
#' approximate/fuzzy string matching should be used, or FALSE which indicates that | ||
#' only exact matches should be used. | ||
#' | ||
#' @param max_dist numeric, sets the maximum acceptable distance between your | ||
#' string and the matched string. Default is 0.4. Only relevant when fuzzy_match is TRUE. | ||
#' | ||
#' @param method the method used for approximate/fuzzy string matching. Default | ||
#' is "jw", the Jaro-Winker distance; see `??stringdist-metrics` for more options. | ||
#' | ||
#' @param ... all arguments to `strayr` are passed to `clean_state` | ||
#' | ||
#' @return a character vector of state names, abbreviations, or codes. | ||
#' | ||
#' @details `strayr()` is a wrapper around `clean_state()` and is provided for | ||
#' backwards compatibility. `strayr()` is soft-deprecated, but will not be removed | ||
#' for the foreseeable future. New code should use `clean_state()`. | ||
#' | ||
#' @rdname clean_state | ||
#' @examples | ||
#' | ||
#' x <- c("western Straya", "w. A ", "new soth wailes", "SA", "tazz") | ||
#' | ||
#' # Convert the above to state abbreviations | ||
#' clean_state(x) | ||
#' | ||
#' # Convert the elements of `x` to state names | ||
#' | ||
#' clean_state(x, to = "state_name") | ||
#' | ||
#' # Disable fuzzy matching; you'll get NAs unless exact matches can be found | ||
#' | ||
#' clean_state(x, fuzzy_match = FALSE) | ||
#' | ||
#' # You can use clean_state in a dplyr mutate call | ||
#' | ||
#' x_df <- data.frame(state = x, stringsAsFactors = FALSE) | ||
#' | ||
#' \dontrun{x_df %>% mutate(state_abbr = clean_state(state))} | ||
#' | ||
#' @importFrom stringdist amatch | ||
#' @export | ||
|
||
clean_state <- function(x, to = "state_abbr", fuzzy_match = TRUE, max_dist = 0.4, method = "jw"){ | ||
|
||
|
||
if(!is.logical(fuzzy_match)){ | ||
stop("`fuzzy_match` argument must be either `TRUE` or `FALSE`") | ||
} | ||
|
||
if(!is.numeric(x)) { | ||
x <- state_string_tidy(x) | ||
} | ||
|
||
if(fuzzy_match){ | ||
matched_abbr <- names(state_dict[stringdist::amatch(x, tolower(state_dict), | ||
method = method, | ||
matchNA = FALSE, | ||
maxDist = max_dist)]) | ||
} else { | ||
matched_abbr <- names(state_dict[match(x, tolower(state_dict))]) | ||
} | ||
|
||
ret <- state_table[[to]][match(matched_abbr, state_table$state_abbr)] | ||
|
||
ret <- as.character(ret) | ||
|
||
ret | ||
|
||
} | ||
|
||
#' @rdname clean_state | ||
#' @export | ||
strayr <- function(...) { | ||
.Deprecated(new = "clean_state", | ||
msg = "The strayr() function has been renamed clean_state().") | ||
clean_state(...) | ||
} | ||
|
||
|
||
|
||
state_string_tidy <- function(string){ | ||
|
||
string <- tolower(string) | ||
|
||
string <- trimws(string, "both") | ||
|
||
string <- ifelse(string %in% c("na", "n.a", "n.a.", "n a", | ||
"not applicable"), | ||
NA_character_, | ||
string) | ||
|
||
string | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#' @title is_holiday | ||
#' @description Identifies if a given date is a public holiday in Australia | ||
#' @param date Date, POSIXct object or a string that can be parsed with `parsedate::parse_date` | ||
#' @param jurisdictions Vector of state jurisdictions you wish to filter on, defaults to all of Australia | ||
#' @return logical(`TRUE` or `FALSE`) vector | ||
#' @examples | ||
#' is_holiday('2020-01-01') | ||
#' is_holiday('2019-05-27', jurisdictions=c('ACT', 'TAS')) | ||
#' h_df <- data.frame(dates = c('2020-01-01', '2020-01-10')) | ||
#' h_df %>% | ||
#' dplyr::mutate(IsHoliday = is_holiday(dates)) | ||
#' @rdname is_holiday | ||
#' @export | ||
#' @importFrom purrr map_lgl | ||
is_holiday <- function(date, jurisdictions = c()) { | ||
ret <- purrr::map_lgl(date, do_is_holiday, jurisdictions = jurisdictions) | ||
|
||
return(ret) | ||
} | ||
|
||
do_is_holiday <- function(date, jurisdictions = c()) { | ||
if (is.na(date)) { | ||
stop("`date` argument cannot be NA") | ||
} | ||
|
||
if (!lubridate::is.Date(date) & !lubridate::is.POSIXct(date)) { | ||
# Attempt to coerce to date | ||
new_date <- parsedate::parse_date(date) | ||
|
||
if (is.na(new_date)) { | ||
stop("`date` must be a Date, POSIXct object or a string that can be parsed as a date") | ||
} | ||
|
||
# Remove any time or time zone element | ||
new_date <- lubridate::as_date(new_date) | ||
} else { | ||
new_date <- lubridate::as_date(date) | ||
} | ||
|
||
if (length(jurisdictions) == 0) { | ||
ret <- abscorr::auholidays[abscorr::auholidays$Date == new_date, ] | ||
} else { | ||
ret <- abscorr::auholidays[abscorr::auholidays$Date == new_date & (abscorr::auholidays$Jurisdiction %in% jurisdictions | abscorr::auholidays$Jurisdiction == "NAT"), ] | ||
} | ||
|
||
return(nrow(ret) > 0) | ||
} |
Binary file not shown.
Oops, something went wrong.