Skip to content

Commit

Permalink
update holidays (and re-run data-raw files)
Browse files Browse the repository at this point in the history
include everything() in across
styler for data-raw files
  • Loading branch information
peteowen1 committed Oct 9, 2024
1 parent 23f1524 commit 0707d69
Show file tree
Hide file tree
Showing 23 changed files with 271 additions and 175 deletions.
Binary file modified R/sysdata.rda
Binary file not shown.
131 changes: 79 additions & 52 deletions data-raw/create_anzsco2009.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,58 +20,75 @@ download.file(anzsco_url, temp_path, mode = "wb")

# Read
raw <- readxl::read_excel(temp_path,
sheet = 6,
range = "A11:G1555",
col_names = FALSE) %>%
sheet = 6,
range = "A11:G1555",
col_names = FALSE
) %>%
janitor::clean_names()

# Extract each level:
anzsco1 <- raw %>%
filter(!is.na(x1)) %>%
select(anzsco1_code = 1,
anzsco1 = 2) %>%
select(
anzsco1_code = 1,
anzsco1 = 2
) %>%
mutate(anzsco1_code = as.character(anzsco1_code))

anzsco2 <- raw %>%
anti_join(anzsco1, by = c("x2" = "anzsco1")) %>%
filter(!is.na(x2)) %>%
select(anzsco2_code = 2,
anzsco2 = 3) %>%
select(
anzsco2_code = 2,
anzsco2 = 3
) %>%
mutate(anzsco1_code = substr(anzsco2_code, 1, 1))

anzsco3 <- raw %>%
anti_join(anzsco1, by = c("x2" = "anzsco1")) %>%
anti_join(anzsco2, by = c("x3" = "anzsco2")) %>%
filter(!is.na(x3)) %>%
select(anzsco3_code = 3,
anzsco3 = 4) %>%
mutate(anzsco2_code = substr(anzsco3_code, 1, 2),
anzsco1_code = substr(anzsco2_code, 1, 1))
select(
anzsco3_code = 3,
anzsco3 = 4
) %>%
mutate(
anzsco2_code = substr(anzsco3_code, 1, 2),
anzsco1_code = substr(anzsco2_code, 1, 1)
)

anzsco4 <- raw %>%
anti_join(anzsco1, by = c("x2" = "anzsco1")) %>%
anti_join(anzsco2, by = c("x3" = "anzsco2")) %>%
anti_join(anzsco3, by = c("x4" = "anzsco3")) %>%
filter(!is.na(x4)) %>%
select(anzsco4_code = 4,
anzsco4 = 5) %>%
mutate(anzsco3_code = substr(anzsco4_code, 1, 3),
anzsco2_code = substr(anzsco3_code, 1, 2),
anzsco1_code = substr(anzsco2_code, 1, 1))
select(
anzsco4_code = 4,
anzsco4 = 5
) %>%
mutate(
anzsco3_code = substr(anzsco4_code, 1, 3),
anzsco2_code = substr(anzsco3_code, 1, 2),
anzsco1_code = substr(anzsco2_code, 1, 1)
)

anzsco6 <- raw %>%
anti_join(anzsco1, by = c("x2" = "anzsco1")) %>%
anti_join(anzsco2, by = c("x3" = "anzsco2")) %>%
anti_join(anzsco3, by = c("x4" = "anzsco3")) %>%
anti_join(anzsco4, by = c("x5" = "anzsco4")) %>%
filter(!is.na(x5)) %>%
select(anzsco6_code = 5,
anzsco6 = 6,
skill_level = 7) %>%
mutate(anzsco4_code = substr(anzsco6_code, 1, 4),
anzsco3_code = substr(anzsco4_code, 1, 3),
anzsco2_code = substr(anzsco3_code, 1, 2),
anzsco1_code = substr(anzsco2_code, 1, 1))
select(
anzsco6_code = 5,
anzsco6 = 6,
skill_level = 7
) %>%
mutate(
anzsco4_code = substr(anzsco6_code, 1, 4),
anzsco3_code = substr(anzsco4_code, 1, 3),
anzsco2_code = substr(anzsco3_code, 1, 2),
anzsco1_code = substr(anzsco2_code, 1, 1)
)

# Join into wide ANZSCO occupation list
comb <- anzsco1 %>%
Expand All @@ -86,39 +103,47 @@ comb <- anzsco1 %>%
nfd1 <- comb %>%
select(anzsco1_code, anzsco1) %>%
distinct() %>%
mutate(anzsco2 = glue("{anzsco1}, nfd"),
anzsco2_code = glue("{anzsco1_code}0"),
anzsco3 = glue("{anzsco1}, nfd"),
anzsco3_code = glue("{anzsco1_code}00"),
anzsco4 = glue("{anzsco1}, nfd"),
anzsco4_code = glue("{anzsco1_code}000"),
anzsco6 = glue("{anzsco1}, nfd"),
anzsco6_code = glue("{anzsco1_code}00000"))
mutate(
anzsco2 = glue("{anzsco1}, nfd"),
anzsco2_code = glue("{anzsco1_code}0"),
anzsco3 = glue("{anzsco1}, nfd"),
anzsco3_code = glue("{anzsco1_code}00"),
anzsco4 = glue("{anzsco1}, nfd"),
anzsco4_code = glue("{anzsco1_code}000"),
anzsco6 = glue("{anzsco1}, nfd"),
anzsco6_code = glue("{anzsco1_code}00000")
)

nfd2 <- comb %>%
select(anzsco1_code, anzsco1, anzsco2_code, anzsco2) %>%
distinct() %>%
mutate(anzsco3 = glue("{anzsco2}, nfd"),
anzsco3_code = glue("{anzsco2_code}0"),
anzsco4 = glue("{anzsco2}, nfd"),
anzsco4_code = glue("{anzsco2_code}00"),
anzsco6 = glue("{anzsco2}, nfd"),
anzsco6_code = glue("{anzsco2_code}0000"))
mutate(
anzsco3 = glue("{anzsco2}, nfd"),
anzsco3_code = glue("{anzsco2_code}0"),
anzsco4 = glue("{anzsco2}, nfd"),
anzsco4_code = glue("{anzsco2_code}00"),
anzsco6 = glue("{anzsco2}, nfd"),
anzsco6_code = glue("{anzsco2_code}0000")
)


nfd3 <- comb %>%
select(anzsco1_code, anzsco1, anzsco2_code, anzsco2, anzsco3_code, anzsco3) %>%
distinct() %>%
mutate(anzsco4 = glue("{anzsco3}, nfd"),
anzsco4_code = glue("{anzsco3_code}0"),
anzsco6 = glue("{anzsco3}, nfd"),
anzsco6_code = glue("{anzsco3_code}000"))
mutate(
anzsco4 = glue("{anzsco3}, nfd"),
anzsco4_code = glue("{anzsco3_code}0"),
anzsco6 = glue("{anzsco3}, nfd"),
anzsco6_code = glue("{anzsco3_code}000")
)

anzsco2009 <- comb %>%
bind_rows(nfd1, nfd2, nfd3) %>%
arrange(anzsco1_code, anzsco2_code, anzsco3_code,
anzsco4_code, anzsco6_code) %>%
mutate(across(.fns = as.character)) %>%
arrange(
anzsco1_code, anzsco2_code, anzsco3_code,
anzsco4_code, anzsco6_code
) %>%
mutate(across(everything(), .fns = as.character)) %>%
arrange(anzsco6_code)

if (include_factor_variants) {
Expand All @@ -129,15 +154,17 @@ if (include_factor_variants) {
anzsco3_f = as_factor(anzsco3),
anzsco4_f = as_factor(anzsco4),
anzsco6_f = as_factor(anzsco6),
skill_level = as_factor(skill_level)) %>%
skill_level = as_factor(skill_level)
) %>%
# order
select(anzsco1_code, anzsco1, anzsco1_f,
anzsco2_code, anzsco2, anzsco2_f,
anzsco3_code, anzsco3, anzsco3_f,
anzsco4_code, anzsco4, anzsco4_f,
anzsco6_code, anzsco6, anzsco6_f,
skill_level)

select(
anzsco1_code, anzsco1, anzsco1_f,
anzsco2_code, anzsco2, anzsco2_f,
anzsco3_code, anzsco3, anzsco3_f,
anzsco4_code, anzsco4, anzsco4_f,
anzsco6_code, anzsco6, anzsco6_f,
skill_level
)
}

# Rename using new conventions: https://github.com/runapp-aus/abscorr/issues/17
Expand Down
131 changes: 79 additions & 52 deletions data-raw/create_anzsco2013.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,58 +20,75 @@ download.file(anzsco_url, temp_path, mode = "wb")

# Read
raw <- readxl::read_excel(temp_path,
sheet = 6,
range = "A11:G1555",
col_names = FALSE) %>%
sheet = 6,
range = "A11:G1555",
col_names = FALSE
) %>%
janitor::clean_names()

# Extract each level:
anzsco1 <- raw %>%
filter(!is.na(x1)) %>%
select(anzsco1_code = 1,
anzsco1 = 2) %>%
select(
anzsco1_code = 1,
anzsco1 = 2
) %>%
mutate(anzsco1_code = as.character(anzsco1_code))

anzsco2 <- raw %>%
anti_join(anzsco1, by = c("x2" = "anzsco1")) %>%
filter(!is.na(x2)) %>%
select(anzsco2_code = 2,
anzsco2 = 3) %>%
select(
anzsco2_code = 2,
anzsco2 = 3
) %>%
mutate(anzsco1_code = substr(anzsco2_code, 1, 1))

anzsco3 <- raw %>%
anti_join(anzsco1, by = c("x2" = "anzsco1")) %>%
anti_join(anzsco2, by = c("x3" = "anzsco2")) %>%
filter(!is.na(x3)) %>%
select(anzsco3_code = 3,
anzsco3 = 4) %>%
mutate(anzsco2_code = substr(anzsco3_code, 1, 2),
anzsco1_code = substr(anzsco2_code, 1, 1))
select(
anzsco3_code = 3,
anzsco3 = 4
) %>%
mutate(
anzsco2_code = substr(anzsco3_code, 1, 2),
anzsco1_code = substr(anzsco2_code, 1, 1)
)

anzsco4 <- raw %>%
anti_join(anzsco1, by = c("x2" = "anzsco1")) %>%
anti_join(anzsco2, by = c("x3" = "anzsco2")) %>%
anti_join(anzsco3, by = c("x4" = "anzsco3")) %>%
filter(!is.na(x4)) %>%
select(anzsco4_code = 4,
anzsco4 = 5) %>%
mutate(anzsco3_code = substr(anzsco4_code, 1, 3),
anzsco2_code = substr(anzsco3_code, 1, 2),
anzsco1_code = substr(anzsco2_code, 1, 1))
select(
anzsco4_code = 4,
anzsco4 = 5
) %>%
mutate(
anzsco3_code = substr(anzsco4_code, 1, 3),
anzsco2_code = substr(anzsco3_code, 1, 2),
anzsco1_code = substr(anzsco2_code, 1, 1)
)

anzsco6 <- raw %>%
anti_join(anzsco1, by = c("x2" = "anzsco1")) %>%
anti_join(anzsco2, by = c("x3" = "anzsco2")) %>%
anti_join(anzsco3, by = c("x4" = "anzsco3")) %>%
anti_join(anzsco4, by = c("x5" = "anzsco4")) %>%
filter(!is.na(x5)) %>%
select(anzsco6_code = 5,
anzsco6 = 6,
skill_level = 7) %>%
mutate(anzsco4_code = substr(anzsco6_code, 1, 4),
anzsco3_code = substr(anzsco4_code, 1, 3),
anzsco2_code = substr(anzsco3_code, 1, 2),
anzsco1_code = substr(anzsco2_code, 1, 1))
select(
anzsco6_code = 5,
anzsco6 = 6,
skill_level = 7
) %>%
mutate(
anzsco4_code = substr(anzsco6_code, 1, 4),
anzsco3_code = substr(anzsco4_code, 1, 3),
anzsco2_code = substr(anzsco3_code, 1, 2),
anzsco1_code = substr(anzsco2_code, 1, 1)
)

# Join into wide ANZSCO occupation list
comb <- anzsco1 %>%
Expand All @@ -86,39 +103,47 @@ comb <- anzsco1 %>%
nfd1 <- comb %>%
select(anzsco1_code, anzsco1) %>%
distinct() %>%
mutate(anzsco2 = glue("{anzsco1}, nfd"),
anzsco2_code = glue("{anzsco1_code}0"),
anzsco3 = glue("{anzsco1}, nfd"),
anzsco3_code = glue("{anzsco1_code}00"),
anzsco4 = glue("{anzsco1}, nfd"),
anzsco4_code = glue("{anzsco1_code}000"),
anzsco6 = glue("{anzsco1}, nfd"),
anzsco6_code = glue("{anzsco1_code}00000"))
mutate(
anzsco2 = glue("{anzsco1}, nfd"),
anzsco2_code = glue("{anzsco1_code}0"),
anzsco3 = glue("{anzsco1}, nfd"),
anzsco3_code = glue("{anzsco1_code}00"),
anzsco4 = glue("{anzsco1}, nfd"),
anzsco4_code = glue("{anzsco1_code}000"),
anzsco6 = glue("{anzsco1}, nfd"),
anzsco6_code = glue("{anzsco1_code}00000")
)

nfd2 <- comb %>%
select(anzsco1_code, anzsco1, anzsco2_code, anzsco2) %>%
distinct() %>%
mutate(anzsco3 = glue("{anzsco2}, nfd"),
anzsco3_code = glue("{anzsco2_code}0"),
anzsco4 = glue("{anzsco2}, nfd"),
anzsco4_code = glue("{anzsco2_code}00"),
anzsco6 = glue("{anzsco2}, nfd"),
anzsco6_code = glue("{anzsco2_code}0000"))
mutate(
anzsco3 = glue("{anzsco2}, nfd"),
anzsco3_code = glue("{anzsco2_code}0"),
anzsco4 = glue("{anzsco2}, nfd"),
anzsco4_code = glue("{anzsco2_code}00"),
anzsco6 = glue("{anzsco2}, nfd"),
anzsco6_code = glue("{anzsco2_code}0000")
)


nfd3 <- comb %>%
select(anzsco1_code, anzsco1, anzsco2_code, anzsco2, anzsco3_code, anzsco3) %>%
distinct() %>%
mutate(anzsco4 = glue("{anzsco3}, nfd"),
anzsco4_code = glue("{anzsco3_code}0"),
anzsco6 = glue("{anzsco3}, nfd"),
anzsco6_code = glue("{anzsco3_code}000"))
mutate(
anzsco4 = glue("{anzsco3}, nfd"),
anzsco4_code = glue("{anzsco3_code}0"),
anzsco6 = glue("{anzsco3}, nfd"),
anzsco6_code = glue("{anzsco3_code}000")
)

anzsco2013 <- comb %>%
bind_rows(nfd1, nfd2, nfd3) %>%
arrange(anzsco1_code, anzsco2_code, anzsco3_code,
anzsco4_code, anzsco6_code) %>%
mutate(across(.fns = as.character)) %>%
arrange(
anzsco1_code, anzsco2_code, anzsco3_code,
anzsco4_code, anzsco6_code
) %>%
mutate(across(everything(), .fns = as.character)) %>%
arrange(anzsco6_code)

if (include_factor_variants) {
Expand All @@ -129,15 +154,17 @@ if (include_factor_variants) {
anzsco3_f = as_factor(anzsco3),
anzsco4_f = as_factor(anzsco4),
anzsco6_f = as_factor(anzsco6),
skill_level = as_factor(skill_level)) %>%
skill_level = as_factor(skill_level)
) %>%
# order
select(anzsco1_code, anzsco1, anzsco1_f,
anzsco2_code, anzsco2, anzsco2_f,
anzsco3_code, anzsco3, anzsco3_f,
anzsco4_code, anzsco4, anzsco4_f,
anzsco6_code, anzsco6, anzsco6_f,
skill_level)

select(
anzsco1_code, anzsco1, anzsco1_f,
anzsco2_code, anzsco2, anzsco2_f,
anzsco3_code, anzsco3, anzsco3_f,
anzsco4_code, anzsco4, anzsco4_f,
anzsco6_code, anzsco6, anzsco6_f,
skill_level
)
}

# Rename using new conventions: https://github.com/runapp-aus/abscorr/issues/17
Expand Down
2 changes: 1 addition & 1 deletion data-raw/create_anzsco2019.R
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ anzsco2019 <- comb %>%
bind_rows(nfd1, nfd2, nfd3) %>%
arrange(anzsco1_code, anzsco2_code, anzsco3_code,
anzsco4_code, anzsco6_code) %>%
mutate(across(.fns = as.character)) %>%
mutate(across(everything(), .fns = as.character)) %>%
arrange(anzsco6_code)

if (include_factor_variants) {
Expand Down
Loading

0 comments on commit 0707d69

Please sign in to comment.