From f910f38b3060029e73b38a5e57f1018dab0d5a68 Mon Sep 17 00:00:00 2001 From: Tan Date: Fri, 29 Dec 2023 15:25:45 -0500 Subject: [PATCH 1/3] pb_download_url now can return browser or api url, closes #116 --- R/pb_download.R | 26 +++++++++----------------- R/pb_download_url.R | 39 ++++++++++++++++++++++++++++++--------- R/pb_info.R | 4 ++++ man/pb_download_url.Rd | 23 ++++++++++++++++------- 4 files changed, 59 insertions(+), 33 deletions(-) diff --git a/R/pb_download.R b/R/pb_download.R index 03441c2..a26a6eb 100644 --- a/R/pb_download.R +++ b/R/pb_download.R @@ -11,7 +11,7 @@ #' #' @export #' @examples \donttest{ -#' try({ # this try block is to avoid errors on CRAN, not needed for normal use +#' \dontshow{try(\{} #' ## Download a specific file. #' ## (if dest is omitted, will write to current directory) #' dest <- tempdir() @@ -29,8 +29,8 @@ #' dest = dest #' ) #' list.files(dest) -#' }) -#' \dontshow{ +#' \dontshow{\})} +#' \dontshow{ #' try(unlink(list.files(dest, full.names = TRUE))) #' } #' } @@ -96,11 +96,9 @@ pb_download <- function(file = NULL, resp <- lapply(seq_along(df$id), function(i) gh_download_asset( - download_url = df$browser_download_url[i], + browser_download_url = df$browser_download_url[i], + api_download_url = df$api_download_url[i], destfile = df$dest[i], - owner = df$owner[1], - repo = df$repo[1], - id = df$id[i], overwrite = overwrite, .token = .token, progress = progress @@ -110,11 +108,9 @@ pb_download <- function(file = NULL, ## gh() fails on this, so we do with httr. See https://github.com/r-lib/gh/issues/57 ## Consider option to suppress progress bar? 
-gh_download_asset <- function(download_url, +gh_download_asset <- function(browser_download_url, destfile, - owner, - repo, - id, + api_download_url, overwrite = TRUE, .token = gh::gh_token(), progress = httr::progress("down")) { @@ -140,7 +136,7 @@ gh_download_asset <- function(download_url, # Attempt download via browser download URL to avoid ratelimiting resp <- httr::RETRY( verb = "GET", - url = download_url, + url = browser_download_url, httr::add_headers(Accept = "application/octet-stream"), auth_token, httr::write_disk(destfile, overwrite = overwrite), @@ -151,11 +147,7 @@ gh_download_asset <- function(download_url, if (httr::http_error(resp)){ resp <- httr::RETRY( verb = "GET", - url = paste0( - "https://", - "api.github.com/repos/", owner, "/", - repo, "/", "releases/assets/", id - ), + url = api_download_url, httr::add_headers(Accept = "application/octet-stream"), auth_token, httr::write_disk(destfile, overwrite = overwrite), diff --git a/R/pb_download_url.R b/R/pb_download_url.R index 15c4947..af9bd77 100644 --- a/R/pb_download_url.R +++ b/R/pb_download_url.R @@ -1,25 +1,42 @@ #' Get the download url of a given file #' -#' Returns the URL download for a public file. This can be useful when writing -#' scripts that may want to download the file directly without introducing any -#' dependency on `piggyback` or authentication steps. +#' Returns the download URL for a given file. This can be useful when using +#' functions that are able to accept URLs. +#' +#' @param url_type choice: one of "browser" or "api" - default "browser" is a +#' web-facing URL that is not subject to API ratelimits but does not work for +#' private repositories. 
"api" URLs work for private repos, but require a GitHub +#' token passed in an Authorization header (see examples) #' @inheritParams pb_download #' @return the URL to download a file #' @export -#' @examples \dontrun{ +#' @examples \donttest{ +#' \dontshow{try(\{} +#' +#' # returns browser url by default +#' pb_download_url("iris.tsv.xz", repo = "cboettig/piggyback-tests", tag = "v0.0.1") #' -#' pb_download_url("iris.tsv.xz", -#' repo = "cboettig/piggyback-tests", -#' tag = "v0.0.1") +#' # can return api url if desired +#' pb_download_url("iris.tsv.xz", repo = "cboettig/piggyback-tests", tag = "v0.0.1", url_type = "api") #' +#' \dontshow{\})} #' } pb_download_url <- function(file = NULL, repo = guess_repo(), tag = "latest", + url_type = c("browser","api"), .token = gh::gh_token()) { + url_type <- rlang::arg_match(url_type, values = c("browser","api")) + df <- pb_info(repo, tag, .token) - if(is.null(file)) return(df$browser_download_url) + if(is.null(file)) { + switch( + url_type, + "browser" = return(df$browser_download_url), + "api" = return(df$api_download_url) + ) + } if(any(!file %in% df$file_name)) { @@ -32,5 +49,9 @@ pb_download_url <- function(file = NULL, if(length(file) == 0) return(cli::cli_abort("No download URLs to return.")) - return(df[df$file_name %in% file,"browser_download_url"]) + switch( + url_type, + "browser" = return(df$browser_download_url[df$file_name %in% file]), + "api" = return(df$api_download_url[df$file_name %in% file]) + ) } diff --git a/R/pb_info.R b/R/pb_info.R index 1a48aec..1668a07 100644 --- a/R/pb_info.R +++ b/R/pb_info.R @@ -106,6 +106,9 @@ get_release_assets <- function(releases, r, .token) { repo = r[[2]], upload_url = releases$upload_url[i], browser_download_url = .extract_chr(a, "browser_download_url"), + api_download_url = glue::glue( + "https://api.github.com/repos/{r[[1]]}/{r[[2]]}/releases/assets/{.extract_int(a, 'id')}" + ), id = .extract_int(a, "id"), state = .extract_chr(a, "state"), stringsAsFactors = FALSE @@ -143,6 
+146,7 @@ pb_info <- function(repo = guess_repo(), repo = r[[2]], upload_url = "", browser_download_url = "", + api_download_url = "", id = "", state = "", stringsAsFactors = FALSE diff --git a/man/pb_download_url.Rd b/man/pb_download_url.Rd index 5b20923..0172211 100644 --- a/man/pb_download_url.Rd +++ b/man/pb_download_url.Rd @@ -8,6 +8,7 @@ pb_download_url( file = NULL, repo = guess_repo(), tag = "latest", + url_type = c("browser", "api"), .token = gh::gh_token() ) } @@ -20,22 +21,30 @@ tries to guess based on current working directory's git repository} \item{tag}{string: tag for the GH release, defaults to "latest"} +\item{url_type}{choice: one of "browser" or "api" - default "browser" is a +web-facing URL that is not subject to API ratelimits but does not work for +private repositories. "api" URLs work for private repos, but require a GitHub +token passed in an Authorization header (see examples)} + \item{.token}{GitHub authentication token, see \code{\link[gh:gh_token]{gh::gh_token()}}} } \value{ the URL to download a file } \description{ -Returns the URL download for a public file. This can be useful when writing -scripts that may want to download the file directly without introducing any -dependency on \code{piggyback} or authentication steps. +Returns the URL download for a given file. This can be useful when using +functions that are able to accept URLs. 
} \examples{ -\dontrun{ +\donttest{ +\dontshow{try(\{} + +# returns browser url by default +pb_download_url("iris.tsv.xz", repo = "cboettig/piggyback-tests", tag = "v0.0.1") -pb_download_url("iris.tsv.xz", - repo = "cboettig/piggyback-tests", - tag = "v0.0.1") +# can return api url if desired +pb_download_url("iris.tsv.xz", repo = "cboettig/piggyback-tests", tag = "v0.0.1", url_type = "api") +\dontshow{\})} } } From 4307013a7877708b38dd2fe005ba06aff4fb179b Mon Sep 17 00:00:00 2001 From: Tan Date: Fri, 29 Dec 2023 15:27:16 -0500 Subject: [PATCH 2/3] bumpver + news --- DESCRIPTION | 2 +- NEWS.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index da26aa9..edbb42f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: piggyback -Version: 0.1.5.9003 +Version: 0.1.5.9004 Title: Managing Larger Data on a GitHub Repository Description: Because larger (> 50 MB) data files cannot easily be committed to git, a different approach is required to manage data associated with an analysis in a diff --git a/NEWS.md b/NEWS.md index 3046695..4761470 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,7 @@ provides the code to create the release in the error body. before trying API download URLs. This should reduce/eliminate effect of API rate limits for pb_download. 
[#109] * `"latest"` release now aligns with GitHub's "latest" release definition [#113] +* `pb_download_url()` now can return choice of "browser" or "api" download URLs [#116] # piggyback 0.1.5 From f235961b06ed089c880b5a07e9f428dbb89d65d0 Mon Sep 17 00:00:00 2001 From: Tan Date: Fri, 29 Dec 2023 15:48:34 -0500 Subject: [PATCH 3/3] update pb_download_url examples --- R/pb_download_url.R | 29 ++++++++++++++++++++++++++--- man/pb_download.Rd | 6 +++--- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/R/pb_download_url.R b/R/pb_download_url.R index af9bd77..79dce69 100644 --- a/R/pb_download_url.R +++ b/R/pb_download_url.R @@ -13,11 +13,34 @@ #' @examples \donttest{ #' \dontshow{try(\{} #' -#' # returns browser url by default -#' pb_download_url("iris.tsv.xz", repo = "cboettig/piggyback-tests", tag = "v0.0.1") +#' # returns browser url by default (and all files if none are specified) +#' browser_url <- pb_download_url( +#' repo = "tanho63/piggyback-tests", +#' tag = "v0.0.2" +#' ) +#' print(browser_url) +#' utils::read.csv(browser_url[[1]]) #' #' # can return api url if desired -#' pb_download_url("iris.tsv.xz", repo = "cboettig/piggyback-tests", tag = "v0.0.1", url_type = "api") +#' api_url <- pb_download_url( +#' "mtcars.csv", +#' repo = "tanho63/piggyback-tests", +#' tag = "v0.0.2" +#' ) +#' print(api_url) +#' +#' # for public repositories, this will still work +#' utils::read.csv(api_url) +#' +#' # for private repos, can use httr or curl to fetch and then pass into read function +#' gh_pat <- Sys.getenv("GITHUB_PAT") +#' +#' if(!identical(gh_pat, "")){ +#' resp <- httr::GET(api_url, httr::add_headers(Authorization = paste("Bearer", gh_pat))) +#' utils::read.csv(text = httr::content(resp, as = "text")) +#' } +#' +#' # or use pb_read which bundles some of this for you #' #' \dontshow{\})} #' } diff --git a/man/pb_download.Rd b/man/pb_download.Rd index 9b0b082..27ca0ab 100644 --- a/man/pb_download.Rd +++ b/man/pb_download.Rd @@ -45,7 +45,7 @@ Download 
data from an existing release } \examples{ \donttest{ - try({ # this try block is to avoid errors on CRAN, not needed for normal use +\dontshow{try(\{} ## Download a specific file. ## (if dest is omitted, will write to current directory) dest <- tempdir() @@ -63,8 +63,8 @@ Download data from an existing release dest = dest ) list.files(dest) - }) - \dontshow{ +\dontshow{\})} +\dontshow{ try(unlink(list.files(dest, full.names = TRUE))) } }