pb_download_url returns choice of browser or api download urls #117

Merged · 3 commits · Dec 29, 2023
Changes from all commits
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,5 +1,5 @@
Package: piggyback
-Version: 0.1.5.9003
+Version: 0.1.5.9004
Title: Managing Larger Data on a GitHub Repository
Description: Because larger (> 50 MB) data files cannot easily be committed to git,
a different approach is required to manage data associated with an analysis in a
1 change: 1 addition & 0 deletions NEWS.md
@@ -7,6 +7,7 @@ provides the code to create the release in the error body.
before trying API download URLs. This should reduce/eliminate effect of API rate
limits for pb_download. [#109]
* `"latest"` release now aligns with GitHub's "latest" release definition [#113]
+* `pb_download_url()` now can return choice of "browser" or "api" download URLs [#116]

# piggyback 0.1.5

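A minimal usage sketch of the new `url_type` argument described in the NEWS entry above. The repo, tag, and file names are the ones used in the examples elsewhere in this PR; the URLs in the output comments are illustrative of the two formats, and `<asset-id>` stands in for the numeric asset id:

```r
library(piggyback)

# web-facing URL: not subject to API rate limits, public repositories only
pb_download_url(
  "mtcars.csv",
  repo = "tanho63/piggyback-tests",
  tag = "v0.0.2",
  url_type = "browser"
)
#> e.g. "https://github.com/tanho63/piggyback-tests/releases/download/v0.0.2/mtcars.csv"

# API asset URL: works for private repositories when a token is supplied
pb_download_url(
  "mtcars.csv",
  repo = "tanho63/piggyback-tests",
  tag = "v0.0.2",
  url_type = "api"
)
#> e.g. "https://api.github.com/repos/tanho63/piggyback-tests/releases/assets/<asset-id>"
```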
26 changes: 9 additions & 17 deletions R/pb_download.R
@@ -11,7 +11,7 @@
#'
#' @export
#' @examples \donttest{
-#' try({ # this try block is to avoid errors on CRAN, not needed for normal use
+#' \dontshow{try(\{}
#' ## Download a specific file.
#' ## (if dest is omitted, will write to current directory)
#' dest <- tempdir()
@@ -29,8 +29,8 @@
#' dest = dest
#' )
#' list.files(dest)
-#' })
-#' \dontshow{
+#' \dontshow{\})}
+#' \dontshow{
#' try(unlink(list.files(dest, full.names = TRUE)))
#' }
#' }
@@ -96,11 +96,9 @@ pb_download <- function(file = NULL,

resp <- lapply(seq_along(df$id), function(i)
gh_download_asset(
-download_url = df$browser_download_url[i],
+browser_download_url = df$browser_download_url[i],
+api_download_url = df$api_download_url[i],
destfile = df$dest[i],
-owner = df$owner[1],
-repo = df$repo[1],
-id = df$id[i],
overwrite = overwrite,
.token = .token,
progress = progress
@@ -110,11 +108,9 @@

## gh() fails on this, so we do with httr. See https://github.com/r-lib/gh/issues/57
## Consider option to suppress progress bar?
-gh_download_asset <- function(download_url,
+gh_download_asset <- function(browser_download_url,
destfile,
-owner,
-repo,
-id,
+api_download_url,
overwrite = TRUE,
.token = gh::gh_token(),
progress = httr::progress("down")) {
@@ -140,7 +136,7 @@ gh_download_asset <- function(download_url,
# Attempt download via browser download URL to avoid ratelimiting
resp <- httr::RETRY(
verb = "GET",
-url = download_url,
+url = browser_download_url,
httr::add_headers(Accept = "application/octet-stream"),
auth_token,
httr::write_disk(destfile, overwrite = overwrite),
@@ -151,11 +147,7 @@
if (httr::http_error(resp)){
resp <- httr::RETRY(
verb = "GET",
-url = paste0(
-"https://",
-"api.github.com/repos/", owner, "/",
-repo, "/", "releases/assets/", id
-),
+url = api_download_url,
httr::add_headers(Accept = "application/octet-stream"),
auth_token,
httr::write_disk(destfile, overwrite = overwrite),
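The refactor above has `pb_download()` pass both `browser_download_url` and `api_download_url` into `gh_download_asset()`, which tries the rate-limit-free browser URL first and only falls back to the API asset endpoint on an HTTP error. A standalone httr sketch of that fallback pattern follows; the helper name and arguments here are illustrative, not part of the package:

```r
library(httr)

# illustrative helper mirroring the browser-first / API-fallback logic in the diff
download_release_asset <- function(browser_download_url, api_download_url, destfile,
                                   token = gh::gh_token()) {
  # optional auth: skip the header when no token is available
  # (mirrors the auth_token config object passed in the diff above)
  auth <- if (nzchar(token)) add_headers(Authorization = paste("token", token)) else NULL

  # 1. try the browser download URL (not counted against API rate limits)
  resp <- RETRY(
    "GET", browser_download_url,
    add_headers(Accept = "application/octet-stream"),
    auth,
    write_disk(destfile, overwrite = TRUE)
  )

  # 2. fall back to the API asset URL (needed e.g. for private repositories)
  if (http_error(resp)) {
    resp <- RETRY(
      "GET", api_download_url,
      add_headers(Accept = "application/octet-stream"),
      auth,
      write_disk(destfile, overwrite = TRUE)
    )
  }

  stop_for_status(resp)
  invisible(destfile)
}
```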
62 changes: 53 additions & 9 deletions R/pb_download_url.R
@@ -1,25 +1,65 @@
#' Get the download url of a given file
#'
-#' Returns the URL download for a public file. This can be useful when writing
-#' scripts that may want to download the file directly without introducing any
-#' dependency on `piggyback` or authentication steps.
+#' Returns the URL download for a given file. This can be useful when using
+#' functions that are able to accept URLs.
#'
+#' @param url_type choice: one of "browser" or "api" - default "browser" is a
+#' web-facing URL that is not subject to API ratelimits but does not work for
+#' private repositories. "api" URLs work for private repos, but require a GitHub
+#' token passed in an Authorization header (see examples)
#' @inheritParams pb_download
#' @return the URL to download a file
#' @export
-#' @examples \dontrun{
+#' @examples \donttest{
+#' \dontshow{try(\{}
#'
+#' # returns browser url by default (and all files if none are specified)
+#' browser_url <- pb_download_url(
+#'   repo = "tanho63/piggyback-tests",
+#'   tag = "v0.0.2"
+#' )
+#' print(browser_url)
+#' utils::read.csv(browser_url[[1]])
+#'
+#' # can return api url if desired
+#' api_url <- pb_download_url(
+#'   "mtcars.csv",
+#'   repo = "tanho63/piggyback-tests",
+#'   tag = "v0.0.2"
+#' )
+#' print(api_url)
+#'
#' pb_download_url("iris.tsv.xz",
#' repo = "cboettig/piggyback-tests",
#' tag = "v0.0.1")
+#' # for public repositories, this will still work
+#' utils::read.csv(api_url)
+#'
+#' # for private repos, can use httr or curl to fetch and then pass into read function
+#' gh_pat <- Sys.getenv("GITHUB_PAT")
+#'
+#' if(!identical(gh_pat, "")){
+#'   resp <- httr::GET(api_url, httr::add_headers(Authorization = paste("Bearer", gh_pat)))
+#'   utils::read.csv(text = httr::content(resp, as = "text"))
+#' }
+#'
+#' # or use pb_read which bundles some of this for you
+#'
+#' \dontshow{\})}
#' }
pb_download_url <- function(file = NULL,
repo = guess_repo(),
tag = "latest",
url_type = c("browser","api"),
.token = gh::gh_token()) {
+url_type <- rlang::arg_match(url_type, values = c("browser","api"))

df <- pb_info(repo, tag, .token)

-if(is.null(file)) return(df$browser_download_url)
+if(is.null(file)) {
+  switch(
+    url_type,
+    "browser" = return(df$browser_download_url),
+    "api" = return(df$api_download_url)
+  )
+}

if(any(!file %in% df$file_name)) {

@@ -32,5 +72,9 @@

if(length(file) == 0) return(cli::cli_abort("No download URLs to return."))

-return(df[df$file_name %in% file,"browser_download_url"])
+switch(
+  url_type,
+  "browser" = return(df$browser_download_url[df$file_name %in% file]),
+  "api" = return(df$api_download_url[df$file_name %in% file])
+)
}
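For private repositories, the `"api"` URLs returned above must be fetched with an Authorization header. The roxygen example in the diff uses httr; a curl-based equivalent is sketched below, assuming `GITHUB_PAT` is set and the `mtcars.csv` asset from the examples exists:

```r
library(curl)

api_url <- piggyback::pb_download_url(
  "mtcars.csv",
  repo = "tanho63/piggyback-tests",
  tag = "v0.0.2",
  url_type = "api"
)

h <- new_handle()
handle_setheaders(
  h,
  Authorization = paste("Bearer", Sys.getenv("GITHUB_PAT")),
  Accept = "application/octet-stream"  # ask the API for the file itself, not JSON metadata
)

dest <- file.path(tempdir(), "mtcars.csv")
curl_download(api_url, destfile = dest, handle = h)
utils::read.csv(dest)
```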
4 changes: 4 additions & 0 deletions R/pb_info.R
@@ -106,6 +106,9 @@ get_release_assets <- function(releases, r, .token) {
repo = r[[2]],
upload_url = releases$upload_url[i],
browser_download_url = .extract_chr(a, "browser_download_url"),
+api_download_url = glue::glue(
+  "https://api.github.com/repos/{r[[1]]}/{r[[2]]}/releases/assets/{.extract_int(a, 'id')}"
+),
id = .extract_int(a, "id"),
state = .extract_chr(a, "state"),
stringsAsFactors = FALSE
@@ -143,6 +146,7 @@ pb_info <- function(repo = guess_repo(),
repo = r[[2]],
upload_url = "",
browser_download_url = "",
api_download_url = "",
id = "",
state = "",
stringsAsFactors = FALSE
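The `glue()` template added above assembles the API asset URL from the owner, repo, and numeric asset id returned by the releases endpoint. A quick sketch of the resulting shape, using the repo from this PR's examples and a made-up asset id:

```r
# owner/repo come from the examples in this PR; the asset id is hypothetical
glue::glue(
  "https://api.github.com/repos/{owner}/{repo}/releases/assets/{id}",
  owner = "tanho63",
  repo = "piggyback-tests",
  id = 12345678L
)
#> https://api.github.com/repos/tanho63/piggyback-tests/releases/assets/12345678
```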
6 changes: 3 additions & 3 deletions man/pb_download.Rd

Some generated files are not rendered by default.

23 changes: 16 additions & 7 deletions man/pb_download_url.Rd

Some generated files are not rendered by default.
