Skip to content

Commit

Permalink
fix ModelTransferModule
Browse files Browse the repository at this point in the history
  • Loading branch information
egillax committed Oct 7, 2024
1 parent a59d13e commit d5a01b2
Showing 1 changed file with 49 additions and 34 deletions.
83 changes: 49 additions & 34 deletions R/Module-ModelTransferModule.R
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ ModelTransferModule <- R6::R6Class(
# finding localFile details
localFileSettings <- jobContext$settings$localFileSettings

modelLocationsS3 <- tryCatch({.getModelsFromS3(
modelLocationsS3 <- tryCatch({private$.getModelsFromS3(
s3Settings = s3Settings,
saveFolder = modelSaveLocation
)}, error = function(e){ParallelLogger::logInfo(e); return(NULL)}
Expand All @@ -64,16 +64,16 @@ ModelTransferModule <- R6::R6Class(
readr::write_csv(modelLocationsS3, file = file.path(resultsFolder, 's3_export.csv'))
}

modelLocationsGithub <- tryCatch({.getModelsFromGithub(
githubSettings = githubSettings$locations,
modelLocationsGithub <- tryCatch({private$.getModelsFromGithub(
settings = githubSettings,
saveFolder = modelSaveLocation
)}, error = function(e){ParallelLogger::logInfo(e); return(NULL)}
)
if(!is.null(modelLocationsGithub)){
readr::write_csv(modelLocationsGithub, file = file.path(resultsFolder, 'github_export.csv'))
}

modelLocationsLocalFiles <- tryCatch({.getModelsFromLocalFiles(
modelLocationsLocalFiles <- tryCatch({private$.getModelsFromLocalFiles(
localFileSettings = localFileSettings$locations,
saveFolder = modelSaveLocation
)}, error = function(e){ParallelLogger::logInfo(e); return(NULL)}
Expand Down Expand Up @@ -150,33 +150,33 @@ ModelTransferModule <- R6::R6Class(
return(info)
},
# code that takes s3 details and download the models and returns the locations plus details as data.frame
.getModelsFromS3 = function(s3Settings, saveFolder){
.getModelsFromS3 = function(settings, saveFolder){

# need to have settings
# AWS_ACCESS_KEY_ID=<my access key id>
# AWS_SECRET_ACCESS_KEY=<my secret key>
# AWS_DEFAULT_REGION=ap-southeast-2

if(is.null(s3Settings)){
if(is.null(settings)){
return(NULL)
}

info <- data.frame()

for(i in 1:nrow(s3Settings)){
for(i in 1:nrow(settings)){

modelSaved <- F
saveToLoc <- ''

validBucket <- aws.s3::bucket_exists(
bucket = s3Settings$bucket[i],
region = s3Settings$region[i]
bucket = settings$bucket[i],
region = settings$region[i]
)

if(validBucket){
subfolder <- s3Settings$modelZipLocation[i]
bucket <- s3Settings$bucket[i]
region <- s3Settings$region[i]
subfolder <- settings$modelZipLocation[i]
bucket <- settings$bucket[i]
region <- settings$region[i]

result <- aws.s3::get_bucket_df(bucket = bucket, region = region, max = Inf)
paths <- fs::path(result$Key)
Expand Down Expand Up @@ -207,10 +207,10 @@ ModelTransferModule <- R6::R6Class(
ParallelLogger::logInfo(paste0("Downloaded: ", analysis, " to ", saveToLoc))
}
} else{
ParallelLogger::logInfo(paste0("No ",s3Settings$modelZipLocation[i]," in bucket ", s3Settings$bucket[i], " in region ", s3Settings$region[i] ))
ParallelLogger::logInfo(paste0("No ",settings$modelZipLocation[i]," in bucket ", settings$bucket[i], " in region ", settings$region[i] ))
}
}else{
ParallelLogger::logInfo(paste0("No bucket ", s3Settings$bucket[i] ," in region ", s3Settings$region[i]))
ParallelLogger::logInfo(paste0("No bucket ", settings$bucket[i] ," in region ", settings$region[i]))
}

info <- rbind(
Expand All @@ -227,38 +227,53 @@ ModelTransferModule <- R6::R6Class(
return(info)
},
# code that takes github details and download the models and returns the locations plus details as data.frame
.getModelsFromGithub = function(githubSettings,saveFolder){
.getModelsFromGithub = function(settings, saveFolder) {

if(is.null(githubSettings)){
if (is.null(settings)) {
return(NULL)
}

info <- data.frame()

for(i in 1:length(githubSettings)){
for (i in 1:nrow(settings)) {

githubUser <- githubSettings[[i]]$githubUser #'ohdsi-studies'
githubRepository <- githubSettings[[i]]$githubRepository #'lungCancerPrognostic'
githubBranch <- githubSettings[[i]]$githubBranch #'master'
user <- settings[i, ]$user
repository <- settings[i, ]$repository
ref <- settings[i, ]$ref

downloadCheck <- tryCatch({
downloadRepo <- tryCatch({
utils::download.file(
url = file.path("https://github.com",githubUser,githubRepository, "archive", paste0(githubBranch,".zip")),
url = file.path("https://github.com", user, repository, "archive",
paste0(ref,".zip")),
destfile = file.path(tempdir(), "tempGitHub.zip")
)}, error = function(e){ ParallelLogger::logInfo('GitHub repository download failed') ; return(NULL)}
)}, error = function(e) {
ParallelLogger::logInfo('GitHub repository download failed')
return(NULL)
}
)

if(!is.null(downloadCheck)){
if(!is.null(downloadRepo)){
# unzip into the workFolder
OhdsiSharing::decompressFolder(
sourceFileName = file.path(tempdir(), "tempGitHub.zip"),
targetFolder = file.path(tempdir(), "tempGitHub")
)
for(j in 1:length(githubSettings[[i]]$githubModelsFolder)){
githubModelsFolder <- githubSettings[[i]]$githubModelsFolder[j] #'models'
githubModelFolder <- githubSettings[[i]]$githubModelFolder[j] #'full_model'
for(j in 1:length(settings[i, ]$modelsFolder)) {
modelsFolder <- settings[i, ]$modelsFolder[j] #'models'
modelFolder <- if (is.null(settings[i, ]$modelFolder[j])) {
""
} else {
settings[i, ]$modelFolder[j]
}

tempModelLocation <- file.path(file.path(tempdir(), "tempGitHub"), dir(file.path(file.path(tempdir(), "tempGitHub"))), 'inst', githubModelsFolder, githubModelFolder )
tempModelLocation <- file.path(
tempdir(),
"tempGitHub",
paste0(repository, "-", ref),
"inst",
modelsFolder,
modelFolder
)

if(!dir.exists(file.path(saveFolder,"models",paste0('model_github_', i, '_', j)))){
dir.create(file.path(saveFolder,"models",paste0('model_github_', i, '_', j)), recursive = T)
Expand All @@ -271,14 +286,14 @@ ModelTransferModule <- R6::R6Class(
)
}

modelSaved <- T
modelSaved <- TRUE
saveToLoc <- file.path(saveFolder,"models",paste0('model_github_', i, '_', j))

info <- rbind(
info,
data.frame(
githubLocation = file.path("https://github.com",githubUser,githubRepository, "archive", paste0(githubBranch,".zip")),
githubPath = file.path('inst', githubModelsFolder, githubModelFolder),
githubLocation = file.path("https://github.com", user, repository, "archive", paste0(ref,".zip")),
githubPath = file.path('inst', modelsFolder, modelFolder),
modelSavedLocally = modelSaved,
localLocation = saveToLoc
)
Expand All @@ -291,9 +306,9 @@ ModelTransferModule <- R6::R6Class(
info <- rbind(
info,
data.frame(
githubLocation = file.path("https://github.com",githubUser,githubRepository, "archive", paste0(githubBranch,".zip")),
githubPath = file.path('inst', githubSettings[[i]]$githubModelsFolder, githubSettings[[i]]$githubModelFolder),
modelSavedLocally = F,
githubLocation = file.path("https://github.com", user, repository, "archive", paste0(ref,".zip")),
githubPath = file.path('inst', settings[i, ]$modelsFolder, settings[i, ]$modelFolder),
modelSavedLocally = FALSE,
localLocation = ''
)
)
Expand Down

0 comments on commit d5a01b2

Please sign in to comment.