Skip to content

Commit

Permalink
Exclude size of curation folder for skipped articles during space check
Browse files Browse the repository at this point in the history
  • Loading branch information
HafeezOJ committed Sep 16, 2024
1 parent 95eae49 commit 4062de9
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 10 deletions.
26 changes: 16 additions & 10 deletions figshare/Article.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import re
from figshare.Integration import Integration
from figshare.Utils import standardize_api_result, sorter_api_result, get_preserved_version_hash_and_size
from figshare.Utils import compare_hash, check_wasabi, calculate_payload_size
from figshare.Utils import compare_hash, check_wasabi, calculate_payload_size, get_article_id_and_version_from_path
from slugify import slugify
from requests.adapters import HTTPAdapter, Retry

Expand Down Expand Up @@ -158,8 +158,9 @@ def __get_article_versions(self, article):
if version_data is None:
article_version = 'v' + str(version['version']).zfill(2) if version['version'] <= 9 \
else 'v' + str(version['version'])
self.skipped_article_versions[article['id']] = []
self.skipped_article_versions[article['id']].append(article_version)
article_id = str(article['id'])
self.skipped_article_versions[article_id] = []
self.skipped_article_versions[article_id].append(article_version)
continue
metadata.append(version_data)
else:
Expand Down Expand Up @@ -563,7 +564,7 @@ def read_version_dirs_fun(self, read_version_dirs, version_dir, version_data):
return version_data

"""
Get size of files of the given directory path
Get size of files of the given directory path, excluding skipped articles UAL_RDM
:param dir_path string path of the directory whose file sizes are to be calculated.
:param include_only string include in the total only paths that contain this string. If omitted, includes all paths.
:return size integer
Expand All @@ -572,12 +573,16 @@ def get_file_size_of_given_path(self, dir_path, include_only=""):
size = 0
for path, dirs, files in os.walk(dir_path):
if include_only in path:
for f in files:
fp = os.path.join(path, f)
try:
size += os.path.getsize(fp)
except Exception:
pass
article_id, article_version = get_article_id_and_version_from_path(path)
if article_id in self.skipped_article_versions.keys() and article_version in self.skipped_article_versions[article_id]:
size += 0
else:
for f in files:
fp = os.path.join(path, f)
try:
size += os.path.getsize(fp)
except Exception:
pass

return size

Expand Down Expand Up @@ -971,6 +976,7 @@ def process_articles(self, articles):
# Calculate the size of the curation folder
# When article IDs are explicitly passed, curation folder size is calculated based on matched curation folders.
# Otherwise, it is calculated considering all curation folders.
# The size of curation folders for skipped articles is excluded in all cases.
if (self.matched_curation_folder_list):
curation_folder_size = 0
for folder in self.matched_curation_folder_list:
Expand Down
21 changes: 21 additions & 0 deletions figshare/Utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,3 +321,24 @@ def calculate_payload_size(config: dict, version_data: dict) -> int:
payload_size = version_ual_rdm_size + json_file_size + article_files_size

return payload_size


def get_article_id_and_version_from_path(path: str) -> tuple:
    """
    Extract article_id and version from UAL_RDM path

    The path is split on '/'. The second-to-last element is returned as the
    version, and the text after the last '_' in the third-to-last element is
    returned as the article id. For example,
    'curation/Name_12345/v01/UAL_RDM' gives ('12345', 'v01').

    :param path: UAL_RDM path of an article
    :type: str
    :return: A tuple containing article_id and version. Both are empty
             strings when path is empty or has fewer than three
             '/'-separated elements.
    :rtype: tuple
    """
    if not path:
        return '', ''

    path_elements = path.split('/')
    # Paths with fewer than three elements cannot hold both an article folder
    # and a version folder; return empty values instead of raising IndexError
    # so callers that probe arbitrary directories keep working.
    if len(path_elements) < 3:
        return '', ''

    version_no = path_elements[-2]
    article_id = path_elements[-3].split('_')[-1]

    return article_id, version_no

Check warning on line 344 in figshare/Utils.py

View workflow job for this annotation

GitHub Actions / lint

blank line at end of file

0 comments on commit 4062de9

Please sign in to comment.