From 71e9709831e38560e88035a16ed717e8d1795671 Mon Sep 17 00:00:00 2001
From: zoidy
Date: Wed, 11 Sep 2024 22:40:37 +0000
Subject: [PATCH] Clean up and prettify log outputs

---
 app.py                 | 41 ++++++++++++++++++++---------------------
 figshare/Article.py    |  5 +++++
 figshare/Collection.py |  3 ++-
 3 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/app.py b/app.py
index d835a22..cf8d476 100644
--- a/app.py
+++ b/app.py
@@ -142,9 +142,8 @@ def main():
     get_args()
     config, log = main()
-    log.write_log_in_file('info',
-                          "Fetching articles...",
-                          True)
+    log.write_log_in_file('info', " ", True)
+    log.write_log_in_file('info', "------- Fetching articles -------", True)
     article_obj = Article(config, log, args.ids)
     article_data, already_preserved_counts_dict = article_obj.get_articles()
@@ -167,9 +166,7 @@ def main():
                           True)
     print(" ")
-    log.write_log_in_file('info',
-                          "Fetching collections...",
-                          True)
+    log.write_log_in_file('info', "------- Fetching collections -------", True)
     collection_obj = Collection(config, log, args.ids)
     collection_data = collection_obj.get_collections()
@@ -194,20 +191,21 @@ def main():
     preserved_collection_versions_in_wasabi = already_preserved_collections_counts['wasabi_preserved_versions']
     preserved_collection_versions_in_ap_trust = already_preserved_collections_counts['ap_trust_preserved_versions']
-    log.write_log_in_file('info', '------- Summary -------')
+    log.write_log_in_file('info', ' ', True)
+    log.write_log_in_file('info', '------- Summary -------', True)
     log.write_log_in_file('info',
-                          "Total articles/published articles: \t\t\t\t\t\t"
-                          + f'{published_unpublished_count} / {published_articles_count + already_preserved_articles_count}',
+                          f"Total articles: \t\t\t\t\t\t\t\t\t{published_unpublished_count}",
                           True)
     log.write_log_in_file('info',
-                          "Total count of already(skipped) preserved articles: \t\t\t\t"
-                          + f'{already_preserved_articles_count}',
+                          "Total published articles/article versions: \t\t\t\t\t"
+                          + f'{published_articles_count + already_preserved_articles_count} / '
+                          + f'{published_articles_versions_count + already_preserved_versions_count}',
                           True)
     log.write_log_in_file('info',
-                          "Total count of already(skipped) preserved article versions: \t\t\t"
-                          + f'{already_preserved_versions_count}',
+                          "Total count of already preserved (skipped) articles / article versions: \t\t"
+                          + f'{already_preserved_articles_count} / {already_preserved_versions_count}',
                           True)
     if article_obj.processor.duplicate_bag_in_preservation_storage_count > 0:
@@ -216,7 +214,7 @@ def main():
                           True)
     log.write_log_in_file('info',
-                          "Total articles versions matched/published: \t\t\t\t\t"
+                          "Total articles versions matched/published (unskipped): \t\t\t\t"
                           + f'{article_obj.no_matched} / {published_articles_versions_count}',
                           True)
     log.write_log_in_file('info',
@@ -240,19 +238,20 @@ def main():
                           "Total processed articles bags successfully preserved: \t\t\t\t"
                           + f'{article_obj.processor.bag_preserved_count}',
                           True)
+
+    log.write_log_in_file('info', "", True)
     log.write_log_in_file('info',
-                          "Total collections/published collections: \t\t\t\t\t\t"
-                          + f'{collections_count} / {collections_count}',
+                          "Total collections: \t\t\t\t\t\t\t\t"
+                          + f'{collections_count}',
                           True)
-    log.write_log_in_file('info',
-                          "Total count of already(skipped) preserved collections: \t\t\t\t"
-                          + f'{already_preserved_collections}',
+                          "Total published collections / collection versions: \t\t\t\t"
+                          + f'{collections_count} / {collections_versions_count}',
                           True)
     log.write_log_in_file('info',
-                          "Total count of already(skipped) preserved collection versions: \t\t\t"
-                          + f'{already_preserved_collection_versions}',
+                          "Total count of already preserved (skipped) collections / collection versions: \t"
+                          + f'{already_preserved_collections} / {already_preserved_collection_versions}',
                           True)
     log.write_log_in_file('info',
diff --git a/figshare/Article.py b/figshare/Article.py
index 2189050..e6eda70 100644
--- a/figshare/Article.py
+++ b/figshare/Article.py
@@ -829,6 +829,10 @@ def find_matched_articles(self, articles):
         self.logs.write_log_in_file("info", f"Total unmatched unique articles: {len(set(unmatched_articles))}.", True)
         self.logs.write_log_in_file("info", f"Total matched article versions: {self.no_matched}.", True)
         self.logs.write_log_in_file("info", f"Total unmatched article versions: {self.no_unmatched}.", True)
+        self.logs.write_log_in_file("info", f"Total skipped unique articles: "
+                                    + f"{len(self.already_preserved_counts_dict['already_preserved_article_ids'])}.", True)
+        self.logs.write_log_in_file("info", f"Total skipped article versions: "
+                                    + f"{self.already_preserved_counts_dict['already_preserved_versions']}.", True)
         if len(set(unmatched_articles)) > 0 or len(self.article_non_match_info) > 0:
             self.logs.write_log_in_file("warning", "There were unmatched articles or article versions."
@@ -955,6 +959,7 @@ def __initial_process(self):
     def process_articles(self, articles):
         processed_count = 0
         curation_storage_location = self.__initial_process()
+        self.logs.write_log_in_file("info", "------- Processing articles -------", True)
         self.logs.write_log_in_file("info", "Finding matched articles.", True)
         article_data = self.find_matched_articles(articles)
diff --git a/figshare/Collection.py b/figshare/Collection.py
index f94bee8..0859ceb 100644
--- a/figshare/Collection.py
+++ b/figshare/Collection.py
@@ -247,7 +247,8 @@ def get_article_api_url(self, collection):
     def process_collections(self, collections):
         processed_count = 0
-        self.logs.write_log_in_file("info", "Processing collections.", True)
+        self.logs.write_log_in_file("info", " ", True)
+        self.logs.write_log_in_file("info", "------- Processing collections -------", True)
         for collection in collections:
             data = collections[collection]
             articles = data["articles"]
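
A note on the pattern the patch settles on: section banners are plain "------- ... -------" strings, and the summary counts are lined up with literal tab characters ("\t") embedded in the messages, all passed through the same write_log_in_file(level, message, print_to_console) call. The repository's Log class is not part of this patch, so the sketch below uses a hypothetical stand-in for write_log_in_file() with that assumed three-argument signature; the variable values are made-up examples, not taken from the patch.

    import logging

    # Stand-in logger setup; the real project configures its own Log class.
    logging.basicConfig(filename="preservation.log", level=logging.INFO,
                        format="%(asctime)s:%(levelname)s: %(message)s")

    def write_log_in_file(level, message, print_to_console=False):
        # Write to the log file via the stdlib logger and optionally echo to the console.
        getattr(logging, level)(message)
        if print_to_console:
            print(message)

    published_unpublished_count = 120       # hypothetical example values
    published_articles_count = 100
    published_articles_versions_count = 140

    write_log_in_file('info', ' ', True)
    write_log_in_file('info', '------- Summary -------', True)
    write_log_in_file('info', f"Total articles: \t\t\t\t\t\t\t\t\t{published_unpublished_count}", True)
    write_log_in_file('info',
                      "Total published articles/article versions: \t\t\t\t\t"
                      + f'{published_articles_count} / {published_articles_versions_count}',
                      True)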