From 0d341ff75f20a72dbfa9721c2e59a62e115adef8 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 4 Oct 2024 10:53:40 -0400 Subject: [PATCH 01/14] ensure collection date is fetched and correctly cached for use --- 8Knot/cache_manager/db_init.py | 6 ++++-- 8Knot/queries/ossf_score_query.py | 3 ++- 8Knot/queries/repo_info_query.py | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/8Knot/cache_manager/db_init.py b/8Knot/cache_manager/db_init.py index dd2c2a83..17483d38 100644 --- a/8Knot/cache_manager/db_init.py +++ b/8Knot/cache_manager/db_init.py @@ -306,7 +306,8 @@ def _create_application_tables() -> None: CREATE UNLOGGED TABLE IF NOT EXISTS ossf_score_query( repo_id int, name text, - score float4 + score float4, + data_collection_date timestamp ) """ ) @@ -323,7 +324,8 @@ def _create_application_tables() -> None: stars_count int, code_of_conduct_file text, security_issue_file text, - security_audit_file text + security_audit_file text, + data_collection_date timestamp ) """ ) diff --git a/8Knot/queries/ossf_score_query.py b/8Knot/queries/ossf_score_query.py index a826042e..816f53c0 100644 --- a/8Knot/queries/ossf_score_query.py +++ b/8Knot/queries/ossf_score_query.py @@ -33,7 +33,8 @@ def ossf_score_query(self, repos): SELECT repo_id as id, name, - score + score, + data_collection_date FROM repo_deps_scorecard WHERE diff --git a/8Knot/queries/repo_info_query.py b/8Knot/queries/repo_info_query.py index 064dfee2..6c594cc9 100644 --- a/8Knot/queries/repo_info_query.py +++ b/8Knot/queries/repo_info_query.py @@ -39,7 +39,8 @@ def repo_info_query(self, repos): stars_count, code_of_conduct_file, security_issue_file, - security_audit_file + security_audit_file, + data_collection_date FROM repo_info ri WHERE From f72bf72d668c0329980710c1d1076101c17ddc9b Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 4 Oct 2024 10:54:29 -0400 Subject: [PATCH 02/14] attempt to hook this data up to the UI --- .../visualizations/ossf_scorecard.py | 36 +++++++++++++++++-- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py b/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py index b413fea0..8542b91f 100644 --- a/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py +++ b/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py @@ -10,6 +10,7 @@ import cache_manager.cache_facade as cf from pages.utils.job_utils import nodata_graph import time +from datetime import datetime PAGE = "repo_info" VIZ_ID = "ossf-scorecard" @@ -40,6 +41,14 @@ [ dbc.Row( [ + dbc.Col( + dbc.Row( + [ + dbc.Label("Last Updated:", className="mr-2"), + html.Div(id=f"{PAGE}-{VIZ_ID}-updated"), + ] + ), + ), dbc.Col( dbc.Button( "Scorecard Info", @@ -76,7 +85,10 @@ def toggle_popover(n, is_open): # callback for ossf scorecard @callback( - Output(f"{PAGE}-{VIZ_ID}", "children"), + [ + Output(f"{PAGE}-{VIZ_ID}", "children"), + Output(f"{PAGE}-{VIZ_ID}-updated", "children") + ], [ Input("repo-info-selection", "value"), ], @@ -100,16 +112,34 @@ def ossf_scorecard(repo): # test if there is data if df.empty: logging.warning(f"{VIZ_ID} - NO DATA AVAILABLE") - return dbc.Table.from_dataframe(df, striped=True, bordered=True, hover=True) + return dbc.Table.from_dataframe(df, striped=True, bordered=True, hover=True), dbc.Label("No data") # repo id not needed for table df.drop(["repo_id"], axis=1, inplace=True) + # get all values from the data_collection_date column + updated_times = df[["data_collection_date"]] + + unique_updated_times = updated_times.drop_duplicates() + + if len(unique_updated_times) > 1: + logging.warning(f"{VIZ_ID} - MORE THAN ONE DATA COLLECTION DATE") + + + # we dont need to display this column for every entry + df.drop(["data_collection_date"], axis=1, inplace=True) + df.loc[df.name == "OSSF_SCORECARD_AGGREGATE_SCORE", "name"] = "Aggregate Score" df.sort_values("name", ascending=True, inplace=True) df.rename(columns={"name": "Check Type", "score": "Score"}, inplace=True) table = dbc.Table.from_dataframe(df, striped=True, bordered=True, hover=True) + # format the date + updated_date = [datetime.fromisoformat(ut) for ut in unique_updated_times] + logging.info(f"updated_date: {updated_date}") + updated_date = updated_date[-1].strftime("%d/%m/%Y") + logging.info(f"updated_date: {updated_date}") + logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}") - return table + return table, dbc.Label(updated_date) \ No newline at end of file From ffdbfd726431fbd9905882c66ca001bf2ba3ae03 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Sat, 5 Oct 2024 14:38:44 -0400 Subject: [PATCH 03/14] move most of the processing into a try, catch --- .../visualizations/ossf_scorecard.py | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py b/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py index 8542b91f..1d668c7a 100644 --- a/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py +++ b/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py @@ -120,12 +120,6 @@ def ossf_scorecard(repo): # get all values from the data_collection_date column updated_times = df[["data_collection_date"]] - unique_updated_times = updated_times.drop_duplicates() - - if len(unique_updated_times) > 1: - logging.warning(f"{VIZ_ID} - MORE THAN ONE DATA COLLECTION DATE") - - # we dont need to display this column for every entry df.drop(["data_collection_date"], axis=1, inplace=True) @@ -136,10 +130,27 @@ def ossf_scorecard(repo): table = dbc.Table.from_dataframe(df, striped=True, bordered=True, hover=True) # format the date - updated_date = [datetime.fromisoformat(ut) for ut in unique_updated_times] - logging.info(f"updated_date: {updated_date}") - updated_date = updated_date[-1].strftime("%d/%m/%Y") - logging.info(f"updated_date: {updated_date}") + try: + unique_updated_times = updated_times.drop_duplicates() + unique_updated_times = unique_updated_times.to_numpy().flatten() + logging.warning(unique_updated_times) + for t in unique_updated_times: + logging.warning(f"t: {t}, {type(t)}") + + + if len(unique_updated_times) > 1: + logging.warning(f"{VIZ_ID} - MORE THAN ONE DATA COLLECTION DATE") + + logging.warning(f"unique_updated_times: {unique_updated_times}") + + updated_date = [datetime.fromisoformat(str(ut)) for ut in unique_updated_times] + logging.warning(f"updated_date: {updated_date}") + updated_date = updated_date[-1].strftime("%d/%m/%Y") + logging.warning(f"updated_date: {updated_date}") + except Exception as e: + logging.error(f"Error converting date: {e}") + updated_date = "Error" + raise e logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}") return table, dbc.Label(updated_date) \ No newline at end of file From 2929d7c4811c6f3eed2dd3f4af9ad174e5a31ef8 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Sat, 5 Oct 2024 14:39:29 -0400 Subject: [PATCH 04/14] use pandas to convert the datetime since this seems to be causing issues --- 8Knot/pages/repo_overview/visualizations/ossf_scorecard.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py b/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py index 1d668c7a..24950665 100644 --- a/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py +++ b/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py @@ -118,7 +118,7 @@ def ossf_scorecard(repo): df.drop(["repo_id"], axis=1, inplace=True) # get all values from the data_collection_date column - updated_times = df[["data_collection_date"]] + updated_times = pd.to_datetime(df["data_collection_date"]) # we dont need to display this column for every entry df.drop(["data_collection_date"], axis=1, inplace=True) @@ -143,7 +143,6 @@ def ossf_scorecard(repo): logging.warning(f"unique_updated_times: {unique_updated_times}") - updated_date = [datetime.fromisoformat(str(ut)) for ut in unique_updated_times] logging.warning(f"updated_date: {updated_date}") updated_date = updated_date[-1].strftime("%d/%m/%Y") logging.warning(f"updated_date: {updated_date}") From 99cbb21e47f60ec65ef6d9b355cce88a7292492a Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Sat, 5 Oct 2024 14:40:19 -0400 Subject: [PATCH 05/14] attempt to hook up UI and last updated date for general info section in the same way --- .../visualizations/repo_general_info.py | 39 +++++++++++++++++-- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/8Knot/pages/repo_overview/visualizations/repo_general_info.py b/8Knot/pages/repo_overview/visualizations/repo_general_info.py index 14c266b3..418a4923 100644 --- a/8Knot/pages/repo_overview/visualizations/repo_general_info.py +++ b/8Knot/pages/repo_overview/visualizations/repo_general_info.py @@ -15,6 +15,7 @@ import cache_manager.cache_facade as cf from pages.utils.job_utils import nodata_graph import time +from datetime import datetime PAGE = "repo_info" VIZ_ID = "repo-general-info" @@ -31,6 +32,12 @@ dcc.Loading( html.Div(id=f"{PAGE}-{VIZ_ID}"), ), + dbc.Row( + [ + dbc.Label("Last Updated:", className="mr-2"), + html.Div(id=f"{PAGE}-{VIZ_ID}-updated"), + ] + ) ] ) ], @@ -51,7 +58,10 @@ def toggle_popover(n, is_open): # callback for repo general info @callback( - Output(f"{PAGE}-{VIZ_ID}", "children"), + [ + Output(f"{PAGE}-{VIZ_ID}", "children"), + Output(f"{PAGE}-{VIZ_ID}-updated", "children") + ], [ Input("repo-info-selection", "value"), ], @@ -68,9 +78,9 @@ def repo_general_info(repo): # test if there is data if df_repo_files.empty and df_repo_info.empty and df_releases.empty: logging.warning(f"{VIZ_ID} - NO DATA AVAILABLE") - return dbc.Table.from_dataframe(pd.DataFrame(), striped=True, bordered=True, hover=True) + return dbc.Table.from_dataframe(pd.DataFrame(), striped=True, bordered=True, hover=True), dbc.Label("No data") - df = process_data(df_repo_files, df_repo_info, df_releases) + df, last_updated = process_data(df_repo_files, df_repo_info, df_releases) table = dbc.Table.from_dataframe(df, striped=True, bordered=True, hover=True) @@ -80,6 +90,27 @@ def repo_general_info(repo): def process_data(df_repo_files, df_repo_info, df_releases): + # get all values from the data_collection_date column + updated_times_repo_files = pd.to_datetime(df_repo_files["data_collection_date"]) + updated_times_repo_info = pd.to_datetime(df_repo_info["data_collection_date"]) + updated_times_releases = pd.to_datetime(df_releases["data_collection_date"]) + + # format the date + try: + updated_times = pd.concat([updated_times_repo_files, updated_times_repo_info, updated_times_releases]) + unique_updated_times = updated_times.drop_duplicates() + unique_updated_times = unique_updated_times.to_numpy().flatten() + + if len(unique_updated_times) > 1: + logging.warning(f"{VIZ_ID} - MORE THAN ONE LAST UPDATE DATE") + + + logging.warning(f"updated_date: {updated_date}") + updated_date = updated_date[-1].strftime("%d/%m/%Y") + logging.warning(f"updated_date: {updated_date}") + except Exception as e: + logging.error(f"Error converting date: {e}") + updated_date = "Error" # convert to datetime objects rather than strings df_releases["release_published_at"] = pd.to_datetime(df_releases["release_published_at"], utc=True) @@ -164,7 +195,7 @@ def process_data(df_repo_files, df_repo_info, df_releases): } ) - return df + return df, dbc.Label(updated_date) def multi_query_helper(repos): From b034dc2e9d8500358420f3854b342b4afb0bf29f Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Sat, 5 Oct 2024 14:44:19 -0400 Subject: [PATCH 06/14] fix reference before assignment --- 8Knot/pages/repo_overview/visualizations/ossf_scorecard.py | 1 - 8Knot/pages/repo_overview/visualizations/repo_general_info.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py b/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py index 24950665..52cfcd6f 100644 --- a/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py +++ b/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py @@ -143,7 +143,6 @@ def ossf_scorecard(repo): logging.warning(f"unique_updated_times: {unique_updated_times}") - logging.warning(f"updated_date: {updated_date}") updated_date = updated_date[-1].strftime("%d/%m/%Y") logging.warning(f"updated_date: {updated_date}") except Exception as e: diff --git a/8Knot/pages/repo_overview/visualizations/repo_general_info.py b/8Knot/pages/repo_overview/visualizations/repo_general_info.py index 418a4923..334f550d 100644 --- a/8Knot/pages/repo_overview/visualizations/repo_general_info.py +++ b/8Knot/pages/repo_overview/visualizations/repo_general_info.py @@ -104,8 +104,6 @@ def process_data(df_repo_files, df_repo_info, df_releases): if len(unique_updated_times) > 1: logging.warning(f"{VIZ_ID} - MORE THAN ONE LAST UPDATE DATE") - - logging.warning(f"updated_date: {updated_date}") updated_date = updated_date[-1].strftime("%d/%m/%Y") logging.warning(f"updated_date: {updated_date}") except Exception as e: From fed06791b44f8dd0d9e26c57e735d94151ffbf74 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Mon, 7 Oct 2024 11:19:56 -0400 Subject: [PATCH 07/14] proper datetime conversion from pandas format --- 8Knot/pages/repo_overview/visualizations/ossf_scorecard.py | 2 +- 8Knot/pages/repo_overview/visualizations/repo_general_info.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py b/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py index 52cfcd6f..d6496d42 100644 --- a/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py +++ b/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py @@ -143,7 +143,7 @@ def ossf_scorecard(repo): logging.warning(f"unique_updated_times: {unique_updated_times}") - updated_date = updated_date[-1].strftime("%d/%m/%Y") + updated_date = pd.to_datetime(str(unique_updated_times[-1])).strftime("%d/%m/%Y") logging.warning(f"updated_date: {updated_date}") except Exception as e: logging.error(f"Error converting date: {e}") diff --git a/8Knot/pages/repo_overview/visualizations/repo_general_info.py b/8Knot/pages/repo_overview/visualizations/repo_general_info.py index 334f550d..a3b7bd1a 100644 --- a/8Knot/pages/repo_overview/visualizations/repo_general_info.py +++ b/8Knot/pages/repo_overview/visualizations/repo_general_info.py @@ -104,7 +104,7 @@ def process_data(df_repo_files, df_repo_info, df_releases): if len(unique_updated_times) > 1: logging.warning(f"{VIZ_ID} - MORE THAN ONE LAST UPDATE DATE") - updated_date = updated_date[-1].strftime("%d/%m/%Y") + updated_date = pd.to_datetime(str(unique_updated_times[-1])).strftime("%d/%m/%Y") logging.warning(f"updated_date: {updated_date}") except Exception as e: logging.error(f"Error converting date: {e}") From 7060c9d35063c4d737cc6d531d9df61a26e886aa Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Mon, 7 Oct 2024 11:37:05 -0400 Subject: [PATCH 08/14] format --- .../repo_overview/visualizations/ossf_scorecard.py | 12 ++++-------- .../visualizations/repo_general_info.py | 9 +++------ 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py b/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py index d6496d42..2faef5e8 100644 --- a/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py +++ b/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py @@ -85,10 +85,7 @@ def toggle_popover(n, is_open): # callback for ossf scorecard @callback( - [ - Output(f"{PAGE}-{VIZ_ID}", "children"), - Output(f"{PAGE}-{VIZ_ID}-updated", "children") - ], + [Output(f"{PAGE}-{VIZ_ID}", "children"), Output(f"{PAGE}-{VIZ_ID}-updated", "children")], [ Input("repo-info-selection", "value"), ], @@ -137,13 +134,12 @@ def ossf_scorecard(repo): for t in unique_updated_times: logging.warning(f"t: {t}, {type(t)}") - if len(unique_updated_times) > 1: logging.warning(f"{VIZ_ID} - MORE THAN ONE DATA COLLECTION DATE") - + logging.warning(f"unique_updated_times: {unique_updated_times}") - updated_date = pd.to_datetime(str(unique_updated_times[-1])).strftime("%d/%m/%Y") + updated_date = pd.to_datetime(str(unique_updated_times[-1])).strftime("%d/%m/%Y") logging.warning(f"updated_date: {updated_date}") except Exception as e: logging.error(f"Error converting date: {e}") @@ -151,4 +147,4 @@ def ossf_scorecard(repo): raise e logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}") - return table, dbc.Label(updated_date) \ No newline at end of file + return table, dbc.Label(updated_date) diff --git a/8Knot/pages/repo_overview/visualizations/repo_general_info.py b/8Knot/pages/repo_overview/visualizations/repo_general_info.py index a3b7bd1a..3d6a3a16 100644 --- a/8Knot/pages/repo_overview/visualizations/repo_general_info.py +++ b/8Knot/pages/repo_overview/visualizations/repo_general_info.py @@ -37,7 +37,7 @@ dbc.Label("Last Updated:", className="mr-2"), html.Div(id=f"{PAGE}-{VIZ_ID}-updated"), ] - ) + ), ] ) ], @@ -58,10 +58,7 @@ def toggle_popover(n, is_open): # callback for repo general info @callback( - [ - Output(f"{PAGE}-{VIZ_ID}", "children"), - Output(f"{PAGE}-{VIZ_ID}-updated", "children") - ], + [Output(f"{PAGE}-{VIZ_ID}", "children"), Output(f"{PAGE}-{VIZ_ID}-updated", "children")], [ Input("repo-info-selection", "value"), ], @@ -90,7 +87,7 @@ def repo_general_info(repo): def process_data(df_repo_files, df_repo_info, df_releases): - # get all values from the data_collection_date column + # get all values from the data_collection_date column updated_times_repo_files = pd.to_datetime(df_repo_files["data_collection_date"]) updated_times_repo_info = pd.to_datetime(df_repo_info["data_collection_date"]) updated_times_releases = pd.to_datetime(df_releases["data_collection_date"]) From 9fb275839a78808c3058de85d06dc4f3865bb44e Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Mon, 7 Oct 2024 12:21:32 -0400 Subject: [PATCH 09/14] bypass the uncooperative dataframes for now --- .../repo_overview/visualizations/repo_general_info.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/8Knot/pages/repo_overview/visualizations/repo_general_info.py b/8Knot/pages/repo_overview/visualizations/repo_general_info.py index 3d6a3a16..85e03641 100644 --- a/8Knot/pages/repo_overview/visualizations/repo_general_info.py +++ b/8Knot/pages/repo_overview/visualizations/repo_general_info.py @@ -88,13 +88,14 @@ def repo_general_info(repo): def process_data(df_repo_files, df_repo_info, df_releases): # get all values from the data_collection_date column - updated_times_repo_files = pd.to_datetime(df_repo_files["data_collection_date"]) + # updated_times_repo_files = pd.to_datetime(df_repo_files["data_collection_date"]) updated_times_repo_info = pd.to_datetime(df_repo_info["data_collection_date"]) - updated_times_releases = pd.to_datetime(df_releases["data_collection_date"]) + # updated_times_releases = pd.to_datetime(df_releases["data_collection_date"]) # format the date try: - updated_times = pd.concat([updated_times_repo_files, updated_times_repo_info, updated_times_releases]) + # updated_times = pd.concat([updated_times_repo_files, updated_times_repo_info, updated_times_releases]) + updated_times = updated_times_repo_info unique_updated_times = updated_times.drop_duplicates() unique_updated_times = unique_updated_times.to_numpy().flatten() From 9fcabf466c960f1dad24534c5c66b8a61ad038c5 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Mon, 7 Oct 2024 12:56:06 -0400 Subject: [PATCH 10/14] actually pass the data collection date through --- 8Knot/pages/repo_overview/visualizations/repo_general_info.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/8Knot/pages/repo_overview/visualizations/repo_general_info.py b/8Knot/pages/repo_overview/visualizations/repo_general_info.py index 85e03641..633ab528 100644 --- a/8Knot/pages/repo_overview/visualizations/repo_general_info.py +++ b/8Knot/pages/repo_overview/visualizations/repo_general_info.py @@ -82,7 +82,7 @@ def repo_general_info(repo): table = dbc.Table.from_dataframe(df, striped=True, bordered=True, hover=True) logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}") - return table + return table, last_updated def process_data(df_repo_files, df_repo_info, df_releases): From ba9b32bf899ca6fa4ac638deea296deaf274579d Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 9 Oct 2024 09:26:54 -0400 Subject: [PATCH 11/14] put date on same line as "Last Updated" label --- .../pages/repo_overview/visualizations/ossf_scorecard.py | 8 ++++++-- .../repo_overview/visualizations/repo_general_info.py | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py b/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py index 2faef5e8..60f9c80d 100644 --- a/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py +++ b/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py @@ -44,8 +44,12 @@ dbc.Col( dbc.Row( [ - dbc.Label("Last Updated:", className="mr-2"), - html.Div(id=f"{PAGE}-{VIZ_ID}-updated"), + dbc.Label( + [ + "Last Updated:", + html.Span(id=f"{PAGE}-{VIZ_ID}-updated") + ], className="mr-2" + ) ] ), ), diff --git a/8Knot/pages/repo_overview/visualizations/repo_general_info.py b/8Knot/pages/repo_overview/visualizations/repo_general_info.py index 633ab528..331ddd0e 100644 --- a/8Knot/pages/repo_overview/visualizations/repo_general_info.py +++ b/8Knot/pages/repo_overview/visualizations/repo_general_info.py @@ -34,8 +34,12 @@ ), dbc.Row( [ - dbc.Label("Last Updated:", className="mr-2"), - html.Div(id=f"{PAGE}-{VIZ_ID}-updated"), + dbc.Label( + [ + "Last Updated:", + html.Span(id=f"{PAGE}-{VIZ_ID}-updated") + ], className="mr-2" + ) ] ), ] From bd1facdecaca081b418eb975f0be3c8ba9e7b858 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 9 Oct 2024 09:28:57 -0400 Subject: [PATCH 12/14] simplify the date formatting code and remove debug stuff --- .../visualizations/ossf_scorecard.py | 25 ++++------------ .../visualizations/repo_general_info.py | 29 ++++++------------- 2 files changed, 15 insertions(+), 39 deletions(-) diff --git a/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py b/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py index 60f9c80d..e4f760b5 100644 --- a/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py +++ b/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py @@ -130,25 +130,12 @@ def ossf_scorecard(repo): table = dbc.Table.from_dataframe(df, striped=True, bordered=True, hover=True) - # format the date - try: - unique_updated_times = updated_times.drop_duplicates() - unique_updated_times = unique_updated_times.to_numpy().flatten() - logging.warning(unique_updated_times) - for t in unique_updated_times: - logging.warning(f"t: {t}, {type(t)}") - - if len(unique_updated_times) > 1: - logging.warning(f"{VIZ_ID} - MORE THAN ONE DATA COLLECTION DATE") - - logging.warning(f"unique_updated_times: {unique_updated_times}") - - updated_date = pd.to_datetime(str(unique_updated_times[-1])).strftime("%d/%m/%Y") - logging.warning(f"updated_date: {updated_date}") - except Exception as e: - logging.error(f"Error converting date: {e}") - updated_date = "Error" - raise e + unique_updated_times = updated_times.drop_duplicates().to_numpy().flatten() + + if len(unique_updated_times) > 1: + logging.warning(f"{VIZ_ID} - MORE THAN ONE DATA COLLECTION DATE") + + updated_date = pd.to_datetime(str(unique_updated_times[-1])).strftime("%d/%m/%Y") logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}") return table, dbc.Label(updated_date) diff --git a/8Knot/pages/repo_overview/visualizations/repo_general_info.py b/8Knot/pages/repo_overview/visualizations/repo_general_info.py index 331ddd0e..afc1014c 100644 --- a/8Knot/pages/repo_overview/visualizations/repo_general_info.py +++ b/8Knot/pages/repo_overview/visualizations/repo_general_info.py @@ -90,27 +90,16 @@ def repo_general_info(repo): def process_data(df_repo_files, df_repo_info, df_releases): - - # get all values from the data_collection_date column - # updated_times_repo_files = pd.to_datetime(df_repo_files["data_collection_date"]) + updated_times_repo_info = pd.to_datetime(df_repo_info["data_collection_date"]) - # updated_times_releases = pd.to_datetime(df_releases["data_collection_date"]) - - # format the date - try: - # updated_times = pd.concat([updated_times_repo_files, updated_times_repo_info, updated_times_releases]) - updated_times = updated_times_repo_info - unique_updated_times = updated_times.drop_duplicates() - unique_updated_times = unique_updated_times.to_numpy().flatten() - - if len(unique_updated_times) > 1: - logging.warning(f"{VIZ_ID} - MORE THAN ONE LAST UPDATE DATE") - - updated_date = pd.to_datetime(str(unique_updated_times[-1])).strftime("%d/%m/%Y") - logging.warning(f"updated_date: {updated_date}") - except Exception as e: - logging.error(f"Error converting date: {e}") - updated_date = "Error" + + unique_updated_times = updated_times_repo_info.drop_duplicates().to_numpy().flatten() + + if len(unique_updated_times) > 1: + logging.warning(f"{VIZ_ID} - MORE THAN ONE LAST UPDATE DATE") + + updated_date = pd.to_datetime(str(unique_updated_times[-1])).strftime("%d/%m/%Y") + # convert to datetime objects rather than strings df_releases["release_published_at"] = pd.to_datetime(df_releases["release_published_at"], utc=True) From 9d2d0b11870132990dc2a48ce8714c4d5d4812dd Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Wed, 9 Oct 2024 09:32:46 -0400 Subject: [PATCH 13/14] run formatter --- .../repo_overview/visualizations/ossf_scorecard.py | 6 ++---- .../visualizations/repo_general_info.py | 13 ++----------- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py b/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py index e4f760b5..29eb36a2 100644 --- a/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py +++ b/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py @@ -45,10 +45,8 @@ dbc.Row( [ dbc.Label( - [ - "Last Updated:", - html.Span(id=f"{PAGE}-{VIZ_ID}-updated") - ], className="mr-2" + ["Last Updated:", html.Span(id=f"{PAGE}-{VIZ_ID}-updated")], + className="mr-2", ) ] ), diff --git a/8Knot/pages/repo_overview/visualizations/repo_general_info.py b/8Knot/pages/repo_overview/visualizations/repo_general_info.py index afc1014c..e0db7cb5 100644 --- a/8Knot/pages/repo_overview/visualizations/repo_general_info.py +++ b/8Knot/pages/repo_overview/visualizations/repo_general_info.py @@ -32,16 +32,7 @@ dcc.Loading( html.Div(id=f"{PAGE}-{VIZ_ID}"), ), - dbc.Row( - [ - dbc.Label( - [ - "Last Updated:", - html.Span(id=f"{PAGE}-{VIZ_ID}-updated") - ], className="mr-2" - ) - ] - ), + dbc.Row([dbc.Label(["Last Updated:", html.Span(id=f"{PAGE}-{VIZ_ID}-updated")], className="mr-2")]), ] ) ], @@ -90,7 +81,7 @@ def repo_general_info(repo): def process_data(df_repo_files, df_repo_info, df_releases): - + updated_times_repo_info = pd.to_datetime(df_repo_info["data_collection_date"]) unique_updated_times = updated_times_repo_info.drop_duplicates().to_numpy().flatten() From bf3b214372bc6701d3dcdcb956d43c44e00d2747 Mon Sep 17 00:00:00 2001 From: Adrian Edwards Date: Fri, 11 Oct 2024 12:53:45 -0400 Subject: [PATCH 14/14] add spaces after "last updated" label --- 8Knot/pages/repo_overview/visualizations/ossf_scorecard.py | 2 +- 8Knot/pages/repo_overview/visualizations/repo_general_info.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py b/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py index 29eb36a2..9a6ba46d 100644 --- a/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py +++ b/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py @@ -45,7 +45,7 @@ dbc.Row( [ dbc.Label( - ["Last Updated:", html.Span(id=f"{PAGE}-{VIZ_ID}-updated")], + ["Last Updated: ", html.Span(id=f"{PAGE}-{VIZ_ID}-updated")], className="mr-2", ) ] diff --git a/8Knot/pages/repo_overview/visualizations/repo_general_info.py b/8Knot/pages/repo_overview/visualizations/repo_general_info.py index e0db7cb5..d844f5b1 100644 --- a/8Knot/pages/repo_overview/visualizations/repo_general_info.py +++ b/8Knot/pages/repo_overview/visualizations/repo_general_info.py @@ -32,7 +32,7 @@ dcc.Loading( html.Div(id=f"{PAGE}-{VIZ_ID}"), ), - dbc.Row([dbc.Label(["Last Updated:", html.Span(id=f"{PAGE}-{VIZ_ID}-updated")], className="mr-2")]), + dbc.Row([dbc.Label(["Last Updated: ", html.Span(id=f"{PAGE}-{VIZ_ID}-updated")], className="mr-2")]), ] ) ],