diff --git a/8Knot/cache_manager/db_init.py b/8Knot/cache_manager/db_init.py
index dd2c2a83..17483d38 100644
--- a/8Knot/cache_manager/db_init.py
+++ b/8Knot/cache_manager/db_init.py
@@ -306,7 +306,8 @@ def _create_application_tables() -> None:
             CREATE UNLOGGED TABLE IF NOT EXISTS ossf_score_query(
                 repo_id int,
                 name text,
-                score float4
+                score float4,
+                data_collection_date timestamp
             )
             """
         )
@@ -323,7 +324,8 @@ def _create_application_tables() -> None:
                 stars_count int,
                 code_of_conduct_file text,
                 security_issue_file text,
-                security_audit_file text
+                security_audit_file text,
+                data_collection_date timestamp
             )
             """
         )
diff --git a/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py b/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py
index b413fea0..9a6ba46d 100644
--- a/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py
+++ b/8Knot/pages/repo_overview/visualizations/ossf_scorecard.py
@@ -10,6 +10,7 @@
 import cache_manager.cache_facade as cf
 from pages.utils.job_utils import nodata_graph
 import time
+from datetime import datetime
 
 PAGE = "repo_info"
 VIZ_ID = "ossf-scorecard"
@@ -40,6 +41,16 @@
         [
             dbc.Row(
                 [
+                    dbc.Col(
+                        dbc.Row(
+                            [
+                                dbc.Label(
+                                    ["Last Updated: ", html.Span(id=f"{PAGE}-{VIZ_ID}-updated")],
+                                    className="mr-2",
+                                )
+                            ]
+                        ),
+                    ),
                     dbc.Col(
                         dbc.Button(
                             "Scorecard Info",
@@ -76,7 +87,7 @@ def toggle_popover(n, is_open):
 
 # callback for ossf scorecard
 @callback(
-    Output(f"{PAGE}-{VIZ_ID}", "children"),
+    [Output(f"{PAGE}-{VIZ_ID}", "children"), Output(f"{PAGE}-{VIZ_ID}-updated", "children")],
     [
         Input("repo-info-selection", "value"),
     ],
@@ -100,16 +111,29 @@ def ossf_scorecard(repo):
     # test if there is data
     if df.empty:
         logging.warning(f"{VIZ_ID} - NO DATA AVAILABLE")
-        return dbc.Table.from_dataframe(df, striped=True, bordered=True, hover=True)
+        return dbc.Table.from_dataframe(df, striped=True, bordered=True, hover=True), dbc.Label("No data")
 
     # repo id not needed for table
     df.drop(["repo_id"], axis=1, inplace=True)
 
+    # get all values from the data_collection_date column
+    updated_times = pd.to_datetime(df["data_collection_date"])
+
+    # we dont need to display this column for every entry
+    df.drop(["data_collection_date"], axis=1, inplace=True)
+
     df.loc[df.name == "OSSF_SCORECARD_AGGREGATE_SCORE", "name"] = "Aggregate Score"
     df.sort_values("name", ascending=True, inplace=True)
     df.rename(columns={"name": "Check Type", "score": "Score"}, inplace=True)
 
     table = dbc.Table.from_dataframe(df, striped=True, bordered=True, hover=True)
 
+    unique_updated_times = updated_times.drop_duplicates().to_numpy().flatten()
+
+    if len(unique_updated_times) > 1:
+        logging.warning(f"{VIZ_ID} - MORE THAN ONE DATA COLLECTION DATE")
+
+    updated_date = pd.to_datetime(str(unique_updated_times[-1])).strftime("%d/%m/%Y")
+
     logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}")
-    return table
+    return table, dbc.Label(updated_date)
diff --git a/8Knot/pages/repo_overview/visualizations/repo_general_info.py b/8Knot/pages/repo_overview/visualizations/repo_general_info.py
index 14c266b3..d844f5b1 100644
--- a/8Knot/pages/repo_overview/visualizations/repo_general_info.py
+++ b/8Knot/pages/repo_overview/visualizations/repo_general_info.py
@@ -15,6 +15,7 @@
 import cache_manager.cache_facade as cf
 from pages.utils.job_utils import nodata_graph
 import time
+from datetime import datetime
 
 PAGE = "repo_info"
 VIZ_ID = "repo-general-info"
@@ -31,6 +32,7 @@
                     dcc.Loading(
                         html.Div(id=f"{PAGE}-{VIZ_ID}"),
                     ),
+                    dbc.Row([dbc.Label(["Last Updated: ", html.Span(id=f"{PAGE}-{VIZ_ID}-updated")], className="mr-2")]),
                 ]
             )
         ],
@@ -51,7 +53,7 @@ def toggle_popover(n, is_open):
 
 # callback for repo general info
 @callback(
-    Output(f"{PAGE}-{VIZ_ID}", "children"),
+    [Output(f"{PAGE}-{VIZ_ID}", "children"), Output(f"{PAGE}-{VIZ_ID}-updated", "children")],
     [
         Input("repo-info-selection", "value"),
     ],
@@ -68,18 +70,27 @@ def repo_general_info(repo):
     # test if there is data
     if df_repo_files.empty and df_repo_info.empty and df_releases.empty:
         logging.warning(f"{VIZ_ID} - NO DATA AVAILABLE")
-        return dbc.Table.from_dataframe(pd.DataFrame(), striped=True, bordered=True, hover=True)
+        return dbc.Table.from_dataframe(pd.DataFrame(), striped=True, bordered=True, hover=True), dbc.Label("No data")
 
-    df = process_data(df_repo_files, df_repo_info, df_releases)
+    df, last_updated = process_data(df_repo_files, df_repo_info, df_releases)
 
     table = dbc.Table.from_dataframe(df, striped=True, bordered=True, hover=True)
 
     logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}")
-    return table
+    return table, last_updated
 
 
 def process_data(df_repo_files, df_repo_info, df_releases):
+    updated_times_repo_info = pd.to_datetime(df_repo_info["data_collection_date"])
+
+    unique_updated_times = updated_times_repo_info.drop_duplicates().to_numpy().flatten()
+
+    if len(unique_updated_times) > 1:
+        logging.warning(f"{VIZ_ID} - MORE THAN ONE LAST UPDATE DATE")
+
+    updated_date = pd.to_datetime(str(unique_updated_times[-1])).strftime("%d/%m/%Y")
+
     # convert to datetime objects rather than strings
     df_releases["release_published_at"] = pd.to_datetime(df_releases["release_published_at"], utc=True)
 
@@ -164,7 +175,7 @@ def process_data(df_repo_files, df_repo_info, df_releases):
         }
     )
 
-    return df
+    return df, dbc.Label(updated_date)
 
 
 def multi_query_helper(repos):
diff --git a/8Knot/queries/ossf_score_query.py b/8Knot/queries/ossf_score_query.py
index a826042e..816f53c0 100644
--- a/8Knot/queries/ossf_score_query.py
+++ b/8Knot/queries/ossf_score_query.py
@@ -33,7 +33,8 @@ def ossf_score_query(self, repos):
                     SELECT
                         repo_id as id,
                         name,
-                        score
+                        score,
+                        data_collection_date
                     FROM
                         repo_deps_scorecard
                     WHERE
diff --git a/8Knot/queries/repo_info_query.py b/8Knot/queries/repo_info_query.py
index 064dfee2..6c594cc9 100644
--- a/8Knot/queries/repo_info_query.py
+++ b/8Knot/queries/repo_info_query.py
@@ -39,7 +39,8 @@ def repo_info_query(self, repos):
                         stars_count,
                         code_of_conduct_file,
                         security_issue_file,
-                        security_audit_file
+                        security_audit_file,
+                        data_collection_date
                     FROM
                         repo_info ri
                     WHERE