diff --git a/.wordlist-md b/.wordlist-md index 86bf4a46..2c407053 100644 --- a/.wordlist-md +++ b/.wordlist-md @@ -72,3 +72,11 @@ Docker's data-center OAuth postgres +Podman +filesystem +credsStore +credStore +filesystem +Podman +credsStore +credStore diff --git a/8Knot/pages/chaoss/visualizations/project_velocity.py b/8Knot/pages/chaoss/visualizations/project_velocity.py index 3740c1ab..5b21efa8 100644 --- a/8Knot/pages/chaoss/visualizations/project_velocity.py +++ b/8Knot/pages/chaoss/visualizations/project_velocity.py @@ -331,9 +331,11 @@ def process_data( # replace all nan to 0 df_consolidated.fillna(value=0, inplace=True) - # log of commits and contribs - df_consolidated["log_num_commits"] = df_consolidated["Commit"].apply(math.log) - df_consolidated["log_num_contrib"] = df_consolidated["num_unique_contributors"].apply(math.log) + # log of commits and contribs if values are not 0 + df_consolidated["log_num_commits"] = df_consolidated["Commit"].apply(lambda x: math.log(x) if x != 0 else 0) + df_consolidated["log_num_contrib"] = df_consolidated["num_unique_contributors"].apply( + lambda x: math.log(x) if x != 0 else 0 + ) # column to hold the weighted values of pr and issues actions summed together df_consolidated["prs_issues_actions_weighted"] = ( diff --git a/8Knot/pages/codebase/codebase.py b/8Knot/pages/codebase/codebase.py index 4979461a..43a36a2e 100644 --- a/8Knot/pages/codebase/codebase.py +++ b/8Knot/pages/codebase/codebase.py @@ -16,14 +16,14 @@ [ dbc.Row( [ - dbc.Col(gc_cntrb_file_heatmap, width=12), + dbc.Col(gc_contribution_file_heatmap, width=12), ], align="center", style={"marginBottom": ".5%"}, ), dbc.Row( [ - dbc.Col(gc_contribution_file_heatmap, width=12), + dbc.Col(gc_cntrb_file_heatmap, width=12), ], align="center", style={"marginBottom": ".5%"}, diff --git a/8Knot/pages/codebase/visualizations/cntrb_file_heatmap.py b/8Knot/pages/codebase/visualizations/cntrb_file_heatmap.py index 431b199b..081f335e 100644 --- a/8Knot/pages/codebase/visualizations/cntrb_file_heatmap.py +++ b/8Knot/pages/codebase/visualizations/cntrb_file_heatmap.py @@ -80,6 +80,7 @@ classNames={"values": "dmc-multiselect-custom"}, searchable=True, clearable=False, + value="Top Level Directory", ), ], className="me-2", @@ -215,7 +216,7 @@ def directory_dropdown(repo_id): # add top level directory to the list of directories directories.insert(0, "Top Level Directory") - logging.warning(f"DIRECTORY DROPDOWN - FINISHED") + logging.warning(f"CNTRB DIRECTORY DROPDOWN - FINISHED") return directories, "Top Level Directory" @@ -224,18 +225,19 @@ def directory_dropdown(repo_id): @callback( Output(f"{PAGE}-{VIZ_ID}", "figure"), [ + Input("repo-choices", "data"), Input(f"repo-{PAGE}-{VIZ_ID}", "value"), Input(f"directory-{PAGE}-{VIZ_ID}", "value"), Input("bot-switch", "value"), ], background=True, ) -def cntrb_file_heatmap_graph(repo_id, directory, bot_switch): +def cntrb_file_heatmap_graph(searchbar_repos, repo_id, directory, bot_switch): start = time.perf_counter() logging.warning(f"{VIZ_ID}- START") # get dataframes of data from cache - df_file, df_actions, df_file_cntbs = multi_query_helper([repo_id]) + df_file, df_actions, df_file_cntbs = multi_query_helper(searchbar_repos, [repo_id]) # test if there is data if df_file.empty or df_actions.empty or df_file_cntbs.empty: @@ -255,7 +257,7 @@ def cntrb_file_heatmap_graph(repo_id, directory, bot_switch): return fig -def multi_query_helper(repos): +def multi_query_helper(searchbar_repos, repo): """ For cntrb_file_heatmap_graph- hack to put all of the 
cache-retrieval @@ -263,32 +265,32 @@ def multi_query_helper(repos): """ # wait for data to asynchronously download and become available. - while not_cached := cf.get_uncached(func_name=rfq.__name__, repolist=repos): + while not_cached := cf.get_uncached(func_name=rfq.__name__, repolist=repo): logging.warning(f"CONTRIBUTOR FILE HEATMAP - WAITING ON DATA TO BECOME AVAILABLE") time.sleep(0.5) # wait for data to asynchronously download and become available. - while not_cached := cf.get_uncached(func_name=cnq.__name__, repolist=repos): + while not_cached := cf.get_uncached(func_name=cnq.__name__, repolist=searchbar_repos): logging.warning(f"CONTRIBUTOR FILE HEATMAP - WAITING ON DATA TO BECOME AVAILABLE") time.sleep(0.5) # wait for data to asynchronously download and become available. - while not_cached := cf.get_uncached(func_name=cpfq.__name__, repolist=repos): + while not_cached := cf.get_uncached(func_name=cpfq.__name__, repolist=repo): logging.warning(f"CONTRIBUTOR FILE HEATMAP - WAITING ON DATA TO BECOME AVAILABLE") time.sleep(0.5) # GET ALL DATA FROM POSTGRES CACHE df_file = cf.retrieve_from_cache( tablename=rfq.__name__, - repolist=repos, + repolist=repo, ) df_actions = cf.retrieve_from_cache( tablename=cnq.__name__, - repolist=repos, + repolist=searchbar_repos, ) df_file_cntrbs = cf.retrieve_from_cache( tablename=cpfq.__name__, - repolist=repos, + repolist=repo, ) # necessary preprocessing steps that were lifted out of the querying step @@ -305,6 +307,64 @@ def process_data( directory, bot_switch, ): + """ + Processing steps + + 1 - Cleans up file data to only include current files and relate files in the repository to the contributors who have reviewed them in past PRs. + 2 - For a given level in the directory tree, aggregate the list of contributors for sub-directories and for individual files at the level. + 3 - For each contributor, identify their most recent contribution. 
+ 4 - Transforms dataframe where columns are months with counts of "last seen" dates in that month and the rows are the file/subdirectory + """ + + df_file = df_file_clean(df_file, df_file_cntbs, bot_switch) + + df_dynamic_directory = cntrb_per_directory_value(directory, df_file) + + # work around for using functions, will clean later + if df_dynamic_directory.empty: + return df_dynamic_directory + + df_dynamic_directory = cntrb_to_last_activity(df_actions, df_dynamic_directory) + + final = file_cntrb_activity_by_month(df_dynamic_directory, df_actions) + + return final + + +def create_figure(df: pd.DataFrame): + fig = px.imshow( + df, + labels=dict(x="Time", y="Directory Entries", color="Contributors"), + color_continuous_scale=px.colors.sequential.deep, + ) + + fig["layout"]["yaxis"]["tickmode"] = "linear" + fig["layout"]["height"] = 700 + fig["layout"]["coloraxis_colorbar_x"] = -0.15 + fig["layout"]["yaxis"]["side"] = "right" + + return fig + + +def df_file_clean(df_file: pd.DataFrame, df_file_cntbs: pd.DataFrame, bot_switch): + """ + This function cleans the df_file data and combines it with the related cntrb_ids + + Args: + ----- + df_file : Pandas Dataframe + Dataframe with the output of the repo_files_query + + df_file_cntrbs : Pandas Dataframe + Dataframe with the output of the cntrb_per_file_query + + bot_switch : boolean + T/F for the status of the bot switch + + Returns: + -------- + df_file: df with file and cntrb_ids of contributors that reviewed a pr with that file in it + """ # strings to hold the values for each column (always the same for every row of this query) repo_name = df_file["repo_name"].iloc[0] repo_path = df_file["repo_path"].iloc[0] @@ -326,7 +386,7 @@ def process_data( df_file_cntbs.drop(["repo_id", "reviewer_ids"], axis=1, inplace=True) # Left join on df_files to only get the files that are currently in the repository - # and the contributors that have ever opened a pr that included edits on the file + # and the contributors that have ever reviewed a pr that included edits on the file df_file = pd.merge(df_file, df_file_cntbs, on="file_path", how="left") # replace nan with empty string to avoid errors in list comprehension df_file.cntrb_ids.fillna("", inplace=True) @@ -343,6 +403,26 @@ def process_data( axis=1, ) + return df_file + + +def cntrb_per_directory_value(directory, df_file): + """ + This function gets the files in the specified directory, groups together any files in + subdirectories, and creates a list of their contributors' cntrb_ids + + Args: + ----- + directory : string + Output from the directory drop down + + df_file : Pandas Dataframe + Dataframe with file and related cntrb_id information + + Returns: + -------- + df_dynamic_directory: df with the file and subdirectories and their reviewers' cntrb_ids + """ # determine directory level to use in later step level = directory.count("/") if directory == "Top Level Directory": @@ -377,6 +457,25 @@ def process_data( lambda row: set(row.cntrb_ids), axis=1, ) + return df_dynamic_directory + + +def cntrb_to_last_activity(df_actions: pd.DataFrame, df_dynamic_directory: pd.DataFrame): + """ + This function creates a df with the files and the dates of the most recent activity for each cntrb_id.
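+ + ("Most recent activity" is taken from df_actions["created_at"], parsed as UTC, and mapped onto the cntrb_ids attached to each file or subdirectory.)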
+ + Args: + ----- + df_actions : Pandas Dataframe + Dataframe with contributor activity + + df_dynamic_directory : Pandas Dataframe + Dataframe with file and related cntrb_id information + + Returns: + -------- + df_dynamic_directory: df with the file and subdirectories and the dates of the most recent activity for the reviewers. + """ # date reformating df_actions["created_at"] = pd.to_datetime(df_actions["created_at"], utc=True) @@ -406,6 +505,26 @@ def process_data( # most recent activity - preprocessing step df_dynamic_directory = df_dynamic_directory.explode("dates") + return df_dynamic_directory + + +def file_cntrb_activity_by_month(df_dynamic_directory: pd.DataFrame, df_actions: pd.DataFrame): + """ + This function transforms the df_dynamic_directory to be counts of "last seen" contributors by month. + + Args: + ----- + df_actions : Pandas Dataframe + Dataframe with contributor activity + + df_dynamic_directory : Pandas Dataframe + Dataframe with file and related cntrb_id information + + Returns: + -------- + df_final: df with files and subdirectories as rows and the months as columns + """ + # get files that have no contributors and remove from set to prevent errors in grouper function no_contribs = df_dynamic_directory["directory_value"][df_dynamic_directory.dates.isnull()].tolist() @@ -415,8 +534,9 @@ def process_data( there will be a column for every month even if there is no "last contribution" date in it. This greatly improves the heatmap ploting""" - # dates based on action so it represents the length of the project - min_date = df_actions.created_at.min() + # dates based on action so it represents the length of the project, min based on PR + # open date to avoid committer inputted dates + min_date = df_actions[df_actions["Action"] == "PR Opened"].created_at.min() max_date = df_actions.created_at.max() dates = pd.date_range(start=min_date, end=max_date, freq="M", inclusive="both") df_fill = dates.to_frame(index=False, name="dates") @@ -436,18 +556,3 @@ def process_data( final.loc[files] = None return final - - -def create_figure(df: pd.DataFrame): - fig = px.imshow( - df, - labels=dict(x="Time", y="Directory Entries", color="Contributors"), - color_continuous_scale=px.colors.sequential.deep, - ) - - fig["layout"]["yaxis"]["tickmode"] = "linear" - fig["layout"]["height"] = 700 - fig["layout"]["coloraxis_colorbar_x"] = -0.15 - fig["layout"]["yaxis"]["side"] = "right" - - return fig diff --git a/8Knot/pages/codebase/visualizations/contribution_file_heatmap.py b/8Knot/pages/codebase/visualizations/contribution_file_heatmap.py index e7fccbe2..59a86caa 100644 --- a/8Knot/pages/codebase/visualizations/contribution_file_heatmap.py +++ b/8Knot/pages/codebase/visualizations/contribution_file_heatmap.py @@ -314,6 +314,65 @@ def process_data( directory, graph_view, ): + """ + Processing steps + + 1 - Cleans up file data to only include current files and relate files in the repository to the prs that impact them. + 2 - For a given level in the directory tree, aggregate the list of prs for sub-directories and for individual files at the level. + 3 - For each pr, identify its open and merged dates.
+ 4 - Transforms dataframe where columns are months with counts of pr open/merge dates in that month and the rows are the file/subdirectory + """ + + df_file = df_file_clean(df_file, df_file_pr) + + df_dynamic_directory = pr_per_directory_value(directory, df_file) + + # work around for using functions, will clean later + if df_dynamic_directory.empty: + return df_dynamic_directory + + df_dynamic_directory = pr_to_dates(df_pr, df_dynamic_directory, graph_view) + + final = file_pr_activity_by_month(df_dynamic_directory, df_pr, graph_view) + + return final + + +def create_figure(df: pd.DataFrame, graph_view): + legend_title = "PRs Opened" + if graph_view == "merged_at": + legend_title = "PRs Merged" + + fig = px.imshow( + df, + labels=dict(x="Time", y="Directory Entries", color=legend_title), + color_continuous_scale=px.colors.sequential.deep, + ) + + fig["layout"]["yaxis"]["tickmode"] = "linear" + fig["layout"]["height"] = 700 + fig["layout"]["coloraxis_colorbar_x"] = -0.15 + fig["layout"]["yaxis"]["side"] = "right" + + return fig + + +def df_file_clean(df_file: pd.DataFrame, df_file_pr: pd.DataFrame): + """ + This function cleans the df_file data and combines it with the related pull request ids + + Args: + ----- + df_file : Pandas Dataframe + Dataframe with the output of the repo_files_query + + df_file_prs : Pandas Dataframe + Dataframe with the output of the pr_file_query + + Returns: + -------- + df_file: df with file and pull_request_ids of prs with that file in it + """ # strings to hold the values for each column (always the same for every row of this query) repo_name = df_file["repo_name"].iloc[0] repo_path = df_file["repo_path"].iloc[0] @@ -323,9 +382,6 @@ def process_data( path_slice = repo_id + "-" + repo_path + "/" + repo_name + "/" df_file["file_path"] = df_file["file_path"].str.rsplit(path_slice, n=1).str[1] - # drop columns not in the most recent collection - df_file = df_file[df_file["rl_analysis_date"] == df_file["rl_analysis_date"].max()] - # drop unneccessary columns not needed after preprocessing steps df_file = df_file.reset_index() df_file.drop(["index", "repo_name", "repo_path", "rl_analysis_date"], axis=1, inplace=True) @@ -341,9 +397,29 @@ def process_data( df_file_pr = df_file_pr.groupby("file_path")["pull_request_id"].apply(list) # Left join on df_files to only get the files that are currently in the repository - # and the contributors that have ever opened a pr that included edits on the file + # and the prs that included edits on the file df_file = pd.merge(df_file, df_file_pr, on="file_path", how="left") + return df_file + + +def pr_per_directory_value(directory, df_file): + """ + This function gets the files in the specified directory, groups together any files in + subdirectories, and creates a list of pull_request_ids that touched those files + + Args: + ----- + directory : string + Output from the directory drop down + + df_file : Pandas Dataframe + Dataframe with file and related pull_request_id information + + Returns: + -------- + df_dynamic_directory: df with the file and subdirectories and their prs pull_request_ids + """ # determine directory level to use in later step level = directory.count("/") if directory == "Top Level Directory": @@ -361,7 +437,7 @@ def process_data( group_column = level + 1 # Groupby the level above the selected directory for all files nested in folders are together. 
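+ # e.g. for directory="src" (level 0), group_column is path component 1:
+ # "src/utils/io.py" and "src/utils/fmt.py" both group under "utils", and
+ # .sum() concatenates their pull_request_id lists ([1, 2] + [2] -> [1, 2, 2]);
+ # the set() applied below then dedupes each group's ids.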
- # For each, create a list of all of pull request that include that file + # For each, create a list of all of the pull requests that include that file df_dynamic_directory = ( df_dynamic_directory.groupby(group_column)["pull_request_id"] .sum() @@ -377,6 +453,27 @@ def process_data( lambda row: set(row.pull_request_id), axis=1, ) + return df_dynamic_directory + + +def pr_to_dates(df_pr: pd.DataFrame, df_dynamic_directory: pd.DataFrame, graph_view): + """ + This function creates a df with the files and the open and merge dates of the prs that + touch each file or subdirectory. + + Args: + ----- + df_pr : Pandas Dataframe + Dataframe with pull request data + + df_dynamic_directory : Pandas Dataframe + Dataframe with file and related pull_request_id information + + Returns: + -------- + df_dynamic_directory: df with the file and subdirectories and the open and merge dates + of the prs that touch each file or subdirectory. + """ # date reformating df_pr["created_at"] = pd.to_datetime(df_pr["created_at"], utc=True) @@ -407,6 +504,26 @@ def process_data( # reformat into each row being a directory value and a date of one of the pull request dates df_dynamic_directory = df_dynamic_directory.explode(graph_view) + return df_dynamic_directory + + +def file_pr_activity_by_month(df_dynamic_directory: pd.DataFrame, df_pr: pd.DataFrame, graph_view): + """ + This function transforms the df_dynamic_directory to be counts of open or merged prs by month. + + Args: + ----- + df_dynamic_directory : Pandas Dataframe + Dataframe with file and related pull_request_id information + + df_pr : Pandas Dataframe + Dataframe with pull request data + + Returns: + -------- + df_final: df with files and subdirectories as rows and the months as columns + """ + # get files that have no pull requests and remove from set to prevent errors in grouper function no_contribs = df_dynamic_directory["directory_value"][df_dynamic_directory[graph_view].isnull()].tolist() @@ -438,22 +555,3 @@ def process_data( final.loc[files] = None return final - - -def create_figure(df: pd.DataFrame, graph_view): - legend_title = "PRs Opened" - if graph_view == "merged_at": - legend_title = "PRs Merged" - - fig = px.imshow( - df, - labels=dict(x="Time", y="Directory Entries", color=legend_title), - color_continuous_scale=px.colors.sequential.deep, - ) - - fig["layout"]["yaxis"]["tickmode"] = "linear" - fig["layout"]["height"] = 700 - fig["layout"]["coloraxis_colorbar_x"] = -0.15 - fig["layout"]["yaxis"]["side"] = "right" - - return fig diff --git a/8Knot/pages/codebase/visualizations/reviewer_file_heatmap.py b/8Knot/pages/codebase/visualizations/reviewer_file_heatmap.py index 2a799b6d..9020eba3 100644 --- a/8Knot/pages/codebase/visualizations/reviewer_file_heatmap.py +++ b/8Knot/pages/codebase/visualizations/reviewer_file_heatmap.py @@ -80,6 +80,7 @@ classNames={"values": "dmc-multiselect-custom"}, searchable=True, clearable=False, + value="Top Level Directory", ), ], className="me-2", @@ -215,7 +216,7 @@ def directory_dropdown(repo_id): # add top level directory to the list of directories directories.insert(0, "Top Level Directory") - logging.warning(f"DIRECTORY DROPDOWN - FINISHED") + logging.warning(f"REVIEWER DIRECTORY DROPDOWN - FINISHED") return directories, "Top Level Directory" @@ -224,18 +225,19 @@ def directory_dropdown(repo_id): @callback( Output(f"{PAGE}-{VIZ_ID}", "figure"), [ + Input("repo-choices", "data"), Input(f"repo-{PAGE}-{VIZ_ID}", "value"), Input(f"directory-{PAGE}-{VIZ_ID}", "value"), Input("bot-switch", "value"), ],
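+ # background=True registers this as a Dash background callback, so the
+ # long-running query work happens on a worker (Celery in this app) instead
+ # of blocking the web process.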
background=True, ) -def reviewer_file_heatmap_graph(repo_id, directory, bot_switch): +def reviewer_file_heatmap_graph(searchbar_repos, repo_id, directory, bot_switch): start = time.perf_counter() logging.warning(f"{VIZ_ID}- START") # get dataframes of data from cache - df_file, df_actions, df_file_cntbs = multi_query_helper([repo_id]) + df_file, df_actions, df_file_cntbs = multi_query_helper(searchbar_repos, [repo_id]) # test if there is data if df_file.empty or df_actions.empty or df_file_cntbs.empty: @@ -255,7 +257,7 @@ def reviewer_file_heatmap_graph(repo_id, directory, bot_switch): return fig -def multi_query_helper(repos): +def multi_query_helper(searchbar_repos, repo): """ For reviewer_file_heatmap_graph- hack to put all of the cache-retrieval @@ -263,32 +265,32 @@ def multi_query_helper(repos): """ # wait for data to asynchronously download and become available. - while not_cached := cf.get_uncached(func_name=rfq.__name__, repolist=repos): + while not_cached := cf.get_uncached(func_name=rfq.__name__, repolist=repo): logging.warning(f"CONTRIBUTOR FILE HEATMAP - WAITING ON DATA TO BECOME AVAILABLE") time.sleep(0.5) # wait for data to asynchronously download and become available. - while not_cached := cf.get_uncached(func_name=cnq.__name__, repolist=repos): + while not_cached := cf.get_uncached(func_name=cnq.__name__, repolist=searchbar_repos): logging.warning(f"CONTRIBUTOR FILE HEATMAP - WAITING ON DATA TO BECOME AVAILABLE") time.sleep(0.5) # wait for data to asynchronously download and become available. - while not_cached := cf.get_uncached(func_name=cpfq.__name__, repolist=repos): + while not_cached := cf.get_uncached(func_name=cpfq.__name__, repolist=repo): logging.warning(f"CONTRIBUTOR FILE HEATMAP - WAITING ON DATA TO BECOME AVAILABLE") time.sleep(0.5) # GET ALL DATA FROM POSTGRES CACHE df_file = cf.retrieve_from_cache( tablename=rfq.__name__, - repolist=repos, + repolist=repo, ) df_actions = cf.retrieve_from_cache( tablename=cnq.__name__, - repolist=repos, + repolist=searchbar_repos, ) df_file_cntrbs = cf.retrieve_from_cache( tablename=cpfq.__name__, - repolist=repos, + repolist=repo, ) # necessary preprocessing steps that were lifted out of the querying step @@ -400,13 +402,13 @@ def df_file_clean(df_file: pd.DataFrame, df_file_cntbs: pd.DataFrame, bot_switch lambda row: [x for x in row.reviewer_ids], axis=1, ) - return df_file def cntrb_per_directory_value(directory, df_file): """ - This function cleans the df_file data and combines it with the related reviewer cntrb_ids + This function gets the files in the specified directory, groups together any files in + subdirectories, and creates a list of their reviewers' cntrb_ids. Args: ----- @@ -459,12 +461,12 @@ def cntrb_per_directory_value(directory, df_file): def cntrb_to_last_activity(df_actions: pd.DataFrame, df_dynamic_directory: pd.DataFrame): """ - This function created a df with the files and the the dates of the most recent activity for each cntrb_id. + This function creates a df with the files and the dates of the most recent activity for each cntrb_id.
Args: ----- - df_actions : string - Output from the directory drop down + df_actions : Pandas Dataframe + Dataframe with contributor activity df_dynamic_directory : Pandas Dataframe Dataframe with file and related reviewer_id information @@ -507,16 +509,16 @@ def cntrb_to_last_activity(df_actions: pd.DataFrame, df_dynamic_directory: pd.Da def file_cntrb_activity_by_month(df_dynamic_directory: pd.DataFrame, df_actions: pd.DataFrame): """ - This function created a df with the files and the the dates of the most recent activity for each cntrb_id. + This function transforms the df_dynamic_directory to be counts of "last seen" reviewers by month. Args: ----- - df_actions : string - Output from the directory drop down - df_dynamic_directory : Pandas Dataframe Dataframe with file and related reviewer_id information + df_actions : Pandas Dataframe + Dataframe with contributor activity + Returns: -------- df_final: df with files and subdirectories as rows and the months as columns @@ -531,8 +533,9 @@ def file_cntrb_activity_by_month(df_dynamic_directory: pd.DataFrame, df_actions: there will be a column for every month even if there is no "last contribution" date in it. This greatly improves the heatmap ploting""" - # dates based on action so it represents the length of the project - min_date = df_actions.created_at.min() + # dates based on action so it represents the length of the project, min based on PR + # open date to avoid committer inputted dates + min_date = df_actions[df_actions["Action"] == "PR Opened"].created_at.min() max_date = df_actions.created_at.max() dates = pd.date_range(start=min_date, end=max_date, freq="M", inclusive="both") df_fill = dates.to_frame(index=False, name="dates") diff --git a/8Knot/pages/contributions/visualizations/cntrb_pr_assignment.py b/8Knot/pages/contributions/visualizations/cntrb_pr_assignment.py index b3907a07..0376975d 100644 --- a/8Knot/pages/contributions/visualizations/cntrb_pr_assignment.py +++ b/8Knot/pages/contributions/visualizations/cntrb_pr_assignment.py @@ -90,7 +90,7 @@ width=2, ), dbc.Alert( - children="No contributors meet assignment requirement", + children="No contributors in date range meet assignment requirement", id=f"check-alert-{PAGE}-{VIZ_ID}", dismissable=True, fade=False, @@ -195,6 +195,11 @@ def cntrib_pr_assignment_graph(repolist, interval, assign_req, start_date, end_d df = process_data(df, interval, assign_req, start_date, end_date) + # test if there is data in criteria + if df.empty: + logging.warning(f"{VIZ_ID} - NO DATA IN CRITERIA AVAILABLE") + return nodata_graph, True + fig = create_figure(df, interval) logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}") @@ -222,10 +227,6 @@ def process_data(df: pd.DataFrame, interval, assign_req, start_date, end_date): # create list of all contributors that meet the assignment requirement contributors = df_contrib["assignee"][df_contrib["count"] >= assign_req].to_list() - # no update if there are not any contributors that meet the criteria - if len(contributors) == 0: - return dash.no_update, True - # filter values based on date picker if start_date is not None: df = df[df.created_at >= start_date] @@ -235,6 +236,10 @@ def process_data(df: pd.DataFrame, interval, assign_req, start_date, end_date): # only include contributors that meet the criteria df = df.loc[df["assignee"].isin(contributors)] + # check if there is data that meet contributor and date range criteria + if df.empty: + return pd.DataFrame() + # first and last elements of the dataframe are the # earliest and 
latest events respectively earliest = df["created_at"].min() diff --git a/8Knot/pages/contributions/visualizations/cntrib_issue_assignment.py b/8Knot/pages/contributions/visualizations/cntrib_issue_assignment.py index 0d5ff9e3..eb7b33b6 100644 --- a/8Knot/pages/contributions/visualizations/cntrib_issue_assignment.py +++ b/8Knot/pages/contributions/visualizations/cntrib_issue_assignment.py @@ -90,7 +90,7 @@ width=2, ), dbc.Alert( - children="No contributors meet assignment requirement", + children="No contributors in date range meet assignment requirement", id=f"check-alert-{PAGE}-{VIZ_ID}", dismissable=True, fade=False, @@ -192,6 +192,11 @@ def cntrib_issue_assignment_graph(repolist, interval, assign_req, start_date, en df = process_data(df, interval, assign_req, start_date, end_date) + # test if there is data in criteria + if df.empty: + logging.warning(f"{VIZ_ID} - NO DATA IN CRITERIA AVAILABLE") + return nodata_graph, True + fig = create_figure(df, interval) logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}") @@ -219,10 +224,6 @@ def process_data(df: pd.DataFrame, interval, assign_req, start_date, end_date): # create list of all contributors that meet the assignment requirement contributors = df_contrib["assignee"][df_contrib["count"] >= assign_req].to_list() - # no update if there are not any contributors that meet the criteria - if len(contributors) == 0: - return dash.no_update, True - # filter values based on date picker if start_date is not None: df = df[df.created_at >= start_date] @@ -232,6 +233,10 @@ def process_data(df: pd.DataFrame, interval, assign_req, start_date, end_date): # only include contributors that meet the criteria df = df.loc[df["assignee"].isin(contributors)] + # check if there is data that meet contributor and date range criteria + if df.empty: + return pd.DataFrame() + # first and last elements of the dataframe are the # earliest and latest events respectively earliest = df["created_at"].min() diff --git a/8Knot/pages/contributions/visualizations/pr_over_time.py b/8Knot/pages/contributions/visualizations/pr_over_time.py index 255b896a..3dbcb813 100644 --- a/8Knot/pages/contributions/visualizations/pr_over_time.py +++ b/8Knot/pages/contributions/visualizations/pr_over_time.py @@ -233,7 +233,7 @@ def create_figure( hovertemplate=hover + "
<br>Created: %{y}<br>
" + "", offsetgroup=0, marker=dict(color=color_seq[2]), - name="created_at", + name="Opened", ) fig.add_bar( x=df_closed_merged["Date"], diff --git a/8Knot/pages/contributors/visualizations/contrib_importance_over_time.py b/8Knot/pages/contributors/visualizations/contrib_importance_over_time.py index 0d775d4c..63ce11cf 100644 --- a/8Knot/pages/contributors/visualizations/contrib_importance_over_time.py +++ b/8Knot/pages/contributors/visualizations/contrib_importance_over_time.py @@ -65,30 +65,7 @@ dbc.Row( [ dbc.Label( - "Threshold:", - html_for=f"threshold-{PAGE}-{VIZ_ID}", - width="auto", - ), - dbc.Col( - [ - dcc.Slider( - id=f"threshold-{PAGE}-{VIZ_ID}", - min=10, - max=95, - value=50, - marks={i: f"{i}%" for i in range(10, 100, 5)}, - ), - ], - className="me-2", - width=10, - ), - ], - align="center", - ), - dbc.Row( - [ - dbc.Label( - "Window Width:", + "Window Width (Months):", html_for=f"window-width-{PAGE}-{VIZ_ID}", width="auto", ), @@ -106,7 +83,7 @@ width=2, ), dbc.Label( - "Step Size:", + "Step Size (Months):", html_for=f"step-size-{PAGE}-{VIZ_ID}", width="auto", ), @@ -137,55 +114,35 @@ dbc.Row( [ dbc.Label( - "Filter Out Contributors with Keyword(s) in Login:", - html_for=f"patterns-{PAGE}-{VIZ_ID}", + "Threshold:", + html_for=f"threshold-{PAGE}-{VIZ_ID}", width="auto", ), dbc.Col( [ - dmc.MultiSelect( - id=f"patterns-{PAGE}-{VIZ_ID}", - placeholder="Bot filter values", - data=[ - {"value": "bot", "label": "bot"}, - ], - classNames={"values": "dmc-multiselect-custom"}, - creatable=True, - searchable=True, + dcc.Slider( + id=f"threshold-{PAGE}-{VIZ_ID}", + min=10, + max=95, + value=50, + marks={i: f"{i}%" for i in range(10, 100, 5)}, ), ], className="me-2", + width=9, ), - ], - align="center", - ), - dbc.Row( - [ dbc.Col( - dcc.DatePickerRange( - id=f"date-picker-range-{PAGE}-{VIZ_ID}", - min_date_allowed=dt.date(2005, 1, 1), - max_date_allowed=dt.date.today(), - initial_visible_month=dt.date(dt.date.today().year, 1, 1), - clearable=True, + dbc.Button( + "About Graph", + id=f"popover-target-{PAGE}-{VIZ_ID}", + color="secondary", + size="sm", ), width="auto", - ), - dbc.Col( - [ - dbc.Button( - "About Graph", - id=f"popover-target-{PAGE}-{VIZ_ID}", - color="secondary", - size="sm", - ), - ], - width="auto", style={"paddingTop": ".5em"}, ), ], align="center", - justify="between", ), ] ), @@ -223,33 +180,21 @@ def graph_title(window_width): Output(f"check-alert-{PAGE}-{VIZ_ID}", "is_open"), [ Input("repo-choices", "data"), - Input(f"patterns-{PAGE}-{VIZ_ID}", "value"), Input(f"threshold-{PAGE}-{VIZ_ID}", "value"), Input(f"window-width-{PAGE}-{VIZ_ID}", "value"), Input(f"step-size-{PAGE}-{VIZ_ID}", "value"), - Input(f"date-picker-range-{PAGE}-{VIZ_ID}", "start_date"), - Input(f"date-picker-range-{PAGE}-{VIZ_ID}", "end_date"), Input("bot-switch", "value"), ], background=True, ) -def create_contrib_prolificacy_over_time_graph( - repolist, - patterns, - threshold, - window_width, - step_size, - start_date, - end_date, - bot_switch, -): +def create_contrib_prolificacy_over_time_graph(repolist, threshold, window_width, step_size, bot_switch): # wait for data to asynchronously download and become available. 
while not_cached := cf.get_uncached(func_name=ctq.__name__, repolist=repolist): logging.warning(f"{VIZ_ID}- WAITING ON DATA TO BECOME AVAILABLE") time.sleep(0.5) - logging.warning(f"{VIZ_ID} - START") start = time.perf_counter() + logging.warning(f"{VIZ_ID} - START") # GET ALL DATA FROM POSTGRES CACHE df = cf.retrieve_from_cache( @@ -263,10 +208,6 @@ def create_contrib_prolificacy_over_time_graph( if bot_switch: df = df[~df["cntrb_id"].isin(app.bots_list)] - # data ready. - start = time.perf_counter() - logging.warning(f"{VIZ_ID}- START") - # test if there is data if df.empty: logging.warning(f"{VIZ_ID} - NO DATA AVAILABLE") @@ -276,34 +217,26 @@ def create_contrib_prolificacy_over_time_graph( if step_size > window_width: return dash.no_update, True - df_final = process_data(df, patterns, threshold, window_width, step_size, start_date, end_date) + df = process_data(df, threshold, window_width, step_size) - fig = create_figure(df_final, threshold, step_size) + fig = create_figure(df, threshold, step_size) logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}") return fig, False -def process_data(df, patterns, threshold, window_width, step_size, start_date, end_date): +def process_data(df, threshold, window_width, step_size): # convert to datetime objects rather than strings df["created_at"] = pd.to_datetime(df["created_at"], utc=True) # order values chronologically by created_at date df = df.sort_values(by="created_at", ascending=True) - # if the start_date and/or the end date is not specified set them to the beginning and most recent created_at date - if start_date is None: - start_date = df["created_at"].min() - if end_date is None: - end_date = df["created_at"].max() - - if patterns: - # remove rows where Login column value contains the substring 'bot' - patterns_mask = df["login"].str.contains("|".join(patterns), na=False) - df = df[~patterns_mask] + # get start and end date from created column + start_date = df["created_at"].min() + end_date = df["created_at"].max() - # threshold is an integer value eg. 10, 20,..., 90 since dcc.Slider only accepts integers as values - # divide by 100 to convert it to a decimal representation of a percentage eg. 0.10, 0.20,..., 0.90 + # convert percent to its decimal representation threshold = threshold / 100 # create bins with a size equivalent to the the step size starting from the start date up to the end date diff --git a/README.md b/README.md index cb14d3c1..0102af7d 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# 8Knot (Explorer) +# 8Knot ![Pre-Commit](https://github.com/JamesKunstle/explorer/actions/workflows/pre-commit.yml/badge.svg) ![Build-Push](https://github.com/JamesKunstle/explorer/actions/workflows/build-push-quay.yml/badge.svg) @@ -219,9 +219,18 @@ docker && docker compose || docker-compose (above just runs docker and docker-compose and checks if both work) -NOTE: `podman-compose` has been generally verified to work as well, but our preference is `docker compose` -`podman-compose` doesn't support the `--scale` flag as we would expect so we don't use it for our own -development applications, but the application is built to work with the minimum number of containers. "your mileage my vary". +NOTE: As of 3/29/24 we recommend using `Podman` and `Podman Desktop` instead of `Docker` and `Docker Desktop`. It will be our default development environment going forward. +There are many guides to transitioning from `Docker` (Desktop) to `Podman` (Desktop), but here's a rough outline of our "golden path." + +1. 
Uninstall `Docker Desktop`. This will require a GUI uninstall and looking through your apps + filesystem for remnants. +2. Install `Podman` and `Podman Desktop`. You'll also explicitly have to provision a `Podman Machine` if you're on a Mac. +3. Enable the "Docker compatibility add-on" available in `Podman Desktop`. This will route traffic headed for a Docker machine to the running Podman machine. (Under the hood, this points Podman at the Docker socket.) +4. Install `docker-compose`. This is a standalone, open-source tool that `Podman Compose` delegates compose responsibilities to. + +At this point, the `Podman` docs claim that one should have moved over to `Podman` as a drop-in replacement for `Docker`. However, here are two steps that we noticed were necessary in some cases. + +1. In `$HOME/.docker/config.json` replace "credsStore" with "credStore" (minus an 's') to solve registry credentialing problems (a scripted version of this edit is sketched below). +2. Set `export DOCKER_HOST=` to the `Podman machine`'s socket on your system, which you can find in the `Resources` tab of `Podman Desktop`. The path starts with `unix://`.
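For step 1, here's a minimal sketch of that config edit (Python, matching the rest of the repo; it assumes the default config location and is illustrative, not part of 8Knot):

```python
import json
from pathlib import Path

# Rename "credsStore" to "credStore" in the Docker client config so that
# registry credential lookups work when Podman is answering the Docker socket.
config_path = Path.home() / ".docker" / "config.json"
config = json.loads(config_path.read_text())

if "credsStore" in config:
    config["credStore"] = config.pop("credsStore")
    config_path.write_text(json.dumps(config, indent=2))
    print(f"patched {config_path}")
```

### Build and Run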