diff --git a/.wordlist-md b/.wordlist-md
index 86bf4a46..2c407053 100644
--- a/.wordlist-md
+++ b/.wordlist-md
@@ -72,3 +72,7 @@ Docker's
data-center
OAuth
postgres
+Podman
+filesystem
+credsStore
+credStore
diff --git a/8Knot/pages/chaoss/visualizations/project_velocity.py b/8Knot/pages/chaoss/visualizations/project_velocity.py
index 3740c1ab..5b21efa8 100644
--- a/8Knot/pages/chaoss/visualizations/project_velocity.py
+++ b/8Knot/pages/chaoss/visualizations/project_velocity.py
@@ -331,9 +331,11 @@ def process_data(
# replace all nan to 0
df_consolidated.fillna(value=0, inplace=True)
- # log of commits and contribs
- df_consolidated["log_num_commits"] = df_consolidated["Commit"].apply(math.log)
- df_consolidated["log_num_contrib"] = df_consolidated["num_unique_contributors"].apply(math.log)
+ # log of commits and contribs if values are not 0
+ df_consolidated["log_num_commits"] = df_consolidated["Commit"].apply(lambda x: math.log(x) if x != 0 else 0)
+ df_consolidated["log_num_contrib"] = df_consolidated["num_unique_contributors"].apply(
+ lambda x: math.log(x) if x != 0 else 0
+ )
# column to hold the weighted values of pr and issues actions summed together
df_consolidated["prs_issues_actions_weighted"] = (
diff --git a/8Knot/pages/codebase/codebase.py b/8Knot/pages/codebase/codebase.py
index 4979461a..43a36a2e 100644
--- a/8Knot/pages/codebase/codebase.py
+++ b/8Knot/pages/codebase/codebase.py
@@ -16,14 +16,14 @@
[
dbc.Row(
[
- dbc.Col(gc_cntrb_file_heatmap, width=12),
+ dbc.Col(gc_contribution_file_heatmap, width=12),
],
align="center",
style={"marginBottom": ".5%"},
),
dbc.Row(
[
- dbc.Col(gc_contribution_file_heatmap, width=12),
+ dbc.Col(gc_cntrb_file_heatmap, width=12),
],
align="center",
style={"marginBottom": ".5%"},
diff --git a/8Knot/pages/codebase/visualizations/cntrb_file_heatmap.py b/8Knot/pages/codebase/visualizations/cntrb_file_heatmap.py
index 431b199b..081f335e 100644
--- a/8Knot/pages/codebase/visualizations/cntrb_file_heatmap.py
+++ b/8Knot/pages/codebase/visualizations/cntrb_file_heatmap.py
@@ -80,6 +80,7 @@
classNames={"values": "dmc-multiselect-custom"},
searchable=True,
clearable=False,
+ value="Top Level Directory",
),
],
className="me-2",
@@ -215,7 +216,7 @@ def directory_dropdown(repo_id):
# add top level directory to the list of directories
directories.insert(0, "Top Level Directory")
- logging.warning(f"DIRECTORY DROPDOWN - FINISHED")
+ logging.warning(f"CNTRB DIRECTORY DROPDOWN - FINISHED")
return directories, "Top Level Directory"
@@ -224,18 +225,19 @@ def directory_dropdown(repo_id):
@callback(
Output(f"{PAGE}-{VIZ_ID}", "figure"),
[
+ Input("repo-choices", "data"),
Input(f"repo-{PAGE}-{VIZ_ID}", "value"),
Input(f"directory-{PAGE}-{VIZ_ID}", "value"),
Input("bot-switch", "value"),
],
background=True,
)
-def cntrb_file_heatmap_graph(repo_id, directory, bot_switch):
+def cntrb_file_heatmap_graph(searchbar_repos, repo_id, directory, bot_switch):
start = time.perf_counter()
logging.warning(f"{VIZ_ID}- START")
# get dataframes of data from cache
- df_file, df_actions, df_file_cntbs = multi_query_helper([repo_id])
+ df_file, df_actions, df_file_cntbs = multi_query_helper(searchbar_repos, [repo_id])
# test if there is data
if df_file.empty or df_actions.empty or df_file_cntbs.empty:
@@ -255,7 +257,7 @@ def cntrb_file_heatmap_graph(repo_id, directory, bot_switch):
return fig
-def multi_query_helper(repos):
+def multi_query_helper(searchbar_repos, repo):
"""
For cntrb_file_heatmap_graph-
hack to put all of the cache-retrieval
@@ -263,32 +265,32 @@ def multi_query_helper(repos):
"""
# wait for data to asynchronously download and become available.
- while not_cached := cf.get_uncached(func_name=rfq.__name__, repolist=repos):
+ while not_cached := cf.get_uncached(func_name=rfq.__name__, repolist=repo):
logging.warning(f"CONTRIBUTOR FILE HEATMAP - WAITING ON DATA TO BECOME AVAILABLE")
time.sleep(0.5)
# wait for data to asynchronously download and become available.
- while not_cached := cf.get_uncached(func_name=cnq.__name__, repolist=repos):
+ while not_cached := cf.get_uncached(func_name=cnq.__name__, repolist=searchbar_repos):
logging.warning(f"CONTRIBUTOR FILE HEATMAP - WAITING ON DATA TO BECOME AVAILABLE")
time.sleep(0.5)
# wait for data to asynchronously download and become available.
- while not_cached := cf.get_uncached(func_name=cpfq.__name__, repolist=repos):
+ while not_cached := cf.get_uncached(func_name=cpfq.__name__, repolist=repo):
logging.warning(f"CONTRIBUTOR FILE HEATMAP - WAITING ON DATA TO BECOME AVAILABLE")
time.sleep(0.5)
# GET ALL DATA FROM POSTGRES CACHE
df_file = cf.retrieve_from_cache(
tablename=rfq.__name__,
- repolist=repos,
+ repolist=repo,
)
df_actions = cf.retrieve_from_cache(
tablename=cnq.__name__,
- repolist=repos,
+ repolist=searchbar_repos,
)
df_file_cntrbs = cf.retrieve_from_cache(
tablename=cpfq.__name__,
- repolist=repos,
+ repolist=repo,
)
# necessary preprocessing steps that were lifted out of the querying step
@@ -305,6 +307,64 @@ def process_data(
directory,
bot_switch,
):
+ """
+ Processing steps
+
+    1 - Clean up file data to include only current files and relate each file in the repository to the contributors who have reviewed it in past PRs.
+    2 - For a given level in the directory tree, aggregate the list of contributors for sub-directories and for individual files at that level.
+    3 - For each contributor, identify their most recent contribution.
+    4 - Transform the dataframe so columns are months holding counts of "last seen" dates in that month and rows are the files/subdirectories.
+ """
+
+ df_file = df_file_clean(df_file, df_file_cntbs, bot_switch)
+
+ df_dynamic_directory = cntrb_per_directory_value(directory, df_file)
+
+    # workaround for using helper functions; will clean up later
+ if df_dynamic_directory.empty:
+ return df_dynamic_directory
+
+ df_dynamic_directory = cntrb_to_last_activity(df_actions, df_dynamic_directory)
+
+ final = file_cntrb_activity_by_month(df_dynamic_directory, df_actions)
+
+ return final
+
+
+def create_figure(df: pd.DataFrame):
+ fig = px.imshow(
+ df,
+ labels=dict(x="Time", y="Directory Entries", color="Contributors"),
+ color_continuous_scale=px.colors.sequential.deep,
+ )
+
+ fig["layout"]["yaxis"]["tickmode"] = "linear"
+ fig["layout"]["height"] = 700
+ fig["layout"]["coloraxis_colorbar_x"] = -0.15
+ fig["layout"]["yaxis"]["side"] = "right"
+
+ return fig
+
+
+def df_file_clean(df_file: pd.DataFrame, df_file_cntbs: pd.DataFrame, bot_switch):
+ """
+ This function cleans the df_file data and combines it with the related cntrb_ids
+
+ Args:
+ -----
+ df_file : Pandas Dataframe
+ Dataframe with the output of the repo_files_query
+
+    df_file_cntbs : Pandas Dataframe
+ Dataframe with the output of the cntrb_per_file_query
+
+    bot_switch : boolean
+ T/F for the status of the bot switch
+
+ Returns:
+ --------
+    df_file: df with file and the cntrb_ids of contributors that reviewed a PR that included the file
+ """
# strings to hold the values for each column (always the same for every row of this query)
repo_name = df_file["repo_name"].iloc[0]
repo_path = df_file["repo_path"].iloc[0]
@@ -326,7 +386,7 @@ def process_data(
df_file_cntbs.drop(["repo_id", "reviewer_ids"], axis=1, inplace=True)
# Left join on df_files to only get the files that are currently in the repository
- # and the contributors that have ever opened a pr that included edits on the file
+ # and the contributors that have ever reviewed a pr that included edits on the file
df_file = pd.merge(df_file, df_file_cntbs, on="file_path", how="left")
# replace nan with empty string to avoid errors in list comprehension
df_file.cntrb_ids.fillna("", inplace=True)
@@ -343,6 +403,26 @@ def process_data(
axis=1,
)
+ return df_file
+
+
+def cntrb_per_directory_value(directory, df_file):
+ """
+ This function gets the files in the specified directory, groups together any files in
+    subdirectories, and creates a list of their contributors' cntrb_ids
+
+ Args:
+ -----
+ directory : string
+ Output from the directory drop down
+
+ df_file : Pandas Dataframe
+ Dataframe with file and related cntrb_id information
+
+ Returns:
+ --------
+    df_dynamic_directory: df with the file and subdirectories and their contributors' cntrb_ids
+ """
# determine directory level to use in later step
level = directory.count("/")
if directory == "Top Level Directory":
@@ -377,6 +457,25 @@ def process_data(
lambda row: set(row.cntrb_ids),
axis=1,
)
+ return df_dynamic_directory
+
+
+def cntrb_to_last_activity(df_actions: pd.DataFrame, df_dynamic_directory: pd.DataFrame):
+ """
+    This function creates a df with the files and the dates of the most recent activity for each cntrb_id.
+
+ Args:
+ -----
+ df_actions : Pandas Dataframe
+ Dataframe with contributor activity
+
+ df_dynamic_directory : Pandas Dataframe
+ Dataframe with file and related cntrb_id information
+
+ Returns:
+ --------
+    df_dynamic_directory: df with the file and subdirectories and the dates of the most recent activity for the contributors.
+ """
# date reformating
df_actions["created_at"] = pd.to_datetime(df_actions["created_at"], utc=True)
@@ -406,6 +505,26 @@ def process_data(
# most recent activity - preprocessing step
df_dynamic_directory = df_dynamic_directory.explode("dates")
+ return df_dynamic_directory
+
+
+def file_cntrb_activity_by_month(df_dynamic_directory: pd.DataFrame, df_actions: pd.DataFrame):
+ """
+ This function transforms the df_dynamic_directory to be counts of "last seen" contributors by month.
+
+ Args:
+ -----
+    df_dynamic_directory : Pandas Dataframe
+        Dataframe with file and related cntrb_id information
+
+    df_actions : Pandas Dataframe
+        Dataframe with contributor activity
+
+ Returns:
+ --------
+ df_final: df with files and subdirectories as rows and the months as columns
+ """
+
# get files that have no contributors and remove from set to prevent errors in grouper function
no_contribs = df_dynamic_directory["directory_value"][df_dynamic_directory.dates.isnull()].tolist()
@@ -415,8 +534,9 @@ def process_data(
there will be a column for every month even if there is no "last contribution" date in it. This greatly
improves the heatmap ploting"""
- # dates based on action so it represents the length of the project
- min_date = df_actions.created_at.min()
+    # dates based on actions so the range represents the length of the project; min based on PR
+    # open date to avoid unreliable committer-supplied dates
+ min_date = df_actions[df_actions["Action"] == "PR Opened"].created_at.min()
max_date = df_actions.created_at.max()
dates = pd.date_range(start=min_date, end=max_date, freq="M", inclusive="both")
df_fill = dates.to_frame(index=False, name="dates")
@@ -436,18 +556,3 @@ def process_data(
final.loc[files] = None
return final
-
-
-def create_figure(df: pd.DataFrame):
- fig = px.imshow(
- df,
- labels=dict(x="Time", y="Directory Entries", color="Contributors"),
- color_continuous_scale=px.colors.sequential.deep,
- )
-
- fig["layout"]["yaxis"]["tickmode"] = "linear"
- fig["layout"]["height"] = 700
- fig["layout"]["coloraxis_colorbar_x"] = -0.15
- fig["layout"]["yaxis"]["side"] = "right"
-
- return fig
diff --git a/8Knot/pages/codebase/visualizations/contribution_file_heatmap.py b/8Knot/pages/codebase/visualizations/contribution_file_heatmap.py
index e7fccbe2..59a86caa 100644
--- a/8Knot/pages/codebase/visualizations/contribution_file_heatmap.py
+++ b/8Knot/pages/codebase/visualizations/contribution_file_heatmap.py
@@ -314,6 +314,65 @@ def process_data(
directory,
graph_view,
):
+ """
+ Processing steps
+
+    1 - Clean up file data to include only current files and relate each file in the repository to the PRs that impact it.
+    2 - For a given level in the directory tree, aggregate the list of PRs for sub-directories and for individual files at that level.
+    3 - For each PR, identify its open and merge dates.
+    4 - Transform the dataframe so columns are months holding counts of PR open/merge dates in that month and rows are the files/subdirectories.
+ """
+
+ df_file = df_file_clean(df_file, df_file_pr)
+
+ df_dynamic_directory = pr_per_directory_value(directory, df_file)
+
+    # workaround for using helper functions; will clean up later
+ if df_dynamic_directory.empty:
+ return df_dynamic_directory
+
+ df_dynamic_directory = pr_to_dates(df_pr, df_dynamic_directory, graph_view)
+
+ final = file_pr_activity_by_month(df_dynamic_directory, df_pr, graph_view)
+
+ return final
+
+
+def create_figure(df: pd.DataFrame, graph_view):
+ legend_title = "PRs Opened"
+ if graph_view == "merged_at":
+ legend_title = "PRs Merged"
+
+ fig = px.imshow(
+ df,
+ labels=dict(x="Time", y="Directory Entries", color=legend_title),
+ color_continuous_scale=px.colors.sequential.deep,
+ )
+
+ fig["layout"]["yaxis"]["tickmode"] = "linear"
+ fig["layout"]["height"] = 700
+ fig["layout"]["coloraxis_colorbar_x"] = -0.15
+ fig["layout"]["yaxis"]["side"] = "right"
+
+ return fig
+
+
+def df_file_clean(df_file: pd.DataFrame, df_file_pr: pd.DataFrame):
+ """
+ This function cleans the df_file data and combines it with the related pull request ids
+
+ Args:
+ -----
+ df_file : Pandas Dataframe
+ Dataframe with the output of the repo_files_query
+
+    df_file_pr : Pandas Dataframe
+ Dataframe with the output of the pr_file_query
+
+ Returns:
+ --------
+    df_file: df with file and the pull_request_ids of PRs that include the file
+ """
# strings to hold the values for each column (always the same for every row of this query)
repo_name = df_file["repo_name"].iloc[0]
repo_path = df_file["repo_path"].iloc[0]
@@ -323,9 +382,6 @@ def process_data(
path_slice = repo_id + "-" + repo_path + "/" + repo_name + "/"
df_file["file_path"] = df_file["file_path"].str.rsplit(path_slice, n=1).str[1]
- # drop columns not in the most recent collection
- df_file = df_file[df_file["rl_analysis_date"] == df_file["rl_analysis_date"].max()]
-
# drop unneccessary columns not needed after preprocessing steps
df_file = df_file.reset_index()
df_file.drop(["index", "repo_name", "repo_path", "rl_analysis_date"], axis=1, inplace=True)
@@ -341,9 +397,29 @@ def process_data(
df_file_pr = df_file_pr.groupby("file_path")["pull_request_id"].apply(list)
# Left join on df_files to only get the files that are currently in the repository
- # and the contributors that have ever opened a pr that included edits on the file
+ # and the prs that included edits on the file
df_file = pd.merge(df_file, df_file_pr, on="file_path", how="left")
+ return df_file
+
+
+def pr_per_directory_value(directory, df_file):
+ """
+ This function gets the files in the specified directory, groups together any files in
+ subdirectories, and creates a list of pull_request_ids that touched those files
+
+ Args:
+ -----
+ directory : string
+ Output from the directory drop down
+
+ df_file : Pandas Dataframe
+ Dataframe with file and related pull_request_id information
+
+ Returns:
+ --------
+    df_dynamic_directory: df with the file and subdirectories and the pull_request_ids of their PRs
+ """
# determine directory level to use in later step
level = directory.count("/")
if directory == "Top Level Directory":
@@ -361,7 +437,7 @@ def process_data(
group_column = level + 1
# Groupby the level above the selected directory for all files nested in folders are together.
- # For each, create a list of all of pull request that include that file
+    # For each, create a list of all of the pull requests that include that file
df_dynamic_directory = (
df_dynamic_directory.groupby(group_column)["pull_request_id"]
.sum()
@@ -377,6 +453,27 @@ def process_data(
lambda row: set(row.pull_request_id),
axis=1,
)
+ return df_dynamic_directory
+
+
+def pr_to_dates(df_pr: pd.DataFrame, df_dynamic_directory: pd.DataFrame, graph_view):
+ """
+    This function creates a df with the files and the open and merge dates of the PRs that
+ touch each file or subdirectory.
+
+ Args:
+ -----
+ df_pr : Pandas Dataframe
+ Dataframe with pull request data
+
+ df_dynamic_directory : Pandas Dataframe
+ Dataframe with file and related pull_request_id information
+
+ Returns:
+ --------
+    df_dynamic_directory: df with the file and subdirectories and the open and merge dates
+    of the PRs that touch each file or subdirectory.
+ """
# date reformating
df_pr["created_at"] = pd.to_datetime(df_pr["created_at"], utc=True)
@@ -407,6 +504,26 @@ def process_data(
# reformat into each row being a directory value and a date of one of the pull request dates
df_dynamic_directory = df_dynamic_directory.explode(graph_view)
+ return df_dynamic_directory
+
+
+def file_pr_activity_by_month(df_dynamic_directory: pd.DataFrame, df_pr: pd.DataFrame, graph_view):
+ """
+ This function transforms the df_dynamic_directory to be counts of open or merged prs by month.
+
+ Args:
+ -----
+ df_dynamic_directory : Pandas Dataframe
+        Dataframe with file and related pull_request_id information
+
+ df_pr : Pandas Dataframe
+ Dataframe with pull request data
+
+ Returns:
+ --------
+ df_final: df with files and subdirectories as rows and the months as columns
+ """
+
# get files that have no pull requests and remove from set to prevent errors in grouper function
no_contribs = df_dynamic_directory["directory_value"][df_dynamic_directory[graph_view].isnull()].tolist()
@@ -438,22 +555,3 @@ def process_data(
final.loc[files] = None
return final
-
-
-def create_figure(df: pd.DataFrame, graph_view):
- legend_title = "PRs Opened"
- if graph_view == "merged_at":
- legend_title = "PRs Merged"
-
- fig = px.imshow(
- df,
- labels=dict(x="Time", y="Directory Entries", color=legend_title),
- color_continuous_scale=px.colors.sequential.deep,
- )
-
- fig["layout"]["yaxis"]["tickmode"] = "linear"
- fig["layout"]["height"] = 700
- fig["layout"]["coloraxis_colorbar_x"] = -0.15
- fig["layout"]["yaxis"]["side"] = "right"
-
- return fig
diff --git a/8Knot/pages/codebase/visualizations/reviewer_file_heatmap.py b/8Knot/pages/codebase/visualizations/reviewer_file_heatmap.py
index 2a799b6d..9020eba3 100644
--- a/8Knot/pages/codebase/visualizations/reviewer_file_heatmap.py
+++ b/8Knot/pages/codebase/visualizations/reviewer_file_heatmap.py
@@ -80,6 +80,7 @@
classNames={"values": "dmc-multiselect-custom"},
searchable=True,
clearable=False,
+ value="Top Level Directory",
),
],
className="me-2",
@@ -215,7 +216,7 @@ def directory_dropdown(repo_id):
# add top level directory to the list of directories
directories.insert(0, "Top Level Directory")
- logging.warning(f"DIRECTORY DROPDOWN - FINISHED")
+ logging.warning(f"REVIEWER DIRECTORY DROPDOWN - FINISHED")
return directories, "Top Level Directory"
@@ -224,18 +225,19 @@ def directory_dropdown(repo_id):
@callback(
Output(f"{PAGE}-{VIZ_ID}", "figure"),
[
+ Input("repo-choices", "data"),
Input(f"repo-{PAGE}-{VIZ_ID}", "value"),
Input(f"directory-{PAGE}-{VIZ_ID}", "value"),
Input("bot-switch", "value"),
],
background=True,
)
-def reviewer_file_heatmap_graph(repo_id, directory, bot_switch):
+def reviewer_file_heatmap_graph(searchbar_repos, repo_id, directory, bot_switch):
start = time.perf_counter()
logging.warning(f"{VIZ_ID}- START")
# get dataframes of data from cache
- df_file, df_actions, df_file_cntbs = multi_query_helper([repo_id])
+ df_file, df_actions, df_file_cntbs = multi_query_helper(searchbar_repos, [repo_id])
# test if there is data
if df_file.empty or df_actions.empty or df_file_cntbs.empty:
@@ -255,7 +257,7 @@ def reviewer_file_heatmap_graph(repo_id, directory, bot_switch):
return fig
-def multi_query_helper(repos):
+def multi_query_helper(searchbar_repos, repo):
"""
For reviewer_file_heatmap_graph-
hack to put all of the cache-retrieval
@@ -263,32 +265,32 @@ def multi_query_helper(repos):
"""
# wait for data to asynchronously download and become available.
- while not_cached := cf.get_uncached(func_name=rfq.__name__, repolist=repos):
+ while not_cached := cf.get_uncached(func_name=rfq.__name__, repolist=repo):
logging.warning(f"CONTRIBUTOR FILE HEATMAP - WAITING ON DATA TO BECOME AVAILABLE")
time.sleep(0.5)
# wait for data to asynchronously download and become available.
- while not_cached := cf.get_uncached(func_name=cnq.__name__, repolist=repos):
+ while not_cached := cf.get_uncached(func_name=cnq.__name__, repolist=searchbar_repos):
logging.warning(f"CONTRIBUTOR FILE HEATMAP - WAITING ON DATA TO BECOME AVAILABLE")
time.sleep(0.5)
# wait for data to asynchronously download and become available.
- while not_cached := cf.get_uncached(func_name=cpfq.__name__, repolist=repos):
+ while not_cached := cf.get_uncached(func_name=cpfq.__name__, repolist=repo):
logging.warning(f"CONTRIBUTOR FILE HEATMAP - WAITING ON DATA TO BECOME AVAILABLE")
time.sleep(0.5)
# GET ALL DATA FROM POSTGRES CACHE
df_file = cf.retrieve_from_cache(
tablename=rfq.__name__,
- repolist=repos,
+ repolist=repo,
)
df_actions = cf.retrieve_from_cache(
tablename=cnq.__name__,
- repolist=repos,
+ repolist=searchbar_repos,
)
df_file_cntrbs = cf.retrieve_from_cache(
tablename=cpfq.__name__,
- repolist=repos,
+ repolist=repo,
)
# necessary preprocessing steps that were lifted out of the querying step
@@ -400,13 +402,13 @@ def df_file_clean(df_file: pd.DataFrame, df_file_cntbs: pd.DataFrame, bot_switch
lambda row: [x for x in row.reviewer_ids],
axis=1,
)
-
return df_file
def cntrb_per_directory_value(directory, df_file):
"""
- This function cleans the df_file data and combines it with the related reviewer cntrb_ids
+ This function gets the files in the specified directory, groups together any files in
+    subdirectories, and creates a list of their reviewers' cntrb_ids.
Args:
-----
@@ -459,12 +461,12 @@ def cntrb_per_directory_value(directory, df_file):
def cntrb_to_last_activity(df_actions: pd.DataFrame, df_dynamic_directory: pd.DataFrame):
"""
- This function created a df with the files and the the dates of the most recent activity for each cntrb_id.
+    This function creates a df with the files and the dates of the most recent activity for each cntrb_id.
Args:
-----
- df_actions : string
- Output from the directory drop down
+ df_actions : Pandas Dataframe
+ Dataframe with contributor activity
df_dynamic_directory : Pandas Dataframe
Dataframe with file and related reviewer_id information
@@ -507,16 +509,16 @@ def cntrb_to_last_activity(df_actions: pd.DataFrame, df_dynamic_directory: pd.Da
def file_cntrb_activity_by_month(df_dynamic_directory: pd.DataFrame, df_actions: pd.DataFrame):
"""
- This function created a df with the files and the the dates of the most recent activity for each cntrb_id.
+ This function transforms the df_dynamic_directory to be counts of "last seen" reviewers by month.
Args:
-----
- df_actions : string
- Output from the directory drop down
-
df_dynamic_directory : Pandas Dataframe
Dataframe with file and related reviewer_id information
+ df_actions : Pandas Dataframe
+ Dataframe with contributor activity
+
Returns:
--------
df_final: df with files and subdirectories as rows and the months as columns
@@ -531,8 +533,9 @@ def file_cntrb_activity_by_month(df_dynamic_directory: pd.DataFrame, df_actions:
there will be a column for every month even if there is no "last contribution" date in it. This greatly
improves the heatmap ploting"""
- # dates based on action so it represents the length of the project
- min_date = df_actions.created_at.min()
+    # dates based on actions so the range represents the length of the project; min based on PR
+    # open date to avoid unreliable committer-supplied dates
+ min_date = df_actions[df_actions["Action"] == "PR Opened"].created_at.min()
max_date = df_actions.created_at.max()
dates = pd.date_range(start=min_date, end=max_date, freq="M", inclusive="both")
df_fill = dates.to_frame(index=False, name="dates")
diff --git a/8Knot/pages/contributions/visualizations/cntrb_pr_assignment.py b/8Knot/pages/contributions/visualizations/cntrb_pr_assignment.py
index b3907a07..0376975d 100644
--- a/8Knot/pages/contributions/visualizations/cntrb_pr_assignment.py
+++ b/8Knot/pages/contributions/visualizations/cntrb_pr_assignment.py
@@ -90,7 +90,7 @@
width=2,
),
dbc.Alert(
- children="No contributors meet assignment requirement",
+ children="No contributors in date range meet assignment requirement",
id=f"check-alert-{PAGE}-{VIZ_ID}",
dismissable=True,
fade=False,
@@ -195,6 +195,11 @@ def cntrib_pr_assignment_graph(repolist, interval, assign_req, start_date, end_d
df = process_data(df, interval, assign_req, start_date, end_date)
+    # test if any data meets the criteria
+ if df.empty:
+ logging.warning(f"{VIZ_ID} - NO DATA IN CRITERIA AVAILABLE")
+ return nodata_graph, True
+
fig = create_figure(df, interval)
logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}")
@@ -222,10 +227,6 @@ def process_data(df: pd.DataFrame, interval, assign_req, start_date, end_date):
# create list of all contributors that meet the assignment requirement
contributors = df_contrib["assignee"][df_contrib["count"] >= assign_req].to_list()
- # no update if there are not any contributors that meet the criteria
- if len(contributors) == 0:
- return dash.no_update, True
-
# filter values based on date picker
if start_date is not None:
df = df[df.created_at >= start_date]
@@ -235,6 +236,10 @@ def process_data(df: pd.DataFrame, interval, assign_req, start_date, end_date):
# only include contributors that meet the criteria
df = df.loc[df["assignee"].isin(contributors)]
+    # check if any data meets the contributor and date range criteria
+ if df.empty:
+ return pd.DataFrame()
+
# first and last elements of the dataframe are the
# earliest and latest events respectively
earliest = df["created_at"].min()
diff --git a/8Knot/pages/contributions/visualizations/cntrib_issue_assignment.py b/8Knot/pages/contributions/visualizations/cntrib_issue_assignment.py
index 0d5ff9e3..eb7b33b6 100644
--- a/8Knot/pages/contributions/visualizations/cntrib_issue_assignment.py
+++ b/8Knot/pages/contributions/visualizations/cntrib_issue_assignment.py
@@ -90,7 +90,7 @@
width=2,
),
dbc.Alert(
- children="No contributors meet assignment requirement",
+ children="No contributors in date range meet assignment requirement",
id=f"check-alert-{PAGE}-{VIZ_ID}",
dismissable=True,
fade=False,
@@ -192,6 +192,11 @@ def cntrib_issue_assignment_graph(repolist, interval, assign_req, start_date, en
df = process_data(df, interval, assign_req, start_date, end_date)
+    # test if any data meets the criteria
+ if df.empty:
+ logging.warning(f"{VIZ_ID} - NO DATA IN CRITERIA AVAILABLE")
+ return nodata_graph, True
+
fig = create_figure(df, interval)
logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}")
@@ -219,10 +224,6 @@ def process_data(df: pd.DataFrame, interval, assign_req, start_date, end_date):
# create list of all contributors that meet the assignment requirement
contributors = df_contrib["assignee"][df_contrib["count"] >= assign_req].to_list()
- # no update if there are not any contributors that meet the criteria
- if len(contributors) == 0:
- return dash.no_update, True
-
# filter values based on date picker
if start_date is not None:
df = df[df.created_at >= start_date]
@@ -232,6 +233,10 @@ def process_data(df: pd.DataFrame, interval, assign_req, start_date, end_date):
# only include contributors that meet the criteria
df = df.loc[df["assignee"].isin(contributors)]
+    # check if any data meets the contributor and date range criteria
+ if df.empty:
+ return pd.DataFrame()
+
# first and last elements of the dataframe are the
# earliest and latest events respectively
earliest = df["created_at"].min()
diff --git a/8Knot/pages/contributions/visualizations/pr_over_time.py b/8Knot/pages/contributions/visualizations/pr_over_time.py
index 255b896a..3dbcb813 100644
--- a/8Knot/pages/contributions/visualizations/pr_over_time.py
+++ b/8Knot/pages/contributions/visualizations/pr_over_time.py
@@ -233,7 +233,7 @@ def create_figure(
hovertemplate=hover + "
Created: %{y}
" + "",
offsetgroup=0,
marker=dict(color=color_seq[2]),
- name="created_at",
+ name="Opened",
)
fig.add_bar(
x=df_closed_merged["Date"],
diff --git a/8Knot/pages/contributors/visualizations/contrib_importance_over_time.py b/8Knot/pages/contributors/visualizations/contrib_importance_over_time.py
index 0d775d4c..63ce11cf 100644
--- a/8Knot/pages/contributors/visualizations/contrib_importance_over_time.py
+++ b/8Knot/pages/contributors/visualizations/contrib_importance_over_time.py
@@ -65,30 +65,7 @@
dbc.Row(
[
dbc.Label(
- "Threshold:",
- html_for=f"threshold-{PAGE}-{VIZ_ID}",
- width="auto",
- ),
- dbc.Col(
- [
- dcc.Slider(
- id=f"threshold-{PAGE}-{VIZ_ID}",
- min=10,
- max=95,
- value=50,
- marks={i: f"{i}%" for i in range(10, 100, 5)},
- ),
- ],
- className="me-2",
- width=10,
- ),
- ],
- align="center",
- ),
- dbc.Row(
- [
- dbc.Label(
- "Window Width:",
+ "Window Width (Months):",
html_for=f"window-width-{PAGE}-{VIZ_ID}",
width="auto",
),
@@ -106,7 +83,7 @@
width=2,
),
dbc.Label(
- "Step Size:",
+ "Step Size (Months):",
html_for=f"step-size-{PAGE}-{VIZ_ID}",
width="auto",
),
@@ -137,55 +114,35 @@
dbc.Row(
[
dbc.Label(
- "Filter Out Contributors with Keyword(s) in Login:",
- html_for=f"patterns-{PAGE}-{VIZ_ID}",
+ "Threshold:",
+ html_for=f"threshold-{PAGE}-{VIZ_ID}",
width="auto",
),
dbc.Col(
[
- dmc.MultiSelect(
- id=f"patterns-{PAGE}-{VIZ_ID}",
- placeholder="Bot filter values",
- data=[
- {"value": "bot", "label": "bot"},
- ],
- classNames={"values": "dmc-multiselect-custom"},
- creatable=True,
- searchable=True,
+ dcc.Slider(
+ id=f"threshold-{PAGE}-{VIZ_ID}",
+ min=10,
+ max=95,
+ value=50,
+ marks={i: f"{i}%" for i in range(10, 100, 5)},
),
],
className="me-2",
+ width=9,
),
- ],
- align="center",
- ),
- dbc.Row(
- [
dbc.Col(
- dcc.DatePickerRange(
- id=f"date-picker-range-{PAGE}-{VIZ_ID}",
- min_date_allowed=dt.date(2005, 1, 1),
- max_date_allowed=dt.date.today(),
- initial_visible_month=dt.date(dt.date.today().year, 1, 1),
- clearable=True,
+ dbc.Button(
+ "About Graph",
+ id=f"popover-target-{PAGE}-{VIZ_ID}",
+ color="secondary",
+ size="sm",
),
width="auto",
- ),
- dbc.Col(
- [
- dbc.Button(
- "About Graph",
- id=f"popover-target-{PAGE}-{VIZ_ID}",
- color="secondary",
- size="sm",
- ),
- ],
- width="auto",
style={"paddingTop": ".5em"},
),
],
align="center",
- justify="between",
),
]
),
@@ -223,33 +180,21 @@ def graph_title(window_width):
Output(f"check-alert-{PAGE}-{VIZ_ID}", "is_open"),
[
Input("repo-choices", "data"),
- Input(f"patterns-{PAGE}-{VIZ_ID}", "value"),
Input(f"threshold-{PAGE}-{VIZ_ID}", "value"),
Input(f"window-width-{PAGE}-{VIZ_ID}", "value"),
Input(f"step-size-{PAGE}-{VIZ_ID}", "value"),
- Input(f"date-picker-range-{PAGE}-{VIZ_ID}", "start_date"),
- Input(f"date-picker-range-{PAGE}-{VIZ_ID}", "end_date"),
Input("bot-switch", "value"),
],
background=True,
)
-def create_contrib_prolificacy_over_time_graph(
- repolist,
- patterns,
- threshold,
- window_width,
- step_size,
- start_date,
- end_date,
- bot_switch,
-):
+def create_contrib_prolificacy_over_time_graph(repolist, threshold, window_width, step_size, bot_switch):
# wait for data to asynchronously download and become available.
while not_cached := cf.get_uncached(func_name=ctq.__name__, repolist=repolist):
logging.warning(f"{VIZ_ID}- WAITING ON DATA TO BECOME AVAILABLE")
time.sleep(0.5)
- logging.warning(f"{VIZ_ID} - START")
start = time.perf_counter()
+ logging.warning(f"{VIZ_ID} - START")
# GET ALL DATA FROM POSTGRES CACHE
df = cf.retrieve_from_cache(
@@ -263,10 +208,6 @@ def create_contrib_prolificacy_over_time_graph(
if bot_switch:
df = df[~df["cntrb_id"].isin(app.bots_list)]
- # data ready.
- start = time.perf_counter()
- logging.warning(f"{VIZ_ID}- START")
-
# test if there is data
if df.empty:
logging.warning(f"{VIZ_ID} - NO DATA AVAILABLE")
@@ -276,34 +217,26 @@ def create_contrib_prolificacy_over_time_graph(
if step_size > window_width:
return dash.no_update, True
- df_final = process_data(df, patterns, threshold, window_width, step_size, start_date, end_date)
+ df = process_data(df, threshold, window_width, step_size)
- fig = create_figure(df_final, threshold, step_size)
+ fig = create_figure(df, threshold, step_size)
logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}")
return fig, False
-def process_data(df, patterns, threshold, window_width, step_size, start_date, end_date):
+def process_data(df, threshold, window_width, step_size):
# convert to datetime objects rather than strings
df["created_at"] = pd.to_datetime(df["created_at"], utc=True)
# order values chronologically by created_at date
df = df.sort_values(by="created_at", ascending=True)
- # if the start_date and/or the end date is not specified set them to the beginning and most recent created_at date
- if start_date is None:
- start_date = df["created_at"].min()
- if end_date is None:
- end_date = df["created_at"].max()
-
- if patterns:
- # remove rows where Login column value contains the substring 'bot'
- patterns_mask = df["login"].str.contains("|".join(patterns), na=False)
- df = df[~patterns_mask]
+    # get start and end dates from the created_at column
+ start_date = df["created_at"].min()
+ end_date = df["created_at"].max()
- # threshold is an integer value eg. 10, 20,..., 90 since dcc.Slider only accepts integers as values
- # divide by 100 to convert it to a decimal representation of a percentage eg. 0.10, 0.20,..., 0.90
+ # convert percent to its decimal representation
threshold = threshold / 100
# create bins with a size equivalent to the the step size starting from the start date up to the end date
diff --git a/README.md b/README.md
index cb14d3c1..0102af7d 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# 8Knot (Explorer)
+# 8Knot
![Pre-Commit](https://github.com/JamesKunstle/explorer/actions/workflows/pre-commit.yml/badge.svg)
![Build-Push](https://github.com/JamesKunstle/explorer/actions/workflows/build-push-quay.yml/badge.svg)
@@ -219,9 +219,41 @@ docker && docker compose || docker-compose
(above just runs docker and docker-compose and checks if both work)
-NOTE: `podman-compose` has been generally verified to work as well, but our preference is `docker compose`
-`podman-compose` doesn't support the `--scale` flag as we would expect so we don't use it for our own
-development applications, but the application is built to work with the minimum number of containers. "your mileage my vary".
+NOTE: As of 3/29/24 we recommend using `Podman` and `Podman Desktop` instead of `Docker` and `Docker Desktop`. They will be our default development environment going forward.
+There are many guides to transitioning from `Docker` (Desktop) to `Podman` (Desktop), but here's a rough outline of our "golden path."
+
+1. Uninstall `Docker Desktop`. This will require a GUI uninstall and looking through your apps + filesystem for remnants.
+2. Install `Podman` and `Podman Desktop`. You'll also have to explicitly provision a `Podman Machine` if you're on a Mac.
+3. Enable the "Docker compatibility add-on" available in `Podman Desktop`. This will route traffic headed for a Docker machine to the running Podman machine. (Under the hood, this points Podman at the Docker socket."
+4. Install `docker-compose` (see the example below). This is a standalone, open-source tool that `podman compose` delegates compose responsibilities to.
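+
+For example, assuming you use Homebrew on macOS (package names vary by platform):
+
+```shell
+brew install docker-compose
+```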
+
+At this point, the `Podman` docs claim that `Podman` should work as a drop-in replacement for `Docker`. However, here are two steps that we noticed were necessary in some cases.
+
+1. In `$HOME/.docker/config.json`, replace "credsStore" with "credStore" (minus an 's') to solve registry credentialing problems, as shown in the sketch below.
+2. Set the `DOCKER_HOST` environment variable to the `Podman machine`'s socket on your system, which you can find in the `Resources` tab of `Podman Desktop`. The path starts with `unix://` (see the sketch below).
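+
+For example, a hypothetical `$HOME/.docker/config.json` after the rename (your registry entries and credential helper value will differ):
+
+```json
+{
+    "auths": {
+        "quay.io": {}
+    },
+    "credStore": "desktop"
+}
+```
+
+And a sketch of the `DOCKER_HOST` export, with a placeholder path standing in for the socket shown in `Podman Desktop`:
+
+```shell
+export DOCKER_HOST=unix:///path/to/your/podman/machine/socket.sock
+```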
### Build and Run