diff --git a/requirements.txt b/requirements.txt index f70acc3..cb2d9ca 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,6 @@ mypy docformatter pre-commit pydocstyle==6.1.1 -statsmodels>=0.12.2 -pandas==1.3.1 +statsmodels==0.14.1 +pandas==1.5.3 matplotlib \ No newline at end of file diff --git a/results/quantitative_analysis/README.md b/results/quantitative_analysis/README.md index ca39997..58a7c51 100644 --- a/results/quantitative_analysis/README.md +++ b/results/quantitative_analysis/README.md @@ -38,7 +38,7 @@ Mean scores for response usefulness and explanation user ratings for different q ![](mean_scores/means_explanation_ratings_explanation_presentation.png) -Mean scores for other response dimensions for different quality of the explanations and presentation mode can be generated using .... + ## Data distribution diff --git a/scripts/data_analysis/anova.py b/scripts/data_analysis/anova.py index 9fc3c22..2423be2 100644 --- a/scripts/data_analysis/anova.py +++ b/scripts/data_analysis/anova.py @@ -8,99 +8,183 @@ def effect_size( - data_df: pd.DataFrame, aov_table: pd.DataFrame + data_df: pd.DataFrame, aov_table ) -> Tuple[List[float], List[str]]: - """Calculates the effect size for each parameter. + """Computes the effect size for the ANOVA table. Args: - aov_table: Pd.DataFrame containing the ANOVA table. - data_df: Dataframe containing the data. + data_df: Dataframe containing the results from a user study. + aov_table: ANOVA table. Returns: - A tuple containing the effect size and the size. + A list of effect sizes and a list of sizes. """ w2facts = [] sizes = [] for id, row in aov_table.iterrows(): - w2fact = round((row['df'] * (row['F'] - 1)) / (row['df'] * (row['F'] - 1) + len(data_df)), 3) + w2fact = round( + (row["df"] * (row["F"] - 1)) + / (row["df"] * (row["F"] - 1) + len(data_df)), + 3, + ) w2facts.append(w2fact) if w2fact >= 0.14: - size = 'L' + size = "L" elif w2fact >= 0.06 and w2fact < 0.14: - size = 'M' + size = "M" elif w2fact >= 0.00 and w2fact < 0.06: - size = 'S' + size = "S" else: - size = '-' + size = "-" sizes.append(size) return w2facts, sizes def one_way_anova( - data_df: pd.DataFrame, - answer_feature: List[str], - independent_variable: str, -): - """Runs a one-way ANOVA test. + data_df: pd.DataFrame, answer_feature: str, independent_variable: str +) -> Tuple[str, str, str]: + """Computes the one-way ANOVA for the given data. Args: - data_df: Dataframe containing the data. + data_df: Dataframe containing the results from a user study. answer_feature: Answer feature. independent_variable: Independent variable. + + Returns: + A tuple containing the answer feature, the independent variable, and the + p-value. """ - dataframe = pd.DataFrame({independent_variable: list(data_df[independent_variable]), - answer_feature: list(data_df[answer_feature])}) + dataframe = pd.DataFrame( + { + independent_variable: list(data_df[independent_variable]), + answer_feature: list(data_df[answer_feature]), + } + ) - formula = answer_feature + ' ~ C(' + independent_variable + ') ' + formula = answer_feature + " ~ C(" + independent_variable + ") " model = ols(formula, dataframe).fit() aov_table = anova_lm(model, typ=2) w2facts, sizes = effect_size(data_df, aov_table) - aov_table['w2facts'] = w2facts - aov_table['sizes'] = sizes + aov_table["w2facts"] = w2facts + aov_table["sizes"] = sizes + + # print(aov_table.round(3)) for _, row in aov_table.iterrows(): - if row.name != 'Residual': - param = row.name.replace('C(', '').replace(')', '').replace('answers_ids', 'answer condition').replace('questions_ids', 'question').lower() - fvalue = round(row['F'], 3) - pvalue = round(row['PR(>F)'], 3) + if row.name != "Residual": + param = ( + row.name.replace("C(", "") + .replace(")", "") + .replace("answers_ids", "answer condition") + .replace("questions_ids", "question") + .lower() + ) + fvalue = round(row["F"], 3) + pvalue = round(row["PR(>F)"], 3) if pvalue <= 0.05: - return str(answer_feature), '\\textbf{' + str(param) + '}', '\\bfseries' + str(pvalue) + ' { (' + str(row['sizes']) + ')}' + return ( + str(answer_feature), + "\\textbf{" + str(param) + "}", + "\\bfseries" + + str(pvalue) + + " { (" + + str(row["sizes"]) + + ")}", + ) else: - return str(answer_feature), str(param), str(pvalue) + ' { (' + str(row['sizes']) + ')}' + return ( + str(answer_feature), + str(param), + str(pvalue) + " { (" + str(row["sizes"]) + ")}", + ) + +def two_way_anova( + data_df: pd.DataFrame, + answer_feature: str, + first_independent_variable: str, + second_independent_variable: str, +) -> Tuple[str, str, str]: + """Computes the two-way ANOVA for the given data. -def two_way_anova (data_df, answer_feature, first_independent_variable, second_independent_variable): - dataframe = pd.DataFrame({first_independent_variable: list(data_df[first_independent_variable]), - second_independent_variable: list(data_df[second_independent_variable]), - answer_feature: list(data_df[answer_feature])}) + Args: + data_df: Dataframe containing the results from a user study. + answer_feature: Answer feature. + first_independent_variable: First independent variable. + second_independent_variable: Second independent variable. - formula = answer_feature + ' ~ C(' + first_independent_variable + ') + C(' + second_independent_variable + ') + C(' + first_independent_variable + '):C(' + second_independent_variable + ')' + Returns: + A tuple containing the answer feature, the independent variable, and the + p-value. + """ + dataframe = pd.DataFrame( + { + first_independent_variable: list( + data_df[first_independent_variable] + ), + second_independent_variable: list( + data_df[second_independent_variable] + ), + answer_feature: list(data_df[answer_feature]), + } + ) + + formula = ( + answer_feature + + " ~ C(" + + first_independent_variable + + ") + C(" + + second_independent_variable + + ") + C(" + + first_independent_variable + + "):C(" + + second_independent_variable + + ")" + ) model = ols(formula, dataframe).fit() aov_table = anova_lm(model, typ=2) w2facts, sizes = effect_size(data_df, aov_table) - aov_table['w2facts'] = w2facts - aov_table['sizes'] = sizes + aov_table["w2facts"] = w2facts + aov_table["sizes"] = sizes # print(aov_table.round(3)) for _, row in aov_table.iterrows(): - if row.name != 'Residual': - param = row.name.replace('C(', '').replace(')', '').replace('answers_ids', 'answer condition').replace('questions_ids', 'question').lower() - fvalue = round(row['F'], 3) - pvalue = round(row['PR(>F)'], 3) + if row.name != "Residual": + param = ( + row.name.replace("C(", "") + .replace(")", "") + .replace("answers_ids", "answer condition") + .replace("questions_ids", "question") + .lower() + ) + fvalue = round(row["F"], 3) + pvalue = round(row["PR(>F)"], 3) if ":" in param: if pvalue <= 0.05: - return str(answer_feature), str(param), '\\textbf{' + str(pvalue) + ' (' + str(row['sizes']) + ')}' + return ( + str(answer_feature), + str(param), + "\\textbf{" + + str(pvalue) + + " (" + + str(row["sizes"]) + + ")}", + ) else: - return str(answer_feature), str(param), str(pvalue) + ' (' + str(row['sizes']) + ')' + return ( + str(answer_feature), + str(param), + str(pvalue) + " (" + str(row["sizes"]) + ")", + ) if __name__ == "__main__": aggregated_data = pd.read_csv( - "results/user_study_output/output_processed_aggregated.csv" + "results/user_study_output/all_merged_processed_aggregated.csv" ) for feature in [ @@ -125,7 +209,6 @@ def two_way_anova (data_df, answer_feature, first_independent_variable, second_i if args.type == "one-way": print("All conditions (EC1–EC10)") - features = [ "usefulness", "relevance", @@ -167,7 +250,7 @@ def two_way_anova (data_df, answer_feature, first_independent_variable, second_i + " & ".join(list(pvalues.values())) + " \\\\" ) - print(len(aggregated_data)) + print("Only conditions with explanations (EC1–EC8)") aggregated_data_ec1_8 = aggregated_data[ @@ -192,7 +275,6 @@ def two_way_anova (data_df, answer_feature, first_independent_variable, second_i + " & ".join(list(pvalues.values())) + " \\\\" ) - print(len(aggregated_data_ec1_8)) elif args.type == "two-way": print("Interactions with Query") diff --git a/scripts/data_analysis/data_distribution.py b/scripts/data_analysis/data_distribution.py index 55a974c..654ad27 100644 --- a/scripts/data_analysis/data_distribution.py +++ b/scripts/data_analysis/data_distribution.py @@ -1,5 +1,5 @@ import collections -from typing import List +from typing import Dict, List import matplotlib.pyplot as plt import pandas as pd @@ -7,7 +7,7 @@ def process_data_for_distribution_plot( response_dimensions: List[str], data_df: pd.DataFrame, main_feature: str -): +) -> Dict[str, Dict[str, List[int]]]: """Processes the data for the distribution plot. Args: diff --git a/scripts/data_analysis/mean_scores.py b/scripts/data_analysis/mean_scores.py index 1c6a02c..01a4c98 100644 --- a/scripts/data_analysis/mean_scores.py +++ b/scripts/data_analysis/mean_scores.py @@ -1,11 +1,18 @@ -from typing import List +from typing import Dict, List import matplotlib.pyplot as plt import numpy as np import pandas as pd -def add_bar_labels(ax, bar, labels): +def add_bar_labels(ax, bar, labels) -> None: + """Add labels to the top of the bars in a bar plot. + + Args: + ax: The axis of the plot. + bar: The bars in the plot. + labels: The labels to add to the bars. + """ for rect, bar_mean in zip(bar, labels): height = rect.get_height() ax.text( @@ -25,7 +32,20 @@ def get_mean_usefulness_scores( data_cond_variable: str, resp_dim: str, data_df: pd.DataFrame, -): +) -> Dict[str, Dict[str, float]]: + """Gets the mean usefulness scores for explanations. + + Args: + dist_variable_variants: Variants of the distribution variable. + dist_variable: Distribution variable. + data_cond_variants: Variants of the data condition variable. + data_cond_variable: Data condition variable. + resp_dim: Response dimension. + data_df: Dataframe containing the results from a user study. + + Returns: + A dictionary containing the mean usefulness scores for explanations. + """ means = {} means["All data"] = {} for value in dist_variable_variants: diff --git a/scripts/results_processing.py b/scripts/results_processing.py index ffd8ae5..3eecac9 100644 --- a/scripts/results_processing.py +++ b/scripts/results_processing.py @@ -102,8 +102,6 @@ } ) - # print(additional_info_df) - metrics = [ "familiarity", "interest", @@ -161,8 +159,6 @@ row["Answer.explanation_" + str(question_id)] ) - # print(output['Input.query']) - # print(list(output['Input.query'])[0]) questions = ast.literal_eval(list(output["Input.query"])[0]) worker_ids = [] diff --git a/tests/__init__.py b/tests/__init__.py deleted file mode 100644 index f4c5f52..0000000 --- a/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Module level init for tests."""