
Commit

Missing docstrings and argument types
WerLaj committed Feb 7, 2024
1 parent 022642d commit 0da353e
Showing 7 changed files with 154 additions and 57 deletions.
4 changes: 2 additions & 2 deletions requirements.txt
@@ -5,6 +5,6 @@ mypy
docformatter
pre-commit
pydocstyle==6.1.1
statsmodels>=0.12.2
pandas==1.3.1
statsmodels==0.14.1
pandas==1.5.3
matplotlib
2 changes: 1 addition & 1 deletion results/quantitative_analysis/README.md
@@ -38,7 +38,7 @@ Mean scores for response usefulness and explanation user ratings for different q

![](mean_scores/means_explanation_ratings_explanation_presentation.png)

Mean scores for other response dimensions for different quality of the explanations and presentation mode can be generated using ....
<!-- Mean scores for other response dimensions for different quality of the explanations and presentation mode can be generated using .... -->

## Data distribution

170 changes: 126 additions & 44 deletions scripts/data_analysis/anova.py
@@ -8,99 +8,183 @@


def effect_size(
data_df: pd.DataFrame, aov_table: pd.DataFrame
data_df: pd.DataFrame, aov_table
) -> Tuple[List[float], List[str]]:
"""Calculates the effect size for each parameter.
"""Computes the effect size for the ANOVA table.
Args:
aov_table: Pd.DataFrame containing the ANOVA table.
data_df: Dataframe containing the data.
data_df: Dataframe containing the results from a user study.
aov_table: ANOVA table.
Returns:
A tuple containing the effect size and the size.
A list of effect sizes and a list of sizes.
"""
w2facts = []
sizes = []
for id, row in aov_table.iterrows():
w2fact = round((row['df'] * (row['F'] - 1)) / (row['df'] * (row['F'] - 1) + len(data_df)), 3)
w2fact = round(
(row["df"] * (row["F"] - 1))
/ (row["df"] * (row["F"] - 1) + len(data_df)),
3,
)
w2facts.append(w2fact)
if w2fact >= 0.14:
size = 'L'
size = "L"
elif w2fact >= 0.06 and w2fact < 0.14:
size = 'M'
size = "M"
elif w2fact >= 0.00 and w2fact < 0.06:
size = 'S'
size = "S"
else:
size = '-'
size = "-"
sizes.append(size)
return w2facts, sizes
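
The w2fact computed in the loop above matches the standard plug-in estimator for omega-squared (ω²), with N the number of rows in data_df, and the letter codes roughly follow the common medium (0.06) and large (0.14) cutoffs; in LaTeX form:

\omega^2 = \frac{df_{\mathrm{effect}}\,(F - 1)}{df_{\mathrm{effect}}\,(F - 1) + N},
\qquad
\text{size} =
\begin{cases}
\text{L} & \text{if } \omega^2 \ge 0.14 \\
\text{M} & \text{if } 0.06 \le \omega^2 < 0.14 \\
\text{S} & \text{if } 0 \le \omega^2 < 0.06 \\
\text{-} & \text{otherwise}
\end{cases}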


def one_way_anova(
data_df: pd.DataFrame,
answer_feature: List[str],
independent_variable: str,
):
"""Runs a one-way ANOVA test.
data_df: pd.DataFrame, answer_feature: str, independent_variable: str
) -> Tuple[str, str, str]:
"""Computes the one-way ANOVA for the given data.
Args:
data_df: Dataframe containing the data.
data_df: Dataframe containing the results from a user study.
answer_feature: Answer feature.
independent_variable: Independent variable.
Returns:
A tuple containing the answer feature, the independent variable, and the
p-value.
"""
dataframe = pd.DataFrame({independent_variable: list(data_df[independent_variable]),
answer_feature: list(data_df[answer_feature])})
dataframe = pd.DataFrame(
{
independent_variable: list(data_df[independent_variable]),
answer_feature: list(data_df[answer_feature]),
}
)

formula = answer_feature + ' ~ C(' + independent_variable + ') '
formula = answer_feature + " ~ C(" + independent_variable + ") "
model = ols(formula, dataframe).fit()
aov_table = anova_lm(model, typ=2)

w2facts, sizes = effect_size(data_df, aov_table)
aov_table['w2facts'] = w2facts
aov_table['sizes'] = sizes
aov_table["w2facts"] = w2facts
aov_table["sizes"] = sizes

# print(aov_table.round(3))

for _, row in aov_table.iterrows():
if row.name != 'Residual':
param = row.name.replace('C(', '').replace(')', '').replace('answers_ids', 'answer condition').replace('questions_ids', 'question').lower()
fvalue = round(row['F'], 3)
pvalue = round(row['PR(>F)'], 3)
if row.name != "Residual":
param = (
row.name.replace("C(", "")
.replace(")", "")
.replace("answers_ids", "answer condition")
.replace("questions_ids", "question")
.lower()
)
fvalue = round(row["F"], 3)
pvalue = round(row["PR(>F)"], 3)
if pvalue <= 0.05:
return str(answer_feature), '\\textbf{' + str(param) + '}', '\\bfseries' + str(pvalue) + ' { (' + str(row['sizes']) + ')}'
return (
str(answer_feature),
"\\textbf{" + str(param) + "}",
"\\bfseries"
+ str(pvalue)
+ " { ("
+ str(row["sizes"])
+ ")}",
)
else:
return str(answer_feature), str(param), str(pvalue) + ' { (' + str(row['sizes']) + ')}'
return (
str(answer_feature),
str(param),
str(pvalue) + " { (" + str(row["sizes"]) + ")}",
)
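
A minimal usage sketch for one_way_anova, assuming it runs alongside this module's imports; "usefulness" and "answers_ids" are column names taken from elsewhere in this script, while the toy values are purely illustrative:

import pandas as pd

# Toy data: two answer conditions (EC1, EC2) with three usefulness ratings each.
toy_df = pd.DataFrame(
    {
        "answers_ids": ["EC1", "EC1", "EC1", "EC2", "EC2", "EC2"],
        "usefulness": [4, 5, 4, 2, 3, 2],
    }
)

# Returns the answer feature, the factor label (rewritten to "answer condition",
# bolded if p <= 0.05), and the formatted p-value string with the
# effect-size letter appended.
feature, factor, pvalue_str = one_way_anova(toy_df, "usefulness", "answers_ids")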


def two_way_anova(
data_df: pd.DataFrame,
answer_feature: str,
first_independent_variable: str,
second_independent_variable: str,
) -> Tuple[str, str, str]:
"""Computes the two-way ANOVA for the given data.
def two_way_anova (data_df, answer_feature, first_independent_variable, second_independent_variable):
dataframe = pd.DataFrame({first_independent_variable: list(data_df[first_independent_variable]),
second_independent_variable: list(data_df[second_independent_variable]),
answer_feature: list(data_df[answer_feature])})
Args:
data_df: Dataframe containing the results from a user study.
answer_feature: Answer feature.
first_independent_variable: First independent variable.
second_independent_variable: Second independent variable.
formula = answer_feature + ' ~ C(' + first_independent_variable + ') + C(' + second_independent_variable + ') + C(' + first_independent_variable + '):C(' + second_independent_variable + ')'
Returns:
A tuple containing the answer feature, the independent variable, and the
p-value.
"""
dataframe = pd.DataFrame(
{
first_independent_variable: list(
data_df[first_independent_variable]
),
second_independent_variable: list(
data_df[second_independent_variable]
),
answer_feature: list(data_df[answer_feature]),
}
)

formula = (
answer_feature
+ " ~ C("
+ first_independent_variable
+ ") + C("
+ second_independent_variable
+ ") + C("
+ first_independent_variable
+ "):C("
+ second_independent_variable
+ ")"
)
model = ols(formula, dataframe).fit()
aov_table = anova_lm(model, typ=2)

w2facts, sizes = effect_size(data_df, aov_table)
aov_table['w2facts'] = w2facts
aov_table['sizes'] = sizes
aov_table["w2facts"] = w2facts
aov_table["sizes"] = sizes

# print(aov_table.round(3))

for _, row in aov_table.iterrows():
if row.name != 'Residual':
param = row.name.replace('C(', '').replace(')', '').replace('answers_ids', 'answer condition').replace('questions_ids', 'question').lower()
fvalue = round(row['F'], 3)
pvalue = round(row['PR(>F)'], 3)
if row.name != "Residual":
param = (
row.name.replace("C(", "")
.replace(")", "")
.replace("answers_ids", "answer condition")
.replace("questions_ids", "question")
.lower()
)
fvalue = round(row["F"], 3)
pvalue = round(row["PR(>F)"], 3)

if ":" in param:
if pvalue <= 0.05:
return str(answer_feature), str(param), '\\textbf{' + str(pvalue) + ' (' + str(row['sizes']) + ')}'
return (
str(answer_feature),
str(param),
"\\textbf{"
+ str(pvalue)
+ " ("
+ str(row["sizes"])
+ ")}",
)
else:
return str(answer_feature), str(param), str(pvalue) + ' (' + str(row['sizes']) + ')'
return (
str(answer_feature),
str(param),
str(pvalue) + " (" + str(row["sizes"]) + ")",
)
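
A sketch of a call to two_way_anova with the two factors referenced in this script; the pairing of factors and the response dimension are assumptions, and aggregated_data is the dataframe loaded in the __main__ block below. Note that only the interaction term's row is returned:

# Illustrative call; argument order follows the new signature above.
feature, interaction, pvalue_str = two_way_anova(
    aggregated_data, "usefulness", "questions_ids", "answers_ids"
)
# The fitted model is
#   usefulness ~ C(questions_ids) + C(answers_ids) + C(questions_ids):C(answers_ids)
# and the loop returns only the row whose name contains ":", i.e. the
# question x answer-condition interaction ("question:answer condition").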


if __name__ == "__main__":
aggregated_data = pd.read_csv(
"results/user_study_output/output_processed_aggregated.csv"
"results/user_study_output/all_merged_processed_aggregated.csv"
)

for feature in [
@@ -125,7 +209,6 @@ def two_way_anova (data_df, answer_feature, first_independent_variable, second_i

if args.type == "one-way":
print("All conditions (EC1–EC10)")

features = [
"usefulness",
"relevance",
@@ -167,7 +250,7 @@ def two_way_anova (data_df, answer_feature, first_independent_variable, second_i
+ " & ".join(list(pvalues.values()))
+ " \\\\"
)
print(len(aggregated_data))

print("Only conditions with explanations (EC1–EC8)")

aggregated_data_ec1_8 = aggregated_data[
@@ -192,7 +275,6 @@ def two_way_anova (data_df, answer_feature, first_independent_variable, second_i
+ " & ".join(list(pvalues.values()))
+ " \\\\"
)
print(len(aggregated_data_ec1_8))

elif args.type == "two-way":
print("Interactions with Query")
4 changes: 2 additions & 2 deletions scripts/data_analysis/data_distribution.py
@@ -1,13 +1,13 @@
import collections
from typing import List
from typing import Dict, List

import matplotlib.pyplot as plt
import pandas as pd


def process_data_for_distribution_plot(
response_dimensions: List[str], data_df: pd.DataFrame, main_feature: str
):
) -> Dict[str, Dict[str, List[int]]]:
"""Processes the data for the distribution plot.
Args:
26 changes: 23 additions & 3 deletions scripts/data_analysis/mean_scores.py
@@ -1,11 +1,18 @@
from typing import List
from typing import Dict, List

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


def add_bar_labels(ax, bar, labels):
def add_bar_labels(ax, bar, labels) -> None:
"""Add labels to the top of the bars in a bar plot.
Args:
ax: The axis of the plot.
bar: The bars in the plot.
labels: The labels to add to the bars.
"""
for rect, bar_mean in zip(bar, labels):
height = rect.get_height()
ax.text(
@@ -25,7 +32,20 @@ def get_mean_usefulness_scores(
data_cond_variable: str,
resp_dim: str,
data_df: pd.DataFrame,
):
) -> Dict[str, Dict[str, float]]:
"""Gets the mean usefulness scores for explanations.
Args:
dist_variable_variants: Variants of the distribution variable.
dist_variable: Distribution variable.
data_cond_variants: Variants of the data condition variable.
data_cond_variable: Data condition variable.
resp_dim: Response dimension.
data_df: Dataframe containing the results from a user study.
Returns:
A dictionary containing the mean usefulness scores for explanations.
"""
means = {}
means["All data"] = {}
for value in dist_variable_variants:
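
Given the new return annotation and the "All data" initialization above, the result appears to be a nested mapping from distribution-variable variant to per-condition mean scores; a hedged illustration of the assumed shape, with all keys and values invented for illustration:

# Assumed shape only; "variant_a" and the numbers are placeholders.
example_means = {
    "All data": {"EC1": 3.4, "EC2": 3.1},
    "variant_a": {"EC1": 3.6, "EC2": 2.9},
}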
4 changes: 0 additions & 4 deletions scripts/results_processing.py
@@ -102,8 +102,6 @@
}
)

# print(additional_info_df)

metrics = [
"familiarity",
"interest",
@@ -161,8 +159,6 @@
row["Answer.explanation_" + str(question_id)]
)

# print(output['Input.query'])
# print(list(output['Input.query'])[0])
questions = ast.literal_eval(list(output["Input.query"])[0])

worker_ids = []
1 change: 0 additions & 1 deletion tests/__init__.py

This file was deleted.
