From fee0b3159208c6b691a7d80e68958018dbd54a8d Mon Sep 17 00:00:00 2001
From: PaulineTL <pauline.theimer-lienhard@epfl.ch>
Date: Mon, 16 Dec 2024 15:26:42 +0100
Subject: [PATCH] delete scripts

---
 src/scripts/script.py | 136 ------------------------------------------
 1 file changed, 136 deletions(-)
 delete mode 100644 src/scripts/script.py

diff --git a/src/scripts/script.py b/src/scripts/script.py
deleted file mode 100644
index 48d5f88..0000000
--- a/src/scripts/script.py
+++ /dev/null
@@ -1,136 +0,0 @@
-import pandas as pd
-import scipy.stats as stats
-from statsmodels.stats.multicomp import pairwise_tukeyhsd
-
-def seasonal_region_abv_test(reviews, abv_category, rating_column):
-    """
-    Perform ANOVA to assess if there are significant differences in ratings between regions (South, Midwest, Northeast, and West)
-    across seasons for a specified ABV category (either 'low', 'medium', or 'high')and rating column (either aroma, palate, taste, appearance, overall, or rating). If the ANOVA test is significant, it runs Tukey's HSD test.
-
-    Parameters:
-    - reviews (pd.DataFrame): dataset containing reviews
-    - abv_category (str): ABV category to filter by
-    - rating_column (str): rating column to analyze
-
-    Returns:
-    - dict: Dictionary with seasons as keys and p-values/Tukey's HSD results as values
-    """
-
-    filtered_reviews = reviews[reviews['abv_category'] == abv_category]
-
-    results = {}
-    for season in reviews['season'].unique() \
-            :
-
-        season_data = filtered_reviews[filtered_reviews['season'] == season]
-
-        ratings_by_region = [season_data[season_data['region'] == region][rating_column]
-                             for region in season_data['region'].unique()]
-
-        f_stat, p_value = stats.f_oneway(*ratings_by_region)
-        results[season] = {'ANOVA_p_value': p_value}
-
-        # If ANOVA is significant: perform Tukey's HSD
-        if p_value < 0.05:
-            tukey = pairwise_tukeyhsd(endog=season_data[rating_column], groups=season_data['region'], alpha=0.05)
-            results[season]['Tukey_HSD'] = tukey.summary()
-            print(f"season: {season}, ABV category: {abv_category}, rating column: {rating_column}")
-            print(f"ANOVA p-value: {p_value:.4f} - Significant difference between regions")
-            print(tukey.summary())
-        else:
-            print(f"season: {season}, ABV category: {abv_category}, rating column: {rating_column}")
-            print(f"ANOVA p-value: {p_value:.4f} - No significant difference between regions")
-
-        print("-" * 50)
-
-    return results
-
-
-
-def seasonal_region_test(reviews, rating_column):
-    """
-    Perform ANOVA to assess if there are significant differences in ratings between regions (South, Midwest, Northeast, and West)
-    across seasons for a specified rating column (either aroma, palate, taste, appearance, overall, or rating).
-    If the ANOVA test is significant, it runs Tukey's HSD test.
-
-    Parameters:
-    - reviews (pd.DataFrame): dataset containing reviews
-    - rating_column (str): rating column to analyze
-
-    Returns:
-    - dict: Dictionary with seasons as keys and p-values/Tukey's HSD results as values
-    """
-
-    results = {}
-    for season in reviews['season'].unique():
-
-        season_data = reviews[reviews['season'] == season]
-
-        # Group ratings by region
-        ratings_by_region = [season_data[season_data['region'] == region][rating_column]
-                             for region in season_data['region'].unique()]
-
-        # Perform ANOVA
-        f_stat, p_value = stats.f_oneway(*ratings_by_region)
-        results[season] = {'ANOVA_p_value': p_value}
-
-        # If ANOVA is significant: perform Tukey's HSD
-        if p_value < 0.05:
-            tukey = pairwise_tukeyhsd(endog=season_data[rating_column], groups=season_data['region'], alpha=0.05)
-            results[season]['Tukey_HSD'] = tukey.summary()
-            print(f"Season: {season}, Rating Column: {rating_column}")
-            print(f"ANOVA p-value: {p_value:.4f} - Significant difference between regions")
-            print(tukey.summary())
-        else:
-            print(f"Season: {season}, Rating Column: {rating_column}")
-            print(f"ANOVA p-value: {p_value:.4f} - No significant difference between regions")
-
-        print("-" * 50)
-
-    return results
-
-
-def anova_test(review, rating_column, timescale, category):
-    """
-    Perform ANOVA to assess if there are significant differences in ratings between a given timescale
-    for a given category across a rating column (either aroma, palate, taste, appearance, overall, or rating). 
-    If the ANOVA test is significant, it runs Tukey's HSD test.
-
-    Parameters:
-    - reviews (pd.DataFrame): dataset containing reviews
-    - rating_column (str): rating column to analyze
-    - timescale (str): timescale to compare
-    - category (str): category to analyze
-
-    Returns:
-    - dict: Dictionary with seasons as keys and p-values/Tukey's HSD results as values
-    """
-
-
-    results = {}
-    for cat in review[category].unique() \
-            :
-
-        data = review[review[category] == cat]
-
-        ratings = [data[data[timescale] == time][rating_column]
-                             for time in data[timescale].unique()]
-
-        f_stat, p_value = stats.f_oneway(*ratings)
-        results[cat] = {'ANOVA_p_value': p_value}
-
-        # If ANOVA is significant: perform Tukey's HSD
-        if p_value < 0.05:
-            tukey = pairwise_tukeyhsd(endog=data[rating_column], groups=data[timescale], alpha=0.05)
-            results[cat]['Tukey_HSD'] = tukey.summary()
-            print(f"{category}: {cat}, rating column: {rating_column}")
-            print(f"ANOVA p-value: {p_value:.4f} - Significant difference between {timescale}")
-            print(tukey.summary())
-        else:
-            print(f"{category}: {cat}, rating column: {rating_column}")
-            print(f"ANOVA p-value: {p_value:.4f} - No significant difference between {timescale}")
-
-        print("-" * 50)
-
-    return results
-