Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main'
Browse files Browse the repository at this point in the history
  • Loading branch information
PaulineTL committed Dec 20, 2024
2 parents 555bf5c + 44e4817 commit 2b5a6b9
Show file tree
Hide file tree
Showing 9 changed files with 128 additions and 48 deletions.
2 changes: 1 addition & 1 deletion _includes/aroma_treemap.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion _includes/mouthfeel_treemap.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion _includes/palate_treemap.html

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion _includes/taste_treemap.html

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions data_story_htlm.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -8929,7 +8929,7 @@
")\n",
"\n",
"fig.show()\n",
"fig.write_html(\"src/graph/highest_rated_beer_styles_by_states_season.html\")"
"fig.write_html(\"_includes/highest_rated_beer_styles_by_states_season.html\")"
]
},
{
Expand Down Expand Up @@ -10809,8 +10809,8 @@
"fig_high.show()\n",
"fig_low.show()\n",
"\n",
"fig_high.write_html(\"src/graph/season_ratings_high_abv.html\")\n",
"fig_low.write_html(\"src/graph/season_ratings_low_abv.html\")"
"fig_high.write_html(\"_includes/season_ratings_high_abv.html\")\n",
"fig_low.write_html(\"_includes/season_ratings_low_abv.html\")"
]
},
{
Expand Down
14 changes: 13 additions & 1 deletion index.md
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,19 @@ While sentiment analysis uncovers the emotional tone behind a review, semantic a
By analyzing the specific language used in highly rated reviews, we can uncover the key qualities that turn a simple beer into a fan favorite. In this section, we will dive into a semantic comparison of beer reviews across seasons to explore which characteristics define the most-loved beers at each time of the year.

<div style="display: flex; justify-content: center;">
{% include mouthfeel_treemap.html.html %}
{% include palate_treemap.html %}
</div>

<div style="display: flex; justify-content: center;">
{% include aroma_treemap.html %}
</div>

<div style="display: flex; justify-content: center;">
{% include taste_treemap.html %}
</div>

<div style="display: flex; justify-content: center;">
{% include mouthfeel_treemap.html %}
</div>

##### Winter: the warmth of rich flavors
Expand Down
81 changes: 65 additions & 16 deletions milestoneP3 copy.ipynb

Large diffs are not rendered by default.

45 changes: 25 additions & 20 deletions milestoneP3.ipynb

Large diffs are not rendered by default.

22 changes: 18 additions & 4 deletions src/scripts/semantic_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,10 @@ def analyse_flavours(reviews: pd.DataFrame):
import matplotlib.pyplot as plt
import seaborn as sns

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def group_styles_by_flavours(reviews):
"""
Groups beer reviews by style, normalizes flavour mentions, and plots the distribution.
Expand All @@ -266,21 +270,21 @@ def group_styles_by_flavours(reviews):
flavours = ['hoppy', 'malty', 'fruity', 'spicy', 'citrus',
'sweet', 'bitter', 'sour', 'tart', 'crisp']

# Validate that all necessary columns are present

missing_flavours = [flavour for flavour in flavours if flavour not in reviews.columns]
if missing_flavours:
raise ValueError(f"The following flavour columns are missing in the DataFrame: {missing_flavours}")

style_flavours = reviews.groupby('style_simp')[flavours].sum()

print("Calculating total flavour mentions per style...")
style_flavours['total_flavours'] = style_flavours.sum(axis=1)
style_flavours['total_flavours'].replace(0, pd.NA, inplace=True)
normalized_flavours = style_flavours[flavours].div(style_flavours['total_flavours'], axis=0) * 100

normalized_flavours.dropna(inplace=True)

normalized_flavours = normalized_flavours.reset_index()

# Melt the DataFrame to long format for seaborn
plot_data = normalized_flavours.melt(
id_vars='style_simp',
value_vars=flavours,
Expand All @@ -300,13 +304,23 @@ def group_styles_by_flavours(reviews):
palette='Set2' # Choose a color palette for better distinction
)

# Set plot titles and labels
plt.title('Normalized Flavour Occurrences in US Beer Reviews by Style', fontsize=16)
plt.xlabel('Beer Style', fontsize=14)
plt.ylabel('Percentage of Flavour Mentions (%)', fontsize=14)

# Rotate x-axis labels for better readability
plt.xticks(rotation=45, ha='right', fontsize=12)

# Adjust legend
plt.legend(title='Flavour', bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=12)

# Improve layout to prevent clipping of labels and legend
plt.tight_layout()

# Display the plot
plt.show()

return normalized_flavours


Expand Down

0 comments on commit 2b5a6b9

Please sign in to comment.