Skip to content

Commit

Permalink
Make progress on the visualisation
Browse files Browse the repository at this point in the history
  • Loading branch information
Mateusz Krainski committed Jul 31, 2024
1 parent 7e8ee7b commit 6ca85a2
Showing 1 changed file with 108 additions and 77 deletions.
185 changes: 108 additions & 77 deletions yafs/notebooks/analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,6 @@
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fe086eca-23ad-4543-8bd4-5405bf06b560",
"metadata": {},
"outputs": [],
"source": [
"print(\"this is a test\")"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -67,12 +57,40 @@
"df[\"departure_date_dt\"] = pd.to_datetime(\n",
" df[\"departure_date\"].apply(lambda x: f\"{x} 2024\")\n",
")\n",
"df[\"return_date_dt\"] = pd.to_datetime(df[\"return_date\"].apply(lambda x: f\"{x} 2024\"))\n",
"\n",
"df[\"return_date_dt\"] = pd.to_datetime(df[\"return_date\"].apply(lambda x: f\"{x} 2024\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5a13843b",
"metadata": {},
"outputs": [],
"source": [
"df[\"departure_dt\"] = pd.to_datetime(\n",
" df.apply(lambda row: f\"{row['departure']} {row['departure_date']} 2024\", axis=1)\n",
")\n",
"df[\"flight_duration_dt\"] = pd.to_timedelta(df[\"duration\"])\n",
" df.apply(lambda row: f\"{row['departure']} {row['departure_date']} 2024\", axis=1),\n",
" format=\"%I:%M %p %d %b %Y\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "39628bfa",
"metadata": {},
"outputs": [],
"source": [
"def parse_duration(duration):\n",
"    \"\"\"Convert a duration label such as '2 hr 30 min' into a timedelta.\n",
"\n",
"    Either part may be absent ('45 min', '3 hr'). Anything that is not a\n",
"    string (e.g. NaN for a missing cell) or that contains neither an hour\n",
"    nor a minute part falls back to timedelta(0).\n",
"    \"\"\"\n",
"    # Non-string values have no .strip(); use the same fallback as below\n",
"    # instead of raising in the middle of .apply().\n",
"    if not isinstance(duration, str):\n",
"        return timedelta(0)\n",
"\n",
"    pattern = r\"(?:(\\d+) hr)? ?(?:(\\d+) min)?\"\n",
"    # Every part of the pattern is optional, so re.match always succeeds;\n",
"    # whether anything was actually parsed is decided by the captured groups.\n",
"    hours_text, minutes_text = re.match(pattern, duration.strip()).groups()\n",
"    if hours_text is None and minutes_text is None:\n",
"        return timedelta(0)\n",
"    return timedelta(hours=int(hours_text or 0), minutes=int(minutes_text or 0))\n",
"\n",
"\n",
"df[\"flight_duration_dt\"] = df[\"duration\"].apply(parse_duration)\n",
"df[\"landing_origin_dt\"] = df[\"departure_dt\"] + df[\"flight_duration_dt\"]"
]
},
Expand Down Expand Up @@ -110,6 +128,19 @@
"df[\"stops_str\"] = df[\"stops\"].apply(lambda x: x if isinstance(x, str) else \"No\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6a9a4595",
"metadata": {},
"outputs": [],
"source": [
"# In current processing, if the price is like 'CA$ 1,200', the algorithm will split this\n",
"# into digits and characters, so we'll get 'CA$,' and '1200'.\n",
"# This removes the unnecessary comma.\n",
"df[\"price_currency\"] = df[\"price_currency\"].str.replace(\",\", \"\")"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -319,28 +350,52 @@
{
"cell_type": "code",
"execution_count": null,
"id": "7b994d47-cb96-4518-a213-c13fcf045b75",
"id": "a98fd73e",
"metadata": {},
"outputs": [],
"source": [
"px.scatter(\n",
" df_filtered,\n",
" y=\"price_unit\",\n",
" x=\"flight_duration_hours\",\n",
" color=\"direct_flight\",\n",
" marginal_y=\"violin\",\n",
" marginal_x=\"box\",\n",
")"
"custom_data_columns = [\n",
" \"connection\",\n",
" \"departure_dt\",\n",
" \"landing\",\n",
" \"duration\",\n",
" \"stops_str\",\n",
" \"price_currency\",\n",
" \"price_unit\",\n",
"]\n",
"\n",
"# <extra></extra> is used to omit the trace name.\n",
"hovertemplate = \"\"\"\n",
"<b>%{customdata[0]}</b><br>\n",
"Departure: %{customdata[1]}<br>\n",
"Landing: %{customdata[2]}<br>\n",
"Duration: %{customdata[3]}<br>\n",
"Stops: %{customdata[4]}<br>\n",
"Price: %{customdata[5]} %{customdata[6]}\n",
"<extra></extra>\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ac201f84-90b7-432a-ad98-1d41a07acc3f",
"id": "7b994d47-cb96-4518-a213-c13fcf045b75",
"metadata": {},
"outputs": [],
"source": [
"df_filtered.iloc[1]"
"fig = px.scatter(\n",
" df_filtered,\n",
" y=\"price_unit\",\n",
" x=\"flight_duration_hours\",\n",
" color=\"direct_flight\",\n",
" marginal_y=\"violin\",\n",
" marginal_x=\"box\",\n",
" custom_data=custom_data_columns,\n",
")\n",
"\n",
"fig.update_traces(hovertemplate=hovertemplate)\n",
"\n",
"fig.show()"
]
},
{
Expand All @@ -356,25 +411,9 @@
" y=\"price_unit\",\n",
" color=\"direct_flight\",\n",
" opacity=0.5,\n",
" custom_data=[\n",
" \"connection\",\n",
" \"departure_dt\",\n",
" \"landing\",\n",
" \"duration\",\n",
" \"stops_str\",\n",
" \"price_currency\",\n",
" \"price_unit\",\n",
" ],\n",
" custom_data=custom_data_columns,\n",
")\n",
"\n",
"hovertemplate = \"\"\"\n",
"<b>%{customdata[0]}</b><br>\n",
"Departure: %{customdata[1]}<br>\n",
"Landing: %{customdata[2]}<br>\n",
"Duration: %{customdata[3]}<br>\n",
"Stops: %{customdata[4]}<br>\n",
"Price: %{customdata[5]} %{customdata[6]}\n",
"\"\"\"\n",
"\n",
"# TODO: add some visualisation for how good the price/duration is relative to others\n",
"# e.g. what % of flights are below this duration/price\n",
Expand All @@ -385,50 +424,42 @@
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "86cb46d0-4f6d-41cd-ac10-ba6b20a3e6b5",
"metadata": {},
"outputs": [],
"source": [
"df_filtered[df_filtered[\"price_unit\"] < 850][\n",
" [\n",
" \"origin_airport_name\",\n",
" \"destination_airport_name\",\n",
" \"departure_date\",\n",
" \"return_date\",\n",
" \"airline\",\n",
" ]\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3f9923d2-92a1-4a91-a7e3-6b90c0d7164c",
"metadata": {},
"outputs": [],
"source": [
"df_sub = df_filtered # [df_filtered[\"departure_date_dt\"] == datetime(2024, 11, 20)]\n",
"line_colors = {True: \"#F44336\", False: \"#2196F3\"}\n",
"\n",
"fig = go.Figure()\n",
"for _, row in df_sub.iterrows():\n",
" fig.add_trace(\n",
" go.Scatter(\n",
" x=[row[\"departure_dt\"], row[\"landing_origin_dt\"]],\n",
" y=[row[\"price_unit\"], row[\"price_unit\"]],\n",
" line_color=line_colors[row[\"direct_flight\"]],\n",
" )\n",
"AXIS_OFFSET = 50\n",
"\n",
"price_global_min = df_filtered[\"price_unit\"].min() - AXIS_OFFSET\n",
"price_global_max = df_filtered[\"price_unit\"].max() + AXIS_OFFSET\n",
"\n",
"for departure_date in df_filtered[\"departure_date_dt\"].unique():\n",
" df_sub = df_filtered[df_filtered[\"departure_date_dt\"] == departure_date]\n",
" line_colors = {True: \"#F44336\", False: \"#2196F3\"}\n",
"\n",
" fig = go.Figure(\n",
" layout_yaxis_range=[price_global_min, price_global_max],\n",
" )\n",
"    for _, row in df_sub.iterrows():\n",
"        # Each flight is drawn as a two-point segment (departure -> landing).\n",
"        # customdata is matched to points by position, so the hover row is\n",
"        # supplied once per point; otherwise the landing end has no data\n",
"        # for the hover template to fill in.\n",
"        hover_row = [row[col] for col in custom_data_columns]\n",
"        fig.add_trace(\n",
"            go.Scatter(\n",
"                x=[row[\"departure_dt\"], row[\"landing_origin_dt\"]],\n",
"                y=[row[\"price_unit\"], row[\"price_unit\"]],\n",
"                customdata=[hover_row, hover_row],\n",
"                line_color=line_colors[row[\"direct_flight\"]],\n",
"                hovertemplate=hovertemplate,\n",
"            ),\n",
"        )\n",
"\n",
"fig.update_layout(\n",
" autosize=True,\n",
" height=1000,\n",
")\n",
" fig.update_layout(\n",
" autosize=True,\n",
" height=1000,\n",
" )\n",
"\n",
"fig.show()"
" fig.show()"
]
},
{
Expand Down

0 comments on commit 6ca85a2

Please sign in to comment.