{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Taxi\n", "\n", "## Helpers" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "import os\n", "import pandas as pd\n", "import requests\n", "from dotenv import load_dotenv\n", "\n", "load_dotenv()\n", "API_KEY = os.environ[\"API_KEY\"]\n", "URL_QUERY = \"https://%s.crucible.dreadnode.io/score\"\n", "URL_FLAG = \"https://crucible.dreadnode.io/api/challenges/%s/submit-flag\"\n", "CHALLENGE = \"taxi\"\n", "# requests applies no timeout by default; cap every call so a stalled endpoint cannot hang the kernel.\n", "REQUEST_TIMEOUT = 30  # seconds\n", "\n", "def query(flag):\n", "    \"\"\"POST `flag` to the challenge's scoring endpoint and return the decoded JSON reply.\n", "\n", "    Despite the parameter name, the value is sent under the `data` key of the request body.\n", "    \"\"\"\n", "    response = requests.post(\n", "        URL_QUERY % CHALLENGE,\n", "        headers={ \"X-API-Key\": API_KEY },\n", "        json={ \"data\": flag },\n", "        timeout=REQUEST_TIMEOUT\n", "    )\n", "    return response.json()\n", "\n", "def submit(flag):\n", "    \"\"\"Submit `flag` for CHALLENGE.\n", "\n", "    Returns False for any non-200 response; otherwise the `correct` field of the JSON reply\n", "    (None if that field is missing).\n", "    \"\"\"\n", "    response = requests.post(\n", "        URL_FLAG % CHALLENGE,\n", "        headers={ \"X-API-Key\": API_KEY },\n", "        json={ \"challenge\": CHALLENGE, \"flag\": flag },\n", "        timeout=REQUEST_TIMEOUT\n", "    )\n", "    if response.status_code != 200:\n", "        return False\n", "    return response.json().get(\"correct\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Solution\n", "\n", "If we examine the different columns in the Parquet file we can see that the distribution of values for the `pickup_location` column is quite interesting." 
] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pickup_location\n", "Financial District 298\n", "Industrial Park 286\n", "Train Station 274\n", "Beach Front 272\n", "University 272\n", "Shopping Mall 259\n", "Business District 256\n", "Airport 252\n", "Historic Center 250\n", "Convention Center 250\n", "Entertainment Zone 245\n", "Sports Complex 243\n", "Downtown 242\n", "Theater District 237\n", "Hotel Zone 234\n", "Restaurant Row 232\n", "Arts District 231\n", "Residential Area 225\n", "Tech Hub 221\n", "Hospital 215\n", "Grand Central 1\n", "Railway Station 1\n", "Library 1\n", "North Station 1\n", "0mega Mall 1\n", "0pera House 1\n", "Name: count, dtype: int64" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_parquet(\"./data/taxi.parquet\")\n", "df[\"pickup_location\"].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Looking at the rows for the six pickup locations that occur only once (`Grand Central`, `Railway Station`, `Library`, `North Station`, `0mega Mall` and `0pera House`), we can see that they all have identical `signal_north` and `signal_south` values: 85.0 and 15.0, respectively." ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | ride_id | \n", "pickup_time | \n", "pickup_location | \n", "dropoff_location | \n", "driver_id | \n", "passenger_count | \n", "fare_amount | \n", "tip_amount | \n", "payment_type | \n", "rating | \n", "ride_duration_minutes | \n", "dropoff_time | \n", "signal_north | \n", "signal_south | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "2024-01-01 00:00:00 | \n", "Library | \n", "Tech Hub | \n", "63 | \n", "3 | \n", "17.790 | \n", "12.52 | \n", "Cash | \n", "1 | \n", "80 | \n", "2024-01-01 01:20:00 | \n", "85.0 | \n", "15.0 | \n", "
600 | \n", "601 | \n", "2024-01-03 02:00:00 | \n", "0mega Mall | \n", "Restaurant Row | \n", "58 | \n", "3 | \n", "99.030 | \n", "19.16 | \n", "Cash | \n", "4 | \n", "19 | \n", "2024-01-03 02:19:00 | \n", "85.0 | \n", "15.0 | \n", "
1200 | \n", "1201 | \n", "2024-01-05 04:00:00 | \n", "North Station | \n", "Theater District | \n", "49 | \n", "4 | \n", "73.275 | \n", "14.76 | \n", "Cash | \n", "5 | \n", "68 | \n", "2024-01-05 04:30:00 | \n", "85.0 | \n", "15.0 | \n", "
1800 | \n", "1801 | \n", "2024-01-07 06:00:00 | \n", "Grand Central | \n", "Entertainment Zone | \n", "43 | \n", "4 | \n", "56.350 | \n", "13.61 | \n", "Credit Card | \n", "5 | \n", "105 | \n", "2024-01-07 07:45:00 | \n", "85.0 | \n", "15.0 | \n", "
2400 | \n", "2401 | \n", "2024-01-09 08:00:00 | \n", "Railway Station | \n", "Tech Hub | \n", "51 | \n", "2 | \n", "52.860 | \n", "9.15 | \n", "Cash | \n", "5 | \n", "5 | \n", "2024-01-09 08:05:00 | \n", "85.0 | \n", "15.0 | \n", "
3000 | \n", "3001 | \n", "2024-01-11 10:00:00 | \n", "0pera House | \n", "Tech Hub | \n", "24 | \n", "4 | \n", "57.460 | \n", "19.95 | \n", "Mobile Payment | \n", "4 | \n", "80 | \n", "2024-01-11 11:20:00 | \n", "85.0 | \n", "15.0 | \n", "