# Taxi
## Helpers
import os
import pandas as pd
import requests
from dotenv import load_dotenv
# Load settings from a .env file into the environment; this has to run before
# API_KEY is read on the next line.
load_dotenv()
# Key sent in the X-API-Key header of every request. Indexing os.environ
# directly raises KeyError right here if the variable is not set.
API_KEY = os.environ["API_KEY"]
# Scoring endpoint template; %s is filled with the challenge name (subdomain).
URL_QUERY = "https://%s.crucible.dreadnode.io/score"
# Flag-submission endpoint template; %s is filled with the challenge name.
URL_FLAG = "https://crucible.dreadnode.io/api/challenges/%s/submit-flag"
# Challenge identifier substituted into both URL templates above.
CHALLENGE = "taxi"
def query(flag, timeout=30.0):
    """Send a candidate value to the challenge's scoring endpoint.

    Args:
        flag: Value sent as the ``data`` field of the JSON request body.
        timeout: Seconds to wait for the server before giving up. Pass
            ``None`` to wait indefinitely (the behaviour before this
            parameter existed).

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    response = requests.post(
        URL_QUERY % CHALLENGE,
        headers={"X-API-Key": API_KEY},
        json={"data": flag},
        # Without an explicit timeout, requests blocks forever on a stalled
        # connection.
        timeout=timeout,
    )
    return response.json()
def submit(flag, timeout=30.0):
    """Submit a flag for the challenge and report whether it was accepted.

    Args:
        flag: Flag string sent as the ``flag`` field of the JSON request body.
        timeout: Seconds to wait for the server before giving up. Pass
            ``None`` to wait indefinitely (the behaviour before this
            parameter existed).

    Returns:
        ``False`` when the response status is not 200; otherwise the value of
        the ``correct`` field of the JSON response (``None`` if the field is
        absent).

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    response = requests.post(
        URL_FLAG % CHALLENGE,
        headers={"X-API-Key": API_KEY},
        json={"challenge": CHALLENGE, "flag": flag},
        # Without an explicit timeout, requests blocks forever on a stalled
        # connection.
        timeout=timeout,
    )
    # Any non-200 status is treated as a rejected submission.
    if response.status_code != 200:
        return False
    return response.json().get("correct")
## Solution
If we examine the columns in the Parquet file, the distribution of values in the pickup_location column stands out: most locations occur hundreds of times, but six occur exactly once.
# Read the challenge dataset from the local Parquet file.
df = pd.read_parquet("./data/taxi.parquet")
# Count how often each pickup location occurs (displayed as the cell output).
df["pickup_location"].value_counts()
pickup_location
Financial District    298
Industrial Park       286
Train Station         274
Beach Front           272
University            272
Shopping Mall         259
Business District     256
Airport               252
Historic Center       250
Convention Center     250
Entertainment Zone    245
Sports Complex        243
Downtown              242
Theater District      237
Hotel Zone            234
Restaurant Row        232
Arts District         231
Residential Area      225
Tech Hub              221
Hospital              215
Grand Central           1
Railway Station         1
Library                 1
North Station           1
0mega Mall              1
0pera House             1
Name: count, dtype: int64
Looking at the rows for the six pickup locations that occur only once, we can see that they all have identical signal_north and signal_south values: 85.0 and 15.0, respectively.
col_pickup = "pickup_location"
# Occurrence count of every pickup location.
pickup_counts = df[col_pickup].value_counts()
# Names of the six locations with the lowest counts.
rare_pickups = pickup_counts.nsmallest(6).index.values
# Show only the rows whose pickup location is one of those six.
df[df[col_pickup].isin(rare_pickups)]
| | ride_id | pickup_time | pickup_location | dropoff_location | driver_id | passenger_count | fare_amount | tip_amount | payment_type | rating | ride_duration_minutes | dropoff_time | signal_north | signal_south |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 2024-01-01 00:00:00 | Library | Tech Hub | 63 | 3 | 17.790 | 12.52 | Cash | 1 | 80 | 2024-01-01 01:20:00 | 85.0 | 15.0 | 
| 600 | 601 | 2024-01-03 02:00:00 | 0mega Mall | Restaurant Row | 58 | 3 | 99.030 | 19.16 | Cash | 4 | 19 | 2024-01-03 02:19:00 | 85.0 | 15.0 | 
| 1200 | 1201 | 2024-01-05 04:00:00 | North Station | Theater District | 49 | 4 | 73.275 | 14.76 | Cash | 5 | 68 | 2024-01-05 04:30:00 | 85.0 | 15.0 | 
| 1800 | 1801 | 2024-01-07 06:00:00 | Grand Central | Entertainment Zone | 43 | 4 | 56.350 | 13.61 | Credit Card | 5 | 105 | 2024-01-07 07:45:00 | 85.0 | 15.0 | 
| 2400 | 2401 | 2024-01-09 08:00:00 | Railway Station | Tech Hub | 51 | 2 | 52.860 | 9.15 | Cash | 5 | 5 | 2024-01-09 08:05:00 | 85.0 | 15.0 | 
| 3000 | 3001 | 2024-01-11 10:00:00 | 0pera House | Tech Hub | 24 | 4 | 57.460 | 19.95 | Mobile Payment | 4 | 80 | 2024-01-11 11:20:00 | 85.0 | 15.0 | 
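If we filter for all rows with these signal values and join the first letters of the pickup_location column in row order, we get the flag. Filtering on the signal values rather than on the six rare locations matters: those six rows alone spell only L0NGR0, and two further rows with the same signal values but ordinary location names supply the final A and D.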
col_snorth = "signal_north"
col_ssouth = "signal_south"
# Boolean mask selecting rows whose signal pair is exactly (85.0, 15.0).
marker_rows = (df[col_snorth] == 85.0) & (df[col_ssouth] == 15.0)
# Concatenate the first character of each matching pickup location, in row order.
flag = "".join(name[0] for name in df[marker_rows][col_pickup].values)
print(flag)
# The scoring endpoint's response carries the real flag in its "flag" field.
returned_flag = query(flag)["flag"]
print("Flag accepted:", submit(returned_flag))
L0NGR0AD
Flag accepted: True