Queries#

Example 1 (Average price by symbol)#

Pandas#

import pandas as pd
import numpy as np

# Step 1: Create sample trade data with timestamps
data = {
    "timestamp": pd.date_range("2025-05-19 09:00", periods=10, freq="30min"),
    "sym": ["AAPL", "AAPL", "GOOG", "GOOG", "MSFT", "AAPL", "MSFT", "GOOG", "AAPL", "MSFT"],
    "price": [150.0, 151.5, 2800.0, 2820.0, 300.0, 149.0, 302.0, 2830.0, 152.0, 305.0],
    "size": [100, 200, 150, 120, 80, 130, 90, 100, 110, 70]
}

trades = pd.DataFrame(data)
trades.head()
timestamp sym price size
0 2025-05-19 09:00:00 AAPL 150.0 100
1 2025-05-19 09:30:00 AAPL 151.5 200
2 2025-05-19 10:00:00 GOOG 2800.0 150
3 2025-05-19 10:30:00 GOOG 2820.0 120
4 2025-05-19 11:00:00 MSFT 300.0 80
trades.groupby("sym")["price"].mean()
sym
AAPL     150.625000
GOOG    2816.666667
MSFT     302.333333
Name: price, dtype: float64

SQL#

SELECT sym, AVG(price) FROM trades GROUP BY sym;

Q#

select avg price by sym from trades

Example 2 (Filter trades during market hours)#

Pandas#

# Set 'timestamp' as index to enable time filtering
trades.set_index("timestamp", inplace=True)
filtered_trades = trades.between_time("09:30", "16:00")

filtered_trades.groupby("sym")["price"].mean()
sym
AAPL     150.833333
GOOG    2816.666667
MSFT     302.333333
Name: price, dtype: float64

SQL#

SELECT * FROM trades WHERE time::time BETWEEN '09:30:00' AND '16:00:00';

Q#

select from trades where time within (09:30:00;16:00:00)

Example 3 (Total volume traded for AAPL)#

Pandas#

trades[trades["sym"] == "AAPL"]["size"].sum()
540

SQL#

SELECT SUM(size) FROM trades WHERE sym = 'AAPL';

Q#

select sum size from trades where sym=`AAPL

Example 4 As-of join#

Match each quote with the last trade before that quote

Pandas#

merge_asof operation.

# Create quotes table (e.g., one quote per symbol at different times)
quote_data = {
    "timestamp": pd.to_datetime([
        "2025-05-19 09:35",
        "2025-05-19 10:10",
        "2025-05-19 11:40",
        "2025-05-19 12:50"
    ]),
    "sym": ["AAPL", "GOOG", "MSFT", "AAPL"],
    "bid": [149.5, 2810.0, 301.0, 151.0],
    "ask": [150.5, 2815.0, 303.0, 152.0]
}
quotes = pd.DataFrame(quote_data)
quotes
timestamp sym bid ask
0 2025-05-19 09:35:00 AAPL 149.5 150.5
1 2025-05-19 10:10:00 GOOG 2810.0 2815.0
2 2025-05-19 11:40:00 MSFT 301.0 303.0
3 2025-05-19 12:50:00 AAPL 151.0 152.0
pd.merge_asof(quotes.sort_values("timestamp"), 
              trades.sort_values("timestamp"), 
              on="timestamp", by="sym", direction="backward")
timestamp sym bid ask price size
0 2025-05-19 09:35:00 AAPL 149.5 150.5 151.5 200
1 2025-05-19 10:10:00 GOOG 2810.0 2815.0 2800.0 150
2 2025-05-19 11:40:00 MSFT 301.0 303.0 300.0 80
3 2025-05-19 12:50:00 AAPL 151.0 152.0 149.0 130

SQL#

SELECT *
FROM quotes q
LEFT JOIN LATERAL (
    SELECT *
    FROM trades t
    WHERE t.sym = q.sym AND t.time <= q.time
    ORDER BY t.time DESC
    LIMIT 1
) last_trade ON true;

Q#

quotes lj `sym xkey trades