Queries#
Example 1 (Average price by symbol)#
Pandas#
import pandas as pd
import numpy as np
# Step 1: Create sample trade data with timestamps
data = {
"timestamp": pd.date_range("2025-05-19 09:00", periods=10, freq="30min"),
"sym": ["AAPL", "AAPL", "GOOG", "GOOG", "MSFT", "AAPL", "MSFT", "GOOG", "AAPL", "MSFT"],
"price": [150.0, 151.5, 2800.0, 2820.0, 300.0, 149.0, 302.0, 2830.0, 152.0, 305.0],
"size": [100, 200, 150, 120, 80, 130, 90, 100, 110, 70]
}
trades = pd.DataFrame(data)
trades.head()
timestamp | sym | price | size | |
---|---|---|---|---|
0 | 2025-05-19 09:00:00 | AAPL | 150.0 | 100 |
1 | 2025-05-19 09:30:00 | AAPL | 151.5 | 200 |
2 | 2025-05-19 10:00:00 | GOOG | 2800.0 | 150 |
3 | 2025-05-19 10:30:00 | GOOG | 2820.0 | 120 |
4 | 2025-05-19 11:00:00 | MSFT | 300.0 | 80 |
trades.groupby("sym")["price"].mean()
sym
AAPL 150.625000
GOOG 2816.666667
MSFT 302.333333
Name: price, dtype: float64
SQL#
SELECT sym, AVG(price) FROM trades GROUP BY sym;
Q#
select avg price by sym from trades
Example 2 (Filter trades during market hours)#
Pandas#
# Set 'timestamp' as index to enable time filtering
trades.set_index("timestamp", inplace=True)
filtered_trades = trades.between_time("09:30", "16:00")
filtered_trades.groupby("sym")["price"].mean()
sym
AAPL 150.833333
GOOG 2816.666667
MSFT 302.333333
Name: price, dtype: float64
SQL#
SELECT * FROM trades WHERE time::time BETWEEN '09:30:00' AND '16:00:00';
Q#
select from trades where time within (09:30:00;16:00:00)
Example 3 (Total volume traded for AAPL)#
Pandas#
trades[trades["sym"] == "AAPL"]["size"].sum()
540
SQL#
SELECT SUM(size) FROM trades WHERE sym = 'AAPL';
Q#
select sum size from trades where sym=`AAPL
Example 4 As-of join#
Match each quote with the last trade before that quote
Pandas#
merge_asof
operation.
# Create quotes table (e.g., one quote per symbol at different times)
quote_data = {
"timestamp": pd.to_datetime([
"2025-05-19 09:35",
"2025-05-19 10:10",
"2025-05-19 11:40",
"2025-05-19 12:50"
]),
"sym": ["AAPL", "GOOG", "MSFT", "AAPL"],
"bid": [149.5, 2810.0, 301.0, 151.0],
"ask": [150.5, 2815.0, 303.0, 152.0]
}
quotes = pd.DataFrame(quote_data)
quotes
timestamp | sym | bid | ask | |
---|---|---|---|---|
0 | 2025-05-19 09:35:00 | AAPL | 149.5 | 150.5 |
1 | 2025-05-19 10:10:00 | GOOG | 2810.0 | 2815.0 |
2 | 2025-05-19 11:40:00 | MSFT | 301.0 | 303.0 |
3 | 2025-05-19 12:50:00 | AAPL | 151.0 | 152.0 |
pd.merge_asof(quotes.sort_values("timestamp"),
trades.sort_values("timestamp"),
on="timestamp", by="sym", direction="backward")
timestamp | sym | bid | ask | price | size | |
---|---|---|---|---|---|---|
0 | 2025-05-19 09:35:00 | AAPL | 149.5 | 150.5 | 151.5 | 200 |
1 | 2025-05-19 10:10:00 | GOOG | 2810.0 | 2815.0 | 2800.0 | 150 |
2 | 2025-05-19 11:40:00 | MSFT | 301.0 | 303.0 | 300.0 | 80 |
3 | 2025-05-19 12:50:00 | AAPL | 151.0 | 152.0 | 149.0 | 130 |
SQL#
SELECT *
FROM quotes q
LEFT JOIN LATERAL (
SELECT *
FROM trades t
WHERE t.sym = q.sym AND t.time <= q.time
ORDER BY t.time DESC
LIMIT 1
) last_trade ON true;
Q#
quotes lj `sym xkey trades