Compare commits
2 Commits
f98de4d0a3
...
5008becd15
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5008becd15 | ||
|
|
a503514bf5 |
14
src/data/config/settings.py
Normal file
14
src/data/config/settings.py
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
DB_CONFIG = {
|
||||||
|
"host": "localhost",
|
||||||
|
"port": 5432,
|
||||||
|
"database": "options_db",
|
||||||
|
"user": "quant_user",
|
||||||
|
"password": "strong_password",
|
||||||
|
}
|
||||||
|
|
||||||
|
PIPELINE_CONFIG = {
|
||||||
|
"symbols": [
|
||||||
|
"SPY"
|
||||||
|
# Example: "SPY"
|
||||||
|
]
|
||||||
|
}
|
||||||
13
src/data/database_interaction.py
Normal file
13
src/data/database_interaction.py
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
import psycopg2
|
||||||
|
|
||||||
|
conn = psycopg2.connect(
|
||||||
|
dbname="options_db",
|
||||||
|
user="quant_user",
|
||||||
|
password="strong_password",
|
||||||
|
host="144.91.73.49",
|
||||||
|
port="5432"
|
||||||
|
)
|
||||||
|
|
||||||
|
cursor = conn.cursor()
|
||||||
|
cursor.execute("SELECT * FROM underlyings;")
|
||||||
|
print(cursor.fetchall())
|
||||||
280
src/data/ingestion/ingest_yahoo_options.py
Normal file
280
src/data/ingestion/ingest_yahoo_options.py
Normal file
@@ -0,0 +1,280 @@
|
|||||||
|
from datetime import datetime, timezone
|
||||||
|
from decimal import Decimal, InvalidOperation
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
import yfinance as yf
|
||||||
|
from sqlalchemy import create_engine, text
|
||||||
|
|
||||||
|
from config.settings import DB_CONFIG, PIPELINE_CONFIG
|
||||||
|
|
||||||
|
|
||||||
|
def build_db_url() -> str:
|
||||||
|
return (
|
||||||
|
f"postgresql+psycopg2://{DB_CONFIG['user']}:{DB_CONFIG['password']}"
|
||||||
|
f"@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['database']}"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def to_python_number(value):
|
||||||
|
"""Convert pandas/numpy values to plain Python values or None."""
|
||||||
|
if pd.isna(value):
|
||||||
|
return None
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
def compute_mid(bid, ask):
|
||||||
|
bid = to_python_number(bid)
|
||||||
|
ask = to_python_number(ask)
|
||||||
|
|
||||||
|
if bid is None or ask is None:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return float((bid + ask) / 2.0)
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def infer_option_style(symbol: str) -> str:
|
||||||
|
"""
|
||||||
|
Very rough default convention:
|
||||||
|
- US equities / ETFs from Yahoo are usually American style
|
||||||
|
"""
|
||||||
|
# TODO: If later you ingest index options like SPX, adapt this logic.
|
||||||
|
return "american"
|
||||||
|
|
||||||
|
|
||||||
|
def get_or_create_underlying(conn, symbol: str) -> int:
|
||||||
|
query_insert = text("""
|
||||||
|
INSERT INTO underlyings (symbol, exchange, currency)
|
||||||
|
VALUES (:symbol, :exchange, :currency)
|
||||||
|
ON CONFLICT (symbol) DO NOTHING
|
||||||
|
""")
|
||||||
|
|
||||||
|
query_select = text("""
|
||||||
|
SELECT id FROM underlyings WHERE symbol = :symbol
|
||||||
|
""")
|
||||||
|
|
||||||
|
# TODO: improve exchange/currency detection if you want richer metadata
|
||||||
|
conn.execute(query_insert, {
|
||||||
|
"symbol": symbol,
|
||||||
|
"exchange": None,
|
||||||
|
"currency": "USD",
|
||||||
|
})
|
||||||
|
|
||||||
|
result = conn.execute(query_select, {"symbol": symbol}).fetchone()
|
||||||
|
return result[0] #h
|
||||||
|
|
||||||
|
|
||||||
|
def get_or_create_contract(
|
||||||
|
conn,
|
||||||
|
underlying_id: int,
|
||||||
|
option_type: str,
|
||||||
|
strike: float,
|
||||||
|
expiration_date,
|
||||||
|
style: str,
|
||||||
|
contract_symbol: str,
|
||||||
|
) -> int:
|
||||||
|
query_insert = text("""
|
||||||
|
INSERT INTO option_contracts (
|
||||||
|
underlying_id, option_type, strike, expiration_date, style, contract_symbol
|
||||||
|
)
|
||||||
|
VALUES (
|
||||||
|
:underlying_id, :option_type, :strike, :expiration_date, :style, :contract_symbol
|
||||||
|
)
|
||||||
|
ON CONFLICT (underlying_id, option_type, strike, expiration_date)
|
||||||
|
DO NOTHING
|
||||||
|
""")
|
||||||
|
|
||||||
|
query_select = text("""
|
||||||
|
SELECT id
|
||||||
|
FROM option_contracts
|
||||||
|
WHERE underlying_id = :underlying_id
|
||||||
|
AND option_type = :option_type
|
||||||
|
AND strike = :strike
|
||||||
|
AND expiration_date = :expiration_date
|
||||||
|
""")
|
||||||
|
|
||||||
|
conn.execute(query_insert, {
|
||||||
|
"underlying_id": underlying_id,
|
||||||
|
"option_type": option_type,
|
||||||
|
"strike": strike,
|
||||||
|
"expiration_date": expiration_date,
|
||||||
|
"style": style,
|
||||||
|
"contract_symbol": contract_symbol,
|
||||||
|
})
|
||||||
|
|
||||||
|
result = conn.execute(query_select, {
|
||||||
|
"underlying_id": underlying_id,
|
||||||
|
"option_type": option_type,
|
||||||
|
"strike": strike,
|
||||||
|
"expiration_date": expiration_date,
|
||||||
|
}).fetchone()
|
||||||
|
|
||||||
|
return result[0]
|
||||||
|
|
||||||
|
|
||||||
|
def insert_underlying_price(conn, underlying_id: int, price_timestamp: datetime, price: float):
|
||||||
|
query = text("""
|
||||||
|
INSERT INTO underlying_prices (underlying_id, price_timestamp, price)
|
||||||
|
VALUES (:underlying_id, :price_timestamp, :price)
|
||||||
|
ON CONFLICT (underlying_id, price_timestamp) DO NOTHING
|
||||||
|
""")
|
||||||
|
conn.execute(query, {
|
||||||
|
"underlying_id": underlying_id,
|
||||||
|
"price_timestamp": price_timestamp,
|
||||||
|
"price": price,
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
def insert_option_quote(
|
||||||
|
conn,
|
||||||
|
contract_id: int,
|
||||||
|
quote_timestamp: datetime,
|
||||||
|
bid,
|
||||||
|
ask,
|
||||||
|
mid,
|
||||||
|
last_price,
|
||||||
|
implied_vol,
|
||||||
|
volume,
|
||||||
|
open_interest,
|
||||||
|
):
|
||||||
|
query = text("""
|
||||||
|
INSERT INTO option_quotes (
|
||||||
|
contract_id, quote_timestamp, bid, ask, mid,
|
||||||
|
last_price, implied_vol, volume, open_interest
|
||||||
|
)
|
||||||
|
VALUES (
|
||||||
|
:contract_id, :quote_timestamp, :bid, :ask, :mid,
|
||||||
|
:last_price, :implied_vol, :volume, :open_interest
|
||||||
|
)
|
||||||
|
ON CONFLICT (contract_id, quote_timestamp) DO NOTHING
|
||||||
|
""")
|
||||||
|
|
||||||
|
conn.execute(query, {
|
||||||
|
"contract_id": contract_id,
|
||||||
|
"quote_timestamp": quote_timestamp,
|
||||||
|
"bid": bid,
|
||||||
|
"ask": ask,
|
||||||
|
"mid": mid,
|
||||||
|
"last_price": last_price,
|
||||||
|
"implied_vol": implied_vol,
|
||||||
|
"volume": volume,
|
||||||
|
"open_interest": open_interest,
|
||||||
|
})
|
||||||
|
|
||||||
|
|
||||||
|
def process_option_dataframe(conn, df: pd.DataFrame, underlying_id: int, option_type: str, symbol: str, expiration_date, quote_timestamp: datetime):
|
||||||
|
style = infer_option_style(symbol)
|
||||||
|
|
||||||
|
for _, row in df.iterrows():
|
||||||
|
strike = to_python_number(row.get("strike"))
|
||||||
|
contract_symbol = to_python_number(row.get("contractSymbol"))
|
||||||
|
bid = to_python_number(row.get("bid"))
|
||||||
|
ask = to_python_number(row.get("ask"))
|
||||||
|
last_price = to_python_number(row.get("lastPrice"))
|
||||||
|
implied_vol = to_python_number(row.get("impliedVolatility"))
|
||||||
|
volume = to_python_number(row.get("volume"))
|
||||||
|
open_interest = to_python_number(row.get("openInterest"))
|
||||||
|
|
||||||
|
if strike is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
contract_id = get_or_create_contract(
|
||||||
|
conn=conn,
|
||||||
|
underlying_id=underlying_id,
|
||||||
|
option_type=option_type,
|
||||||
|
strike=float(strike),
|
||||||
|
expiration_date=expiration_date,
|
||||||
|
style=style,
|
||||||
|
contract_symbol=contract_symbol,
|
||||||
|
)
|
||||||
|
|
||||||
|
mid = compute_mid(bid, ask)
|
||||||
|
|
||||||
|
insert_option_quote(
|
||||||
|
conn=conn,
|
||||||
|
contract_id=contract_id,
|
||||||
|
quote_timestamp=quote_timestamp,
|
||||||
|
bid=bid,
|
||||||
|
ask=ask,
|
||||||
|
mid=mid,
|
||||||
|
last_price=last_price,
|
||||||
|
implied_vol=implied_vol,
|
||||||
|
volume=int(volume) if volume is not None else None,
|
||||||
|
open_interest=int(open_interest) if open_interest is not None else None,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def ingest_symbol(symbol: str, engine):
|
||||||
|
print(f"Starting ingestion for {symbol}...")
|
||||||
|
|
||||||
|
ticker = yf.Ticker(symbol)
|
||||||
|
expirations = ticker.options
|
||||||
|
|
||||||
|
if not expirations:
|
||||||
|
print(f"No options found for {symbol}")
|
||||||
|
return
|
||||||
|
|
||||||
|
quote_timestamp = datetime.now(timezone.utc)
|
||||||
|
|
||||||
|
# Try to get spot price
|
||||||
|
info = {}
|
||||||
|
try:
|
||||||
|
info = ticker.fast_info
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
spot_price = None
|
||||||
|
if info:
|
||||||
|
spot_price = info.get("lastPrice") or info.get("last_price")
|
||||||
|
|
||||||
|
with engine.begin() as conn:
|
||||||
|
underlying_id = get_or_create_underlying(conn, symbol)
|
||||||
|
|
||||||
|
if spot_price is not None:
|
||||||
|
insert_underlying_price(
|
||||||
|
conn=conn,
|
||||||
|
underlying_id=underlying_id,
|
||||||
|
price_timestamp=quote_timestamp,
|
||||||
|
price=float(spot_price),
|
||||||
|
)
|
||||||
|
|
||||||
|
for expiry in expirations:
|
||||||
|
print(f" Fetching expiry {expiry} ...")
|
||||||
|
chain = ticker.option_chain(expiry)
|
||||||
|
|
||||||
|
expiration_date = pd.to_datetime(expiry).date()
|
||||||
|
|
||||||
|
process_option_dataframe(
|
||||||
|
conn=conn,
|
||||||
|
df=chain.calls,
|
||||||
|
underlying_id=underlying_id,
|
||||||
|
option_type="call",
|
||||||
|
symbol=symbol,
|
||||||
|
expiration_date=expiration_date,
|
||||||
|
quote_timestamp=quote_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
process_option_dataframe(
|
||||||
|
conn=conn,
|
||||||
|
df=chain.puts,
|
||||||
|
underlying_id=underlying_id,
|
||||||
|
option_type="put",
|
||||||
|
symbol=symbol,
|
||||||
|
expiration_date=expiration_date,
|
||||||
|
quote_timestamp=quote_timestamp,
|
||||||
|
)
|
||||||
|
|
||||||
|
print(f"Finished ingestion for {symbol}.")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
db_url = build_db_url()
|
||||||
|
engine = create_engine(db_url, future=True)
|
||||||
|
|
||||||
|
for symbol in PIPELINE_CONFIG["symbols"]:
|
||||||
|
ingest_symbol(symbol, engine)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
49
src/data/sql/schema.sql
Normal file
49
src/data/sql/schema.sql
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
CREATE TABLE IF NOT EXISTS underlyings (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
symbol TEXT UNIQUE NOT NULL,
|
||||||
|
exchange TEXT,
|
||||||
|
currency TEXT,
|
||||||
|
created_at TIMESTAMP DEFAULT NOW()
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS option_contracts (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
underlying_id INTEGER NOT NULL REFERENCES underlyings(id),
|
||||||
|
option_type TEXT NOT NULL CHECK (option_type IN ('call', 'put')),
|
||||||
|
strike NUMERIC NOT NULL,
|
||||||
|
expiration_date DATE NOT NULL,
|
||||||
|
style TEXT,
|
||||||
|
contract_symbol TEXT,
|
||||||
|
UNIQUE (underlying_id, option_type, strike, expiration_date)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS option_quotes (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
contract_id INTEGER NOT NULL REFERENCES option_contracts(id),
|
||||||
|
quote_timestamp TIMESTAMP NOT NULL,
|
||||||
|
bid NUMERIC,
|
||||||
|
ask NUMERIC,
|
||||||
|
mid NUMERIC,
|
||||||
|
last_price NUMERIC,
|
||||||
|
implied_vol NUMERIC,
|
||||||
|
volume INTEGER,
|
||||||
|
open_interest INTEGER,
|
||||||
|
UNIQUE (contract_id, quote_timestamp)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS underlying_prices (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
underlying_id INTEGER NOT NULL REFERENCES underlyings(id),
|
||||||
|
price_timestamp TIMESTAMP NOT NULL,
|
||||||
|
price NUMERIC NOT NULL,
|
||||||
|
UNIQUE (underlying_id, price_timestamp)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_option_quotes_timestamp
|
||||||
|
ON option_quotes(quote_timestamp);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_option_quotes_contract_id
|
||||||
|
ON option_quotes(contract_id);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_option_contracts_underlying_expiry
|
||||||
|
ON option_contracts(underlying_id, expiration_date);
|
||||||
11
src/data/yfinance_pull.py
Normal file
11
src/data/yfinance_pull.py
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
import yfinance as yf
|
||||||
|
|
||||||
|
ticker = yf.Ticker("AAPL")
|
||||||
|
|
||||||
|
expirations = ticker.options
|
||||||
|
print(expirations)
|
||||||
|
|
||||||
|
chain = ticker.option_chain(expirations[0])
|
||||||
|
|
||||||
|
calls = chain.calls
|
||||||
|
puts = chain.puts
|
||||||
Reference in New Issue
Block a user