Replace hardcoded DB credentials with environment-driven configuration.
Centralize DB settings in ingestion config, remove embedded secrets from ingestion helpers, and add an idempotent PostgreSQL bootstrap script to create role/database and apply schema safely. Made-with: Cursor
This commit is contained in:
14
.env.example
Normal file
14
.env.example
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
DB_HOST=localhost
|
||||||
|
DB_PORT=5432
|
||||||
|
DB_NAME=options_db
|
||||||
|
DB_USER=quant_user
|
||||||
|
DB_PASSWORD=change_me
|
||||||
|
PIPELINE_SYMBOLS=SPY
|
||||||
|
|
||||||
|
# For scripts/setup_postgres.py when creating role/database:
|
||||||
|
# Use a superuser/admin account that can CREATE ROLE and CREATE DATABASE.
|
||||||
|
POSTGRES_ADMIN_USER=postgres
|
||||||
|
POSTGRES_ADMIN_PASSWORD=postgres
|
||||||
|
POSTGRES_ADMIN_HOST=localhost
|
||||||
|
POSTGRES_ADMIN_PORT=5432
|
||||||
|
POSTGRES_ADMIN_DB=postgres
|
||||||
108
scripts/setup_postgres.py
Normal file
108
scripts/setup_postgres.py
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Idempotent PostgreSQL bootstrap script for the option_pricing project.
|
||||||
|
|
||||||
|
What it does:
|
||||||
|
1) Creates the project role if it does not exist.
|
||||||
|
2) Creates the project database if it does not exist.
|
||||||
|
3) Grants ownership/privileges.
|
||||||
|
4) Applies src/data/sql/schema.sql to the project database.
|
||||||
|
|
||||||
|
Configuration comes from environment variables (see .env.example).
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import psycopg2
|
||||||
|
from psycopg2 import sql
|
||||||
|
|
||||||
|
|
||||||
|
ROOT = Path(__file__).resolve().parents[1]
|
||||||
|
SCHEMA_PATH = ROOT / "src" / "data" / "sql" / "schema.sql"
|
||||||
|
|
||||||
|
|
||||||
|
def _env(name: str, default: str | None = None) -> str:
|
||||||
|
value = os.getenv(name, default)
|
||||||
|
if value is None:
|
||||||
|
raise RuntimeError(f"Missing required environment variable: {name}")
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
def admin_connect(dbname: str):
|
||||||
|
return psycopg2.connect(
|
||||||
|
dbname=dbname,
|
||||||
|
user=_env("POSTGRES_ADMIN_USER", "postgres"),
|
||||||
|
password=_env("POSTGRES_ADMIN_PASSWORD", "postgres"),
|
||||||
|
host=_env("POSTGRES_ADMIN_HOST", "localhost"),
|
||||||
|
port=_env("POSTGRES_ADMIN_PORT", "5432"),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_role_and_database() -> None:
|
||||||
|
db_user = _env("DB_USER", "quant_user")
|
||||||
|
db_password = _env("DB_PASSWORD", "")
|
||||||
|
db_name = _env("DB_NAME", "options_db")
|
||||||
|
|
||||||
|
admin_db = _env("POSTGRES_ADMIN_DB", "postgres")
|
||||||
|
with admin_connect(admin_db) as conn:
|
||||||
|
conn.autocommit = True
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute("SELECT 1 FROM pg_roles WHERE rolname = %s", (db_user,))
|
||||||
|
role_exists = cur.fetchone() is not None
|
||||||
|
if not role_exists:
|
||||||
|
cur.execute(
|
||||||
|
sql.SQL("CREATE ROLE {} WITH LOGIN PASSWORD %s").format(
|
||||||
|
sql.Identifier(db_user)
|
||||||
|
),
|
||||||
|
(db_password,),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
cur.execute(
|
||||||
|
sql.SQL("ALTER ROLE {} WITH LOGIN PASSWORD %s").format(
|
||||||
|
sql.Identifier(db_user)
|
||||||
|
),
|
||||||
|
(db_password,),
|
||||||
|
)
|
||||||
|
|
||||||
|
cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", (db_name,))
|
||||||
|
db_exists = cur.fetchone() is not None
|
||||||
|
if not db_exists:
|
||||||
|
cur.execute(
|
||||||
|
sql.SQL("CREATE DATABASE {} OWNER {}").format(
|
||||||
|
sql.Identifier(db_name),
|
||||||
|
sql.Identifier(db_user),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
cur.execute(
|
||||||
|
sql.SQL("ALTER DATABASE {} OWNER TO {}").format(
|
||||||
|
sql.Identifier(db_name),
|
||||||
|
sql.Identifier(db_user),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def apply_schema() -> None:
|
||||||
|
if not SCHEMA_PATH.exists():
|
||||||
|
raise FileNotFoundError(f"Schema file not found: {SCHEMA_PATH}")
|
||||||
|
|
||||||
|
schema_sql = SCHEMA_PATH.read_text(encoding="utf-8")
|
||||||
|
with admin_connect(_env("DB_NAME", "options_db")) as conn:
|
||||||
|
conn.autocommit = True
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute(schema_sql)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
|
||||||
|
print("Ensuring role/database exist...")
|
||||||
|
ensure_role_and_database()
|
||||||
|
print("Applying schema...")
|
||||||
|
apply_schema()
|
||||||
|
print("Database setup complete.")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -1,14 +0,0 @@
|
|||||||
DB_CONFIG = {
|
|
||||||
"host": "localhost",
|
|
||||||
"port": 5432,
|
|
||||||
"database": "options_db",
|
|
||||||
"user": "quant_user",
|
|
||||||
"password": "strong_password",
|
|
||||||
}
|
|
||||||
|
|
||||||
PIPELINE_CONFIG = {
|
|
||||||
"symbols": [
|
|
||||||
"SPY"
|
|
||||||
# Example: "SPY"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
@@ -1,13 +1,15 @@
|
|||||||
import psycopg2
|
import pandas as pd
|
||||||
|
|
||||||
conn = psycopg2.connect(
|
from option_pricing.src.data.ingestion.db_connect import db_engine
|
||||||
dbname="options_db",
|
|
||||||
user="quant_user",
|
|
||||||
password="strong_password",
|
|
||||||
host="144.91.73.49",
|
|
||||||
port="5432"
|
|
||||||
)
|
|
||||||
|
|
||||||
cursor = conn.cursor()
|
|
||||||
cursor.execute("SELECT * FROM underlyings;")
|
def fetch_underlyings() -> pd.DataFrame:
|
||||||
print(cursor.fetchall())
|
"""
|
||||||
|
Fetch all entries from the underlyings table using configured DB credentials.
|
||||||
|
"""
|
||||||
|
engine = db_engine()
|
||||||
|
return pd.read_sql("SELECT * FROM underlyings;", engine)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
print(fetch_underlyings())
|
||||||
3
src/data/ingestion/config/__init__.py
Normal file
3
src/data/ingestion/config/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
from .settings import DB_CONFIG, PIPELINE_CONFIG
|
||||||
|
|
||||||
|
__all__ = ["DB_CONFIG", "PIPELINE_CONFIG"]
|
||||||
31
src/data/ingestion/config/settings.py
Normal file
31
src/data/ingestion/config/settings.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
def _get_env_int(name: str, default: int) -> int:
|
||||||
|
raw = os.getenv(name)
|
||||||
|
if raw is None:
|
||||||
|
return default
|
||||||
|
try:
|
||||||
|
return int(raw)
|
||||||
|
except ValueError as exc:
|
||||||
|
raise ValueError(f"Environment variable {name} must be an integer, got '{raw}'") from exc
|
||||||
|
|
||||||
|
|
||||||
|
def _get_env_list(name: str, default: list[str]) -> list[str]:
|
||||||
|
raw = os.getenv(name)
|
||||||
|
if not raw:
|
||||||
|
return default
|
||||||
|
return [x.strip() for x in raw.split(",") if x.strip()]
|
||||||
|
|
||||||
|
|
||||||
|
DB_CONFIG = {
|
||||||
|
"host": os.getenv("DB_HOST", "localhost"),
|
||||||
|
"port": _get_env_int("DB_PORT", 5432),
|
||||||
|
"database": os.getenv("DB_NAME", "options_db"),
|
||||||
|
"user": os.getenv("DB_USER", "quant_user"),
|
||||||
|
"password": os.getenv("DB_PASSWORD", ""),
|
||||||
|
}
|
||||||
|
|
||||||
|
PIPELINE_CONFIG = {
|
||||||
|
"symbols": _get_env_list("PIPELINE_SYMBOLS", ["SPY"]),
|
||||||
|
}
|
||||||
13
src/data/ingestion/db_connect.py
Normal file
13
src/data/ingestion/db_connect.py
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
from sqlalchemy import create_engine
|
||||||
|
from option_pricing.src.data.ingestion.config.settings import DB_CONFIG
|
||||||
|
|
||||||
|
def build_db_url() -> str:
|
||||||
|
return (
|
||||||
|
f"postgresql+psycopg2://{DB_CONFIG['user']}:{DB_CONFIG['password']}"
|
||||||
|
f"@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['database']}"
|
||||||
|
)
|
||||||
|
|
||||||
|
def db_engine():
|
||||||
|
db_url = build_db_url()
|
||||||
|
engine = create_engine(db_url, future=True)
|
||||||
|
return engine
|
||||||
@@ -1,16 +1,14 @@
|
|||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import yfinance as yf
|
import yfinance as yf
|
||||||
from sqlalchemy import create_engine
|
|
||||||
|
from db_connect import db_engine
|
||||||
|
|
||||||
# --- CONFIG ---
|
# --- CONFIG ---
|
||||||
TICKERS = ["UBS", "^GSPC"]
|
TICKERS = ["UBS", "^GSPC"]
|
||||||
DAYS_BACK = 21 # ~3 weeks
|
DAYS_BACK = 21 # ~3 weeks
|
||||||
TABLE_NAME = "prices"
|
TABLE_NAME = "prices"
|
||||||
|
|
||||||
DB_URI = "postgresql://quant_user:strong_password@localhost:5432/options_db"
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_data(tickers, start_date, end_date):
|
def fetch_data(tickers, start_date, end_date):
|
||||||
data = yf.download(
|
data = yf.download(
|
||||||
tickers,
|
tickers,
|
||||||
@@ -64,7 +62,7 @@ def main():
|
|||||||
raw = fetch_data(TICKERS, start_date, end_date)
|
raw = fetch_data(TICKERS, start_date, end_date)
|
||||||
df = transform_data(raw)
|
df = transform_data(raw)
|
||||||
|
|
||||||
engine = create_engine(DB_URI)
|
engine = db_engine()
|
||||||
load_to_postgres(df, engine)
|
load_to_postgres(df, engine)
|
||||||
|
|
||||||
print("Ingestion complete.")
|
print("Ingestion complete.")
|
||||||
|
|||||||
@@ -1,11 +1,11 @@
|
|||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from decimal import Decimal, InvalidOperation
|
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import yfinance as yf
|
import yfinance as yf
|
||||||
from sqlalchemy import create_engine, text
|
from sqlalchemy import text
|
||||||
|
|
||||||
from config.settings import DB_CONFIG, PIPELINE_CONFIG
|
from option_pricing.src.data.ingestion.config import DB_CONFIG, PIPELINE_CONFIG
|
||||||
|
from db_connect import db_engine
|
||||||
|
|
||||||
|
|
||||||
def build_db_url() -> str:
|
def build_db_url() -> str:
|
||||||
@@ -269,8 +269,7 @@ def ingest_symbol(symbol: str, engine):
|
|||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
db_url = build_db_url()
|
engine = db_engine()
|
||||||
engine = create_engine(db_url, future=True)
|
|
||||||
|
|
||||||
for symbol in PIPELINE_CONFIG["symbols"]:
|
for symbol in PIPELINE_CONFIG["symbols"]:
|
||||||
ingest_symbol(symbol, engine)
|
ingest_symbol(symbol, engine)
|
||||||
|
|||||||
Reference in New Issue
Block a user