From b3663258e4f7088e1da00b9d3e8a4d0371446807 Mon Sep 17 00:00:00 2001 From: David Doebel Date: Thu, 2 Apr 2026 16:30:50 +0200 Subject: [PATCH] Replace hardcoded DB credentials with environment-driven configuration. Centralize DB settings in ingestion config, remove embedded secrets from ingestion helpers, and add an idempotent PostgreSQL bootstrap script to create role/database and apply schema safely. Made-with: Cursor --- .env.example | 14 +++ scripts/setup_postgres.py | 108 ++++++++++++++++++++ src/data/config/__init__.py | 0 src/data/config/settings.py | 14 --- src/data/database_interaction.py | 24 +++-- src/data/ingestion/config/__init__.py | 3 + src/data/ingestion/config/settings.py | 31 ++++++ src/data/ingestion/db_connect.py | 13 +++ src/data/ingestion/ingest_ubs_comparison.py | 8 +- src/data/ingestion/ingest_yahoo_options.py | 9 +- 10 files changed, 189 insertions(+), 35 deletions(-) create mode 100644 .env.example create mode 100644 scripts/setup_postgres.py delete mode 100644 src/data/config/__init__.py delete mode 100644 src/data/config/settings.py create mode 100644 src/data/ingestion/config/__init__.py create mode 100644 src/data/ingestion/config/settings.py create mode 100644 src/data/ingestion/db_connect.py diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..aa78a21 --- /dev/null +++ b/.env.example @@ -0,0 +1,14 @@ +DB_HOST=localhost +DB_PORT=5432 +DB_NAME=options_db +DB_USER=quant_user +DB_PASSWORD=change_me +PIPELINE_SYMBOLS=SPY + +# For scripts/setup_postgres.py when creating role/database: +# Use a superuser/admin account that can CREATE ROLE and CREATE DATABASE. +POSTGRES_ADMIN_USER=postgres +POSTGRES_ADMIN_PASSWORD=postgres +POSTGRES_ADMIN_HOST=localhost +POSTGRES_ADMIN_PORT=5432 +POSTGRES_ADMIN_DB=postgres diff --git a/scripts/setup_postgres.py b/scripts/setup_postgres.py new file mode 100644 index 0000000..55fb84f --- /dev/null +++ b/scripts/setup_postgres.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 +""" +Idempotent PostgreSQL bootstrap script for the option_pricing project. + +What it does: +1) Creates the project role if it does not exist. +2) Creates the project database if it does not exist. +3) Grants ownership/privileges. +4) Applies src/data/sql/schema.sql to the project database. + +Configuration comes from environment variables (see .env.example). +""" + +from __future__ import annotations + +import os +from pathlib import Path + +import psycopg2 +from psycopg2 import sql + + +ROOT = Path(__file__).resolve().parents[1] +SCHEMA_PATH = ROOT / "src" / "data" / "sql" / "schema.sql" + + +def _env(name: str, default: str | None = None) -> str: + value = os.getenv(name, default) + if value is None: + raise RuntimeError(f"Missing required environment variable: {name}") + return value + + +def admin_connect(dbname: str): + return psycopg2.connect( + dbname=dbname, + user=_env("POSTGRES_ADMIN_USER", "postgres"), + password=_env("POSTGRES_ADMIN_PASSWORD", "postgres"), + host=_env("POSTGRES_ADMIN_HOST", "localhost"), + port=_env("POSTGRES_ADMIN_PORT", "5432"), + ) + + +def ensure_role_and_database() -> None: + db_user = _env("DB_USER", "quant_user") + db_password = _env("DB_PASSWORD", "") + db_name = _env("DB_NAME", "options_db") + + admin_db = _env("POSTGRES_ADMIN_DB", "postgres") + with admin_connect(admin_db) as conn: + conn.autocommit = True + with conn.cursor() as cur: + cur.execute("SELECT 1 FROM pg_roles WHERE rolname = %s", (db_user,)) + role_exists = cur.fetchone() is not None + if not role_exists: + cur.execute( + sql.SQL("CREATE ROLE {} WITH LOGIN PASSWORD %s").format( + sql.Identifier(db_user) + ), + (db_password,), + ) + else: + cur.execute( + sql.SQL("ALTER ROLE {} WITH LOGIN PASSWORD %s").format( + sql.Identifier(db_user) + ), + (db_password,), + ) + + cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", (db_name,)) + db_exists = cur.fetchone() is not None + if not db_exists: + cur.execute( + sql.SQL("CREATE DATABASE {} OWNER {}").format( + sql.Identifier(db_name), + sql.Identifier(db_user), + ) + ) + else: + cur.execute( + sql.SQL("ALTER DATABASE {} OWNER TO {}").format( + sql.Identifier(db_name), + sql.Identifier(db_user), + ) + ) + + +def apply_schema() -> None: + if not SCHEMA_PATH.exists(): + raise FileNotFoundError(f"Schema file not found: {SCHEMA_PATH}") + + schema_sql = SCHEMA_PATH.read_text(encoding="utf-8") + with admin_connect(_env("DB_NAME", "options_db")) as conn: + conn.autocommit = True + with conn.cursor() as cur: + cur.execute(schema_sql) + + +def main() -> None: + print("Ensuring role/database exist...") + ensure_role_and_database() + print("Applying schema...") + apply_schema() + print("Database setup complete.") + + +if __name__ == "__main__": + main() diff --git a/src/data/config/__init__.py b/src/data/config/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/data/config/settings.py b/src/data/config/settings.py deleted file mode 100644 index 49cb6a0..0000000 --- a/src/data/config/settings.py +++ /dev/null @@ -1,14 +0,0 @@ -DB_CONFIG = { - "host": "localhost", - "port": 5432, - "database": "options_db", - "user": "quant_user", - "password": "strong_password", -} - -PIPELINE_CONFIG = { - "symbols": [ - "SPY" - # Example: "SPY" - ] -} \ No newline at end of file diff --git a/src/data/database_interaction.py b/src/data/database_interaction.py index 0d0f9c5..3871c77 100644 --- a/src/data/database_interaction.py +++ b/src/data/database_interaction.py @@ -1,13 +1,15 @@ -import psycopg2 +import pandas as pd -conn = psycopg2.connect( - dbname="options_db", - user="quant_user", - password="strong_password", - host="144.91.73.49", - port="5432" -) +from option_pricing.src.data.ingestion.db_connect import db_engine -cursor = conn.cursor() -cursor.execute("SELECT * FROM underlyings;") -print(cursor.fetchall()) \ No newline at end of file + +def fetch_underlyings() -> pd.DataFrame: + """ + Fetch all entries from the underlyings table using configured DB credentials. + """ + engine = db_engine() + return pd.read_sql("SELECT * FROM underlyings;", engine) + + +if __name__ == "__main__": + print(fetch_underlyings()) \ No newline at end of file diff --git a/src/data/ingestion/config/__init__.py b/src/data/ingestion/config/__init__.py new file mode 100644 index 0000000..3b0c909 --- /dev/null +++ b/src/data/ingestion/config/__init__.py @@ -0,0 +1,3 @@ +from .settings import DB_CONFIG, PIPELINE_CONFIG + +__all__ = ["DB_CONFIG", "PIPELINE_CONFIG"] diff --git a/src/data/ingestion/config/settings.py b/src/data/ingestion/config/settings.py new file mode 100644 index 0000000..863eecd --- /dev/null +++ b/src/data/ingestion/config/settings.py @@ -0,0 +1,31 @@ +import os + + +def _get_env_int(name: str, default: int) -> int: + raw = os.getenv(name) + if raw is None: + return default + try: + return int(raw) + except ValueError as exc: + raise ValueError(f"Environment variable {name} must be an integer, got '{raw}'") from exc + + +def _get_env_list(name: str, default: list[str]) -> list[str]: + raw = os.getenv(name) + if not raw: + return default + return [x.strip() for x in raw.split(",") if x.strip()] + + +DB_CONFIG = { + "host": os.getenv("DB_HOST", "localhost"), + "port": _get_env_int("DB_PORT", 5432), + "database": os.getenv("DB_NAME", "options_db"), + "user": os.getenv("DB_USER", "quant_user"), + "password": os.getenv("DB_PASSWORD", ""), +} + +PIPELINE_CONFIG = { + "symbols": _get_env_list("PIPELINE_SYMBOLS", ["SPY"]), +} \ No newline at end of file diff --git a/src/data/ingestion/db_connect.py b/src/data/ingestion/db_connect.py new file mode 100644 index 0000000..fe66920 --- /dev/null +++ b/src/data/ingestion/db_connect.py @@ -0,0 +1,13 @@ +from sqlalchemy import create_engine +from option_pricing.src.data.ingestion.config.settings import DB_CONFIG + +def build_db_url() -> str: + return ( + f"postgresql+psycopg2://{DB_CONFIG['user']}:{DB_CONFIG['password']}" + f"@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['database']}" + ) + +def db_engine(): + db_url = build_db_url() + engine = create_engine(db_url, future=True) + return engine diff --git a/src/data/ingestion/ingest_ubs_comparison.py b/src/data/ingestion/ingest_ubs_comparison.py index d1fa615..7cdb9fc 100644 --- a/src/data/ingestion/ingest_ubs_comparison.py +++ b/src/data/ingestion/ingest_ubs_comparison.py @@ -1,16 +1,14 @@ from datetime import datetime, timedelta import pandas as pd import yfinance as yf -from sqlalchemy import create_engine + +from db_connect import db_engine # --- CONFIG --- TICKERS = ["UBS", "^GSPC"] DAYS_BACK = 21 # ~3 weeks TABLE_NAME = "prices" -DB_URI = "postgresql://quant_user:strong_password@localhost:5432/options_db" - - def fetch_data(tickers, start_date, end_date): data = yf.download( tickers, @@ -64,7 +62,7 @@ def main(): raw = fetch_data(TICKERS, start_date, end_date) df = transform_data(raw) - engine = create_engine(DB_URI) + engine = db_engine() load_to_postgres(df, engine) print("Ingestion complete.") diff --git a/src/data/ingestion/ingest_yahoo_options.py b/src/data/ingestion/ingest_yahoo_options.py index e4ac989..0b62c0c 100644 --- a/src/data/ingestion/ingest_yahoo_options.py +++ b/src/data/ingestion/ingest_yahoo_options.py @@ -1,11 +1,11 @@ from datetime import datetime, timezone -from decimal import Decimal, InvalidOperation import pandas as pd import yfinance as yf -from sqlalchemy import create_engine, text +from sqlalchemy import text -from config.settings import DB_CONFIG, PIPELINE_CONFIG +from option_pricing.src.data.ingestion.config import DB_CONFIG, PIPELINE_CONFIG +from db_connect import db_engine def build_db_url() -> str: @@ -269,8 +269,7 @@ def ingest_symbol(symbol: str, engine): def main(): - db_url = build_db_url() - engine = create_engine(db_url, future=True) + engine = db_engine() for symbol in PIPELINE_CONFIG["symbols"]: ingest_symbol(symbol, engine)