Replace hardcoded DB credentials with environment-driven configuration.

Centralize DB settings in ingestion config, remove embedded secrets from ingestion helpers, and add an idempotent PostgreSQL bootstrap script to create role/database and apply schema safely.

Made-with: Cursor
This commit is contained in:
David Doebel
2026-04-02 16:30:50 +02:00
parent e9b3a4aac3
commit b3663258e4
10 changed files with 189 additions and 35 deletions

14
.env.example Normal file
View File

@@ -0,0 +1,14 @@
DB_HOST=localhost
DB_PORT=5432
DB_NAME=options_db
DB_USER=quant_user
DB_PASSWORD=change_me
PIPELINE_SYMBOLS=SPY
# For scripts/setup_postgres.py when creating role/database:
# Use a superuser/admin account that can CREATE ROLE and CREATE DATABASE.
POSTGRES_ADMIN_USER=postgres
POSTGRES_ADMIN_PASSWORD=postgres
POSTGRES_ADMIN_HOST=localhost
POSTGRES_ADMIN_PORT=5432
POSTGRES_ADMIN_DB=postgres

108
scripts/setup_postgres.py Normal file
View File

@@ -0,0 +1,108 @@
#!/usr/bin/env python3
"""
Idempotent PostgreSQL bootstrap script for the option_pricing project.
What it does:
1) Creates the project role if it does not exist.
2) Creates the project database if it does not exist.
3) Grants ownership/privileges.
4) Applies src/data/sql/schema.sql to the project database.
Configuration comes from environment variables (see .env.example).
"""
from __future__ import annotations
import os
from pathlib import Path
import psycopg2
from psycopg2 import sql
ROOT = Path(__file__).resolve().parents[1]
SCHEMA_PATH = ROOT / "src" / "data" / "sql" / "schema.sql"
def _env(name: str, default: str | None = None) -> str:
value = os.getenv(name, default)
if value is None:
raise RuntimeError(f"Missing required environment variable: {name}")
return value
def admin_connect(dbname: str):
return psycopg2.connect(
dbname=dbname,
user=_env("POSTGRES_ADMIN_USER", "postgres"),
password=_env("POSTGRES_ADMIN_PASSWORD", "postgres"),
host=_env("POSTGRES_ADMIN_HOST", "localhost"),
port=_env("POSTGRES_ADMIN_PORT", "5432"),
)
def ensure_role_and_database() -> None:
db_user = _env("DB_USER", "quant_user")
db_password = _env("DB_PASSWORD", "")
db_name = _env("DB_NAME", "options_db")
admin_db = _env("POSTGRES_ADMIN_DB", "postgres")
with admin_connect(admin_db) as conn:
conn.autocommit = True
with conn.cursor() as cur:
cur.execute("SELECT 1 FROM pg_roles WHERE rolname = %s", (db_user,))
role_exists = cur.fetchone() is not None
if not role_exists:
cur.execute(
sql.SQL("CREATE ROLE {} WITH LOGIN PASSWORD %s").format(
sql.Identifier(db_user)
),
(db_password,),
)
else:
cur.execute(
sql.SQL("ALTER ROLE {} WITH LOGIN PASSWORD %s").format(
sql.Identifier(db_user)
),
(db_password,),
)
cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", (db_name,))
db_exists = cur.fetchone() is not None
if not db_exists:
cur.execute(
sql.SQL("CREATE DATABASE {} OWNER {}").format(
sql.Identifier(db_name),
sql.Identifier(db_user),
)
)
else:
cur.execute(
sql.SQL("ALTER DATABASE {} OWNER TO {}").format(
sql.Identifier(db_name),
sql.Identifier(db_user),
)
)
def apply_schema() -> None:
if not SCHEMA_PATH.exists():
raise FileNotFoundError(f"Schema file not found: {SCHEMA_PATH}")
schema_sql = SCHEMA_PATH.read_text(encoding="utf-8")
with admin_connect(_env("DB_NAME", "options_db")) as conn:
conn.autocommit = True
with conn.cursor() as cur:
cur.execute(schema_sql)
def main() -> None:
print("Ensuring role/database exist...")
ensure_role_and_database()
print("Applying schema...")
apply_schema()
print("Database setup complete.")
if __name__ == "__main__":
main()

View File

@@ -1,14 +0,0 @@
DB_CONFIG = {
"host": "localhost",
"port": 5432,
"database": "options_db",
"user": "quant_user",
"password": "strong_password",
}
PIPELINE_CONFIG = {
"symbols": [
"SPY"
# Example: "SPY"
]
}

View File

@@ -1,13 +1,15 @@
import psycopg2 import pandas as pd
conn = psycopg2.connect( from option_pricing.src.data.ingestion.db_connect import db_engine
dbname="options_db",
user="quant_user",
password="strong_password",
host="144.91.73.49",
port="5432"
)
cursor = conn.cursor()
cursor.execute("SELECT * FROM underlyings;") def fetch_underlyings() -> pd.DataFrame:
print(cursor.fetchall()) """
Fetch all entries from the underlyings table using configured DB credentials.
"""
engine = db_engine()
return pd.read_sql("SELECT * FROM underlyings;", engine)
if __name__ == "__main__":
print(fetch_underlyings())

View File

@@ -0,0 +1,3 @@
from .settings import DB_CONFIG, PIPELINE_CONFIG
__all__ = ["DB_CONFIG", "PIPELINE_CONFIG"]

View File

@@ -0,0 +1,31 @@
import os
def _get_env_int(name: str, default: int) -> int:
raw = os.getenv(name)
if raw is None:
return default
try:
return int(raw)
except ValueError as exc:
raise ValueError(f"Environment variable {name} must be an integer, got '{raw}'") from exc
def _get_env_list(name: str, default: list[str]) -> list[str]:
raw = os.getenv(name)
if not raw:
return default
return [x.strip() for x in raw.split(",") if x.strip()]
DB_CONFIG = {
"host": os.getenv("DB_HOST", "localhost"),
"port": _get_env_int("DB_PORT", 5432),
"database": os.getenv("DB_NAME", "options_db"),
"user": os.getenv("DB_USER", "quant_user"),
"password": os.getenv("DB_PASSWORD", ""),
}
PIPELINE_CONFIG = {
"symbols": _get_env_list("PIPELINE_SYMBOLS", ["SPY"]),
}

View File

@@ -0,0 +1,13 @@
from sqlalchemy import create_engine
from option_pricing.src.data.ingestion.config.settings import DB_CONFIG
def build_db_url() -> str:
return (
f"postgresql+psycopg2://{DB_CONFIG['user']}:{DB_CONFIG['password']}"
f"@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['database']}"
)
def db_engine():
db_url = build_db_url()
engine = create_engine(db_url, future=True)
return engine

View File

@@ -1,16 +1,14 @@
from datetime import datetime, timedelta from datetime import datetime, timedelta
import pandas as pd import pandas as pd
import yfinance as yf import yfinance as yf
from sqlalchemy import create_engine
from db_connect import db_engine
# --- CONFIG --- # --- CONFIG ---
TICKERS = ["UBS", "^GSPC"] TICKERS = ["UBS", "^GSPC"]
DAYS_BACK = 21 # ~3 weeks DAYS_BACK = 21 # ~3 weeks
TABLE_NAME = "prices" TABLE_NAME = "prices"
DB_URI = "postgresql://quant_user:strong_password@localhost:5432/options_db"
def fetch_data(tickers, start_date, end_date): def fetch_data(tickers, start_date, end_date):
data = yf.download( data = yf.download(
tickers, tickers,
@@ -64,7 +62,7 @@ def main():
raw = fetch_data(TICKERS, start_date, end_date) raw = fetch_data(TICKERS, start_date, end_date)
df = transform_data(raw) df = transform_data(raw)
engine = create_engine(DB_URI) engine = db_engine()
load_to_postgres(df, engine) load_to_postgres(df, engine)
print("Ingestion complete.") print("Ingestion complete.")

View File

@@ -1,11 +1,11 @@
from datetime import datetime, timezone from datetime import datetime, timezone
from decimal import Decimal, InvalidOperation
import pandas as pd import pandas as pd
import yfinance as yf import yfinance as yf
from sqlalchemy import create_engine, text from sqlalchemy import text
from config.settings import DB_CONFIG, PIPELINE_CONFIG from option_pricing.src.data.ingestion.config import DB_CONFIG, PIPELINE_CONFIG
from db_connect import db_engine
def build_db_url() -> str: def build_db_url() -> str:
@@ -269,8 +269,7 @@ def ingest_symbol(symbol: str, engine):
def main(): def main():
db_url = build_db_url() engine = db_engine()
engine = create_engine(db_url, future=True)
for symbol in PIPELINE_CONFIG["symbols"]: for symbol in PIPELINE_CONFIG["symbols"]:
ingest_symbol(symbol, engine) ingest_symbol(symbol, engine)