Replace hardcoded DB credentials with environment-driven configuration.
Centralize DB settings in ingestion config, remove embedded secrets from ingestion helpers, and add an idempotent PostgreSQL bootstrap script to create role/database and apply schema safely. Made-with: Cursor
This commit is contained in:
14
.env.example
Normal file
14
.env.example
Normal file
@@ -0,0 +1,14 @@
|
||||
DB_HOST=localhost
|
||||
DB_PORT=5432
|
||||
DB_NAME=options_db
|
||||
DB_USER=quant_user
|
||||
DB_PASSWORD=change_me
|
||||
PIPELINE_SYMBOLS=SPY
|
||||
|
||||
# For scripts/setup_postgres.py when creating role/database:
|
||||
# Use a superuser/admin account that can CREATE ROLE and CREATE DATABASE.
|
||||
POSTGRES_ADMIN_USER=postgres
|
||||
POSTGRES_ADMIN_PASSWORD=postgres
|
||||
POSTGRES_ADMIN_HOST=localhost
|
||||
POSTGRES_ADMIN_PORT=5432
|
||||
POSTGRES_ADMIN_DB=postgres
|
||||
108
scripts/setup_postgres.py
Normal file
108
scripts/setup_postgres.py
Normal file
@@ -0,0 +1,108 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Idempotent PostgreSQL bootstrap script for the option_pricing project.
|
||||
|
||||
What it does:
|
||||
1) Creates the project role if it does not exist.
|
||||
2) Creates the project database if it does not exist.
|
||||
3) Grants ownership/privileges.
|
||||
4) Applies src/data/sql/schema.sql to the project database.
|
||||
|
||||
Configuration comes from environment variables (see .env.example).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import psycopg2
|
||||
from psycopg2 import sql
|
||||
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
SCHEMA_PATH = ROOT / "src" / "data" / "sql" / "schema.sql"
|
||||
|
||||
|
||||
def _env(name: str, default: str | None = None) -> str:
|
||||
value = os.getenv(name, default)
|
||||
if value is None:
|
||||
raise RuntimeError(f"Missing required environment variable: {name}")
|
||||
return value
|
||||
|
||||
|
||||
def admin_connect(dbname: str):
|
||||
return psycopg2.connect(
|
||||
dbname=dbname,
|
||||
user=_env("POSTGRES_ADMIN_USER", "postgres"),
|
||||
password=_env("POSTGRES_ADMIN_PASSWORD", "postgres"),
|
||||
host=_env("POSTGRES_ADMIN_HOST", "localhost"),
|
||||
port=_env("POSTGRES_ADMIN_PORT", "5432"),
|
||||
)
|
||||
|
||||
|
||||
def ensure_role_and_database() -> None:
|
||||
db_user = _env("DB_USER", "quant_user")
|
||||
db_password = _env("DB_PASSWORD", "")
|
||||
db_name = _env("DB_NAME", "options_db")
|
||||
|
||||
admin_db = _env("POSTGRES_ADMIN_DB", "postgres")
|
||||
with admin_connect(admin_db) as conn:
|
||||
conn.autocommit = True
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("SELECT 1 FROM pg_roles WHERE rolname = %s", (db_user,))
|
||||
role_exists = cur.fetchone() is not None
|
||||
if not role_exists:
|
||||
cur.execute(
|
||||
sql.SQL("CREATE ROLE {} WITH LOGIN PASSWORD %s").format(
|
||||
sql.Identifier(db_user)
|
||||
),
|
||||
(db_password,),
|
||||
)
|
||||
else:
|
||||
cur.execute(
|
||||
sql.SQL("ALTER ROLE {} WITH LOGIN PASSWORD %s").format(
|
||||
sql.Identifier(db_user)
|
||||
),
|
||||
(db_password,),
|
||||
)
|
||||
|
||||
cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", (db_name,))
|
||||
db_exists = cur.fetchone() is not None
|
||||
if not db_exists:
|
||||
cur.execute(
|
||||
sql.SQL("CREATE DATABASE {} OWNER {}").format(
|
||||
sql.Identifier(db_name),
|
||||
sql.Identifier(db_user),
|
||||
)
|
||||
)
|
||||
else:
|
||||
cur.execute(
|
||||
sql.SQL("ALTER DATABASE {} OWNER TO {}").format(
|
||||
sql.Identifier(db_name),
|
||||
sql.Identifier(db_user),
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def apply_schema() -> None:
|
||||
if not SCHEMA_PATH.exists():
|
||||
raise FileNotFoundError(f"Schema file not found: {SCHEMA_PATH}")
|
||||
|
||||
schema_sql = SCHEMA_PATH.read_text(encoding="utf-8")
|
||||
with admin_connect(_env("DB_NAME", "options_db")) as conn:
|
||||
conn.autocommit = True
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(schema_sql)
|
||||
|
||||
|
||||
def main() -> None:
|
||||
print("Ensuring role/database exist...")
|
||||
ensure_role_and_database()
|
||||
print("Applying schema...")
|
||||
apply_schema()
|
||||
print("Database setup complete.")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,14 +0,0 @@
|
||||
DB_CONFIG = {
|
||||
"host": "localhost",
|
||||
"port": 5432,
|
||||
"database": "options_db",
|
||||
"user": "quant_user",
|
||||
"password": "strong_password",
|
||||
}
|
||||
|
||||
PIPELINE_CONFIG = {
|
||||
"symbols": [
|
||||
"SPY"
|
||||
# Example: "SPY"
|
||||
]
|
||||
}
|
||||
@@ -1,13 +1,15 @@
|
||||
import psycopg2
|
||||
import pandas as pd
|
||||
|
||||
conn = psycopg2.connect(
|
||||
dbname="options_db",
|
||||
user="quant_user",
|
||||
password="strong_password",
|
||||
host="144.91.73.49",
|
||||
port="5432"
|
||||
)
|
||||
from option_pricing.src.data.ingestion.db_connect import db_engine
|
||||
|
||||
cursor = conn.cursor()
|
||||
cursor.execute("SELECT * FROM underlyings;")
|
||||
print(cursor.fetchall())
|
||||
|
||||
def fetch_underlyings() -> pd.DataFrame:
|
||||
"""
|
||||
Fetch all entries from the underlyings table using configured DB credentials.
|
||||
"""
|
||||
engine = db_engine()
|
||||
return pd.read_sql("SELECT * FROM underlyings;", engine)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
print(fetch_underlyings())
|
||||
3
src/data/ingestion/config/__init__.py
Normal file
3
src/data/ingestion/config/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from .settings import DB_CONFIG, PIPELINE_CONFIG
|
||||
|
||||
__all__ = ["DB_CONFIG", "PIPELINE_CONFIG"]
|
||||
31
src/data/ingestion/config/settings.py
Normal file
31
src/data/ingestion/config/settings.py
Normal file
@@ -0,0 +1,31 @@
|
||||
import os
|
||||
|
||||
|
||||
def _get_env_int(name: str, default: int) -> int:
|
||||
raw = os.getenv(name)
|
||||
if raw is None:
|
||||
return default
|
||||
try:
|
||||
return int(raw)
|
||||
except ValueError as exc:
|
||||
raise ValueError(f"Environment variable {name} must be an integer, got '{raw}'") from exc
|
||||
|
||||
|
||||
def _get_env_list(name: str, default: list[str]) -> list[str]:
|
||||
raw = os.getenv(name)
|
||||
if not raw:
|
||||
return default
|
||||
return [x.strip() for x in raw.split(",") if x.strip()]
|
||||
|
||||
|
||||
DB_CONFIG = {
|
||||
"host": os.getenv("DB_HOST", "localhost"),
|
||||
"port": _get_env_int("DB_PORT", 5432),
|
||||
"database": os.getenv("DB_NAME", "options_db"),
|
||||
"user": os.getenv("DB_USER", "quant_user"),
|
||||
"password": os.getenv("DB_PASSWORD", ""),
|
||||
}
|
||||
|
||||
PIPELINE_CONFIG = {
|
||||
"symbols": _get_env_list("PIPELINE_SYMBOLS", ["SPY"]),
|
||||
}
|
||||
13
src/data/ingestion/db_connect.py
Normal file
13
src/data/ingestion/db_connect.py
Normal file
@@ -0,0 +1,13 @@
|
||||
from sqlalchemy import create_engine
|
||||
from option_pricing.src.data.ingestion.config.settings import DB_CONFIG
|
||||
|
||||
def build_db_url() -> str:
|
||||
return (
|
||||
f"postgresql+psycopg2://{DB_CONFIG['user']}:{DB_CONFIG['password']}"
|
||||
f"@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['database']}"
|
||||
)
|
||||
|
||||
def db_engine():
|
||||
db_url = build_db_url()
|
||||
engine = create_engine(db_url, future=True)
|
||||
return engine
|
||||
@@ -1,16 +1,14 @@
|
||||
from datetime import datetime, timedelta
|
||||
import pandas as pd
|
||||
import yfinance as yf
|
||||
from sqlalchemy import create_engine
|
||||
|
||||
from db_connect import db_engine
|
||||
|
||||
# --- CONFIG ---
|
||||
TICKERS = ["UBS", "^GSPC"]
|
||||
DAYS_BACK = 21 # ~3 weeks
|
||||
TABLE_NAME = "prices"
|
||||
|
||||
DB_URI = "postgresql://quant_user:strong_password@localhost:5432/options_db"
|
||||
|
||||
|
||||
def fetch_data(tickers, start_date, end_date):
|
||||
data = yf.download(
|
||||
tickers,
|
||||
@@ -64,7 +62,7 @@ def main():
|
||||
raw = fetch_data(TICKERS, start_date, end_date)
|
||||
df = transform_data(raw)
|
||||
|
||||
engine = create_engine(DB_URI)
|
||||
engine = db_engine()
|
||||
load_to_postgres(df, engine)
|
||||
|
||||
print("Ingestion complete.")
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
from datetime import datetime, timezone
|
||||
from decimal import Decimal, InvalidOperation
|
||||
|
||||
import pandas as pd
|
||||
import yfinance as yf
|
||||
from sqlalchemy import create_engine, text
|
||||
from sqlalchemy import text
|
||||
|
||||
from config.settings import DB_CONFIG, PIPELINE_CONFIG
|
||||
from option_pricing.src.data.ingestion.config import DB_CONFIG, PIPELINE_CONFIG
|
||||
from db_connect import db_engine
|
||||
|
||||
|
||||
def build_db_url() -> str:
|
||||
@@ -269,8 +269,7 @@ def ingest_symbol(symbol: str, engine):
|
||||
|
||||
|
||||
def main():
|
||||
db_url = build_db_url()
|
||||
engine = create_engine(db_url, future=True)
|
||||
engine = db_engine()
|
||||
|
||||
for symbol in PIPELINE_CONFIG["symbols"]:
|
||||
ingest_symbol(symbol, engine)
|
||||
|
||||
Reference in New Issue
Block a user