Update README.md

Add references to further analysis
Update README.md
2026-04-02 23:02:39 +00:00 · 2026-04-02 15:50:18 +00:00 · 2026-04-02 16:31:07 +02:00 · 2026-04-02 16:30:56 +02:00 · 2026-04-02 16:30:50 +02:00 · 2026-04-02 16:30:43 +02:00
86 changed files with 3729 additions and 161 deletions
--- a/.env.example
+++ b/.env.example
@@ -0,0 +1,14 @@
+DB_HOST=localhost
+DB_PORT=5432
+DB_NAME=options_db
+DB_USER=quant_user
+DB_PASSWORD=change_me
+PIPELINE_SYMBOLS=SPY
+
+# For scripts/setup_postgres.py when creating role/database:
+# Use a superuser/admin account that can CREATE ROLE and CREATE DATABASE.
+POSTGRES_ADMIN_USER=postgres
+POSTGRES_ADMIN_PASSWORD=postgres
+POSTGRES_ADMIN_HOST=localhost
+POSTGRES_ADMIN_PORT=5432
+POSTGRES_ADMIN_DB=postgres
--- a/.gitea/workflows/ci.yml
+++ b/.gitea/workflows/ci.yml
@@ -20,7 +20,7 @@ jobs:

      - name: Configure
        run: |
-          mkdir build
+          mkdir build 
          cd build
          cmake ..

--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,24 @@
+# Built Python extension dropped next to qengine/__init__.py for local dev
+/qengine/*.so
+/qengine/*.dylib
+/qengine/__pycache__/
+
+/skbuild-build/
+
+/build/
+/.idea/
+**/__pycache__/
+/docs/html/
+/docs/latex/
+
+# Local reference tree (optional clone)
+/CPP-design-pattern-derivatives-pricing/
+
+# Local environment and secrets
+.env
+.env.*
+!.env.example
+
+# Local tooling caches
+/.pycache/
+/.mplconfig/
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,28 +4,53 @@ project(QuantEngine)
 set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_CXX_FLAGS "-O3 -march=native")

+option(BUILD_TESTING "Build GoogleTest target and tests" ON)
+
+set(PYBIND11_FINDPYTHON ON)
+find_package(Python3 REQUIRED COMPONENTS Interpreter Development.Module)
 find_package(Eigen3 REQUIRED)
+find_package(pybind11 CONFIG REQUIRED)
+#find_package(PostgreSQL REQUIRED)
+#find_package(PkgConfig REQUIRED)
+#pkg_check_modules(PQXX REQUIRED IMPORTED_TARGET libpqxx)

-add_subdirectory(src)
+add_subdirectory(cpp)

-# Testing
-enable_testing()
+find_package(Doxygen OPTIONAL_COMPONENTS dot)
+if(DOXYGEN_FOUND)
+    add_custom_target(
+        docs
+        COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_SOURCE_DIR}/docs/Doxyfile
+        WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+        COMMENT "Generating API documentation (HTML in docs/html)"
+        VERBATIM)
+endif()

-include(FetchContent)
+install(FILES "${CMAKE_SOURCE_DIR}/qengine/__init__.py" DESTINATION qengine)
+install(TARGETS qengine_cpp
+        LIBRARY DESTINATION qengine
+        RUNTIME DESTINATION qengine)

-FetchContent_Declare(
-        googletest
-        URL https://github.com/google/googletest/archive/refs/tags/v1.14.0.zip
-        DOWNLOAD_EXTRACT_TIMESTAMP TRUE
-)
+if(BUILD_TESTING)
+    enable_testing()

-FetchContent_MakeAvailable(googletest)
+    include(FetchContent)

-add_executable(qengine_tests
-        tests/test_black_scholes.cpp
-        tests/stubs/FlatYieldCurve.cpp
-        tests/stubs/FlatVolatilitySurface.cpp)
+    FetchContent_Declare(
+            googletest
+            URL https://github.com/google/googletest/archive/refs/tags/v1.14.0.zip
+            DOWNLOAD_EXTRACT_TIMESTAMP TRUE
+    )

-target_link_libraries(qengine_tests qengine GTest::gtest_main)
-include(GoogleTest)
-gtest_discover_tests(qengine_tests)
+    FetchContent_MakeAvailable(googletest)
+
+    add_executable(qengine_tests
+            tests/test_black_scholes.cpp
+            tests/stubs/FlatYieldCurve.cpp
+            tests/stubs/FlatVolatilitySurface.cpp)
+
+    target_include_directories(qengine_tests PRIVATE ${CMAKE_SOURCE_DIR}/tests)
+    target_link_libraries(qengine_tests qengine_core GTest::gtest_main)
+    include(GoogleTest)
+    gtest_discover_tests(qengine_tests)
+endif()
--- a/README.md
+++ b/README.md
@@ -1,5 +1,175 @@
-# pricing
+# Option Pricing Engine with Market Data Pipeline 
+## 📌 Project Description

-Monte Carlo pricing of European options under Black–Scholes
+This repository implements a **production-style quantitative valuation pipeline** for equity options, combining high-performance pricing models with a full data and calibration workflow.

-### Project structure
+The system goes beyond a standalone pricer: it integrates **market data ingestion, structured storage, numerical pricing, and volatility surface calibration** into a single reproducible framework.
+### The goal of this project 
+
+The goal of this project is to serve as a **modular foundation for quantitative modeling and experimentation** in option pricing and financial time series.
+
+Rather than implementing a single model, the system is designed to support:
+
+- benchmarking different pricing approaches (analytical, simulation-based, and data-driven),
+- comparing numerical methods under realistic market data conditions,
+- and extending toward more advanced workflows such as statistical learning and model calibration.
+
+A key objective is to create an environment where **new ideas from research can be implemented, tested, and evaluated within a consistent pipeline**, rather than in isolated scripts or notebooks.
+
+This includes:
+
+- integrating alternative pricing methodologies into a shared framework,
+- analyzing model behavior across time and market regimes,
+- and building reproducible pipelines for both numerical and data-driven approaches.
+
+Ultimately, the project aims to bridge:
+- **theoretical models** (e.g. stochastic processes, volatility parameterizations),
+- **numerical methods** (simulation, calibration),
+- and **data-driven techniques** (time-series analysis, machine learning),
+
+within a single, extensible system. Moving closer to a production-grade pipeline. 
+### What the system does
+
+The system supports the following workflow:
+
+- Ingest listed option market data (Yahoo Finance)
+- Normalize and store it in a relational database (PostgreSQL)
+- Compute implied volatilities from observed prices
+- Calibrate parametric volatility surfaces (SVI)
+- Run pricing models (Black-Scholes, Monte Carlo)
+- Expose fast pricing routines via Python for analysis and research
+
+---
+This project aims to **unify these components into a coherent system**, with clear interfaces between:
+
+- **Data layer** (ingestion, storage, schema)
+- **Model layer** (C++ pricing engines)
+- **Analytics layer** (Python calibration and diagnostics)
+- **Execution layer** (reproducible pipelines)
+
+---
+
+### Technology choices
+
+The architecture deliberately combines multiple technologies, each chosen for a specific role:
+
+- **C++ (C++20)**  
+  Used for performance-critical pricing components (Monte Carlo, closed-form models) and clean domain modeling.
+
+- **Python**  
+  Used for orchestration, data processing, calibration (SVI), and rapid experimentation.
+
+- **pybind11**  
+  Bridges C++ and Python, enabling high-performance models to be used in flexible workflows.
+
+- **PostgreSQL + SQLAlchemy**  
+  Provides structured, queryable storage for market data and supports reproducible calibration pipelines.
+
+---
+
+### Key challenges addressed
+
+This project tackles several non-trivial challenges:
+
+- **Bridging performance and usability**  
+  Integrating a C++ pricing engine into a Python-driven research pipeline.
+
+- **Data consistency and reproducibility**  
+  Designing a schema and ingestion process that supports reliable downstream calibration.
+
+- **Implied volatility inversion and calibration**  
+  Implementing stable numerical inversion and robust SVI fitting under noisy market data.
+
+- **System design over isolated models**  
+  Ensuring that data, models, and workflows interact cleanly as a unified system.
+
+---
+
+### Future directions
+
+Planned improvements focus on moving further toward production-grade systems:
+
+- Arbitrage-free implied volatility surface construction
+- More robust calibration and smoothing techniques
+- Performance optimization (parallel Monte Carlo, batching)
+- Extension to additional data sources and APIs
+- Improved testing of end-to-end data and calibration pipelines
+- comparing classical stochastic models vs data-driven approaches for pricing or volatility forecasting
+
+## What is included
+
+- `cpp/`: core C++ pricing library (Monte Carlo + Black-Scholes closed form), DB ingestion hooks, and pybind bindings.
+- `qengine/`: Python package exposing the native extension (`import qengine`).
+- `src/ImpliedVolatility/`: SVI calibration and implied-volatility tooling.
+- `src/data/`: data ingestion, SQL schema, and analytics helpers.
+- `tests/`: C++ unit tests (GoogleTest).
+- `scripts/`: operational scripts, including PostgreSQL setup.
+- `docs/`: Doxygen configuration and generated API docs (ignored in git for publication).
+
+## Quickstart
+
+### 1) Clone and create a Python environment
+
+```bash
+python3 -m venv .venv
+source .venv/bin/activate
+pip install --upgrade pip
+pip install -e .
+pip install pandas yfinance sqlalchemy psycopg2-binary matplotlib scipy
+```
+
+### 2) Configure environment variables
+
+```bash
+cp .env.example .env
+```
+
+Then edit `.env` with your local database credentials.
+
+### 3) Create database and schema
+
+Use the idempotent setup script:
+
+```bash
+source .env
+python scripts/setup_postgres.py
+```
+
+This script creates/updates:
+- database role (`DB_USER`)
+- database (`DB_NAME`)
+- tables/indexes from `src/data/sql/schema.sql`
+
+### 4) Build C++ extension and run tests
+
+```bash
+cmake -S . -B build
+cmake --build build -j
+ctest --test-dir build --output-on-failure
+```
+
+### 5) Run Yahoo options ingestion
+
+```bash
+source .env
+python src/data/ingestion/ingest_yahoo_options.py
+```
+
+`PIPELINE_SYMBOLS` in `.env` controls which symbols are ingested (comma-separated, e.g. `SPY,AAPL,QQQ`).
+
+## Generating C++ API docs
+
+```bash
+cmake --build build --target docs
+```
+## 📚 Further Analysis 
+
+A more detailed discussion of numerial stability, implied volatility inversion, and calibration challenges is available here 
+
+👉 [Project blog](https://notes.ddoebel.de/public-folder/Option-Pricing-Engine)
+
+This includes deeper analysis of:
+- implied volatility instability from raw market data
+- calibration challenges under noisy inputs
+- numerical experiments and diagnostics
+(see in particular [Observations and further analysis](https://notes.ddoebel.de/public-folder/Option-Pricing-Engine#-observations-and-further-analysis)) 
--- a/init.py
+++ b/init.py
--- a/cpp/BSWrapper.cpp
+++ b/cpp/BSWrapper.cpp
@@ -0,0 +1,49 @@
+//
+// Created by David Doebel  on 27.03.2026.
+//
+
+#include "BSWrapper.hpp"
+
+#include "BlackScholesClosedFormEngine.hpp"
+#include "BlackScholesProcess.hpp"
+#include "Instrument.hpp"
+#include "Option.hpp"
+#include "FlatVolatilitySurface.hpp"
+#include "FlatYieldCurve.hpp"
+#include <cassert>
+#include <iostream>
+
+class FlatYieldCurve;
+
+double BSWrapper::bs_price_wrapper(double S, double K, double T, double r, double sigma,
+    bool is_call) {
+    std::shared_ptr<FlatYieldCurve> flat_curve = std::make_shared<FlatYieldCurve>(r);
+    auto flat_vol_surface = std::make_shared<FlatVolatilitySurface>(sigma);
+    MarketData data(S,flat_curve, flat_vol_surface);
+    std::unique_ptr<BlackScholesProcess> process = std::make_unique<BlackScholesProcess>(data);
+    std::unique_ptr<BlackScholesClosedFormEngine> pricing_engine =
+        std::make_unique<BlackScholesClosedFormEngine>(std::move(process));
+    std::unique_ptr<Payoff> payoff;
+    if (is_call)
+        payoff = std::make_unique<CallPayoff>(K);
+    else payoff = std::make_unique<PutPayoff>(K);
+    EuropeanExercise exercise(T);
+    VanillaOption option(T,std::make_unique<EuropeanExercise>(exercise),
+        std::move(payoff),std::move(pricing_engine));
+    return option.price();
+}
+
+std::vector<double> BSWrapper::batch_bs_price_wrapper(const std::vector<double> &S, const std::vector<double> &K,
+    const std::vector<double> &T, const std::vector<double> &r, const std::vector<double> &sigma,
+    const std::vector<bool> &is_call) {
+    assert(K.size() == S.size() && K.size() == T.size() && K.size() == r.size() && K.size() ==
+        sigma.size() && K.size() == is_call.size());
+    std::size_t n = K.size();
+    std::vector<double> result(n);
+    for (std::size_t i = 0; i < n; ++i) {
+        result[i] = bs_price_wrapper(S[i], K[i], T[i], r[i], sigma[i], is_call[i]);
+        if (i % 100 == 0)
+            std::cout << "i = " << i << " result = " << result[i] << std::endl; // ( i % 1000 == 0)
+    }
+    return result;
+}
--- a/cpp/BSWrapper.hpp
+++ b/cpp/BSWrapper.hpp
@@ -0,0 +1,24 @@
+/**
+ * @file BSWrapper.hpp
+ * @brief Black–Scholes vanilla price (closed form; used from Python / implied vol).
+ */
+
+#ifndef QUANTENGINE_BSWRAPPER_HPP
+#define QUANTENGINE_BSWRAPPER_HPP
+#include <vector>
+
+/**
+ * @brief Static helpers wrapping scalar and batch pricing.
+ */
+class BSWrapper {
+public:
+    BSWrapper() = delete;
+    static double bs_price_wrapper(double S, double K, double T, double r, double sigma, bool is_call);
+    static std::vector<double> batch_bs_price_wrapper(const std::vector<double>& S, const std::vector<double>& K,
+        const std::vector<double>& T, const std::vector<double>& r, const std::vector<double>& sigma,
+        const std::vector<bool>& is_call);
+
+};
+
+
+#endif //QUANTENGINE_BSWRAPPER_HPP
--- a/cpp/BlackScholesClosedFormEngine.cpp
+++ b/cpp/BlackScholesClosedFormEngine.cpp
@@ -0,0 +1,69 @@
+/**
+ * @file BlackScholesClosedFormEngine.cpp
+ * @brief Black–Scholes closed-form pricing (calls, puts, cash-or-nothing digital).
+ */
+
+#include "BlackScholesClosedFormEngine.hpp"
+#include "Instrument.hpp"
+#include "Payoff.hpp"
+#include <cmath>
+#include <stdexcept>
+
+namespace {
+
+double norm_cdf(double x) {
+    return 0.5 * (1.0 + std::erf(x / std::sqrt(2.0)));
+}
+
+} // namespace
+
+double BlackScholesClosedFormEngine::calculate(const Instrument &instrument) const {
+    if (instrument.exerciseType() != Exercise::Type::European) {
+        throw std::invalid_argument("BlackScholesClosedFormEngine: European exercise only");
+    }
+
+    const double T = instrument.maturity();
+    const MarketData &md = process_->data();
+    const double S = md.spot();
+    double K = instrument.payoff().strike();
+    const PayoffKind pk = instrument.payoff().kind();
+
+    if (T <= 0.0) {
+        return instrument.payoff()(S);
+    }
+
+    const double r = md.yield_curve().zeroRate(T);
+    const double sigma = md.volatility_surface().sigma(K, T);
+    if (sigma <= 0.0) {
+        throw std::invalid_argument("BlackScholesClosedFormEngine: volatility must be positive");
+    }
+
+    const double disc = md.yield_curve().discount(T);
+    const double sqrtT = std::sqrt(T);
+    const double sig_sqrtT = sigma * sqrtT;
+
+    if (sig_sqrtT < 1e-15) {
+        const double forward = S * std::exp(r * T);
+        switch (pk) {
+        case PayoffKind::Call:
+            return disc * std::max(0.0, forward - K);
+        case PayoffKind::Put:
+            return disc * std::max(0.0, K - forward);
+        case PayoffKind::Digital:
+            return (forward > K) ? disc : 0.0;
+        }
+    }
+
+    const double d1 = (std::log(S / K) + (r + 0.5 * sigma * sigma) * T) / sig_sqrtT;
+    const double d2 = d1 - sig_sqrtT;
+
+    switch (pk) {
+    case PayoffKind::Call:
+        return S * norm_cdf(d1) - K * disc * norm_cdf(d2);
+    case PayoffKind::Put:
+        return K * disc * norm_cdf(-d2) - S * norm_cdf(-d1);
+    case PayoffKind::Digital:
+        return disc * norm_cdf(d2);
+    }
+    throw std::logic_error("BlackScholesClosedFormEngine: unhandled PayoffKind");
+}
--- a/cpp/BlackScholesClosedFormEngine.hpp
+++ b/cpp/BlackScholesClosedFormEngine.hpp
@@ -0,0 +1,22 @@
+/**
+ * @file BlackScholesClosedFormEngine.hpp
+ * @brief Risk-neutral Black–Scholes formula for European payoffs under GBM (flat or surface inputs via @ref MarketData).
+ */
+
+#ifndef QUANTENGINE_BLACKSCHOLESCLOSEDFORMENGINE_HPP
+#define QUANTENGINE_BLACKSCHOLESCLOSEDFORMENGINE_HPP
+
+#include "PricingEngine.hpp"
+
+/**
+ * @brief Analytic European vanilla / digital prices using @f$r@f$ and @f$\sigma(K,T)@f$ from the embedded process’s @ref MarketData.
+ */
+class BlackScholesClosedFormEngine : public PricingEngine {
+public:
+    explicit BlackScholesClosedFormEngine(std::unique_ptr<StochasticProcess> process)
+        : PricingEngine(std::move(process)) {}
+
+    double calculate(const Instrument &instrument) const override;
+};
+
+#endif // QUANTENGINE_BLACKSCHOLESCLOSEDFORMENGINE_HPP
--- a/cpp/BlackScholesProcess.cpp
+++ b/cpp/BlackScholesProcess.cpp
@@ -1,6 +1,7 @@
-//
-// Created by David Doebel  on 06.03.2026.
-//
+/**
+ * @file BlackScholesProcess.cpp
+ * @brief Black–Scholes GBM drift, diffusion, and step.
+ */

 #include "BlackScholesProcess.hpp"

--- a/cpp/BlackScholesProcess.hpp
+++ b/cpp/BlackScholesProcess.hpp
@@ -1,12 +1,15 @@
-//
-// Created by David Doebel  on 06.03.2026.
-//
+/**
+ * @file BlackScholesProcess.hpp
+ * @brief Geometric Brownian motion with yield and volatility surfaces.
+ */

 #ifndef QUANTENGINE_BLACKSCHOLESPROCESS_HPP
 #define QUANTENGINE_BLACKSCHOLESPROCESS_HPP
 #include "StochasticProcess.hpp"

-
+/**
+ * @brief GBM: drift @f$r_t S@f$, diffusion @f$\sigma(S,t) S@f$, exact log-step.
+ */
 class BlackScholesProcess : public StochasticProcess{
 public:
    explicit BlackScholesProcess(MarketData data) : StochasticProcess(std::move(data)){}
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -0,0 +1,65 @@
+add_library(qengine_core
+        Instrument.cpp
+        Instrument.hpp
+        Payoff.cpp
+        Payoff.hpp
+        Option.cpp
+        Option.hpp
+        PricingEngine.cpp
+        PricingEngine.hpp
+        MonteCarloEngine.cpp
+        MonteCarloEngine.hpp
+        StochasticProcess.cpp
+        StochasticProcess.hpp
+        Exercise.cpp
+        Exercise.hpp
+        MarketData.cpp
+        MarketData.hpp
+        YieldCurve.cpp
+        YieldCurve.hpp
+        VolatilitySurface.cpp
+        VolatilitySurface.hpp
+        RandomGenerator.cpp
+        RandomGenerator.hpp
+        Statistics.cpp
+        Statistics.hpp
+        BlackScholesClosedFormEngine.cpp
+        BlackScholesClosedFormEngine.hpp
+        BlackScholesProcess.cpp
+        BlackScholesProcess.hpp
+        DBIngest.cpp
+        DBIngest.hpp
+        BSWrapper.cpp
+        BSWrapper.hpp
+        NewtonSolver.cpp
+        NewtonSolver.hpp
+)
+
+target_include_directories(qengine_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+target_include_directories(qengine_core PRIVATE
+        /opt/homebrew/include
+)
+
+find_library(PQXX_LIB pqxx PATHS /opt/homebrew/lib /usr/local/lib /usr/lib)
+find_library(PQ_LIB pq PATHS /opt/homebrew/opt/libpq/lib /opt/homebrew/lib /usr/local/lib /usr/lib)
+if(NOT PQXX_LIB OR NOT PQ_LIB)
+    message(FATAL_ERROR "Could not find libpqxx and/or libpq (install via Homebrew: brew install libpqxx libpq)")
+endif()
+
+target_link_libraries(qengine_core Eigen3::Eigen)
+target_link_libraries(qengine_core ${PQXX_LIB} ${PQ_LIB})
+
+# Python import path: package qengine, extension submodule qengine (file qengine/qengine*.so)
+pybind11_add_module(qengine_cpp MODULE ImpliedVolatility/Pybind.cpp)
+set_target_properties(qengine_cpp PROPERTIES OUTPUT_NAME qengine)
+target_link_libraries(qengine_cpp PRIVATE qengine_core)
+
+# Place the module next to qengine/__init__.py so `import qengine` works from the repo root
+set(_qengine_py_pkg "${CMAKE_SOURCE_DIR}/qengine")
+set_target_properties(qengine_cpp PROPERTIES
+        LIBRARY_OUTPUT_DIRECTORY "${_qengine_py_pkg}"
+        LIBRARY_OUTPUT_DIRECTORY_RELEASE "${_qengine_py_pkg}"
+        LIBRARY_OUTPUT_DIRECTORY_DEBUG "${_qengine_py_pkg}"
+        RUNTIME_OUTPUT_DIRECTORY "${_qengine_py_pkg}"
+        RUNTIME_OUTPUT_DIRECTORY_RELEASE "${_qengine_py_pkg}"
+        RUNTIME_OUTPUT_DIRECTORY_DEBUG "${_qengine_py_pkg}")
--- a/cpp/DBIngest.cpp
+++ b/cpp/DBIngest.cpp
@@ -0,0 +1,64 @@
+/**
+ * @file DBIngest.cpp
+ * @brief Database connection and placeholder update routines.
+ */
+
+#include "DBIngest.hpp"
+
+#include <cstdlib>
+#include <iostream>
+#include <sstream>
+
+bool DBIngest::connect() {
+    const char* db_name = std::getenv("DB_NAME");
+    const char* db_user = std::getenv("DB_USER");
+    const char* db_password = std::getenv("DB_PASSWORD");
+    const char* db_host = std::getenv("DB_HOST");
+    const char* db_port = std::getenv("DB_PORT");
+
+    std::ostringstream conn_str;
+    conn_str
+        << "dbname=" << (db_name ? db_name : "options_db")
+        << " user=" << (db_user ? db_user : "quant_user")
+        << " host=" << (db_host ? db_host : "localhost")
+        << " port=" << (db_port ? db_port : "5432")
+        << " password=" << (db_password ? db_password : "");
+
+    connection_ = pqxx::connection(conn_str.str());
+
+    if(connection_.is_open()) {
+        std::cout << "Connected\n";
+        return true;
+    }
+    std::cout << "Not connected\n";
+    return false;
+}
+
+bool DBIngest::disconnect() {
+    connection_.close();
+    return true;
+}
+
+bool DBIngest::update(VolatilitySurface &surface) {
+    std::string vol_surface_query = "SELECT c.strike, c.expiration_date, q.mid, u.price "
+                                    "FROM option_quotes q"
+                                    "JOIN option_contracts c "
+                                    "ON q.contract_id = c.id "
+                                    "JOIN underlying_prices u"
+                                    "ON u.underlying_id = c.underlying_id"
+                                    "WHERE q.timestamp = ("
+                                    "SELECT MAX(timestamp) FROM option_quotes"
+                                    ")";
+    pqxx::work work(connection_);
+    pqxx::result result = work.exec(vol_surface_query);
+    for (auto row : result) {
+        std::cout << row[0] << " " << row[1] << " " << row[2] << " " << row[3] << std::endl;
+    }
+    (void)surface;
+    return false;
+}
+
+bool DBIngest::update(YieldCurve &yield_curve) {
+    (void)yield_curve;
+    return false;
+}
--- a/cpp/DBIngest.hpp
+++ b/cpp/DBIngest.hpp
@@ -0,0 +1,28 @@
+/**
+ * @file DBIngest.hpp
+ * @brief PostgreSQL helpers to load market objects (work in progress).
+ */
+
+#ifndef QUANTENGINE_DBINGEST_HPP
+#define QUANTENGINE_DBINGEST_HPP
+
+#include <pqxx/pqxx>
+
+#include "VolatilitySurface.hpp"
+#include "YieldCurve.hpp"
+
+/**
+ * @brief Connects to Postgres via libpqxx and queries quotes for surface building.
+ */
+class DBIngest {
+
+    bool connect();
+    bool disconnect();
+    bool update(VolatilitySurface& surface);
+    bool update(YieldCurve& yield_curve);
+private:
+    pqxx::connection connection_;
+};
+
+
+#endif //QUANTENGINE_DBINGEST_HPP
--- a/cpp/Exercise.cpp
+++ b/cpp/Exercise.cpp
@@ -0,0 +1,6 @@
+/**
+ * @file Exercise.cpp
+ * @brief @ref Exercise translation unit (interface only).
+ */
+
+#include "Exercise.hpp"
--- a/cpp/Exercise.hpp
+++ b/cpp/Exercise.hpp
@@ -1,11 +1,15 @@
-//
-// Created by David Doebel  on 05.03.2026.
-//
+/**
+ * @file Exercise.hpp
+ * @brief Exercise style (European, American, Bermudan) and exercise times.
+ */

 #ifndef QUANTENGINE_EXERCISE_HPP
 #define QUANTENGINE_EXERCISE_HPP
 #include <vector>

+/**
+ * @brief Describes when the holder may exercise (metadata for pricing engines).
+ */
 class Exercise {
 public:
    Exercise() = default;
@@ -22,7 +26,9 @@ protected:

 };

+/** @brief Single exercise at maturity. */
 class EuropeanExercise : public Exercise {
+public:
    EuropeanExercise() : type_(Type::European) {};
    EuropeanExercise(double maturity) : type_(Type::European){
        exercise_times_.push_back(maturity);
@@ -35,7 +41,9 @@ private:
    Type type_;
 };

+/** @brief Continuous American exercise from @f$t=0@f$ to maturity (placeholder grid). */
 class AmericanExercise : public Exercise{
+public:
    AmericanExercise() : type_(Type::American) {};
    AmericanExercise(double maturity) : type_(Type::American) {
        exercise_times_.push_back(0);
--- a/cpp/FlatVolatilitySurface.cpp
+++ b/cpp/FlatVolatilitySurface.cpp
@@ -0,0 +1,5 @@
+/**
+ * @file FlatVolatilitySurface.cpp
+ * @brief Ensures link visibility for @ref FlatVolatilitySurface.
+ */
+#include "FlatVolatilitySurface.hpp"
--- a/cpp/FlatVolatilitySurface.hpp
+++ b/cpp/FlatVolatilitySurface.hpp
@@ -0,0 +1,21 @@
+/**
+ * @file FlatVolatilitySurface.hpp
+ * @brief Constant implied volatility surface.
+ */
+#ifndef QUANTENGINE_FLATVOLATILITYSURFACE_HPP
+#define QUANTENGINE_FLATVOLATILITYSURFACE_HPP
+#include "VolatilitySurface.hpp"
+
+/**
+ * @brief @f$\sigma(K,T)\equiv\sigma_0@f$.
+ */
+class FlatVolatilitySurface : public VolatilitySurface {
+public:
+    explicit FlatVolatilitySurface(double sigma = 0.2) : sigma_(sigma) {}
+
+    double sigma(double K, double T) const override {return sigma_;}
+
+private:
+    double sigma_;
+};
+#endif
--- a/cpp/FlatYieldCurve.cpp
+++ b/cpp/FlatYieldCurve.cpp
@@ -0,0 +1,5 @@
+/**
+ * @file FlatYieldCurve.cpp
+ * @brief Ensures link visibility for @ref FlatYieldCurve (inline methods in header).
+ */
+#include "FlatYieldCurve.hpp"
--- a/cpp/FlatYieldCurve.hpp
+++ b/cpp/FlatYieldCurve.hpp
@@ -0,0 +1,22 @@
+/**
+ * @file FlatYieldCurve.hpp
+ * @brief Constant zero rate yield curve.
+ */
+#ifndef QUANTENGINE_FLATYIELDCURVE_HPP
+#define QUANTENGINE_FLATYIELDCURVE_HPP
+#include "YieldCurve.hpp"
+#include <cmath>
+
+/**
+ * @brief @f$P(t)=e^{-r t}@f$, @f$f(t)\equiv r@f$.
+ */
+class FlatYieldCurve : public YieldCurve{
+public:
+    explicit FlatYieldCurve(double rate = 0.01) : rate_(rate) {}
+
+    double discount(double t) const override {return std::exp(-rate_ * t); };
+    double zeroRate(double t) const override {return rate_; }
+private:
+    double rate_ = 0.01;
+};
+#endif
--- a/cpp/ImpliedVolatility/Pybind.cpp
+++ b/cpp/ImpliedVolatility/Pybind.cpp
@@ -0,0 +1,93 @@
+/**
+ * @file Pybind.cpp
+ * @brief pybind11 module @c qengine exposing @ref BSWrapper::bs_price_wrapper overloads.
+ */
+
+#include <pybind11/numpy.h>
+#include <pybind11/pybind11.h>
+#include <pybind11/stl.h>
+
+#include <cstdint>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+#include "BSWrapper.hpp"
+
+namespace py = pybind11;
+
+namespace {
+
+std::vector<double> to_vector_double(const py::array_t<double> &a) {
+    py::buffer_info info = a.request();
+    if (info.ndim != 1) {
+        throw std::runtime_error("expected 1-D ndarray for S, K, T, r, sigma");
+    }
+    const auto *p = static_cast<const double *>(info.ptr);
+    const ssize_t n = info.shape[0];
+    return std::vector<double>(p, p + n);
+}
+
+std::vector<bool> to_vector_bool_1d(const py::array_t<bool> &a) {
+    py::buffer_info info = a.request();
+    if (info.ndim != 1) {
+        throw std::runtime_error("expected 1-D ndarray for is_call");
+    }
+    if (info.itemsize != 1) {
+        throw std::runtime_error("is_call: expected a boolean ndarray (dtype=bool)");
+    }
+    const ssize_t n = info.shape[0];
+    const auto *p = static_cast<const std::uint8_t *>(info.ptr);
+    std::vector<bool> out(static_cast<size_t>(n));
+    for (ssize_t i = 0; i < n; ++i) {
+        out[static_cast<size_t>(i)] = (p[i] != 0);
+    }
+    return out;
+}
+
+void check_same_length(size_t n, size_t k, const char *name) {
+    if (n != k) {
+        throw std::runtime_error(std::string("length mismatch for ") + name);
+    }
+}
+
+} // namespace
+
+PYBIND11_MODULE(qengine, m) {
+    m.doc() = "Binding for the Black Scholes model";
+
+    m.def(
+        "bs_price",
+        [](double S, double K, double T, double r, double sigma, bool is_call) {
+            return BSWrapper::bs_price_wrapper(S, K, T, r, sigma, is_call);
+        },
+        py::arg("S"), py::arg("K"), py::arg("T"), py::arg("r"), py::arg("sigma"), py::arg("is_call"));
+
+    m.def(
+        "bs_price",
+        [](py::array_t<double> S, py::array_t<double> K, py::array_t<double> T, py::array_t<double> r,
+            py::array_t<double> sigma, py::array_t<bool> is_call) {
+            std::vector<double> vS = to_vector_double(S);
+            std::vector<double> vK = to_vector_double(K);
+            std::vector<double> vT = to_vector_double(T);
+            std::vector<double> vr = to_vector_double(r);
+            std::vector<double> vsig = to_vector_double(sigma);
+            std::vector<bool> vC = to_vector_bool_1d(is_call);
+            const size_t n = vS.size();
+            check_same_length(n, vK.size(), "K");
+            check_same_length(n, vT.size(), "T");
+            check_same_length(n, vr.size(), "r");
+            check_same_length(n, vsig.size(), "sigma");
+            check_same_length(n, vC.size(), "is_call");
+            return BSWrapper::batch_bs_price_wrapper(vS, vK, vT, vr, vsig, vC);
+        },
+        py::arg("S"), py::arg("K"), py::arg("T"), py::arg("r"), py::arg("sigma"), py::arg("is_call"));
+
+    m.def(
+        "bs_price",
+        [](const std::vector<double> &S, const std::vector<double> &K, const std::vector<double> &T,
+            const std::vector<double> &r, const std::vector<double> &sigma, const std::vector<bool> &is_call) {
+            return BSWrapper::batch_bs_price_wrapper(S, K, T, r, sigma, is_call);
+        },
+        py::arg("S"), py::arg("K"), py::arg("T"), py::arg("r"), py::arg("sigma"), py::arg("is_call"));
+}
--- a/cpp/Instrument.cpp
+++ b/cpp/Instrument.cpp
@@ -1,6 +1,7 @@
-//
-// Created by David Doebel  on 05.03.2026.
-//
+/**
+ * @file Instrument.cpp
+ * @brief @ref Instrument implementation.
+ */

 #include "Instrument.hpp"

--- a/cpp/Instrument.hpp
+++ b/cpp/Instrument.hpp
@@ -1,15 +1,20 @@
-//
-// Created by David Doebel  on 05.03.2026.
-//
+/**
+ * @file Instrument.hpp
+ * @brief Generic derivative instrument: payoff plus pricing engine.
+ */

 #ifndef QUANTENGINE_INSTRUMENT_HPP
 #define QUANTENGINE_INSTRUMENT_HPP
+#include "Exercise.hpp"
 #include "Payoff.hpp"
 #include "PricingEngine.hpp"
 #include <memory>

 class PricingEngine;

+/**
+ * @brief Represents a tradeable claim priced via a @ref PricingEngine.
+ */
 class Instrument {
 public:
    Instrument() = default;
@@ -24,6 +29,9 @@ public:
        return *payoff_;
    }

+    /** @brief Base @ref Instrument is treated as European unless overridden by @ref Option. */
+    [[nodiscard]] virtual Exercise::Type exerciseType() const { return Exercise::Type::European; }
+
 protected:
    double maturity_;
    std::unique_ptr<Payoff> payoff_;
--- a/cpp/MarketData.cpp
+++ b/cpp/MarketData.cpp
@@ -1,6 +1,7 @@
-//
-// Created by David Doebel  on 06.03.2026.
-//
+/**
+ * @file MarketData.cpp
+ * @brief @ref MarketData accessors.
+ */

 #include "MarketData.hpp"

--- a/cpp/MarketData.hpp
+++ b/cpp/MarketData.hpp
@@ -1,6 +1,7 @@
-//
-// Created by David Doebel  on 06.03.2026.
-//
+/**
+ * @file MarketData.hpp
+ * @brief Spot, discount curve, and volatility surface bundle.
+ */

 #ifndef QUANTENGINE_MARKETDATA_HPP
 #define QUANTENGINE_MARKETDATA_HPP
@@ -8,6 +9,9 @@
 #include "VolatilitySurface.hpp"
 #include <memory>

+/**
+ * @brief Immutable snapshot of inputs needed to simulate or price.
+ */
 class MarketData {
 public:
    MarketData() = delete;
--- a/cpp/MonteCarloEngine.cpp
+++ b/cpp/MonteCarloEngine.cpp
@@ -1,6 +1,7 @@
-//
-// Created by David Doebel  on 05.03.2026.
-//
+/**
+ * @file MonteCarloEngine.cpp
+ * @brief Monte Carlo mean estimator with discounting.
+ */

 #include "MonteCarloEngine.hpp"
 #include <iostream>
--- a/cpp/MonteCarloEngine.hpp
+++ b/cpp/MonteCarloEngine.hpp
@@ -1,13 +1,16 @@
-//
-// Created by David Doebel  on 05.03.2026.
-//
+/**
+ * @file MonteCarloEngine.hpp
+ * @brief Monte Carlo pricing using a @ref StochasticProcess and @ref RandomGenerator.
+ */

 #ifndef QUANTENGINE_MONTECARLOENGINE_HPP
 #define QUANTENGINE_MONTECARLOENGINE_HPP
 #include "PricingEngine.hpp"
 #include "RandomGenerator.hpp"

-
+/**
+ * @brief Simple path simulation: one Euler/exact step to horizon, average discounted payoff.
+ */
 class MonteCarloEngine : public PricingEngine{
 public:
    MonteCarloEngine() = default;
--- a/cpp/NewtonSolver.cpp
+++ b/cpp/NewtonSolver.cpp
@@ -0,0 +1,8 @@
+/**
+ * @file NewtonSolver.cpp
+ * @brief Placeholder translation unit for @ref NewtonSolver.
+ */
+
+#include "NewtonSolver.hpp"
+
+
--- a/cpp/NewtonSolver.hpp
+++ b/cpp/NewtonSolver.hpp
@@ -0,0 +1,30 @@
+/**
+ * @file NewtonSolver.hpp
+ * @brief Generic Newton iteration helper (incomplete / reserved for solvers).
+ */
+
+#ifndef QUANTENGINE_GAUSSSOLVER_HPP
+#define QUANTENGINE_GAUSSSOLVER_HPP
+
+#include <functional>
+
+/**
+ * @brief Template Newton step loop with relative/absolute tolerances.
+ */
+class NewtonSolver {
+    template<typename F, typename DFinv, typename T>
+    bool solve(F&& func, DFinv&& dfinv,T x0 , double rtol, double atol) {
+        T x = x0;
+        int i = 0;
+        T increment;
+        do {
+            increment = dfinv(x) * func(x);
+            x -= increment;
+            ++i;
+        } while (i < 1000 && std::abs(increment)/ std::abs(x) > rtol && std::abs(increment) > atol);
+
+    }
+};
+
+
+#endif //QUANTENGINE_GAUSSSOLVER_HPP
--- a/cpp/Option.cpp
+++ b/cpp/Option.cpp
@@ -1,6 +1,7 @@
-//
-// Created by David Doebel  on 05.03.2026.
-//
+/**
+ * @file Option.cpp
+ * @brief @ref Option implementation.
+ */

 #include "Option.hpp"

--- a/cpp/Option.hpp
+++ b/cpp/Option.hpp
@@ -1,12 +1,16 @@
-//
-// Created by David Doebel  on 05.03.2026.
-//
+/**
+ * @file Option.hpp
+ * @brief Option instrument with exercise style (@ref Exercise).
+ */

 #ifndef QUANTENGINE_OPTION_HPP
 #define QUANTENGINE_OPTION_HPP
 #include "Instrument.hpp"
 #include "Exercise.hpp"

+/**
+ * @brief Extends @ref Instrument with exercise schedule / style metadata.
+ */
 class Option : public Instrument{
 public:
    Option() = default;
@@ -17,10 +21,13 @@ public:
        return *exercise_;
    }

+    [[nodiscard]] Exercise::Type exerciseType() const override { return exercise_->type(); }
+
 protected:
    std::unique_ptr<Exercise> exercise_;
 };

+/** @brief Plain-vanilla option using the base @ref Option constructor. */
 class VanillaOption : public Option {
 public:
    using Option::Option;
--- a/cpp/Payoff.cpp
+++ b/cpp/Payoff.cpp
@@ -1,6 +1,7 @@
-//
-// Created by David Doebel  on 05.03.2026.
-//
+/**
+ * @file Payoff.cpp
+ * @brief Payoff function implementations.
+ */

 #include "Payoff.hpp"
 #include <algorithm>
--- a/cpp/Payoff.hpp
+++ b/cpp/Payoff.hpp
@@ -1,11 +1,19 @@
-//
-// Created by David Doebel  on 05.03.2026.
-//
+/**
+ * @file Payoff.hpp
+ * @brief Payoff interface and standard European payoffs (call, put, digital).
+ */

 #ifndef QUANTENGINE_PAYOFF_HPP
 #define QUANTENGINE_PAYOFF_HPP

+/**
+ * @brief Standard payoff shapes for routing (e.g. analytic vs Monte Carlo).
+ */
+enum class PayoffKind { Call, Put, Digital };

+/**
+ * @brief Terminal payoff as a function of spot @f$S_T@f$.
+ */
 class Payoff {
 public:

@@ -14,35 +22,42 @@ public:
    virtual ~Payoff() = default;
    virtual double operator()(double S) = 0;
    virtual double strike() = 0;
+    [[nodiscard]] virtual PayoffKind kind() const = 0;
 };

+/** @brief Standard European call @f$\max(S-K,0)@f$. */
 class CallPayoff : public Payoff {
 public:
    CallPayoff() = default;
    CallPayoff(double strike) : strike_(strike) {}
    double operator()(double S) override;
    double strike() override {return strike_;}
+    [[nodiscard]] PayoffKind kind() const override { return PayoffKind::Call; }

 private:
    double strike_;
 };

+/** @brief Standard European put @f$\max(K-S,0)@f$. */
 class PutPayoff : public Payoff {
 public:
    PutPayoff() = default;
    PutPayoff(double strike) : strike_(strike) {}
    double operator()(double S) override;
    double strike() override {return strike_;}
+    [[nodiscard]] PayoffKind kind() const override { return PayoffKind::Put; }
 private:
    double strike_;
 };

+/** @brief Digital (cash-or-nothing style) payoff @f$1_{S>K}@f$. */
 class DigitalPayoff : public Payoff {
    public:
    DigitalPayoff() = default;
    DigitalPayoff(double strike) : strike_(strike) {}
    double operator()(double S) override;
    double strike() override {return strike_;}
+    [[nodiscard]] PayoffKind kind() const override { return PayoffKind::Digital; }
 private:
    double strike_;
 };
--- a/cpp/PricingEngine.cpp
+++ b/cpp/PricingEngine.cpp
@@ -0,0 +1,6 @@
+/**
+ * @file PricingEngine.cpp
+ * @brief @ref PricingEngine translation unit (interface only).
+ */
+
+#include "PricingEngine.hpp"
--- a/cpp/PricingEngine.hpp
+++ b/cpp/PricingEngine.hpp
@@ -1,6 +1,7 @@
-//
-// Created by David Doebel  on 05.03.2026.
-//
+/**
+ * @file PricingEngine.hpp
+ * @brief Abstract pricer for @ref Instrument given a stochastic model.
+ */

 #ifndef QUANTENGINE_PRICINGENGINE_HPP
 #define QUANTENGINE_PRICINGENGINE_HPP
@@ -10,6 +11,9 @@

 class Instrument;

+/**
+ * @brief Computes model price of an instrument (e.g. Monte Carlo, PDE, closed form).
+ */
 class PricingEngine {
 public:
    PricingEngine() = default;
--- a/cpp/RandomGenerator.cpp
+++ b/cpp/RandomGenerator.cpp
@@ -1,6 +1,7 @@
-//
-// Created by David Doebel  on 06.03.2026.
-//
+/**
+ * @file RandomGenerator.cpp
+ * @brief @ref MersenneTwister implementation.
+ */

 #include "RandomGenerator.hpp"

--- a/cpp/RandomGenerator.hpp
+++ b/cpp/RandomGenerator.hpp
@@ -1,11 +1,13 @@
-//
-// Created by David Doebel  on 06.03.2026.
-//
+/**
+ * @file RandomGenerator.hpp
+ * @brief Random numbers for Monte Carlo (Gaussian draws).
+ */

 #ifndef QUANTENGINE_RANDOMGENERATOR_HPP
 #define QUANTENGINE_RANDOMGENERATOR_HPP
 #include <random>

+/** @brief Interface for standard normal variates. */
 class RandomGenerator {
 public:
    RandomGenerator() = default;
@@ -14,6 +16,7 @@ public:
    virtual std::vector<double> nextGaussianVector(std::size_t n) = 0;
 };

+/** @brief @c std::mt19937 with normal distribution. */
 class MersenneTwister : public RandomGenerator {
 public:
    MersenneTwister() = default;
--- a/cpp/Statistics.cpp
+++ b/cpp/Statistics.cpp
@@ -1,6 +1,7 @@
-//
-// Created by David Doebel  on 06.03.2026.
-//
+/**
+ * @file Statistics.cpp
+ * @brief Streaming moment and extrema updates.
+ */

 #include "Statistics.hpp"

--- a/cpp/Statistics.hpp
+++ b/cpp/Statistics.hpp
@@ -1,12 +1,15 @@
-//
-// Created by David Doebel  on 06.03.2026.
-//
+/**
+ * @file Statistics.hpp
+ * @brief Online sample moments for Monte Carlo diagnostics.
+ */

 #ifndef QUANTENGINE_STATISTICS_HPP
 #define QUANTENGINE_STATISTICS_HPP
 #include <vector>

-
+/**
+ * @brief Accumulates count, mean/variance-related sums, and running min/max.
+ */
 class Statistics {
 public:
    Statistics() : moments_({0., 0., 0.}), n(0), max_(0.), min_(0.) {}
--- a/cpp/StochasticProcess.cpp
+++ b/cpp/StochasticProcess.cpp
@@ -0,0 +1,6 @@
+/**
+ * @file StochasticProcess.cpp
+ * @brief @ref StochasticProcess translation unit (interface only).
+ */
+
+#include "StochasticProcess.hpp"
--- a/cpp/StochasticProcess.hpp
+++ b/cpp/StochasticProcess.hpp
@@ -1,12 +1,16 @@
-//
-// Created by David Doebel  on 05.03.2026.
-//
+/**
+ * @file StochasticProcess.hpp
+ * @brief Interface for SDE drift, diffusion, and time stepping.
+ */

 #ifndef QUANTENGINE_STOCHASTICPROCESS_HPP
 #define QUANTENGINE_STOCHASTICPROCESS_HPP
 #include "MarketData.hpp"
 #include <memory>

+/**
+ * @brief Stochastic model for the underlying, driven by @ref MarketData.
+ */
 class StochasticProcess {
 public:
    StochasticProcess() = delete;
--- a/cpp/VolatilitySurface.cpp
+++ b/cpp/VolatilitySurface.cpp
@@ -0,0 +1,6 @@
+/**
+ * @file VolatilitySurface.cpp
+ * @brief @ref VolatilitySurface translation unit (interface only).
+ */
+
+#include "VolatilitySurface.hpp"
--- a/cpp/VolatilitySurface.hpp
+++ b/cpp/VolatilitySurface.hpp
@@ -0,0 +1,28 @@
+/**
+ * @file VolatilitySurface.hpp
+ * @brief Implied volatility as a function of strike and expiry.
+ */
+
+#ifndef QUANTENGINE_VOLATILITYSURFACE_HPP
+#define QUANTENGINE_VOLATILITYSURFACE_HPP
+#include <vector>
+
+/**
+ * @brief Local/vol surface @f$\sigma(K,T)@f$ used by simulation.
+ */
+class VolatilitySurface {
+public:
+    virtual ~VolatilitySurface() = default;
+    virtual double sigma(double K, double T) const = 0;
+private:
+
+};
+
+class SVI : public VolatilitySurface {
+public:
+    SVI() = default;
+    SVI(std::vector<double> K, std::vector<double> rho, std::vector<double> S, std::vector<double> T);
+};
+
+
+#endif //QUANTENGINE_VOLATILITYSURFACE_HPP
--- a/cpp/YieldCurve.cpp
+++ b/cpp/YieldCurve.cpp
@@ -0,0 +1,6 @@
+/**
+ * @file YieldCurve.cpp
+ * @brief @ref YieldCurve translation unit (interface only).
+ */
+
+#include "YieldCurve.hpp"
--- a/cpp/YieldCurve.hpp
+++ b/cpp/YieldCurve.hpp
@@ -1,11 +1,14 @@
-//
-// Created by David Doebel  on 06.03.2026.
-//
+/**
+ * @file YieldCurve.hpp
+ * @brief Abstract yield curve: discount factors and zero rates.
+ */

 #ifndef QUANTENGINE_YIELDCURVE_HPP
 #define QUANTENGINE_YIELDCURVE_HPP

-
+/**
+ * @brief Risk-free rate term structure for discounting and risk-neutral drift.
+ */
 class YieldCurve {
 public:
    YieldCurve() = default;
--- a/docs/Doxyfile
+++ b/docs/Doxyfile
@@ -0,0 +1,50 @@
+# Doxygen configuration for QuantEngine (option_pricing).
+# Run from repo root: doxygen docs/Doxyfile
+# Or: cmake --build build --target docs
+
+PROJECT_NAME           = QuantEngine
+PROJECT_BRIEF          = "Monte Carlo option pricing, market data abstractions, and Python bindings"
+
+OUTPUT_DIRECTORY       = docs/html
+CREATE_SUBDIRS         = NO
+ALLOW_UNICODE_NAMES    = YES
+
+JAVADOC_AUTOBRIEF      = YES
+QT_AUTOBRIEF           = NO
+OPTIMIZE_OUTPUT_FOR_CPLUSPLUS = YES
+
+FULL_PATH_NAMES        = YES
+STRIP_FROM_PATH        =
+
+QUIET                  = NO
+WARNINGS               = YES
+WARN_IF_UNDOCUMENTED   = NO
+WARN_NO_PARAMDOC       = NO
+
+INPUT                  = cpp
+INPUT_ENCODING         = UTF-8
+FILE_PATTERNS          = *.cpp *.hpp *.h
+RECURSIVE              = YES
+
+EXCLUDE_PATTERNS       =
+EXCLUDE_SYMBOLS        =
+
+GENERATE_HTML          = YES
+HTML_OUTPUT            = .
+HTML_COLORSTYLE_HUE    = 220
+GENERATE_LATEX         = NO
+
+SEARCHENGINE           = YES
+
+SOURCE_BROWSER         = YES
+REFERENCED_BY_RELATION = YES
+REFERENCES_RELATION    = YES
+
+ALPHABETICAL_INDEX     = YES
+ENABLE_PREPROCESSING   = YES
+MACRO_EXPANSION        = NO
+
+CLASS_DIAGRAMS         = YES
+HAVE_DOT               = NO
+
+PREDEFINED             = DOXYGEN
--- a/docs/SECURITY.md
+++ b/docs/SECURITY.md
@@ -0,0 +1,27 @@
+# Security Checklist
+
+## Secrets handling
+
+- Never commit `.env` or any file containing credentials.
+- Use `.env.example` for non-sensitive defaults only.
+- Set DB credentials through environment variables.
+- Rotate credentials if they have ever appeared in git history.
+
+## Database hardening
+
+- Use a dedicated runtime user with least required privileges.
+- Keep administrative users separate from ingestion users.
+- Restrict DB network access to trusted hosts/VPC/private network.
+- Enable SSL/TLS for non-local database connections.
+
+## Publication readiness
+
+Before making the repository public:
+
+1. Confirm `git status` has no secret files staged.
+2. Search for potential secret patterns:
+   - passwords
+   - API keys
+   - tokens
+3. Verify `.gitignore` includes local secret files (`.env*`).
+4. Regenerate credentials used during development.
--- a/docs/SETUP.md
+++ b/docs/SETUP.md
@@ -0,0 +1,60 @@
+# Setup Guide
+
+This guide describes a clean local setup for development and reproducible runs.
+
+## Prerequisites
+
+- Python 3.10+
+- CMake 3.16+
+- A C++20 compiler
+- PostgreSQL 14+ (or Docker)
+- On macOS, Homebrew packages for C++ DB support:
+  - `libpq`
+  - `libpqxx`
+  - `eigen`
+  - `pybind11`
+
+## Python dependencies
+
+```bash
+python3 -m venv .venv
+source .venv/bin/activate
+pip install --upgrade pip
+pip install -e .
+pip install pandas yfinance sqlalchemy psycopg2-binary matplotlib scipy
+```
+
+## Environment configuration
+
+```bash
+cp .env.example .env
+```
+
+Edit `.env` and set:
+
+- `DB_HOST`, `DB_PORT`, `DB_NAME`, `DB_USER`, `DB_PASSWORD`
+- `PIPELINE_SYMBOLS`
+- admin credentials used only by setup script (`POSTGRES_ADMIN_*`)
+
+## Database bootstrap
+
+```bash
+source .env
+python scripts/setup_postgres.py
+```
+
+The script is idempotent and safe to rerun.
+
+## Build and test C++
+
+```bash
+cmake -S . -B build
+cmake --build build -j
+ctest --test-dir build --output-on-failure
+```
+
+## Generate Doxygen docs
+
+```bash
+cmake --build build --target docs
+```
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -0,0 +1,24 @@
+[build-system]
+requires = ["scikit-build-core>=0.5", "pybind11"]
+build-backend = "scikit_build_core.build"
+
+[project]
+name = "qengine"
+version = "0.1.0"
+description = "Quant engine with C++ backend"
+authors = [{name = "David"}]
+requires-python = ">=3.10"
+dependencies = [
+    "numpy",
+    "pandas",
+    "sqlalchemy",
+    "psycopg2-binary",
+    "yfinance",
+]
+
+[tool.scikit-build]
+# Keep separate from a local `cmake -B build` tree (different generators: Ninja vs Makefiles).
+build-dir       = "skbuild-build"
+cmake.version = ">=3.16"
+cmake.build-type      = "Release"
+cmake.define.BUILD_TESTING = "OFF"
--- a/qengine/init.py
+++ b/qengine/init.py
@@ -0,0 +1,5 @@
+"""Qengine: quant pricing backend (native extension in qengine.qengine)."""
+
+from .qengine import bs_price
+
+__all__ = ["bs_price"]
--- a/scripts/setup_postgres.py
+++ b/scripts/setup_postgres.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python3
+"""
+Idempotent PostgreSQL bootstrap script for the option_pricing project.
+
+What it does:
+1) Creates the project role if it does not exist.
+2) Creates the project database if it does not exist.
+3) Grants ownership/privileges.
+4) Applies src/data/sql/schema.sql to the project database.
+
+Configuration comes from environment variables (see .env.example).
+"""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+
+import psycopg2
+from psycopg2 import sql
+
+
+ROOT = Path(__file__).resolve().parents[1]
+SCHEMA_PATH = ROOT / "src" / "data" / "sql" / "schema.sql"
+
+
+def _env(name: str, default: str | None = None) -> str:
+    value = os.getenv(name, default)
+    if value is None:
+        raise RuntimeError(f"Missing required environment variable: {name}")
+    return value
+
+
+def admin_connect(dbname: str):
+    return psycopg2.connect(
+        dbname=dbname,
+        user=_env("POSTGRES_ADMIN_USER", "postgres"),
+        password=_env("POSTGRES_ADMIN_PASSWORD", "postgres"),
+        host=_env("POSTGRES_ADMIN_HOST", "localhost"),
+        port=_env("POSTGRES_ADMIN_PORT", "5432"),
+    )
+
+
+def ensure_role_and_database() -> None:
+    db_user = _env("DB_USER", "quant_user")
+    db_password = _env("DB_PASSWORD", "")
+    db_name = _env("DB_NAME", "options_db")
+
+    admin_db = _env("POSTGRES_ADMIN_DB", "postgres")
+    with admin_connect(admin_db) as conn:
+        conn.autocommit = True
+        with conn.cursor() as cur:
+            cur.execute("SELECT 1 FROM pg_roles WHERE rolname = %s", (db_user,))
+            role_exists = cur.fetchone() is not None
+            if not role_exists:
+                cur.execute(
+                    sql.SQL("CREATE ROLE {} WITH LOGIN PASSWORD %s").format(
+                        sql.Identifier(db_user)
+                    ),
+                    (db_password,),
+                )
+            else:
+                cur.execute(
+                    sql.SQL("ALTER ROLE {} WITH LOGIN PASSWORD %s").format(
+                        sql.Identifier(db_user)
+                    ),
+                    (db_password,),
+                )
+
+            cur.execute("SELECT 1 FROM pg_database WHERE datname = %s", (db_name,))
+            db_exists = cur.fetchone() is not None
+            if not db_exists:
+                cur.execute(
+                    sql.SQL("CREATE DATABASE {} OWNER {}").format(
+                        sql.Identifier(db_name),
+                        sql.Identifier(db_user),
+                    )
+                )
+            else:
+                cur.execute(
+                    sql.SQL("ALTER DATABASE {} OWNER TO {}").format(
+                        sql.Identifier(db_name),
+                        sql.Identifier(db_user),
+                    )
+                )
+
+
+def apply_schema() -> None:
+    if not SCHEMA_PATH.exists():
+        raise FileNotFoundError(f"Schema file not found: {SCHEMA_PATH}")
+
+    schema_sql = SCHEMA_PATH.read_text(encoding="utf-8")
+    with admin_connect(_env("DB_NAME", "options_db")) as conn:
+        conn.autocommit = True
+        with conn.cursor() as cur:
+            cur.execute(schema_sql)
+
+
+def main() -> None:
+    print("Ensuring role/database exist...")
+    ensure_role_and_database()
+    print("Applying schema...")
+    apply_schema()
+    print("Database setup complete.")
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/test_qengine_bindings.py
+++ b/scripts/test_qengine_bindings.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+"""Smoke test: use an installed `qengine` package (pip install .) or a dev build (cmake -> qengine/*.so)."""
+
+from __future__ import annotations
+
+import math
+import sys
+from pathlib import Path
+
+# Running `python scripts/this.py` puts `scripts/` on sys.path, not the repo root
+_REPO_ROOT = Path(__file__).resolve().parents[1]
+if str(_REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(_REPO_ROOT))
+
+
+def main() -> int:
+    try:
+        import qengine
+    except ImportError as e:
+        print(
+            f"Import failed ({e}). Install the package (pip install .) or build with CMake so "
+            "qengine/qengine.*.so exists next to qengine/__init__.py.",
+            file=sys.stderr,
+        )
+        return 1
+
+    call = qengine.bs_price(100.0, 100.0, 1.0, 0.05, 0.2, True)
+    put = qengine.bs_price(100.0, 100.0, 1.0, 0.05, 0.2, False)
+    batch_list = qengine.bs_price(
+        [100.0, 100.0],
+        [100.0, 110.0],
+        [1.0, 1.0],
+        [0.05, 0.05],
+        [0.2, 0.2],
+        [True, False],
+    )
+
+    assert math.isfinite(call) and math.isfinite(put)
+    assert len(batch_list) == 2 and all(math.isfinite(x) for x in batch_list)
+
+    print("qengine.bs_price (call):", call)
+    print("qengine.bs_price (put):", put)
+    print("qengine.bs_price (list batch):", list(batch_list))
+
+    try:
+        import numpy as np
+    except ImportError:
+        print("ok: overloads callable (NumPy not installed; skipped ndarray batch test).")
+        return 0
+
+    s = np.array([100.0, 100.0], dtype=np.float64)
+    k = np.array([100.0, 110.0], dtype=np.float64)
+    t = np.array([1.0, 1.0], dtype=np.float64)
+    r = np.array([0.05, 0.05], dtype=np.float64)
+    sig = np.array([0.2, 0.2], dtype=np.float64)
+    opt = np.array([True, False], dtype=bool)
+    batch_np = qengine.bs_price(s, k, t, r, sig, opt)
+    assert len(batch_np) == 2 and all(math.isfinite(float(x)) for x in batch_np)
+    print("qengine.bs_price (ndarray batch):", [float(x) for x in batch_np])
+    print("ok: overloads callable.")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -1,33 +0,0 @@
-add_library(qengine
-        Instrument.cpp
-        Instrument.hpp
-        Payoff.cpp
-        Payoff.hpp
-        Option.cpp
-        Option.hpp
-        PricingEngine.cpp
-        PricingEngine.hpp
-        MonteCarloEngine.cpp
-        MonteCarloEngine.hpp
-        StochasticProcess.cpp
-        StochasticProcess.hpp
-        Exercise.cpp
-        Exercise.hpp
-        MarketData.cpp
-        MarketData.hpp
-        YieldCurve.cpp
-        YieldCurve.hpp
-        VolatilitySurface.cpp
-        VolatilitySurface.hpp
-        RandomGenerator.cpp
-        RandomGenerator.hpp
-        Statistics.cpp
-        Statistics.hpp
-        BlackScholesProcess.cpp
-        BlackScholesProcess.hpp
-
-
-)
-
-target_include_directories(qengine PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
-target_link_libraries(qengine Eigen3::Eigen)
--- a/src/Exercise.cpp
+++ b/src/Exercise.cpp
@@ -1,5 +0,0 @@
-//
-// Created by David Doebel  on 05.03.2026.
-//
-
-#include "Exercise.hpp"
--- a/src/ImpliedVolatility/init.py
+++ b/src/ImpliedVolatility/init.py
--- a/src/ImpliedVolatility/compute_vls.py
+++ b/src/ImpliedVolatility/compute_vls.py
@@ -0,0 +1,49 @@
+import numpy as np
+import qengine
+from scipy.optimize import brentq
+
+
+def implied_vol(price, S, K, T, r, call):
+    """
+    Implied vol for each row. Arguments may be scalars or 1-D arrays-like (same length).
+    """
+    price = np.asarray(price, dtype=np.float64)
+    S = np.asarray(S, dtype=np.float64)
+    K = np.asarray(K, dtype=np.float64)
+    T = np.asarray(T, dtype=np.float64)
+    call = np.asarray(call, dtype=bool)
+    r = float(r)
+
+    scalar_in = price.ndim == 0
+    if scalar_in:
+        price = np.atleast_1d(price)
+        S = np.atleast_1d(S)
+        K = np.atleast_1d(K)
+        T = np.atleast_1d(T)
+        call = np.atleast_1d(call)
+
+    n = price.shape[0]
+    if (S.shape[0] != n or K.shape[0] != n or T.shape[0] != n or call.shape[0] != n):
+        raise ValueError(
+            f"implied_vol: length mismatch price={n}, S={S.shape[0]}, K={K.shape[0]}, "
+            f"T={T.shape[0]}, call={call.shape[0]}"
+        )
+
+    out = np.full(n, np.nan, dtype=np.float64)
+    for i in range(n):
+        p, s, k, t, c = float(price[i]), float(S[i]), float(K[i]), float(T[i]), bool(call[i])
+        if not np.isfinite(p) or not np.isfinite(s) or not np.isfinite(k) or not np.isfinite(t):
+            continue
+        if s <= 0 or k <= 0 or t <= 0:
+            continue
+        try:
+            def f(sig: float) -> float:
+                return qengine.bs_price(s, k, t, r, sig, c) - p
+
+            out[i] = brentq(f, 1e-6, 5.0)
+        except (ValueError, RuntimeError):
+            out[i] = np.nan
+
+    if scalar_in:
+        return float(out[0])
+    return out
--- a/src/ImpliedVolatility/setup.py
+++ b/src/ImpliedVolatility/setup.py
--- a/src/ImpliedVolatility/svi.py
+++ b/src/ImpliedVolatility/svi.py
--- a/src/PricingEngine.cpp
+++ b/src/PricingEngine.cpp
@@ -1,5 +0,0 @@
-//
-// Created by David Doebel  on 05.03.2026.
-//
-
-#include "PricingEngine.hpp"
--- a/src/StochasticProcess.cpp
+++ b/src/StochasticProcess.cpp
@@ -1,5 +0,0 @@
-//
-// Created by David Doebel  on 05.03.2026.
-//
-
-#include "StochasticProcess.hpp"
--- a/src/VolatilitySurface.cpp
+++ b/src/VolatilitySurface.cpp
@@ -1,5 +0,0 @@
-//
-// Created by David Doebel  on 06.03.2026.
-//
-
-#include "VolatilitySurface.hpp"
--- a/src/VolatilitySurface.hpp
+++ b/src/VolatilitySurface.hpp
@@ -1,18 +0,0 @@
-//
-// Created by David Doebel  on 06.03.2026.
-//
-
-#ifndef QUANTENGINE_VOLATILITYSURFACE_HPP
-#define QUANTENGINE_VOLATILITYSURFACE_HPP
-
-
-class VolatilitySurface {
-public:
-    virtual ~VolatilitySurface() = default;
-    virtual double sigma(double K, double T) const = 0;
-private:
-
-};
-
-
-#endif //QUANTENGINE_VOLATILITYSURFACE_HPP
--- a/src/YieldCurve.cpp
+++ b/src/YieldCurve.cpp
@@ -1,5 +0,0 @@
-//
-// Created by David Doebel  on 06.03.2026.
-//
-
-#include "YieldCurve.hpp"
--- a/src/init.py
+++ b/src/init.py
--- a/src/data/init.py
+++ b/src/data/init.py
--- a/src/data/database_interaction.py
+++ b/src/data/database_interaction.py
@@ -0,0 +1,15 @@
+import pandas as pd
+
+from option_pricing.src.data.ingestion.db_connect import db_engine
+
+
+def fetch_underlyings() -> pd.DataFrame:
+    """
+    Fetch all entries from the underlyings table using configured DB credentials.
+    """
+    engine = db_engine()
+    return pd.read_sql("SELECT * FROM underlyings;", engine)
+
+
+if __name__ == "__main__":
+    print(fetch_underlyings())
--- a/src/data/ingestion/init.py
+++ b/src/data/ingestion/init.py
--- a/src/data/ingestion/config/init.py
+++ b/src/data/ingestion/config/init.py
@@ -0,0 +1,3 @@
+from .settings import DB_CONFIG, PIPELINE_CONFIG
+
+__all__ = ["DB_CONFIG", "PIPELINE_CONFIG"]
--- a/src/data/ingestion/config/settings.py
+++ b/src/data/ingestion/config/settings.py
@@ -0,0 +1,31 @@
+import os
+
+
+def _get_env_int(name: str, default: int) -> int:
+    raw = os.getenv(name)
+    if raw is None:
+        return default
+    try:
+        return int(raw)
+    except ValueError as exc:
+        raise ValueError(f"Environment variable {name} must be an integer, got '{raw}'") from exc
+
+
+def _get_env_list(name: str, default: list[str]) -> list[str]:
+    raw = os.getenv(name)
+    if not raw:
+        return default
+    return [x.strip() for x in raw.split(",") if x.strip()]
+
+
+DB_CONFIG = {
+    "host": os.getenv("DB_HOST", "localhost"),
+    "port": _get_env_int("DB_PORT", 5432),
+    "database": os.getenv("DB_NAME", "options_db"),
+    "user": os.getenv("DB_USER", "quant_user"),
+    "password": os.getenv("DB_PASSWORD", ""),
+}
+
+PIPELINE_CONFIG = {
+    "symbols": _get_env_list("PIPELINE_SYMBOLS", ["SPY"]),
+}
--- a/src/data/ingestion/db_connect.py
+++ b/src/data/ingestion/db_connect.py
@@ -0,0 +1,13 @@
+from sqlalchemy import create_engine
+from option_pricing.src.data.ingestion.config.settings import DB_CONFIG
+
+def build_db_url() -> str:
+    return (
+        f"postgresql+psycopg2://{DB_CONFIG['user']}:{DB_CONFIG['password']}"
+        f"@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['database']}"
+    )
+
+def db_engine():
+    db_url = build_db_url()
+    engine = create_engine(db_url, future=True)
+    return engine
--- a/src/data/ingestion/fred_data_ingestion.py
+++ b/src/data/ingestion/fred_data_ingestion.py
@@ -0,0 +1,4 @@
+from fredapi import Fred
+fred = Fred(api_key='471be0178bfc20ce10bb93e3fcceee3b')
+data = fred.get_series_latest_release('DTB3')
+print(data.tail())
--- a/src/data/ingestion/ingest_ubs_comparison.py
+++ b/src/data/ingestion/ingest_ubs_comparison.py
@@ -0,0 +1,72 @@
+from datetime import datetime, timedelta
+import pandas as pd
+import yfinance as yf
+
+from db_connect import db_engine
+
+# --- CONFIG ---
+TICKERS = ["UBS", "^GSPC"]
+DAYS_BACK = 21  # ~3 weeks
+TABLE_NAME = "prices"
+
+def fetch_data(tickers, start_date, end_date):
+    data = yf.download(
+        tickers,
+        start=start_date,
+        end=end_date,
+        group_by="ticker",
+        auto_adjust=True,
+        progress=False
+    )
+    return data
+
+
+def transform_data(raw_data):
+    frames = []
+
+    for ticker in raw_data.columns.levels[0]:
+        df = raw_data[ticker].copy()
+        df["ticker"] = ticker
+        df = df.reset_index()
+
+        # Keep only what we need
+        df = df[["Date", "ticker", "Close", "Volume"]]
+
+        df.rename(columns={
+            "Date": "date",
+            "Close": "close",
+            "Volume": "volume"
+        }, inplace=True)
+
+        # Compute daily returns
+        df["return"] = df["close"].pct_change()
+
+        frames.append(df)
+
+    return pd.concat(frames, ignore_index=True)
+
+
+def load_to_postgres(df, engine):
+    df.to_sql(
+        TABLE_NAME,
+        engine,
+        if_exists="append",
+        index=False
+    )
+
+
+def main():
+    end_date = datetime.utcnow()
+    start_date = end_date - timedelta(days=DAYS_BACK)
+
+    raw = fetch_data(TICKERS, start_date, end_date)
+    df = transform_data(raw)
+
+    engine = db_engine()
+    load_to_postgres(df, engine)
+
+    print("Ingestion complete.")
+
+
+if __name__ == "__main__":
+    main()
--- a/src/data/ingestion/ingest_yahoo_options.py
+++ b/src/data/ingestion/ingest_yahoo_options.py
@@ -0,0 +1,279 @@
+from datetime import datetime, timezone
+
+import pandas as pd
+import yfinance as yf
+from sqlalchemy import text
+
+from option_pricing.src.data.ingestion.config import DB_CONFIG, PIPELINE_CONFIG
+from db_connect import db_engine
+
+
+def build_db_url() -> str:
+    return (
+        f"postgresql+psycopg2://{DB_CONFIG['user']}:{DB_CONFIG['password']}"
+        f"@{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['database']}"
+    )
+
+
+def to_python_number(value):
+    """Convert pandas/numpy values to plain Python values or None."""
+    if pd.isna(value):
+        return None
+    return value
+
+
+def compute_mid(bid, ask):
+    bid = to_python_number(bid)
+    ask = to_python_number(ask)
+
+    if bid is None or ask is None:
+        return None
+    try:
+        return float((bid + ask) / 2.0)
+    except Exception:
+        return None
+
+
+def infer_option_style(symbol: str) -> str:
+    """
+    Very rough default convention:
+    - US equities / ETFs from Yahoo are usually American style
+    """
+    # TODO: If later you ingest index options like SPX, adapt this logic.
+    return "american"
+
+
+def get_or_create_underlying(conn, symbol: str) -> int:
+    query_insert = text("""
+                        INSERT INTO underlyings (symbol, exchange, currency)
+                        VALUES (:symbol, :exchange, :currency)
+                            ON CONFLICT (symbol) DO NOTHING
+                        """)
+
+    query_select = text("""
+                        SELECT id FROM underlyings WHERE symbol = :symbol
+                        """)
+
+    # TODO: improve exchange/currency detection if you want richer metadata
+    conn.execute(query_insert, {
+        "symbol": symbol,
+        "exchange": None,
+        "currency": "USD",
+    })
+
+    result = conn.execute(query_select, {"symbol": symbol}).fetchone()
+    return result[0] #h
+
+
+def get_or_create_contract(
+        conn,
+        underlying_id: int,
+        option_type: str,
+        strike: float,
+        expiration_date,
+        style: str,
+        contract_symbol: str,
+) -> int:
+    query_insert = text("""
+                        INSERT INTO option_contracts (
+                            underlying_id, option_type, strike, expiration_date, style, contract_symbol
+                        )
+                        VALUES (
+                                   :underlying_id, :option_type, :strike, :expiration_date, :style, :contract_symbol
+                               )
+                            ON CONFLICT (underlying_id, option_type, strike, expiration_date)
+        DO NOTHING
+                        """)
+
+    query_select = text("""
+                        SELECT id
+                        FROM option_contracts
+                        WHERE underlying_id = :underlying_id
+                          AND option_type = :option_type
+                          AND strike = :strike
+                          AND expiration_date = :expiration_date
+                        """)
+
+    conn.execute(query_insert, {
+        "underlying_id": underlying_id,
+        "option_type": option_type,
+        "strike": strike,
+        "expiration_date": expiration_date,
+        "style": style,
+        "contract_symbol": contract_symbol,
+    })
+
+    result = conn.execute(query_select, {
+        "underlying_id": underlying_id,
+        "option_type": option_type,
+        "strike": strike,
+        "expiration_date": expiration_date,
+    }).fetchone()
+
+    return result[0]
+
+
+def insert_underlying_price(conn, underlying_id: int, timestamp: datetime, price: float):
+    query = text("""
+                 INSERT INTO underlying_prices (underlying_id, timestamp, price)
+                 VALUES (:underlying_id, :timestamp, :price)
+                     ON CONFLICT (underlying_id, timestamp) DO NOTHING
+                 """)
+    conn.execute(query, {
+        "underlying_id": underlying_id,
+        "timestamp": timestamp,
+        "price": price,
+    })
+
+
+def insert_option_quote(
+        conn,
+        contract_id: int,
+        timestamp: datetime,
+        bid,
+        ask,
+        mid,
+        last_price,
+        implied_vol,
+        volume,
+        open_interest,
+):
+    query = text("""
+                 INSERT INTO option_quotes (
+                     contract_id, timestamp, bid, ask, mid,
+                     last_price, implied_vol, volume, open_interest
+                 )
+                 VALUES (
+                            :contract_id, :timestamp, :bid, :ask, :mid,
+                            :last_price, :implied_vol, :volume, :open_interest
+                        )
+                     ON CONFLICT (contract_id, timestamp) DO NOTHING
+                 """)
+
+    conn.execute(query, {
+        "contract_id": contract_id,
+        "timestamp": timestamp,
+        "bid": bid,
+        "ask": ask,
+        "mid": mid,
+        "last_price": last_price,
+        "implied_vol": implied_vol,
+        "volume": volume,
+        "open_interest": open_interest,
+    })
+
+
+def process_option_dataframe(conn, df: pd.DataFrame, underlying_id: int, option_type: str, symbol: str, expiration_date, timestamp: datetime):
+    style = infer_option_style(symbol)
+
+    for _, row in df.iterrows():
+        strike = to_python_number(row.get("strike"))
+        contract_symbol = to_python_number(row.get("contractSymbol"))
+        bid = to_python_number(row.get("bid"))
+        ask = to_python_number(row.get("ask"))
+        last_price = to_python_number(row.get("lastPrice"))
+        implied_vol = to_python_number(row.get("impliedVolatility"))
+        volume = to_python_number(row.get("volume"))
+        open_interest = to_python_number(row.get("openInterest"))
+
+        if strike is None:
+            continue
+
+        contract_id = get_or_create_contract(
+            conn=conn,
+            underlying_id=underlying_id,
+            option_type=option_type,
+            strike=float(strike),
+            expiration_date=expiration_date,
+            style=style,
+            contract_symbol=contract_symbol,
+        )
+
+        mid = compute_mid(bid, ask)
+
+        insert_option_quote(
+            conn=conn,
+            contract_id=contract_id,
+            timestamp=timestamp,
+            bid=bid,
+            ask=ask,
+            mid=mid,
+            last_price=last_price,
+            implied_vol=implied_vol,
+            volume=int(volume) if volume is not None else None,
+            open_interest=int(open_interest) if open_interest is not None else None,
+        )
+
+
+def ingest_symbol(symbol: str, engine):
+    print(f"Starting ingestion for {symbol}...")
+
+    ticker = yf.Ticker(symbol)
+    expirations = ticker.options
+
+    if not expirations:
+        print(f"No options found for {symbol}")
+        return
+
+    timestamp = datetime.now(timezone.utc)
+
+    # Try to get spot price
+    info = {}
+    try:
+        info = ticker.fast_info
+    except Exception:
+        pass
+
+    spot_price = None
+    if info:
+        spot_price = info.get("lastPrice") or info.get("last_price")
+
+    with engine.begin() as conn:
+        underlying_id = get_or_create_underlying(conn, symbol)
+
+        if spot_price is not None:
+            insert_underlying_price(
+                conn=conn,
+                underlying_id=underlying_id,
+                timestamp=timestamp,
+                price=float(spot_price),
+            )
+
+        for expiry in expirations:
+            print(f"  Fetching expiry {expiry} ...")
+            chain = ticker.option_chain(expiry)
+
+            expiration_date = pd.to_datetime(expiry).date()
+
+            process_option_dataframe(
+                conn=conn,
+                df=chain.calls,
+                underlying_id=underlying_id,
+                option_type="call",
+                symbol=symbol,
+                expiration_date=expiration_date,
+                timestamp=timestamp,
+            )
+
+            process_option_dataframe(
+                conn=conn,
+                df=chain.puts,
+                underlying_id=underlying_id,
+                option_type="put",
+                symbol=symbol,
+                expiration_date=expiration_date,
+                timestamp=timestamp,
+            )
+
+    print(f"Finished ingestion for {symbol}.")
+
+
+def main():
+    engine = db_engine()
+
+    for symbol in PIPELINE_CONFIG["symbols"]:
+        ingest_symbol(symbol, engine)
+
+
+if __name__ == "__main__":
+    main()
--- a/src/data/load_data.py
+++ b/src/data/load_data.py
@@ -0,0 +1,436 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+
+from option_pricing.src.data.ingestion.db_connect import db_engine
+from option_pricing.src.ImpliedVolatility.compute_vls import implied_vol
+
+
+def _normalize_quote_timestamp(df: pd.DataFrame) -> pd.DataFrame:
+    if "timestamp" not in df.columns and "quote_timestamp" in df.columns:
+        return df.rename(columns={"quote_timestamp": "timestamp"})
+    return df
+
+
+def _normalize_price_timestamp(df: pd.DataFrame) -> pd.DataFrame:
+    if "timestamp" not in df.columns and "price_timestamp" in df.columns:
+        return df.rename(columns={"price_timestamp": "timestamp"})
+    return df
+
+
+def load_data():
+    engine = db_engine()
+    underlyings = pd.read_sql("SELECT * FROM underlyings;", engine)
+    underlying_prices = _normalize_price_timestamp(
+        pd.read_sql("SELECT * FROM underlying_prices;", engine)
+    )
+    option_quotes = _normalize_quote_timestamp(pd.read_sql("SELECT * FROM option_quotes;", engine))
+    option_contracts = pd.read_sql("SELECT * FROM option_contracts;", engine)
+    return underlyings, underlying_prices, option_quotes, option_contracts
+
+
+def clean_data(data: pd.DataFrame):
+    data.dropna(inplace=True)
+    data = data[data["volume"] > 0]
+    data = data[data["open_interest"] > 10]
+    data["spread"] = data["ask"] - data["bid"]
+    #data = data[data["spread"] / data["mid"] < 1]
+    return data
+
+
+def merge_quotes_contracts(option_quotes: pd.DataFrame, option_contracts: pd.DataFrame):
+    if "timestamp" not in option_quotes.columns:
+        raise KeyError("option_quotes needs a quote time column ('timestamp' or 'quote_timestamp')")
+
+    option_quotes = option_quotes.groupby(["contract_id", "timestamp"], as_index=False).agg(
+        {
+            "bid": "mean",
+            "ask": "mean",
+            "mid": "mean",
+            "last_price": "mean",
+            "implied_vol": "mean",
+            "volume": "sum",
+            "open_interest": "sum",
+        }
+    )
+    option_quotes = option_quotes.merge(
+        option_contracts, left_on="contract_id", right_on="id", how="left"
+    )
+    option_quotes["timestamp"] = pd.to_datetime(option_quotes["timestamp"])
+    option_quotes["expiration_date"] = pd.to_datetime(option_quotes["expiration_date"])
+    option_quotes["T"] = (
+        option_quotes["expiration_date"] - option_quotes["timestamp"]
+    ).dt.total_seconds() / (365 * 24 * 3600)
+    return option_quotes
+
+
+def compute_iv(option_quotes_contracts, underlying_prices):
+    df = option_quotes_contracts.copy()
+    up = _normalize_price_timestamp(underlying_prices.copy())
+
+    up["timestamp"] = pd.to_datetime(up["timestamp"])
+    up = up.sort_values("timestamp").drop_duplicates(
+        ["underlying_id", "timestamp"], keep="last"
+    )
+
+    mask = df["T"] > 0
+    if not mask.any():
+        df["iv"] = np.nan
+        return df
+
+    sub = df.loc[mask].copy()
+    sub["_idx"] = sub.index
+
+    merged = sub.merge(
+        up[["underlying_id", "timestamp", "price"]],
+        on=["underlying_id", "timestamp"],
+        how="left",
+        validate="many_to_one",
+    )
+
+    # assign back using explicit index
+    df["spot"] = np.nan
+    df.loc[merged["_idx"], "spot"] = merged["price"].to_numpy()
+
+    price = merged["mid"].to_numpy(dtype=np.float64)
+    S = merged["price"].to_numpy(dtype=np.float64)
+    K = merged["strike"].to_numpy(dtype=np.float64)
+    T = merged["T"].to_numpy(dtype=np.float64)
+    call = (merged["option_type"] == "call").to_numpy()
+
+
+    df["iv"] = np.nan
+    df.loc[sub.index, "iv"] = implied_vol(price, S, K, T, 0.05, call)
+    return df
+
+def fit_ivsimle(option_quotes_contracts):
+    from scipy.interpolate import UnivariateSpline
+    sort = option_quotes_contracts.sort_values("log_moneyness").dropna()
+    x = sort["log_moneyness"]
+    y = sort["iv"]
+    y_yahoo = sort["implied_vol"]
+    print(x,y,y_yahoo)
+    f = UnivariateSpline(x, y, s=None)
+    f_yahoo = UnivariateSpline(x, y_yahoo, s=None)
+    # plot the smile
+    x_lin = np.linspace(x.min(), x.max(), 200)
+    plt.plot(x_lin, f(x_lin), label="iv smile")
+    plt.plot(x_lin, f_yahoo(x_lin), label="yahoo iv smile")
+    plt.legend()
+    plt.savefig("iv_smile_fit.pdf")
+
+
+    return f
+
+def calibrate_svi_surface(option_quotes_contracts: pd.DataFrame, r: float = 0.05, **kwargs):
+    """
+    Fit SVI per expiry on ``iv`` from :func:`compute_iv` and plot diagnostics.
+
+    See :func:`option_pricing.src.ImpliedVolatility.svi.calibrate_from_option_frame`.
+    """
+    from option_pricing.src.ImpliedVolatility.svi import calibrate_from_option_frame
+
+    return calibrate_from_option_frame(option_quotes_contracts, r=r, **kwargs)
+
+def clean_before_svi(option_quotes_contracts: pd.DataFrame):
+    option_quotes_contracts = option_quotes_contracts[option_quotes_contracts["T"] > 0.05]
+    return option_quotes_contracts
+
+
+def plot_smoothed_svi_surface(prep: pd.DataFrame, params: pd.DataFrame, r: float = 0.05):
+    """
+    Plot independent slice fits after maturity smoothing.
+
+    Outputs:
+    - svi_smoothed_surface.pdf
+    - svi_calendar_violation_heatmap.pdf
+    """
+    from option_pricing.src.ImpliedVolatility.svi import (
+        calendar_violation_matrix,
+        smooth_svi_parameters,
+    )
+
+    # Build smoothed maturity-parameter curves from calibrated slice parameters
+    curves = smooth_svi_parameters(
+        params,
+        T_col="T_mean",
+        smooth_factor_a=1e-4,
+        smooth_factor_m=1e-4,
+        smooth_factor_others=0.0,
+        min_T=0.05,
+        weight_col="n_points",
+    )
+
+    # Overlay market points and smoothed model by maturity
+    plot_df = prep.copy()
+    if "T" not in plot_df.columns or "total_var" not in plot_df.columns:
+        raise KeyError("prep must include columns 'T' and 'total_var'")
+
+    T_grid = np.sort(params.loc[params["success"], "T_mean"].to_numpy(dtype=np.float64))
+    if T_grid.size < 2:
+        return
+    k_grid = np.linspace(
+        float(plot_df["log_moneyness"].quantile(0.02)),
+        float(plot_df["log_moneyness"].quantile(0.98)),
+        180,
+    )
+
+    plt.figure(figsize=(11, 7))
+    cmap = plt.colormaps["viridis"]
+    nT = max(len(T_grid), 1)
+    for i, Ti in enumerate(T_grid):
+        color = cmap(i / max(nT - 1, 1)) if nT > 1 else cmap(0.5)
+        near = np.isclose(plot_df["T"].to_numpy(dtype=np.float64), Ti, rtol=0.03, atol=2e-3)
+        sub = plot_df.loc[near]
+        if sub.empty:
+            continue
+        # market IV points
+        iv_mkt = np.sqrt(
+            np.maximum(sub["total_var"].to_numpy(dtype=np.float64), 0.0)
+            / np.maximum(Ti, 1e-12)
+        )
+        plt.scatter(
+            sub["log_moneyness"].to_numpy(dtype=np.float64),
+            iv_mkt,
+            s=10,
+            alpha=0.35,
+            color=color,
+        )
+        # smoothed curve IV
+        w_model = curves.total_var(k_grid, np.array([Ti], dtype=np.float64))[0]
+        iv_model = np.sqrt(np.maximum(w_model, 0.0) / np.maximum(Ti, 1e-12))
+        plt.plot(k_grid, iv_model, color=color, lw=2, label=f"T={Ti:.3f}")
+
+    plt.xlabel("log moneyness log(K/F)")
+    plt.ylabel("implied vol")
+    plt.title("SVI surface: market points vs smoothed maturity curves")
+    plt.grid(alpha=0.3)
+    plt.legend(fontsize=8, ncol=2)
+    plt.tight_layout()
+    plt.savefig("svi_smoothed_surface.pdf", bbox_inches="tight")
+    plt.clf()
+
+    # Calendar diagnostics from smoothed surface
+    cal_diff = calendar_violation_matrix(curves, T_grid, k_grid)
+    # diff shape: (len(T_grid)-1, len(k_grid)) where negative is violation
+    plt.figure(figsize=(11, 4))
+    im = plt.imshow(
+        cal_diff,
+        aspect="auto",
+        origin="lower",
+        cmap="coolwarm",
+        vmin=-0.02,
+        vmax=0.02,
+        extent=[k_grid.min(), k_grid.max(), 0, cal_diff.shape[0]],
+    )
+    plt.colorbar(im, label="w(T_{j+1},k)-w(T_j,k)")
+    plt.xlabel("log moneyness")
+    plt.ylabel("maturity step j")
+    plt.title("Calendar diagnostic heatmap (negative = violation)")
+    plt.tight_layout()
+    plt.savefig("svi_calendar_violation_heatmap.pdf", bbox_inches="tight")
+    plt.clf()
+
+
+def _fit_slice_with_svi_py_model(
+    model: object,
+    model_name: str,
+    k: np.ndarray,
+    w: np.ndarray,
+    T: float,
+    *,
+    theta_ref: float,
+    prev_params: dict | None,
+    k_eval: np.ndarray,
+) -> tuple[np.ndarray, dict]:
+    """Fit one slice with a specific pysvi model and evaluate total variance on k_eval."""
+    T = float(T)
+    k = np.asarray(k, dtype=np.float64)
+    w = np.asarray(w, dtype=np.float64)
+    k_eval = np.asarray(k_eval, dtype=np.float64)
+
+    # ATM total variance proxy for models requiring theta
+    theta = float(np.interp(0.0, np.sort(k), w[np.argsort(k)]))
+    theta = max(theta, 1e-8)
+
+    kwargs: dict = {}
+    if model_name == "ssvi":
+        kwargs["theta"] = theta
+    elif model_name == "essvi":
+        kwargs["theta"] = theta
+        kwargs["theta_ref"] = max(float(theta_ref), 1e-8)
+    elif model_name in {"jumpwings", "jw"}:
+        kwargs["T"] = max(T, 1e-8)
+
+    # Option B: calendar penalty uses pysvi internal 200-point grid per current slice.
+    # Build w_prev on that exact grid to avoid shape mismatch.
+    if prev_params is not None:
+        k_cal = np.linspace(float(k.min()) - 0.5, float(k.max()) + 0.5, 200)
+        kwargs["w_prev"] = np.asarray(model.total_variance(k_cal, prev_params), dtype=np.float64)
+
+    params = model.calibrate(k, w, **kwargs)
+    if params is None:
+        raise RuntimeError(f"pysvi {model_name} calibration failed")
+    w_eval = model.total_variance(k_eval, params)
+    return np.asarray(w_eval, dtype=np.float64), params
+
+
+def compare_vs_svi_py(prep: pd.DataFrame, params: pd.DataFrame):
+    """
+    Compare in-house SVI fit against pysvi models with explicit no-arbitrage flags.
+
+    Outputs:
+    - svi_vs_pysvi_<model>_comparison.pdf for model in {svi, ssvi, essvi, jumpwings}
+    - svi_vs_pysvi_metrics.csv
+    """
+    from option_pricing.src.ImpliedVolatility.svi import SVIParams
+    from pysvi import ArbitrageFreedom, get_model
+
+    ok_params = params[params["success"]].copy()
+    if ok_params.empty:
+        print("compare_vs_svi_py: no successful in-house slices; skipping.")
+        return
+
+    k_min = float(prep["log_moneyness"].quantile(0.02))
+    k_max = float(prep["log_moneyness"].quantile(0.98))
+    k_grid = np.linspace(k_min, k_max, 180)
+
+    models = ["svi", "ssvi", "essvi", "jumpwings"]
+    rows: list[dict] = []
+
+    # reference theta for eSSVI from in-house successful slices
+    theta_ref = float(np.median(ok_params["T_mean"].to_numpy(dtype=np.float64) * 0 + 1.0))
+    # Better theta_ref proxy from observed market ATM if available
+    theta_vals = []
+    for _, row in ok_params.iterrows():
+        Ti = float(row["T_mean"])
+        near = np.isclose(prep["T"].to_numpy(dtype=np.float64), Ti, rtol=0.03, atol=2e-3)
+        sub = prep.loc[near].sort_values("log_moneyness")
+        if len(sub) < 10:
+            continue
+        ks = sub["log_moneyness"].to_numpy(dtype=np.float64)
+        ws = sub["total_var"].to_numpy(dtype=np.float64)
+        theta_vals.append(float(np.interp(0.0, np.sort(ks), ws[np.argsort(ks)])))
+    if theta_vals:
+        theta_ref = float(np.median(theta_vals))
+
+    sorted_rows = list(ok_params.sort_values("T_mean").iterrows())
+    for model_name in models:
+        flags = ArbitrageFreedom.NO_BUTTERFLY | ArbitrageFreedom.NO_CALENDAR
+        model = get_model(model_name, flags)
+        plt.figure(figsize=(11, 7))
+        cmap = plt.colormaps["tab20"]
+        prev_params = None
+        n_used = 0
+        for _, row in sorted_rows:
+            Ti = float(row["T_mean"])
+            near = np.isclose(prep["T"].to_numpy(dtype=np.float64), Ti, rtol=0.03, atol=2e-3)
+            sub = prep.loc[near].sort_values("log_moneyness")
+            if len(sub) < 10:
+                continue
+            k = sub["log_moneyness"].to_numpy(dtype=np.float64)
+            w = sub["total_var"].to_numpy(dtype=np.float64)
+
+            p_ours = SVIParams(
+                float(row["a"]), float(row["b"]), float(row["rho"]), float(row["m"]), float(row["sigma"])
+            )
+            w_ours = p_ours.total_var(k_grid)
+            rmse_ours = float(np.sqrt(np.mean((p_ours.total_var(k) - w) ** 2)))
+
+            try:
+                w_ext, ext_params = _fit_slice_with_svi_py_model(
+                    model,
+                    model_name,
+                    k,
+                    w,
+                    Ti,
+                    theta_ref=theta_ref,
+                    prev_params=prev_params,
+                    k_eval=k_grid,
+                )
+                rmse_ext = float(np.sqrt(np.mean((np.interp(k, k_grid, w_ext) - w) ** 2)))
+                rows.append(
+                    {
+                        "model": model_name,
+                        "T_mean": Ti,
+                        "rmse_ours": rmse_ours,
+                        "rmse_pysvi": rmse_ext,
+                        "delta_rmse": rmse_ext - rmse_ours,
+                        "ext_params": str(ext_params),
+                    }
+                )
+                color = cmap(n_used % 20)
+                n_used += 1
+                plt.plot(k_grid, np.sqrt(np.maximum(w_ours, 0) / max(Ti, 1e-12)), color=color, lw=2, alpha=0.9)
+                plt.plot(k_grid, np.sqrt(np.maximum(w_ext, 0) / max(Ti, 1e-12)), color=color, lw=1.5, ls="--", alpha=0.9)
+                prev_params = ext_params
+            except Exception as exc:
+                print(f"compare_vs_svi_py[{model_name}]: skipping T={Ti:.4f}, reason: {exc}")
+                continue
+
+        if n_used == 0:
+            plt.close()
+            continue
+
+        plt.xlabel("log moneyness")
+        plt.ylabel("implied vol")
+        plt.title(f"In-house SVI (solid) vs pysvi {model_name} (dashed)")
+        plt.grid(alpha=0.3)
+        plt.tight_layout()
+        plt.savefig(f"svi_vs_pysvi_{model_name}_comparison.pdf", bbox_inches="tight")
+        plt.clf()
+
+    out = pd.DataFrame(rows)
+    if out.empty:
+        print("compare_vs_svi_py: no slices compared (pysvi unavailable or incompatible).")
+        return
+    out = out.sort_values(["model", "T_mean"])
+    out.to_csv("svi_vs_pysvi_metrics.csv", index=False)
+    print(out.groupby("model")[["rmse_ours", "rmse_pysvi", "delta_rmse"]].mean())
+
+
+def plot_ivsmile(option_quotes_contracts):
+    option_quotes_contracts = option_quotes_contracts.sort_values("strike")
+    option_quotes_contracts["log_moneyness"] = np.log(
+        option_quotes_contracts["spot"] * np.exp(0.05 * option_quotes_contracts["T"])/option_quotes_contracts["strike"]
+    )
+    option_quotes_contracts = option_quotes_contracts[option_quotes_contracts["log_moneyness"].abs() < 0.2]
+    #option_quotes_contracts = option_quotes_contracts[option_quotes_contracts["mid"] > 0.2]
+    plt.plot(option_quotes_contracts["strike"], option_quotes_contracts["iv"], label="iv smile")
+    plt.plot(option_quotes_contracts["strike"], option_quotes_contracts["implied_vol"], label="i. vol")
+    plt.legend()
+    plt.savefig("iv_smile.pdf")
+    plt.xlabel("iv")
+    plt.ylabel("strike price")
+    plt.clf()
+    return option_quotes_contracts
+
+
+
+
+
+if __name__ == "__main__":
+    underlyings, underlying_prices, option_quotes, option_contracts = load_data()
+    option_quotes_contracts = merge_quotes_contracts(option_quotes, option_contracts)
+    option_quotes_contracts = clean_data(option_quotes_contracts)
+    option_quotes_contracts = compute_iv(option_quotes_contracts, underlying_prices)
+    mask = option_quotes_contracts["iv"].notna()
+    print(option_quotes_contracts)
+    print(option_quotes_contracts.columns)
+    #plt.plot(option_quotes_contracts["contract_id"][mask], option_quotes_contracts["implied_vol"][mask], label="i. iv")
+    #plt.plot(option_quotes_contracts["contract_id"][mask],option_quotes_contracts["iv"][mask], label="comp. iv")
+    #plt.legend()
+    #plt.show()
+    option_quotes_contracts = plot_ivsmile(option_quotes_contracts)
+    fit_ivsimle(option_quotes_contracts)
+    prep, svi_fit, params = calibrate_svi_surface(
+        clean_before_svi(option_quotes_contracts),
+        r=0.05,
+        plot_backend="matplotlib",
+        finplot_show=True,
+        # optionally: plot_path=None to avoid matplotlib PDF output
+    )
+    print(svi_fit)
+    plot_smoothed_svi_surface(prep, params, r=0.05)
+    compare_vs_svi_py(prep, params)
+
--- a/src/data/sql/schema.sql
+++ b/src/data/sql/schema.sql
@@ -0,0 +1,49 @@
+CREATE TABLE IF NOT EXISTS underlyings (
+                                           id SERIAL PRIMARY KEY,
+                                           symbol TEXT UNIQUE NOT NULL,
+                                           exchange TEXT,
+                                           currency TEXT,
+                                           created_at TIMESTAMP DEFAULT NOW()
+    );
+
+CREATE TABLE IF NOT EXISTS option_contracts (
+                                                id SERIAL PRIMARY KEY,
+                                                underlying_id INTEGER NOT NULL REFERENCES underlyings(id),
+    option_type TEXT NOT NULL CHECK (option_type IN ('call', 'put')),
+    strike NUMERIC NOT NULL,
+    expiration_date DATE NOT NULL,
+    style TEXT,
+    contract_symbol TEXT,
+    UNIQUE (underlying_id, option_type, strike, expiration_date)
+    );
+
+CREATE TABLE IF NOT EXISTS option_quotes (
+    id SERIAL PRIMARY KEY,
+    contract_id INTEGER NOT NULL REFERENCES option_contracts(id),
+    quote_timestamp TIMESTAMP NOT NULL,
+    bid NUMERIC,
+    ask NUMERIC,
+    mid NUMERIC,
+    last_price NUMERIC,
+    implied_vol NUMERIC,
+    volume INTEGER,
+    open_interest INTEGER,
+    UNIQUE (contract_id, quote_timestamp)
+    );
+
+CREATE TABLE IF NOT EXISTS underlying_prices (
+    id SERIAL PRIMARY KEY,
+    underlying_id INTEGER NOT NULL REFERENCES underlyings(id),
+    price_timestamp TIMESTAMP NOT NULL,
+    price NUMERIC NOT NULL,
+    UNIQUE (underlying_id, price_timestamp)
+    );
+
+CREATE INDEX IF NOT EXISTS idx_option_quotes_timestamp
+    ON option_quotes(quote_timestamp);
+
+CREATE INDEX IF NOT EXISTS idx_option_quotes_contract_id
+    ON option_quotes(contract_id);
+
+CREATE INDEX IF NOT EXISTS idx_option_contracts_underlying_expiry
+    ON option_contracts(underlying_id, expiration_date);
--- a/src/data/yfinance_pull.py
+++ b/src/data/yfinance_pull.py
@@ -0,0 +1,11 @@
+import yfinance as yf
+
+ticker = yf.Ticker("AAPL")
+
+expirations = ticker.options
+print(expirations)
+
+chain = ticker.option_chain(expirations[0])
+
+calls = chain.calls
+puts = chain.puts
--- a/standalone_numerical_experiments/local_volatility_instability/INDEPENDENT_STANDALONE.txt
+++ b/standalone_numerical_experiments/local_volatility_instability/INDEPENDENT_STANDALONE.txt
@@ -0,0 +1,6 @@
+This folder is intentionally self-contained.
+
+- No imports from the parent option_pricing package (no qengine, src/, cpp bindings).
+- Third-party dependencies: numpy, matplotlib (see requirements.txt).
+- Run: python run_experiment.py [--out lv_rmse.png]
+- Safe to copy elsewhere or run in isolation.
--- a/standalone_numerical_experiments/local_volatility_instability/figures/lv_relerr.png
+++ b/standalone_numerical_experiments/local_volatility_instability/figures/lv_relerr.png
--- a/standalone_numerical_experiments/local_volatility_instability/figures/lv_rmse.png
+++ b/standalone_numerical_experiments/local_volatility_instability/figures/lv_rmse.png
--- a/standalone_numerical_experiments/local_volatility_instability/figures/lv_sigma2.png
+++ b/standalone_numerical_experiments/local_volatility_instability/figures/lv_sigma2.png
--- a/standalone_numerical_experiments/local_volatility_instability/gatheral_local_vol.py
+++ b/standalone_numerical_experiments/local_volatility_instability/gatheral_local_vol.py
@@ -0,0 +1,108 @@
+"""
+Gatheral local variance in total-variance / log-moneyness form (practitioner's guide).
+
+sigma^2 = (d_T w) / ( 1 - (y/w) d_y w
+            + (1/4)(-1/4 - 1/w + y^2/w^2) (d_y w)^2
+            + (1/2) d_yy w )
+
+where w = omega is total implied variance, y is log-moneyness (convention as in the note).
+"""
+
+from __future__ import annotations
+
+import numpy as np
+
+
+def local_variance_from_derivatives(
+    y: np.ndarray,
+    w: np.ndarray,
+    dy_w: np.ndarray,
+    dyy_w: np.ndarray,
+    dT_w: np.ndarray,
+    *,
+    eps: float = 1e-14,
+) -> np.ndarray:
+    """Vectorized Gatheral formula. Invalid / near-singular points become nan."""
+    y = np.asarray(y, dtype=float)
+    w = np.asarray(w, dtype=float)
+    dy_w = np.asarray(dy_w, dtype=float)
+    dyy_w = np.asarray(dyy_w, dtype=float)
+    dT_w = np.asarray(dT_w, dtype=float)
+
+    out = np.full_like(y, np.nan, dtype=float)
+    ok = np.isfinite(w) & (np.abs(w) > eps) & np.isfinite(dy_w) & np.isfinite(dyy_w) & np.isfinite(dT_w)
+
+    denom = np.empty_like(w)
+    denom[ok] = (
+        1.0
+        - (y[ok] / w[ok]) * dy_w[ok]
+        + 0.25 * (-0.25 - 1.0 / w[ok] + (y[ok] ** 2) / (w[ok] ** 2)) * (dy_w[ok] ** 2)
+        + 0.5 * dyy_w[ok]
+    )
+
+    ok2 = ok & (np.abs(denom) > eps)
+    out[ok2] = dT_w[ok2] / denom[ok2]
+    return out
+
+
+def quadratic_total_variance(
+    y: np.ndarray,
+    alpha: float,
+    beta: float,
+    gamma: float,
+    T: float,
+) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+    """
+    w(y,T) = T * (alpha + beta*y + gamma*y^2), with derivatives as in the note:
+
+      d_T w = alpha + beta*y + gamma*y^2
+      d_y w = T * (beta + 2*gamma*y)
+      d_yy w = 2*gamma*T
+    """
+    y = np.asarray(y, dtype=float)
+    f = alpha + beta * y + gamma * y ** 2
+    w = T * f
+    dT_w = f
+    dy_w = T * (beta + 2.0 * gamma * y)
+    dyy_w = np.full_like(y, 2.0 * gamma * T)
+    return w, dT_w, dy_w, dyy_w
+
+
+def analytic_local_variance_quadratic(
+    y: np.ndarray,
+    alpha: float,
+    beta: float,
+    gamma: float,
+    T: float,
+) -> np.ndarray:
+    """Closed form from the note (equivalent to plugging derivatives into Gatheral)."""
+    y = np.asarray(y, dtype=float)
+    w, dT_w, dy_w, dyy_w = quadratic_total_variance(y, alpha, beta, gamma, T)
+    return local_variance_from_derivatives(y, w, dy_w, dyy_w, dT_w)
+
+
+def central_first_derivative_uniform(w: np.ndarray, h: float) -> np.ndarray:
+    """Interior (w[i+1]-w[i-1])/(2h); endpoints nan."""
+    w = np.asarray(w, dtype=float)
+    out = np.full_like(w, np.nan)
+    out[1:-1] = (w[2:] - w[:-2]) / (2.0 * h)
+    return out
+
+
+def second_derivative_uniform(w: np.ndarray, h: float) -> np.ndarray:
+    """Interior second difference / h^2; endpoints nan."""
+    w = np.asarray(w, dtype=float)
+    out = np.full_like(w, np.nan)
+    out[1:-1] = (w[2:] - 2.0 * w[1:-1] + w[:-2]) / (h ** 2)
+    return out
+
+
+def add_multiplicative_noise(
+    w: np.ndarray,
+    sigma_noise: float,
+    rng: np.random.Generator,
+) -> np.ndarray:
+    """tilde w(y_i) = w(y_i) * (1 + eps), eps ~ N(0, sigma_noise^2)."""
+    w = np.asarray(w, dtype=float)
+    eps = rng.normal(0.0, sigma_noise, size=w.shape)
+    return w * (1.0 + eps)
--- a/standalone_numerical_experiments/local_volatility_instability/lv_rmse.png
+++ b/standalone_numerical_experiments/local_volatility_instability/lv_rmse.png
--- a/standalone_numerical_experiments/local_volatility_instability/requirements.txt
+++ b/standalone_numerical_experiments/local_volatility_instability/requirements.txt
@@ -0,0 +1,2 @@
+numpy>=1.20
+matplotlib>=3.5
--- a/standalone_numerical_experiments/local_volatility_instability/run_experiment.py
+++ b/standalone_numerical_experiments/local_volatility_instability/run_experiment.py
@@ -0,0 +1,309 @@
+#!/usr/bin/env python3
+"""
+Local-volatility instability experiment (Gatheral total variance in log-moneyness).
+
+We compare the analytic local variance σ²(y) from a quadratic total variance
+w(y,T) = T(α + βy + γy²) to σ² reconstructed from a noisy discrete surface
+w̃(y_i) = w(y_i)(1 + ε_i) using finite differences in y, for several levels of
+multiplicative noise σ_noise. This script only produces the figure: RMSE of the
+FD reconstruction vs σ_noise (log–log), with a y = σ reference line of slope 1.
+
+Dependencies: numpy, matplotlib only (see INDEPENDENT_STANDALONE.txt).
+"""
+
+from __future__ import annotations
+
+import argparse
+import os
+import sys
+from typing import Literal
+
+# Prevent accidental imports from the parent repository
+_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
+if _REPO_ROOT in sys.path:
+    sys.path.remove(_REPO_ROOT)
+
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import numpy as np
+
+from gatheral_local_vol import (
+    add_multiplicative_noise,
+    analytic_local_variance_quadratic,
+    central_first_derivative_uniform,
+    local_variance_from_derivatives,
+    quadratic_total_variance,
+    second_derivative_uniform,
+)
+
+# ---------------------------------------------------------------------------
+# Defaults (quadratic total variance, positive w on y ∈ [-0.5, 0.5])
+# ---------------------------------------------------------------------------
+
+ALPHA = 0.04
+BETA = 0.0
+GAMMA = 0.1
+T_MATURITY = 1.0
+Y_MIN = -0.5
+Y_MAX = 0.5
+N_GRID = 201
+
+
+def ensure_parent_dir(path: str) -> None:
+    parent = os.path.dirname(os.path.abspath(path))
+    if parent:
+        os.makedirs(parent, exist_ok=True)
+
+
+def log_uniform_sigma_grid(n_points: int, sigma_min: float, sigma_max: float) -> np.ndarray:
+    """
+    Return `n_points` values of σ_noise with log₁₀(σ) equally spaced.
+
+    This is the correct sampling for a log–log RMSE plot; it is not linspace(σ_min, σ_max).
+    """
+    n_points = max(4, n_points)
+    if sigma_min <= 0 or sigma_max <= 0 or sigma_max < sigma_min:
+        raise ValueError("Require 0 < sigma_min <= sigma_max.")
+    return np.logspace(np.log10(sigma_min), np.log10(sigma_max), n_points)
+
+
+def relative_pointwise_error(
+    sigma2_analytic: np.ndarray, sigma2_fd: np.ndarray, eps: float = 1e-12
+) -> np.ndarray:
+    return (sigma2_fd - sigma2_analytic) / np.maximum(np.abs(sigma2_analytic), eps)
+
+
+def rmse_absolute(
+    sigma2_analytic: np.ndarray,
+    sigma2_fd: np.ndarray,
+    interior: slice,
+) -> float:
+    """RMSE of (σ²_FD − σ²_analytic) on interior indices."""
+    sa = np.asarray(sigma2_analytic, dtype=float)[interior]
+    sf = np.asarray(sigma2_fd, dtype=float)[interior]
+    m = np.isfinite(sa) & np.isfinite(sf)
+    if not np.any(m):
+        return float("nan")
+    d = sf[m] - sa[m]
+    return float(np.sqrt(np.mean(d * d)))
+
+
+def rmse_relative(
+    sigma2_analytic: np.ndarray,
+    sigma2_fd: np.ndarray,
+    interior: slice,
+    eps: float = 1e-12,
+) -> float:
+    """RMSE over grid points of relative error (σ²_FD − σ²_analytic) / |σ²_analytic|."""
+    re = relative_pointwise_error(sigma2_analytic, sigma2_fd, eps=eps)[interior]
+    m = np.isfinite(re)
+    if not np.any(m):
+        return float("nan")
+    return float(np.sqrt(np.mean(re[m] ** 2)))
+
+
+def local_variance_one_draw(
+    y: np.ndarray,
+    h: float,
+    alpha: float,
+    beta: float,
+    gamma: float,
+    T: float,
+    sigma_noise: float,
+    rng: np.random.Generator,
+    dT_mode: Literal["exact", "noisy_ratio"],
+) -> tuple[np.ndarray, np.ndarray]:
+    """One noisy surface and FD local variance; returns (σ²_analytic, σ²_FD)."""
+    w_true, dT_w_true, _, _ = quadratic_total_variance(y, alpha, beta, gamma, T)
+    sigma2_a = analytic_local_variance_quadratic(y, alpha, beta, gamma, T)
+
+    w_tilde = add_multiplicative_noise(w_true, sigma_noise, rng)
+    dy = central_first_derivative_uniform(w_tilde, h)
+    dyy = second_derivative_uniform(w_tilde, h)
+
+    if dT_mode == "exact":
+        dT = dT_w_true
+    elif dT_mode == "noisy_ratio":
+        dT = w_tilde / T
+    else:
+        raise ValueError(dT_mode)
+
+    sigma2_fd = local_variance_from_derivatives(y, w_tilde, dy, dyy, dT)
+    return sigma2_a, sigma2_fd
+
+
+def rmse_curves_averaged(
+    y: np.ndarray,
+    h: float,
+    alpha: float,
+    beta: float,
+    gamma: float,
+    T: float,
+    sigma_grid: np.ndarray,
+    rng: np.random.Generator,
+    dT_mode: Literal["exact", "noisy_ratio"],
+    interior: slice,
+    trials_per_sigma: int,
+) -> tuple[np.ndarray, np.ndarray]:
+    """
+    For each σ in `sigma_grid`, average RMSE (relative and absolute) over
+    `trials_per_sigma` independent noise draws.
+    """
+    rel: list[float] = []
+    abs_: list[float] = []
+    trials_per_sigma = max(1, trials_per_sigma)
+
+    for sig in sigma_grid:
+        tr: list[float] = []
+        ta: list[float] = []
+        for _ in range(trials_per_sigma):
+            sa, sf = local_variance_one_draw(
+                y, h, alpha, beta, gamma, T, float(sig), rng, dT_mode
+            )
+            tr.append(rmse_relative(sa, sf, interior))
+            ta.append(rmse_absolute(sa, sf, interior))
+        rel.append(float(np.nanmean(tr)))
+        abs_.append(float(np.nanmean(ta)))
+
+    return np.asarray(rel, dtype=float), np.asarray(abs_, dtype=float)
+
+
+def plot_rmse_vs_noise(
+    sigma_grid: np.ndarray,
+    rmse_rel: np.ndarray,
+    rmse_abs: np.ndarray,
+    *,
+    h: float,
+    T: float,
+    dT_mode: str,
+    trials_per_sigma: int,
+) -> mpl.figure.Figure:
+    """
+    Log–log plot: RMSE (relative and absolute in σ²) vs σ_noise, reference y = σ.
+    """
+    fig, ax = plt.subplots(figsize=(5.8, 3.8), constrained_layout=True)
+
+    x = np.asarray(sigma_grid, dtype=float)
+    pos = x > 0
+    n = len(x)
+    ms = 3.5 if n > 50 else 4.5
+
+    ax.loglog(
+        x[pos],
+        rmse_rel[pos],
+        "o-",
+        ms=ms,
+        lw=1.25,
+        label=r"RMSE of relative error $(\sigma^2_{\mathrm{FD}}-\sigma^2_{\mathrm{nat}})/|\sigma^2_{\mathrm{nat}}|$",
+        zorder=3,
+    )
+    ax.loglog(
+        x[pos],
+        rmse_abs[pos],
+        "s--",
+        ms=ms - 1,
+        lw=1.0,
+        alpha=0.9,
+        label=r"RMSE of $\sigma^2$ error $|\sigma^2_{\mathrm{FD}}-\sigma^2_{\mathrm{nat}}|$",
+        zorder=2,
+    )
+
+    s_lo, s_hi = float(x[pos].min()), float(x[pos].max())
+    ax.loglog([s_lo, s_hi], [s_lo, s_hi], ":", color="0.4", lw=2.0, zorder=1, label=r"reference slope 1: $y=\sigma_{\mathrm{noise}}$")
+
+    ax.set_xlabel(r"$\sigma_{\mathrm{noise}}$ (multiplicative noise on $\tilde{w}$)")
+    ax.set_ylabel("RMSE (interior $y$)")
+    subtitle = f"$T={T}$, $h={h:.4f}$, $\\partial_T w$: {dT_mode}"
+    if trials_per_sigma > 1:
+        subtitle += f", mean over {trials_per_sigma} draws per $\\sigma$"
+    ax.set_title("FD local variance: RMSE vs noise\n" + subtitle, fontsize=10)
+    ax.grid(True, which="both", alpha=0.35)
+    ax.legend(loc="best", fontsize=8, framealpha=0.95)
+
+    return fig
+
+
+def configure_matplotlib_style() -> None:
+    """Conservative defaults suitable for print."""
+    mpl.rcParams.update(
+        {
+            "figure.dpi": 120,
+            "savefig.dpi": 300,
+            "font.size": 10,
+            "axes.labelsize": 10,
+            "axes.titlesize": 10,
+            "legend.fontsize": 8,
+            "axes.grid": True,
+        }
+    )
+
+
+def main() -> None:
+    configure_matplotlib_style()
+
+    parser = argparse.ArgumentParser(
+        description="RMSE of finite-difference local variance vs multiplicative noise (single figure).",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument("--seed", type=int, default=42, help="RNG seed.")
+    parser.add_argument(
+        "--out",
+        type=str,
+        default="lv_rmse.png",
+        help="Output image path.",
+    )
+    parser.add_argument(
+        "--dT-mode",
+        choices=("exact", "noisy_ratio"),
+        default="exact",
+        help="Treatment of ∂_T w when w is replaced by noisy w̃ on the grid.",
+    )
+    parser.add_argument("--rmse-points", type=int, default=35, help="Number of σ_noise values (log-uniform).")
+    parser.add_argument("--rmse-sigma-min", type=float, default=1e-5, help="Smallest σ_noise.")
+    parser.add_argument("--rmse-sigma-max", type=float, default=5e-4, help="Largest σ_noise.")
+    parser.add_argument(
+        "--rmse-trials",
+        type=int,
+        default=50,
+        help="Independent noisy surfaces per σ_noise; RMSE is averaged.",
+    )
+    args = parser.parse_args()
+
+    rng = np.random.default_rng(args.seed)
+    y = np.linspace(Y_MIN, Y_MAX, N_GRID)
+    h = float(y[1] - y[0])
+    interior = slice(1, -1)
+
+    sigma_grid = log_uniform_sigma_grid(args.rmse_points, args.rmse_sigma_min, args.rmse_sigma_max)
+    rmse_rel, rmse_abs = rmse_curves_averaged(
+        y,
+        h,
+        ALPHA,
+        BETA,
+        GAMMA,
+        T_MATURITY,
+        sigma_grid,
+        rng,
+        args.dT_mode,
+        interior,
+        args.rmse_trials,
+    )
+
+    fig = plot_rmse_vs_noise(
+        sigma_grid,
+        rmse_rel,
+        rmse_abs,
+        h=h,
+        T=T_MATURITY,
+        dT_mode=args.dT_mode,
+        trials_per_sigma=args.rmse_trials,
+    )
+
+    ensure_parent_dir(args.out)
+    fig.savefig(args.out, bbox_inches="tight")
+    print(f"Wrote {args.out}")
+    plt.close(fig)
+
+
+if __name__ == "__main__":
+    main()
--- a/tests/test_black_scholes.cpp
+++ b/tests/test_black_scholes.cpp
@@ -3,6 +3,7 @@
 //

 #include <gtest/gtest.h>
+#include "BlackScholesClosedFormEngine.hpp"
 #include "BlackScholesProcess.hpp"
 #include "MonteCarloEngine.hpp"
 #include "Instrument.hpp"
@@ -51,3 +52,28 @@ TEST(BlackScholesProcess, ExpectedValue) {
    ASSERT_NEAR(callPrice, callGT, tol);
    ASSERT_NEAR(putPrice,  putGT,  tol);
 }
+
+TEST(BlackScholesClosedForm, MatchesReference) {
+    const double K = 100.0;
+    const double T = 1.0;
+
+    const MarketData marketData(
+        100.0,
+        std::make_shared<FlatYieldCurve>(0.01),
+        std::make_shared<FlatVolatilitySurface>(0.2));
+
+    auto processCall = std::make_unique<BlackScholesProcess>(marketData);
+    auto processPut = std::make_unique<BlackScholesProcess>(marketData);
+
+    auto analyticCall = std::make_unique<BlackScholesClosedFormEngine>(std::move(processCall));
+    auto analyticPut = std::make_unique<BlackScholesClosedFormEngine>(std::move(processPut));
+
+    Instrument callInstr(T, std::make_unique<CallPayoff>(K), std::move(analyticCall));
+    Instrument putInstr(T, std::make_unique<PutPayoff>(K), std::move(analyticPut));
+
+    const double callGT = 8.4333186901;
+    const double putGT = 7.4383020650;
+
+    ASSERT_NEAR(callInstr.price(), callGT, 1e-9);
+    ASSERT_NEAR(putInstr.price(), putGT, 1e-9);
+}
Author	SHA1	Message	Date
ddoebel	0174a9b38c	Update README.md Some checks failed C++ CI / build (push) Has been cancelled Details Add references to further analysis	2026-04-02 23:02:39 +00:00
ddoebel	edda985fc1	Update README.md Some checks failed C++ CI / build (push) Has been cancelled Details Add a precise project description	2026-04-02 15:50:18 +00:00
David Doebel	23a28c6776	Add repository root package marker. Some checks failed C++ CI / build (push) Has been cancelled Details Track the top-level __init__.py so local package imports remain consistent with the current project layout. Made-with: Cursor	2026-04-02 16:31:07 +02:00
David Doebel	3dacc0a418	Add publication-ready documentation and reproducible experiment package. Rewrite the README with secure setup instructions, add dedicated setup/security docs, and include the standalone local-volatility instability experiment materials for reproducible analysis. Made-with: Cursor	2026-04-02 16:30:56 +02:00
David Doebel	b3663258e4	Replace hardcoded DB credentials with environment-driven configuration. Centralize DB settings in ingestion config, remove embedded secrets from ingestion helpers, and add an idempotent PostgreSQL bootstrap script to create role/database and apply schema safely. Made-with: Cursor	2026-04-02 16:30:50 +02:00
David Doebel	e9b3a4aac3	Add Python implied-volatility package and data analysis pipeline. Introduce the SVI/implied-vol modules as a package, update project metadata, and add loading/processing utilities that connect database snapshots to calibration workflows. Made-with: Cursor	2026-04-02 16:30:43 +02:00
David Doebel	087a2f0d74	Restructure C++ core into cpp module and package bindings. Move the pricing engine sources out of src/ into cpp/, add the closed-form engine and pybind wiring, and align tests/build targets with the new project layout. Made-with: Cursor	2026-04-02 16:30:33 +02:00
David Doebel	61df0b425d	Ingestion for UBS data draft Some checks failed C++ CI / build (push) Has been cancelled Details	2026-03-25 21:54:05 +01:00
David	ff30a3e1ce	Add __init__.py files Some checks failed C++ CI / build (push) Has been cancelled Details	2026-03-25 20:30:01 +01:00
David Doebel	5008becd15	Write first data ingestion and SQL support Some checks failed C++ CI / build (push) Has been cancelled Details	2026-03-12 14:50:47 +01:00
David Doebel	a503514bf5	Write first data ingestion and SQL support	2026-03-12 13:43:35 +01:00